Skip to content

Commit 3249e71

Browse files
authored
Merge pull request #275 from broadinstitute/jb-exp-writer-integration
Fixing bug with slices landing on line ends (SCP-4648)
2 parents f1ba08e + c87e651 commit 3249e71

File tree

8 files changed

+178
-31
lines changed

8 files changed

+178
-31
lines changed

ingest/expression_writer.py

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
"""
2424
from __future__ import annotations
2525

26+
import logging
2627
import os
2728
import re
2829
import multiprocessing
@@ -69,7 +70,8 @@ def __init__(
6970
timestamp = datetime.datetime.now().isoformat(sep="T", timespec="seconds")
7071
url_safe_timestamp = re.sub(':', '', timestamp)
7172
log_name = f"expression_scatter_data_{url_safe_timestamp}_log.txt"
72-
self.dev_logger = setup_logger(__name__, log_name, format="support_configs")
73+
self.log_name = log_name
74+
self.dev_logger = setup_logger(__name__, log_name, level=logging.INFO, format="support_configs")
7375

7476
def get_storage_bucket_name(self):
7577
"""
@@ -110,7 +112,7 @@ def get_file_seek_points(self) -> list[list]:
110112
if current_byte == '': # eof
111113
current_seek.append(file_size)
112114
seek_points.append(current_seek)
113-
break
115+
return seek_points
114116
while current_byte != "\n":
115117
current_byte = matrix_file.read(1)
116118
seek_point += 1
@@ -213,14 +215,16 @@ def read_dense_matrix_slice(self, indexes, matrix_cells, cluster_cells, data_dir
213215
:param data_dir: (str) name of output dir
214216
"""
215217
start_pos, end_pos = indexes
216-
self.dev_logger.info(f" reading {self.local_matrix_path} at index {start_pos}:{end_pos}")
217218
with open_file(self.local_matrix_path)[0] as matrix_file:
218219
current_pos = start_pos
219220
matrix_file.seek(current_pos)
220221
while current_pos < end_pos:
221222
line = matrix_file.readline()
222-
process_dense_line(line, matrix_cells, cluster_cells, data_dir)
223-
current_pos += len(line)
223+
if line == '': # eof
224+
break
225+
else:
226+
process_dense_line(line, matrix_cells, cluster_cells, data_dir)
227+
current_pos += len(line)
224228

225229
def render_artifacts(self):
226230
"""
@@ -254,7 +258,7 @@ def render_artifacts(self):
254258

255259
def delocalize_outputs(self, cluster_name):
256260
"""
257-
Copy all output files to study bucket in parallel using gsutil (since there are usually ~25-30K files)
261+
Write all output files back to source bucket with Content-Encoding: gzip header
258262
259263
:param cluster_name: (str) encoded name of cluster
260264
"""
@@ -265,9 +269,9 @@ def delocalize_outputs(self, cluster_name):
265269
files_to_push = list(file for file in dir_files if 'gene_entries' not in file)
266270
for file in files_to_push:
267271
local_path = f"{cluster_name}/{file}"
268-
IngestFiles.delocalize_file(None, None, self.matrix_file_path, local_path, f"{bucket_path}/{file}")
272+
IngestFiles.delocalize_file(
273+
None, None, self.matrix_file_path, local_path, f"{bucket_path}/{file}", 'gzip'
274+
)
269275
self.dev_logger.info(" push completed")
270-
handler = self.dev_logger.handlers[0]
271-
log_filename = handler.baseFilename.split("/").pop()
272-
IngestFiles.delocalize_file(None, None, self.matrix_file_path, log_filename, f"parse_logs/{log_filename}")
276+
IngestFiles.delocalize_file(None, None, self.matrix_file_path, self.log_name, f"parse_logs/{self.log_name}")
273277

ingest/ingest_files.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -170,14 +170,14 @@ def reset_file(self, file_path, start_point, open_as=None):
170170

171171
@staticmethod
172172
def delocalize_file(
173-
study_file_id, study_id, file_path, file_to_delocalize, bucket_destination
173+
study_file_id, study_id, file_path, file_to_delocalize, bucket_destination, content_encoding=None
174174
):
175175
"""Writes local file to Google bucket
176176
Args:
177177
file_path: path of an ingest file (MUST BE GS url)
178178
file_to_delocalize: name of local file to delocalize (ie. errors.txt)
179179
bucket_destination: path to google bucket (ie. parse_logs/{study_file_id}/errors.txt)
180-
180+
content_encoding: set Content-Encoding header, if specified
181181
"""
182182

183183
if IngestFiles.is_remote_file(file_path):
@@ -187,6 +187,8 @@ def delocalize_file(
187187
storage_client = storage.Client()
188188
bucket = storage_client.get_bucket(bucket_name)
189189
blob = bucket.blob(bucket_destination)
190+
if content_encoding is not None:
191+
blob.content_encoding = content_encoding
190192
blob.upload_from_filename(file_to_delocalize)
191193
IngestFiles.dev_logger.info(
192194
f"File {file_to_delocalize} uploaded to {bucket_destination}."

ingest/writer_functions.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -174,7 +174,8 @@ def process_dense_line(line, matrix_cells, cluster_cells, data_dir):
174174
filtered_expression = filter_expression_for_cluster(
175175
cluster_cells, matrix_cells, exp_vals
176176
)
177-
write_gene_scores(gene_name, filtered_expression, data_dir)
177+
if gene_name:
178+
write_gene_scores(gene_name, filtered_expression, data_dir)
178179

179180
def filter_expression_for_cluster(cluster_cells, exp_cells, exp_scores) -> list:
180181
"""
@@ -197,5 +198,5 @@ def write_gene_scores(gene_name, exp_values, data_dir):
197198
:param exp_values: (list) expression values
198199
:param data_dir: (str) name of output dir
199200
"""
200-
with gzip.open(f"{data_dir}/{gene_name}.json.gz", "wt") as file:
201+
with gzip.open(f"{data_dir}/{gene_name}.json", "wt") as file:
201202
json.dump(list(exp_values), file, separators=(',', ':'))
Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
1+
NAME X Y Category Intensity
2+
TYPE numeric numeric group numeric
3+
BA_1 70.06229534 15.4750564 A 1
4+
BA_2 8.806899522 123.2885491 A 1
5+
BA_3 24.6668434 16.74763274 A 1
6+
BA_4 140.9592493 20.79997729 A 1
7+
BA_5 148.4521712 21.60995118 A 1
8+
BA_6 1.517014096 0.485673286 A 1
9+
BA_7 167.2990346 13.72250288 A 1
10+
BA_8 185.5424524 5.845698625 A 1
11+
BA_9 161.52395382 0.761101364 A 1
12+
BA_10 146.7656655 24.58489322 A 1
13+
BA_11 159.75926361 11.57313875 A 1
14+
BA_12 145.08338493 10.79800326 A 1
15+
BA_13 150.9113465 4.322737256 A 1
16+
BA_14 172.1772514 9.328130584 A 1
17+
BA_15 163.9284063 9.44714249 A 1
18+
BA_16 156.964066 27.79164097 A 1
19+
BA_17 142.79662488 1.678858174 A 1
20+
BA_18 157.0374717 34.70952586 A 2
21+
BA_19 31.2173586 129.10474489 A 2
22+
BA_20 46.5505305 145.53499128 A 2
23+
BA_21 31.87778438 121.80205008 A 2
24+
BA_22 11.58075367 124.215546281 A 2
25+
BA_23 14.2254167 146.38115435 A 2
26+
BA_24 17.1623056 136.03986355 A 2
27+
BA_25 9.256746399 152.66900506 A 2
28+
BA_26 92.88195962 128.85942001 A 2
29+
BA_27 21.0534343 142.28349177 A 2
30+
BA_28 33.2161642 140.11622647 A 2
31+
BA_29 20.7751554 155.950639146 A 2
32+
BA_30 10.07873261 128.07937405 A 2
33+
BA_31 8.4942204 154.97189474 A 2
34+
BA_32 150.9208831 11.44725646 A 2
35+
BA_33 0.424210306 49.21867232 A 2
36+
BA_34 71.30275388 5.994743174 A 2
37+
BA_35 127.2545029 47.24387824 A 3
38+
BA_36 98.83813661 0.718426176 A 3
39+
BA_37 132.2584735 31.57533535 A 3
40+
BA_38 67.38111152 1.119845514 A 3
41+
BA_39 139.7403707 41.34434324 A 3
42+
BA_40 85.78861589 58.96233194 A 3
43+
BA_41 78.57174785 63.72167007 A 3
44+
BA_42 132.5204335 56.63684983 A 3
45+
BA_43 32.64257016 15.23518802 A 3
46+
BA_44 78.4197206 3.1847521 A 3
47+
BA_45 81.36090575 18.05646077 A 3
48+
BA_46 61.54906 6.837187755 A 3
49+
BA_47 104.5976551 74.28083691 A 3
50+
BA_48 131.9139686 33.24419774 A 3
51+
BA_49 118.1952445 58.94229754 A 3
52+
BA_50 94.69533307 38.20265588 A 3
53+
BA_51 98.54061104 58.5908845 A 3
54+
BA_52 61.97581777 7.333870573 A 3
55+
BA_53 23.50683238 24.53014934 A 3
56+
BA_54 87.07327242 75.21382467 A 4
57+
BA_55 10.47288015 48.46996628 A 4
58+
BA_56 116.565805 0.859084757 A 4
59+
BA_57 66.01394942 84.14061223 A 4
60+
BA_58 24.0864559 19.26830177 A 4
61+
BA_59 33.90947813 4.861695771 A 4
62+
BA_60 25.04189822 10.04715776 A 4
63+
BA_61 25.36921453 24.74097752 A 4
64+
BA_62 24.61156152 38.57784862 A 4
65+
BA_63 10.1522579 5.79175452 A 4
66+
BA_64 13.0302349 35.84194255 A 4
67+
BA_65 26.011124488 32.12754131 A 4
68+
BA_66 5.0803395 18.15514097 A 4
69+
BA_67 19.28749831 23.65124583 B 4
70+
BA_68 19.8382996 33.18104936 B 4
71+
BA_69 27.20239256 12.31891513 B 4
72+
BA_70 35.13339329 3.898564134 B 4
73+
BA_71 6.61786492 28.80320591 B 5
74+
BA_72 8.42778854 17.97710953 B 5
75+
BA_73 11.4551651 12.31783043 B 5
76+
BA_74 49.3611518 47.59145243 B 5
77+
BA_75 30.68246176 14.84335078 B 5
78+
BA_76 19.24083924 8.548619948 B 5
79+
BA_77 33.91691748 42.74321772 B 5
80+
BA_78 9.47015852 16.02367435 B 5
81+
BA_79 10.08003693 19.65298385 B 5
82+
BA_80 28.11169025 20.92169851 B 5
83+
BA_81 36.51370152 15.63933441 B 5
84+
BA_82 42.91618048 11.72576369 B 5
85+
BA_83 43.59390095 93.67225673 B 5
86+
BA_84 14.09804905 31.58528132 B 5
87+
BA_85 17.40598078 100.9513669 B 5
88+
BA_86 75.64724783 100.168992 B 5
89+
BA_87 90.80427682 40.88218955 B 5
90+
BA_88 51.61088169 98.32832101 B 5
91+
BA_89 65.43895097 76.10172565 B 6
92+
BA_90 81.22617002 22.83792967 B 6
93+
BA_91 64.41566946 92.39419771 B 6
94+
BA_92 83.356384 25.27722813 B 6
95+
BA_93 6.58784847 97.72445533 B 6
96+
BA_94 10.22712912 96.28708028 B 6
97+
BA_95 25.30950773 89.08937691 B 6
98+
BA_96 5.064443216 70.27891805 B 6
99+
BA_97 33.31745272 5.358689646 B 6
100+
BA_98 30.75465638 104.2346434 B 6
101+
BA_99 3.167842064 20.56360819 B 6
102+
BA_100 83.58125149 13.6926983 B 6
103+
BA_101 41.45546199 64.79995099 B 6
104+
BA_102 14.97411131 3.197034022 B 6
105+
BA_103 56.01933189 110.7914201 B 6
106+
BA_104 67.84116215 106.7649515 B 6
107+
BA_105 77.87455225 32.3238432 B 6
108+
BA_106 55.57453701 23.04472008 B 6
109+
BA_107 56.34315699 40.66431908 C 7
110+
BA_108 32.17126326 79.48627703 C 7
111+
BA_109 61.05967231 102.9731 C 7
112+
BA_110 34.70836803 18.10615973 C 7
113+
BA_111 16.4597735 41.43657929 C 7
114+
BA_112 21.30203917 47.79592699 C 7
115+
BA_113 29.53413987 26.28481726 C 7
116+
BA_114 4.085451703 54.43549834 C 7
117+
BA_115 24.78908857 98.16155471 C 7
118+
BA_116 65.59349525 23.72494654 C 7
119+
BA_117 13.71881681 120.8683135 C 7
120+
BA_118 59.74758488 81.45964404 C 7
121+
BA_119 48.80242017 37.60205067 C 7
122+
BA_120 72.00492188 135.9106901 C 8
123+
BA_121 40.0077021 143.214796 C 8
124+
BA_122 54.78365687 36.69922926 C 8
125+
BA_123 28.47215303 149.3005494 C 8
126+
BA_124 20.97539554 116.0357647 C 8
127+
BA_125 44.68091852 22.24667467 C 8
128+
BA_126 45.51897826 56.59054585 C 8
129+
BA_127 34.42198152 143.67498 C 8
130+
BA_128 1.032447791 78.83679245 C 8
131+
BA_129 65.79875582 111.0088786 C 8
132+
BA_130 66.44434159 82.45952119 C 8
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
GENE BA_1 BA_2 BA_3 BA_4 BA_5 BA_6 BA_7 BA_8 BA_9 BA_10 BA_11 BA_12 BA_13 BA_14 BA_15 BA_16 BA_17 BA_18 BA_19 BA_20 BA_21 BA_22 BA_23 BA_24 BA_25 BA_26 BA_27 BA_28 BA_29 BA_30 BA_31 BA_32 BA_33 BA_34 BA_35 BA_36 BA_37 BA_38 BA_39 BA_40 BA_41 BA_42 BA_43 BA_44 BA_45 BA_46 BA_47 BA_48 BA_49 BA_50 BA_51 BA_52 BA_53 BA_54 BA_55 BA_56 BA_57 BA_58 BA_59 BA_60 BA_61 BA_62 BA_63 BA_64 BA_65 BA_66 BA_67 BA_68 BA_69 BA_70 BA_71 BA_72 BA_73 BA_74 BA_75 BA_76 BA_77 BA_78 BA_79 BA_80 BA_81 BA_82 BA_83 BA_84 BA_85 BA_86 BA_87 BA_88 BA_89 BA_90 BA_91 BA_92 BA_93 BA_94 BA_95 BA_96 BA_97 BA_98 BA_99 BA_100 BA_101 BA_102 BA_103 BA_104 BA_105 BA_106 BA_107 BA_108 BA_109 BA_110 BA_111 BA_112 BA_113 BA_114 BA_115 BA_116 BA_117 BA_118 BA_119 BA_120 BA_121 BA_122 BA_123 BA_124 BA_125 BA_126 BA_127 BA_128 BA_129 BA_130
2+
Adcy5 0 0 4 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
3+
Agpat2 0 0 0 0 5 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 6 7 8 6 6 7 6 0 6 0 6 6 6 6 6 0 6 6 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
4+
Agtr1 6 6 6 6 6 6 6 6 6 6 6 6 4 4 5 5 8 6 6 6 6 6 6 6 6 5 4 6 6 6 4 6 6 6 4 6 7 6 6 4 6 6 8 7 5 6 6 5 6 6 6 6 6 6 8 7 6 7 7 6 6 6 7 6 5 6 8 7 6 6 7 5 6 7 6 8 6 7 6 6 5 6 2 6 6 3 6 4 6 5 6 7 6 9 6 9 6 7 6 6 5 6 6 2 6 4 6 9 6 6 6 7 6 6 6 5 6 6 6 3 6 6 6 3 6 6 6 5 6 6
5+
Aifm1 0 0 5 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
6+
Apex1 0 0 5 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
7+
Apoc3 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
8+
Apoe 0 0 4 0 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0

tests/test_expression_writer.py

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ def seed_test_gene_entries(data_dir):
4545

4646
@classmethod
4747
def teardown_class(cls):
48-
logs = glob.glob('expression_scatter_images_*_log.txt')
48+
logs = glob.glob('expression_scatter_data_*_log.txt')
4949
for log in logs:
5050
os.remove(log)
5151
test_dirs = glob.glob(f"{TestExpressionWriter.TEST_PREFIX}*")
@@ -60,13 +60,13 @@ def test_process_dense_matrix(self):
6060
os.path.exists(cluster_name)
6161
)
6262
self.assertTrue(
63-
os.path.exists(f"{cluster_name}/Sergef.json.gz")
63+
os.path.exists(f"{cluster_name}/Sergef.json")
6464
)
6565
self.assertTrue(
66-
os.path.exists(f"{cluster_name}/Itm2a.json.gz")
66+
os.path.exists(f"{cluster_name}/Itm2a.json")
6767
)
6868
expected_data = json.loads(open(f"data/expression_writer/Sergef.json").read())
69-
rendered_data = json.loads(gzip.open(f"{cluster_name}/Sergef.json.gz").read())
69+
rendered_data = json.loads(gzip.open(f"{cluster_name}/Sergef.json").read())
7070
self.assertEqual(
7171
expected_data, rendered_data
7272
)
@@ -82,13 +82,13 @@ def test_process_sparse_matrix(self):
8282
genes.remove('HOMER2') # doesn't render gene entry file
8383
for gene in genes:
8484
self.assertTrue(
85-
os.path.exists(f"{cluster_name}/{gene}.json.gz")
85+
os.path.exists(f"{cluster_name}/{gene}.json")
8686
)
8787
self.assertTrue(
8888
os.path.exists(f"{cluster_name}/gene_entries/{gene}__entries.txt")
8989
)
90-
expected_data = json.loads(open(f"data/writer_functions/OXCT2.json").read())
91-
rendered_data = json.loads(gzip.open(f"{cluster_name}/OXCT2.json.gz").read())
90+
expected_data = json.loads(open(f"data/writer_functions/OXCT2.orig.json").read())
91+
rendered_data = json.loads(gzip.open(f"{cluster_name}/OXCT2.json").read())
9292
self.assertEqual(
9393
expected_data, rendered_data
9494
)
@@ -168,7 +168,7 @@ def test_process_sparse_data_fragments(self):
168168
genes.remove('HOMER2')
169169
for gene in genes:
170170
self.assertTrue(
171-
os.path.exists(f"{cluster_name}/{gene}.json.gz")
171+
os.path.exists(f"{cluster_name}/{gene}.json")
172172
)
173173

174174
def test_write_empty_sparse_genes(self):
@@ -180,7 +180,7 @@ def test_write_empty_sparse_genes(self):
180180
genes = load_entities_as_list(open(exp_writer.gene_file))
181181
exp_writer.write_empty_sparse_genes(genes, num_cells, cluster_name)
182182
# only empty gene should be HOMER2
183-
gene = 'HOMER2.json.gz'
183+
gene = 'HOMER2.json'
184184
self.assertTrue(
185185
os.path.exists(f"{cluster_name}/{gene}")
186186
)
@@ -198,8 +198,8 @@ def test_read_dense_matrix_slice(self):
198198
cells = list(f"CELL_000{i}" for i in range(1, 16))
199199
exp_writer.read_dense_matrix_slice(indexes, cells, cells, cluster_name)
200200
self.assertTrue(
201-
os.path.exists(f"{cluster_name}/Sergef.json.gz")
201+
os.path.exists(f"{cluster_name}/Sergef.json")
202202
)
203203
self.assertTrue(
204-
os.path.exists(f"{cluster_name}/Itm2a.json.gz")
204+
os.path.exists(f"{cluster_name}/Itm2a.json")
205205
)

tests/test_writer_functions.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -104,10 +104,10 @@ def test_process_sparse_fragment(self):
104104
# barcodes & cluster cells should be identical in this example
105105
process_sparse_fragment('OXCT2__entries.txt', barcodes, barcodes, data_dir)
106106
self.assertTrue(
107-
os.path.exists(f"{data_dir}/OXCT2.json.gz")
107+
os.path.exists(f"{data_dir}/OXCT2.json")
108108
)
109-
rendered_data = json.loads(gzip.open(f"{data_dir}/OXCT2.json.gz").read())
110-
expected_data = json.loads(open(f"{data_dir}/OXCT2.json").read())
109+
rendered_data = json.loads(gzip.open(f"{data_dir}/OXCT2.json").read())
110+
expected_data = json.loads(open(f"{data_dir}/OXCT2.orig.json").read())
111111
self.assertEqual(
112112
expected_data, rendered_data
113113
)
@@ -133,9 +133,9 @@ def test_process_dense_line(self):
133133
data_dir = 'data/writer_functions'
134134
process_dense_line(line, matrix_cells, cluster_cells, data_dir)
135135
self.assertTrue(
136-
os.path.exists(f"{data_dir}/Gad1.json.gz")
136+
os.path.exists(f"{data_dir}/Gad1.json")
137137
)
138-
rendered_data = json.loads(gzip.open(f"{data_dir}/Gad1.json.gz").read())
138+
rendered_data = json.loads(gzip.open(f"{data_dir}/Gad1.json").read())
139139
self.assertEqual(
140140
expected_data, rendered_data
141141
)
@@ -162,7 +162,7 @@ def test_write_gene_scores(self):
162162
gene = 'Egfr'
163163
data_dir = 'data/writer_functions'
164164
write_gene_scores(gene, data, data_dir)
165-
rendered_data = json.loads(gzip.open(f"{data_dir}/{gene}.json.gz").read())
165+
rendered_data = json.loads(gzip.open(f"{data_dir}/{gene}.json").read())
166166
self.assertEqual(
167167
data, rendered_data
168168
)

0 commit comments

Comments
 (0)