Skip to content

Commit

Permalink
refactor,style: un-hardcode output directory. QOL code changes
Browse files Browse the repository at this point in the history
changelog:
- output directory is now an `--output_dir` arg instead of being hardcoded as /tmp/
- filepaths are joined via `os.path.join` instead of f-strings, thus removing the assumption of forward-slash file separators
- mapping file retrieval function (`get_mapping_file`) is explicitly called where needed
  • Loading branch information
christinehc committed Nov 27, 2024
1 parent b4981e4 commit 18c3d8b
Showing 1 changed file with 21 additions and 13 deletions.
34 changes: 21 additions & 13 deletions build_script.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from typing import Optional
import pandas as pd
from tqdm import tqdm
from sampleChemMapping.mapping import get_mapping_file


def collectFiles(
Expand Down Expand Up @@ -91,6 +92,7 @@ def runSampMap(
ctfile: str = "",
fses: str = "",
descfile: str = "",
output_dir: str = "/tmp/",
) -> list[str]:
"""
run sample mapping
Expand All @@ -105,6 +107,7 @@ def runSampMap(
f" --sample_files={fses}"
f" --chem_desc={descfile}"
f" --sample_map={smap}"
f" --output_dir={output_dir}"
)
if is_sample:
cmd = f"python sampleChemMapping/map_samples_to_chemicals.py --sample --drc_files={drc} {args}"
Expand All @@ -117,11 +120,16 @@ def runSampMap(
tqdm.write(f"{cmd}\n")
# os.system(cmd)
tqdm.write("ls -la . \n")
os.system(f"ls -la {output_dir}")
##now we validate the files that came out.
dblist = ["/tmp/samples.csv", "/tmp/chemicals.csv", "/tmp/sampleToChemicals.csv"]
dblist = [
os.path.join(output_dir, "samples.csv"),
os.path.join(output_dir, "chemicals.csv"),
os.path.join(output_dir, "sampleToChemicals.csv"),
]
for ftype in ["XYCoords.csv", "DoseResponse.csv", "BMDs.csv"]:
dblist.append(f"/tmp/zebrafishChem{ftype}")
dblist.append(f"/tmp/zebrafishSamp{ftype}")
dblist.append(os.path.join(output_dir, f"zebrafishChem{ftype}"))
dblist.append(os.path.join(output_dir, f"zebrafishSamp{ftype}"))
return dblist
# runSchemaCheck(dblist)

Expand Down Expand Up @@ -179,16 +187,16 @@ def main():
)

##now map sample information
sid = list(df.loc[df.name == "sampId"].location)[0]
cid = list(df.loc[df.name == "chemId"].location)[0]
cclass = list(df.loc[df.name == "class1"].location)[0]
emap = list(df.loc[df.name == "endpointMap"].location)[0]
fses = ",".join(list(df.loc[df.data_type == "sample"].location))
ctfile = list(df.loc[df.name == "compTox"].location)[0]
descfile = list(df.loc[df.name == "chemdesc"].location)[0]
smap = list(df.loc[df.name == "sampMap"].location)[0]
gex1 = ",".join(list(df.loc[df.data_type == "expression"].location))
ginfo = list(df.loc[df.name == "geneInfo"].location)[0]
sid = get_mapping_file(df, "sampId")
cid = get_mapping_file(df, "chemId")
cclass = get_mapping_file(df, "class1")
emap = get_mapping_file(df, "endpointMap")
fses = get_mapping_file(df, "sample", return_first=False)
ctfile = get_mapping_file(df, "compTox") # FIXME: now irrelevant?
descfile = get_mapping_file(df, "chemdesc")
smap = get_mapping_file(df, "sampMap")
gex1 = get_mapping_file(df, "expression", return_first=False)
ginfo = get_mapping_file(df, "geneInfo")

###now we can call individual commands
parser = argparse.ArgumentParser(
Expand Down

0 comments on commit 18c3d8b

Please sign in to comment.