Skip to content

Commit

Permalink
added ability to specify columns in gget mutate
Browse files Browse the repository at this point in the history
  • Loading branch information
josephrich98 committed Dec 23, 2024
1 parent d4faefe commit 9b3bffa
Show file tree
Hide file tree
Showing 3 changed files with 43 additions and 0 deletions.
9 changes: 9 additions & 0 deletions docs/src/en/cosmic.md
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,15 @@ Whether to keep genome information in the modified database for use with gget mu
`--remove_duplicates`
Whether to remove duplicate rows from the modified database for use with gget mutate. Default: False

`--seq_id_column`
(str) Name of the seq_id column in the csv file created by gget_mutate. Default: "seq_ID"

`--mutation_column`
(str) Name of the mutation column in the csv file created by gget_mutate. Default: "mutation"

`--mut_id_column`
(str) Name of the mutation_id column in the csv file created by gget_mutate. Default: "mutation_id"

`--email`
Email for COSMIC login. Helpful for avoiding required input upon running gget COSMIC. Default: None

Expand Down
13 changes: 13 additions & 0 deletions gget/gget_cosmic.py
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,9 @@ def cosmic(
gget_mutate=True,
keep_genome_info=False,
remove_duplicates=False,
seq_id_column="seq_ID",
mutation_column="mutation",
mut_id_column="mutation_id",
email=None,
password=None,
out=None,
Expand Down Expand Up @@ -268,6 +271,9 @@ def cosmic(
- gget_mutate (True/False) Whether to create a modified version of the database for use with gget mutate. Default: True
- keep_genome_info (True/False) Whether to keep genome information (e.g. location of mutation in the genome) in the modified database for use with gget mutate. Default: False
- remove_duplicates (True/False) Whether to remove duplicate rows from the modified database for use with gget mutate. Default: False
- seq_id_column (str) Name of the seq_id column in the csv file created by gget_mutate. Default: "seq_ID"
- mutation_column (str) Name of the mutation column in the csv file created by gget_mutate. Default: "mutation"
- mut_id_column (str) Name of the mutation_id column in the csv file created by gget_mutate. Default: "mutation_id"
- email (str) Email for COSMIC login. Helpful for avoiding required input upon running gget COSMIC. Default: None
- password (str) Password for COSMIC login. Helpful for avoiding required input upon running gget COSMIC, but password will be stored in plain text in the script. Default: None
Expand Down Expand Up @@ -536,6 +542,13 @@ def cosmic(
df = df.drop_duplicates(subset=["seq_ID", "mutation"], keep="first")
df = df.drop(columns=["non_na_count"])

# Rename the default output columns to the names requested by the caller.
# All three columns go through the same validation: a requested name is
# only applied when it is a non-empty string that differs from the default,
# so None or "" can never produce a blank CSV header.
requested_column_names = {
    "seq_ID": seq_id_column,
    "mutation": mutation_column,
    "mutation_id": mut_id_column,
}
column_renames = {
    default_name: new_name
    for default_name, new_name in requested_column_names.items()
    if isinstance(new_name, str) and new_name and new_name != default_name
}
# A single rename call applies every mapping at once, so a requested name
# that happens to equal another default column (e.g. seq_id_column="mutation")
# is not renamed a second time by a later step.
if column_renames:
    df = df.rename(columns=column_renames)

mutate_csv_out = mutation_tsv_file.replace(".tsv", "_mutation_workflow.csv")
df.to_csv(mutate_csv_out, index=False)

Expand Down
21 changes: 21 additions & 0 deletions gget/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -1880,6 +1880,27 @@ def main():
required=False,
help="Whether to remove duplicated rows from the modified database for use with gget mutate (only for use with --download_cosmic).",
),
# Column-name overrides for the csv file created for use with gget mutate.
parser_cosmic.add_argument(
    "--seq_id_column",
    default="seq_ID",
    type=str,
    required=False,
    help="Name of the seq_id column in the csv file created for use with gget mutate (only for use with --download_cosmic). Default: 'seq_ID'.",
),
parser_cosmic.add_argument(
    "--mutation_column",
    default="mutation",
    type=str,
    required=False,
    help="Name of the mutation column in the csv file created for use with gget mutate (only for use with --download_cosmic). Default: 'mutation'.",
),
parser_cosmic.add_argument(
    "--mut_id_column",
    default="mutation_id",
    type=str,
    required=False,
    help="Name of the mutation_id column in the csv file created for use with gget mutate (only for use with --download_cosmic). Default: 'mutation_id'.",
),
parser_cosmic.add_argument(
"--email",
type=str,
Expand Down

0 comments on commit 9b3bffa

Please sign in to comment.