Skip to content

Commit 315fdd5

Browse files
committed
add: function to create indices if they do not exist when using duckdb connections
1 parent 3a460ab commit 315fdd5

2 files changed

Lines changed: 43 additions & 39 deletions

File tree

pyprophet/data_handling.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,20 @@ def check_sqlite_table(con, table):
9595

9696
return(table_present)
9797

98+
def create_index_if_not_exists(con, index_name, table_name, column_name):
    """Create an index on a table unless an index with that name is already listed.

    Intended for duckdb connections to sqlite files. The existence check
    queries ``duckdb_indexes()`` for a row matching both ``index_name`` and
    ``table_name``; only when no such row is found is ``CREATE INDEX`` issued.

    Args:
        con: Connection object exposing ``execute(sql, params)`` whose result
            supports ``fetchone()``.
        index_name: Name of the index to look up and, if missing, create.
        table_name: Name of the table the index belongs to.
        column_name: Column (or comma-separated column list) to index.

    Returns:
        None.

    Note:
        SQL identifiers cannot be bound as parameters, so ``index_name``,
        ``table_name`` and ``column_name`` are interpolated verbatim into the
        ``CREATE INDEX`` statement. Pass only trusted identifiers.
    """
    # Bind the names as parameters so quotes or other special characters in
    # them cannot break (or alter) the lookup query.
    existing = con.execute(
        """
        SELECT count(*)
        FROM duckdb_indexes()
        WHERE index_name = ?
        AND table_name = ?
        """,
        [index_name, table_name],
    ).fetchone()

    if existing[0] == 0:
        con.execute(f"CREATE INDEX {index_name} ON {table_name} ({column_name})")
111+
98112

99113

100114
def is_parquet_file(file_path):

pyprophet/ipf.py

Lines changed: 29 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
import time
1010

1111
from scipy.stats import rankdata
12-
from .data_handling import check_sqlite_table, is_parquet_file, get_parquet_column_names
12+
from .data_handling import check_sqlite_table, create_index_if_not_exists, is_parquet_file, get_parquet_column_names
1313
from shutil import copyfile
1414

1515

@@ -47,15 +47,13 @@ def read_pyp_peakgroup_precursor(path, ipf_max_peakgroup_pep, ipf_ms1_scoring, i
4747
if not check_sqlite_table(con_sqlite, "SCORE_MS2") or not check_sqlite_table(con_sqlite, "SCORE_TRANSITION"):
4848
raise click.ClickException("Apply scoring to MS2 and transition-level data before running IPF.")
4949

50-
con.execute('''
51-
CREATE INDEX IF NOT EXISTS idx_transition_id ON TRANSITION (ID);
52-
CREATE INDEX IF NOT EXISTS idx_precursor_precursor_id ON PRECURSOR (ID);
53-
CREATE INDEX IF NOT EXISTS idx_feature_precursor_id ON FEATURE (PRECURSOR_ID);
54-
CREATE INDEX IF NOT EXISTS idx_feature_feature_id ON FEATURE (ID);
55-
CREATE INDEX IF NOT EXISTS idx_score_ms2_feature_id ON SCORE_MS2 (FEATURE_ID);
56-
CREATE INDEX IF NOT EXISTS idx_score_transition_feature_id ON SCORE_TRANSITION (FEATURE_ID);
57-
CREATE INDEX IF NOT EXISTS idx_score_transition_transition_id ON SCORE_TRANSITION (TRANSITION_ID);
58-
''')
50+
create_index_if_not_exists(con, 'idx_transition_id', 'TRANSITION', 'ID')
51+
create_index_if_not_exists(con, 'idx_precursor_precursor_id', 'PRECURSOR', 'ID')
52+
create_index_if_not_exists(con, 'idx_feature_precursor_id', 'FEATURE', 'PRECURSOR_ID')
53+
create_index_if_not_exists(con, 'idx_feature_feature_id', 'FEATURE', 'ID')
54+
create_index_if_not_exists(con, 'idx_score_ms2_feature_id', 'SCORE_MS2', 'FEATURE_ID')
55+
create_index_if_not_exists(con, 'idx_score_transition_feature_id', 'SCORE_TRANSITION', 'FEATURE_ID')
56+
create_index_if_not_exists(con, 'idx_score_transition_transition_id', 'SCORE_TRANSITION', 'TRANSITION_ID')
5957

6058
data = con.execute('''
6159
SELECT FEATURE.ID AS FEATURE_ID,
@@ -81,13 +79,11 @@ def read_pyp_peakgroup_precursor(path, ipf_max_peakgroup_pep, ipf_ms1_scoring, i
8179
if not check_sqlite_table(con_sqlite, "SCORE_MS1") or not check_sqlite_table(con_sqlite, "SCORE_MS2") or not check_sqlite_table(con_sqlite, "SCORE_TRANSITION"):
8280
raise click.ClickException("Apply scoring to MS1, MS2 and transition-level data before running IPF.")
8381

84-
con.execute('''
85-
CREATE INDEX IF NOT EXISTS idx_precursor_precursor_id ON PRECURSOR (ID);
86-
CREATE INDEX IF NOT EXISTS idx_feature_precursor_id ON FEATURE (PRECURSOR_ID);
87-
CREATE INDEX IF NOT EXISTS idx_feature_feature_id ON FEATURE (ID);
88-
CREATE INDEX IF NOT EXISTS idx_score_ms1_feature_id ON SCORE_MS1 (FEATURE_ID);
89-
CREATE INDEX IF NOT EXISTS idx_score_ms2_feature_id ON SCORE_MS2 (FEATURE_ID);
90-
''')
82+
create_index_if_not_exists(con, 'idx_precursor_precursor_id', 'PRECURSOR', 'ID')
83+
create_index_if_not_exists(con, 'idx_feature_precursor_id', 'FEATURE', 'PRECURSOR_ID')
84+
create_index_if_not_exists(con, 'idx_feature_feature_id', 'FEATURE', 'ID')
85+
create_index_if_not_exists(con, 'idx_score_ms1_feature_id', 'SCORE_MS1', 'FEATURE_ID')
86+
create_index_if_not_exists(con, 'idx_score_ms2_feature_id', 'SCORE_MS2', 'FEATURE_ID')
9187

9288
data = con.execute('''
9389
SELECT FEATURE.ID AS FEATURE_ID,
@@ -107,16 +103,14 @@ def read_pyp_peakgroup_precursor(path, ipf_max_peakgroup_pep, ipf_ms1_scoring, i
107103
if not check_sqlite_table(con_sqlite, "SCORE_MS1") or not check_sqlite_table(con_sqlite, "SCORE_MS2") or not check_sqlite_table(con_sqlite, "SCORE_TRANSITION"):
108104
raise click.ClickException("Apply scoring to MS1, MS2 and transition-level data before running IPF.")
109105

110-
con.execute('''
111-
CREATE INDEX IF NOT EXISTS idx_transition_id ON TRANSITION (ID);
112-
CREATE INDEX IF NOT EXISTS idx_precursor_precursor_id ON PRECURSOR (ID);
113-
CREATE INDEX IF NOT EXISTS idx_feature_precursor_id ON FEATURE (PRECURSOR_ID);
114-
CREATE INDEX IF NOT EXISTS idx_feature_feature_id ON FEATURE (ID);
115-
CREATE INDEX IF NOT EXISTS idx_score_ms1_feature_id ON SCORE_MS1 (FEATURE_ID);
116-
CREATE INDEX IF NOT EXISTS idx_score_ms2_feature_id ON SCORE_MS2 (FEATURE_ID);
117-
CREATE INDEX IF NOT EXISTS idx_score_transition_feature_id ON SCORE_TRANSITION (FEATURE_ID);
118-
CREATE INDEX IF NOT EXISTS idx_score_transition_transition_id ON SCORE_TRANSITION (TRANSITION_ID);
119-
''')
106+
create_index_if_not_exists(con, 'idx_transition_id', 'TRANSITION', 'ID')
107+
create_index_if_not_exists(con, 'idx_precursor_precursor_id', 'PRECURSOR', 'ID')
108+
create_index_if_not_exists(con, 'idx_feature_precursor_id', 'FEATURE', 'PRECURSOR_ID')
109+
create_index_if_not_exists(con, 'idx_feature_feature_id', 'FEATURE', 'ID')
110+
create_index_if_not_exists(con, 'idx_score_ms1_feature_id', 'SCORE_MS1', 'FEATURE_ID')
111+
create_index_if_not_exists(con, 'idx_score_ms2_feature_id', 'SCORE_MS2', 'FEATURE_ID')
112+
create_index_if_not_exists(con, 'idx_score_transition_feature_id', 'SCORE_TRANSITION', 'FEATURE_ID')
113+
create_index_if_not_exists(con, 'idx_score_transition_transition_id', 'SCORE_TRANSITION', 'TRANSITION_ID')
120114

121115
data = con.execute('''
122116
SELECT FEATURE.ID AS FEATURE_ID,
@@ -143,12 +137,10 @@ def read_pyp_peakgroup_precursor(path, ipf_max_peakgroup_pep, ipf_ms1_scoring, i
143137
if not check_sqlite_table(con_sqlite, "SCORE_MS2") or not check_sqlite_table(con_sqlite, "SCORE_TRANSITION"):
144138
raise click.ClickException("Apply scoring to MS2 and transition-level data before running IPF.")
145139

146-
con.execute('''
147-
CREATE INDEX IF NOT EXISTS idx_precursor_precursor_id ON PRECURSOR (ID);
148-
CREATE INDEX IF NOT EXISTS idx_feature_precursor_id ON FEATURE (PRECURSOR_ID);
149-
CREATE INDEX IF NOT EXISTS idx_feature_feature_id ON FEATURE (ID);
150-
CREATE INDEX IF NOT EXISTS idx_score_ms2_feature_id ON SCORE_MS2 (FEATURE_ID);
151-
''')
140+
create_index_if_not_exists(con, 'idx_precursor_precursor_id', 'PRECURSOR', 'ID')
141+
create_index_if_not_exists(con, 'idx_feature_precursor_id', 'FEATURE', 'PRECURSOR_ID')
142+
create_index_if_not_exists(con, 'idx_feature_feature_id', 'FEATURE', 'ID')
143+
create_index_if_not_exists(con, 'idx_score_ms2_feature_id', 'SCORE_MS2', 'FEATURE_ID')
152144

153145
data = con.execute('''
154146
SELECT FEATURE.ID AS FEATURE_ID,
@@ -179,12 +171,10 @@ def read_pyp_transition(path, ipf_max_transition_pep, ipf_h0):
179171

180172
con = duckdb.connect(database=path, read_only=False)
181173

182-
con.execute('''
183-
CREATE INDEX IF NOT EXISTS idx_transition_peptide_mapping_transition_id ON TRANSITION_PEPTIDE_MAPPING (TRANSITION_ID);
184-
CREATE INDEX IF NOT EXISTS idx_transition_id ON TRANSITION (ID);
185-
CREATE INDEX IF NOT EXISTS idx_score_transition_feature_id ON SCORE_TRANSITION (FEATURE_ID);
186-
CREATE INDEX IF NOT EXISTS idx_score_transition_transition_id ON SCORE_TRANSITION (TRANSITION_ID);
187-
''')
174+
create_index_if_not_exists(con, 'idx_transition_peptide_mapping_transition_id', 'TRANSITION_PEPTIDE_MAPPING', 'TRANSITION_ID')
175+
create_index_if_not_exists(con, 'idx_transition_id', 'TRANSITION', 'ID')
176+
create_index_if_not_exists(con, 'idx_score_transition_feature_id', 'SCORE_TRANSITION', 'FEATURE_ID')
177+
create_index_if_not_exists(con, 'idx_score_transition_transition_id', 'SCORE_TRANSITION', 'TRANSITION_ID')
188178

189179
# transition-level evidence
190180
evidence = con.execute('''

0 commit comments

Comments
 (0)