Skip to content
Merged
Show file tree
Hide file tree
Changes from 16 commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
bc519c0
Add protein description string to FI .idx file so that the original F…
jke000 Dec 22, 2025
4cbcd7e
Per request, the .idx files (for both peptide indexing and fragment i…
jke000 Dec 23, 2025
dec7fc9
additional support for making .idx files independent; support in all …
jke000 Dec 30, 2025
15e9b18
Add prev/next AA, aka flanking residues, to FI .idx. This was actual…
jke000 Dec 30, 2025
e9b9f6f
flanking residues added to peptide index searches with independent .i…
jke000 Dec 30, 2025
f830146
Two items to facilitate Crux support: (1) change "struct Scores" and…
jke000 Jan 6, 2026
b948427
Update CometSearch/CometDataInternal.h
jke000 Jan 6, 2026
0c58809
Update CometSearch/CometSearch.cpp
jke000 Jan 6, 2026
8394e51
remove deprecated code for GetPrevNextAA(), SeekPrevNextAA()
jke000 Jan 6, 2026
73191c5
free the allocated lProteinIndex
jke000 Jan 6, 2026
e44d32b
forgot to return the matched protein(s) when searching .idx files
jke000 Jan 6, 2026
a7ef3ea
misc copilot suggested changes
jke000 Jan 6, 2026
d779c1a
remove duplicate comparison in operator<
jke000 Jan 6, 2026
b626b63
properly set start/end positions for XcorrScore() for .idx search of …
jke000 Jan 7, 2026
de6481f
a few strlen() replacements using a local stored variable after being…
jke000 Jan 7, 2026
421a8f7
update a comment in CometDataInternal.h that copilot is hung up on; m…
jke000 Jan 7, 2026
1e75491
initialize two file pointers to NULL
jke000 Jan 7, 2026
2c4be14
revert moving fclose after cleanup_result in CometSearchManager.cpp
jke000 Jan 7, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 10 additions & 10 deletions CometSearch/CometData.h
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ struct IntRange
}
};

struct Scores
struct CometScores
{
double xCorr;
double dSp;
Expand All @@ -108,7 +108,7 @@ struct Scores
int totalIons;
string sAScoreProSiteScores; // AScore site scores as string

Scores() :
CometScores() :
xCorr(0),
dSp(0),
dCn(0),
Expand All @@ -120,7 +120,7 @@ struct Scores
sAScoreProSiteScores("")
{ }

Scores(double xCorr, double dSp, double dCn, double dExpect, double dAScorePro, double mass, int matchedIons, int totalIons, string sAScoreProSiteScores) :
CometScores(double xCorr, double dSp, double dCn, double dExpect, double dAScorePro, double mass, int matchedIons, int totalIons, string sAScoreProSiteScores) :
xCorr(xCorr),
dSp(dSp),
dCn(dCn),
Expand All @@ -132,7 +132,7 @@ struct Scores
sAScoreProSiteScores(sAScoreProSiteScores)
{ }

Scores(const Scores& a) :
CometScores(const CometScores& a) :
xCorr(a.xCorr),
dSp(a.dSp),
dCn(a.dCn),
Expand All @@ -144,7 +144,7 @@ struct Scores
sAScoreProSiteScores(a.sAScoreProSiteScores)
{ }

Scores& operator=(Scores& a)
CometScores& operator=(CometScores& a)
{
xCorr = a.xCorr;
dSp = a.dSp;
Expand All @@ -159,31 +159,31 @@ struct Scores
}
};

struct ScoresMS1
struct CometScoresMS1
{
float fDotProduct;
float fRTime; // in seconds
int iScanNumber;

ScoresMS1() :
CometScoresMS1() :
fDotProduct(0),
fRTime(0),
iScanNumber(0)
{ }

ScoresMS1(float fDotProduct, float fRTime, int iScanNumber) :
CometScoresMS1(float fDotProduct, float fRTime, int iScanNumber) :
fDotProduct(fDotProduct),
fRTime(fRTime),
iScanNumber(iScanNumber)
{ }

ScoresMS1(const ScoresMS1& a) :
CometScoresMS1(const CometScoresMS1& a) :
fDotProduct(a.fDotProduct),
fRTime(a.fRTime),
iScanNumber(a.iScanNumber)
{ }

ScoresMS1& operator=(ScoresMS1& a)
CometScoresMS1& operator=(CometScoresMS1& a)
{
fDotProduct = a.fDotProduct;
fRTime = a.fRTime;
Expand Down
17 changes: 12 additions & 5 deletions CometSearch/CometDataInternal.h
Original file line number Diff line number Diff line change
Expand Up @@ -256,7 +256,7 @@ struct Results
unsigned short usiMatchedIons;
unsigned short usiTotalIons;
comet_fileoffset_t lProteinFilePosition; // for indexdb, this is the entry in g_pvProteinsList
long lWhichProtein;
long lWhichProtein; // which entry in g_pvProteinsList[] contains the matched proteins
int piVarModSites[MAX_PEPTIDE_LEN_P2]; // store variable mods encoding, +2 to accommodate N/C-term
double pdVarModSites[MAX_PEPTIDE_LEN_P2]; // store variable mods mass diffs, +2 to accommodate N/C-term
char pszMod[MAX_PEPTIDE_LEN][MAX_PEFFMOD_LEN]; // store PEFF mod string
Expand Down Expand Up @@ -452,6 +452,8 @@ struct DBInfo
struct DBIndex
{
char szPeptide[MAX_PEPTIDE_LEN];
char cPrevAA;
char cNextAA;
char pcVarModSites[MAX_PEPTIDE_LEN_P2]; // encodes 0 to VMODS-1 indicating which var mod at which position
comet_fileoffset_t lIndexProteinFilePosition; // points to entry in g_pvProteinsList
double dPepMass; // MH+ pep mass
Expand Down Expand Up @@ -494,10 +496,9 @@ struct DBIndex
return pcVarModSites[i] < rhs.pcVarModSites[i];
}

if (lIndexProteinFilePosition != rhs.lIndexProteinFilePosition)
return lIndexProteinFilePosition < rhs.lIndexProteinFilePosition;

return false; // equal
// FINAL tie-breaker: lowest protein index first in order
// to grab flanking residues from the first protein
return lIndexProteinFilePosition < rhs.lIndexProteinFilePosition;
}
};

Expand All @@ -506,6 +507,8 @@ struct DBIndex
struct PlainPeptideIndexStruct
{
string sPeptide;
char cPrevAA;
char cNextAA;
comet_fileoffset_t lIndexProteinFilePosition; // points to entry in g_pvProteinsList
double dPepMass; // MH+ pep mass, unmodified mass; modified mass in FragmentPeptidesStruct
unsigned short siVarModProteinFilter; // bitwise representation of mmapProtein
Expand Down Expand Up @@ -1030,6 +1033,7 @@ struct StaticParams
extern StaticParams g_staticParams;

extern vector<DBIndex> g_pvDBIndex; // used in both peptide index and fragment ion index; latter to store plain peptides
extern map<long long, IndexProteinStruct> g_pvProteinNames; // indexed database protein names and file positions

extern vector<vector<comet_fileoffset_t>> g_pvProteinsList;

Expand Down Expand Up @@ -1065,6 +1069,9 @@ extern bool g_bPerformDatabaseSearch; // set to true if doing database search
extern bool g_bCometPreprocessMemoryAllocated; // set to true when memory has been allocated
extern bool g_bCometSearchMemoryAllocated; // set to true when memory has been allocated

extern bool g_bIdxNoFasta; // set to true when an .idx file is being searched but the corresponding .fasta is not present
// used in mzid output to skip sequence retrieval

// Query stores information for peptide scoring and results
// This struct is allocated for each spectrum/charge combination
struct Query
Expand Down
53 changes: 47 additions & 6 deletions CometSearch/CometFragmentIndex.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,8 @@ void CometFragmentIndex::GenerateFragmentIndex(ThreadPool *tp)
// In the for loop below, peptide references (iWhichFragmentPeptide) are stored in the FI.
// As the FI is an array of unsigned int pointers, need to ensure that iWhichFragmentPeptide
// will fit into an unsigned int.
if (g_vFragmentPeptides.size() > std::numeric_limits<unsigned int>::max())
// NOTE: explicitly use (std::numeric_limits<unsigned int>::max)() to avoid macro expansion on Windows.
if (g_vFragmentPeptides.size() > (std::numeric_limits<unsigned int>::max)())
{
// handle error: value too large to fit in unsigned int
throw std::overflow_error(" Error: g_vFragmentPeptides.size() too large for unsigned int");
Expand Down Expand Up @@ -558,7 +559,7 @@ if (!(iWhichPeptide%1000))
}


bool CometFragmentIndex::WritePlainPeptideIndex(ThreadPool *tp)
bool CometFragmentIndex::WriteFIPlainPeptideIndex(ThreadPool *tp)
{
FILE *fp;
bool bSucceeded;
Expand Down Expand Up @@ -740,7 +741,22 @@ bool CometFragmentIndex::WritePlainPeptideIndex(ThreadPool *tp)
fprintf(fp, "RequireVariableMod: %d", g_staticParams.variableModParameters.iRequireVarMod);
for (int x = 0; x < FRAGINDEX_VMODS; ++x)
fprintf(fp, " %d", g_staticParams.variableModParameters.varModList[x].iRequireThisMod);
fprintf(fp, "\n");
fprintf(fp, "\n\n");

int iTmp = (int)g_pvProteinNames.size();
comet_fileoffset_t* lProteinIndex = new comet_fileoffset_t[iTmp];
for (int i = 0; i < iTmp; i++)
lProteinIndex[i] = -1;

// first just write out protein names. Track file position of each protein name
int ctProteinNames = 0;
for (auto it = g_pvProteinNames.begin(); it != g_pvProteinNames.end(); ++it)
{
lProteinIndex[ctProteinNames] = comet_ftell(fp);
fwrite(it->second.szProt, sizeof(char) * WIDTH_REFERENCE, 1, fp);
it->second.iWhichProtein = ctProteinNames;
ctProteinNames++;
}

comet_fileoffset_t clPeptidesFilePos = comet_ftell(fp);
size_t tNumPeptides = g_pvDBIndex.size();
Expand All @@ -753,12 +769,14 @@ bool CometFragmentIndex::WritePlainPeptideIndex(ThreadPool *tp)

fwrite(&iLen, sizeof(int), 1, fp);
fwrite((*it).szPeptide, sizeof(char), iLen, fp);
fwrite(&((*it).cPrevAA), sizeof(char), 1, fp); // write prev AA
fwrite(&((*it).cNextAA), sizeof(char), 1, fp); // write next AA
fwrite(&((*it).dPepMass), sizeof(double), 1, fp);
fwrite(&((*it).siVarModProteinFilter), sizeof(unsigned short), 1, fp);
fwrite(&((*it).lIndexProteinFilePosition), clSizeCometFileOffset, 1, fp);

sTmp.sPeptide = (*it).szPeptide;
sTmp.lIndexProteinFilePosition = clSizeCometFileOffset;
sTmp.lIndexProteinFilePosition = (*it).lIndexProteinFilePosition;
sTmp.dPepMass = (*it).dPepMass;
sTmp.siVarModProteinFilter = (*it).siVarModProteinFilter;
g_vRawPeptides.push_back(sTmp);
Expand All @@ -768,16 +786,37 @@ bool CometFragmentIndex::WritePlainPeptideIndex(ThreadPool *tp)
comet_fileoffset_t clProteinsFilePos = comet_ftell(fp);
tTmp = g_pvProteinsList.size();
fwrite(&tTmp, clSizeCometFileOffset, 1, fp);
int iWhichProtein;
for (auto it = g_pvProteinsList.begin(); it != g_pvProteinsList.end(); ++it)
{
tTmp = (*it).size();
fwrite(&tTmp, sizeof(size_t), 1, fp);

for (size_t it2 = 0; it2 < tTmp; ++it2)
{
fwrite(&((*it).at(it2)), clSizeCometFileOffset, 1, fp);
iWhichProtein = -1;

auto result = g_pvProteinNames.find((*it).at(it2));
if (result != g_pvProteinNames.end())
{
iWhichProtein = result->second.iWhichProtein;
}

if (iWhichProtein == -1)
{
string strErrorMsg = " Error writing protein index; protein not found in name map.\n";
logerr(strErrorMsg);
fclose(fp);
delete[] lProteinIndex;
return false;
}

fwrite(&lProteinIndex[iWhichProtein], clSizeCometFileOffset, 1, fp);
}
}

delete[] lProteinIndex;

// now permute mods on the peptides
PermuteIndexPeptideMods(g_vRawPeptides);

Expand All @@ -793,7 +832,7 @@ bool CometFragmentIndex::WritePlainPeptideIndex(ThreadPool *tp)
fwrite(MOD_SEQ_MOD_NUM_START, sizeof(int), ulSizeModSeqs, fp);
fwrite(MOD_SEQ_MOD_NUM_CNT, sizeof(int), ulSizeModSeqs, fp);
fwrite(PEPTIDE_MOD_SEQ_IDXS, sizeof(int), ulSizevRawPeptides, fp);
int iTmp;

for (unsigned long i = 0; i < ulSizeModSeqs; ++i)
{
iTmp = (int)MOD_SEQS[i].size();
Expand Down Expand Up @@ -1065,6 +1104,8 @@ bool CometFragmentIndex::ReadPlainPeptideIndex(void)
tTmp = fread(szPeptide, sizeof(char), iLen, fp);
szPeptide[iLen] = '\0';
sTmp.sPeptide = szPeptide;
tTmp = fread(&(sTmp.cPrevAA), sizeof(char), 1, fp);
tTmp = fread(&(sTmp.cNextAA), sizeof(char), 1, fp);
tTmp = fread(&(sTmp.dPepMass), sizeof(double), 1, fp);
tTmp = fread(&(sTmp.siVarModProteinFilter), sizeof(unsigned short), 1, fp);
tTmp = fread(&(sTmp.lIndexProteinFilePosition), clSizeCometFileOffset, 1, fp);
Expand Down
2 changes: 1 addition & 1 deletion CometSearch/CometFragmentIndex.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ class CometFragmentIndex
CometFragmentIndex();
~CometFragmentIndex();

static bool WritePlainPeptideIndex(ThreadPool *tp);
static bool WriteFIPlainPeptideIndex(ThreadPool *tp);
static bool ReadPlainPeptideIndex(void);
static bool CreateFragmentIndex(ThreadPool *tp);
static int WhichPrecursorBin(double dMass);
Expand Down
4 changes: 2 additions & 2 deletions CometSearch/CometInterfaces.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,15 +45,15 @@ namespace CometInterfaces
vector<string>& strReturnPeptide,
vector<string>& strReturnProtein,
vector<vector<Fragment>>& matchedFragments,
vector<Scores>& scores) = 0;
vector<CometScores>& scores) = 0;
virtual bool DoMS1SearchMultiResults(const double dMaxMS1RTDiff,
const double dMaxQueryRT,
const int topN,
const double dRT,
double* dMass,
double* dInten,
const int iNumPeaks,
vector<ScoresMS1>& scores) = 0;
vector<CometScoresMS1>& scores) = 0;
virtual void AddInputFiles(vector<InputFileInfo*> &pvInputFiles) = 0;
virtual void SetOutputFileBaseName(const char *pszBaseName) = 0;
virtual void SetParam(const string &name, const string &strValue, const string &value) = 0;
Expand Down
Loading
Loading