diff --git a/CometSearch/CometData.h b/CometSearch/CometData.h index 75079172..4e469cce 100644 --- a/CometSearch/CometData.h +++ b/CometSearch/CometData.h @@ -96,7 +96,7 @@ struct IntRange } }; -struct Scores +struct CometScores { double xCorr; double dSp; @@ -108,7 +108,7 @@ struct Scores int totalIons; string sAScoreProSiteScores; // AScore site scores as string - Scores() : + CometScores() : xCorr(0), dSp(0), dCn(0), @@ -120,7 +120,7 @@ struct Scores sAScoreProSiteScores("") { } - Scores(double xCorr, double dSp, double dCn, double dExpect, double dAScorePro, double mass, int matchedIons, int totalIons, string sAScoreProSiteScores) : + CometScores(double xCorr, double dSp, double dCn, double dExpect, double dAScorePro, double mass, int matchedIons, int totalIons, string sAScoreProSiteScores) : xCorr(xCorr), dSp(dSp), dCn(dCn), @@ -132,7 +132,7 @@ struct Scores sAScoreProSiteScores(sAScoreProSiteScores) { } - Scores(const Scores& a) : + CometScores(const CometScores& a) : xCorr(a.xCorr), dSp(a.dSp), dCn(a.dCn), @@ -144,7 +144,7 @@ struct Scores sAScoreProSiteScores(a.sAScoreProSiteScores) { } - Scores& operator=(Scores& a) + CometScores& operator=(const CometScores& a) { xCorr = a.xCorr; dSp = a.dSp; @@ -159,31 +159,31 @@ struct Scores } }; -struct ScoresMS1 +struct CometScoresMS1 { float fDotProduct; float fRTime; // in seconds int iScanNumber; - ScoresMS1() : + CometScoresMS1() : fDotProduct(0), fRTime(0), iScanNumber(0) { } - ScoresMS1(float fDotProduct, float fRTime, int iScanNumber) : + CometScoresMS1(float fDotProduct, float fRTime, int iScanNumber) : fDotProduct(fDotProduct), fRTime(fRTime), iScanNumber(iScanNumber) { } - ScoresMS1(const ScoresMS1& a) : + CometScoresMS1(const CometScoresMS1& a) : fDotProduct(a.fDotProduct), fRTime(a.fRTime), iScanNumber(a.iScanNumber) { } - ScoresMS1& operator=(ScoresMS1& a) + CometScoresMS1& operator=(const CometScoresMS1& a) { fDotProduct = a.fDotProduct; fRTime = a.fRTime; diff --git a/CometSearch/CometDataInternal.h b/CometSearch/CometDataInternal.h index eef28e16..a5bdac3c 100644 --- a/CometSearch/CometDataInternal.h +++ b/CometSearch/CometDataInternal.h @@ -256,7 +256,7 @@ struct Results unsigned short usiMatchedIons; unsigned short usiTotalIons; comet_fileoffset_t lProteinFilePosition; // for indexdb, this is the entry in g_pvProteinsList - long lWhichProtein; + long lWhichProtein; // which entry in g_pvProteinsList[] contains the matched proteins int piVarModSites[MAX_PEPTIDE_LEN_P2]; // store variable mods encoding, +2 to accomodate N/C-term double pdVarModSites[MAX_PEPTIDE_LEN_P2]; // store variable mods mass diffs, +2 to accomodate N/C-term char pszMod[MAX_PEPTIDE_LEN][MAX_PEFFMOD_LEN]; // store PEFF mod string @@ -452,6 +452,8 @@ struct DBInfo struct DBIndex { char szPeptide[MAX_PEPTIDE_LEN]; + char cPrevAA; + char cNextAA; char pcVarModSites[MAX_PEPTIDE_LEN_P2]; // encodes 0 to VMODS-1 indicating which var mod at which position comet_fileoffset_t lIndexProteinFilePosition; // points to entry in g_pvProteinsList double dPepMass; // MH+ pep mass @@ -494,10 +496,9 @@ struct DBIndex return pcVarModSites[i] < rhs.pcVarModSites[i]; } - if (lIndexProteinFilePosition != rhs.lIndexProteinFilePosition) - return lIndexProteinFilePosition < rhs.lIndexProteinFilePosition; - - return false; // equal + // FINAL tie-breaker: lowest protein index first in order + // to grab flanking residues from the first protein + return lIndexProteinFilePosition < rhs.lIndexProteinFilePosition; } }; @@ -506,6 +507,8 @@ struct DBIndex struct PlainPeptideIndexStruct { string sPeptide; + char cPrevAA; + char cNextAA; comet_fileoffset_t lIndexProteinFilePosition; // points to entry in g_pvProteinsList double dPepMass; // MH+ pep mass, unmodified mass; modified mass in FragmentPeptidesStruct unsigned short siVarModProteinFilter; // bitwise representation of mmapProtein @@ -1030,6 +1033,7 @@ struct StaticParams extern StaticParams g_staticParams; extern vector g_pvDBIndex; // used in both peptide index and fragment ion index; latter to store plain peptides +extern map g_pvProteinNames; // indexed database protein names and file positions extern vector> g_pvProteinsList; @@ -1065,6 +1069,9 @@ extern bool g_bPerformDatabaseSearch; // set to true if doing database search extern bool g_bCometPreprocessMemoryAllocated; // set to true when memory has been allocated extern bool g_bCometSearchMemoryAllocated; // set to true when memory has been allocated +extern bool g_bIdxNoFasta; // set to true when .idx file being search but corresponding .fasta not present + // used in mzid output to skip sequence retrieval + // Query stores information for peptide scoring and results // This struct is allocated for each spectrum/charge combination struct Query diff --git a/CometSearch/CometFragmentIndex.cpp b/CometSearch/CometFragmentIndex.cpp index 38c99724..4382626b 100644 --- a/CometSearch/CometFragmentIndex.cpp +++ b/CometSearch/CometFragmentIndex.cpp @@ -186,7 +186,8 @@ void CometFragmentIndex::GenerateFragmentIndex(ThreadPool *tp) // In the for loop below, peptide references (iWhichFragmentPeptide) are stored in the FI. // As the FI is an array of unsigned int pointers, need to ensure that iWhichFragmentPeptide // will fit into an unsigned int. - if (g_vFragmentPeptides.size() > std::numeric_limits::max()) + // NOTE: explicitly use (std::numeric_limits::max)() to avoid macro expansion on Windows. + if (g_vFragmentPeptides.size() > (std::numeric_limits::max)()) { // handle error: value too large to fit in unsigned int throw std::overflow_error(" Error: g_vFragmentPeptides.size() too large for unsigned int"); @@ -558,7 +559,7 @@ if (!(iWhichPeptide%1000)) } -bool CometFragmentIndex::WritePlainPeptideIndex(ThreadPool *tp) +bool CometFragmentIndex::WriteFIPlainPeptideIndex(ThreadPool *tp) { FILE *fp; bool bSucceeded; @@ -740,7 +741,22 @@ bool CometFragmentIndex::WritePlainPeptideIndex(ThreadPool *tp) fprintf(fp, "RequireVariableMod: %d", g_staticParams.variableModParameters.iRequireVarMod); for (int x = 0; x < FRAGINDEX_VMODS; ++x) fprintf(fp, " %d", g_staticParams.variableModParameters.varModList[x].iRequireThisMod); - fprintf(fp, "\n"); + fprintf(fp, "\n\n"); + + int iTmp = (int)g_pvProteinNames.size(); + comet_fileoffset_t* lProteinIndex = new comet_fileoffset_t[iTmp]; + for (int i = 0; i < iTmp; i++) + lProteinIndex[i] = -1; + + // first just write out protein names. Track file position of each protein name + int ctProteinNames = 0; + for (auto it = g_pvProteinNames.begin(); it != g_pvProteinNames.end(); ++it) + { + lProteinIndex[ctProteinNames] = comet_ftell(fp); + fwrite(it->second.szProt, sizeof(char) * WIDTH_REFERENCE, 1, fp); + it->second.iWhichProtein = ctProteinNames; + ctProteinNames++; + } comet_fileoffset_t clPeptidesFilePos = comet_ftell(fp); size_t tNumPeptides = g_pvDBIndex.size(); @@ -753,12 +769,14 @@ bool CometFragmentIndex::WritePlainPeptideIndex(ThreadPool *tp) fwrite(&iLen, sizeof(int), 1, fp); fwrite((*it).szPeptide, sizeof(char), iLen, fp); + fwrite(&((*it).cPrevAA), sizeof(char), 1, fp); // write prev AA + fwrite(&((*it).cNextAA), sizeof(char), 1, fp); // write next AA fwrite(&((*it).dPepMass), sizeof(double), 1, fp); fwrite(&((*it).siVarModProteinFilter), sizeof(unsigned short), 1, fp); fwrite(&((*it).lIndexProteinFilePosition), clSizeCometFileOffset, 1, fp); sTmp.sPeptide = (*it).szPeptide; - sTmp.lIndexProteinFilePosition = clSizeCometFileOffset; + sTmp.lIndexProteinFilePosition = (*it).lIndexProteinFilePosition; sTmp.dPepMass = (*it).dPepMass; sTmp.siVarModProteinFilter = (*it).siVarModProteinFilter; g_vRawPeptides.push_back(sTmp); @@ -768,16 +786,37 @@ bool CometFragmentIndex::WritePlainPeptideIndex(ThreadPool *tp) comet_fileoffset_t clProteinsFilePos = comet_ftell(fp); tTmp = g_pvProteinsList.size(); fwrite(&tTmp, clSizeCometFileOffset, 1, fp); + int iWhichProtein; for (auto it = g_pvProteinsList.begin(); it != g_pvProteinsList.end(); ++it) { tTmp = (*it).size(); fwrite(&tTmp, sizeof(size_t), 1, fp); + for (size_t it2 = 0; it2 < tTmp; ++it2) { - fwrite(&((*it).at(it2)), clSizeCometFileOffset, 1, fp); + iWhichProtein = -1; + + auto result = g_pvProteinNames.find((*it).at(it2)); + if (result != g_pvProteinNames.end()) + { + iWhichProtein = result->second.iWhichProtein; + } + + if (iWhichProtein == -1) + { + string strErrorMsg = " Error writing protein index; protein not found in name map.\n"; + logerr(strErrorMsg); + fclose(fp); + delete[] lProteinIndex; + return false; + } + + fwrite(&lProteinIndex[iWhichProtein], clSizeCometFileOffset, 1, fp); } } + delete[] lProteinIndex; + // now permute mods on the peptides PermuteIndexPeptideMods(g_vRawPeptides); @@ -793,7 +832,7 @@ bool CometFragmentIndex::WritePlainPeptideIndex(ThreadPool *tp) fwrite(MOD_SEQ_MOD_NUM_START, sizeof(int), ulSizeModSeqs, fp); fwrite(MOD_SEQ_MOD_NUM_CNT, sizeof(int), ulSizeModSeqs, fp); fwrite(PEPTIDE_MOD_SEQ_IDXS, sizeof(int), ulSizevRawPeptides, fp); - int iTmp; + for (unsigned long i = 0; i < ulSizeModSeqs; ++i) { iTmp = (int)MOD_SEQS[i].size(); @@ -1065,6 +1104,8 @@ bool CometFragmentIndex::ReadPlainPeptideIndex(void) tTmp = fread(szPeptide, sizeof(char), iLen, fp); szPeptide[iLen] = '\0'; sTmp.sPeptide = szPeptide; + tTmp = fread(&(sTmp.cPrevAA), sizeof(char), 1, fp); + tTmp = fread(&(sTmp.cNextAA), sizeof(char), 1, fp); tTmp = fread(&(sTmp.dPepMass), sizeof(double), 1, fp); tTmp = fread(&(sTmp.siVarModProteinFilter), sizeof(unsigned short), 1, fp); tTmp = fread(&(sTmp.lIndexProteinFilePosition), clSizeCometFileOffset, 1, fp); diff --git a/CometSearch/CometFragmentIndex.h b/CometSearch/CometFragmentIndex.h index b39b9832..840c1b34 100644 --- a/CometSearch/CometFragmentIndex.h +++ b/CometSearch/CometFragmentIndex.h @@ -26,7 +26,7 @@ class CometFragmentIndex CometFragmentIndex(); ~CometFragmentIndex(); - static bool WritePlainPeptideIndex(ThreadPool *tp); + static bool WriteFIPlainPeptideIndex(ThreadPool *tp); static bool ReadPlainPeptideIndex(void); static bool CreateFragmentIndex(ThreadPool *tp); static int WhichPrecursorBin(double dMass); diff --git a/CometSearch/CometInterfaces.h b/CometSearch/CometInterfaces.h index bdcbbaa0..111e78e7 100644 --- a/CometSearch/CometInterfaces.h +++ b/CometSearch/CometInterfaces.h @@ -45,7 +45,7 @@ namespace CometInterfaces vector& strReturnPeptide, vector& strReturnProtein, vector>& matchedFragments, - vector& scores) = 0; + vector& scores) = 0; virtual bool DoMS1SearchMultiResults(const double dMaxMS1RTDiff, const double dMaxQueryRT, const int topN, @@ -53,7 +53,7 @@ namespace CometInterfaces double* dMass, double* dInten, const int iNumPeaks, - vector& scores) = 0; + vector& scores) = 0; virtual void AddInputFiles(vector &pvInputFiles) = 0; virtual void SetOutputFileBaseName(const char *pszBaseName) = 0; virtual void SetParam(const string &name, const string &strValue, const string &value) = 0; diff --git a/CometSearch/CometMassSpecUtils.cpp b/CometSearch/CometMassSpecUtils.cpp index 81270296..00b9438e 100644 --- a/CometSearch/CometMassSpecUtils.cpp +++ b/CometSearch/CometMassSpecUtils.cpp @@ -130,38 +130,14 @@ void CometMassSpecUtils::AssignMass(double *pdAAMass, // return a single protein name as a C char string -void CometMassSpecUtils::GetProteinName(FILE *fpfasta, +// compatible with both FASTA and indexed database +void CometMassSpecUtils::GetProteinName(FILE *fpdb, comet_fileoffset_t lFilePosition, char *szProteinName) { - comet_fseek(fpfasta, lFilePosition, SEEK_SET); - - if (g_staticParams.iIndexDb) //fragment ion or peptide index - { - long lSize; - - size_t tTmp = fread(&lSize, sizeof(long), 1, fpfasta); - vector vOffsets; - for (long x = 0; x < lSize; ++x) // read file offsets - { - comet_fileoffset_t tmpoffset; - tTmp = fread(&tmpoffset, sizeof(comet_fileoffset_t), 1, fpfasta); - vOffsets.push_back(tmpoffset); - } - for (long x = 0; x < lSize; ++x) // read name from fasta - { - char szTmp[WIDTH_REFERENCE]; - comet_fseek(fpfasta, vOffsets.at(x), SEEK_SET); - tTmp = fread(szTmp, sizeof(char)*WIDTH_REFERENCE, 1, fpfasta); - sscanf(szTmp, "%511s", szProteinName); // WIDTH_REFERENCE-1 - break; //break here to only get first protein reference (out of lSize) - } - } - else //regular fasta database - { - iRet = fscanf(fpfasta, "%511s", szProteinName); // WIDTH_REFERENCE-1 - szProteinName[511] = '\0'; - } + comet_fseek(fpdb, lFilePosition, SEEK_SET); + iRet = fscanf(fpdb, "%511s", szProteinName); // WIDTH_REFERENCE-1 + szProteinName[511] = '\0'; } @@ -204,7 +180,7 @@ void CometMassSpecUtils::GetProteinSequence(FILE *fpfasta, // return all matched protein names in a vector of strings -void CometMassSpecUtils::GetProteinNameString(FILE *fpfasta, +void CometMassSpecUtils::GetProteinNameString(FILE *fpdb, int iWhichQuery, // which search int iWhichResult, // which peptide within the search int iPrintTargetDecoy, // 0 = target+decoys, 1=target only, 2=decoy only @@ -220,101 +196,7 @@ void CometMassSpecUtils::GetProteinNameString(FILE *fpfasta, // FIX: protein references is so convoluted with the restoration of peptide index. This // seems to work now but definitely needs to be revisited to be cleaned up. // Look into lProteinFilePosition and lWhichProtein with Results struct. - - if (g_staticParams.iIndexDb == 2) // peptide index - { - Results* pOutput; - - if (iPrintTargetDecoy != 2) - pOutput = g_pvQuery.at(iWhichQuery)->_pResults; - else - pOutput = g_pvQuery.at(iWhichQuery)->_pDecoys; - - *uiNumTotProteins = (unsigned int)g_pvProteinsList.at(pOutput[iWhichResult].lProteinFilePosition).size(); - - int iPrintDuplicateProteinCt = 0; // track # proteins, exit when at iMaxDuplicateProteins - - vector vTmp; // store decoy matches here to append at end - - for (auto it = pOutput[iWhichResult].pWhichProtein.begin(); it != pOutput[iWhichResult].pWhichProtein.end(); ++it) - { - for (auto it2 = g_pvProteinsList.at((*it).lWhichProtein).begin(); it2 != g_pvProteinsList.at((*it).lWhichProtein).end(); ++it2) - { - comet_fileoffset_t lEntry = (*it2); - comet_fseek(fpfasta, lEntry, SEEK_SET); - - if (bReturnFullProteinString) - { - if (fgets(szProteinName, 511, fpfasta) == NULL) - { - // throw error - } - } - else - iRet = fscanf(fpfasta, "%500s", szProteinName); - - szProteinName[500] = '\0'; // limit protein name strings to 500 chars - - // remove all terminating chars - while ((szProteinName[strlen(szProteinName) - 1] == '\n') || (szProteinName[strlen(szProteinName) - 1] == '\r')) - szProteinName[strlen(szProteinName) - 1] = '\0'; - - if (!strncmp(szProteinName, g_staticParams.szDecoyPrefix, iLenDecoyPrefix)) - vTmp.push_back(szProteinName); - else - vProteinTargets.push_back(szProteinName); - - iPrintDuplicateProteinCt++; - if (iPrintDuplicateProteinCt >= g_staticParams.options.iMaxDuplicateProteins) - break; - } - if (iPrintDuplicateProteinCt >= g_staticParams.options.iMaxDuplicateProteins) - break; - } - if (vTmp.size() > 0) // append any decoy matches now; these would be decoys present in fasta - vProteinTargets.insert(vProteinTargets.end(), vTmp.begin(), vTmp.end()); - - - vTmp.clear(); - for (auto it = pOutput[iWhichResult].pWhichDecoyProtein.begin(); it != pOutput[iWhichResult].pWhichDecoyProtein.end(); ++it) - { - for (auto it2 = g_pvProteinsList.at((*it).lWhichProtein).begin(); it2 != g_pvProteinsList.at((*it).lWhichProtein).end(); ++it2) - { - comet_fileoffset_t lEntry = (*it2); - comet_fseek(fpfasta, lEntry, SEEK_SET); - - if (bReturnFullProteinString) - { - if (fgets(szProteinName, 511, fpfasta) == NULL) - { - // throw error - } - } - else - iRet = fscanf(fpfasta, "%500s", szProteinName); - - szProteinName[500] = '\0'; // limit protein name strings to 500 chars - - // remove all terminating chars - while ((szProteinName[strlen(szProteinName) - 1] == '\n') || (szProteinName[strlen(szProteinName) - 1] == '\r')) - szProteinName[strlen(szProteinName) - 1] = '\0'; - - if (!strncmp(szProteinName, g_staticParams.szDecoyPrefix, iLenDecoyPrefix)) - vTmp.push_back(szProteinName); - else - vProteinDecoys.push_back(szProteinName); - - iPrintDuplicateProteinCt++; - if (iPrintDuplicateProteinCt >= g_staticParams.options.iMaxDuplicateProteins) - break; - } - if (iPrintDuplicateProteinCt >= g_staticParams.options.iMaxDuplicateProteins) - break; - } - if (vTmp.size() > 0) // append any decoy matches now; these would be decoys present in fasta - vProteinTargets.insert(vProteinTargets.end(), vTmp.begin(), vTmp.end()); - } - else if (g_staticParams.iIndexDb == 1) // fragment ion index + if (g_staticParams.iIndexDb) { Results* pOutput; @@ -333,18 +215,20 @@ void CometMassSpecUtils::GetProteinNameString(FILE *fpfasta, vector vTmp; // store decoy matches here to append at end comet_fileoffset_t lEntry = pOutput[iWhichResult].lProteinFilePosition; + for (auto it = g_pvProteinsList.at(lEntry).begin(); it != g_pvProteinsList.at(lEntry).end(); ++it) { - comet_fseek(fpfasta, *it, SEEK_SET); + comet_fseek(fpdb, *it, SEEK_SET); + if (bReturnFullProteinString) { - if (fgets(szProteinName, 511, fpfasta) == NULL) + if (fgets(szProteinName, 511, fpdb) == NULL) { // throw error } } else - iRet = fscanf(fpfasta, "%500s", szProteinName); + iRet = fscanf(fpdb, "%500s", szProteinName); szProteinName[500] = '\0'; // limit protein name strings to 500 chars @@ -386,16 +270,16 @@ void CometMassSpecUtils::GetProteinNameString(FILE *fpfasta, { for (auto it=pOutput[iWhichResult].pWhichProtein.begin(); it!=pOutput[iWhichResult].pWhichProtein.end(); ++it) { - comet_fseek(fpfasta, (*it).lWhichProtein, SEEK_SET); + comet_fseek(fpdb, (*it).lWhichProtein, SEEK_SET); if (bReturnFullProteinString) { - if (fgets(szProteinName, 511, fpfasta) == NULL) + if (fgets(szProteinName, 511, fpdb) == NULL) { // throw error } } else - iRet = fscanf(fpfasta, "%500s", szProteinName); + iRet = fscanf(fpdb, "%500s", szProteinName); szProteinName[500] = '\0'; // limit protein name strings to 500 chars // remove all terminating chars @@ -421,16 +305,16 @@ void CometMassSpecUtils::GetProteinNameString(FILE *fpfasta, if (iPrintDuplicateProteinCt >= g_staticParams.options.iMaxDuplicateProteins) break; - comet_fseek(fpfasta, (*it).lWhichProtein, SEEK_SET); + comet_fseek(fpdb, (*it).lWhichProtein, SEEK_SET); if (bReturnFullProteinString) { - if (fgets(szProteinName, 511, fpfasta) == NULL) + if (fgets(szProteinName, 511, fpdb) == NULL) { // throw error } } else - iRet = fscanf(fpfasta, "%500s", szProteinName); + iRet = fscanf(fpdb, "%500s", szProteinName); szProteinName[500] = '\0'; // limit protein name strings to 500 chars // remove all terminating chars @@ -451,226 +335,6 @@ void CometMassSpecUtils::GetProteinNameString(FILE *fpfasta, } -// find prev, next AA from first matched protein -// this is only valid if searching indexed db with peptide/protein .idx file -void CometMassSpecUtils::GetPrevNextAA(FILE *fpfasta, - int iWhichQuery, // which search - int iWhichResult, // which peptide within the search - int iPrintTargetDecoy, // 0 = target+decoys, 1=target only, 2=decoy only - int iWhichTerm) // 0=no term constraint, 1=protein N-term, 2=protein C-term -{ - if (g_staticParams.iIndexDb) // fragment ion or peptide index - { - Results *pOutput; - - pOutput = g_pvQuery.at(iWhichQuery)->_pResults; - - pOutput[iWhichResult].cPrevAA = '-'; - pOutput[iWhichResult].cNextAA = '-'; - - // for peptide index, if target size is zero, peptide must only be matched to a decoy so don't bother with prev/next AA - if (g_staticParams.iIndexDb == 2 && g_pvQuery.at(iWhichQuery)->_pResults[iWhichResult].pWhichProtein.size() == 0) - return; - - if (g_staticParams.iIndexDb == 2) // peptide index - { - for (auto it = pOutput[iWhichResult].pWhichProtein.begin(); it != pOutput[iWhichResult].pWhichProtein.end(); ++it) - { - for (auto it2 = g_pvProteinsList.at((*it).lWhichProtein).begin(); it2 != g_pvProteinsList.at((*it).lWhichProtein).end(); ++it2) - { - if (SeekPrevNextAA(pOutput, fpfasta, *it2, iWhichQuery, iWhichResult, iWhichTerm)) - return; - } - } - - } - else // fragment ion index - { - comet_fileoffset_t lEntry = pOutput[iWhichResult].lProteinFilePosition; - - for (auto it = g_pvProteinsList.at(lEntry).begin(); it != g_pvProteinsList.at(lEntry).end(); ++it) - { - if (SeekPrevNextAA(pOutput, fpfasta, *it, iWhichQuery, iWhichResult, iWhichTerm)) - return; - } - } - } -} - - -bool CometMassSpecUtils::SeekPrevNextAA(struct Results *pOutput, - FILE *fpfasta, - comet_fileoffset_t tFilePos, - int iWhichQuery, - int iWhichResult, - int iWhichTerm) -{ - string strSeq; - int iTmpCh = 0; - - int iLenPeptide = (int)strlen(pOutput[iWhichResult].szPeptide); - - comet_fseek(fpfasta, tFilePos, SEEK_SET); - - // skip through protein name string to first carriage return - while (((iTmpCh = getc(fpfasta)) != '\n') && (iTmpCh != '\r') && (iTmpCh != EOF)); - - // Load sequence - while (((iTmpCh = getc(fpfasta)) != '>') && (iTmpCh != EOF)) - { - if ('a' <= iTmpCh && iTmpCh <= 'z') - { - strSeq += iTmpCh - 32; // convert toupper case so subtract 32 (i.e. 'A'-'a') - g_staticParams.databaseInfo.uliTotAACount++; - } - else if ('A' <= iTmpCh && iTmpCh <= 'Z') - { - strSeq += iTmpCh; - g_staticParams.databaseInfo.uliTotAACount++; - } - else if (iTmpCh == '*') // stop codon - { - strSeq += iTmpCh; - } - - if (iWhichTerm == 1 && (int)strSeq.length() == iLenPeptide + 1) - { - break; // protein N-terminal peptide identified so have enough sequence now - } - } - - if (strSeq.size() < 1) - { - printf(" Error: parsed sequence in GetPrevNextAA() is empty. File pointer %" PRIu64 ", query %d, result %d.\n", tFilePos, iWhichQuery, iWhichResult); - pOutput[iWhichResult].cPrevAA = pOutput[iWhichResult].cNextAA = '-'; - return false; - } - char* szSequence = (char*)malloc(strSeq.size() + 1); - - if (szSequence == NULL) - { - printf(" Error: cannot allocate memory for szSequence[%zd]\n", strSeq.size() + 1); - exit(1); - } - strcpy(szSequence, strSeq.c_str()); - - int iLenSequence = (int)strlen(szSequence); - - CometSearch cs; - cs._proteinInfo.iTmpProteinSeqLength = iLenSequence; // used in CheckEnzymeTermini - - if (iWhichTerm == 0) - { - size_t iStartPos = 0; - - // Find all occurrences of the peptide in the sequence - // Take first one consistent with the enzyme constraint for prev/next AA - while (std::string::npos != (iStartPos = (int)strSeq.find(pOutput[iWhichResult].szPeptide, iStartPos))) - { - int iEndPos = (int)iStartPos + iLenPeptide - 1; - - if (cs.CheckEnzymeTermini(szSequence, (int)iStartPos, iEndPos)) - { - if (iStartPos == 0) - pOutput[iWhichResult].cPrevAA = '-'; - else - pOutput[iWhichResult].cPrevAA = szSequence[iStartPos - 1]; - - if (iEndPos == iLenSequence - 1) - pOutput[iWhichResult].cNextAA = '-'; - else - pOutput[iWhichResult].cNextAA = szSequence[iEndPos + 1]; - - free(szSequence); - strSeq.clear(); - return true; - } - else if (g_staticParams.options.bClipNtermMet && iStartPos == 1 && szSequence[0] == 'M' && cs.CheckEnzymeEndTermini(szSequence, iEndPos)) - { - pOutput[iWhichResult].cPrevAA = 'M'; - - if (iEndPos == iLenSequence - 1) - pOutput[iWhichResult].cNextAA = '-'; - else - pOutput[iWhichResult].cNextAA = szSequence[iEndPos + 1]; - - free(szSequence); - strSeq.clear(); - return true; - } - - ++iStartPos; - } - } - else if (iWhichTerm == 1) - { - int iEndPos = iLenPeptide; // used for clip n-term met so needs to be set to iLenPeptide - - if (!strncmp(szSequence, pOutput[iWhichResult].szPeptide, iLenPeptide) - && cs.CheckEnzymeEndTermini(szSequence, iLenPeptide - 1)) - { - pOutput[iWhichResult].cPrevAA = '-'; - - if (iLenSequence >= iLenPeptide) // for n-term pep, iLenPeptide is following residue position - pOutput[iWhichResult].cNextAA = szSequence[iLenPeptide]; - else - pOutput[iWhichResult].cNextAA = '-'; - - free(szSequence); - strSeq.clear(); - return true; - } - else if (g_staticParams.options.bClipNtermMet - && szSequence[0] == 'M' - && !strncmp(szSequence + 1, pOutput[iWhichResult].szPeptide, iLenPeptide) - && cs.CheckEnzymeEndTermini(szSequence, iEndPos)) - { - pOutput[iWhichResult].cPrevAA = 'M'; - pOutput[iWhichResult].cNextAA = szSequence[iEndPos + 1]; - - free(szSequence); - strSeq.clear(); - return true; - } - } - else if (iWhichTerm == 2) - { - int iStartPos = iLenSequence - iLenPeptide; - - if (!strncmp(szSequence + iStartPos, pOutput[iWhichResult].szPeptide, iLenPeptide)) - { - if (cs.CheckEnzymeStartTermini(szSequence, iLenSequence - iLenPeptide)) - { - if (iStartPos > 0) - pOutput[iWhichResult].cPrevAA = szSequence[iStartPos - 1]; - else - pOutput[iWhichResult].cPrevAA = '-'; - - pOutput[iWhichResult].cNextAA = '-'; - - free(szSequence); - strSeq.clear(); - return true; - } - else if (g_staticParams.options.bClipNtermMet && iStartPos == 1 && szSequence[0] == 'M') - { - pOutput[iWhichResult].cPrevAA = 'M'; - pOutput[iWhichResult].cNextAA = '-'; - - free(szSequence); - strSeq.clear(); - return true; - } - } - } - - free(szSequence); - strSeq.clear(); - - return false; -} - - // return nth entry in string s string CometMassSpecUtils::GetField(std::string *s, unsigned int n, diff --git a/CometSearch/CometMassSpecUtils.h b/CometSearch/CometMassSpecUtils.h index 31723726..2a66b9cb 100644 --- a/CometSearch/CometMassSpecUtils.h +++ b/CometSearch/CometMassSpecUtils.h @@ -57,19 +57,6 @@ class CometMassSpecUtils vector& vProteinTargets, // the target protein names vector& vProteinDecoys); // the decoy protein names if applicable - static void GetPrevNextAA(FILE *fpdb, - int iWhichQuery, // which search - int iWhichResult, // which peptide within the search - int iPrintTargetDecoy, - int iWhichTerm); // 0=no term constraint, 1=protein N-term, 2=protein C-term - - static bool SeekPrevNextAA(struct Results *pOutput, - FILE *fpfasta, - comet_fileoffset_t tFilePos, - int iWhichQuery, - int iWhichResult, - int iWhichTerm); - static string GetField(std::string *s, unsigned int n, char cDelimeter); diff --git a/CometSearch/CometPeptideIndex.cpp b/CometSearch/CometPeptideIndex.cpp index 59c57b45..60639d48 100644 --- a/CometSearch/CometPeptideIndex.cpp +++ b/CometSearch/CometPeptideIndex.cpp @@ -118,7 +118,7 @@ bool CometPeptideIndex::WritePeptideIndex(ThreadPool *tp) } else { - // different peptide + mod state so go ahead and push temp onto pvProteinsListLocalk + // different peptide + mod state so go ahead and push temp onto pvProteinsListLocal // and store current protein reference into new temp // temp can have duplicates due to mod forms of peptide so make unique here sort(temp.begin(), temp.end()); @@ -129,6 +129,7 @@ bool CometPeptideIndex::WritePeptideIndex(ThreadPool *tp) temp.clear(); temp.push_back(g_pvDBIndex.at(i).lIndexProteinFilePosition); g_pvDBIndex.at(i).lIndexProteinFilePosition = lProtCount; + } } } @@ -209,20 +210,57 @@ bool CometPeptideIndex::WritePeptideIndex(ThreadPool *tp) } fprintf(fptr, "\n\n"); + int iTmp = (int)g_pvProteinNames.size(); + comet_fileoffset_t* lProteinIndex = new comet_fileoffset_t[iTmp]; + for (int i = 0; i < iTmp; i++) + lProteinIndex[i] = -1; + + // first just write out protein names. Track file position of each protein name + int ctProteinNames = 0; + for (auto it = g_pvProteinNames.begin(); it != g_pvProteinNames.end(); ++it) + { + lProteinIndex[ctProteinNames] = comet_ftell(fptr); + fwrite(it->second.szProt, sizeof(char) * WIDTH_REFERENCE, 1, fptr); + it->second.iWhichProtein = ctProteinNames; + ctProteinNames++; + } + // Now write out: vector> pvProteinsListLocal comet_fileoffset_t clProteinsFilePos = comet_ftell(fptr); size_t tTmp = pvProteinsListLocal.size(); + int iWhichProtein; fwrite(&tTmp, clSizeCometFileOffset, 1, fptr); for (auto it = pvProteinsListLocal.begin(); it != pvProteinsListLocal.end(); ++it) { tTmp = (*it).size(); fwrite(&tTmp, sizeof(size_t), 1, fptr); + for (size_t it2 = 0; it2 < tTmp; ++it2) { - fwrite(&((*it).at(it2)), clSizeCometFileOffset, 1, fptr); + iWhichProtein = -1; + + // find protein by matching g_pvProteinNames.lProteinFilePosition to g_pvProteinNames.lProteinIndex; + auto result = g_pvProteinNames.find((*it).at(it2)); + if (result != g_pvProteinNames.end()) + { + iWhichProtein = result->second.iWhichProtein; + } + + if (iWhichProtein == -1) + { + string strErrorMsg = " Error in WritePeptideIndex(): cannot find protein file position in protein names map.\n"; + logerr(strErrorMsg); + fclose(fptr); + delete[] lProteinIndex; + return false; + } + + fwrite(&lProteinIndex[iWhichProtein], clSizeCometFileOffset, 1, fptr); } } + delete[] lProteinIndex; + // next write out the peptides and track peptide mass index int iMaxPeptideMass = (int)(g_staticParams.options.dPeptideMassHigh); int iMaxPeptideMass10 = iMaxPeptideMass * 10; // make mass index at resolution of 0.1 Da @@ -245,10 +283,13 @@ bool CometPeptideIndex::WritePeptideIndex(ThreadPool *tp) fwrite(&iLen, sizeof(int), 1, fptr); fwrite((*it).szPeptide, sizeof(char), iLen, fptr); + fwrite(&((*it).cPrevAA), sizeof(char), 1, fptr); + fwrite(&((*it).cNextAA), sizeof(char), 1, fptr); + // write out for char 0=no mod, N=mod. If N, write out var mods as N pairs (pos,whichmod) int iLen2 = iLen + 2; - unsigned char cNumMods = 0; - for (unsigned char x=0; x 0) { - for (unsigned char x=0; xszPeptide, sizeof(char), iLen, fp); sDBI->szPeptide[iLen] = '\0'; + tTmp = fread(&(sDBI->cPrevAA), sizeof(char), 1, fp); + tTmp = fread(&(sDBI->cNextAA), sizeof(char), 1, fp); + unsigned char cNumMods; // number of var mods encoded as position:residue pairs tTmp = fread(&cNumMods, sizeof(unsigned char), 1, fp); // read how many var mods are stored diff --git a/CometSearch/CometSearch.cpp b/CometSearch/CometSearch.cpp index b418fc80..f0c48a83 100644 --- a/CometSearch/CometSearch.cpp +++ b/CometSearch/CometSearch.cpp @@ -14,22 +14,7 @@ #include "Common.h" #include "CometSearch.h" -#include "CometSpecLib.h" -#include "CometDataInternal.h" -#include "ThreadPool.h" -#include "CometFragmentIndex.h" -#include "CometMassSpecUtils.h" -#include "CometModificationsPermuter.h" -#include "CometPeptideIndex.h" -#include "CometPostAnalysis.h" -#include "CometSearchManager.h" -#include "CometStatus.h" - -#include -#include -#include -#include -#include + #define BINARYSEARCHCUTOFF 20 // do linear search through FI if # entries is this or less @@ -774,6 +759,18 @@ bool CometSearch::RunSearch(int iPercentStart, return false; } + if (g_staticParams.options.bCreateFragmentIndex || g_staticParams.options.bCreatePeptideIndex) + { + struct IndexProteinStruct sEntry; + + // store protein name + std::strncpy(sEntry.szProt, dbe.strName.c_str(), sizeof(sEntry.szProt) - 1); + sEntry.szProt[sizeof(sEntry.szProt) - 1] = '\0'; + sEntry.lProteinFilePosition = dbe.lProteinFilePosition; + sEntry.iWhichProtein = -1; // not used for index creation + g_pvProteinNames.insert({ sEntry.lProteinFilePosition, sEntry }); + } + // Load sequence while (((iTmpCh=getc(fp)) != '>') && (iTmpCh != EOF)) { @@ -1379,7 +1376,7 @@ void CometSearch::SearchFragmentIndex(size_t iWhichQuery, int ctIonSeries; int ctLen; int iLenMinus1; - char szPeptide[512]; + char szPeptide[MAX_PEPTIDE_LEN]; int piVarModSites[MAX_PEPTIDE_LEN_P2]; int iPositionNLB[FRAGINDEX_VMODS]; int iPositionNLY[FRAGINDEX_VMODS]; @@ -1625,12 +1622,35 @@ void CometSearch::SearchFragmentIndex(size_t iWhichQuery, struct sDBEntry dbe; + char cPrevAA = g_vRawPeptides.at(g_vFragmentPeptides[ix->first].iWhichPeptide).cPrevAA; + char cNextAA = g_vRawPeptides.at(g_vFragmentPeptides[ix->first].iWhichPeptide).cNextAA; + char szProtein[MAX_PEPTIDE_LEN_P2]; + if (cPrevAA == '-') + { + iStartPos = 0; + strcpy(szProtein, szPeptide); + } + else + { + iStartPos = 1; + sprintf(szProtein, "%c%s", cPrevAA, szPeptide); + } + if (cNextAA == '-') + { + iEndPos = strlen(szProtein) - 1; + } + else + { + sprintf(szProtein, "%s%c", szProtein, cNextAA); + iEndPos = strlen(szProtein) - 2; + } + dbe.strName = ""; - dbe.strSeq = szPeptide; + dbe.strSeq = szProtein; // this lProteinFilePosition is actually the entry in g_pvProteinsList that contains the list of proteins for that peptide dbe.lProteinFilePosition = g_vRawPeptides.at(g_vFragmentPeptides[ix->first].iWhichPeptide).lIndexProteinFilePosition; - XcorrScoreI(szPeptide, iStartPos, iEndPos, iFoundVariableMod, dCalcPepMass, false, iWhichQuery, + XcorrScoreI(szProtein, iStartPos, iEndPos, iFoundVariableMod, dCalcPepMass, false, iWhichQuery, iLenPeptide, piVarModSites, &dbe, uiBinnedIonMasses, uiBinnedPrecursorNL, ix->second); uiNumScored++; @@ -2495,11 +2515,37 @@ void CometSearch::AnalyzePeptideIndex(int iWhichQuery, } } - XcorrScore(sDBI.szPeptide, iUnused, iUnused, iStartPos, iEndPos, iFoundVariableMod, + char cPrevAA = sDBI.cPrevAA; + char cNextAA = sDBI.cNextAA; + char szProtein[MAX_PEPTIDE_LEN_P2]; + if (cPrevAA == '-') + { + iStartPos = 0; + strcpy(szProtein, sDBI.szPeptide); + } + else + { + iStartPos = 1; + sprintf(szProtein, "%c%s", cPrevAA, sDBI.szPeptide); + } + + if (cNextAA == '-') + { + iEndPos = strlen(szProtein) - 1; + } + else + { + sprintf(szProtein, "%s%c", szProtein, cNextAA); + iEndPos = strlen(szProtein) - 2; + } + + XcorrScore(szProtein, iUnused, iUnused, iStartPos, iEndPos, iFoundVariableMod, sDBI.dPepMass, false, iWhichQuery, iLenPeptide, piVarModSites, dbe); if (g_staticParams.options.iDecoySearch) { + iStartPos = 0; + iEndPos = iLenPeptide - 1; XcorrScore(szDecoyPeptide, iUnused, iUnused, iStartPos, iEndPos, iFoundVariableModDecoy, sDBI.dPepMass, true, iWhichQuery, iLenPeptide, piVarModSitesDecoy, dbe); } @@ -2759,6 +2805,8 @@ bool CometSearch::SearchForPeptides(struct sDBEntry dbe, strncpy(sEntry.szPeptide, szProteinSeq + iStartPos, iLenPeptide); sEntry.szPeptide[iLenPeptide]='\0'; + sEntry.cPrevAA = (iStartPos > 0) ? szProteinSeq[iStartPos - 1] : '-'; + sEntry.cNextAA = (iEndPos < iProteinSeqLengthMinus1) ? szProteinSeq[iEndPos + 1] : '-'; sEntry.siVarModProteinFilter = siVarModProteinFilter; // little sanity check here to not include peptides with '*' in them @@ -4615,30 +4663,26 @@ void CometSearch::StorePeptide(size_t iWhichQuery, } pQuery->_pDecoys[siLowestDecoyXcorrScoreIndex].fXcorr = (float)dXcorr; + pQuery->_pDecoys[siLowestDecoyXcorrScoreIndex].bClippedM = false; - if (!g_staticParams.iIndexDb) + if (iStartPos == 0) { - pQuery->_pDecoys[siLowestDecoyXcorrScoreIndex].bClippedM = false; - - if (iStartPos == 0) + // check if clip n-term met + if (g_staticParams.options.bClipNtermMet && dbe->strSeq.c_str()[0] == 'M' && !strcmp(dbe->strSeq.c_str() + 1, szProteinSeq)) { - // check if clip n-term met - if (g_staticParams.options.bClipNtermMet && dbe->strSeq.c_str()[0] == 'M' && !strcmp(dbe->strSeq.c_str() + 1, szProteinSeq)) - { - pQuery->_pDecoys[siLowestDecoyXcorrScoreIndex].cPrevAA = 'M'; - pQuery->_pDecoys[siLowestDecoyXcorrScoreIndex].bClippedM = true; - } - else - pQuery->_pDecoys[siLowestDecoyXcorrScoreIndex].cPrevAA = '-'; + pQuery->_pDecoys[siLowestDecoyXcorrScoreIndex].cPrevAA = 'M'; + pQuery->_pDecoys[siLowestDecoyXcorrScoreIndex].bClippedM = true; } else - pQuery->_pDecoys[siLowestDecoyXcorrScoreIndex].cPrevAA = szProteinSeq[iStartPos - 1]; - - if (iEndPos == _proteinInfo.iTmpProteinSeqLength-1) - pQuery->_pDecoys[siLowestDecoyXcorrScoreIndex].cNextAA = '-'; - else - pQuery->_pDecoys[siLowestDecoyXcorrScoreIndex].cNextAA = szProteinSeq[iEndPos + 1]; + pQuery->_pDecoys[siLowestDecoyXcorrScoreIndex].cPrevAA = '-'; } + else + pQuery->_pDecoys[siLowestDecoyXcorrScoreIndex].cPrevAA = szProteinSeq[iStartPos - 1]; + + if (iEndPos == _proteinInfo.iTmpProteinSeqLength - 1) + pQuery->_pDecoys[siLowestDecoyXcorrScoreIndex].cNextAA = '-'; + else + pQuery->_pDecoys[siLowestDecoyXcorrScoreIndex].cNextAA = szProteinSeq[iEndPos + 1]; // store PEFF info; +1 and -1 to account for PEFF in flanking positions if (_proteinInfo.iPeffOrigResiduePosition != NO_PEFF_VARIANT @@ -4822,29 +4866,21 @@ void CometSearch::StorePeptide(size_t iWhichQuery, pQuery->_pResults[siLowestXcorrScoreIndex].fXcorr = (float)dXcorr; - if (g_staticParams.iIndexDb) + if (iStartPos == 0) { - pQuery->_pResults[siLowestXcorrScoreIndex].cPrevAA = _proteinInfo.cPrevAA; - pQuery->_pResults[siLowestXcorrScoreIndex].cNextAA = _proteinInfo.cNextAA; + // check if clip n-term met + if (g_staticParams.options.bClipNtermMet && dbe->strSeq.c_str()[0] == 'M' && !strcmp(dbe->strSeq.c_str() + 1, szProteinSeq)) + pQuery->_pResults[siLowestXcorrScoreIndex].cPrevAA = 'M'; + else + pQuery->_pResults[siLowestXcorrScoreIndex].cPrevAA = '-'; } else - { - if (iStartPos == 0) - { - // check if clip n-term met - if (g_staticParams.options.bClipNtermMet && dbe->strSeq.c_str()[0] == 'M' && !strcmp(dbe->strSeq.c_str() + 1, szProteinSeq)) - pQuery->_pResults[siLowestXcorrScoreIndex].cPrevAA = 'M'; - else - pQuery->_pResults[siLowestXcorrScoreIndex].cPrevAA = '-'; - } - else - pQuery->_pResults[siLowestXcorrScoreIndex].cPrevAA = szProteinSeq[iStartPos - 1]; + pQuery->_pResults[siLowestXcorrScoreIndex].cPrevAA = szProteinSeq[iStartPos - 1]; - if (iEndPos == _proteinInfo.iTmpProteinSeqLength-1) - pQuery->_pResults[siLowestXcorrScoreIndex].cNextAA = '-'; - else - pQuery->_pResults[siLowestXcorrScoreIndex].cNextAA = szProteinSeq[iEndPos + 1]; - } + if (iEndPos == _proteinInfo.iTmpProteinSeqLength - 1) + pQuery->_pResults[siLowestXcorrScoreIndex].cNextAA = '-'; + else + pQuery->_pResults[siLowestXcorrScoreIndex].cNextAA = szProteinSeq[iEndPos + 1]; // store PEFF info; +1 and -1 to account for PEFF in flanking positions if (_proteinInfo.iPeffOrigResiduePosition != NO_PEFF_VARIANT @@ -4959,10 +4995,10 @@ void CometSearch::StorePeptideI(size_t iWhichQuery, int* piVarModSites, struct sDBEntry* dbe) { - int iLenPeptide; - Query* pQuery = g_pvQuery.at(iWhichQuery); + int iLenPeptide = iEndPos - iStartPos + 1; + int iLenProteinMinus1 = (int)strlen(szProteinSeq) - 1; - iLenPeptide = iEndPos - iStartPos + 1; + Query* pQuery = g_pvQuery.at(iWhichQuery); short siLowestXcorrScoreIndex = pQuery->siLowestXcorrScoreIndex; @@ -4987,8 +5023,15 @@ void CometSearch::StorePeptideI(size_t iWhichQuery, pQuery->_pResults[siLowestXcorrScoreIndex].fXcorr = (float)dXcorr; - pQuery->_pResults[siLowestXcorrScoreIndex].cPrevAA = '-'; - pQuery->_pResults[siLowestXcorrScoreIndex].cNextAA = '-'; + if (iStartPos == 0) + pQuery->_pResults[siLowestXcorrScoreIndex].cPrevAA = '-'; + else + pQuery->_pResults[siLowestXcorrScoreIndex].cPrevAA = szProteinSeq[iStartPos - 1]; + + if (iEndPos == iLenProteinMinus1) + pQuery->_pResults[siLowestXcorrScoreIndex].cNextAA = '-'; + else + pQuery->_pResults[siLowestXcorrScoreIndex].cNextAA = szProteinSeq[iEndPos + 1]; pQuery->_pResults[siLowestXcorrScoreIndex].iPeffOrigResiduePosition = NO_PEFF_VARIANT; pQuery->_pResults[siLowestXcorrScoreIndex].sPeffOrigResidues.clear(); @@ -5071,13 +5114,12 @@ int CometSearch::CheckDuplicate(int iWhichQuery, int *piVarModSites, struct sDBEntry *dbe) { - int i, - iLenPeptide, - bIsDuplicate=0; + int i; + int iLenPeptide = iEndPos - iStartPos + 1; + int iLenProteinMinus1 = (int)strlen(szProteinSeq) - 1; + int bIsDuplicate=0; Query* pQuery = g_pvQuery.at(iWhichQuery); - iLenPeptide = iEndPos - iStartPos + 1; - if (g_staticParams.options.iDecoySearch == 2 && bDecoyPep) { for (i = 0; i < g_staticParams.options.iNumStored; ++i) @@ -5171,7 +5213,7 @@ int CometSearch::CheckDuplicate(int iWhichQuery, else pTmp.cPrevAA = szProteinSeq[iStartResidue - 1]; - if (iEndResidue == (int)(strlen(szProteinSeq) - 1)) + if (iEndResidue == iLenProteinMinus1) pTmp.cNextAA = '-'; else pTmp.cNextAA = szProteinSeq[iEndResidue + 1]; @@ -5288,7 +5330,7 @@ int CometSearch::CheckDuplicate(int iWhichQuery, else pTmp.cPrevAA = szProteinSeq[iStartResidue - 1]; - if (iEndResidue == (int)(strlen(szProteinSeq) - 1)) + if (iEndResidue == iLenProteinMinus1) pTmp.cNextAA = '-'; else pTmp.cNextAA = szProteinSeq[iEndResidue + 1]; @@ -6972,6 +7014,16 @@ bool CometSearch::MergeVarMods(char *szProteinSeq, strncpy(sDBTmp.szPeptide, szProteinSeq + _varModInfo.iStartPos, iLenPeptide); sDBTmp.szPeptide[iLenPeptide]='\0'; + if (_varModInfo.iStartPos == 0) + sDBTmp.cPrevAA = '-'; + else + sDBTmp.cPrevAA = szProteinSeq[_varModInfo.iStartPos - 1]; + + if (_varModInfo.iEndPos == iLenProteinMinus1) + sDBTmp.cNextAA = '-'; + else + sDBTmp.cNextAA = szProteinSeq[_varModInfo.iEndPos + 1]; + sDBTmp.lIndexProteinFilePosition = _proteinInfo.lProteinFilePosition; memset(sDBTmp.pcVarModSites, 0, sizeof(sDBTmp.pcVarModSites)); diff --git a/CometSearch/CometSearch.h b/CometSearch/CometSearch.h index c1e68b01..31b2fcbe 100644 --- a/CometSearch/CometSearch.h +++ b/CometSearch/CometSearch.h @@ -15,12 +15,25 @@ #ifndef _COMETSEARCH_H_ #define _COMETSEARCH_H_ +#include "Common.h" #include "CometDataInternal.h" +#include "CometFragmentIndex.h" +#include "CometMassSpecUtils.h" +#include "CometModificationsPermuter.h" +#include "CometPeptideIndex.h" +#include "CometPostAnalysis.h" +#include "CometSearchManager.h" +#include "CometSpecLib.h" +#include "CometStatus.h" #include "ThreadPool.h" -#include "Common.h" -#include -#include #include +#include +#include +#include +#include +#include +#include +#include struct SearchThreadData { diff --git a/CometSearch/CometSearchManager.cpp b/CometSearch/CometSearchManager.cpp index 03c316c7..d9833a51 100644 --- a/CometSearch/CometSearchManager.cpp +++ b/CometSearch/CometSearchManager.cpp @@ -50,6 +50,8 @@ Mutex g_preprocessMemoryPoolMutex; Mutex g_searchMemoryPoolMutex; CometStatus g_cometStatus; string g_sCometVersion; +map g_pvProteinNames; // for either db index + AScoreProCpp::AScoreOptions g_AScoreOptions; // AScore options AScoreProCpp::AScoreDllInterface* g_AScoreInterface; @@ -71,7 +73,7 @@ bool g_bPerformDatabaseSearch = false; bool g_bCometPreprocessMemoryAllocated = false; bool g_bCometSearchMemoryAllocated = false; -FILE* fpfasta; // file pointer to FASTA; would be same as fpdb if input db was already FASTA but otherwise needed if input is .idx file +bool g_bIdxNoFasta = false; // if true, .idx file has no associated .fasta file double dMaxSpecLibRT = 0.0; @@ -381,14 +383,17 @@ static bool ValidateSequenceDatabaseFile() // open FASTA for retrieving protein names string sTmpDB = g_staticParams.databaseInfo.szDatabase; + if (!strcmp(g_staticParams.databaseInfo.szDatabase + strlen(g_staticParams.databaseInfo.szDatabase) - 4, ".idx")) sTmpDB = sTmpDB.erase(sTmpDB.size() - 4); // need plain fasta if indexdb input - if ((fpfasta = fopen(sTmpDB.c_str(), "r")) == NULL) + if ((fpcheck = fopen(sTmpDB.c_str(), "r")) == NULL) { - string strErrorMsg = " Error (4) - cannot read FASTA sequence database file \"" + sTmpDB + "\".\n"; - g_cometStatus.SetStatus(CometResult_Failed, strErrorMsg); - logerr(strErrorMsg); - return false; + g_bIdxNoFasta = true; // .idx database but corresponding fasta not found + } + else + { + fclose(fpcheck); + g_bIdxNoFasta = false; } // if .idx database specified but does not exist, first see if corresponding @@ -415,26 +420,10 @@ static bool ValidateSequenceDatabaseFile() return true; } } - else - { - string strFasta = g_staticParams.databaseInfo.szDatabase; - strFasta.erase(strFasta.length() - 4); // remove .idx extension - if ((fpcheck=fopen(strFasta.c_str(), "r")) == NULL) - { - string strErrorMsg = " Error - peptide index file \"" + std::string(g_staticParams.databaseInfo.szDatabase) - + "\" is present but corresponding FASTA file \"" + strFasta + "\" file is missing.\n"; - g_cometStatus.SetStatus(CometResult_Failed, strErrorMsg); - logerr(strErrorMsg); - return false; - } - else - { - fclose(fpcheck); - g_staticParams.options.bCreateFragmentIndex = false; - return true; - } - } + fclose(fpcheck); + g_staticParams.options.bCreateFragmentIndex = false; + return true; } #ifndef WIN32 @@ -1469,12 +1458,6 @@ bool CometSearchManager::InitializeStaticParams() for (int i=0; i::iterator it; it = _mapStaticParams.find(name); @@ -1953,7 +1936,7 @@ void CometSearchManager::SetParam(const string &name, const string &strValue, co } } -bool CometSearchManager::GetParamValue(const string &name, VarMods &value) +bool CometSearchManager::GetParamValue(const string &name, VarMods & value) { std::map::iterator it; it = _mapStaticParams.find(name); @@ -2230,7 +2213,7 @@ bool CometSearchManager::DoSearch() { // write out .idx file containing unmodified peptides and protein refs; // this calls RunSearch just to query fasta and generate uniq peptide list - bSucceeded = CometFragmentIndex::WritePlainPeptideIndex(tp); + bSucceeded = CometFragmentIndex::WriteFIPlainPeptideIndex(tp); if (!bSucceeded) return bSucceeded; @@ -2654,19 +2637,44 @@ bool CometSearchManager::DoSearch() // We need to reset some of the static variables in-between input files CometPreprocess::Reset(); - FILE *fpdb; // need FASTA file again to grab headers for output (currently just store file positions) + FILE* fpfasta = NULL; // pointer to FASTA file; if .idx search, FASTA is used to retrieve sequences (mzid output) + FILE* fpidx = NULL; // pointer to .idx file if used if (g_bPerformDatabaseSearch) { string sTmpDB = g_staticParams.databaseInfo.szDatabase; + if (g_staticParams.iIndexDb > 0) + { + // .idx db so first open .idx file + if ((fpidx = fopen(sTmpDB.c_str(), "r")) == NULL) + { + string strErrorMsg = " Error (1a) - cannot read .idx file \"" + sTmpDB + "\".\n"; + g_cometStatus.SetStatus(CometResult_Failed, strErrorMsg); + logerr(strErrorMsg); + return false; + } + + // .idx db so next check if FASTA is present (not required) sTmpDB = sTmpDB.erase(sTmpDB.size() - 4); // need plain fasta if indexdb input - if ((fpdb = fopen(sTmpDB.c_str(), "r")) == NULL) + if ((fpfasta = fopen(sTmpDB.c_str(), "r")) == NULL) + { + g_bIdxNoFasta = true; + fpfasta = NULL; + } + } + else { - string strErrorMsg = " Error (1) - cannot read FASTA sequence database file \"" + sTmpDB + "\".\n"; - g_cometStatus.SetStatus(CometResult_Failed, strErrorMsg); - logerr(strErrorMsg); - return false; + // FASTA search only + fpidx = NULL; + + if ((fpfasta = fopen(sTmpDB.c_str(), "r")) == NULL) + { + string strErrorMsg = " Error (1b) - cannot read sequence database file \"" + sTmpDB + "\".\n"; + g_cometStatus.SetStatus(CometResult_Failed, strErrorMsg); + logerr(strErrorMsg); + return false; + } } } @@ -2723,6 +2731,15 @@ bool CometSearchManager::DoSearch() fflush(stdout); } + FILE* fpdb = NULL; + if (g_bPerformDatabaseSearch) + { + if (g_staticParams.iIndexDb > 0) + fpdb = fpidx; + else + fpdb = fpfasta; + } + int iBatchNum = 0; while (!CometPreprocess::DoneProcessingAllSpectra()) // Loop through iMaxSpectraPerSearch { @@ -2921,52 +2938,6 @@ bool CometSearchManager::DoSearch() // Sort g_pvQuery vector by scan. std::sort(g_pvQuery.begin(), g_pvQuery.end(), compareByScanNumber); - // Get flanking amino acid residues - if (g_bPerformDatabaseSearch && g_staticParams.iIndexDb) - { - for (int iWhichQuery = 0; iWhichQuery < (int)g_pvQuery.size(); ++iWhichQuery) - { - int iNumPrintLines; - int iPrintTargetDecoy = 0; // will set to 1 or 2 when index db supports internal decoys - - iNumPrintLines = g_pvQuery.at(iWhichQuery)->iMatchPeptideCount; - if (iNumPrintLines > g_staticParams.options.iNumPeptideOutputLines) - iNumPrintLines = g_staticParams.options.iNumPeptideOutputLines; - - for (int iWhichResult = 0; iWhichResult < iNumPrintLines; ++iWhichResult) - { - if (g_pvQuery.at(iWhichQuery)->_pResults[iWhichResult].usiLenPeptide > 0 && g_pvQuery.at(iWhichQuery)->_pResults[iWhichResult].fXcorr > g_staticParams.options.dMinimumXcorr) - { - int iNtermMod = g_pvQuery.at(iWhichQuery)->_pResults[iWhichResult].piVarModSites[0]; - int iCtermMod = g_pvQuery.at(iWhichQuery)->_pResults[iWhichResult].piVarModSites[g_pvQuery.at(iWhichQuery)->_pResults[iWhichResult].usiLenPeptide - 1]; - - if (!(iNtermMod <0 || iNtermMod > FRAGINDEX_VMODS) && !(iCtermMod < 0 || iCtermMod > FRAGINDEX_VMODS)) - { - if (iNtermMod > 0 - && g_staticParams.variableModParameters.varModList[iNtermMod - 1].iVarModTermDistance == 0 - && g_staticParams.variableModParameters.varModList[iNtermMod - 1].iWhichTerm == 0) - { - // only match to peptides at the N-terminus of proteins as protein terminal mod applied - CometMassSpecUtils::GetPrevNextAA(fpfasta, iWhichQuery, iWhichResult, iPrintTargetDecoy, 1); - } - else if (iCtermMod > 0 - && g_staticParams.variableModParameters.varModList[iCtermMod - 1].iVarModTermDistance == 0 - && g_staticParams.variableModParameters.varModList[iCtermMod - 1].iWhichTerm == 1) - { - // only match to peptides at the C-terminus of proteins as protein terminal mod applied - CometMassSpecUtils::GetPrevNextAA(fpfasta, iWhichQuery, iWhichResult, iPrintTargetDecoy, 2); - } - else - { - // peptide can be anywhere in sequence - CometMassSpecUtils::GetPrevNextAA(fpfasta, iWhichQuery, iWhichResult, iPrintTargetDecoy, 0); - } - } - } - } - } - } - if (!g_staticParams.options.bOutputSqtStream && !g_staticParams.iIndexDb) { logout(" done\n"); @@ -3091,7 +3062,10 @@ bool CometSearchManager::DoSearch() } } - fclose(fpdb); + if (fpidx != NULL) + fclose(fpidx); + if (fpfasta != NULL) + fclose(fpfasta); } //MH: Deallocate spectral processing memory. @@ -3273,7 +3247,6 @@ bool CometSearchManager::InitializeSingleSpectrumSearch() return bSucceeded; } - if (g_staticParams.iIndexDb == 1 && !g_bPlainPeptideIndexRead) { sqSearch.ReadPlainPeptideIndex(); @@ -3307,8 +3280,6 @@ void CometSearchManager::FinalizeSingleSpectrumSearch() // Deallocate search memory CometSearch::DeallocateMemory(singleSearchThreadCount); - fclose(fpfasta); - if (g_staticParams.options.iPrintAScoreProScore) DeleteAScoreDllInterface(g_AScoreInterface); @@ -3361,9 +3332,6 @@ void CometSearchManager::FinalizeSingleSpectrumMS1Search() { // Deallocate search memory CometSearch::DeallocateMemory(singleSearchThreadCount); - - fclose(fpfasta); - singleSearchMS1InitializationComplete = false; } } @@ -3378,10 +3346,8 @@ bool CometSearchManager::DoSingleSpectrumSearchMultiResults(const int topN, vector& strReturnPeptide, vector& strReturnProtein, vector>& matchedFragments, - vector& scores) + vector& scores) { - FILE* fpdb = nullptr; // need FASTA file again to grab headers for output (currently just store file positions) - if (iNumPeaks == 0) return false; @@ -3469,18 +3435,6 @@ bool CometSearchManager::DoSingleSpectrumSearchMultiResults(const int topN, if (takeSearchResultsN > iSize) takeSearchResultsN = iSize; - //FIX ... is there a way to not have to do this just once and not have to fopen/fclose this db - // file pointer for each query? - sTmpDB = g_staticParams.databaseInfo.szDatabase; - sTmpDB = sTmpDB.erase(sTmpDB.size() - 4); // need plain fasta if indexdb input - if ((fpdb = fopen(sTmpDB.c_str(), "r")) == NULL) - { - string strErrorMsg = " Error (5) - cannot read database file \"" + sTmpDB + "\".\n"; - g_cometStatus.SetStatus(CometResult_Failed, strErrorMsg); - logerr(strErrorMsg); - return false; - } - if (bSucceeded && pQuery->iMatchPeptideCount > 0) { CometPostAnalysis::CalculateSP(pQuery->_pResults, 0, takeSearchResultsN); @@ -3501,9 +3455,20 @@ bool CometSearchManager::DoSingleSpectrumSearchMultiResults(const int topN, if (!bSucceeded) goto cleanup_results; + // Open .idx file for retrieving protein names + FILE* fp; + if ((fp = fopen(g_staticParams.databaseInfo.szDatabase, "rb")) == NULL) + { + string strErrorMsg = " Error - cannot read indexed database file \"" + std::string(g_staticParams.databaseInfo.szDatabase) + + "\" " + std::strerror(errno) + "\n."; + g_cometStatus.SetStatus(CometResult_Failed, strErrorMsg); + logerr(strErrorMsg); + return false; + } + for (int iWhichResult = 0; iWhichResult < takeSearchResultsN; ++iWhichResult) { - Scores score; + CometScores score; score.dCn = 0; score.xCorr = g_staticParams.options.dMinimumXcorr; score.matchedIons = 0; @@ -3519,9 +3484,6 @@ bool CometSearchManager::DoSingleSpectrumSearchMultiResults(const int topN, { Results* pOutput = pQuery->_pResults; - // Comment out this next line to not parse previous/next amino acids from fasta for RTS reporting - CometMassSpecUtils::GetPrevNextAA(fpfasta, 0, iWhichResult, 0, 0); - // Set return values for peptide sequence, protein, xcorr and E-value eachStrReturnPeptide = std::string(1, pOutput[iWhichResult].cPrevAA) + "."; @@ -3562,7 +3524,7 @@ bool CometSearchManager::DoSingleSpectrumSearchMultiResults(const int topN, unsigned int uiNumTotProteins = 0; bool bReturnFullProteinString = true; - CometMassSpecUtils::GetProteinNameString(fpdb, iWhichQuery, iWhichResult, iPrintTargetDecoy, + CometMassSpecUtils::GetProteinNameString(fp, iWhichQuery, iWhichResult, iPrintTargetDecoy, bReturnFullProteinString, &uiNumTotProteins, vProteinTargets, vProteinDecoys); bool bPrintDelim = false; @@ -3802,10 +3764,10 @@ bool CometSearchManager::DoSingleSpectrumSearchMultiResults(const int topN, scores.push_back(score); } -cleanup_results: + if (fp != NULL) + fclose(fp); - if (fpdb != nullptr) - fclose(fpdb); //FIX: would be nice to not fopen/fclose with each query +cleanup_results: // Deleting each Query object in the vector calls its destructor, which // frees the spectral memory (see definition for Query in CometDataInternal.h). @@ -3832,7 +3794,7 @@ bool CometSearchManager::DoMS1SearchMultiResults(const double dMaxMS1RTDiff, double* pdMass, double* pdInten, int iNumPeaks, - vector& scoresMS1) + vector& scoresMS1) { bool bSucceeded = false; double dMatchedSpecLibRT = 0.0; @@ -3881,7 +3843,7 @@ bool CometSearchManager::DoMS1SearchMultiResults(const double dMaxMS1RTDiff, if (bSucceeded) { - ScoresMS1 scoreMS1; + CometScoresMS1 scoreMS1; scoreMS1.fDotProduct = pQueryMS1->_pSpecLibResultsMS1.fDotProduct; // scoreMS1.fRTime = pQueryMS1->_pSpecLibResultsMS1.fRTime; scoreMS1.fRTime = (float)dLinearRegressionRT; @@ -3893,9 +3855,6 @@ bool CometSearchManager::DoMS1SearchMultiResults(const double dMaxMS1RTDiff, cleanup_results: - // close raw file here? - // fclose(fpdb); //FIX: would be nice to not fopen/fclose with each query - // Deleting each Query object in the vector calls its destructor, which // frees the spectral memory (see definition for Query in CometDataInternal.h). for (auto it = g_pvQuery.begin(); it != g_pvQuery.end(); ++it) diff --git a/CometSearch/CometSearchManager.h b/CometSearch/CometSearchManager.h index 33549c50..5065abd6 100644 --- a/CometSearch/CometSearchManager.h +++ b/CometSearch/CometSearchManager.h @@ -67,7 +67,7 @@ class CometSearchManager : public ICometSearchManager vector& strReturnPeptide, vector& strReturnProtein, vector>& matchedFragments, - vector& scores); + vector& scores); virtual bool DoMS1SearchMultiResults(const double dMaxMS1RTDiff, const double dMaxQueryRT, const int topN, @@ -75,7 +75,7 @@ class CometSearchManager : public ICometSearchManager double* pdMass, double* pdInten, int iNumPeaks, - vector& scores); + vector& scores); virtual void AddInputFiles(vector &pvInputFiles); virtual void SetOutputFileBaseName(const char *pszBaseName); virtual void SetParam(const string &name, const string &strValue, const string &value); diff --git a/CometSearch/CometWriteMzIdentML.cpp b/CometSearch/CometWriteMzIdentML.cpp index 08c0b581..2430f216 100644 --- a/CometSearch/CometWriteMzIdentML.cpp +++ b/CometSearch/CometWriteMzIdentML.cpp @@ -318,7 +318,7 @@ bool CometWriteMzIdentML::ParseTmpFile(FILE *fpout, CometMassSpecUtils::EscapeString(strProteinName); fprintf(fpout, " 0) @@ -1465,13 +1465,30 @@ void CometWriteMzIdentML::PrintTmpPSM(int iWhichQuery, std::vector::iterator it; if (pOutput[iWhichResult].pWhichProtein.size() > 0) { - for (it=pOutput[iWhichResult].pWhichProtein.begin(); it!=pOutput[iWhichResult].pWhichProtein.end(); ++it) + if (g_staticParams.iIndexDb) { + comet_fileoffset_t lEntry = pOutput[iWhichResult].lProteinFilePosition; + + for (auto it = g_pvProteinsList.at(lEntry).begin(); it != g_pvProteinsList.at(lEntry).end(); ++it) + { #ifdef _WIN32 - fprintf(fpout, "%I64d:%d;", (*it).lWhichProtein, (*it).iStartResidue); + fprintf(fpout, "%I64d:%d;", *it, 0); #else - fprintf(fpout, "%ld:%d;", (*it).lWhichProtein, (*it).iStartResidue); + fprintf(fpout, "%ld:%d;", *it, 0); +#endif + } + + } + else + { + for (it = pOutput[iWhichResult].pWhichProtein.begin(); it != pOutput[iWhichResult].pWhichProtein.end(); ++it) + { +#ifdef _WIN32 + fprintf(fpout, "%I64d:%d;", (*it).lWhichProtein, (*it).iStartResidue); +#else + fprintf(fpout, "%ld:%d;", (*it).lWhichProtein, (*it).iStartResidue); #endif + } } fprintf(fpout, "\t"); } diff --git a/CometWrapper/CometDataWrapper.h b/CometWrapper/CometDataWrapper.h index 6dd37538..e149a20f 100644 --- a/CometWrapper/CometDataWrapper.h +++ b/CometWrapper/CometDataWrapper.h @@ -331,9 +331,9 @@ namespace CometWrapper { public ref class ScoreWrapper { public: - ScoreWrapper(const Scores & score) + ScoreWrapper(const CometScores & score) { - pScores = new Scores(score); + pScores = new CometScores(score); } ~ScoreWrapper() @@ -392,15 +392,15 @@ namespace CometWrapper { } private: - Scores * pScores; + CometScores * pScores; }; public ref class ScoreWrapperMS1 { public: - ScoreWrapperMS1(const ScoresMS1& score) + ScoreWrapperMS1(const CometScoresMS1& score) { - pScoresMS1 = new ScoresMS1(score); + pScoresMS1 = new CometScoresMS1(score); } ~ScoreWrapperMS1() @@ -429,7 +429,7 @@ namespace CometWrapper { } private: - ScoresMS1* pScoresMS1; + CometScoresMS1* pScoresMS1; }; public enum class IonSeries : int { a, b, c, x, y, z }; diff --git a/CometWrapper/CometWrapper.cpp b/CometWrapper/CometWrapper.cpp index 11a4a3d8..455db887 100644 --- a/CometWrapper/CometWrapper.cpp +++ b/CometWrapper/CometWrapper.cpp @@ -137,7 +137,7 @@ bool CometSearchManagerWrapper::DoSingleSpectrumSearchMultiResults( // --- Prepare native output containers --- std::vector stdPeptides; std::vector stdProteins; - std::vector stdScores; + std::vector stdScores; std::vector> stdMatchedFrags; // --- Perform native search --- @@ -204,7 +204,7 @@ bool CometSearchManagerWrapper::DoMS1SearchMultiResults(double dMaxMS1RTDiff, vector stdStringszPeptide; vector stdStringszProtein; - vector scoresMS1; + vector scoresMS1; vector> matchedFragments; // perform the search diff --git a/RealtimeSearch/SearchMS1MS2.cs b/RealtimeSearch/SearchMS1MS2.cs index 1a5a1301..6a4aee05 100644 --- a/RealtimeSearch/SearchMS1MS2.cs +++ b/RealtimeSearch/SearchMS1MS2.cs @@ -1,15 +1,15 @@ namespace RealTimeSearch { + //using System.Threading.Tasks; + + using CometWrapper; using System; using System.Collections.Generic; using System.Diagnostics; using System.IO; using System.Linq; using System.Text; - //using System.Threading.Tasks; - - using CometWrapper; - + using System.Threading; using ThermoFisher.CommonCore.Data.Business; using ThermoFisher.CommonCore.Data.FilterEnums; using ThermoFisher.CommonCore.Data.Interfaces; @@ -129,12 +129,12 @@ static void Main(string[] args) // have a different maximum RT value. Assumes a linear gradient. dMaxQueryRT = 60.0 * rawFile.RetentionTimeFromScanNumber(iLastScan); - int iPrintEveryScan = 1; + int iPrintEveryScan = 100; int iMS2TopN = 1; // report up to topN hits per MS/MS query bool bContinuousLoop = false; // set to true to continuously loop through the raw file bool bPrintHistogram = true; bool bPrintMatchedFragmentIons = false; - bool bPerformMS1Search = true; + bool bPerformMS1Search = false; bool bPerformMS2Search = true; if (bPerformMS1Search) @@ -151,6 +151,7 @@ static void Main(string[] args) iFirstScan = 10000; iLastScan = 20000; */ + watchGlobal.Start(); for (int iScanNumber = iFirstScan; iScanNumber <= iLastScan; ++iScanNumber) @@ -254,7 +255,7 @@ static void Main(string[] args) out vPeptide, out vProtein, out List> vMatchingFragments, out List vScores); watch.Stop(); - int iProteinLengthCutoff = 30; + int iProteinLengthCutoff = 90; if (vPeptide.Count > 0 && (iScanNumber % iPrintEveryScan) == 0) { @@ -268,10 +269,10 @@ static void Main(string[] args) if (protein.Length > iProteinLengthCutoff) protein = protein.Substring(0, iProteinLengthCutoff); // trim to avoid printing long protein description string - Console.WriteLine(" MS2 {0}\t{1} {2:F4} {3:0.##E+00} {4:F4} AScore {5:F2} Sites '{6}' {7} ms", + Console.WriteLine(" MS2 {0}\t{1} {2:F4} {3:0.##E+00} {4:F4} AScore {5:F2} Sites '{6}' {7} ms prot '{8}'", iScanNumber, vPeptide[x], vScores[x].xCorr, vScores[x].dExpect, dExpPepMass, vScores[x].dAScoreScore, vScores[x].sAScoreProSiteScores, - watch.ElapsedMilliseconds); + watch.ElapsedMilliseconds, protein); /* if (vScores[x].dAScoreScore >= 13.0 && vScores[x].xCorr > 2.0