Skip to content

Commit

Permalink
Added a '--prefer-mane-transcripts' mode that enforces MANE_Select ta…
Browse files Browse the repository at this point in the history
…gged Gencode transcripts where possible (#9012)

* Added a '--prefer-mane-transcripts' mode that enforces MANE_Select tagged Gencode transcripts where possible
jamesemery authored Oct 23, 2024
1 parent d056c32 commit c4860d4
Showing 29 changed files with 297 additions and 16 deletions.
Original file line number Diff line number Diff line change
@@ -80,6 +80,14 @@ public abstract class BaseFuncotatorArgumentCollection implements Serializable {
)
public TranscriptSelectionMode transcriptSelectionMode = FuncotatorArgumentDefinitions.TRANSCRIPT_SELECTION_MODE_DEFAULT_VALUE;

@Advanced
@Argument(
fullName = FuncotatorArgumentDefinitions.PREFER_MANE_TRANSCRIPT_MODE,
optional = true,
doc = "If this flag is set, Funcotator will prefer 'MANE_Plus_Clinical' followed by 'MANE_select' transcripts (including those not tagged 'basic') if one is present for a given variant. If neither tag is present it use the default behavior (only base transcripts)."
)
public boolean MANETranscriptMode = false;

@Argument(
fullName = FuncotatorArgumentDefinitions.TRANSCRIPT_LIST_LONG_NAME,
optional = true,
Original file line number Diff line number Diff line change
@@ -147,7 +147,8 @@ public void onTraversalStart() {
new FlankSettings(0,0),
true,
funcotatorArgs.minNumBasesForValidSegment,
funcotatorArgs.spliceSiteWindow
funcotatorArgs.spliceSiteWindow,
funcotatorArgs.MANETranscriptMode
).stream()
.filter(DataSourceFuncotationFactory::isSupportingSegmentFuncotation)
.collect(Collectors.toList());
Original file line number Diff line number Diff line change
@@ -794,7 +794,8 @@ public void onTraversalStart() {
new FlankSettings(funcotatorArgs.fivePrimeFlankSize, funcotatorArgs.threePrimeFlankSize),
false,
funcotatorArgs.minNumBasesForValidSegment,
funcotatorArgs.spliceSiteWindow
funcotatorArgs.spliceSiteWindow,
funcotatorArgs.MANETranscriptMode
);

logger.info("Initializing Funcotator Engine...");
Original file line number Diff line number Diff line change
@@ -36,6 +36,8 @@ public class FuncotatorArgumentDefinitions {
public static final String TRANSCRIPT_SELECTION_MODE_LONG_NAME = "transcript-selection-mode";
public static final TranscriptSelectionMode TRANSCRIPT_SELECTION_MODE_DEFAULT_VALUE = TranscriptSelectionMode.CANONICAL;

public static final String PREFER_MANE_TRANSCRIPT_MODE = "prefer-mane-transcripts";

/**
* Do not give this a static default value or the integration tests will get hosed.
*/
Original file line number Diff line number Diff line change
@@ -329,6 +329,7 @@ private static boolean isValidDirectory(final Path p) {
* ignored for those that don't.
* @param minBasesForValidSegment The minimum number of bases for a segment to be considered valid.
* @param spliceSiteWindowSize The number of bases on either side of a splice site for a variant to be a {@link org.broadinstitute.hellbender.tools.funcotator.dataSources.gencode.GencodeFuncotation.VariantClassification#SPLICE_SITE} variant.
* @param preferMANETranscriptsWhereApplicable If this is set, {@link GencodeFuncotationFactory} will only emit MANE transcripts if any are available for a given variant; otherwise it behaves as normal.
* @return A {@link List} of {@link DataSourceFuncotationFactory} given the data source metadata, overrides, and transcript reporting priority information.
*/
public static List<DataSourceFuncotationFactory> createDataSourceFuncotationFactoriesForDataSources(final Map<Path, Properties> dataSourceMetaData,
@@ -340,7 +341,8 @@ public static List<DataSourceFuncotationFactory> createDataSourceFuncotationFact
final FlankSettings flankSettings,
final boolean doAttemptSegmentFuncotationForTranscriptDatasources,
final int minBasesForValidSegment,
final int spliceSiteWindowSize) {
final int spliceSiteWindowSize,
final boolean preferMANETranscriptsWhereApplicable) {
Utils.nonNull(dataSourceMetaData);
Utils.nonNull(annotationOverridesMap);
Utils.nonNull(transcriptSelectionMode);
@@ -379,7 +381,7 @@ public static List<DataSourceFuncotationFactory> createDataSourceFuncotationFact
case GENCODE:
featureInput = createAndRegisterFeatureInputs(path, properties, gatkToolInstance, lookaheadFeatureCachingInBp, GencodeGtfFeature.class, false);
funcotationFactory = DataSourceUtils.createGencodeDataSource(path, properties, annotationOverridesMap, transcriptSelectionMode,
userTranscriptIdSet, featureInput, flankSettings, doAttemptSegmentFuncotationForTranscriptDatasources, minBasesForValidSegment, spliceSiteWindowSize);
userTranscriptIdSet, featureInput, flankSettings, doAttemptSegmentFuncotationForTranscriptDatasources, minBasesForValidSegment, spliceSiteWindowSize, preferMANETranscriptsWhereApplicable);
break;
case VCF:
featureInput = createAndRegisterFeatureInputs(path, properties, gatkToolInstance, lookaheadFeatureCachingInBp, VariantContext.class, false);
@@ -596,7 +598,8 @@ private static GencodeFuncotationFactory createGencodeDataSource(final Path data
final FlankSettings flankSettings,
final boolean isSegmentFuncotationEnabled,
final int minBasesForValidSegment,
final int spliceSiteWindowSize) {
final int spliceSiteWindowSize,
final boolean onlyUseMANETranscriptsWhenApplicable) {
Utils.nonNull(dataSourceFile);
Utils.nonNull(dataSourceProperties);
Utils.nonNull(annotationOverridesMap);
@@ -626,7 +629,8 @@ private static GencodeFuncotationFactory createGencodeDataSource(final Path data
ncbiBuildVersion,
isSegmentFuncotationEnabled,
minBasesForValidSegment,
spliceSiteWindowSize
spliceSiteWindowSize,
onlyUseMANETranscriptsWhenApplicable
);
}

Original file line number Diff line number Diff line change
@@ -242,6 +242,11 @@ public class GencodeFuncotationFactory extends DataSourceFuncotationFactory {
*/
private boolean isSegmentFuncotationEnabled;

/**
* If this is true, only MANE transcripts will be used for funcotation creation when at least one is present.
*/
private boolean preferMANETranscripts;

//==================================================================================================================
// Constructors:

@@ -354,7 +359,7 @@ public GencodeFuncotationFactory(final Path gencodeTranscriptFastaFilePath,

this(gencodeTranscriptFastaFilePath, version, name, transcriptSelectionMode, userRequestedTranscripts,
annotationOverrides, mainFeatureInput, flankSettings, isDataSourceB37, ncbiBuildVersion,
isSegmentFuncotationEnabled, minBasesForValidSegment, FuncotatorUtils.DEFAULT_SPLICE_SITE_WINDOW_SIZE);
isSegmentFuncotationEnabled, minBasesForValidSegment, FuncotatorUtils.DEFAULT_SPLICE_SITE_WINDOW_SIZE, false);
}

/**
@@ -385,7 +390,8 @@ public GencodeFuncotationFactory(final Path gencodeTranscriptFastaFilePath,
final String ncbiBuildVersion,
final boolean isSegmentFuncotationEnabled,
final int minBasesForValidSegment,
final int spliceSiteWindowSize) {
final int spliceSiteWindowSize,
final boolean preferMANETranscriptsWhereApplicable) {

super(mainFeatureInput, minBasesForValidSegment);

@@ -429,6 +435,8 @@ public GencodeFuncotationFactory(final Path gencodeTranscriptFastaFilePath,

// Initialize overrides / defaults:
initializeAnnotationOverrides( annotationOverrides );

this.preferMANETranscripts = preferMANETranscriptsWhereApplicable;
}

private Path localizeGencodeTranscriptFastaFile( final Path gencodeTranscriptFastaFilePath ) {
@@ -622,6 +630,28 @@ private static List<GencodeGtfGeneFeature> convertFeaturesToGencodeGtfGeneFeatur
.collect(Collectors.toList());
}

/**
 * Selects which transcripts to use when MANE transcripts are preferred.
 *
 * The tags are tried in priority order: if any transcript carries the MANE_Plus_Clinical tag, only those
 * transcripts are returned; otherwise, if any transcript carries the MANE_Select tag, only those are returned;
 * otherwise the result falls back to the transcripts that carry the basic tag.
 *
 * @param transcripts the Gencode transcripts to filter
 * @return the transcripts matching the highest-priority tag that is present, or the basic transcripts if
 *         neither MANE tag is present on any transcript
 */
@VisibleForTesting
static List<GencodeGtfTranscriptFeature> retreiveMANESelectModeTranscriptsCriteria(final List<GencodeGtfTranscriptFeature> transcripts) {
    // Walk the MANE tags from highest to lowest priority and stop at the first one that matches anything.
    for (final GencodeGTFFieldConstants.FeatureTag maneTag : List.of(MANE_PLUS_CLINICAL, MANE_SELECT)) {
        final List<GencodeGtfTranscriptFeature> taggedTranscripts = transcripts.stream()
                .filter(transcript -> hasTag(transcript, maneTag))
                .toList();
        if (!taggedTranscripts.isEmpty()) {
            return taggedTranscripts;
        }
    }

    // No MANE-tagged transcript was found: fall back to the basic transcripts.
    return transcripts.stream()
            .filter(GencodeFuncotationFactory::isBasic)
            .collect(Collectors.toList());
}

/**
* {@inheritDoc}
@@ -853,16 +883,21 @@ static boolean isVariantInCodingRegion(final GencodeFuncotation.VariantClassific
*/
private List<GencodeFuncotation> createFuncotationsHelper(final VariantContext variant, final Allele altAllele, final GencodeGtfGeneFeature gtfFeature, final ReferenceContext reference) {

final List<GencodeGtfTranscriptFeature> transcriptList;
List<GencodeGtfTranscriptFeature> transcriptList;

// Only get basic transcripts if we're using data from Gencode:
if ( gtfFeature.getGtfSourceFileType().equals(GencodeGtfCodec.GTF_FILE_TYPE_STRING) ) {
transcriptList = retrieveBasicTranscripts(gtfFeature);
}
else {
if (preferMANETranscripts) {
// Filter out the non-MANE_Select/Mane_Plus_Clinical transcripts if we're only using MANE transcripts:
transcriptList = retreiveMANESelectModeTranscriptsCriteria(gtfFeature.getTranscripts());
} else {
transcriptList = retrieveBasicTranscripts(gtfFeature);
}
} else {
transcriptList = gtfFeature.getTranscripts();
}


return createFuncotationsHelper(variant, altAllele, reference, transcriptList);
}

@@ -979,9 +1014,14 @@ static final GencodeFuncotation createDefaultFuncotationsOnProblemVariant( final

/**
 * Checks whether the given transcript carries the {@code basic} tag.
 *
 * @param transcript the transcript whose optional fields are inspected
 * @return {@code true} if {@link #hasTag} finds {@link GencodeGTFFieldConstants.FeatureTag#BASIC} on the transcript
 */
private static boolean isBasic(final GencodeGtfTranscriptFeature transcript) {
    return hasTag(transcript, GencodeGTFFieldConstants.FeatureTag.BASIC);
}

/**
 * Checks whether the given transcript carries the given tag.
 *
 * A transcript matches when at least one of its optional fields is named {@code "tag"} and has a value
 * equal to {@code tag.toString()}. Only the requested tag is compared; no other tag is required to be present.
 *
 * @param transcript the transcript whose optional fields are inspected
 * @param tag the tag to look for
 * @return {@code true} if a matching {@code "tag"} field exists, {@code false} otherwise
 */
private static boolean hasTag(final GencodeGtfTranscriptFeature transcript, final GencodeGTFFieldConstants.FeatureTag tag) {
    // anyMatch stops at the first hit instead of counting every matching field.
    return transcript.getOptionalFields().stream()
            .filter( f -> f.getName().equals("tag") )
            .anyMatch( f -> f.getValue().equals(tag.toString()) );
}

Original file line number Diff line number Diff line change
@@ -65,7 +65,8 @@ public void testGetFuncotationFactoriesAndCreateFuncotationMapForVariant(final F
new FlankSettings(0, 0),
false,
FuncotatorUtils.DEFAULT_MIN_NUM_BASES_FOR_VALID_SEGMENT,
FuncotatorUtils.DEFAULT_SPLICE_SITE_WINDOW_SIZE)
FuncotatorUtils.DEFAULT_SPLICE_SITE_WINDOW_SIZE,
false)
);

for (int i = 0; i < entireVcf.getRight().size(); i++) {
Loading

0 comments on commit c4860d4

Please sign in to comment.