Skip to content

Commit d22ad1e

Browse files
committed
Add unsupervised anomaly detection to the fully automated pipeline
1 parent abc46bd commit d22ad1e

File tree

3 files changed

+956
-1
lines changed

3 files changed

+956
-1
lines changed

domains/anomaly-detection/anomalyDetectionPython.sh

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -108,8 +108,10 @@ anomaly_detection_using_python() {
108108
time "${ANOMALY_DETECTION_SCRIPT_DIR}/tunedNodeEmbeddingClustering.py" "${@}" ${verboseMode}
109109
# Reduce the dimensionality of the node embeddings down to 2D for visualization using UMAP
110110
time "${ANOMALY_DETECTION_SCRIPT_DIR}/umap2dNodeEmbeddings.py" "${@}" ${verboseMode}
111-
111+
# Plot the results with clustering and UMAP embeddings to reveal anomalies in rare feature combinations
112112
time "${ANOMALY_DETECTION_SCRIPT_DIR}/anomalyDetectionFeaturePlots.py" "${@}" "--report_directory" "${FULL_REPORT_DIRECTORY}" ${verboseMode}
113+
# Run an unsupervised anomaly detection algorithm including tuning and explainability
114+
time "${ANOMALY_DETECTION_SCRIPT_DIR}/tunedAnomalyDetectionExplained.py" "${@}" "--report_directory" "${FULL_REPORT_DIRECTORY}" ${verboseMode}
113115
# Query Results: Output all collected features into a CSV file.
114116
execute_cypher "${ANOMALY_DETECTION_FEATURE_CYPHER_DIR}/AnomalyDetectionFeatures.cypher" "${@}" > "${FULL_REPORT_DIRECTORY}/${nodeLabel}AnomalyDetection_Features.csv"
115117
}

domains/anomaly-detection/features/AnomalyDetectionFeatures.cypher

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,14 @@
1717
AND codeUnit.clusteringHDBSCANRadiusMax IS NOT NULL
1818
AND codeUnit.clusteringHDBSCANRadiusAverage IS NOT NULL
1919
AND codeUnit.clusteringHDBSCANNormalizedDistanceToMedoid IS NOT NULL
20+
AND codeUnit.anomalyLabel IS NOT NULL
21+
AND codeUnit.anomalyScore IS NOT NULL
22+
AND codeUnit.anomalyTopFeature1 IS NOT NULL
23+
AND codeUnit.anomalyTopFeature2 IS NOT NULL
24+
AND codeUnit.anomalyTopFeature3 IS NOT NULL
25+
AND codeUnit.anomalyTopFeatureSHAPValue1 IS NOT NULL
26+
AND codeUnit.anomalyTopFeatureSHAPValue2 IS NOT NULL
27+
AND codeUnit.anomalyTopFeatureSHAPValue3 IS NOT NULL
2028
AND codeUnit.embeddingFastRandomProjectionVisualizationX IS NOT NULL
2129
AND codeUnit.embeddingFastRandomProjectionVisualizationY IS NOT NULL
2230
OPTIONAL MATCH (artifact:Java:Artifact)-[:CONTAINS]->(codeUnit)
@@ -44,6 +52,14 @@ OPTIONAL MATCH (projectRoot:Directory)<-[:HAS_ROOT]-(proj:TS:Project)-[:CONTAINS
4452
,codeUnit.clusteringHDBSCANRadiusMax AS clusteringRadiusMax
4553
,codeUnit.clusteringHDBSCANRadiusAverage AS clusteringRadiusAverage
4654
,codeUnit.clusteringHDBSCANNormalizedDistanceToMedoid AS clusteringNormalizedDistanceToMedoid
55+
,codeUnit.anomalyLabel AS anomalyLabel
56+
,codeUnit.anomalyScore AS anomalyScore
57+
,codeUnit.anomalyTopFeature1 AS anomalyTopFeature1
58+
,codeUnit.anomalyTopFeature2 AS anomalyTopFeature2
59+
,codeUnit.anomalyTopFeature3 AS anomalyTopFeature3
60+
,codeUnit.anomalyTopFeatureSHAPValue1 AS anomalyTopFeatureSHAPValue1
61+
,codeUnit.anomalyTopFeatureSHAPValue2 AS anomalyTopFeatureSHAPValue2
62+
,codeUnit.anomalyTopFeatureSHAPValue3 AS anomalyTopFeatureSHAPValue3
4763
,codeUnit.embeddingFastRandomProjectionVisualizationX AS visualizationX
4864
,codeUnit.embeddingFastRandomProjectionVisualizationY AS visualizationY
4965
,coalesce(codeUnit.centralityPageRank, 0.00001) AS centrality

0 commit comments

Comments
 (0)