Skip to content

Commit c64c379

Browse files
committed
Add unsupervised anomaly detection to the fully automated pipeline
1 parent d6e54da commit c64c379

File tree

3 files changed

+972
-1
lines changed

3 files changed

+972
-1
lines changed

domains/anomaly-detection/anomalyDetectionPython.sh

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -108,8 +108,10 @@ anomaly_detection_using_python() {
108108
time "${ANOMALY_DETECTION_SCRIPT_DIR}/tunedNodeEmbeddingClustering.py" "${@}" ${verboseMode}
109109
# Reduce the dimensionality of the node embeddings down to 2D for visualization using UMAP
110110
time "${ANOMALY_DETECTION_SCRIPT_DIR}/umap2dNodeEmbeddings.py" "${@}" ${verboseMode}
111-
111+
# Plot the results with clustering and UMAP embeddings to reveal anomalies in rare feature combinations
112112
time "${ANOMALY_DETECTION_SCRIPT_DIR}/anomalyDetectionFeaturePlots.py" "${@}" "--report_directory" "${FULL_REPORT_DIRECTORY}" ${verboseMode}
113+
# Run an unsupervised anomaly detection algorithm including tuning and explainability
114+
time "${ANOMALY_DETECTION_SCRIPT_DIR}/tunedAnomalyDetectionExplained.py" "${@}" "--report_directory" "${FULL_REPORT_DIRECTORY}" ${verboseMode}
113115
# Query Results: Output all collected features into a CSV file.
114116
execute_cypher "${ANOMALY_DETECTION_FEATURE_CYPHER_DIR}/AnomalyDetectionFeatures.cypher" "${@}" > "${FULL_REPORT_DIRECTORY}/${nodeLabel}AnomalyDetection_Features.csv"
115117
}

domains/anomaly-detection/features/AnomalyDetectionFeatures.cypher

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,15 @@
1717
AND codeUnit.clusteringHDBSCANRadiusMax IS NOT NULL
1818
AND codeUnit.clusteringHDBSCANRadiusAverage IS NOT NULL
1919
AND codeUnit.clusteringHDBSCANNormalizedDistanceToMedoid IS NOT NULL
20+
// The following columns can be null if there is not enough data or if an error occurred in the anomaly detection pipeline, so they are intentionally not filtered on:
21+
// AND codeUnit.anomalyLabel IS NOT NULL
22+
// AND codeUnit.anomalyScore IS NOT NULL
23+
// AND codeUnit.anomalyTopFeature1 IS NOT NULL
24+
// AND codeUnit.anomalyTopFeature2 IS NOT NULL
25+
// AND codeUnit.anomalyTopFeature3 IS NOT NULL
26+
// AND codeUnit.anomalyTopFeatureSHAPValue1 IS NOT NULL
27+
// AND codeUnit.anomalyTopFeatureSHAPValue2 IS NOT NULL
28+
// AND codeUnit.anomalyTopFeatureSHAPValue3 IS NOT NULL
2029
AND codeUnit.embeddingsFastRandomProjectionTunedForClusteringVisualizationX IS NOT NULL
2130
AND codeUnit.embeddingsFastRandomProjectionTunedForClusteringVisualizationY IS NOT NULL
2231
OPTIONAL MATCH (artifact:Java:Artifact)-[:CONTAINS]->(codeUnit)
@@ -44,5 +53,13 @@ OPTIONAL MATCH (projectRoot:Directory)<-[:HAS_ROOT]-(proj:TS:Project)-[:CONTAINS
4453
,codeUnit.clusteringHDBSCANRadiusMax AS clusteringRadiusMax
4554
,codeUnit.clusteringHDBSCANRadiusAverage AS clusteringRadiusAverage
4655
,codeUnit.clusteringHDBSCANNormalizedDistanceToMedoid AS clusteringNormalizedDistanceToMedoid
56+
,codeUnit.anomalyLabel AS anomalyLabel
57+
,codeUnit.anomalyScore AS anomalyScore
58+
,codeUnit.anomalyTopFeature1 AS anomalyTopFeature1
59+
,codeUnit.anomalyTopFeature2 AS anomalyTopFeature2
60+
,codeUnit.anomalyTopFeature3 AS anomalyTopFeature3
61+
,codeUnit.anomalyTopFeatureSHAPValue1 AS anomalyTopFeatureSHAPValue1
62+
,codeUnit.anomalyTopFeatureSHAPValue2 AS anomalyTopFeatureSHAPValue2
63+
,codeUnit.anomalyTopFeatureSHAPValue3 AS anomalyTopFeatureSHAPValue3
4764
,codeUnit.embeddingsFastRandomProjectionTunedForClusteringVisualizationX AS visualizationX
4865
,codeUnit.embeddingsFastRandomProjectionTunedForClusteringVisualizationY AS visualizationY

0 commit comments

Comments
 (0)