JohT
diff --git a/domains/anomaly-detection/anomalyDetectionCsv.sh b/domains/anomaly-detection/anomalyDetectionCsv.sh
Lines changed: 25 additions & 19 deletions
diff --git a/domains/anomaly-detection/anomalyDetectionFeaturePlots.py b/domains/anomaly-detection/anomalyDetectionFeaturePlots.py
Lines changed: 50 additions & 38 deletions
@@ -69,17 +69,20 @@ anomaly_detection_queries() {
     local nodeLabel
     nodeLabel=$( extractQueryParameter "projection_node_label" "${@}" )
 
+    local language
+    language=$( extractQueryParameter "projection_language" "${@}" )
+    
     echo "anomalyDetectionCsv: $(date +'%Y-%m-%dT%H:%M:%S%z') Executing Queries for ${nodeLabel} nodes..."
-    execute_cypher "${ANOMALY_DETECTION_QUERY_CYPHER_DIR}/AnomalyDetectionPotentialImbalancedRoles.cypher" "${@}" > "${FULL_REPORT_DIRECTORY}/${nodeLabel}AnomalyDetection_PotentialImbalancedRoles.csv"
-    execute_cypher "${ANOMALY_DETECTION_QUERY_CYPHER_DIR}/AnomalyDetectionPotentialOverEngineerOrIsolated.cypher" "${@}" > "${FULL_REPORT_DIRECTORY}/${nodeLabel}AnomalyDetection_PotentialOverEngineerOrIsolated.csv"
+    execute_cypher "${ANOMALY_DETECTION_QUERY_CYPHER_DIR}/AnomalyDetectionPotentialImbalancedRoles.cypher" "${@}" > "${FULL_REPORT_DIRECTORY}/${language}_${nodeLabel}AnomalyDetection_PotentialImbalancedRoles.csv"
+    execute_cypher "${ANOMALY_DETECTION_QUERY_CYPHER_DIR}/AnomalyDetectionPotentialOverEngineerOrIsolated.cypher" "${@}" > "${FULL_REPORT_DIRECTORY}/${language}_${nodeLabel}AnomalyDetection_PotentialOverEngineerOrIsolated.csv"
 
-    execute_cypher "${ANOMALY_DETECTION_QUERY_CYPHER_DIR}/AnomalyDetectionHiddenBridgeNodes.cypher" "${@}" > "${FULL_REPORT_DIRECTORY}/${nodeLabel}AnomalyDetection_HiddenBridgeNodes.csv"
-    execute_cypher "${ANOMALY_DETECTION_QUERY_CYPHER_DIR}/AnomalyDetectionPopularBottlenecks.cypher" "${@}" > "${FULL_REPORT_DIRECTORY}/${nodeLabel}AnomalyDetection_PopularBottlenecks.csv"
-    execute_cypher "${ANOMALY_DETECTION_QUERY_CYPHER_DIR}/AnomalyDetectionSilentCoordinators.cypher" "${@}" > "${FULL_REPORT_DIRECTORY}/${nodeLabel}AnomalyDetection_SilentCoordinators.csv"
-    execute_cypher "${ANOMALY_DETECTION_QUERY_CYPHER_DIR}/AnomalyDetectionOverReferencesUtilities.cypher" "${@}" > "${FULL_REPORT_DIRECTORY}/${nodeLabel}AnomalyDetection_OverReferencesUtilities.csv"
-    execute_cypher "${ANOMALY_DETECTION_QUERY_CYPHER_DIR}/AnomalyDetectionFragileStructuralBridges.cypher" "${@}" > "${FULL_REPORT_DIRECTORY}/${nodeLabel}AnomalyDetection_FragileStructuralBridges.csv"
-    execute_cypher "${ANOMALY_DETECTION_QUERY_CYPHER_DIR}/AnomalyDetectionDependencyHungryOrchestrators.cypher" "${@}" > "${FULL_REPORT_DIRECTORY}/${nodeLabel}AnomalyDetection_DependencyHungryOrchestrators.csv"
-    execute_cypher "${ANOMALY_DETECTION_QUERY_CYPHER_DIR}/AnomalyDetectionUnexpectedCentralNodes.cypher" "${@}" > "${FULL_REPORT_DIRECTORY}/${nodeLabel}AnomalyDetection_UnexpectedCentralNodes.csv"
+    execute_cypher "${ANOMALY_DETECTION_QUERY_CYPHER_DIR}/AnomalyDetectionHiddenBridgeNodes.cypher" "${@}" > "${FULL_REPORT_DIRECTORY}/${language}_${nodeLabel}AnomalyDetection_HiddenBridgeNodes.csv"
+    execute_cypher "${ANOMALY_DETECTION_QUERY_CYPHER_DIR}/AnomalyDetectionPopularBottlenecks.cypher" "${@}" > "${FULL_REPORT_DIRECTORY}/${language}_${nodeLabel}AnomalyDetection_PopularBottlenecks.csv"
+    execute_cypher "${ANOMALY_DETECTION_QUERY_CYPHER_DIR}/AnomalyDetectionSilentCoordinators.cypher" "${@}" > "${FULL_REPORT_DIRECTORY}/${language}_${nodeLabel}AnomalyDetection_SilentCoordinators.csv"
+    execute_cypher "${ANOMALY_DETECTION_QUERY_CYPHER_DIR}/AnomalyDetectionOverReferencesUtilities.cypher" "${@}" > "${FULL_REPORT_DIRECTORY}/${language}_${nodeLabel}AnomalyDetection_OverReferencesUtilities.csv"
+    execute_cypher "${ANOMALY_DETECTION_QUERY_CYPHER_DIR}/AnomalyDetectionFragileStructuralBridges.cypher" "${@}" > "${FULL_REPORT_DIRECTORY}/${language}_${nodeLabel}AnomalyDetection_FragileStructuralBridges.csv"
+    execute_cypher "${ANOMALY_DETECTION_QUERY_CYPHER_DIR}/AnomalyDetectionDependencyHungryOrchestrators.cypher" "${@}" > "${FULL_REPORT_DIRECTORY}/${language}_${nodeLabel}AnomalyDetection_DependencyHungryOrchestrators.csv"
+    execute_cypher "${ANOMALY_DETECTION_QUERY_CYPHER_DIR}/AnomalyDetectionUnexpectedCentralNodes.cypher" "${@}" > "${FULL_REPORT_DIRECTORY}/${language}_${nodeLabel}AnomalyDetection_UnexpectedCentralNodes.csv"
 }
 
 # Run the anomaly detection pipeline.
@@ -111,36 +114,39 @@ ALGORITHM_NODE="projection_node_label"
 PROJECTION_WEIGHT="dependencies_projection_weight_property"
 ALGORITHM_WEIGHT="projection_weight_property"
 
+PROJECTION_LANGUAGE="dependencies_projection_language"
+ALGORITHM_LANGUAGE="projection_language"
+
 # Code independent algorithm parameters
 COMMUNITY_PROPERTY="community_property=communityLeidenIdTuned"
 EMBEDDING_PROPERTY="embedding_property=embeddingsFastRandomProjectionTunedForClustering"
 
 # -- Java Artifact Node Embeddings -------------------------------
 
-if createUndirectedDependencyProjection "${PROJECTION_NAME}=artifact-anomaly-detection" "${PROJECTION_NODE}=Artifact" "${PROJECTION_WEIGHT}=weight"; then
-    createDirectedDependencyProjection "${PROJECTION_NAME}=artifact-anomaly-detection-directed" "${PROJECTION_NODE}=Artifact" "${PROJECTION_WEIGHT}=weight"
-    anomaly_detection_csv_reports "${ALGORITHM_PROJECTION}=artifact-anomaly-detection" "${ALGORITHM_NODE}=Artifact" "${ALGORITHM_WEIGHT}=weight" "${COMMUNITY_PROPERTY}" "${EMBEDDING_PROPERTY}"
+if createUndirectedDependencyProjection "${PROJECTION_NAME}=artifact-anomaly-detection" "${PROJECTION_NODE}=Artifact" "${PROJECTION_WEIGHT}=weight" "${PROJECTION_LANGUAGE}=Java"; then
+    createDirectedDependencyProjection "${PROJECTION_NAME}=artifact-anomaly-detection-directed" "${PROJECTION_NODE}=Artifact" "${PROJECTION_WEIGHT}=weight" "${PROJECTION_LANGUAGE}=Java"
+    anomaly_detection_csv_reports "${ALGORITHM_PROJECTION}=artifact-anomaly-detection" "${ALGORITHM_NODE}=Artifact" "${ALGORITHM_WEIGHT}=weight" "${ALGORITHM_LANGUAGE}=Java" "${COMMUNITY_PROPERTY}" "${EMBEDDING_PROPERTY}"
 fi
 
 # -- Java Package Node Embeddings --------------------------------
 
-if createUndirectedDependencyProjection "${PROJECTION_NAME}=package-anomaly-detection" "${PROJECTION_NODE}=Package" "${PROJECTION_WEIGHT}=weight25PercentInterfaces"; then
-    createDirectedDependencyProjection "${PROJECTION_NAME}=package-anomaly-detection-directed" "${PROJECTION_NODE}=Package" "${PROJECTION_WEIGHT}=weight25PercentInterfaces"
-    anomaly_detection_csv_reports "${ALGORITHM_PROJECTION}=package-anomaly-detection" "${ALGORITHM_NODE}=Package" "${ALGORITHM_WEIGHT}=weight25PercentInterfaces" "${COMMUNITY_PROPERTY}" "${EMBEDDING_PROPERTY}"
+if createUndirectedDependencyProjection "${PROJECTION_NAME}=package-anomaly-detection" "${PROJECTION_NODE}=Package" "${PROJECTION_WEIGHT}=weight25PercentInterfaces" "${PROJECTION_LANGUAGE}=Java"; then
+    createDirectedDependencyProjection "${PROJECTION_NAME}=package-anomaly-detection-directed" "${PROJECTION_NODE}=Package" "${PROJECTION_WEIGHT}=weight25PercentInterfaces" "${PROJECTION_LANGUAGE}=Java"
+    anomaly_detection_csv_reports "${ALGORITHM_PROJECTION}=package-anomaly-detection" "${ALGORITHM_NODE}=Package" "${ALGORITHM_WEIGHT}=weight25PercentInterfaces" "${ALGORITHM_LANGUAGE}=Java" "${COMMUNITY_PROPERTY}" "${EMBEDDING_PROPERTY}"
 fi
 
 # -- Java Type Node Embeddings -----------------------------------
 
 if createUndirectedJavaTypeDependencyProjection "${PROJECTION_NAME}=type-anomaly-detection"; then
     createDirectedJavaTypeDependencyProjection "${PROJECTION_NAME}=type-anomaly-detection-directed"
-    anomaly_detection_csv_reports "${ALGORITHM_PROJECTION}=type-anomaly-detection" "${ALGORITHM_NODE}=Type" "${ALGORITHM_WEIGHT}=weight" "${COMMUNITY_PROPERTY}" "${EMBEDDING_PROPERTY}"
+    anomaly_detection_csv_reports "${ALGORITHM_PROJECTION}=type-anomaly-detection" "${ALGORITHM_NODE}=Type" "${ALGORITHM_WEIGHT}=weight" "${ALGORITHM_LANGUAGE}=Java" "${COMMUNITY_PROPERTY}" "${EMBEDDING_PROPERTY}"
 fi
 
 # -- Typescript Module Node Embeddings ---------------------------
 
-if createUndirectedDependencyProjection "${PROJECTION_NAME}=typescript-module-embedding" "${PROJECTION_NODE}=Module" "${PROJECTION_WEIGHT}=lowCouplingElement25PercentWeight"; then
-    createDirectedDependencyProjection "${PROJECTION_NAME}=typescript-module-embedding-directed" "${PROJECTION_NODE}=Module" "${PROJECTION_WEIGHT}=lowCouplingElement25PercentWeight"
-    anomaly_detection_csv_reports "${ALGORITHM_PROJECTION}=typescript-module-embedding" "${ALGORITHM_NODE}=Module" "${ALGORITHM_WEIGHT}=lowCouplingElement25PercentWeight" "${COMMUNITY_PROPERTY}" "${EMBEDDING_PROPERTY}"
+if createUndirectedDependencyProjection "${PROJECTION_NAME}=typescript-module-embedding" "${PROJECTION_NODE}=Module" "${PROJECTION_WEIGHT}=lowCouplingElement25PercentWeight" "${PROJECTION_LANGUAGE}=Typescript"; then
+    createDirectedDependencyProjection "${PROJECTION_NAME}=typescript-module-embedding-directed" "${PROJECTION_NODE}=Module" "${PROJECTION_WEIGHT}=lowCouplingElement25PercentWeight" "${PROJECTION_LANGUAGE}=Typescript"
+    anomaly_detection_csv_reports "${ALGORITHM_PROJECTION}=typescript-module-embedding" "${ALGORITHM_NODE}=Module" "${ALGORITHM_WEIGHT}=lowCouplingElement25PercentWeight" "${ALGORITHM_LANGUAGE}=Typescript" "${COMMUNITY_PROPERTY}" "${EMBEDDING_PROPERTY}"
 fi
 
 # ---------------------------------------------------------------
 
@@ -30,6 +30,7 @@
 
 from visualization import plot_annotation_style, annotate_each, annotate_each_with_index, scale_marker_sizes, zoom_into_center, zoom_into_center_while_preserving_scores_above_threshold, zoom_into_center_while_preserving_top_scores
 
+
 class Parameters:
     required_parameters_ = ["projection_node_label"]
 
@@ -88,9 +89,20 @@ def example(cls):
     def get_query_parameters(self) -> typing.Dict[str, str]:
         return self.query_parameters_.copy()  # copy enforces immutability
 
-    def get_projection_node_label(self) -> str:
+    def __get_projection_node_label(self) -> str:
         return self.query_parameters_["projection_node_label"]
 
+    def __is_code_language_available(self) -> bool:
+        return "projection_language" in self.query_parameters_
+
+    def __get_projection_language(self) -> str:
+        return self.query_parameters_["projection_language"] if self.__is_code_language_available() else ""
+
+    def get_plot_prefix(self) -> str:
+        if self.__is_code_language_available():
+            return self.__get_projection_language() + " " + self.__get_projection_node_label()
+        return self.__get_projection_node_label()
+
     def get_report_directory(self) -> str:
         return self.report_directory
 
@@ -451,7 +463,7 @@ def plot_clustering_coefficient_vs_page_rank(
 
     common_column_names_for_annotations = {
         "name_column": 'shortName',
-        "x_position_column": 'clusteringCoefficient', 
+        "x_position_column": 'clusteringCoefficient',
         "y_position_column": 'pageRank'
     }
 
@@ -461,8 +473,8 @@ def plot_clustering_coefficient_vs_page_rank(
     threshold_page_rank = mean_page_rank + 1.5 * standard_deviation_page_rank
     significant_points = combined_data[combined_data['pageRank'] > threshold_page_rank].sort_values(by='pageRank', ascending=False).reset_index(drop=True).head(10)
     annotate_each_with_index(
-        significant_points, 
-        using=plot.annotate, 
+        significant_points,
+        using=plot.annotate,
         value_column='pageRank',
         **common_column_names_for_annotations
     )
@@ -473,8 +485,8 @@ def plot_clustering_coefficient_vs_page_rank(
     top_clustering_coefficients = combined_data.sort_values(by='clusteringCoefficient', ascending=False).reset_index(drop=True).head(20)
     top_clustering_coefficients = top_clustering_coefficients.sort_values(by='pageRank', ascending=True).reset_index(drop=True).head(5)
     annotate_each_with_index(
-        top_clustering_coefficients, 
-        using=plot.annotate, 
+        top_clustering_coefficients,
+        using=plot.annotate,
         value_column='clusteringCoefficient',
         **common_column_names_for_annotations
     )
@@ -508,9 +520,9 @@ def plot_clusters(
     # Setup columns
     node_size_column = centrality_column_name
 
-    clustering_visualization_dataframe_zoomed=zoom_into_center(
-        clustering_visualization_dataframe, 
-        x_position_column, 
+    clustering_visualization_dataframe_zoomed = zoom_into_center(
+        clustering_visualization_dataframe,
+        x_position_column,
         y_position_column
     )
 
@@ -617,10 +629,10 @@ def plot_clusters_probabilities(
         print("No projected data to plot available")
         return
 
-    clustering_visualization_dataframe_zoomed=zoom_into_center_while_preserving_top_scores(
-        clustering_visualization_dataframe, 
-        x_position_column, 
-        y_position_column, 
+    clustering_visualization_dataframe_zoomed = zoom_into_center_while_preserving_top_scores(
+        clustering_visualization_dataframe,
+        x_position_column,
+        y_position_column,
         cluster_probability_column,
         annotate_n_lowest_probabilities,
         lowest_scores=True
@@ -636,7 +648,7 @@ def get_common_plot_parameters(data: pd.DataFrame) -> dict:
             "y": data[y_position_column],
             "s": data[size_column + '_scaled'],
         }
-    
+
     cluster_noise = clustering_visualization_dataframe_zoomed[clustering_visualization_dataframe_zoomed[cluster_label_column] == -1]
     cluster_non_noise = clustering_visualization_dataframe_zoomed[clustering_visualization_dataframe_zoomed[cluster_label_column] != -1]
     cluster_even_labels = clustering_visualization_dataframe_zoomed[clustering_visualization_dataframe_zoomed[cluster_label_column] % 2 == 0]
@@ -800,7 +812,7 @@ def get_common_plot_parameters(data: pd.DataFrame) -> dict:
 # ------------------------------------------------------------------------------------------------------------
 
 parameters = parse_input_parameters()
-plot_type = parameters.get_projection_node_label()
+plot_prefix = parameters.get_plot_prefix()
 report_directory = parameters.get_report_directory()
 
 driver = get_graph_database_driver()
@@ -813,31 +825,31 @@ def get_common_plot_parameters(data: pd.DataFrame) -> dict:
     data['pageRank'],
     data['articleRank'],
     data['shortCodeUnitName'],
-    title=f"{plot_type} distribution of PageRank - ArticleRank differences",
-    plot_file_path=get_file_path(f"{plot_type}_PageRank_Minus_ArticleRank_Distribution", parameters)
+    title=f"{plot_prefix} distribution of PageRank - ArticleRank differences",
+    plot_file_path=get_file_path(f"{plot_prefix}_PageRank_Minus_ArticleRank_Distribution", parameters)
 )
 
 plot_clustering_coefficient_distribution(
     data['clusteringCoefficient'],
-    title=f"{plot_type} distribution of clustering coefficients",
-    plot_file_path=get_file_path(f"{plot_type}_ClusteringCoefficient_distribution", parameters)
+    title=f"{plot_prefix} distribution of clustering coefficients",
+    plot_file_path=get_file_path(f"{plot_prefix}_ClusteringCoefficient_distribution", parameters)
 )
 
 plot_clustering_coefficient_vs_page_rank(
     data['clusteringCoefficient'],
     data['pageRank'],
     data['shortCodeUnitName'],
     data['clusterNoise'],
-    title=f"{plot_type} clustering coefficient versus PageRank",
-    plot_file_path=get_file_path(f"{plot_type}_ClusteringCoefficient_versus_PageRank", parameters)
+    title=f"{plot_prefix} clustering coefficient versus PageRank",
+    plot_file_path=get_file_path(f"{plot_prefix}_ClusteringCoefficient_versus_PageRank", parameters)
 )
 
 if (overall_cluster_count < 20):
     print(f"anomalyDetectionFeaturePlots: Less than 20 clusters: {overall_cluster_count}. Only one plot containing all clusters will be created.")
     plot_clusters(
         clustering_visualization_dataframe=data,
-        title=f"{plot_type} all clusters overall (less than 20)",
-        plot_file_path=get_file_path(f"{plot_type}_Clusters_Overall", parameters)
+        title=f"{plot_prefix} all clusters overall (less than 20)",
+        plot_file_path=get_file_path(f"{plot_prefix}_Clusters_Overall", parameters)
     )
 else:
     print(f"anomalyDetectionFeaturePlots: More than 20 clusters: {overall_cluster_count}. Different plots focussing on different features like cluster size will be created.")
@@ -846,57 +858,57 @@ def get_common_plot_parameters(data: pd.DataFrame) -> dict:
     )
     plot_clusters(
         clustering_visualization_dataframe=clusters_by_largest_size,
-        title=f"{plot_type} clusters with the largest size",
-        plot_file_path=get_file_path(f"{plot_type}_Clusters_largest_size", parameters)
+        title=f"{plot_prefix} clusters with the largest size",
+        plot_file_path=get_file_path(f"{plot_prefix}_Clusters_largest_size", parameters)
     )
 
     clusters_by_largest_max_radius = get_clusters_by_criteria(
         data, by='clusterRadiusMax', ascending=False, cluster_count=20
     )
     plot_clusters(
         clustering_visualization_dataframe=clusters_by_largest_max_radius,
-        title=f"{plot_type} clusters with the largest max radius",
-        plot_file_path=get_file_path(f"{plot_type}_Clusters_largest_max_radius", parameters)
+        title=f"{plot_prefix} clusters with the largest max radius",
+        plot_file_path=get_file_path(f"{plot_prefix}_Clusters_largest_max_radius", parameters)
     )
 
     clusters_by_largest_average_radius = get_clusters_by_criteria(
         data, by='clusterRadiusAverage', ascending=False, cluster_count=20
     )
     plot_clusters(
         clustering_visualization_dataframe=clusters_by_largest_average_radius,
-        title=f"{plot_type} clusters with the largest average radius",
-        plot_file_path=get_file_path(f"{plot_type}_Clusters_largest_average_radius", parameters)
+        title=f"{plot_prefix} clusters with the largest average radius",
+        plot_file_path=get_file_path(f"{plot_prefix}_Clusters_largest_average_radius", parameters)
     )
 
 plot_clusters_probabilities(
-    clustering_visualization_dataframe=data, 
-    title=f"{plot_type} clustering probabilities (red=high uncertainty)",
-    plot_file_path=get_file_path(f"{plot_type}_Cluster_probabilities", parameters)
+    clustering_visualization_dataframe=data,
+    title=f"{plot_prefix} clustering probabilities (red=high uncertainty)",
+    plot_file_path=get_file_path(f"{plot_prefix}_Cluster_probabilities", parameters)
 )
 
 plot_cluster_noise(
     clustering_visualization_dataframe=data,
-    title=f"{plot_type} clustering noise points that are surprisingly central (red) or popular (size)",
+    title=f"{plot_prefix} clustering noise points that are surprisingly central (red) or popular (size)",
     size_column_name='degree',
     color_column_name='pageRank',
-    plot_file_path=get_file_path(f"{plot_type}_ClusterNoise_highly_central_and_popular", parameters)
+    plot_file_path=get_file_path(f"{plot_prefix}_ClusterNoise_highly_central_and_popular", parameters)
 )
 
 plot_cluster_noise(
     clustering_visualization_dataframe=data,
-    title=f"{plot_type} clustering noise points that bridge flow (red) and are poorly integrated (size)",
+    title=f"{plot_prefix} clustering noise points that bridge flow (red) and are poorly integrated (size)",
     size_column_name='inverseClusteringCoefficient',
     color_column_name='betweenness',
-    plot_file_path=get_file_path(f"{plot_type}_ClusterNoise_poorly_integrated_bridges", parameters),
+    plot_file_path=get_file_path(f"{plot_prefix}_ClusterNoise_poorly_integrated_bridges", parameters),
     downscale_normal_sizes=0.4
 )
 
 plot_cluster_noise(
     clustering_visualization_dataframe=data,
-    title=f"{plot_type} clustering noise points with role inversion (size) possibly violating layering or dependency direction (red)",
+    title=f"{plot_prefix} clustering noise points with role inversion (size) possibly violating layering or dependency direction (red)",
     size_column_name='pageToArticleRankDifference',
     color_column_name='betweenness',
-    plot_file_path=get_file_path(f"{plot_type}_ClusterNoise_role_inverted_bridges", parameters)
+    plot_file_path=get_file_path(f"{plot_prefix}_ClusterNoise_role_inverted_bridges", parameters)
 )
 
 driver.close()