30
30
31
31
from visualization import plot_annotation_style , annotate_each , annotate_each_with_index , scale_marker_sizes , zoom_into_center , zoom_into_center_while_preserving_scores_above_threshold , zoom_into_center_while_preserving_top_scores
32
32
33
+
33
34
class Parameters :
34
35
required_parameters_ = ["projection_node_label" ]
35
36
@@ -88,9 +89,20 @@ def example(cls):
88
89
def get_query_parameters(self) -> typing.Dict[str, str]:
    """
    Return the query parameters as a new dictionary.

    A shallow copy is handed out so that callers who modify the returned
    dictionary do not change the parameters stored on this object.
    """
    parameters_copy = self.query_parameters_.copy()
    return parameters_copy
90
91
91
- def get_projection_node_label (self ) -> str :
92
+ def __get_projection_node_label (self ) -> str :
92
93
return self .query_parameters_ ["projection_node_label" ]
93
94
95
+ def __is_code_language_available (self ) -> bool :
96
+ return "projection_language" in self .query_parameters_
97
+
98
+ def __get_projection_language (self ) -> str :
99
+ return self .query_parameters_ ["projection_language" ] if self .__is_code_language_available () else ""
100
+
101
def get_plot_prefix(self) -> str:
    """
    Build the prefix that identifies the plotted projection.

    Returns the projection language, a single space and the projection node label
    when a non-empty "projection_language" query parameter is present.
    Otherwise only the projection node label is returned.

    Raises:
        KeyError: if "projection_node_label" is missing from the query parameters.
    """
    node_label = self.query_parameters_["projection_node_label"]
    # A missing language and an empty one are treated alike,
    # so the prefix never starts with a stray space.
    language = self.query_parameters_.get("projection_language", "")
    if not language:
        return node_label
    return f"{language} {node_label}"
105
+
94
106
def get_report_directory(self) -> str:
    """Return the report directory stored on this object."""
    directory = self.report_directory
    return directory
96
108
@@ -451,7 +463,7 @@ def plot_clustering_coefficient_vs_page_rank(
451
463
452
464
common_column_names_for_annotations = {
453
465
"name_column" : 'shortName' ,
454
- "x_position_column" : 'clusteringCoefficient' ,
466
+ "x_position_column" : 'clusteringCoefficient' ,
455
467
"y_position_column" : 'pageRank'
456
468
}
457
469
@@ -461,8 +473,8 @@ def plot_clustering_coefficient_vs_page_rank(
461
473
threshold_page_rank = mean_page_rank + 1.5 * standard_deviation_page_rank
462
474
significant_points = combined_data [combined_data ['pageRank' ] > threshold_page_rank ].sort_values (by = 'pageRank' , ascending = False ).reset_index (drop = True ).head (10 )
463
475
annotate_each_with_index (
464
- significant_points ,
465
- using = plot .annotate ,
476
+ significant_points ,
477
+ using = plot .annotate ,
466
478
value_column = 'pageRank' ,
467
479
** common_column_names_for_annotations
468
480
)
@@ -473,8 +485,8 @@ def plot_clustering_coefficient_vs_page_rank(
473
485
top_clustering_coefficients = combined_data .sort_values (by = 'clusteringCoefficient' , ascending = False ).reset_index (drop = True ).head (20 )
474
486
top_clustering_coefficients = top_clustering_coefficients .sort_values (by = 'pageRank' , ascending = True ).reset_index (drop = True ).head (5 )
475
487
annotate_each_with_index (
476
- top_clustering_coefficients ,
477
- using = plot .annotate ,
488
+ top_clustering_coefficients ,
489
+ using = plot .annotate ,
478
490
value_column = 'clusteringCoefficient' ,
479
491
** common_column_names_for_annotations
480
492
)
@@ -508,9 +520,9 @@ def plot_clusters(
508
520
# Setup columns
509
521
node_size_column = centrality_column_name
510
522
511
- clustering_visualization_dataframe_zoomed = zoom_into_center (
512
- clustering_visualization_dataframe ,
513
- x_position_column ,
523
+ clustering_visualization_dataframe_zoomed = zoom_into_center (
524
+ clustering_visualization_dataframe ,
525
+ x_position_column ,
514
526
y_position_column
515
527
)
516
528
@@ -617,10 +629,10 @@ def plot_clusters_probabilities(
617
629
print ("No projected data to plot available" )
618
630
return
619
631
620
- clustering_visualization_dataframe_zoomed = zoom_into_center_while_preserving_top_scores (
621
- clustering_visualization_dataframe ,
622
- x_position_column ,
623
- y_position_column ,
632
+ clustering_visualization_dataframe_zoomed = zoom_into_center_while_preserving_top_scores (
633
+ clustering_visualization_dataframe ,
634
+ x_position_column ,
635
+ y_position_column ,
624
636
cluster_probability_column ,
625
637
annotate_n_lowest_probabilities ,
626
638
lowest_scores = True
@@ -636,7 +648,7 @@ def get_common_plot_parameters(data: pd.DataFrame) -> dict:
636
648
"y" : data [y_position_column ],
637
649
"s" : data [size_column + '_scaled' ],
638
650
}
639
-
651
+
640
652
cluster_noise = clustering_visualization_dataframe_zoomed [clustering_visualization_dataframe_zoomed [cluster_label_column ] == - 1 ]
641
653
cluster_non_noise = clustering_visualization_dataframe_zoomed [clustering_visualization_dataframe_zoomed [cluster_label_column ] != - 1 ]
642
654
cluster_even_labels = clustering_visualization_dataframe_zoomed [clustering_visualization_dataframe_zoomed [cluster_label_column ] % 2 == 0 ]
@@ -800,7 +812,7 @@ def get_common_plot_parameters(data: pd.DataFrame) -> dict:
800
812
# ------------------------------------------------------------------------------------------------------------
801
813
802
814
parameters = parse_input_parameters ()
803
- plot_type = parameters .get_projection_node_label ()
815
+ plot_prefix = parameters .get_plot_prefix ()
804
816
report_directory = parameters .get_report_directory ()
805
817
806
818
driver = get_graph_database_driver ()
@@ -813,31 +825,31 @@ def get_common_plot_parameters(data: pd.DataFrame) -> dict:
813
825
data ['pageRank' ],
814
826
data ['articleRank' ],
815
827
data ['shortCodeUnitName' ],
816
- title = f"{ plot_type } distribution of PageRank - ArticleRank differences" ,
817
- plot_file_path = get_file_path (f"{ plot_type } _PageRank_Minus_ArticleRank_Distribution" , parameters )
828
+ title = f"{ plot_prefix } distribution of PageRank - ArticleRank differences" ,
829
+ plot_file_path = get_file_path (f"{ plot_prefix } _PageRank_Minus_ArticleRank_Distribution" , parameters )
818
830
)
819
831
820
832
plot_clustering_coefficient_distribution (
821
833
data ['clusteringCoefficient' ],
822
- title = f"{ plot_type } distribution of clustering coefficients" ,
823
- plot_file_path = get_file_path (f"{ plot_type } _ClusteringCoefficient_distribution" , parameters )
834
+ title = f"{ plot_prefix } distribution of clustering coefficients" ,
835
+ plot_file_path = get_file_path (f"{ plot_prefix } _ClusteringCoefficient_distribution" , parameters )
824
836
)
825
837
826
838
plot_clustering_coefficient_vs_page_rank (
827
839
data ['clusteringCoefficient' ],
828
840
data ['pageRank' ],
829
841
data ['shortCodeUnitName' ],
830
842
data ['clusterNoise' ],
831
- title = f"{ plot_type } clustering coefficient versus PageRank" ,
832
- plot_file_path = get_file_path (f"{ plot_type } _ClusteringCoefficient_versus_PageRank" , parameters )
843
+ title = f"{ plot_prefix } clustering coefficient versus PageRank" ,
844
+ plot_file_path = get_file_path (f"{ plot_prefix } _ClusteringCoefficient_versus_PageRank" , parameters )
833
845
)
834
846
835
847
if (overall_cluster_count < 20 ):
836
848
print (f"anomalyDetectionFeaturePlots: Less than 20 clusters: { overall_cluster_count } . Only one plot containing all clusters will be created." )
837
849
plot_clusters (
838
850
clustering_visualization_dataframe = data ,
839
- title = f"{ plot_type } all clusters overall (less than 20)" ,
840
- plot_file_path = get_file_path (f"{ plot_type } _Clusters_Overall" , parameters )
851
+ title = f"{ plot_prefix } all clusters overall (less than 20)" ,
852
+ plot_file_path = get_file_path (f"{ plot_prefix } _Clusters_Overall" , parameters )
841
853
)
842
854
else :
843
855
print (f"anomalyDetectionFeaturePlots: More than 20 clusters: { overall_cluster_count } . Different plots focussing on different features like cluster size will be created." )
@@ -846,57 +858,57 @@ def get_common_plot_parameters(data: pd.DataFrame) -> dict:
846
858
)
847
859
plot_clusters (
848
860
clustering_visualization_dataframe = clusters_by_largest_size ,
849
- title = f"{ plot_type } clusters with the largest size" ,
850
- plot_file_path = get_file_path (f"{ plot_type } _Clusters_largest_size" , parameters )
861
+ title = f"{ plot_prefix } clusters with the largest size" ,
862
+ plot_file_path = get_file_path (f"{ plot_prefix } _Clusters_largest_size" , parameters )
851
863
)
852
864
853
865
clusters_by_largest_max_radius = get_clusters_by_criteria (
854
866
data , by = 'clusterRadiusMax' , ascending = False , cluster_count = 20
855
867
)
856
868
plot_clusters (
857
869
clustering_visualization_dataframe = clusters_by_largest_max_radius ,
858
- title = f"{ plot_type } clusters with the largest max radius" ,
859
- plot_file_path = get_file_path (f"{ plot_type } _Clusters_largest_max_radius" , parameters )
870
+ title = f"{ plot_prefix } clusters with the largest max radius" ,
871
+ plot_file_path = get_file_path (f"{ plot_prefix } _Clusters_largest_max_radius" , parameters )
860
872
)
861
873
862
874
clusters_by_largest_average_radius = get_clusters_by_criteria (
863
875
data , by = 'clusterRadiusAverage' , ascending = False , cluster_count = 20
864
876
)
865
877
plot_clusters (
866
878
clustering_visualization_dataframe = clusters_by_largest_average_radius ,
867
- title = f"{ plot_type } clusters with the largest average radius" ,
868
- plot_file_path = get_file_path (f"{ plot_type } _Clusters_largest_average_radius" , parameters )
879
+ title = f"{ plot_prefix } clusters with the largest average radius" ,
880
+ plot_file_path = get_file_path (f"{ plot_prefix } _Clusters_largest_average_radius" , parameters )
869
881
)
870
882
871
883
plot_clusters_probabilities (
872
- clustering_visualization_dataframe = data ,
873
- title = f"{ plot_type } clustering probabilities (red=high uncertainty)" ,
874
- plot_file_path = get_file_path (f"{ plot_type } _Cluster_probabilities" , parameters )
884
+ clustering_visualization_dataframe = data ,
885
+ title = f"{ plot_prefix } clustering probabilities (red=high uncertainty)" ,
886
+ plot_file_path = get_file_path (f"{ plot_prefix } _Cluster_probabilities" , parameters )
875
887
)
876
888
877
889
plot_cluster_noise (
878
890
clustering_visualization_dataframe = data ,
879
- title = f"{ plot_type } clustering noise points that are surprisingly central (red) or popular (size)" ,
891
+ title = f"{ plot_prefix } clustering noise points that are surprisingly central (red) or popular (size)" ,
880
892
size_column_name = 'degree' ,
881
893
color_column_name = 'pageRank' ,
882
- plot_file_path = get_file_path (f"{ plot_type } _ClusterNoise_highly_central_and_popular" , parameters )
894
+ plot_file_path = get_file_path (f"{ plot_prefix } _ClusterNoise_highly_central_and_popular" , parameters )
883
895
)
884
896
885
897
plot_cluster_noise (
886
898
clustering_visualization_dataframe = data ,
887
- title = f"{ plot_type } clustering noise points that bridge flow (red) and are poorly integrated (size)" ,
899
+ title = f"{ plot_prefix } clustering noise points that bridge flow (red) and are poorly integrated (size)" ,
888
900
size_column_name = 'inverseClusteringCoefficient' ,
889
901
color_column_name = 'betweenness' ,
890
- plot_file_path = get_file_path (f"{ plot_type } _ClusterNoise_poorly_integrated_bridges" , parameters ),
902
+ plot_file_path = get_file_path (f"{ plot_prefix } _ClusterNoise_poorly_integrated_bridges" , parameters ),
891
903
downscale_normal_sizes = 0.4
892
904
)
893
905
894
906
plot_cluster_noise (
895
907
clustering_visualization_dataframe = data ,
896
- title = f"{ plot_type } clustering noise points with role inversion (size) possibly violating layering or dependency direction (red)" ,
908
+ title = f"{ plot_prefix } clustering noise points with role inversion (size) possibly violating layering or dependency direction (red)" ,
897
909
size_column_name = 'pageToArticleRankDifference' ,
898
910
color_column_name = 'betweenness' ,
899
- plot_file_path = get_file_path (f"{ plot_type } _ClusterNoise_role_inverted_bridges" , parameters )
911
+ plot_file_path = get_file_path (f"{ plot_prefix } _ClusterNoise_role_inverted_bridges" , parameters )
900
912
)
901
913
902
914
driver .close ()
0 commit comments