From 1c5e1addf84fa7c4bcdb3ee9a65def06bb2a3134 Mon Sep 17 00:00:00 2001 From: Sanat Kumar Gupta <123228827+SKG24@users.noreply.github.com> Date: Wed, 26 Mar 2025 21:17:25 +0530 Subject: [PATCH 1/6] Added stochastic_variability.py file This example demonstrates the variability of stochastic community detection methods by analyzing the consistency of multiple partitions using similarity measures (NMI, VI, RI) on both random and structured graphs. --- .../stochastic_variability.py | 96 +++++++++++++++++++ 1 file changed, 96 insertions(+) create mode 100644 doc/examples_sphinx-gallery/stochastic_variability.py diff --git a/doc/examples_sphinx-gallery/stochastic_variability.py b/doc/examples_sphinx-gallery/stochastic_variability.py new file mode 100644 index 000000000..b4217cb4d --- /dev/null +++ b/doc/examples_sphinx-gallery/stochastic_variability.py @@ -0,0 +1,96 @@ +""" +.. _tutorials-stochastic-variability: + +========================================================= +Stochastic Variability in Community Detection Algorithms +========================================================= + +This example demonstrates the variability of stochastic community detection methods by analyzing the consistency of multiple partitions using similarity measures (NMI, VI, RI) on both random and structured graphs. + +""" +# %% +# Import Libraries +import igraph as ig +import numpy as np +import matplotlib.pyplot as plt +import itertools + +# %% +# First, we generate a graph. 
+# Generates a random Erdos-Renyi graph (no clear community structure) +def generate_random_graph(n, p): + return ig.Graph.Erdos_Renyi(n=n, p=p) + +# %% +# Generates a clustered graph with clear communities using the Stochastic Block Model (SBM) +def generate_clustered_graph(n, clusters, intra_p, inter_p): + block_sizes = [n // clusters] * clusters + prob_matrix = [[intra_p if i == j else inter_p for j in range(clusters)] for i in range(clusters)] + return ig.Graph.SBM(sum(block_sizes), prob_matrix, block_sizes) + +# %% +# Computes pairwise similarity (NMI, VI, RI) between partitions +def compute_pairwise_similarity(partitions, method): + """Computes pairwise similarity measure between partitions.""" + scores = [] + for p1, p2 in itertools.combinations(partitions, 2): + scores.append(ig.compare_communities(p1, p2, method=method)) + return scores + +# %% +# Stochastic Community Detection +# Runs Louvain's method iteratively to generate partitions +# Computes similarity metrics: +def run_experiment(graph, iterations=50): + """Runs the stochastic method multiple times and collects community partitions.""" + partitions = [graph.community_multilevel().membership for _ in range(iterations)] + nmi_scores = compute_pairwise_similarity(partitions, method="nmi") + vi_scores = compute_pairwise_similarity(partitions, method="vi") + ri_scores = compute_pairwise_similarity(partitions, method="rand") + return nmi_scores, vi_scores, ri_scores + +# %% +# Parameters +n_nodes = 100 +p_random = 0.05 +clusters = 4 +p_intra = 0.3 # High intra-cluster connection probability +p_inter = 0.01 # Low inter-cluster connection probability + +# %% +# Generate graphs +random_graph = generate_random_graph(n_nodes, p_random) +clustered_graph = generate_clustered_graph(n_nodes, clusters, p_intra, p_inter) + +# %% +# Run experiments +nmi_random, vi_random, ri_random = run_experiment(random_graph) +nmi_clustered, vi_clustered, ri_clustered = run_experiment(clustered_graph) + +# %% +# Lets, plot the 
histograms +fig, axes = plt.subplots(3, 2, figsize=(12, 10)) +measures = [(nmi_random, nmi_clustered, "NMI"), (vi_random, vi_clustered, "VI"), (ri_random, ri_clustered, "RI")] +colors = ["red", "blue", "green"] + +for i, (random_scores, clustered_scores, measure) in enumerate(measures): + axes[i][0].hist(random_scores, bins=20, alpha=0.7, color=colors[i], edgecolor="black") + axes[i][0].set_title(f"Histogram of {measure} - Random Graph") + axes[i][0].set_xlabel(f"{measure} Score") + axes[i][0].set_ylabel("Frequency") + + axes[i][1].hist(clustered_scores, bins=20, alpha=0.7, color=colors[i], edgecolor="black") + axes[i][1].set_title(f"Histogram of {measure} - Clustered Graph") + axes[i][1].set_xlabel(f"{measure} Score") + +plt.tight_layout() +plt.show() + +# %% +# The results are plotted as histograms for random vs. clustered graphs, highlighting differences in detected community structures. +#The key reason for the inconsistency in random graphs and higher consistency in structured graphs is due to community structure strength: +#Random Graphs: Lack clear communities, leading to unstable partitions. Stochastic algorithms detect different structures across runs, resulting in low NMI, high VI, and inconsistent RI. +#Structured Graphs: Have well-defined communities, so detected partitions are more stable across multiple runs, leading to high NMI, low VI, and stable RI. 
+ + +# %% From a3ac9c19e8a74bf5111dfcaec0058ef0246003f4 Mon Sep 17 00:00:00 2001 From: Sanat Kumar Gupta <123228827+SKG24@users.noreply.github.com> Date: Wed, 26 Mar 2025 21:20:14 +0530 Subject: [PATCH 2/6] Update stochastic_variability.py --- doc/examples_sphinx-gallery/stochastic_variability.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/doc/examples_sphinx-gallery/stochastic_variability.py b/doc/examples_sphinx-gallery/stochastic_variability.py index b4217cb4d..7c44f7add 100644 --- a/doc/examples_sphinx-gallery/stochastic_variability.py +++ b/doc/examples_sphinx-gallery/stochastic_variability.py @@ -91,6 +91,3 @@ def run_experiment(graph, iterations=50): #The key reason for the inconsistency in random graphs and higher consistency in structured graphs is due to community structure strength: #Random Graphs: Lack clear communities, leading to unstable partitions. Stochastic algorithms detect different structures across runs, resulting in low NMI, high VI, and inconsistent RI. #Structured Graphs: Have well-defined communities, so detected partitions are more stable across multiple runs, leading to high NMI, low VI, and stable RI. - - -# %% From dc52eb3c4dd0e3402fcb8685a18f73e77dfc568b Mon Sep 17 00:00:00 2001 From: Sanat Kumar Gupta <123228827+SKG24@users.noreply.github.com> Date: Thu, 27 Mar 2025 21:34:00 +0530 Subject: [PATCH 3/6] Update sg_execution_times.rst --- doc/source/sg_execution_times.rst | 87 ++++++++++++++++--------------- 1 file changed, 45 insertions(+), 42 deletions(-) diff --git a/doc/source/sg_execution_times.rst b/doc/source/sg_execution_times.rst index a63741754..65c6c041f 100644 --- a/doc/source/sg_execution_times.rst +++ b/doc/source/sg_execution_times.rst @@ -6,7 +6,7 @@ Computation times ================= -**00:10.013** total execution time for 25 files **from all galleries**: +**01:51.199** total execution time for 26 files **from all galleries**: .. 
container:: @@ -33,77 +33,80 @@ Computation times - Time - Mem (MB) * - :ref:`sphx_glr_tutorials_visualize_cliques.py` (``../examples_sphinx-gallery/visualize_cliques.py``) - - 00:02.970 + - 00:39.554 + - 0.0 + * - :ref:`sphx_glr_tutorials_visual_style.py` (``../examples_sphinx-gallery/visual_style.py``) + - 00:11.628 - 0.0 * - :ref:`sphx_glr_tutorials_ring_animation.py` (``../examples_sphinx-gallery/ring_animation.py``) - - 00:01.287 + - 00:09.870 - 0.0 - * - :ref:`sphx_glr_tutorials_cluster_contraction.py` (``../examples_sphinx-gallery/cluster_contraction.py``) - - 00:00.759 + * - :ref:`sphx_glr_tutorials_delaunay-triangulation.py` (``../examples_sphinx-gallery/delaunay-triangulation.py``) + - 00:09.261 - 0.0 * - :ref:`sphx_glr_tutorials_betweenness.py` (``../examples_sphinx-gallery/betweenness.py``) - - 00:00.735 - - 0.0 - * - :ref:`sphx_glr_tutorials_visual_style.py` (``../examples_sphinx-gallery/visual_style.py``) - - 00:00.711 - - 0.0 - * - :ref:`sphx_glr_tutorials_delaunay-triangulation.py` (``../examples_sphinx-gallery/delaunay-triangulation.py``) - - 00:00.504 + - 00:06.259 - 0.0 * - :ref:`sphx_glr_tutorials_configuration.py` (``../examples_sphinx-gallery/configuration.py``) - - 00:00.416 + - 00:05.379 - 0.0 - * - :ref:`sphx_glr_tutorials_online_user_actions.py` (``../examples_sphinx-gallery/online_user_actions.py``) - - 00:00.332 + * - :ref:`sphx_glr_tutorials_cluster_contraction.py` (``../examples_sphinx-gallery/cluster_contraction.py``) + - 00:04.307 - 0.0 * - :ref:`sphx_glr_tutorials_erdos_renyi.py` (``../examples_sphinx-gallery/erdos_renyi.py``) - - 00:00.313 + - 00:03.508 - 0.0 - * - :ref:`sphx_glr_tutorials_connected_components.py` (``../examples_sphinx-gallery/connected_components.py``) - - 00:00.216 + * - :ref:`sphx_glr_tutorials_bridges.py` (``../examples_sphinx-gallery/bridges.py``) + - 00:02.530 - 0.0 * - :ref:`sphx_glr_tutorials_complement.py` (``../examples_sphinx-gallery/complement.py``) - - 00:00.201 - - 0.0 - * - 
:ref:`sphx_glr_tutorials_generate_dag.py` (``../examples_sphinx-gallery/generate_dag.py``) - - 00:00.194 + - 00:02.393 - 0.0 * - :ref:`sphx_glr_tutorials_visualize_communities.py` (``../examples_sphinx-gallery/visualize_communities.py``) - - 00:00.176 + - 00:02.157 - 0.0 - * - :ref:`sphx_glr_tutorials_bridges.py` (``../examples_sphinx-gallery/bridges.py``) - - 00:00.169 + * - :ref:`sphx_glr_tutorials_stochastic_variability.py` (``../examples_sphinx-gallery/stochastic_variability.py``) + - 00:01.960 - 0.0 - * - :ref:`sphx_glr_tutorials_spanning_trees.py` (``../examples_sphinx-gallery/spanning_trees.py``) - - 00:00.161 + * - :ref:`sphx_glr_tutorials_online_user_actions.py` (``../examples_sphinx-gallery/online_user_actions.py``) + - 00:01.750 - 0.0 - * - :ref:`sphx_glr_tutorials_isomorphism.py` (``../examples_sphinx-gallery/isomorphism.py``) - - 00:00.153 + * - :ref:`sphx_glr_tutorials_connected_components.py` (``../examples_sphinx-gallery/connected_components.py``) + - 00:01.728 - 0.0 - * - :ref:`sphx_glr_tutorials_quickstart.py` (``../examples_sphinx-gallery/quickstart.py``) - - 00:00.142 + * - :ref:`sphx_glr_tutorials_isomorphism.py` (``../examples_sphinx-gallery/isomorphism.py``) + - 00:01.376 - 0.0 * - :ref:`sphx_glr_tutorials_minimum_spanning_trees.py` (``../examples_sphinx-gallery/minimum_spanning_trees.py``) - - 00:00.137 + - 00:01.135 + - 0.0 + * - :ref:`sphx_glr_tutorials_spanning_trees.py` (``../examples_sphinx-gallery/spanning_trees.py``) + - 00:01.120 + - 0.0 + * - :ref:`sphx_glr_tutorials_generate_dag.py` (``../examples_sphinx-gallery/generate_dag.py``) + - 00:00.939 + - 0.0 + * - :ref:`sphx_glr_tutorials_quickstart.py` (``../examples_sphinx-gallery/quickstart.py``) + - 00:00.902 - 0.0 * - :ref:`sphx_glr_tutorials_simplify.py` (``../examples_sphinx-gallery/simplify.py``) - - 00:00.079 + - 00:00.840 - 0.0 * - :ref:`sphx_glr_tutorials_bipartite_matching_maxflow.py` (``../examples_sphinx-gallery/bipartite_matching_maxflow.py``) - - 00:00.073 + - 00:00.674 - 
0.0 - * - :ref:`sphx_glr_tutorials_articulation_points.py` (``../examples_sphinx-gallery/articulation_points.py``) - - 00:00.067 + * - :ref:`sphx_glr_tutorials_shortest_path_visualisation.py` (``../examples_sphinx-gallery/shortest_path_visualisation.py``) + - 00:00.609 - 0.0 - * - :ref:`sphx_glr_tutorials_topological_sort.py` (``../examples_sphinx-gallery/topological_sort.py``) - - 00:00.058 + * - :ref:`sphx_glr_tutorials_articulation_points.py` (``../examples_sphinx-gallery/articulation_points.py``) + - 00:00.396 - 0.0 * - :ref:`sphx_glr_tutorials_bipartite_matching.py` (``../examples_sphinx-gallery/bipartite_matching.py``) - - 00:00.058 + - 00:00.370 - 0.0 - * - :ref:`sphx_glr_tutorials_shortest_path_visualisation.py` (``../examples_sphinx-gallery/shortest_path_visualisation.py``) - - 00:00.052 + * - :ref:`sphx_glr_tutorials_topological_sort.py` (``../examples_sphinx-gallery/topological_sort.py``) + - 00:00.319 - 0.0 * - :ref:`sphx_glr_tutorials_maxflow.py` (``../examples_sphinx-gallery/maxflow.py``) - - 00:00.052 + - 00:00.234 - 0.0 From 97b7192b3a1c726259350e86efdfdef2ccc416ad Mon Sep 17 00:00:00 2001 From: Sanat Kumar Gupta <123228827+SKG24@users.noreply.github.com> Date: Fri, 28 Mar 2025 15:43:00 +0530 Subject: [PATCH 4/6] Update stochastic_variability.py I have made the changes as per the review. 
--- .../stochastic_variability.py | 137 ++++++++++++------ 1 file changed, 89 insertions(+), 48 deletions(-) diff --git a/doc/examples_sphinx-gallery/stochastic_variability.py b/doc/examples_sphinx-gallery/stochastic_variability.py index 7c44f7add..944f82118 100644 --- a/doc/examples_sphinx-gallery/stochastic_variability.py +++ b/doc/examples_sphinx-gallery/stochastic_variability.py @@ -5,89 +5,130 @@ Stochastic Variability in Community Detection Algorithms ========================================================= -This example demonstrates the variability of stochastic community detection methods by analyzing the consistency of multiple partitions using similarity measures (NMI, VI, RI) on both random and structured graphs. +This example demonstrates the variability of stochastic community detection methods by analyzing the consistency of multiple partitions using similarity measures normalized mutual information (NMI), variation of information (VI), rand index (RI) on both random and structured graphs. """ # %% -# Import Libraries +# Import libraries import igraph as ig -import numpy as np import matplotlib.pyplot as plt import itertools # %% # First, we generate a graph. -# Generates a random Erdos-Renyi graph (no clear community structure) -def generate_random_graph(n, p): - return ig.Graph.Erdos_Renyi(n=n, p=p) +# Load the karate club network +karate = ig.Graph.Famous("Zachary") # %% -# Generates a clustered graph with clear communities using the Stochastic Block Model (SBM) -def generate_clustered_graph(n, clusters, intra_p, inter_p): - block_sizes = [n // clusters] * clusters - prob_matrix = [[intra_p if i == j else inter_p for j in range(clusters)] for i in range(clusters)] - return ig.Graph.SBM(sum(block_sizes), prob_matrix, block_sizes) +#For the random graph, we use an Erdős-Rényi :math:`G(n, m)` model, where 'n' is the number of nodes +#and 'm' is the number of edges. 
We set 'm' to match the edge count of the empirical (Karate Club) +#network to ensure structural similarity in terms of connectivity, making comparisons meaningful. +n_nodes = karate.vcount() +n_edges = karate.ecount() +#Generate an Erdős-Rényi graph with the same number of nodes and edges +random_graph = ig.Graph.Erdos_Renyi(n=n_nodes, m=n_edges) # %% -# Computes pairwise similarity (NMI, VI, RI) between partitions +# Now, lets plot the graph to visually understand them. + +# Create subplots +fig, axes = plt.subplots(1, 2, figsize=(12, 6)) + +# Karate Club Graph +layout_karate = karate.layout("fr") +ig.plot( + karate, layout=layout_karate, target=axes[0], vertex_size=30, vertex_color="lightblue", edge_width=1, + vertex_label=[str(v.index) for v in karate.vs], vertex_label_size=10 +) +axes[0].set_title("Karate Club Network") + +# Erdős-Rényi Graph +layout_random = random_graph.layout("fr") +ig.plot( + random_graph, layout=layout_random, target=axes[1], vertex_size=30, vertex_color="lightcoral", edge_width=1, + vertex_label=[str(v.index) for v in random_graph.vs], vertex_label_size=10 +) +axes[1].set_title("Erdős-Rényi Random Graph") +# %% +# Function to compute similarity between partitions def compute_pairwise_similarity(partitions, method): - """Computes pairwise similarity measure between partitions.""" - scores = [] + similarities = [] + for p1, p2 in itertools.combinations(partitions, 2): - scores.append(ig.compare_communities(p1, p2, method=method)) - return scores + similarity = ig.compare_communities(p1, p2, method=method) + similarities.append(similarity) + + return similarities # %% -# Stochastic Community Detection -# Runs Louvain's method iteratively to generate partitions -# Computes similarity metrics: +# We have used, stochastic community detection using the Louvain method, iteratively generating partitions and computing similarity metrics to assess stability. +# The Louvain method is a modularity maximization approach for community detection. 
+# Since exact modularity maximization is NP-hard, the algorithm employs a greedy heuristic that processes vertices in a random order. +# This randomness leads to variations in the detected communities across different runs, which is why results may differ each time the method is applied. def run_experiment(graph, iterations=50): - """Runs the stochastic method multiple times and collects community partitions.""" partitions = [graph.community_multilevel().membership for _ in range(iterations)] nmi_scores = compute_pairwise_similarity(partitions, method="nmi") vi_scores = compute_pairwise_similarity(partitions, method="vi") ri_scores = compute_pairwise_similarity(partitions, method="rand") return nmi_scores, vi_scores, ri_scores -# %% -# Parameters -n_nodes = 100 -p_random = 0.05 -clusters = 4 -p_intra = 0.3 # High intra-cluster connection probability -p_inter = 0.01 # Low inter-cluster connection probability - -# %% -# Generate graphs -random_graph = generate_random_graph(n_nodes, p_random) -clustered_graph = generate_clustered_graph(n_nodes, clusters, p_intra, p_inter) - # %% # Run experiments +nmi_karate, vi_karate, ri_karate = run_experiment(karate) nmi_random, vi_random, ri_random = run_experiment(random_graph) -nmi_clustered, vi_clustered, ri_clustered = run_experiment(clustered_graph) -# %% -# Lets, plot the histograms +# %% +# Lastly, lets plot probability density histograms to understand the result. 
fig, axes = plt.subplots(3, 2, figsize=(12, 10)) -measures = [(nmi_random, nmi_clustered, "NMI"), (vi_random, vi_clustered, "VI"), (ri_random, ri_clustered, "RI")] +measures = [ + (nmi_karate, nmi_random, "NMI", 0, 1), # Normalized Mutual Information (0-1, higher = more similar) + (vi_karate, vi_random, "VI", 0, None), # Variation of Information (0+, lower = more similar) + (ri_karate, ri_random, "RI", 0, 1), # Rand Index (0-1, higher = more similar) +] colors = ["red", "blue", "green"] -for i, (random_scores, clustered_scores, measure) in enumerate(measures): - axes[i][0].hist(random_scores, bins=20, alpha=0.7, color=colors[i], edgecolor="black") - axes[i][0].set_title(f"Histogram of {measure} - Random Graph") +for i, (karate_scores, random_scores, measure, lower, upper) in enumerate(measures): + # Karate Club histogram + axes[i][0].hist( + karate_scores, bins=20, alpha=0.7, color=colors[i], edgecolor="black", + density=True # Probability density + ) + axes[i][0].set_title(f"Probability Density of {measure} - Karate Club Network") axes[i][0].set_xlabel(f"{measure} Score") - axes[i][0].set_ylabel("Frequency") - - axes[i][1].hist(clustered_scores, bins=20, alpha=0.7, color=colors[i], edgecolor="black") - axes[i][1].set_title(f"Histogram of {measure} - Clustered Graph") + axes[i][0].set_ylabel("Density") + axes[i][0].set_xlim(lower, upper) # Set axis limits explicitly + + # Erdős-Rényi Graph histogram + axes[i][1].hist( + random_scores, bins=20, alpha=0.7, color=colors[i], edgecolor="black", + density=True + ) + axes[i][1].set_title(f"Probability Density of {measure} - Erdős-Rényi Graph") axes[i][1].set_xlabel(f"{measure} Score") + axes[i][1].set_xlim(lower, upper) # Set axis limits explicitly plt.tight_layout() plt.show() # %% -# The results are plotted as histograms for random vs. clustered graphs, highlighting differences in detected community structures. 
-#The key reason for the inconsistency in random graphs and higher consistency in structured graphs is due to community structure strength: -#Random Graphs: Lack clear communities, leading to unstable partitions. Stochastic algorithms detect different structures across runs, resulting in low NMI, high VI, and inconsistent RI. -#Structured Graphs: Have well-defined communities, so detected partitions are more stable across multiple runs, leading to high NMI, low VI, and stable RI. +# We have compared the probability density of NMI, VI, and RI for the Karate Club network (structured) and an Erdős-Rényi random graph. +# +# **NMI (Normalized Mutual Information):** +# +# - Karate Club Network: The distribution is concentrated near 1, indicating high similarity across multiple runs, suggesting stable community detection. +# - Erdős-Rényi Graph: The values are more spread out, with lower NMI scores, showing inconsistent partitions due to the lack of clear community structures. +# +# **VI (Variation of Information):** +# +# - Karate Club Network: The values are low and clustered, indicating stable partitioning with minor variations across runs. +# - Erdős-Rényi Graph: The distribution is broader and shifted toward higher VI values, meaning higher partition variability and less consistency. +# +# **RI (Rand Index):** +# +# - Karate Club Network: The RI values are high and concentrated near 1, suggesting consistent clustering results across multiple iterations. +# - Erdős-Rényi Graph: The distribution is more spread out, but with lower RI values, confirming unstable community detection. +# +# **Conclusion** +# +# The Karate Club Network exhibits strong, well-defined community structures, leading to consistent results across runs. +# The Erdős-Rényi Graph, being random, lacks clear communities, causing high variability in detected partitions. 
\ No newline at end of file From b6b07e98e16669b3def9cbb8b103561b93632662 Mon Sep 17 00:00:00 2001 From: Sanat Kumar Gupta <123228827+SKG24@users.noreply.github.com> Date: Wed, 2 Apr 2025 06:33:08 +0530 Subject: [PATCH 5/6] Delete doc/source/sg_execution_times.rst --- doc/source/sg_execution_times.rst | 112 ------------------------------ 1 file changed, 112 deletions(-) delete mode 100644 doc/source/sg_execution_times.rst diff --git a/doc/source/sg_execution_times.rst b/doc/source/sg_execution_times.rst deleted file mode 100644 index 65c6c041f..000000000 --- a/doc/source/sg_execution_times.rst +++ /dev/null @@ -1,112 +0,0 @@ - -:orphan: - -.. _sphx_glr_sg_execution_times: - - -Computation times -================= -**01:51.199** total execution time for 26 files **from all galleries**: - -.. container:: - - .. raw:: html - - - - - - - - .. list-table:: - :header-rows: 1 - :class: table table-striped sg-datatable - - * - Example - - Time - - Mem (MB) - * - :ref:`sphx_glr_tutorials_visualize_cliques.py` (``../examples_sphinx-gallery/visualize_cliques.py``) - - 00:39.554 - - 0.0 - * - :ref:`sphx_glr_tutorials_visual_style.py` (``../examples_sphinx-gallery/visual_style.py``) - - 00:11.628 - - 0.0 - * - :ref:`sphx_glr_tutorials_ring_animation.py` (``../examples_sphinx-gallery/ring_animation.py``) - - 00:09.870 - - 0.0 - * - :ref:`sphx_glr_tutorials_delaunay-triangulation.py` (``../examples_sphinx-gallery/delaunay-triangulation.py``) - - 00:09.261 - - 0.0 - * - :ref:`sphx_glr_tutorials_betweenness.py` (``../examples_sphinx-gallery/betweenness.py``) - - 00:06.259 - - 0.0 - * - :ref:`sphx_glr_tutorials_configuration.py` (``../examples_sphinx-gallery/configuration.py``) - - 00:05.379 - - 0.0 - * - :ref:`sphx_glr_tutorials_cluster_contraction.py` (``../examples_sphinx-gallery/cluster_contraction.py``) - - 00:04.307 - - 0.0 - * - :ref:`sphx_glr_tutorials_erdos_renyi.py` (``../examples_sphinx-gallery/erdos_renyi.py``) - - 00:03.508 - - 0.0 - * - 
:ref:`sphx_glr_tutorials_bridges.py` (``../examples_sphinx-gallery/bridges.py``) - - 00:02.530 - - 0.0 - * - :ref:`sphx_glr_tutorials_complement.py` (``../examples_sphinx-gallery/complement.py``) - - 00:02.393 - - 0.0 - * - :ref:`sphx_glr_tutorials_visualize_communities.py` (``../examples_sphinx-gallery/visualize_communities.py``) - - 00:02.157 - - 0.0 - * - :ref:`sphx_glr_tutorials_stochastic_variability.py` (``../examples_sphinx-gallery/stochastic_variability.py``) - - 00:01.960 - - 0.0 - * - :ref:`sphx_glr_tutorials_online_user_actions.py` (``../examples_sphinx-gallery/online_user_actions.py``) - - 00:01.750 - - 0.0 - * - :ref:`sphx_glr_tutorials_connected_components.py` (``../examples_sphinx-gallery/connected_components.py``) - - 00:01.728 - - 0.0 - * - :ref:`sphx_glr_tutorials_isomorphism.py` (``../examples_sphinx-gallery/isomorphism.py``) - - 00:01.376 - - 0.0 - * - :ref:`sphx_glr_tutorials_minimum_spanning_trees.py` (``../examples_sphinx-gallery/minimum_spanning_trees.py``) - - 00:01.135 - - 0.0 - * - :ref:`sphx_glr_tutorials_spanning_trees.py` (``../examples_sphinx-gallery/spanning_trees.py``) - - 00:01.120 - - 0.0 - * - :ref:`sphx_glr_tutorials_generate_dag.py` (``../examples_sphinx-gallery/generate_dag.py``) - - 00:00.939 - - 0.0 - * - :ref:`sphx_glr_tutorials_quickstart.py` (``../examples_sphinx-gallery/quickstart.py``) - - 00:00.902 - - 0.0 - * - :ref:`sphx_glr_tutorials_simplify.py` (``../examples_sphinx-gallery/simplify.py``) - - 00:00.840 - - 0.0 - * - :ref:`sphx_glr_tutorials_bipartite_matching_maxflow.py` (``../examples_sphinx-gallery/bipartite_matching_maxflow.py``) - - 00:00.674 - - 0.0 - * - :ref:`sphx_glr_tutorials_shortest_path_visualisation.py` (``../examples_sphinx-gallery/shortest_path_visualisation.py``) - - 00:00.609 - - 0.0 - * - :ref:`sphx_glr_tutorials_articulation_points.py` (``../examples_sphinx-gallery/articulation_points.py``) - - 00:00.396 - - 0.0 - * - :ref:`sphx_glr_tutorials_bipartite_matching.py` 
(``../examples_sphinx-gallery/bipartite_matching.py``) - - 00:00.370 - - 0.0 - * - :ref:`sphx_glr_tutorials_topological_sort.py` (``../examples_sphinx-gallery/topological_sort.py``) - - 00:00.319 - - 0.0 - * - :ref:`sphx_glr_tutorials_maxflow.py` (``../examples_sphinx-gallery/maxflow.py``) - - 00:00.234 - - 0.0 From 132c0c79c0add4d5e2905babce59f20f7a7708d8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Szabolcs=20Horva=CC=81t?= Date: Sun, 20 Jul 2025 13:34:41 +0800 Subject: [PATCH 6/6] minor cleanup / rewrite for conciseness --- .../stochastic_variability.py | 155 ++++++++++-------- 1 file changed, 85 insertions(+), 70 deletions(-) diff --git a/doc/examples_sphinx-gallery/stochastic_variability.py b/doc/examples_sphinx-gallery/stochastic_variability.py index 944f82118..ea126aecf 100644 --- a/doc/examples_sphinx-gallery/stochastic_variability.py +++ b/doc/examples_sphinx-gallery/stochastic_variability.py @@ -5,52 +5,61 @@ Stochastic Variability in Community Detection Algorithms ========================================================= -This example demonstrates the variability of stochastic community detection methods by analyzing the consistency of multiple partitions using similarity measures normalized mutual information (NMI), variation of information (VI), rand index (RI) on both random and structured graphs. +This example demonstrates the use of stochastic community detection methods to check whether a network possesses a strong community structure, and whether the partitionings we obtain are meaningful. Many community detection algorithms are randomized, and return somewhat different results after each run, depending on the random seed that was set. When there is a robust community structure, we expect these results to be similar to each other. When the community structure is weak or non-existent, the results may be noisy and highly variable. 
We will employ several partition similarity measures to analyse the consistency of the results, including the normalized mutual information (NMI), the variation of information (VI), and the Rand index (RI). """ # %% -# Import libraries import igraph as ig import matplotlib.pyplot as plt import itertools +import random # %% -# First, we generate a graph. -# Load the karate club network +# .. note:: +# We set a random seed to ensure that the results look exactly the same in +# the gallery. You don't need to do this when exploring randomness. +random.seed(42) + +# %% +# We will use Zachary's karate club dataset [1]_, a classic example of a network +# with a strong community structure: karate = ig.Graph.Famous("Zachary") # %% -#For the random graph, we use an Erdős-Rényi :math:`G(n, m)` model, where 'n' is the number of nodes -#and 'm' is the number of edges. We set 'm' to match the edge count of the empirical (Karate Club) -#network to ensure structural similarity in terms of connectivity, making comparisons meaningful. -n_nodes = karate.vcount() -n_edges = karate.ecount() -#Generate an Erdős-Rényi graph with the same number of nodes and edges -random_graph = ig.Graph.Erdos_Renyi(n=n_nodes, m=n_edges) +# We will compare it to an Erdős-Rényi :math:`G(n, m)` random network having +# the same number of vertices and edges. The parameters 'n' and 'm' refer to the +# vertex and edge count, respectively. Since this is a random network, it should +# have no community structure. +random_graph = ig.Graph.Erdos_Renyi(n=karate.vcount(), m=karate.ecount()) # %% -# Now, lets plot the graph to visually understand them. 
+# First, let us plot the two networks for a visual comparison: # Create subplots -fig, axes = plt.subplots(1, 2, figsize=(12, 6)) +fig, axes = plt.subplots(1, 2, figsize=(12, 6), subplot_kw={'aspect': 'equal'}) -# Karate Club Graph -layout_karate = karate.layout("fr") +# Karate club network ig.plot( - karate, layout=layout_karate, target=axes[0], vertex_size=30, vertex_color="lightblue", edge_width=1, - vertex_label=[str(v.index) for v in karate.vs], vertex_label_size=10 + karate, target=axes[0], + vertex_color="lightblue", vertex_size=30, + vertex_label=range(karate.vcount()), vertex_label_size=10, + edge_width=1 ) -axes[0].set_title("Karate Club Network") +axes[0].set_title("Karate club network") -# Erdős-Rényi Graph -layout_random = random_graph.layout("fr") +# Random network ig.plot( - random_graph, layout=layout_random, target=axes[1], vertex_size=30, vertex_color="lightcoral", edge_width=1, - vertex_label=[str(v.index) for v in random_graph.vs], vertex_label_size=10 + random_graph, target=axes[1], + vertex_color="lightcoral", vertex_size=30, + vertex_label=range(random_graph.vcount()), vertex_label_size=10, + edge_width=1 ) -axes[1].set_title("Erdős-Rényi Random Graph") +axes[1].set_title("Erdős-Rényi random network") + +plt.show() + # %% -# Function to compute similarity between partitions +# Function to compute similarity between partitions using various methods: def compute_pairwise_similarity(partitions, method): similarities = [] @@ -61,74 +70,80 @@ def compute_pairwise_similarity(partitions, method): return similarities # %% -# We have used, stochastic community detection using the Louvain method, iteratively generating partitions and computing similarity metrics to assess stability. -# The Louvain method is a modularity maximization approach for community detection. -# Since exact modularity maximization is NP-hard, the algorithm employs a greedy heuristic that processes vertices in a random order. 
-# This randomness leads to variations in the detected communities across different runs, which is why results may differ each time the method is applied. -def run_experiment(graph, iterations=50): - partitions = [graph.community_multilevel().membership for _ in range(iterations)] +# The Leiden method, accessible through :meth:`igraph.Graph.community_leiden()`, +# is a modularity maximization approach for community detection. Since exact +# modularity maximization is NP-hard, the algorithm employs a greedy heuristic +# that processes vertices in a random order. This randomness leads to +# variation in the detected communities across different runs, which is why +# results may differ each time the method is applied. The following function +# runs the Leiden algorithm multiple times: +def run_experiment(graph, iterations=100): + partitions = [graph.community_leiden(objective_function='modularity').membership for _ in range(iterations)] nmi_scores = compute_pairwise_similarity(partitions, method="nmi") vi_scores = compute_pairwise_similarity(partitions, method="vi") ri_scores = compute_pairwise_similarity(partitions, method="rand") return nmi_scores, vi_scores, ri_scores # %% -# Run experiments +# Run the experiment on both networks: nmi_karate, vi_karate, ri_karate = run_experiment(karate) nmi_random, vi_random, ri_random = run_experiment(random_graph) # %% -# Lastly, lets plot probability density histograms to understand the result. 
-fig, axes = plt.subplots(3, 2, figsize=(12, 10)) +# Finally, let us plot histograms of the pairwise similarities of the obtained +# partitionings to understand the result: +fig, axes = plt.subplots(2, 3, figsize=(12, 6)) measures = [ - (nmi_karate, nmi_random, "NMI", 0, 1), # Normalized Mutual Information (0-1, higher = more similar) - (vi_karate, vi_random, "VI", 0, None), # Variation of Information (0+, lower = more similar) - (ri_karate, ri_random, "RI", 0, 1), # Rand Index (0-1, higher = more similar) + # Normalized Mutual Information (0-1, higher = more similar) + (nmi_karate, nmi_random, "NMI", 0, 1), + # Variation of Information (0+, lower = more similar) + (vi_karate, vi_random, "VI", 0, max(vi_karate + vi_random)), + # Rand Index (0-1, higher = more similar) + (ri_karate, ri_random, "RI", 0, 1), ] colors = ["red", "blue", "green"] for i, (karate_scores, random_scores, measure, lower, upper) in enumerate(measures): - # Karate Club histogram - axes[i][0].hist( - karate_scores, bins=20, alpha=0.7, color=colors[i], edgecolor="black", - density=True # Probability density + # Karate club histogram + axes[0][i].hist( + karate_scores, bins=20, range=(lower, upper), + density=True, # Probability density + alpha=0.7, color=colors[i], edgecolor="black" ) - axes[i][0].set_title(f"Probability Density of {measure} - Karate Club Network") - axes[i][0].set_xlabel(f"{measure} Score") - axes[i][0].set_ylabel("Density") - axes[i][0].set_xlim(lower, upper) # Set axis limits explicitly - - # Erdős-Rényi Graph histogram - axes[i][1].hist( - random_scores, bins=20, alpha=0.7, color=colors[i], edgecolor="black", - density=True + axes[0][i].set_title(f"{measure} - Karate club network") + axes[0][i].set_xlabel(f"{measure} score") + axes[0][i].set_ylabel("PDF") + + # Random network histogram + axes[1][i].hist( + random_scores, bins=20, range=(lower, upper), density=True, + alpha=0.7, color=colors[i], edgecolor="black" ) - axes[i][1].set_title(f"Probability Density of {measure} - 
Erdős-Rényi Graph") - axes[i][1].set_xlabel(f"{measure} Score") - axes[i][1].set_xlim(lower, upper) # Set axis limits explicitly + axes[1][i].set_title(f"{measure} - Random network") + axes[1][i].set_xlabel(f"{measure} score") + axes[1][i].set_ylabel("PDF") plt.tight_layout() plt.show() # %% -# We have compared the probability density of NMI, VI, and RI for the Karate Club network (structured) and an Erdős-Rényi random graph. +# We have compared the pairwise similarities using the NMI, VI, and RI measures +# between partitionings obtained for the karate club network (strong community +# structure) and a comparable random graph (which lacks communities). # -# **NMI (Normalized Mutual Information):** -# -# - Karate Club Network: The distribution is concentrated near 1, indicating high similarity across multiple runs, suggesting stable community detection. -# - Erdős-Rényi Graph: The values are more spread out, with lower NMI scores, showing inconsistent partitions due to the lack of clear community structures. +# The Normalized Mutual Information (NMI) and Rand Index (RI) both quantify +# similarity, and take values from :math:`[0,1]`. Higher values indicate more +# similar partitionings, with a value of 1 attained when the partitionings are +# identical. # -# **VI (Variation of Information):** +# The Variation of Information (VI) is a distance measure. It takes values from +# :math:`[0,\infty)`, with lower values indicating higher similarity. Identical +# partitionings have a distance of zero. # -# - Karate Club Network: The values are low and clustered, indicating stable partitioning with minor variations across runs. -# - Erdős-Rényi Graph: The distribution is broader and shifted toward higher VI values, meaning higher partition variability and less consistency. -# -# **RI (Rand Index):** -# -# - Karate Club Network: The RI values are high and concentrated near 1, suggesting consistent clustering results across multiple iterations. 
-# - Erdős-Rényi Graph: The distribution is more spread out, but with lower RI values, confirming unstable community detection. -# -# **Conclusion** -# -# The Karate Club Network exhibits strong, well-defined community structures, leading to consistent results across runs. -# The Erdős-Rényi Graph, being random, lacks clear communities, causing high variability in detected partitions. \ No newline at end of file +# For the karate club network, NMI and RI values are concentrated near 1, while +# VI is concentrated near 0, suggesting a robust community structure. In contrast, +# the values obtained for the random network are much more spread out, showing +# inconsistent partitionings due to the lack of a clear community structure. + +# %% +# .. [1] W. Zachary: "An Information Flow Model for Conflict and Fission in Small Groups". Journal of Anthropological Research 33, no. 4 (1977): 452–73. https://www.jstor.org/stable/3629752