From a2ab531d539f983b04ef227c7ad14a653b954302 Mon Sep 17 00:00:00 2001 From: sbasaldua Date: Tue, 4 Feb 2025 13:31:22 +0100 Subject: [PATCH] Fixes mkdocs issues in docstrings --- mercury/graph/ml/louvain.py | 35 ++++++++++++---------- mercury/graph/ml/spark_spreadactivation.py | 11 +++---- mercury/graph/viz/moebius.py | 10 +++---- 3 files changed, 29 insertions(+), 27 deletions(-) diff --git a/mercury/graph/ml/louvain.py b/mercury/graph/ml/louvain.py index c29ecc6..b1bde4b 100644 --- a/mercury/graph/ml/louvain.py +++ b/mercury/graph/ml/louvain.py @@ -18,8 +18,8 @@ class LouvainCommunities(BaseClass): """ - Class that defines the functions that run a PySpark implementation of the - Louvain algorithm to find the partition that maximizes the modularity of an + Class that defines the functions that run a PySpark implementation of the + Louvain algorithm to find the partition that maximizes the modularity of an undirected graph (as in [^1]). This version of the algorithm differs from [^1] in that the reassignment of @@ -28,22 +28,22 @@ class LouvainCommunities(BaseClass): 2 -> C1) are resolved with a simple tie-breaking rule. This version also introduces the resolution parameter _gamma_, as in [^2]. - Contributed by Arturo Soberon Cedillo, Jose Antonio Guzman Vazquez and + Contributed by Arturo Soberon Cedillo, Jose Antonio Guzman Vazquez and Isaac Dodanim Hernandez Garcia. - [^1]: + [^1]: Blondel V D, Guillaume J-L, Lambiotte R and Lefebvre E (2008). Fast unfolding of communities in large networks. Journal of Statistical Mechanics: Theory and Experiment, 2008. - [^2]: + [^2]: Aynaud T, Blondel V D, Guillaume J-L and Lambiotte R (2013). Multilevel local optimization of modularity. Graph Partitioning (315--345), 2013. Args: min_modularity_gain (float): - Modularity gain threshold between each pass. The algorithm + Modularity gain threshold between each pass. 
The algorithm stops if the gain in modularity between the current pass and the previous one is less than the given threshold. @@ -229,8 +229,19 @@ def _verify_data(self, df, expected_cols_grouping, expected_cols_others): """Checks if `edges` meets the format expected by `LouvainCommunities`. Args: - edges (pyspark.sql.dataframe.DataFrame): - A pyspark dataframe on which to perform basic data availability tests + df (pyspark.sql.dataframe.DataFrame): + A pyspark dataframe representing the edges of an undirected graph. + It must have `src` and `dst` as its columns. The user may also + specify the weight of each edge via the additional `weight` column + (optional). + + expected_cols_grouping (list): + A list of strings representing the columns that must be present in + `df` to group the data. + + expected_cols_others (list): + A list of strings representing the columns that must be present in + `df` but are not used for grouping. """ cols = df.columns @@ -400,13 +411,7 @@ def _calculate_modularity(self, edges, partition, m=None) -> float: belongs to). The dataframe must have columns `id` (indicating each node's ID) and `c` (indicating each node's assigned community). - resolution (float): - The resolution parameter _gamma_. Its value - must be greater or equal to zero. If resolution is less than 1, - modularity favors larger communities, while values greater than 1 - favor smaller communities. - - (int): + m (int): The weighted size of the graph (the output of `_get_m()`). Returns: diff --git a/mercury/graph/ml/spark_spreadactivation.py b/mercury/graph/ml/spark_spreadactivation.py index ab61eba..0db977a 100644 --- a/mercury/graph/ml/spark_spreadactivation.py +++ b/mercury/graph/ml/spark_spreadactivation.py @@ -41,7 +41,7 @@ class SparkSpreadingActivation(BaseClass): based on the influence that has been accumulated. The diffusion model is based on Spreading Activation (SPA) techniques proposed in cognitive psychology - and later used for trust metric computations. 
For more details, please see paper entitled + and later used for trust metric computations. For more details, please see the paper entitled ["Social Ties and their Relevance to Churn in Mobile Telecom Networks"](https://pdfs.semanticscholar.org/3275/3d80adb5ec2d4a974b5d1a872e2c957b263b.pdf) Args: @@ -121,7 +121,7 @@ def _set_seed_nodes( Set seed nodes which are the source of influence using pyspark dataframe. Args: - G (mercury.graph.core.Graph): A `mercury.graph` Graph object. + g (mercury.graph.core.Graph): A `mercury.graph` Graph object. seed_nodes (Union[List, pyspark.sql.DataFrame]): Collection of nodes that are the source to spread the influence. It must be pyspark dataframe with column 'id' or python list. """ @@ -170,7 +170,7 @@ def _compute_degrees(self, g: Graph): attributes: inDegree, outDegree, w_inDegree, w_outDegree. Args: - - graph: graphframe object, network + g (mercury.graph.core.Graph): A `mercury.graph` Graph object. """ g_vertices = g.graphframe.vertices g_edges = g.graphframe.edges @@ -202,10 +202,7 @@ def _spread_activation_step(self, g: Graph): One step in the spread activation model. Args: - graph: graphframe object, network - attribute: str, name of column for attribute/influence - spreading_factor: 0 - 1, amount of influence to spread - transfer_function: weighted or unweighted, how to transfer influence along edges + g (mercury.graph.core.Graph): A `mercury.graph` Graph object. Returns: (Graphframe): new network with updated new calculation of attribute in vertices diff --git a/mercury/graph/viz/moebius.py b/mercury/graph/viz/moebius.py index 1eff987..cf9f399 100644 --- a/mercury/graph/viz/moebius.py +++ b/mercury/graph/viz/moebius.py @@ -142,12 +142,12 @@ def generate_color_palette(self, cats, hue = 0, sat = 0.7, light = 0.5): Args: cats (iterable): An iterable of categories for which the color palette is to be generated. hue (float, optional): The base hue that is added to all the colors in the color palette. It must be in range, 0..1, all the - resulting hue values will be kept modulo 1.0. 
Default is 0 (no shift). + resulting hue values will be kept modulo 1.0. Default is 0 (no shift). sat (float, optional): The saturation level for the colors. Default is 0.7. Range is 0..1. light (float, optional): The lightness level for the colors. Default is 0.5. Range is 0..1. Returns: - dict: A dictionary where keys are categories and values are hex color codes. + (dict): A dictionary where keys are categories and values are hex color codes. """ cats = set(cats) cols = {} @@ -249,7 +249,7 @@ def _get_adjacent_nodes_moebius(self, node_id, limit = 20, depth = 1): depth (int): The depth of the graph to be returned. Returns: - A JSON string with the adjacent nodes and edges. + (str): A JSON string with the adjacent nodes and edges. """ if self.use_spark: @@ -316,7 +316,7 @@ def _get_one_level_subgraph_graphframes(self, node_id, _testing=False): node_id (str): The ID of the node for which to get the one-level subgraph. Returns: - tuple: A tuple containing two Spark DataFrames: + (tuple): A tuple containing two Spark DataFrames: - nodes_df: DataFrame with columns 'id', 'count', '_int_id', and any other node attributes. - edges_df: DataFrame with columns 'source', 'target', '_int_id' the edges connecting the nodes in the subgraph. """ @@ -387,7 +387,7 @@ def _get_one_level_subgraph_networkx(self, node_id): node_id (str): The ID of the node for which to get the one-level subgraph. Returns: - tuple: A tuple containing two Pandas DataFrames: + (tuple): A tuple containing two Pandas DataFrames: - nodes_df: DataFrame with columns 'id', 'count', '_int_id', and any other node attributes. - edges_df: DataFrame with columns 'source', 'target', '_int_id' the edges connecting the nodes in the subgraph. """