Fixes mkdocs issues in docstrings

BBVA · Feb 4, 2025 · a2ab531 · a2ab531
1 parent 4902c82
commit a2ab531
Show file tree

Hide file tree

Showing 3 changed files with 29 additions and 27 deletions.
diff --git a/mercury/graph/ml/louvain.py b/mercury/graph/ml/louvain.py
@@ -18,8 +18,8 @@
 
 class LouvainCommunities(BaseClass):
     """
-    Class that defines the functions that run a PySpark implementation of the 
-    Louvain algorithm to find the partition that maximizes the modularity of an 
+    Class that defines the functions that run a PySpark implementation of the
+    Louvain algorithm to find the partition that maximizes the modularity of an
     undirected graph (as in [^1]).
 
     This version of the algorithm differs from [^1] in that the reassignment of
@@ -28,22 +28,22 @@ class LouvainCommunities(BaseClass):
     2 -> C1) are resolved with a simple tie-breaking rule. This version also
     introduces the resolution parameter _gamma_, as in [^2].
 
-    Contributed by Arturo Soberon Cedillo, Jose Antonio Guzman Vazquez and 
+    Contributed by Arturo Soberon Cedillo, Jose Antonio Guzman Vazquez and
     Isaac Dodanim Hernandez Garcia.
 
-    [^1]: 
+    [^1]:
         Blondel V D, Guillaume J-L, Lambiotte R and Lefebvre E (2008). Fast
         unfolding of communities in large networks. Journal of Statistical
         Mechanics: Theory and Experiment, 2008.
         <https://doi.org/10.1088/1742-5468/2008/10/p10008>
 
-    [^2]: 
+    [^2]:
         Aynaud T, Blondel V D, Guillaume J-L and Lambiotte R (2013). Multilevel
         local optimization of modularity. Graph Partitioning (315--345), 2013.
 
     Args:
         min_modularity_gain (float):
-            Modularity gain threshold between each pass. The algorithm 
+            Modularity gain threshold between each pass. The algorithm
             stops if the gain in modularity between the current pass
             and the previous one is less than the given threshold.
 
@@ -229,8 +229,19 @@ def _verify_data(self, df, expected_cols_grouping, expected_cols_others):
         """Checks if `edges` meets the format expected by `LouvainCommunities`.
 
         Args:
-            edges (pyspark.sql.dataframe.DataFrame):
-                A pyspark dataframe on which to perform basic data availability tests
+            df (pyspark.sql.dataframe.DataFrame):
+                A pyspark dataframe representing the edges of an undirected graph.
+                It must have `src` and `dst` as its columns. The user may also
+                specify the weight of each edge via the additional `weight` column
+                (optional).
+
+            expected_cols_grouping (list):
+                A list of strings representing the columns that must be present in
+                `df` to group the data.
+
+            expected_cols_others (list):
+                A list of strings representing the columns that must be present in
+                `df` but are not used for grouping.
         """
 
         cols = df.columns
@@ -400,13 +411,7 @@ def _calculate_modularity(self, edges, partition, m=None) -> float:
                 belongs to). The dataframe must have columns `id` (indicating each
                 node's ID) and `c` (indicating each node's assigned community).
 
-            resolution (float):
-                The resolution parameter _gamma_. Its value
-                must be greater or equal to zero. If resolution is less than 1,
-                modularity favors larger communities, while values greater than 1
-                favor smaller communities.
-
-            (int):
+            m (int):
                 The weighted size of the graph (the output of `_get_m()`).
 
         Returns:

diff --git a/mercury/graph/ml/spark_spreadactivation.py b/mercury/graph/ml/spark_spreadactivation.py
@@ -41,7 +41,7 @@ class SparkSpreadingActivation(BaseClass):
     based on the influence that has been accumulated.
 
     The diffusion model is based on Spreading Activation (SPA) techniques proposed in cognitive psychology
-    and later used for trust metric computations. For more details, please see paper entitled 
+    and later used for trust metric computations. For more details, please see the paper entitled
     ["Social Ties and their Relevance to Churn in Mobile Telecom Networks"](https://pdfs.semanticscholar.org/3275/3d80adb5ec2d4a974b5d1a872e2c957b263b.pdf)
 
     Args:
@@ -121,7 +121,7 @@ def _set_seed_nodes(
         Set seed nodes which are the source of influence using pyspark dataframe.
 
         Args:
-            G (mercury.graph.core.Graph): A `mercury.graph` Graph object.
+            g (mercury.graph.core.Graph): A `mercury.graph` Graph object.
             seed_nodes (Union[List, pyspark.sql.DataFrame]): Collection of nodes that are the source to spread
                 the influence. It must be a pyspark dataframe with an 'id' column or a Python list.
         """
@@ -170,7 +170,7 @@ def _compute_degrees(self, g: Graph):
         attributes: inDegree, outDegree, w_inDegree, w_outDegree.
 
         Args:
-            - graph: graphframe object, network
+            g (mercury.graph.core.Graph): A `mercury.graph` Graph object.
         """
         g_vertices = g.graphframe.vertices
         g_edges = g.graphframe.edges
@@ -202,10 +202,7 @@ def _spread_activation_step(self, g: Graph):
         One step in the spread activation model.
 
         Args:
-            graph: graphframe object, network
-            attribute: str, name of column for attribute/influence
-            spreading_factor: 0 - 1, amount of influence to spread
-            transfer_function: weighted or unweighted, how to transfer influence along edges
+            g (mercury.graph.core.Graph): A `mercury.graph` Graph object.
 
         Returns:
             (Graphframe): new network with updated new calculation of attribute in vertices

diff --git a/mercury/graph/viz/moebius.py b/mercury/graph/viz/moebius.py
@@ -142,12 +142,12 @@ def generate_color_palette(self, cats, hue = 0, sat = 0.7, light = 0.5):
         Args:
             cats (iterable): An iterable of categories for which the color palette is to be generated.
             hue (float, optional): The base hue that is added to all the colors in the color palette. It must be in the range 0..1; all the
-            resulting hue values will be kept modulo 1.0. Default is 0 (no shift).
+                resulting hue values will be kept modulo 1.0. Default is 0 (no shift).
             sat (float, optional): The saturation level for the colors. Default is 0.7. Range is 0..1.
             light (float, optional): The lightness level for the colors. Default is 0.5. Range is 0..1.
 
         Returns:
-            dict: A dictionary where keys are categories and values are hex color codes.
+            (dict): A dictionary where keys are categories and values are hex color codes.
         """
         cats = set(cats)
         cols = {}
@@ -249,7 +249,7 @@ def _get_adjacent_nodes_moebius(self, node_id, limit = 20, depth = 1):
             depth (int): The depth of the graph to be returned.
 
         Returns:
-            A JSON string with the adjacent nodes and edges.
+            (str): A JSON string with the adjacent nodes and edges.
         """
 
         if self.use_spark:
@@ -316,7 +316,7 @@ def _get_one_level_subgraph_graphframes(self, node_id, _testing=False):
             node_id (str): The ID of the node for which to get the one-level subgraph.
 
         Returns:
-            tuple: A tuple containing two Spark DataFrames:
+            (tuple): A tuple containing two Spark DataFrames:
                 - nodes_df: DataFrame with columns 'id', 'count', '_int_id', and any other node attributes.
                 - edges_df: DataFrame with columns 'source', 'target', and '_int_id' for the edges connecting the nodes in the subgraph.
         """
@@ -387,7 +387,7 @@ def _get_one_level_subgraph_networkx(self, node_id):
             node_id (str): The ID of the node for which to get the one-level subgraph.
 
         Returns:
-            tuple: A tuple containing two Pandas DataFrames:
+            (tuple): A tuple containing two Pandas DataFrames:
                 - nodes_df: DataFrame with columns 'id', 'count', '_int_id', and any other node attributes.
                 - edges_df: DataFrame with columns 'source', 'target', and '_int_id' for the edges connecting the nodes in the subgraph.
         """