Revert "Follow Up: Make '--platform' argument mandatory in CLI (#1473)…
Browse files Browse the repository at this point in the history
…" (#1498)

Fixes #1497. 

In #1472, the `--platform` argument was made a required parameter in Fire. Our tests found that the short flag `-p` for platform no longer works.

Upon investigation, it seems that Fire does not support short flags for
required arguments.
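
A minimal standalone sketch of the restored signature (the file name and function body here are illustrative, not repo code):

```python
# repro.py -- minimal sketch; run with `python repro.py -p dataproc`
import fire


def qualification(*, platform: str = None, eventlogs: str = None):
    """Optional platform, as restored by this PR."""
    print(f'platform={platform}, eventlogs={eventlogs}')


if __name__ == '__main__':
    fire.Fire(qualification)
```

With this optional form, both `--platform dataproc` and `-p dataproc` are accepted; with the pre-revert form (`*,` and `platform: str` with no default), the `-p` spelling stopped working, as observed in our tests.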

This PR reverts that commit and makes the platform argument optional in Fire, restoring compatibility with the `-p` flag.

The platform argument is still validated internally, so it remains effectively required.
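
The validation error in Case 1 below comes from that internal check. A hedged sketch of how such a check can be expressed with pydantic (the real `QualifyUserArgModel` fields and wiring live in the repo; this body is an assumption inferred from the log output):

```python
from typing import Optional

from pydantic import BaseModel, model_validator
from pydantic_core import PydanticCustomError


class QualifyUserArgModel(BaseModel):
    # Illustrative subset of fields; the real model defines many more.
    platform: Optional[str] = None
    eventlogs: Optional[str] = None

    @model_validator(mode='after')
    def require_platform(self) -> 'QualifyUserArgModel':
        # Enforce the requirement internally instead of in the Fire signature.
        if self.platform is None:
            raise PydanticCustomError(
                'invalid_argument',
                'Cannot run tool cmd without platform argument. '
                'Re-run the command providing the platform argument.')
        return self
```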


### Case 1: Platform not provided

```
spark_rapids qualification --eventlogs "/path/to/dataproc-cpu" --verbose
```

#### STDOUT

```

2025-01-10 13:35:16,011 INFO spark_rapids_tools.argparser: ...applying argument case: Missing Platform argument
2025-01-10 13:35:16,012 ERROR spark_rapids_tools.argparser: Validation err: 1 validation error for QualifyUserArgModel
  Cannot run tool cmd without platform argument. Re-run the command providing the platform argument.
  Error: [type=invalid_argument, input_value=ArgsKwargs((), {'eventlog...ols_config_path': None}), input_type=ArgsKwargs]
```



### Case 2: Platform provided via `--platform` or `-p`

```
spark_rapids qualification --platform dataproc --eventlogs "/path/to/dataproc-cpu" --verbose
spark_rapids qualification -p dataproc --eventlogs "/path/to/dataproc-cpu" --verbose
```


#### STDOUT
```
Report Summary:
----------------------  -
Total applications      1
Processed applications  1
Top candidates          0
----------------------  -


Processing Completed!
```

---------

Signed-off-by: Partho Sarthi <[email protected]>
parthosa authored Jan 21, 2025
1 parent 5ab53d8 commit 4d34fc7
Showing 1 changed file with 11 additions and 13 deletions.

#### user_tools/src/spark_rapids_tools/cmdli/tools_cli.py

```diff
@@ -1,4 +1,4 @@
-# Copyright (c) 2023-2024, NVIDIA CORPORATION.
+# Copyright (c) 2023-2025, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -35,10 +35,9 @@ class ToolsCLI(object):  # pylint: disable=too-few-public-methods
     """

     def qualification(self,
-                      *,  # force named arguments
-                      platform: str,
                       eventlogs: str = None,
                       cluster: str = None,
+                      platform: str = None,
                       output_folder: str = None,
                       filter_apps: str = None,
                       custom_model_file: str = None,
@@ -56,15 +55,15 @@ def qualification(self,
         The cmd will process each app individually, but will group apps with the same name into the
         same output row after averaging duration metrics accordingly.
-        :param platform: Defines one of the following: "onprem", "emr", "dataproc", "dataproc-gke",
-               "databricks-aws", and "databricks-azure".
         :param eventlogs: Event log filenames or CSP storage directories containing event logs
                (comma separated).
                Skipping this argument requires that the cluster argument points to a valid
                cluster name on the CSP.
         :param cluster: The CPU cluster on which the Spark application(s) were executed.
                Name or ID (for databricks platforms) of cluster or path to cluster-properties.
+        :param platform: Defines one of the following: "onprem", "emr", "dataproc", "dataproc-gke",
+               "databricks-aws", and "databricks-azure".
         :param output_folder: Local path to store the output.
         :param tools_jar: Path to a bundled jar including Rapids tool. The path is a local filesystem,
                or remote cloud storage url. If missing, the wrapper downloads the latest rapids-4-spark-tools_*.jar
@@ -90,8 +89,8 @@ def qualification(self,
         For more details on Qualification tool options, please visit
         https://docs.nvidia.com/spark-rapids/user-guide/latest/qualification/jar-usage.html#running-the-qualification-tool-standalone-on-spark-event-logs
         """
-        platform = Utils.get_value_or_pop(platform, rapids_options, 'p')
         eventlogs = Utils.get_value_or_pop(eventlogs, rapids_options, 'e')
+        platform = Utils.get_value_or_pop(platform, rapids_options, 'p')
         tools_jar = Utils.get_value_or_pop(tools_jar, rapids_options, 't')
         output_folder = Utils.get_value_or_pop(output_folder, rapids_options, 'o')
         filter_apps = Utils.get_value_or_pop(filter_apps, rapids_options, 'f')
@@ -109,9 +108,9 @@ def qualification(self,
         if estimation_model_args is None:
             return None
         qual_args = AbsToolUserArgModel.create_tool_args('qualification',
-                                                         platform=platform,
                                                          eventlogs=eventlogs,
                                                          cluster=cluster,
+                                                         platform=platform,
                                                          output_folder=output_folder,
                                                          tools_jar=tools_jar,
                                                          jvm_heap_size=jvm_heap_size,
@@ -128,10 +127,9 @@ def qualification(self,
         return None

     def profiling(self,
-                  *,  # force named arguments
-                  platform: str,
                   eventlogs: str = None,
                   cluster: str = None,
+                  platform: str = None,
                   driverlog: str = None,
                   output_folder: str = None,
                   tools_jar: str = None,
@@ -148,14 +146,14 @@ def profiling(self,
         The tool also will recommend setting for the application assuming that the job will be able
         to use all the cluster resources (CPU and GPU) when it is running.
-        :param platform: defines one of the following "onprem", "emr", "dataproc", "databricks-aws",
-               and "databricks-azure".
         :param eventlogs: Event log filenames or cloud storage directories
                containing event logs (comma separated). If missing, the wrapper reads the Spark's
                property `spark.eventLog.dir` defined in the `cluster`.
         :param cluster: The cluster on which the Spark applications were executed. The argument
                can be a cluster name or ID (for databricks platforms) or a valid path to the cluster's
                properties file (json format) generated by the CSP SDK.
+        :param platform: defines one of the following "onprem", "emr", "dataproc", "databricks-aws",
+               and "databricks-azure".
         :param driverlog: Valid path to the GPU driver log file.
         :param output_folder: path to store the output.
         :param tools_jar: Path to a bundled jar including Rapids tool. The path is a local filesystem,
@@ -175,9 +173,9 @@ def profiling(self,
         For more details on Profiling tool options, please visit
         https://docs.nvidia.com/spark-rapids/user-guide/latest/profiling/jar-usage.html#prof-tool-title-options
         """
-        platform = Utils.get_value_or_pop(platform, rapids_options, 'p')
         eventlogs = Utils.get_value_or_pop(eventlogs, rapids_options, 'e')
         cluster = Utils.get_value_or_pop(cluster, rapids_options, 'c')
+        platform = Utils.get_value_or_pop(platform, rapids_options, 'p')
         driverlog = Utils.get_value_or_pop(driverlog, rapids_options, 'd')
         output_folder = Utils.get_value_or_pop(output_folder, rapids_options, 'o')
         tools_jar = Utils.get_value_or_pop(tools_jar, rapids_options, 't')
@@ -186,9 +184,9 @@ def profiling(self,
             ToolLogging.enable_debug_mode()
         init_environment('prof')
         prof_args = AbsToolUserArgModel.create_tool_args('profiling',
-                                                         platform=platform,
                                                          eventlogs=eventlogs,
                                                          cluster=cluster,
+                                                         platform=platform,
                                                          driverlog=driverlog,
                                                          jvm_heap_size=jvm_heap_size,
                                                          jvm_threads=jvm_threads,
```
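
With `platform` optional again, a `-p dataproc` invocation presumably lands in the catch-all `**rapids_options` dict under the key `'p'`, and `Utils.get_value_or_pop` reconciles the two spellings. A sketch of the presumed behavior (the actual helper lives in the repo's `Utils` class; this body is an assumption based on the call sites above):

```python
def get_value_or_pop(provided_value, options: dict, short_flag: str):
    """Return the long-form value if given; otherwise pop the short flag."""
    if provided_value is not None:
        return provided_value
    # Consume the short flag so it is not forwarded as an unknown option.
    return options.pop(short_flag, None)
```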
