Skip to content

fix OpenAPI Context #524

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 11 commits into
base: master
Choose a base branch
from
79 changes: 68 additions & 11 deletions dpdispatcher/contexts/openapi_context.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,20 @@
import glob
import os
import shutil
import uuid
from zipfile import ZipFile

import tqdm

try:
from bohriumsdk.client import Client
from bohriumsdk.job import Job
from bohriumsdk.storage import Storage
from bohriumsdk.util import Util
except ModuleNotFoundError:
from bohrium import Bohrium
from bohrium.resources import Job, Tiefblue
except ModuleNotFoundError as e:
found_bohriumsdk = False
import_bohrium_error = e
else:
found_bohriumsdk = True
import_bohrium_error = None

from dpdispatcher.base_context import BaseContext
from dpdispatcher.dlog import dlog
Expand All @@ -21,6 +23,37 @@
DP_CLOUD_SERVER_HOME_DIR = os.path.join(
os.path.expanduser("~"), ".dpdispatcher/", "dp_cloud_server/"
)
os.makedirs(DP_CLOUD_SERVER_HOME_DIR, exist_ok=True)
Comment on lines 23 to +26
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion

Avoid creating directories at module import time.

Creating directories during module import can cause issues in restricted environments. Consider creating the directory lazily when first needed.

Move the directory creation to methods that actually use it, or create a helper function:

def ensure_home_dir_exists():
    os.makedirs(DP_CLOUD_SERVER_HOME_DIR, exist_ok=True)

Then call this function in methods like write_home_file and read_home_file.

🤖 Prompt for AI Agents
In dpdispatcher/contexts/openapi_context.py around lines 23 to 26, the directory
is created at module import time, which can cause issues in restricted
environments. Remove the os.makedirs call from the module level and instead
create a helper function that ensures the directory exists. Call this helper
function inside methods that use the directory, such as write_home_file and
read_home_file, to create the directory lazily when first needed.



def unzip_file(zip_file, out_dir="./"):
obj = ZipFile(zip_file, "r")
for item in obj.namelist():
obj.extract(item, out_dir)
Comment on lines +29 to +32
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion

Eliminate code duplication by creating a shared utility module.

The unzip_file function is duplicated in both openapi.py and openapi_context.py. This violates the DRY principle.

Create a shared utility module (e.g., dpdispatcher/utils/zip_utils.py) and move both unzip_file and zip_file_list functions there:

# dpdispatcher/utils/zip_utils.py
from zipfile import ZipFile
import glob
import os
from dpdispatcher.dlog import dlog

def unzip_file(zip_file, out_dir="./"):
    """Extract zip file to specified directory."""
    try:
        with ZipFile(zip_file, "r") as obj:
            for item in obj.namelist():
                obj.extract(item, out_dir)
    except Exception as e:
        dlog.error(f"Failed to unzip file {zip_file}: {str(e)}")
        raise

def zip_file_list(root_path, zip_filename, file_list=[]):
    """Create zip file from list of files."""
    # ... existing implementation with error handling ...

Then import and use these functions in both files.

🤖 Prompt for AI Agents
In dpdispatcher/contexts/openapi_context.py around lines 29 to 32, the
unzip_file function duplicates code found in openapi.py, violating the DRY
principle. To fix this, create a new shared utility module at
dpdispatcher/utils/zip_utils.py, move both unzip_file and zip_file_list
functions there with proper error handling and logging, then update both
openapi_context.py and openapi.py to import and use these functions from the new
utility module.



def zip_file_list(root_path, zip_filename, file_list=[]):
out_zip_file = os.path.join(root_path, zip_filename)
# print('debug: file_list', file_list)
zip_obj = ZipFile(out_zip_file, "w")
for f in file_list:
matched_files = os.path.join(root_path, f)
for ii in glob.glob(matched_files):
# print('debug: matched_files:ii', ii)
if os.path.isdir(ii):
arcname = os.path.relpath(ii, start=root_path)
zip_obj.write(ii, arcname)
for root, dirs, files in os.walk(ii):
for file in files:
filename = os.path.join(root, file)
arcname = os.path.relpath(filename, start=root_path)
# print('debug: filename:arcname:root_path', filename, arcname, root_path)
zip_obj.write(filename, arcname)
else:
arcname = os.path.relpath(ii, start=root_path)
zip_obj.write(ii, arcname)
zip_obj.close()
return out_zip_file
Comment on lines +35 to +56
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue

Fix mutable default argument and add error handling.

The function has several issues that need addressing:

  1. Mutable default argument can cause unexpected behavior
  2. Debug print statements should be removed
  3. Missing error handling
  4. Missing context manager for ZipFile
-def zip_file_list(root_path, zip_filename, file_list=[]):
+def zip_file_list(root_path, zip_filename, file_list=None):
+    if file_list is None:
+        file_list = []
     out_zip_file = os.path.join(root_path, zip_filename)
-    # print('debug: file_list', file_list)
-    zip_obj = ZipFile(out_zip_file, "w")
-    for f in file_list:
-        matched_files = os.path.join(root_path, f)
-        for ii in glob.glob(matched_files):
-            # print('debug: matched_files:ii', ii)
-            if os.path.isdir(ii):
-                arcname = os.path.relpath(ii, start=root_path)
-                zip_obj.write(ii, arcname)
-                for root, dirs, files in os.walk(ii):
-                    for file in files:
-                        filename = os.path.join(root, file)
-                        arcname = os.path.relpath(filename, start=root_path)
-                        # print('debug: filename:arcname:root_path', filename, arcname, root_path)
-                        zip_obj.write(filename, arcname)
-            else:
-                arcname = os.path.relpath(ii, start=root_path)
-                zip_obj.write(ii, arcname)
-    zip_obj.close()
-    return out_zip_file
+    try:
+        with ZipFile(out_zip_file, "w") as zip_obj:
+            for f in file_list:
+                matched_files = os.path.join(root_path, f)
+                for ii in glob.glob(matched_files):
+                    if os.path.isdir(ii):
+                        arcname = os.path.relpath(ii, start=root_path)
+                        zip_obj.write(ii, arcname)
+                        for root, dirs, files in os.walk(ii):
+                            for file in files:
+                                filename = os.path.join(root, file)
+                                arcname = os.path.relpath(filename, start=root_path)
+                                zip_obj.write(filename, arcname)
+                    else:
+                        arcname = os.path.relpath(ii, start=root_path)
+                        zip_obj.write(ii, arcname)
+        return out_zip_file
+    except Exception as e:
+        dlog.error(f"Failed to create zip file {out_zip_file}: {str(e)}")
+        raise
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
def zip_file_list(root_path, zip_filename, file_list=[]):
out_zip_file = os.path.join(root_path, zip_filename)
# print('debug: file_list', file_list)
zip_obj = ZipFile(out_zip_file, "w")
for f in file_list:
matched_files = os.path.join(root_path, f)
for ii in glob.glob(matched_files):
# print('debug: matched_files:ii', ii)
if os.path.isdir(ii):
arcname = os.path.relpath(ii, start=root_path)
zip_obj.write(ii, arcname)
for root, dirs, files in os.walk(ii):
for file in files:
filename = os.path.join(root, file)
arcname = os.path.relpath(filename, start=root_path)
# print('debug: filename:arcname:root_path', filename, arcname, root_path)
zip_obj.write(filename, arcname)
else:
arcname = os.path.relpath(ii, start=root_path)
zip_obj.write(ii, arcname)
zip_obj.close()
return out_zip_file
def zip_file_list(root_path, zip_filename, file_list=None):
if file_list is None:
file_list = []
out_zip_file = os.path.join(root_path, zip_filename)
try:
with ZipFile(out_zip_file, "w") as zip_obj:
for f in file_list:
matched_files = os.path.join(root_path, f)
for ii in glob.glob(matched_files):
if os.path.isdir(ii):
arcname = os.path.relpath(ii, start=root_path)
zip_obj.write(ii, arcname)
for root, dirs, files in os.walk(ii):
for file in files:
filename = os.path.join(root, file)
arcname = os.path.relpath(filename, start=root_path)
zip_obj.write(filename, arcname)
else:
arcname = os.path.relpath(ii, start=root_path)
zip_obj.write(ii, arcname)
return out_zip_file
except Exception as e:
dlog.error(f"Failed to create zip file {out_zip_file}: {e}")
raise
🤖 Prompt for AI Agents
In dpdispatcher/contexts/openapi_context.py lines 35 to 56, fix the mutable
default argument by changing file_list's default to None and initializing it
inside the function. Remove all debug print statements. Add error handling
around the file zipping process to catch and log exceptions. Use a context
manager (with statement) for the ZipFile object to ensure it is properly closed
even if errors occur.



class OpenAPIContext(BaseContext):
Expand All @@ -35,15 +68,39 @@ def __init__(
if not found_bohriumsdk:
raise ModuleNotFoundError(
"bohriumsdk not installed. Install dpdispatcher with `pip install dpdispatcher[bohrium]`"
)
) from import_bohrium_error
self.init_local_root = local_root
self.init_remote_root = remote_root
self.temp_local_root = os.path.abspath(local_root)
self.remote_profile = remote_profile
self.client = Client()
self.storage = Storage(client=self.client)
access_key = (
remote_profile.get("access_key", None)
or os.getenv("BOHRIUM_ACCESS_KEY", None)
or os.getenv("ACCESS_KEY", None)
)
project_id = (
remote_profile.get("project_id", None)
or os.getenv("BOHRIUM_PROJECT_ID", None)
or os.getenv("PROJECT_ID", None)
)
app_key = (
remote_profile.get("app_key", None)
or os.getenv("BOHRIUM_APP_KEY", None)
or os.getenv("APP_KEY", None)
)
if access_key is None:
raise ValueError(
"remote_profile must contain 'access_key' or set environment variable 'BOHRIUM_ACCESS_KEY'"
)
if project_id is None:
raise ValueError(
"remote_profile must contain 'project_id' or set environment variable 'BOHRIUM_PROJECT_ID'"
)
self.client = Bohrium(
access_key=access_key, project_id=project_id, app_key=app_key
)
self.storage = Tiefblue()
self.job = Job(client=self.client)
self.util = Util()
self.jgid = None

@classmethod
Expand Down Expand Up @@ -97,7 +154,7 @@ def upload_job(self, job, common_files=None):
for file in task.forward_files:
upload_file_list.append(os.path.join(task.task_work_path, file))

upload_zip = Util.zip_file_list(
upload_zip = zip_file_list(
self.local_root, zip_task_file, file_list=upload_file_list
)
project_id = self.remote_profile.get("project_id", 0)
Expand Down Expand Up @@ -189,7 +246,7 @@ def download(self, submission):
):
continue
self.storage.download_from_url(info["resultUrl"], target_result_zip)
Util.unzip_file(target_result_zip, out_dir=self.local_root)
unzip_file(target_result_zip, out_dir=self.local_root)
self._backup(self.local_root, target_result_zip)
self._clean_backup(
self.local_root, keep_backup=self.remote_profile.get("keep_backup", True)
Expand Down
46 changes: 38 additions & 8 deletions dpdispatcher/machines/openapi.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,13 @@
import os
import shutil
import time
from zipfile import ZipFile

from dpdispatcher.utils.utils import customized_script_header_template

try:
from bohriumsdk.client import Client
from bohriumsdk.job import Job
from bohriumsdk.storage import Storage
from bohriumsdk.util import Util
from bohrium import Bohrium
from bohrium.resources import Job, Tiefblue
except ModuleNotFoundError:
found_bohriumsdk = False
else:
Expand All @@ -23,6 +22,12 @@
"""


def unzip_file(zip_file, out_dir="./"):
obj = ZipFile(zip_file, "r")
for item in obj.namelist():
obj.extract(item, out_dir)

Comment on lines +25 to +29
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion

Add error handling for zip file operations.

The unzip_file function should handle potential errors such as corrupted zip files or extraction failures.

 def unzip_file(zip_file, out_dir="./"):
-    obj = ZipFile(zip_file, "r")
-    for item in obj.namelist():
-        obj.extract(item, out_dir)
+    try:
+        with ZipFile(zip_file, "r") as obj:
+            for item in obj.namelist():
+                obj.extract(item, out_dir)
+    except Exception as e:
+        dlog.error(f"Failed to unzip file {zip_file}: {str(e)}")
+        raise
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
def unzip_file(zip_file, out_dir="./"):
obj = ZipFile(zip_file, "r")
for item in obj.namelist():
obj.extract(item, out_dir)
def unzip_file(zip_file, out_dir="./"):
try:
with ZipFile(zip_file, "r") as obj:
for item in obj.namelist():
obj.extract(item, out_dir)
except Exception as e:
dlog.error(f"Failed to unzip file {zip_file}: {str(e)}")
raise
🤖 Prompt for AI Agents
In dpdispatcher/machines/openapi.py around lines 25 to 29, the unzip_file
function lacks error handling for zip file operations. Add try-except blocks
around the ZipFile opening and extraction code to catch exceptions like
BadZipFile or extraction errors. Log or raise appropriate errors to handle
corrupted zip files or extraction failures gracefully.


class OpenAPI(Machine):
def __init__(self, context):
if not found_bohriumsdk:
Expand All @@ -35,9 +40,35 @@ def __init__(self, context):
self.grouped = self.remote_profile.get("grouped", True)
self.retry_count = self.remote_profile.get("retry_count", 3)
self.ignore_exit_code = context.remote_profile.get("ignore_exit_code", True)
self.client = Client()

access_key = (
self.remote_profile.get("access_key", None)
or os.getenv("BOHRIUM_ACCESS_KEY", None)
or os.getenv("ACCESS_KEY", None)
)
project_id = (
self.remote_profile.get("project_id", None)
or os.getenv("BOHRIUM_PROJECT_ID", None)
or os.getenv("PROJECT_ID", None)
)
app_key = (
self.remote_profile.get("app_key", None)
or os.getenv("BOHRIUM_APP_KEY", None)
or os.getenv("APP_KEY", None)
)
if access_key is None:
raise ValueError(
"remote_profile must contain 'access_key' or set environment variable 'BOHRIUM_ACCESS_KEY'"
)
if project_id is None:
raise ValueError(
"remote_profile must contain 'project_id' or set environment variable 'BOHRIUM_PROJECT_ID'"
)
self.client = Bohrium(
access_key=access_key, project_id=project_id, app_key=app_key
)
self.storage = Tiefblue()
self.job = Job(client=self.client)
self.storage = Storage(client=self.client)
self.group_id = None

def gen_script(self, job):
Expand Down Expand Up @@ -102,7 +133,6 @@ def do_submit(self, job):
}
if job.job_state == JobStatus.unsubmitted:
openapi_params["job_id"] = job.job_id

data = self.job.insert(**openapi_params)

job.job_id = data.get("jobId", 0) # type: ignore
Expand Down Expand Up @@ -170,7 +200,7 @@ def _download_job(self, job):
result_filename = job_hash + "_back.zip"
target_result_zip = os.path.join(self.context.local_root, result_filename)
self.storage.download_from_url(job_url, target_result_zip)
Util.unzip_file(target_result_zip, out_dir=self.context.local_root)
unzip_file(target_result_zip, out_dir=self.context.local_root)
try:
os.makedirs(os.path.join(self.context.local_root, "backup"), exist_ok=True)
shutil.move(
Expand Down
3 changes: 3 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,9 @@ dependencies = [
'typing_extensions; python_version < "3.7"',
'pyyaml',
'tomli >= 1.1.0; python_version < "3.11"',
'httpx',
'distro',
'pyhumps'
Comment on lines +32 to +34
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These should be claimed in bohrium-sdk. I don't think they should be claimed in dpdispatcher

]
requires-python = ">=3.7"
readme = "README.md"
Expand Down
Loading