Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
75 changes: 5 additions & 70 deletions gcsfuse-micro-benchmarking/helpers/bucket.py
Original file line number Diff line number Diff line change
@@ -1,35 +1,16 @@
import subprocess
import shlex
import sys
import os

def create_gcs_bucket(location, project,config):
"""
Creates a GCS bucket using the gcloud storage CLI based on a configuration dictionary.

Args:
config (dict): A dictionary with bucket configuration.
e.g., {'bucket_name': 'my-bucket', 'location': 'us-central1', ...}

Raises:
ValueError: If a required configuration field is missing.
RuntimeError: If the gcloud command fails to execute.
"""
# 1. Validate required fields
def create_gcs_bucket(location, project, config):
bucket_name = config.get('bucket_name')

if not bucket_name:
raise ValueError("Bucket configuration failed: 'bucket_name' is missing.")
if not location:
raise ValueError("Bucket configuration failed: 'location' is missing.")

# 2. Build the base gcloud command
cmd = [
'gcloud', 'storage', 'buckets', 'create', f'gs://{bucket_name}', f'--project={project}', '--quiet'
]

# Add optional flags if they are present in the config
# GCS uses 'location' for region, multi-region, or dual-region.
cmd.append(f'--location={shlex.quote(location)}')

if config.get('storage_class'):
Expand All @@ -43,81 +24,35 @@ def create_gcs_bucket(location, project,config):
cmd.append(f'--placement={shlex.quote(config["placement"])}')

try:
# 3. Execute the command
# print(f"Executing command: {' '.join(cmd)}")

result = subprocess.run(cmd, check=True, capture_output=True, text=True)
print(f"Bucket '{bucket_name}' created successfully.")
print(result.stdout)

except FileNotFoundError:
raise RuntimeError("The 'gcloud' command was not found. Please ensure Google Cloud SDK is installed and configured in your system's PATH.")
except subprocess.CalledProcessError as e:
raise RuntimeError(f"Failed to create bucket: {e.stderr}")
except Exception as e:
raise RuntimeError(f"An unexpected error occurred: {e}")
raise RuntimeError(f"Failed to create bucket. An unexpected error occurred: {e}")
Comment on lines 30 to +33
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The exception handling has been simplified, but it now loses valuable debugging information. The previous implementation specifically caught subprocess.CalledProcessError and raised an exception with e.stderr, which is very helpful for diagnosing gcloud command failures. The new generic except Exception loses this detail. I suggest reintroducing specific handling for CalledProcessError to improve error diagnostics.

Suggested change
except FileNotFoundError:
raise RuntimeError("The 'gcloud' command was not found. Please ensure Google Cloud SDK is installed and configured in your system's PATH.")
except subprocess.CalledProcessError as e:
raise RuntimeError(f"Failed to create bucket: {e.stderr}")
except Exception as e:
raise RuntimeError(f"An unexpected error occurred: {e}")
raise RuntimeError(f"Failed to create bucket. An unexpected error occurred: {e}")
except FileNotFoundError:
raise RuntimeError("The 'gcloud' command was not found. Please ensure Google Cloud SDK is installed and configured in your system's PATH.")
except subprocess.CalledProcessError as e:
raise RuntimeError(f"Failed to create bucket: {e.stderr}")
except Exception as e:
raise RuntimeError(f"Failed to create bucket. An unexpected error occurred: {e}")

return True


def delete_gcs_bucket(bucket_name, project):
"""
Deletes a Google Cloud Storage bucket using the gcloud CLI.

This function requires the gcloud CLI to be installed and authenticated.

Args:
bucket_name (str): The name of the bucket to delete.
"""
try:
# Construct the gcloud command. The --quiet flag suppresses
# the confirmation prompt.
command = [
'gcloud', '--quiet', 'storage', 'rm',
'--recursive',
f'gs://{bucket_name}',
f'--project={project}'
]
# Run the command and capture the output
result = subprocess.run(
subprocess.run(
command,
check=True, # This will raise an exception if the command fails
check=True,
capture_output=True,
text=True
)

print(f"Bucket '{bucket_name}' deleted successfully.")

except subprocess.CalledProcessError as e:
print(f"Error deleting bucket '{bucket_name}':")
print("Error Output:", e.stderr)
print("Return Code:", e.returncode)
except FileNotFoundError:
print("Error: The 'gcloud' command was not found. Please ensure the gcloud CLI is installed and in your system's PATH.")


# Example Usage:
if __name__ == '__main__':
# Assuming 'is_zonal' and 'benchmark_id' are defined elsewhere
is_zonal = False
benchmark_id = 'my-benchmark-id'
location = 'us-central1'
default_zone = 'us-central1-a'
default_enable_hns = False
project='my-project'

config = {
'bucket_name': f"{benchmark_id}-bucket",
'placement': default_zone if is_zonal else "",
'storage_class': "RAPID" if is_zonal else "",
'enable_hns': True if is_zonal else default_enable_hns,
}

try:
create_gcs_bucket(location, project, config)
except (ValueError, RuntimeError) as e:
print(f"Error: {e}")

try:
delete_gcs_bucket(config.get('bucket_name'), project)
except Exception as e:
print(f"Could not delete the GCS bucket, Error: {e}")
print("Error: The 'gcloud' command was not found. Please ensure the gcloud CLI is installed and in your system's PATH.")
118 changes: 12 additions & 106 deletions gcsfuse-micro-benchmarking/helpers/environment.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,38 +2,16 @@
import shlex
import os
import time
import json
import requests
from .constants import *


def contruct_metadata_string_from_config(metadata_config):
"""
Constructs a metadata string from a dictionary for gcloud.

Args:
metadata_config (dict): A dictionary of key-value pairs.

Returns:
str: A comma-separated string of key-value pairs.
"""
metadata_items = []
for key, value in metadata_config.items():
# Ensure the value is a string, which is required for gcloud metadata
metadata_items.append(f"{key}={str(value)}")
return ','.join(metadata_items)



def create_vm_if_not_exists(vm_details, zone, project):
"""
Creates a VM if it does not already exist.

Returns:
True: If the VM was newly created in this call.
False: If the VM already existed.
None: If an error occurred.
"""
vm_name = vm_details.get('vm_name')
if not vm_name:
print("Error: 'vm_name' is a required key in vm_details.")
Expand All @@ -48,7 +26,7 @@ def create_vm_if_not_exists(vm_details, zone, project):
try:
subprocess.run(check_cmd, check=True, capture_output=True, text=True, timeout=30)
print(f"VM '{vm_name}' already exists in zone '{zone}'.")
return False # Already exists
return False
except subprocess.CalledProcessError:
print(f"VM '{vm_name}' does not exist in zone '{zone}'. Attempting to create...")

Expand All @@ -66,34 +44,20 @@ def create_vm_if_not_exists(vm_details, zone, project):
cmd_list.append(f'--service-account={vm_details["service_account"]}')

try:
# print(f"Executing creation command: {' '.join(shlex.quote(arg) for arg in cmd_list)}")
subprocess.run(cmd_list, check=True, capture_output=True, text=True, timeout=360) # Increased timeout
subprocess.run(cmd_list, check=True, capture_output=True, text=True, timeout=360)
print(f"VM '{vm_name}' creation command finished successfully.")
return True # Newly Created
except subprocess.CalledProcessError as e:
print(f"Error creating VM {vm_name}: {e}")
print(f"STDERR: {e.stderr}")
return None # Error
except subprocess.TimeoutExpired:
print(f"Timeout creating VM {vm_name}.")
return None # Error
except Exception as e:
return True
except (subprocess.CalledProcessError, subprocess.TimeoutExpired):
return None
except Exception:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

critical

This except block will raise a NameError because the variable e is used in the print statement on the next line but is not defined in this scope. You should change except Exception: to except Exception as e: to capture the exception object.

Suggested change
except Exception:
except Exception as e:

print(f"Unexpected error checking VM existence: {e}")
return None

def is_running_on_gce():
"""
Checks if the script is running on a GCE VM *on the same network*.
Returns False for Cloudtop/Corp workstations to force External IP/IAP usage.
"""
# 1. CRITICAL: Check for Corporate Environment (Cloudtop)
# Cloudtops reside on a different VPC and cannot use --internal-ip
# to reach project VMs. We detect them by their unique home directory path.
if os.path.exists("/usr/local/google/home"):
print("Debug: Detected Cloudtop environment. Forcing External/IAP connection.")
return False

# 2. Standard GCE Metadata Check
try:
response = requests.get(
"http://metadata.google.internal/computeMetadata/v1/instance/",
Expand All @@ -105,12 +69,10 @@ def is_running_on_gce():
return False

def wait_for_ssh(vm_name, zone, project, retries=15, delay=20):
"""Tries to SSH into the VM until it succeeds or retries are exhausted."""

ssh_cmd = [
'gcloud', 'compute', 'ssh', vm_name,
f'--zone={zone}', f'--project={project}',
'--quiet', # Suppress interactive prompts
'--quiet',
]
if is_running_on_gce():
print("Detected environment: GCE VM. Using internal IP.")
Expand All @@ -137,18 +99,7 @@ def wait_for_ssh(vm_name, zone, project, retries=15, delay=20):
return False

def update_vm_metadata_parameter(vm_name, zone, project,metadata_config):
"""
Updates the metadata of a Google Cloud VM instance.

Args:
vm_name (str): The name of the VM instance.
zone (str): The zone where the VM instance is located.
metadata_config (dict): A dictionary of key-value pairs to set as metadata.
"""
# First, construct the metadata string
metadata_string = contruct_metadata_string_from_config(metadata_config)

# Construct the gcloud command
command = (
f"gcloud compute instances add-metadata {vm_name} "
f"--zone={zone} "
Expand All @@ -157,13 +108,10 @@ def update_vm_metadata_parameter(vm_name, zone, project,metadata_config):
)

try:
# Use shlex.split to safely parse the command
command_list = shlex.split(command)

# Run the command
subprocess.run(
command_list,
check=True, # Raises an exception on command failure
check=True,
capture_output=True,
text=True
)
Expand All @@ -189,7 +137,6 @@ def run_script_remotely(vm_name, zone, project, startup_script, max_retries=max_
remote_script_path = f"/tmp/startup_script_{script_filename}"
remote_script_path_quoted = shlex.quote(remote_script_path)

# Step 1: Upload the script
scp_cmd = gcloud_base + ['scp', startup_script, f'{vm_name}:{remote_script_path}', f'--zone={zone}', f'--project={project}']
if is_running_on_gce():
print("Detected GCE environment for SCP. Adding --internal-ip.")
Expand All @@ -211,7 +158,7 @@ def run_script_remotely(vm_name, zone, project, startup_script, max_retries=max_
time.sleep(retry_delay)
else:
print(f"Failed to upload script after {max_retries} retries.")
return False # Return False only after all retries are exhausted
return False
except subprocess.TimeoutExpired as e:
print(f"Timeout expired while uploading script on attempt {i+1}: {e}")
if i < max_retries - 1:
Expand All @@ -223,19 +170,15 @@ def run_script_remotely(vm_name, zone, project, startup_script, max_retries=max_
print(f"An unexpected error occurred during upload: {e}")
return False
else:
# This else block runs only if the for loop completes without a 'break'
# which means all retries failed.
return False

# Step 2: Execute the script inside a detached tmux session
ssh_base = gcloud_base + ['ssh', vm_name, f'--zone={zone}', f'--project={project}']
if is_running_on_gce():
print("Detected GCE environment for execution SSH. Adding --internal-ip.")
ssh_base.append('--internal-ip')
tmux_session_name = f"startup_{vm_name}"
tmux_session_name_quoted = shlex.quote(tmux_session_name)

# Command to install tmux non-interactively if not present (for Debian/Ubuntu)
install_tmux_cmd = (
"if ! command -v tmux &> /dev/null; then "
"echo 'tmux not found, attempting to install...'; "
Expand All @@ -244,8 +187,6 @@ def run_script_remotely(vm_name, zone, project, startup_script, max_retries=max_
"fi"
)

# Command to kill existing session if it exists, then start a new one
# Export DEBIAN_FRONTEND to affect all commands in the shell
remote_exec_cmd = (
f"export DEBIAN_FRONTEND=noninteractive && "
f"{install_tmux_cmd} && "
Expand Down Expand Up @@ -301,13 +242,12 @@ def startup_benchmark_vm(cfg, zone, project,metadata_config):

creation_status = create_vm_if_not_exists(cfg, zone, project)

if creation_status is None: # Error during check/create
if creation_status is None:
print(f"Failed to ensure VM {vm_name} exists.")
return False
elif creation_status is True: # Newly created
elif creation_status is True:
if not wait_for_ssh(vm_name, zone, project):
return False
# If False, VM already existed, proceed.

if not update_vm_metadata_parameter(vm_name, zone, project, metadata_config):
return False
Expand All @@ -317,37 +257,23 @@ def startup_benchmark_vm(cfg, zone, project,metadata_config):


def delete_gce_vm(vm_name, zone, project):
"""
Deletes a GCE VM without a confirmation prompt.

Args:
vm_name (str): The name of the VM to delete.

Returns:
bool: True if the deletion was successful, False otherwise.
"""
if not vm_name:
print("Error: A VM name is required for deletion.")
return False

cmd_list = ['gcloud', 'compute', 'instances', 'delete', vm_name, f'--zone={zone}', f'--project={project}', '--quiet']

try:
# print(f"Executing command: {' '.join(shlex.quote(arg) for arg in cmd_list)}")

# Use subprocess.run to execute the command
subprocess.run(cmd_list, check=True, capture_output=True, text=True)

print(f"VM '{vm_name}' deletion command executed successfully.")

# Poll for deletion to confirm it's complete
print("Waiting for VM to be fully deleted...")
check_cmd_string = f'gcloud compute instances describe {vm_name} --format=value(name)'

while True:
try:
subprocess.run(shlex.split(check_cmd_string), check=True, capture_output=True, text=True)
time.sleep(10) # Wait for 10 seconds before checking again
time.sleep(10)
except subprocess.CalledProcessError:
print(f"VM '{vm_name}' has been successfully deleted.")
return True
Expand All @@ -360,23 +286,3 @@ def delete_gce_vm(vm_name, zone, project):
except FileNotFoundError:
print("Error: 'gcloud' command not found. Please ensure Google Cloud SDK is installed and in your PATH.")
return False


if __name__ == '__main__':
vm_config = {
'vm_name': 'my-test-vm',
'machine_type': 'e2-micro',
'image_family': 'debian-11',
'image_project': 'debian-cloud',
'disk_size': '10GB',
'startup_script': 'test_script.sh'
}

# Create a dummy script for demonstration
with open('test_script.sh', 'w') as f:
f.write("#!/bin/bash\n")
f.write("echo 'Hello from the script!' > /tmp/hello.txt\n")
f.write("echo 'Script executed successfully.'\n")
f.write("touch /tmp/script_status.txt")

delete_gce_vm(vm_config.get('vm_name'), default_zone, default_project)
Loading