From 221856c57ac669dc66ef307d09caa570e434efbf Mon Sep 17 00:00:00 2001 From: Harshith-umesh Date: Thu, 24 Jul 2025 15:55:58 -0400 Subject: [PATCH 1/5] Add --version flag to guidellm Signed-off-by: Harshith-umesh --- src/guidellm/__main__.py | 20 ++++++++ tests/unit/test_cli.py | 101 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 121 insertions(+) create mode 100644 tests/unit/test_cli.py diff --git a/src/guidellm/__main__.py b/src/guidellm/__main__.py index ac0872c3..27f79035 100644 --- a/src/guidellm/__main__.py +++ b/src/guidellm/__main__.py @@ -19,12 +19,32 @@ from guidellm.utils import DefaultGroupHandler from guidellm.utils import cli as cli_tools +try: + from guidellm.version import version +except ImportError: + version = "unknown" + STRATEGY_PROFILE_CHOICES = list( set(list(get_args(ProfileType)) + list(get_args(StrategyType))) ) +def _version_callback(ctx: click.Context, _param: click.Parameter, value: bool) -> None: + """Callback for --version flag.""" + if value: + click.echo(f"guidellm version: {version}") + ctx.exit() + + @click.group() +@click.option( + "--version", + is_flag=True, + expose_value=False, + is_eager=True, + help="Show the version and exit.", + callback=_version_callback, +) def cli(): pass diff --git a/tests/unit/test_cli.py b/tests/unit/test_cli.py new file mode 100644 index 00000000..08fd124d --- /dev/null +++ b/tests/unit/test_cli.py @@ -0,0 +1,101 @@ +""" +Unit tests for CLI functionality, specifically the version flag. 
+""" + +import pytest +from click.testing import CliRunner + +from guidellm.__main__ import cli + + +@pytest.mark.smoke +def test_version_flag_long(): + """Test that --version flag works correctly.""" + runner = CliRunner() + result = runner.invoke(cli, ["--version"]) + + assert result.exit_code == 0 + assert "guidellm version:" in result.output + assert result.output.strip().startswith("guidellm version:") + + +@pytest.mark.smoke +def test_version_flag_displays_actual_version(): + """Test that --version displays the actual version from version.py.""" + runner = CliRunner() + result = runner.invoke(cli, ["--version"]) + + assert result.exit_code == 0 + import re + + version_pattern = r"guidellm version: \d+\.\d+" + assert re.search(version_pattern, result.output) + + +@pytest.mark.smoke +def test_version_flag_exits_cleanly(): + """Test that --version exits without processing other commands.""" + runner = CliRunner() + result = runner.invoke(cli, ["--version", "benchmark"]) + + assert result.exit_code == 0 + assert "guidellm version:" in result.output + assert "Commands to run a new benchmark" not in result.output + + +@pytest.mark.smoke +def test_help_shows_version_option(): + """Test that --help shows the --version option.""" + runner = CliRunner() + result = runner.invoke(cli, ["--help"]) + + assert result.exit_code == 0 + assert "--version" in result.output + assert "Show the version and exit" in result.output + + +@pytest.mark.smoke +def test_other_commands_still_work(): + """Test that other CLI commands still work after adding version flag.""" + runner = CliRunner() + result = runner.invoke(cli, ["--help"]) + + assert result.exit_code == 0 + assert "benchmark" in result.output + assert "config" in result.output + assert "preprocess" in result.output + + +@pytest.mark.smoke +def test_version_flag_case_sensitivity(): + """Test that --version flag is case sensitive.""" + runner = CliRunner() + + result = runner.invoke(cli, ["--version"]) + assert result.exit_code 
== 0 + assert "guidellm version:" in result.output + + # --VERSION should not work + result = runner.invoke(cli, ["--VERSION"]) + assert result.exit_code != 0 + assert "No such option" in result.output + + +@pytest.mark.integration +def test_version_integration_with_actual_version(): + """Integration test to verify version matches what's in version.py.""" + try: + from guidellm.version import version as actual_version + + runner = CliRunner() + result = runner.invoke(cli, ["--version"]) + + assert result.exit_code == 0 + expected_output = f"guidellm version: {actual_version}" + assert expected_output in result.output + except ImportError: + runner = CliRunner() + result = runner.invoke(cli, ["--version"]) + + assert result.exit_code == 0 + assert "guidellm version: unknown" in result.output From 8bec7b8f7b35074a7f6ca83cd379210be8632381 Mon Sep 17 00:00:00 2001 From: Harshith-umesh Date: Fri, 25 Jul 2025 10:49:56 -0400 Subject: [PATCH 2/5] Fix linting errors 2 Signed-off-by: Harshith-umesh --- src/guidellm/__main__.py | 3 ++- tests/unit/test_cli.py | 4 +++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/guidellm/__main__.py b/src/guidellm/__main__.py index 27f79035..46057cd1 100644 --- a/src/guidellm/__main__.py +++ b/src/guidellm/__main__.py @@ -19,6 +19,7 @@ from guidellm.utils import DefaultGroupHandler from guidellm.utils import cli as cli_tools +# Import version information try: from guidellm.version import version except ImportError: @@ -71,7 +72,7 @@ def benchmark(): readable=True, file_okay=True, dir_okay=False, - path_type=Path, # type: ignore[type-var] + path_type=Path, ), click.Choice(get_builtin_scenarios()), ), diff --git a/tests/unit/test_cli.py b/tests/unit/test_cli.py index 08fd124d..9e176493 100644 --- a/tests/unit/test_cli.py +++ b/tests/unit/test_cli.py @@ -85,7 +85,9 @@ def test_version_flag_case_sensitivity(): def test_version_integration_with_actual_version(): """Integration test to verify version matches what's in 
version.py.""" try: - from guidellm.version import version as actual_version + from guidellm.version import ( + version as actual_version, + ) runner = CliRunner() result = runner.invoke(cli, ["--version"]) From fec23c061e2234d424f0d4273c1c29b0592e8a38 Mon Sep 17 00:00:00 2001 From: Elijah DeLee Date: Tue, 29 Jul 2025 18:20:34 -0400 Subject: [PATCH 3/5] Add CLI options for backend args (like headers and verify) (#230) This PR adds the ability to configure custom request headers and control SSL certificate verification when running benchmarks. * The OpenAIHTTPBackend now supports passing custom headers and a verify flag to disable SSL verification. * Headers are now merged with the following precedence: CLI arguments (--backend-args), scenario file arguments, environment variables, and then default values. * Headers can be removed by setting their value to null in the --backend-args JSON string. * The --backend-args help text has been updated with an example of how to use these new features. * New documentation has been added for the CLI, configuration options, and supported data formats. * Unit tests have been added to verify the new header and SSL verification logic, as well as the CLI argument parsing. This provides a way to benchmark targets that require custom authentication, other headers, or use self-signed SSL certificates. 
Signed-off-by: Elijah DeLee --- docs/guides/cli.md | 37 +++++++- docs/guides/configuration.md | 60 ++++++++++++- docs/guides/data_formats.md | 67 ++++++++++++++ src/guidellm/__main__.py | 4 +- src/guidellm/backend/openai.py | 44 ++++++---- src/guidellm/config.py | 2 + tests/unit/backend/test_openai_backend.py | 4 +- .../test_openai_backend_custom_configs.py | 88 +++++++++++++++++++ tests/unit/test_config.py | 4 + tests/unit/test_main.py | 84 ++++++++++++++++++ 10 files changed, 373 insertions(+), 21 deletions(-) create mode 100644 docs/guides/data_formats.md create mode 100644 tests/unit/backend/test_openai_backend_custom_configs.py create mode 100644 tests/unit/test_main.py diff --git a/docs/guides/cli.md b/docs/guides/cli.md index d30962bd..77796892 100644 --- a/docs/guides/cli.md +++ b/docs/guides/cli.md @@ -1 +1,36 @@ -# Coming Soon +# CLI Reference + +This page provides a reference for the `guidellm` command-line interface. For more advanced configuration, including environment variables and `.env` files, see the [Configuration Guide](./configuration.md). + +## `guidellm benchmark run` + +This command is the primary entrypoint for running benchmarks. It has many options that can be specified on the command line or in a scenario file. + +### Scenario Configuration + +| Option | Description | +| --------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------- | +| `--scenario ` | The name of a builtin scenario or path to a scenario configuration file. Options specified on the command line will override the scenario file. | + +### Target and Backend Configuration + +These options configure how `guidellm` connects to the system under test. 
+ +| Option | Description | +| ----------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `--target ` | **Required.** The endpoint of the target system, e.g., `http://localhost:8080`. Can also be set with the `GUIDELLM__OPENAI__BASE_URL` environment variable. | +| `--backend-type ` | The type of backend to use. Defaults to `openai_http`. | +| `--backend-args ` | A JSON string for backend-specific arguments. For example: `--backend-args '{"headers": {"Authorization": "Bearer my-token"}, "verify": false}'` to pass custom headers and disable certificate verification. | +| `--model ` | The ID of the model to benchmark within the backend. | + +### Data and Request Configuration + +These options define the data to be used for benchmarking and how requests will be generated. + +| Option | Description | +| ------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `--data ` | The data source. This can be a HuggingFace dataset ID, a path to a local data file, or a synthetic data configuration. See the [Data Formats Guide](./data_formats.md) for more details. | +| `--rate-type ` | The type of request generation strategy to use (e.g., `constant`, `poisson`, `sweep`). | +| `--rate ` | The rate of requests per second for `constant` or `poisson` strategies, or the number of steps for a `sweep`. | +| `--max-requests ` | The maximum number of requests to run for each benchmark. | +| `--max-seconds ` | The maximum number of seconds to run each benchmark for. 
| diff --git a/docs/guides/configuration.md b/docs/guides/configuration.md index d30962bd..90c770f1 100644 --- a/docs/guides/configuration.md +++ b/docs/guides/configuration.md @@ -1 +1,59 @@ -# Coming Soon +# Configuration + +The `guidellm` application can be configured using command-line arguments, environment variables, or a `.env` file. This page details the file-based and environment variable configuration options. + +## Configuration Methods + +Settings are loaded with the following priority (highest priority first): + +1. Command-line arguments. +2. Environment variables. +3. Values in a `.env` file in the directory where the command is run. +4. Default values. + +## Environment Variable Format + +All settings can be configured using environment variables. The variables must be prefixed with `GUIDELLM__`, and nested settings are separated by a double underscore `__`. + +For example, to set the `api_key` for the `openai` backend, you would use the following environment variable: + +```bash +export GUIDELLM__OPENAI__API_KEY="your-api-key" +``` + +### Target and Backend Configuration + +You can configure the connection to the target system using environment variables. This is an alternative to using the `--target-*` command-line flags. + +| Environment Variable | Description | Example | +| ------------------------------------- | -------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------- | +| `GUIDELLM__OPENAI__BASE_URL` | The endpoint of the target system. Equivalent to the `--target` CLI option. | `export GUIDELLM__OPENAI__BASE_URL="http://localhost:8080"` | +| `GUIDELLM__OPENAI__API_KEY` | The API key to use for bearer token authentication. | `export GUIDELLM__OPENAI__API_KEY="your-secret-api-key"` | +| `GUIDELLM__OPENAI__BEARER_TOKEN` | The full bearer token to use for authentication. 
| `export GUIDELLM__OPENAI__BEARER_TOKEN="Bearer your-secret-token"` | +| `GUIDELLM__OPENAI__HEADERS` | A JSON string representing a dictionary of headers to send to the target. These headers will override any default headers. | `export GUIDELLM__OPENAI__HEADERS='{"Authorization": "Bearer my-token"}'` | +| `GUIDELLM__OPENAI__ORGANIZATION` | The OpenAI organization to use for requests. | `export GUIDELLM__OPENAI__ORGANIZATION="org-12345"` | +| `GUIDELLM__OPENAI__PROJECT` | The OpenAI project to use for requests. | `export GUIDELLM__OPENAI__PROJECT="proj-67890"` | +| `GUIDELLM__OPENAI__VERIFY` | Set to `false` or `0` to disable certificate verification. | `export GUIDELLM__OPENAI__VERIFY=false` | +| `GUIDELLM__OPENAI__MAX_OUTPUT_TOKENS` | The default maximum number of tokens to request for completions. | `export GUIDELLM__OPENAI__MAX_OUTPUT_TOKENS=2048` | + +### General HTTP Settings + +These settings control the behavior of the underlying HTTP client. + +| Environment Variable | Description | +| ------------------------------------ | ------------------------------------------------------------------------------- | +| `GUIDELLM__REQUEST_TIMEOUT` | The timeout in seconds for HTTP requests. Defaults to 300. | +| `GUIDELLM__REQUEST_HTTP2` | Set to `true` or `1` to enable HTTP/2 support. Defaults to true. | +| `GUIDELLM__REQUEST_FOLLOW_REDIRECTS` | Set to `true` or `1` to allow the client to follow redirects. Defaults to true. 
| + +### Using a `.env` file + +You can also place these variables in a `.env` file in your project's root directory: + +```dotenv +# .env file +GUIDELLM__OPENAI__BASE_URL="http://localhost:8080" +GUIDELLM__OPENAI__API_KEY="your-api-key" +GUIDELLM__OPENAI__HEADERS='{"Authorization": "Bearer my-token"}' +GUIDELLM__OPENAI__VERIFY=false +``` diff --git a/docs/guides/data_formats.md b/docs/guides/data_formats.md new file mode 100644 index 00000000..dd12c591 --- /dev/null +++ b/docs/guides/data_formats.md @@ -0,0 +1,67 @@ +# Data Formats + +The `--data` argument for the `guidellm benchmark run` command accepts several different formats for specifying the data to be used for benchmarking. + +## Local Data Files + +You can provide a path to a local data file in one of the following formats: + +- **CSV (.csv)**: A comma-separated values file. The loader will attempt to find a column with a common name for the prompt (e.g., `prompt`, `text`, `instruction`). +- **JSON (.json)**: A JSON file. The structure should be a list of objects, where each object represents a row of data. +- **JSON Lines (.jsonl)**: A file where each line is a valid JSON object. +- **Text (.txt)**: A plain text file, where each line is treated as a separate prompt. + +If the prompt column cannot be automatically determined, you can specify it using the `--data-args` option: + +```bash +--data-args '{"text_column": "my_custom_prompt_column"}' +``` + +## Synthetic Data + +You can generate synthetic data on the fly by providing a configuration string or file. + +### Configuration Options + +| Parameter | Description | +| --------------------- | --------------------------------------------------------------------------------------------------------------- | +| `prompt_tokens` | **Required.** The average number of tokens for the generated prompts. | +| `output_tokens` | **Required.** The average number of tokens for the generated outputs. | +| `samples` | The total number of samples to generate. 
Defaults to 1000. | +| `source` | The source text to use for generating the synthetic data. Defaults to a built-in copy of "Pride and Prejudice". | +| `prompt_tokens_stdev` | The standard deviation of the tokens generated for prompts. | +| `prompt_tokens_min` | The minimum number of text tokens generated for prompts. | +| `prompt_tokens_max` | The maximum number of text tokens generated for prompts. | +| `output_tokens_stdev` | The standard deviation of the tokens generated for outputs. | +| `output_tokens_min` | The minimum number of text tokens generated for outputs. | +| `output_tokens_max` | The maximum number of text tokens generated for outputs. | + +### Configuration Formats + +You can provide the synthetic data configuration in one of three ways: + +1. **Key-Value String:** + + ```bash + --data "prompt_tokens=256,output_tokens=128,samples=500" + ``` + +2. **JSON String:** + + ```bash + --data '{"prompt_tokens": 256, "output_tokens": 128, "samples": 500}' + ``` + +3. **YAML or Config File:** Create a file (e.g., `my_config.yaml`): + + ```yaml + prompt_tokens: 256 + output_tokens: 128 + samples: 500 + ``` + + And use it with the `--data` argument: + + ```bash + --data my_config.yaml + ``` diff --git a/src/guidellm/__main__.py b/src/guidellm/__main__.py index 46057cd1..6e23a506 100644 --- a/src/guidellm/__main__.py +++ b/src/guidellm/__main__.py @@ -103,7 +103,9 @@ def benchmark(): default=GenerativeTextScenario.get_default("backend_args"), help=( "A JSON string containing any arguments to pass to the backend as a " - "dict with **kwargs." + "dict with **kwargs. Headers can be removed by setting their value to " + "null. 
For example: " + """'{"headers": {"Authorization": null, "Custom-Header": "Custom-Value"}}'""" ), ) @click.option( diff --git a/src/guidellm/backend/openai.py b/src/guidellm/backend/openai.py index 6e24dfc5..e62e9003 100644 --- a/src/guidellm/backend/openai.py +++ b/src/guidellm/backend/openai.py @@ -95,6 +95,8 @@ def __init__( extra_query: Optional[dict] = None, extra_body: Optional[dict] = None, remove_from_body: Optional[list[str]] = None, + headers: Optional[dict] = None, + verify: Optional[bool] = None, ): super().__init__(type_="openai_http") self._target = target or settings.openai.base_url @@ -111,13 +113,32 @@ def __init__( self._model = model + # Start with default headers based on other params + default_headers: dict[str, str] = {} api_key = api_key or settings.openai.api_key - self.authorization = ( - f"Bearer {api_key}" if api_key else settings.openai.bearer_token - ) + bearer_token = settings.openai.bearer_token + if api_key: + default_headers["Authorization"] = f"Bearer {api_key}" + elif bearer_token: + default_headers["Authorization"] = bearer_token self.organization = organization or settings.openai.organization + if self.organization: + default_headers["OpenAI-Organization"] = self.organization + self.project = project or settings.openai.project + if self.project: + default_headers["OpenAI-Project"] = self.project + + # User-provided headers from kwargs or settings override defaults + merged_headers = default_headers.copy() + merged_headers.update(settings.openai.headers or {}) + if headers: + merged_headers.update(headers) + + # Remove headers with None values for backward compatibility and convenience + self.headers = {k: v for k, v in merged_headers.items() if v is not None} + self.timeout = timeout if timeout is not None else settings.request_timeout self.http2 = http2 if http2 is not None else settings.request_http2 self.follow_redirects = ( @@ -125,6 +146,7 @@ def __init__( if follow_redirects is not None else 
settings.request_follow_redirects ) + self.verify = verify if verify is not None else settings.openai.verify self.max_output_tokens = ( max_output_tokens if max_output_tokens is not None @@ -161,9 +183,7 @@ def info(self) -> dict[str, Any]: "timeout": self.timeout, "http2": self.http2, "follow_redirects": self.follow_redirects, - "authorization": bool(self.authorization), - "organization": self.organization, - "project": self.project, + "headers": self.headers, "text_completions_path": TEXT_COMPLETIONS_PATH, "chat_completions_path": CHAT_COMPLETIONS_PATH, } @@ -384,6 +404,7 @@ def _get_async_client(self) -> httpx.AsyncClient: http2=self.http2, timeout=self.timeout, follow_redirects=self.follow_redirects, + verify=self.verify, ) self._async_client = client else: @@ -395,16 +416,7 @@ def _headers(self) -> dict[str, str]: headers = { "Content-Type": "application/json", } - - if self.authorization: - headers["Authorization"] = self.authorization - - if self.organization: - headers["OpenAI-Organization"] = self.organization - - if self.project: - headers["OpenAI-Project"] = self.project - + headers.update(self.headers) return headers def _params(self, endpoint_type: EndpointType) -> dict[str, str]: diff --git a/src/guidellm/config.py b/src/guidellm/config.py index 3b426bd8..beda55fc 100644 --- a/src/guidellm/config.py +++ b/src/guidellm/config.py @@ -81,10 +81,12 @@ class OpenAISettings(BaseModel): api_key: Optional[str] = None bearer_token: Optional[str] = None + headers: Optional[dict[str, str]] = None organization: Optional[str] = None project: Optional[str] = None base_url: str = "http://localhost:8000" max_output_tokens: int = 16384 + verify: bool = True class ReportGenerationSettings(BaseModel): diff --git a/tests/unit/backend/test_openai_backend.py b/tests/unit/backend/test_openai_backend.py index b461acff..0a4c2c38 100644 --- a/tests/unit/backend/test_openai_backend.py +++ b/tests/unit/backend/test_openai_backend.py @@ -11,7 +11,7 @@ def 
test_openai_http_backend_default_initialization(): backend = OpenAIHTTPBackend() assert backend.target == settings.openai.base_url assert backend.model is None - assert backend.authorization == settings.openai.bearer_token + assert backend.headers.get("Authorization") == settings.openai.bearer_token assert backend.organization == settings.openai.organization assert backend.project == settings.openai.project assert backend.timeout == settings.request_timeout @@ -37,7 +37,7 @@ def test_openai_http_backend_intialization(): ) assert backend.target == "http://test-target" assert backend.model == "test-model" - assert backend.authorization == "Bearer test-key" + assert backend.headers.get("Authorization") == "Bearer test-key" assert backend.organization == "test-org" assert backend.project == "test-proj" assert backend.timeout == 10 diff --git a/tests/unit/backend/test_openai_backend_custom_configs.py b/tests/unit/backend/test_openai_backend_custom_configs.py new file mode 100644 index 00000000..7f6706ad --- /dev/null +++ b/tests/unit/backend/test_openai_backend_custom_configs.py @@ -0,0 +1,88 @@ +import pytest + +from guidellm.backend import OpenAIHTTPBackend +from guidellm.config import settings + + +@pytest.mark.smoke +def test_openai_http_backend_default_initialization(): + backend = OpenAIHTTPBackend() + assert backend.verify is True + + +@pytest.mark.smoke +def test_openai_http_backend_custom_ssl_verification(): + backend = OpenAIHTTPBackend(verify=False) + assert backend.verify is False + + +@pytest.mark.smoke +def test_openai_http_backend_custom_headers_override(): + # Set a default api_key, which would normally create an Authorization header + settings.openai.api_key = "default-api-key" + + # Set custom headers that override the default Authorization and add a new header + openshift_token = "Bearer sha256~xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" + override_headers = { + "Authorization": openshift_token, + "Custom-Header": "Custom-Value", + } + + # Initialize 
the backend + backend = OpenAIHTTPBackend(headers=override_headers) + + # Check that the override headers are used + assert backend.headers["Authorization"] == openshift_token + assert backend.headers["Custom-Header"] == "Custom-Value" + assert len(backend.headers) == 2 + + # Reset the settings + settings.openai.api_key = None + settings.openai.headers = None + + +@pytest.mark.smoke +def test_openai_http_backend_kwarg_headers_override_settings(): + # Set headers via settings (simulating environment variables) + settings.openai.headers = {"Authorization": "Bearer settings-token"} + + # Set different headers via kwargs (simulating --backend-args) + override_headers = { + "Authorization": "Bearer kwargs-token", + "Custom-Header": "Custom-Value", + } + + # Initialize the backend with kwargs + backend = OpenAIHTTPBackend(headers=override_headers) + + # Check that the kwargs headers took precedence + assert backend.headers["Authorization"] == "Bearer kwargs-token" + assert backend.headers["Custom-Header"] == "Custom-Value" + assert len(backend.headers) == 2 + + # Reset the settings + settings.openai.headers = None + + +@pytest.mark.smoke +def test_openai_http_backend_remove_header_with_none(): + # Set a default api_key, which would normally create an Authorization header + settings.openai.api_key = "default-api-key" + + # Set a custom header and explicitly set Authorization to None to remove it + override_headers = { + "Authorization": None, + "Custom-Header": "Custom-Value", + } + + # Initialize the backend + backend = OpenAIHTTPBackend(headers=override_headers) + + # Check that the Authorization header is removed and the custom header is present + assert "Authorization" not in backend.headers + assert backend.headers["Custom-Header"] == "Custom-Value" + assert len(backend.headers) == 1 + + # Reset the settings + settings.openai.api_key = None + settings.openai.headers = None diff --git a/tests/unit/test_config.py b/tests/unit/test_config.py index db9f5a61..f5d9415c 
100644 --- a/tests/unit/test_config.py +++ b/tests/unit/test_config.py @@ -178,9 +178,13 @@ def test_settings_with_env_variables(mocker): "GUIDELLM__DATASET__PREFERRED_DATA_COLUMNS": '["custom_column"]', "GUIDELLM__OPENAI__API_KEY": "env_api_key", "GUIDELLM__TABLE_BORDER_CHAR": "*", + "GUIDELLM__OPENAI__HEADERS": '{"Authorization": "Bearer env-token"}', + "GUIDELLM__OPENAI__VERIFY": "false", }, ) settings = Settings() assert settings.dataset.preferred_data_columns == ["custom_column"] assert settings.openai.api_key == "env_api_key" assert settings.table_border_char == "*" + assert settings.openai.headers == {"Authorization": "Bearer env-token"} + assert settings.openai.verify is False diff --git a/tests/unit/test_main.py b/tests/unit/test_main.py new file mode 100644 index 00000000..e813dba4 --- /dev/null +++ b/tests/unit/test_main.py @@ -0,0 +1,84 @@ +import json +from pathlib import Path +from unittest.mock import patch + +import pytest +from click.testing import CliRunner + +from guidellm.__main__ import cli + + +@pytest.mark.smoke +def test_benchmark_run_with_backend_args(): + runner = CliRunner() + result = runner.invoke( + cli, + [ + "benchmark", + "run", + "--backend-args", + '{"headers": {"Authorization": "Bearer my-token"}, "verify": false}', + "--target", + "http://localhost:8000", + "--data", + "prompt_tokens=1,output_tokens=1", + "--rate-type", + "constant", + "--rate", + "1", + "--max-requests", + "1", + ], + ) + # This will fail because it can't connect to the server, + # but it will pass the header parsing, which is what we want to test. + assert result.exit_code != 0 + assert "Invalid header format" not in result.output + + +@patch("guidellm.__main__.benchmark_with_scenario") +def test_cli_backend_args_header_removal(mock_benchmark_func, tmp_path: Path): + """ + Tests that --backend-args from the CLI correctly overrides scenario + values and that `null` correctly removes a header. 
+ """ + scenario_path = tmp_path / "scenario.json" + + # Create a scenario file with a header that should be overridden and removed + scenario_content = { + "backend_type": "openai_http", + "backend_args": {"headers": {"Authorization": "should-be-removed"}}, + "data": "prompt_tokens=10,output_tokens=10", + "max_requests": 1, + "target": "http://dummy-target", + "rate_type": "synchronous", + "processor": "gpt2", + } + with scenario_path.open("w") as f: + json.dump(scenario_content, f) + + runner = CliRunner() + result = runner.invoke( + cli, + [ + "benchmark", + "run", + "--scenario", + str(scenario_path), + "--backend-args", + '{"headers": {"Authorization": null, "Custom-Header": "Custom-Value"}}', + ], + catch_exceptions=False, + ) + + assert result.exit_code == 0, result.output + + # Assert that benchmark_with_scenario was called with the correct scenario + mock_benchmark_func.assert_called_once() + call_args = mock_benchmark_func.call_args[1] + scenario = call_args["scenario"] + + # Verify the backend_args were merged correctly + backend_args = scenario.backend_args + expected_headers = {"Authorization": None, "Custom-Header": "Custom-Value"} + assert backend_args["headers"] == expected_headers From 1313bca9cdd6cce68536427eb51dd7414aedab30 Mon Sep 17 00:00:00 2001 From: Samuel Monson Date: Wed, 30 Jul 2025 18:09:41 -0400 Subject: [PATCH 4/5] Drop Entrypoint and Improve Containerfile (#250) * Drop the container entrypoint script as GuideLLM has had native support for its features since #99 * Make containerfile more rebuild friendly based on #213 * Drop the ENV default scenario as it is confusing to users setting CLI args Closes: #213 --------- Signed-off-by: Samuel Monson --- README.md | 19 +++++++++++++++++++ deploy/Containerfile | 38 ++++++++++++++++---------------------- deploy/entrypoint.sh | 43 ------------------------------------------- 3 files changed, 35 insertions(+), 65 deletions(-) delete mode 100755 deploy/entrypoint.sh diff --git a/README.md 
b/README.md index 9312c55f..b1abc75f 100644 --- a/README.md +++ b/README.md @@ -52,6 +52,25 @@ pip install git+https://github.com/vllm-project/guidellm.git For detailed installation instructions and requirements, see the [Installation Guide](https://github.com/vllm-project/guidellm/blob/main/docs/install.md). +### With Podman / Docker + +Alternatively, we publish container images at [ghcr.io/vllm-project/guidellm](https://github.com/vllm-project/guidellm/pkgs/container/guidellm). Running a container is (by default) equivalent to `guidellm benchmark run`: + +```bash +podman run \ + --rm -it \ + -v "./results:/results:rw" \ + -e GUIDELLM_TARGET=http://localhost:8000 \ + -e GUIDELLM_RATE_TYPE=sweep \ + -e GUIDELLM_MAX_SECONDS=30 \ + -e GUIDELLM_DATA="prompt_tokens=256,output_tokens=128" \ + ghcr.io/vllm-project/guidellm:latest +``` + +> [!TIP] CLI options can also be specified as ENV variables (e.g., `--rate-type sweep` -> `GUIDELLM_RATE_TYPE=sweep`). If both are specified then the CLI option overrides the ENV. + +Replace `latest` with `stable` for the newest tagged release or set a specific release if desired. + ### Quick Start #### 1. 
Start an OpenAI Compatible Server (vLLM) diff --git a/deploy/Containerfile b/deploy/Containerfile index 2702e24d..7715de93 100644 --- a/deploy/Containerfile +++ b/deploy/Containerfile @@ -1,26 +1,26 @@ -ARG PYTHON=3.13 +ARG BASE_IMAGE=docker.io/python:3.13-slim # Use a multi-stage build to create a lightweight production image -FROM docker.io/python:${PYTHON}-slim as builder +FROM $BASE_IMAGE as builder + +# Ensure files are installed as root +USER root # Copy repository files -COPY / /src +COPY / /opt/app-root/src # Create a venv and install guidellm -RUN python3 -m venv /opt/guidellm \ - && /opt/guidellm/bin/pip install --no-cache-dir /src - -# Copy entrypoint script into the venv bin directory -RUN install -m0755 /src/deploy/entrypoint.sh /opt/guidellm/bin/entrypoint.sh +RUN python3 -m venv /opt/app-root/guidellm \ + && /opt/app-root/guidellm/bin/pip install --no-cache-dir /opt/app-root/src # Prod image -FROM docker.io/python:${PYTHON}-slim +FROM $BASE_IMAGE # Copy the virtual environment from the builder stage -COPY --from=builder /opt/guidellm /opt/guidellm +COPY --from=builder /opt/app-root/guidellm /opt/app-root/guidellm # Add guidellm bin to PATH -ENV PATH="/opt/guidellm/bin:$PATH" +ENV PATH="/opt/app-root/guidellm/bin:$PATH" # Create a non-root user RUN useradd -md /results guidellm @@ -35,14 +35,8 @@ WORKDIR /results LABEL org.opencontainers.image.source="https://github.com/vllm-project/guidellm" \ org.opencontainers.image.description="GuideLLM Performance Benchmarking Container" -# Set the environment variable for the benchmark script -# TODO: Replace with scenario environment variables -ENV GUIDELLM_TARGET="http://localhost:8000" \ - GUIDELLM_MODEL="neuralmagic/Meta-Llama-3.1-8B-Instruct-quantized.w4a16" \ - GUIDELLM_RATE_TYPE="sweep" \ - GUIDELLM_DATA="prompt_tokens=256,output_tokens=128" \ - GUIDELLM_MAX_REQUESTS="100" \ - GUIDELLM_MAX_SECONDS="" \ - GUIDELLM_OUTPUT_PATH="/results/results.json" - -ENTRYPOINT [ "/opt/guidellm/bin/entrypoint.sh" ] +# 
Argument defaults can be set with GUIDELLM_-prefixed environment variables (e.g. GUIDELLM_RATE_TYPE for --rate-type) +ENV GUIDELLM_OUTPUT_PATH="/results/benchmarks.json" + +ENTRYPOINT [ "/opt/app-root/guidellm/bin/guidellm" ] +CMD [ "benchmark", "run" ] diff --git a/deploy/entrypoint.sh b/deploy/entrypoint.sh deleted file mode 100755 index d6ff4ea0..00000000 --- a/deploy/entrypoint.sh +++ /dev/null @@ -1,43 +0,0 @@ -#!/usr/bin/env bash -set -euo pipefail - -# Path to the guidellm binary -guidellm_bin="/opt/guidellm/bin/guidellm" - -# If we receive any arguments switch to guidellm command -if [ $# -gt 0 ]; then - echo "Running command: guidellm $*" - exec $guidellm_bin "$@" -fi - -# Get a list of environment variables that start with GUIDELLM_ -args="$(printenv | cut -d= -f1 | grep -E '^GUIDELLM_')" - -# NOTE: Bash array + exec prevent shell escape issues -CMD=("${guidellm_bin}" "benchmark") - -# Parse environment variables for the benchmark command -for var in $args; do - # Remove GUIDELLM_ prefix - arg_name="${var#GUIDELLM_}" - - # If there is an extra underscore at the - # start than this is a config variable - if [ "${arg_name:0:1}" == "_" ]; then - continue - fi - - # Convert to lowercase - arg_name="${arg_name,,}" - # Replace underscores with dashes - arg_name="${arg_name//_/-}" - - # Add the argument to the command array if set - if [ -n "${!var}" ]; then - CMD+=("--${arg_name}" "${!var}") - fi -done - -# Execute the command -echo "Running command: ${CMD[*]}" -exec "${CMD[@]}" From d82ae7e147f80cea6b3db8bdfd59dc4f96ad6698 Mon Sep 17 00:00:00 2001 From: Harshith-umesh Date: Fri, 1 Aug 2025 13:25:44 -0400 Subject: [PATCH 5/5] use click to get package version Signed-off-by: Harshith-umesh --- src/guidellm/__main__.py | 22 +--------------------- tests/unit/test_cli.py | 16 +++++++++------- 2 files changed, 10 insertions(+), 28 deletions(-) diff --git a/src/guidellm/__main__.py b/src/guidellm/__main__.py index 6e23a506..7cba6a7c 100644 --- a/src/guidellm/__main__.py +++ b/src/guidellm/__main__.py @@ -19,33 +19,13 @@ from guidellm.utils 
import DefaultGroupHandler from guidellm.utils import cli as cli_tools -# Import version information -try: - from guidellm.version import version -except ImportError: - version = "unknown" - STRATEGY_PROFILE_CHOICES = list( set(list(get_args(ProfileType)) + list(get_args(StrategyType))) ) -def _version_callback(ctx: click.Context, _param: click.Parameter, value: bool) -> None: - """Callback for --version flag.""" - if value: - click.echo(f"guidellm version: {version}") - ctx.exit() - - @click.group() -@click.option( - "--version", - is_flag=True, - expose_value=False, - is_eager=True, - help="Show the version and exit.", - callback=_version_callback, -) +@click.version_option(package_name="guidellm", message="guidellm version: %(version)s") def cli(): pass diff --git a/tests/unit/test_cli.py b/tests/unit/test_cli.py index 9e176493..f58e7f38 100644 --- a/tests/unit/test_cli.py +++ b/tests/unit/test_cli.py @@ -83,11 +83,11 @@ def test_version_flag_case_sensitivity(): @pytest.mark.integration def test_version_integration_with_actual_version(): - """Integration test to verify version matches what's in version.py.""" + """Integration test to verify version matches importlib.metadata.""" + import importlib.metadata + try: - from guidellm.version import ( - version as actual_version, - ) + actual_version = importlib.metadata.version("guidellm") runner = CliRunner() result = runner.invoke(cli, ["--version"]) @@ -95,9 +95,11 @@ def test_version_integration_with_actual_version(): assert result.exit_code == 0 expected_output = f"guidellm version: {actual_version}" assert expected_output in result.output - except ImportError: + except importlib.metadata.PackageNotFoundError: + # If package is not installed, the CLI should show an error + # This is expected behavior when the package isn't properly installed runner = CliRunner() result = runner.invoke(cli, ["--version"]) - assert result.exit_code == 0 - assert "guidellm version: unknown" in result.output + # Click will handle 
the error when package is not found + assert result.exit_code != 0