From 221856c57ac669dc66ef307d09caa570e434efbf Mon Sep 17 00:00:00 2001
From: Harshith-umesh <harshith.umesh.nat@gmail.com>
Date: Thu, 24 Jul 2025 15:55:58 -0400
Subject: [PATCH 1/5] Add --version flag to guidellm

Signed-off-by: Harshith-umesh <harshith.umesh.nat@gmail.com>
---
 src/guidellm/__main__.py |  20 ++++++++
 tests/unit/test_cli.py   | 101 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 121 insertions(+)
 create mode 100644 tests/unit/test_cli.py

diff --git a/src/guidellm/__main__.py b/src/guidellm/__main__.py
index ac0872c3..27f79035 100644
--- a/src/guidellm/__main__.py
+++ b/src/guidellm/__main__.py
@@ -19,12 +19,32 @@
 from guidellm.utils import DefaultGroupHandler
 from guidellm.utils import cli as cli_tools
 
+try:
+    from guidellm.version import version
+except ImportError:
+    version = "unknown"
+
 STRATEGY_PROFILE_CHOICES = list(
     set(list(get_args(ProfileType)) + list(get_args(StrategyType)))
 )
 
 
+def _version_callback(ctx: click.Context, _param: click.Parameter, value: bool) -> None:
+    """Callback for --version flag."""
+    if value:
+        click.echo(f"guidellm version: {version}")
+        ctx.exit()
+
+
 @click.group()
+@click.option(
+    "--version",
+    is_flag=True,
+    expose_value=False,
+    is_eager=True,
+    help="Show the version and exit.",
+    callback=_version_callback,
+)
 def cli():
     pass
 
diff --git a/tests/unit/test_cli.py b/tests/unit/test_cli.py
new file mode 100644
index 00000000..08fd124d
--- /dev/null
+++ b/tests/unit/test_cli.py
@@ -0,0 +1,101 @@
+"""
+Unit tests for CLI functionality, specifically the version flag.
+"""
+
+import pytest
+from click.testing import CliRunner
+
+from guidellm.__main__ import cli
+
+
+@pytest.mark.smoke
+def test_version_flag_long():
+    """Test that --version flag works correctly."""
+    runner = CliRunner()
+    result = runner.invoke(cli, ["--version"])
+
+    assert result.exit_code == 0
+    assert "guidellm version:" in result.output
+    assert result.output.strip().startswith("guidellm version:")
+
+
+@pytest.mark.smoke
+def test_version_flag_displays_actual_version():
+    """Test that --version displays the actual version from version.py."""
+    runner = CliRunner()
+    result = runner.invoke(cli, ["--version"])
+
+    assert result.exit_code == 0
+    import re
+
+    version_pattern = r"guidellm version: \d+\.\d+"
+    assert re.search(version_pattern, result.output)
+
+
+@pytest.mark.smoke
+def test_version_flag_exits_cleanly():
+    """Test that --version exits without processing other commands."""
+    runner = CliRunner()
+    result = runner.invoke(cli, ["--version", "benchmark"])
+
+    assert result.exit_code == 0
+    assert "guidellm version:" in result.output
+    assert "Commands to run a new benchmark" not in result.output
+
+
+@pytest.mark.smoke
+def test_help_shows_version_option():
+    """Test that --help shows the --version option."""
+    runner = CliRunner()
+    result = runner.invoke(cli, ["--help"])
+
+    assert result.exit_code == 0
+    assert "--version" in result.output
+    assert "Show the version and exit" in result.output
+
+
+@pytest.mark.smoke
+def test_other_commands_still_work():
+    """Test that other CLI commands still work after adding version flag."""
+    runner = CliRunner()
+    result = runner.invoke(cli, ["--help"])
+
+    assert result.exit_code == 0
+    assert "benchmark" in result.output
+    assert "config" in result.output
+    assert "preprocess" in result.output
+
+
+@pytest.mark.smoke
+def test_version_flag_case_sensitivity():
+    """Test that --version flag is case sensitive."""
+    runner = CliRunner()
+
+    result = runner.invoke(cli, ["--version"])
+    assert result.exit_code == 0
+    assert "guidellm version:" in result.output
+
+    # --VERSION should not work
+    result = runner.invoke(cli, ["--VERSION"])
+    assert result.exit_code != 0
+    assert "No such option" in result.output
+
+
+@pytest.mark.integration
+def test_version_integration_with_actual_version():
+    """Integration test to verify version matches what's in version.py."""
+    try:
+        from guidellm.version import version as actual_version
+
+        runner = CliRunner()
+        result = runner.invoke(cli, ["--version"])
+
+        assert result.exit_code == 0
+        expected_output = f"guidellm version: {actual_version}"
+        assert expected_output in result.output
+    except ImportError:
+        runner = CliRunner()
+        result = runner.invoke(cli, ["--version"])
+
+        assert result.exit_code == 0
+        assert "guidellm version: unknown" in result.output

From 8bec7b8f7b35074a7f6ca83cd379210be8632381 Mon Sep 17 00:00:00 2001
From: Harshith-umesh <harshith.umesh.nat@gmail.com>
Date: Fri, 25 Jul 2025 10:49:56 -0400
Subject: [PATCH 2/5] Fix linting errors 2

Signed-off-by: Harshith-umesh <harshith.umesh.nat@gmail.com>
---
 src/guidellm/__main__.py | 3 ++-
 tests/unit/test_cli.py   | 4 +++-
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/src/guidellm/__main__.py b/src/guidellm/__main__.py
index 27f79035..46057cd1 100644
--- a/src/guidellm/__main__.py
+++ b/src/guidellm/__main__.py
@@ -19,6 +19,7 @@
 from guidellm.utils import DefaultGroupHandler
 from guidellm.utils import cli as cli_tools
 
+# Import version information
 try:
     from guidellm.version import version
 except ImportError:
@@ -71,7 +72,7 @@ def benchmark():
             readable=True,
             file_okay=True,
             dir_okay=False,
-            path_type=Path,  # type: ignore[type-var]
+            path_type=Path,
         ),
         click.Choice(get_builtin_scenarios()),
     ),
diff --git a/tests/unit/test_cli.py b/tests/unit/test_cli.py
index 08fd124d..9e176493 100644
--- a/tests/unit/test_cli.py
+++ b/tests/unit/test_cli.py
@@ -85,7 +85,9 @@ def test_version_flag_case_sensitivity():
 def test_version_integration_with_actual_version():
     """Integration test to verify version matches what's in version.py."""
     try:
-        from guidellm.version import version as actual_version
+        from guidellm.version import (
+            version as actual_version,
+        )
 
         runner = CliRunner()
         result = runner.invoke(cli, ["--version"])

From fec23c061e2234d424f0d4273c1c29b0592e8a38 Mon Sep 17 00:00:00 2001
From: Elijah DeLee <kdelee@redhat.com>
Date: Tue, 29 Jul 2025 18:20:34 -0400
Subject: [PATCH 3/5] Add CLI options for backend args (like headers and
 verify) (#230)

This PR adds the ability to configure custom request headers and control
SSL certificate verification when running benchmarks.

* The OpenAIHTTPBackend now supports passing custom headers and a verify
flag to disable SSL verification.
* Headers are now merged with the following precedence: CLI arguments
(--backend-args), scenario file arguments, environment variables, and
then default values.
* Headers can be removed by setting their value to null in the
--backend-args JSON string.
* The --backend-args help text has been updated with an example of how
to use these new features.
* New documentation has been added for the CLI, configuration options,
and supported data formats.
* Unit tests have been added to verify the new header and SSL
verification logic, as well as the CLI argument parsing.

This provides a way to benchmark targets that require custom
authentication, other headers, or use self-signed SSL certificates.

Signed-off-by: Elijah DeLee <kdelee@redhat.com>
---
 docs/guides/cli.md                            | 37 +++++++-
 docs/guides/configuration.md                  | 60 ++++++++++++-
 docs/guides/data_formats.md                   | 67 ++++++++++++++
 src/guidellm/__main__.py                      |  4 +-
 src/guidellm/backend/openai.py                | 44 ++++++----
 src/guidellm/config.py                        |  2 +
 tests/unit/backend/test_openai_backend.py     |  4 +-
 .../test_openai_backend_custom_configs.py     | 88 +++++++++++++++++++
 tests/unit/test_config.py                     |  4 +
 tests/unit/test_main.py                       | 84 ++++++++++++++++++
 10 files changed, 373 insertions(+), 21 deletions(-)
 create mode 100644 docs/guides/data_formats.md
 create mode 100644 tests/unit/backend/test_openai_backend_custom_configs.py
 create mode 100644 tests/unit/test_main.py

diff --git a/docs/guides/cli.md b/docs/guides/cli.md
index d30962bd..77796892 100644
--- a/docs/guides/cli.md
+++ b/docs/guides/cli.md
@@ -1 +1,36 @@
-# Coming Soon
+# CLI Reference
+
+This page provides a reference for the `guidellm` command-line interface. For more advanced configuration, including environment variables and `.env` files, see the [Configuration Guide](./configuration.md).
+
+## `guidellm benchmark run`
+
+This command is the primary entrypoint for running benchmarks. It has many options that can be specified on the command line or in a scenario file.
+
+### Scenario Configuration
+
+| Option                      | Description                                                                                                                                     |
+| --------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------- |
+| `--scenario <PATH or NAME>` | The name of a builtin scenario or path to a scenario configuration file. Options specified on the command line will override the scenario file. |
+
+### Target and Backend Configuration
+
+These options configure how `guidellm` connects to the system under test.
+
+| Option                  | Description                                                                                                                                                                                                   |
+| ----------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `--target <URL>`        | **Required.** The endpoint of the target system, e.g., `http://localhost:8080`. Can also be set with the `GUIDELLM__OPENAI__BASE_URL` environment variable.                                                   |
+| `--backend-type <TYPE>` | The type of backend to use. Defaults to `openai_http`.                                                                                                                                                        |
+| `--backend-args <JSON>` | A JSON string for backend-specific arguments. For example: `--backend-args '{"headers": {"Authorization": "Bearer my-token"}, "verify": false}'` to pass custom headers and disable certificate verification. |
+| `--model <NAME>`        | The ID of the model to benchmark within the backend.                                                                                                                                                          |
+
+### Data and Request Configuration
+
+These options define the data to be used for benchmarking and how requests will be generated.
+
+| Option                    | Description                                                                                                                                                                              |
+| ------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `--data <SOURCE>`         | The data source. This can be a HuggingFace dataset ID, a path to a local data file, or a synthetic data configuration. See the [Data Formats Guide](./data_formats.md) for more details. |
+| `--rate-type <TYPE>`      | The type of request generation strategy to use (e.g., `constant`, `poisson`, `sweep`).                                                                                                   |
+| `--rate <NUMBER>`         | The rate of requests per second for `constant` or `poisson` strategies, or the number of steps for a `sweep`.                                                                            |
+| `--max-requests <NUMBER>` | The maximum number of requests to run for each benchmark.                                                                                                                                |
+| `--max-seconds <NUMBER>`  | The maximum number of seconds to run each benchmark for.                                                                                                                                 |
diff --git a/docs/guides/configuration.md b/docs/guides/configuration.md
index d30962bd..90c770f1 100644
--- a/docs/guides/configuration.md
+++ b/docs/guides/configuration.md
@@ -1 +1,59 @@
-# Coming Soon
+# Configuration
+
+The `guidellm` application can be configured using command-line arguments, environment variables, or a `.env` file. This page details the file-based and environment variable configuration options.
+
+## Configuration Methods
+
+Settings are loaded with the following priority (highest priority first):
+
+1. Command-line arguments.
+2. Environment variables.
+3. Values in a `.env` file in the directory where the command is run.
+4. Default values.
+
+## Environment Variable Format
+
+All settings can be configured using environment variables. The variables must be prefixed with `GUIDELLM__`, and nested settings are separated by a double underscore `__`.
+
+For example, to set the `api_key` for the `openai` backend, you would use the following environment variable:
+
+```bash
+export GUIDELLM__OPENAI__API_KEY="your-api-key"
+```
+
+### Target and Backend Configuration
+
+You can configure the connection to the target system using environment variables. This is an alternative to passing `--target` and `--backend-args` on the command line.
+
+| Environment Variable                  | Description                                                                                                                | Example                                                                   |
+| ------------------------------------- | -------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------- |
+| `GUIDELLM__OPENAI__BASE_URL`          | The endpoint of the target system. Equivalent to the `--target` CLI option.                                                | `export GUIDELLM__OPENAI__BASE_URL="http://localhost:8080"`               |
+| `GUIDELLM__OPENAI__API_KEY`           | The API key to use for bearer token authentication.                                                                        | `export GUIDELLM__OPENAI__API_KEY="your-secret-api-key"`                  |
+| `GUIDELLM__OPENAI__BEARER_TOKEN`      | The full bearer token to use for authentication.                                                                           | `export GUIDELLM__OPENAI__BEARER_TOKEN="Bearer your-secret-token"`        |
+| `GUIDELLM__OPENAI__HEADERS`           | A JSON string representing a dictionary of headers to send to the target. These headers will override any default headers. | `export GUIDELLM__OPENAI__HEADERS='{"Authorization": "Bearer my-token"}'` |
+| `GUIDELLM__OPENAI__ORGANIZATION`      | The OpenAI organization to use for requests.                                                                               | `export GUIDELLM__OPENAI__ORGANIZATION="org-12345"`                       |
+| `GUIDELLM__OPENAI__PROJECT`           | The OpenAI project to use for requests.                                                                                    | `export GUIDELLM__OPENAI__PROJECT="proj-67890"`                           |
+| `GUIDELLM__OPENAI__VERIFY`            | Set to `false` or `0` to disable certificate verification.                                                                 | `export GUIDELLM__OPENAI__VERIFY=false`                                   |
+| `GUIDELLM__OPENAI__MAX_OUTPUT_TOKENS` | The default maximum number of tokens to request for completions.                                                           | `export GUIDELLM__OPENAI__MAX_OUTPUT_TOKENS=2048`                         |
+
+### General HTTP Settings
+
+These settings control the behavior of the underlying HTTP client.
+
+| Environment Variable                 | Description                                                                     |
+| ------------------------------------ | ------------------------------------------------------------------------------- |
+| `GUIDELLM__REQUEST_TIMEOUT`          | The timeout in seconds for HTTP requests. Defaults to 300.                      |
+| `GUIDELLM__REQUEST_HTTP2`            | Set to `true` or `1` to enable HTTP/2 support. Defaults to true.                |
+| `GUIDELLM__REQUEST_FOLLOW_REDIRECTS` | Set to `true` or `1` to allow the client to follow redirects. Defaults to true. |
+
+### Using a `.env` file
+
+You can also place these variables in a `.env` file in the directory where you run `guidellm`:
+
+```dotenv
+# .env file
+GUIDELLM__OPENAI__BASE_URL="http://localhost:8080"
+GUIDELLM__OPENAI__API_KEY="your-api-key"
+GUIDELLM__OPENAI__HEADERS='{"Authorization": "Bearer my-token"}'
+GUIDELLM__OPENAI__VERIFY=false
+```
diff --git a/docs/guides/data_formats.md b/docs/guides/data_formats.md
new file mode 100644
index 00000000..dd12c591
--- /dev/null
+++ b/docs/guides/data_formats.md
@@ -0,0 +1,67 @@
+# Data Formats
+
+The `--data` argument for the `guidellm benchmark run` command accepts several different formats for specifying the data to be used for benchmarking.
+
+## Local Data Files
+
+You can provide a path to a local data file in one of the following formats:
+
+- **CSV (.csv)**: A comma-separated values file. The loader will attempt to find a column with a common name for the prompt (e.g., `prompt`, `text`, `instruction`).
+- **JSON (.json)**: A JSON file. The structure should be a list of objects, where each object represents a row of data.
+- **JSON Lines (.jsonl)**: A file where each line is a valid JSON object.
+- **Text (.txt)**: A plain text file, where each line is treated as a separate prompt.
+
+If the prompt column cannot be automatically determined, you can specify it using the `--data-args` option:
+
+```bash
+--data-args '{"text_column": "my_custom_prompt_column"}'
+```
+
+## Synthetic Data
+
+You can generate synthetic data on the fly by providing a configuration string or file.
+
+### Configuration Options
+
+| Parameter             | Description                                                                                                     |
+| --------------------- | --------------------------------------------------------------------------------------------------------------- |
+| `prompt_tokens`       | **Required.** The average number of tokens for the generated prompts.                                           |
+| `output_tokens`       | **Required.** The average number of tokens for the generated outputs.                                           |
+| `samples`             | The total number of samples to generate. Defaults to 1000.                                                      |
+| `source`              | The source text to use for generating the synthetic data. Defaults to a built-in copy of "Pride and Prejudice". |
+| `prompt_tokens_stdev` | The standard deviation of the tokens generated for prompts.                                                     |
+| `prompt_tokens_min`   | The minimum number of text tokens generated for prompts.                                                        |
+| `prompt_tokens_max`   | The maximum number of text tokens generated for prompts.                                                        |
+| `output_tokens_stdev` | The standard deviation of the tokens generated for outputs.                                                     |
+| `output_tokens_min`   | The minimum number of text tokens generated for outputs.                                                        |
+| `output_tokens_max`   | The maximum number of text tokens generated for outputs.                                                        |
+
+### Configuration Formats
+
+You can provide the synthetic data configuration in one of three ways:
+
+1. **Key-Value String:**
+
+   ```bash
+   --data "prompt_tokens=256,output_tokens=128,samples=500"
+   ```
+
+2. **JSON String:**
+
+   ```bash
+   --data '{"prompt_tokens": 256, "output_tokens": 128, "samples": 500}'
+   ```
+
+3. **YAML or Config File:** Create a file (e.g., `my_config.yaml`):
+
+   ```yaml
+   prompt_tokens: 256
+   output_tokens: 128
+   samples: 500
+   ```
+
+   And use it with the `--data` argument:
+
+   ```bash
+   --data my_config.yaml
+   ```
diff --git a/src/guidellm/__main__.py b/src/guidellm/__main__.py
index 46057cd1..6e23a506 100644
--- a/src/guidellm/__main__.py
+++ b/src/guidellm/__main__.py
@@ -103,7 +103,9 @@ def benchmark():
     default=GenerativeTextScenario.get_default("backend_args"),
     help=(
         "A JSON string containing any arguments to pass to the backend as a "
-        "dict with **kwargs."
+        "dict with **kwargs. Headers can be removed by setting their value to "
+        "null. For example: "
+        """'{"headers": {"Authorization": null, "Custom-Header": "Custom-Value"}}'"""
     ),
 )
 @click.option(
diff --git a/src/guidellm/backend/openai.py b/src/guidellm/backend/openai.py
index 6e24dfc5..e62e9003 100644
--- a/src/guidellm/backend/openai.py
+++ b/src/guidellm/backend/openai.py
@@ -95,6 +95,8 @@ def __init__(
         extra_query: Optional[dict] = None,
         extra_body: Optional[dict] = None,
         remove_from_body: Optional[list[str]] = None,
+        headers: Optional[dict] = None,
+        verify: Optional[bool] = None,
     ):
         super().__init__(type_="openai_http")
         self._target = target or settings.openai.base_url
@@ -111,13 +113,32 @@ def __init__(
 
         self._model = model
 
+        # Start with default headers based on other params
+        default_headers: dict[str, str] = {}
         api_key = api_key or settings.openai.api_key
-        self.authorization = (
-            f"Bearer {api_key}" if api_key else settings.openai.bearer_token
-        )
+        bearer_token = settings.openai.bearer_token
+        if api_key:
+            default_headers["Authorization"] = f"Bearer {api_key}"
+        elif bearer_token:
+            default_headers["Authorization"] = bearer_token
 
         self.organization = organization or settings.openai.organization
+        if self.organization:
+            default_headers["OpenAI-Organization"] = self.organization
+
         self.project = project or settings.openai.project
+        if self.project:
+            default_headers["OpenAI-Project"] = self.project
+
+        # User-provided headers from kwargs or settings override defaults
+        merged_headers = default_headers.copy()
+        merged_headers.update(settings.openai.headers or {})
+        if headers:
+            merged_headers.update(headers)
+
+        # Remove headers with None values for backward compatibility and convenience
+        self.headers = {k: v for k, v in merged_headers.items() if v is not None}
+
         self.timeout = timeout if timeout is not None else settings.request_timeout
         self.http2 = http2 if http2 is not None else settings.request_http2
         self.follow_redirects = (
@@ -125,6 +146,7 @@ def __init__(
             if follow_redirects is not None
             else settings.request_follow_redirects
         )
+        self.verify = verify if verify is not None else settings.openai.verify
         self.max_output_tokens = (
             max_output_tokens
             if max_output_tokens is not None
@@ -161,9 +183,7 @@ def info(self) -> dict[str, Any]:
             "timeout": self.timeout,
             "http2": self.http2,
             "follow_redirects": self.follow_redirects,
-            "authorization": bool(self.authorization),
-            "organization": self.organization,
-            "project": self.project,
+            "headers": self.headers,
             "text_completions_path": TEXT_COMPLETIONS_PATH,
             "chat_completions_path": CHAT_COMPLETIONS_PATH,
         }
@@ -384,6 +404,7 @@ def _get_async_client(self) -> httpx.AsyncClient:
                 http2=self.http2,
                 timeout=self.timeout,
                 follow_redirects=self.follow_redirects,
+                verify=self.verify,
             )
             self._async_client = client
         else:
@@ -395,16 +416,7 @@ def _headers(self) -> dict[str, str]:
         headers = {
             "Content-Type": "application/json",
         }
-
-        if self.authorization:
-            headers["Authorization"] = self.authorization
-
-        if self.organization:
-            headers["OpenAI-Organization"] = self.organization
-
-        if self.project:
-            headers["OpenAI-Project"] = self.project
-
+        headers.update(self.headers)
         return headers
 
     def _params(self, endpoint_type: EndpointType) -> dict[str, str]:
diff --git a/src/guidellm/config.py b/src/guidellm/config.py
index 3b426bd8..beda55fc 100644
--- a/src/guidellm/config.py
+++ b/src/guidellm/config.py
@@ -81,10 +81,12 @@ class OpenAISettings(BaseModel):
 
     api_key: Optional[str] = None
     bearer_token: Optional[str] = None
+    headers: Optional[dict[str, str]] = None
     organization: Optional[str] = None
     project: Optional[str] = None
     base_url: str = "http://localhost:8000"
     max_output_tokens: int = 16384
+    verify: bool = True
 
 
 class ReportGenerationSettings(BaseModel):
diff --git a/tests/unit/backend/test_openai_backend.py b/tests/unit/backend/test_openai_backend.py
index b461acff..0a4c2c38 100644
--- a/tests/unit/backend/test_openai_backend.py
+++ b/tests/unit/backend/test_openai_backend.py
@@ -11,7 +11,7 @@ def test_openai_http_backend_default_initialization():
     backend = OpenAIHTTPBackend()
     assert backend.target == settings.openai.base_url
     assert backend.model is None
-    assert backend.authorization == settings.openai.bearer_token
+    assert backend.headers.get("Authorization") == settings.openai.bearer_token
     assert backend.organization == settings.openai.organization
     assert backend.project == settings.openai.project
     assert backend.timeout == settings.request_timeout
@@ -37,7 +37,7 @@ def test_openai_http_backend_intialization():
     )
     assert backend.target == "http://test-target"
     assert backend.model == "test-model"
-    assert backend.authorization == "Bearer test-key"
+    assert backend.headers.get("Authorization") == "Bearer test-key"
     assert backend.organization == "test-org"
     assert backend.project == "test-proj"
     assert backend.timeout == 10
diff --git a/tests/unit/backend/test_openai_backend_custom_configs.py b/tests/unit/backend/test_openai_backend_custom_configs.py
new file mode 100644
index 00000000..7f6706ad
--- /dev/null
+++ b/tests/unit/backend/test_openai_backend_custom_configs.py
@@ -0,0 +1,88 @@
+import pytest
+
+from guidellm.backend import OpenAIHTTPBackend
+from guidellm.config import settings
+
+
+@pytest.mark.smoke
+def test_openai_http_backend_default_initialization():
+    backend = OpenAIHTTPBackend()
+    assert backend.verify is True
+
+
+@pytest.mark.smoke
+def test_openai_http_backend_custom_ssl_verification():
+    backend = OpenAIHTTPBackend(verify=False)
+    assert backend.verify is False
+
+
+@pytest.mark.smoke
+def test_openai_http_backend_custom_headers_override(monkeypatch):
+    """Explicit ``headers`` override the Authorization derived from api_key."""
+    # Use monkeypatch instead of assigning to the shared settings object directly:
+    # the previous values are restored on teardown even if an assertion fails,
+    # so a failure here cannot leak state into other tests.
+    monkeypatch.setattr(settings.openai, "api_key", "default-api-key")
+    monkeypatch.setattr(settings.openai, "headers", None)
+
+    # Set custom headers that override the default Authorization and add a new header
+    openshift_token = "Bearer sha256~xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
+    override_headers = {
+        "Authorization": openshift_token,
+        "Custom-Header": "Custom-Value",
+    }
+
+    # Initialize the backend
+    backend = OpenAIHTTPBackend(headers=override_headers)
+
+    # Check that the override headers are used
+    assert backend.headers["Authorization"] == openshift_token
+    assert backend.headers["Custom-Header"] == "Custom-Value"
+    assert len(backend.headers) == 2
+
+
+@pytest.mark.smoke
+def test_openai_http_backend_kwarg_headers_override_settings(monkeypatch):
+    """Headers passed as kwargs take precedence over ``settings.openai.headers``."""
+    # Set headers via settings (simulating environment variables); monkeypatch
+    # restores the previous value on teardown even if an assertion fails.
+    settings_headers = {"Authorization": "Bearer settings-token"}
+    monkeypatch.setattr(settings.openai, "headers", settings_headers)
+
+    # Set different headers via kwargs (simulating --backend-args)
+    override_headers = {
+        "Authorization": "Bearer kwargs-token",
+        "Custom-Header": "Custom-Value",
+    }
+
+    # Initialize the backend with kwargs
+    backend = OpenAIHTTPBackend(headers=override_headers)
+
+    # Check that the kwargs headers took precedence
+    assert backend.headers["Authorization"] == "Bearer kwargs-token"
+    assert backend.headers["Custom-Header"] == "Custom-Value"
+    assert len(backend.headers) == 2
+
+
+@pytest.mark.smoke
+def test_openai_http_backend_remove_header_with_none(monkeypatch):
+    """A header whose value is ``None`` is dropped from the final header set."""
+    # Set a default api_key, which would normally create an Authorization header.
+    # Going through monkeypatch means the shared settings object is restored on
+    # teardown even if an assertion below fails.
+    monkeypatch.setattr(settings.openai, "api_key", "default-api-key")
+    monkeypatch.setattr(settings.openai, "headers", None)
+
+    # Set a custom header and explicitly set Authorization to None to remove it
+    override_headers = {
+        "Authorization": None,
+        "Custom-Header": "Custom-Value",
+    }
+
+    # Initialize the backend
+    backend = OpenAIHTTPBackend(headers=override_headers)
+
+    # Check that the Authorization header is removed and the custom header is present
+    assert "Authorization" not in backend.headers
+    assert backend.headers["Custom-Header"] == "Custom-Value"
+    assert len(backend.headers) == 1
diff --git a/tests/unit/test_config.py b/tests/unit/test_config.py
index db9f5a61..f5d9415c 100644
--- a/tests/unit/test_config.py
+++ b/tests/unit/test_config.py
@@ -178,9 +178,13 @@ def test_settings_with_env_variables(mocker):
             "GUIDELLM__DATASET__PREFERRED_DATA_COLUMNS": '["custom_column"]',
             "GUIDELLM__OPENAI__API_KEY": "env_api_key",
             "GUIDELLM__TABLE_BORDER_CHAR": "*",
+            "GUIDELLM__OPENAI__HEADERS": '{"Authorization": "Bearer env-token"}',
+            "GUIDELLM__OPENAI__VERIFY": "false",
         },
     )
     settings = Settings()
     assert settings.dataset.preferred_data_columns == ["custom_column"]
     assert settings.openai.api_key == "env_api_key"
     assert settings.table_border_char == "*"
+    assert settings.openai.headers == {"Authorization": "Bearer env-token"}
+    assert settings.openai.verify is False
diff --git a/tests/unit/test_main.py b/tests/unit/test_main.py
new file mode 100644
index 00000000..e813dba4
--- /dev/null
+++ b/tests/unit/test_main.py
@@ -0,0 +1,84 @@
+import json
+from pathlib import Path
+from unittest.mock import patch
+
+import pytest
+from click.testing import CliRunner
+
+from guidellm.__main__ import cli
+
+
+@pytest.mark.smoke
+def test_benchmark_run_with_backend_args():
+    runner = CliRunner()
+    result = runner.invoke(
+        cli,
+        [
+            "benchmark",
+            "run",
+            "--backend-args",
+            '{"headers": {"Authorization": "Bearer my-token"}, "verify": false}',
+            "--target",
+            "http://localhost:8000",
+            "--data",
+            "prompt_tokens=1,output_tokens=1",
+            "--rate-type",
+            "constant",
+            "--rate",
+            "1",
+            "--max-requests",
+            "1",
+        ],
+    )
+    # This will fail because it can't connect to the server,
+    # but it will pass the header parsing, which is what we want to test.
+    assert result.exit_code != 0
+    assert "Invalid header format" not in result.output
+
+
+@patch("guidellm.__main__.benchmark_with_scenario")
+def test_cli_backend_args_header_removal(mock_benchmark_func, tmp_path: Path):
+    """
+    Tests that --backend-args from the CLI correctly overrides scenario
+    values and that `null` correctly removes a header.
+    """
+    scenario_path = tmp_path / "scenario.json"
+
+    # Create a scenario file with a header that should be overridden and removed
+    scenario_content = {
+        "backend_type": "openai_http",
+        "backend_args": {"headers": {"Authorization": "should-be-removed"}},
+        "data": "prompt_tokens=10,output_tokens=10",
+        "max_requests": 1,
+        "target": "http://dummy-target",
+        "rate_type": "synchronous",
+        "processor": "gpt2",
+    }
+    with scenario_path.open("w") as f:
+        json.dump(scenario_content, f)
+
+    runner = CliRunner()
+    result = runner.invoke(
+        cli,
+        [
+            "benchmark",
+            "run",
+            "--scenario",
+            str(scenario_path),
+            "--backend-args",
+            '{"headers": {"Authorization": null, "Custom-Header": "Custom-Value"}}',
+        ],
+        catch_exceptions=False,
+    )
+
+    assert result.exit_code == 0, result.output
+
+    # Assert that benchmark_with_scenario was called with the correct scenario
+    mock_benchmark_func.assert_called_once()
+    call_args = mock_benchmark_func.call_args[1]
+    scenario = call_args["scenario"]
+
+    # Verify the backend_args were merged correctly
+    backend_args = scenario.backend_args
+    expected_headers = {"Authorization": None, "Custom-Header": "Custom-Value"}
+    assert backend_args["headers"] == expected_headers

From 1313bca9cdd6cce68536427eb51dd7414aedab30 Mon Sep 17 00:00:00 2001
From: Samuel Monson <smonson@redhat.com>
Date: Wed, 30 Jul 2025 18:09:41 -0400
Subject: [PATCH 4/5] Drop Entrypoint and Improve Containerfile (#250)

* Drop the container entrypoint script as GuideLLM has had native
support for its features since #99
* Make containerfile more rebuild friendly based on #213
* Drop the ENV default scenario as it is confusing to users setting CLI
args

Closes: #213

---------

Signed-off-by: Samuel Monson <smonson@redhat.com>
---
 README.md            | 19 +++++++++++++++++++
 deploy/Containerfile | 38 ++++++++++++++++----------------------
 deploy/entrypoint.sh | 43 -------------------------------------------
 3 files changed, 35 insertions(+), 65 deletions(-)
 delete mode 100755 deploy/entrypoint.sh

diff --git a/README.md b/README.md
index 9312c55f..b1abc75f 100644
--- a/README.md
+++ b/README.md
@@ -52,6 +52,25 @@ pip install git+https://github.com/vllm-project/guidellm.git
 
 For detailed installation instructions and requirements, see the [Installation Guide](https://github.com/vllm-project/guidellm/blob/main/docs/install.md).
 
+### With Podman / Docker
+
+Alternatively we publish container images at [ghcr.io/vllm-project/guidellm](https://github.com/vllm-project/guidellm/pkgs/container/guidellm). Running a container is (by default) equivalent to `guidellm benchmark run`:
+
+```bash
+podman run \
+  --rm -it \
+  -v "./results:/results:rw" \
+  -e GUIDELLM_TARGET=http://localhost:8000 \
+  -e GUIDELLM_RATE_TYPE=sweep \
+  -e GUIDELLM_MAX_SECONDS=30 \
+  -e GUIDELLM_DATA="prompt_tokens=256,output_tokens=128" \
+  ghcr.io/vllm-project/guidellm:latest
+```
+
+> [!TIP] CLI options can also be specified as ENV variables (E.g. `--rate-type sweep` -> `GUIDELLM_RATE_TYPE=sweep`). If both are specified then the CLI option overrides the ENV.
+
+Replace `latest` with `stable` for the newest tagged release or set a specific release if desired.
+
 ### Quick Start
 
 #### 1. Start an OpenAI Compatible Server (vLLM)
diff --git a/deploy/Containerfile b/deploy/Containerfile
index 2702e24d..7715de93 100644
--- a/deploy/Containerfile
+++ b/deploy/Containerfile
@@ -1,26 +1,26 @@
-ARG PYTHON=3.13
+ARG BASE_IMAGE=docker.io/python:3.13-slim
 
 # Use a multi-stage build to create a lightweight production image
-FROM docker.io/python:${PYTHON}-slim as builder
+FROM $BASE_IMAGE as builder
+
+# Ensure files are installed as root
+USER root
 
 # Copy repository files
-COPY / /src
+COPY / /opt/app-root/src
 
 # Create a venv and install guidellm
-RUN python3 -m venv /opt/guidellm \
-    && /opt/guidellm/bin/pip install --no-cache-dir /src
-
-# Copy entrypoint script into the venv bin directory
-RUN install -m0755 /src/deploy/entrypoint.sh /opt/guidellm/bin/entrypoint.sh
+RUN python3 -m venv /opt/app-root/guidellm \
+    && /opt/app-root/guidellm/bin/pip install --no-cache-dir /opt/app-root/src
 
 # Prod image
-FROM docker.io/python:${PYTHON}-slim
+FROM $BASE_IMAGE
 
 # Copy the virtual environment from the builder stage
-COPY --from=builder /opt/guidellm /opt/guidellm
+COPY --from=builder /opt/app-root/guidellm /opt/app-root/guidellm
 
 # Add guidellm bin to PATH
-ENV PATH="/opt/guidellm/bin:$PATH"
+ENV PATH="/opt/app-root/guidellm/bin:$PATH"
 
 # Create a non-root user
 RUN useradd -md /results guidellm
@@ -35,14 +35,8 @@ WORKDIR /results
 LABEL org.opencontainers.image.source="https://github.com/vllm-project/guidellm" \
       org.opencontainers.image.description="GuideLLM Performance Benchmarking Container"
 
-# Set the environment variable for the benchmark script
-# TODO: Replace with scenario environment variables
-ENV GUIDELLM_TARGET="http://localhost:8000" \
-    GUIDELLM_MODEL="neuralmagic/Meta-Llama-3.1-8B-Instruct-quantized.w4a16" \
-    GUIDELLM_RATE_TYPE="sweep" \
-    GUIDELLM_DATA="prompt_tokens=256,output_tokens=128" \
-    GUIDELLM_MAX_REQUESTS="100" \
-    GUIDELLM_MAX_SECONDS="" \
-    GUIDELLM_OUTPUT_PATH="/results/results.json"
-
-ENTRYPOINT [ "/opt/guidellm/bin/entrypoint.sh" ]
+# Argument defaults can be set with GUIDELLM_<ARG>
+ENV GUIDELLM_OUTPUT_PATH="/results/benchmarks.json"
+
+ENTRYPOINT [ "/opt/app-root/guidellm/bin/guidellm" ]
+CMD [ "benchmark", "run" ]
diff --git a/deploy/entrypoint.sh b/deploy/entrypoint.sh
deleted file mode 100755
index d6ff4ea0..00000000
--- a/deploy/entrypoint.sh
+++ /dev/null
@@ -1,43 +0,0 @@
-#!/usr/bin/env bash
-set -euo pipefail
-
-# Path to the guidellm binary
-guidellm_bin="/opt/guidellm/bin/guidellm"
-
-# If we receive any arguments switch to guidellm command
-if [ $# -gt 0 ]; then
-    echo "Running command: guidellm $*"
-    exec $guidellm_bin "$@"
-fi
-
-# Get a list of environment variables that start with GUIDELLM_
-args="$(printenv | cut -d= -f1 | grep -E '^GUIDELLM_')"
-
-# NOTE: Bash array + exec prevent shell escape issues
-CMD=("${guidellm_bin}" "benchmark")
-
-# Parse environment variables for the benchmark command
-for var in $args; do
-    # Remove GUIDELLM_ prefix
-    arg_name="${var#GUIDELLM_}"
-
-    # If there is an extra underscore at the
-    # start than this is a config variable
-    if [ "${arg_name:0:1}" == "_" ]; then
-        continue
-    fi
-
-    # Convert to lowercase
-    arg_name="${arg_name,,}"
-    # Replace underscores with dashes
-    arg_name="${arg_name//_/-}"
-
-    # Add the argument to the command array if set
-    if [ -n "${!var}" ]; then
-        CMD+=("--${arg_name}" "${!var}")
-    fi
-done
-
-# Execute the command
-echo "Running command: ${CMD[*]}"
-exec "${CMD[@]}"

From d82ae7e147f80cea6b3db8bdfd59dc4f96ad6698 Mon Sep 17 00:00:00 2001
From: Harshith-umesh <harshith.umesh.nat@gmail.com>
Date: Fri, 1 Aug 2025 13:25:44 -0400
Subject: [PATCH 5/5] use click to get package version

Signed-off-by: Harshith-umesh <harshith.umesh.nat@gmail.com>
---
 src/guidellm/__main__.py | 22 +---------------------
 tests/unit/test_cli.py   | 16 +++++++++-------
 2 files changed, 10 insertions(+), 28 deletions(-)

diff --git a/src/guidellm/__main__.py b/src/guidellm/__main__.py
index 6e23a506..7cba6a7c 100644
--- a/src/guidellm/__main__.py
+++ b/src/guidellm/__main__.py
@@ -19,33 +19,13 @@
 from guidellm.utils import DefaultGroupHandler
 from guidellm.utils import cli as cli_tools
 
-# Import version information
-try:
-    from guidellm.version import version
-except ImportError:
-    version = "unknown"
-
 STRATEGY_PROFILE_CHOICES = list(
     set(list(get_args(ProfileType)) + list(get_args(StrategyType)))
 )
 
 
-def _version_callback(ctx: click.Context, _param: click.Parameter, value: bool) -> None:
-    """Callback for --version flag."""
-    if value:
-        click.echo(f"guidellm version: {version}")
-        ctx.exit()
-
-
 @click.group()
-@click.option(
-    "--version",
-    is_flag=True,
-    expose_value=False,
-    is_eager=True,
-    help="Show the version and exit.",
-    callback=_version_callback,
-)
+@click.version_option(package_name="guidellm", message="guidellm version: %(version)s")
 def cli():
     pass
 
diff --git a/tests/unit/test_cli.py b/tests/unit/test_cli.py
index 9e176493..f58e7f38 100644
--- a/tests/unit/test_cli.py
+++ b/tests/unit/test_cli.py
@@ -83,11 +83,11 @@ def test_version_flag_case_sensitivity():
 
 @pytest.mark.integration
 def test_version_integration_with_actual_version():
-    """Integration test to verify version matches what's in version.py."""
+    """Integration test to verify version matches importlib.metadata."""
+    import importlib.metadata
+
     try:
-        from guidellm.version import (
-            version as actual_version,
-        )
+        actual_version = importlib.metadata.version("guidellm")
 
         runner = CliRunner()
         result = runner.invoke(cli, ["--version"])
@@ -95,9 +95,11 @@ def test_version_integration_with_actual_version():
         assert result.exit_code == 0
         expected_output = f"guidellm version: {actual_version}"
         assert expected_output in result.output
-    except ImportError:
+    except importlib.metadata.PackageNotFoundError:
+        # If package is not installed, the CLI should show an error
+        # This is expected behavior when the package isn't properly installed
         runner = CliRunner()
         result = runner.invoke(cli, ["--version"])
 
-        assert result.exit_code == 0
-        assert "guidellm version: unknown" in result.output
+        # click raises RuntimeError here; CliRunner reports it as a non-zero exit
+        assert result.exit_code != 0