docs/contributing/README.md

You can also reach out for support in the `#sig-spyre` channel in the [vLLM Slack](https://inviter.co/vllm-slack) workspace.

## Docs

### Building the docs with MkDocs
Install MkDocs along with the [plugins](https://github.com/vllm-project/vllm-spyre/blob/main/mkdocs.yaml) used in the vLLM Spyre documentation.
```bash
uv pip install -r docs/requirements-docs.txt
```
!!! note
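
Once the requirements are installed, you can usually preview the documentation locally with MkDocs' built-in live-reload server (a minimal sketch using the standard `mkdocs serve` command; the exact invocation for this repo may differ):

```bash
# Serve the docs at http://127.0.0.1:8000 with live reload,
# using the mkdocs.yaml at the repository root.
mkdocs serve
```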

Run the continuous batching tests:

```bash
python -m pytest -v -x tests/e2e -m cb
```

## Debugging

!!! tip
    You can `oc edit` a pod and change the image without having the pod scheduled to a different node. This can be useful for testing whether software or hardware is the issue.
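
For example, a minimal sketch (the pod name `my-vllm-pod` is a placeholder):

```bash
# Opens the live pod spec in your editor; change spec.containers[].image and save.
# The pod keeps its node assignment, so you can compare software stacks on the same hardware.
oc edit pod my-vllm-pod
```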

- The script `/opt/sentient/bin/aiu-query-devices` in the pod can be used to see the connectivity between the `AIUs` on the machine. You can also infer this from environment variables with names like `AIU_TIER_\d_SET_\d_RANK_\d` (see the combined sketch after this list).

- `SPYRE_DEVICES` can be used to select which device is used for each `RANK`. This is similar to how `CUDA_VISIBLE_DEVICES` works for GPUs.

    !!! example
        `0,2,4,6` will assign rank `0` to AIU index `0`, rank `1` to AIU index `2`, rank `2` to AIU index `4`, and rank `3` to AIU index `6`.

- An alternative is to use `AIU_WORLD_RANK_\d=0000:aa:00.0` to explicitly map ranks to `PCI` addresses (make sure there are no duplicates used at runtime).

- A bash script that uses `/opt/sentient/senlib/bin/senlib_unit_test` can check whether each `AIU` allocated to the pod passes a basic test:
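
    The script itself is not reproduced here; below is a hypothetical sketch that loops over the `AIU_WORLD_RANK_*` variables and runs the unit test against each card, assuming `senlib_unit_test` selects its device from `AIU_WORLD_RANK_0` (the exact invocation may differ):

    ```bash
    #!/usr/bin/env bash
    # Hypothetical sketch: probe each AIU allocated to the pod with senlib_unit_test.
    # Assumes AIU_WORLD_RANK_<n> holds the PCI address of each allocated card and that
    # senlib_unit_test honors AIU_WORLD_RANK_0 for device selection.
    set -u
    for var in $(env | grep -oE '^AIU_WORLD_RANK_[0-9]+' | sort); do
        addr="${!var}"
        echo "=== Testing ${var} (${addr}) ==="
        if AIU_WORLD_RANK_0="${addr}" /opt/sentient/senlib/bin/senlib_unit_test; then
            echo "PASS ${addr}"
        else
            echo "FAIL ${addr}"
        fi
    done
    ```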

`DTLOG_LEVEL=INFO` (piped to a file) can help you see which device addresses are actually in use. Look for the string `Opened: SEN:VFIO`.
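
Putting these pieces together, a minimal sketch (the device indices, PCI address, and workload command are illustrative; adapt them to what `aiu-query-devices` reports in your pod):

```bash
# Inspect the topology hints exposed to the pod.
/opt/sentient/bin/aiu-query-devices
env | grep -E '^AIU_TIER_[0-9]+_SET_[0-9]+_RANK_[0-9]+' | sort

# Pin ranks to AIU indices (rank 0 -> index 0, rank 1 -> index 2, ...).
export SPYRE_DEVICES=0,2,4,6

# Or map a rank directly to a PCI address (no duplicate addresses at runtime).
export AIU_WORLD_RANK_0=0000:aa:00.0

# Run the workload with verbose device logs piped to a file, then check
# which device addresses were actually opened.
DTLOG_LEVEL=INFO <your vllm-spyre command> > dtlog.txt 2>&1
grep 'Opened: SEN:VFIO' dtlog.txt
```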

!!! tip
    To stop massive log spew, this configuration is ideal:

    ```bash
    export DTLOG_LEVEL=ERROR
    export TORCH_SENDNN_LOG=CRITICAL
    ```

### Topology Aware Allocation

This section is specific to the AIU operator and scheduling workloads onto specific cards.

(TODO: link to docs once they exist)

- This mode lets users request a specific set of AIU cards based on `PCI` topology. Using it guarantees that AIU cards of a particular class are picked up on the node:

    - `Tier0` provides a set of cards on the same `PCI` switch.
    - `Tier1` provides a set of cards from `PCI` switches at most one hop away.
    - `Tier2` provides a set of cards from `PCI` switches at most two hops away.

- Running a multi-AIU job using `ibm.com/aiu_pf_tier0`, `tier1`, or `tier2`:

    - This resource type is used to pick up a topology-aware card set, which is required to run tensor parallel (`TP`) workloads more effectively. By using a `tierX` class resource, `TP` users automatically get a best-performing card set for the workload.

    - The maximum number of allocatable resources in each tier depends on the platform and cluster, but you can get up to:

        - `Tier0`: `4` cards
        - `Tier1`: `8` cards
        - `Tier2`: `16` cards

- Devices in `tier0` can do peer-to-peer (`P2P`) `RDMA`; devices on different trees use `Host DMA`, sharing files through `/dev/shm`.
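
For example, a hypothetical pod spec fragment requesting a `tier0` card set for a 4-way `TP` workload (the container name and image are placeholders; only the `ibm.com/aiu_pf_tier0` resource name comes from the text above):

```yaml
# Hypothetical fragment: request four cards that sit on the same PCI switch.
spec:
  containers:
    - name: vllm-spyre               # placeholder
      image: <your-vllm-spyre-image> # placeholder
      resources:
        limits:
          ibm.com/aiu_pf_tier0: 4
```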

!!! warning
    If you request more cards than the switch supports, the pod will never be scheduled. In the above example, if you specify `ibm.com/aiu_pf_tier0: 5` in your yaml, the pod will never be scheduled because the maximum set of cards in `tier0` is `4`.