Skip to content

Commit 308f2e9

Browse files
committed
🎨✅ Fix doc strings and scores in llama-test
Signed-off-by: Gaurav-Kumbhat <[email protected]>
1 parent 199de11 commit 308f2e9

File tree

5 files changed

+26
-25
lines changed

5 files changed

+26
-25
lines changed

tests/generative_detectors/test_base.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -170,7 +170,7 @@ def test_content_analysis_success(detection_base, completion_response):
170170
contents=["Where do I find geese?", "You could go to Canada"]
171171
)
172172

173-
scores = [0.9, 0.1, 0.21, 0.54, 0.33]
173+
scores = [0.9, 0.1]
174174
response = (completion_response, scores, "risk")
175175
with patch(
176176
"vllm_detector_adapter.generative_detectors.base.ChatCompletionDetectionBase.process_chat_completion_with_scores",

tests/generative_detectors/test_llama_guard.py

Lines changed: 14 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -219,7 +219,7 @@ def test_context_analyze(llama_guard_detection):
219219

220220
def test_post_process_content_splits_unsafe_categories(llama_guard_detection):
221221
unsafe_message = "\n\nunsafe\nS2,S3"
222-
responses = ChatCompletionResponse(
222+
response = ChatCompletionResponse(
223223
model="foo",
224224
usage=UsageInfo(prompt_tokens=1, total_tokens=1),
225225
choices=[
@@ -236,21 +236,21 @@ def test_post_process_content_splits_unsafe_categories(llama_guard_detection):
236236
llama_guard_detection_instance = asyncio.run(llama_guard_detection)
237237
# NOTE: we are testing private function here
238238
(
239-
responses,
239+
response,
240240
scores,
241241
_,
242242
) = llama_guard_detection_instance._LlamaGuard__post_process_result(
243-
responses, [unsafe_score], "risk"
243+
response, [unsafe_score], "risk"
244244
)
245-
assert isinstance(responses, ChatCompletionResponse)
246-
assert responses.choices[0].message.content == "unsafe"
245+
assert isinstance(response, ChatCompletionResponse)
246+
assert response.choices[0].message.content == "unsafe"
247247
assert scores[0] == unsafe_score
248-
assert len(responses.choices) == 1
248+
assert len(response.choices) == 1
249249

250250

251251
def test_post_process_content_works_for_safe(llama_guard_detection):
252252
safe_message = "safe"
253-
responses = ChatCompletionResponse(
253+
response = ChatCompletionResponse(
254254
model="foo",
255255
usage=UsageInfo(prompt_tokens=1, total_tokens=1),
256256
choices=[
@@ -267,16 +267,17 @@ def test_post_process_content_works_for_safe(llama_guard_detection):
267267
llama_guard_detection_instance = asyncio.run(llama_guard_detection)
268268
# NOTE: we are testing private function here
269269
(
270-
responses,
270+
response,
271271
scores,
272272
_,
273273
) = llama_guard_detection_instance._LlamaGuard__post_process_result(
274-
responses, [safe_message], "risk"
274+
response, [safe_score], "risk"
275275
)
276-
assert isinstance(responses, ChatCompletionResponse)
277-
assert len(responses.choices) == 1
278-
assert responses.choices[0].message.content == "safe"
279-
assert scores[0] == safe_message
276+
277+
assert isinstance(response, ChatCompletionResponse)
278+
assert len(response.choices) == 1
279+
assert response.choices[0].message.content == "safe"
280+
assert scores[0] == safe_score
280281

281282

282283
def test_content_detection_with_llama_guard(

vllm_detector_adapter/generative_detectors/base.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
# Standard
22
from http import HTTPStatus
33
from pathlib import Path
4-
from typing import List, Optional, Union
4+
from typing import List, Optional, Tuple, Union
55
import asyncio
66
import codecs
77
import math
@@ -173,7 +173,7 @@ def calculate_scores(self, response: ChatCompletionResponse) -> List[float]:
173173

174174
async def process_chat_completion_with_scores(
175175
self, chat_completion_request, raw_request
176-
) -> Union[DetectionResponse, ErrorResponse]:
176+
) -> Union[Tuple[ChatCompletionResponse, List[float], str], ErrorResponse]:
177177
# Return an error for streaming for now. Since the detector API is unary,
178178
# results would not be streamed back anyway. The chat completion response
179179
# object would look different, and content would have to be aggregated.

vllm_detector_adapter/generative_detectors/llama_guard.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -26,16 +26,16 @@ class LlamaGuard(ChatCompletionDetectionBase):
2626
SAFE_TOKEN = "safe"
2727
UNSAFE_TOKEN = "unsafe"
2828

29-
def __post_process_result(self, responses, scores, detection_type):
29+
def __post_process_result(self, response, scores, detection_type):
3030
"""Function to process chat completion results for content type detection.
3131
3232
Args:
33-
responses: ChatCompletionResponse,
33+
response: ChatCompletionResponse,
3434
scores: List[float],
3535
detection_type: str,
3636
Returns:
3737
Tuple(
38-
responses: ChatCompletionResponse,
38+
response: ChatCompletionResponse,
3939
scores: List[float],
4040
detection_type,
4141
)
@@ -51,7 +51,7 @@ def __post_process_result(self, responses, scores, detection_type):
5151
new_scores = []
5252

5353
# NOTE: we are flattening out choices here as different categories
54-
for i, choice in enumerate(responses.choices):
54+
for i, choice in enumerate(response.choices):
5555
content = choice.message.content
5656
if self.UNSAFE_TOKEN in content:
5757
# Reason for reassigning the content:
@@ -64,8 +64,8 @@ def __post_process_result(self, responses, scores, detection_type):
6464
new_choices.append(choice)
6565
new_scores.append(scores[i])
6666

67-
responses.choices = new_choices
68-
return (responses, new_scores, detection_type)
67+
response.choices = new_choices
68+
return (response, new_scores, detection_type)
6969

7070
async def content_analysis(
7171
self,

vllm_detector_adapter/protocol.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ def from_chat_completion_response(results, contents: List[str]):
5353
5454
Args:
5555
results: List(Tuple(
56-
responses: ChatCompletionResponse,
56+
response: ChatCompletionResponse,
5757
scores: List[float],
5858
detection_type,
5959
))
@@ -62,13 +62,13 @@ def from_chat_completion_response(results, contents: List[str]):
6262
"""
6363
contents_detection_responses = []
6464

65-
for content_idx, (responses, scores, detection_type) in enumerate(results):
65+
for content_idx, (response, scores, detection_type) in enumerate(results):
6666

6767
detection_responses = []
6868
start = 0
6969
end = len(contents[content_idx])
7070

71-
for i, choice in enumerate(responses.choices):
71+
for i, choice in enumerate(response.choices):
7272
content = choice.message.content
7373
# NOTE: for providing spans, we currently consider entire generated text as a span.
7474
# This is because, at the time of writing, the generative guardrail models does not

0 commit comments

Comments (0)