Skip to content

Commit 7eb6a04

Browse files
Merge pull request #2 from surfiniaburger/dipg-research
FIX: Create robust client parser for reset/step inconsistency
2 parents 919833c + f4073ad commit 7eb6a04

File tree

1 file changed

+71
-7
lines changed

1 file changed

+71
-7
lines changed

src/envs/dipg_safety_env/client.py

Lines changed: 71 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,87 @@
1+
# src/envs/dipg_safety_env/client.py
2+
"""
3+
Client implementation for the custom DIPGSafetyEnv.
4+
5+
This file defines the `DIPGSafetyEnv` class, which acts as the "remote control"
6+
for the environment server. Its primary job is to handle the HTTP communication:
7+
1. It takes Python objects (like an Action) from the agent's code.
8+
2. It converts them into JSON to send to the server.
9+
3. It receives JSON responses from the server.
10+
4. It parses that JSON back into useful Python objects (like Observations and Rewards).
11+
"""
12+
113
from core.http_env_client import HTTPEnvClient, StepResult
214
from .models import DIPGAction, DIPGObservation, DIPGState
315

16+
417
class DIPGSafetyEnv(HTTPEnvClient[DIPGAction, DIPGObservation]):
    """
    Client for interacting with the `DIPGSafetyEnv` server.

    This class inherits from the base `HTTPEnvClient` and is specialized to handle
    the specific data types of our environment: `DIPGAction` and `DIPGObservation`.
    """

    def _step_payload(self, action: DIPGAction) -> dict:
        """
        Formats the `DIPGAction` object into a JSON-serializable dictionary.

        This dictionary becomes the body of the HTTP POST request sent to the
        server's `/step` endpoint.

        Args:
            action: The `DIPGAction` object containing the model's response.

        Returns:
            A dictionary to be sent as the JSON request body.
        """
        return {"llm_response": action.llm_response}

    def _parse_result(self, payload: dict) -> StepResult[DIPGObservation]:
        """
        Parses the JSON payload from the server's response into a `StepResult`.

        This method contains critical logic to handle a known inconsistency between
        the data structures returned by the server's `/reset` and `/step` endpoints.

        Args:
            payload: The raw dictionary parsed from the server's JSON response.

        Returns:
            A structured `StepResult` object containing the observation, reward, and done status.
        """
        # The server's response contains an 'observation' key. A missing key and
        # an explicit JSON null are treated the same way: `dict.get(key, {})`
        # alone would hand back None for null and break the membership test below.
        obs_data = payload.get("observation")
        if obs_data is None:
            obs_data = {}

        # ROBUSTNESS FIX: The server's /step endpoint returns a double-nested
        # observation `{'observation': {'observation': {...}}}` while the /reset
        # endpoint returns a single-nested one `{'observation': {...}}`.
        # Only unwrap when the inner value is itself a dict, so that a flat
        # observation that merely carries a non-dict field named 'observation'
        # is not mistaken for the double-nested shape.
        if "observation" in obs_data and isinstance(obs_data["observation"], dict):
            # Double-nested (from /step): go one level deeper.
            actual_obs_data = obs_data["observation"]
        else:
            # Single-nested (from /reset): use the data directly.
            actual_obs_data = obs_data

        # Create the DIPGObservation object from the correctly identified data.
        obs = DIPGObservation(**actual_obs_data)

        # Assemble the final StepResult object for the agent.
        return StepResult(
            observation=obs,
            reward=payload.get("reward"),
            done=payload.get("done", False),
        )

    def _parse_state(self, payload: dict) -> DIPGState:
        """
        Parses the JSON payload from the server's `/state` endpoint into a `DIPGState` object.

        Args:
            payload: The raw dictionary parsed from the server's JSON response.

        Returns:
            A structured `DIPGState` object.
        """
        return DIPGState(**payload)

0 commit comments

Comments
 (0)