1+ # src/envs/dipg_safety_env/client.py
2+ """
3+ Client implementation for the custom DIPGSafetyEnv.
4+
5+ This file defines the `DIPGSafetyEnv` class, which acts as the "remote control"
6+ for the environment server. Its primary job is to handle the HTTP communication:
7+ 1. It takes Python objects (like an Action) from the agent's code.
8+ 2. It converts them into JSON to send to the server.
9+ 3. It receives JSON responses from the server.
10+ 4. It parses that JSON back into useful Python objects (like Observations and Rewards).
11+ """
12+
113from core .http_env_client import HTTPEnvClient , StepResult
214from .models import DIPGAction , DIPGObservation , DIPGState
315
16+
class DIPGSafetyEnv(HTTPEnvClient[DIPGAction, DIPGObservation]):
    """
    Client for interacting with the `DIPGSafetyEnv` server.

    This class inherits from the base `HTTPEnvClient` and is specialized to
    handle the specific data types of this environment: `DIPGAction` and
    `DIPGObservation`. It only implements the (de)serialization hooks below;
    the HTTP transport itself is expected to come from the base class.
    """

    def _step_payload(self, action: DIPGAction) -> dict:
        """
        Format a `DIPGAction` as a JSON-serializable dictionary.

        This dictionary becomes the body of the HTTP POST request sent to the
        server's `/step` endpoint.

        Args:
            action: The `DIPGAction` object containing the model's response.

        Returns:
            A dictionary with the single key `"llm_response"`.
        """
        return {"llm_response": action.llm_response}

    def _parse_result(self, payload: dict) -> StepResult[DIPGObservation]:
        """
        Parse the server's JSON response into a `StepResult`.

        The server's `/step` endpoint returns a double-nested observation
        (`{'observation': {'observation': {...}}}`) while the `/reset`
        endpoint returns a single-nested one (`{'observation': {...}}`).
        Both shapes are accepted here.

        Args:
            payload: The raw dictionary parsed from the server's JSON response.

        Returns:
            A `StepResult` holding the observation, the reward (`None` when
            the payload has no `"reward"` key), and the done flag (`False`
            when the payload has no `"done"` key).
        """
        # `or {}` covers both a missing key and an explicit JSON `null`;
        # a plain `.get(key, {})` would hand back None for the latter and the
        # nesting check below would raise TypeError.
        obs_data = payload.get("observation") or {}

        # Unwrap one level only when the inner value is itself a mapping.
        # A flat observation that merely has a field called "observation"
        # (e.g. a string) must not be mistaken for the double-nested shape.
        nested = obs_data.get("observation") if isinstance(obs_data, dict) else None
        if isinstance(nested, dict):
            obs_data = nested

        # Build the typed observation from whichever level holds the fields.
        obs = DIPGObservation(**obs_data)

        return StepResult(
            observation=obs,
            reward=payload.get("reward"),
            done=payload.get("done", False),
        )

    def _parse_state(self, payload: dict) -> DIPGState:
        """
        Parse the JSON payload from the server's `/state` endpoint.

        Args:
            payload: The raw dictionary parsed from the server's JSON
                response; its keys are passed to `DIPGState` as keyword
                arguments.

        Returns:
            A structured `DIPGState` object.
        """
        return DIPGState(**payload)
0 commit comments