3333 get_click_position_in_percent ,
3434 get_label_coordinates ,
3535)
36+ from operate .utils .style import (
37+ ANSI_GREEN ,
38+ ANSI_RED ,
39+ ANSI_RESET ,
40+ )
3641
3742
3843# Load configuration
@@ -347,12 +352,9 @@ async def call_gpt_4_v_labeled(messages, objective):
347352 return decision_content
348353
349354 label_data = parse_click_content (click_content )
350- print ("[app.py][click] label to click =>" , label_data .get ("label" ))
351- print ("[app.py][click] label_data" , label_data )
352355
353356 if label_data and "label" in label_data :
354357 coordinates = get_label_coordinates (label_data ["label" ], label_coordinates )
355- # print("[app.py][click] coordinates", coordinates)
356358 image = Image .open (
357359 io .BytesIO (base64 .b64decode (img_base64 ))
358360 ) # Load the image to get its size
@@ -361,24 +363,28 @@ async def call_gpt_4_v_labeled(messages, objective):
361363 coordinates , image_size
362364 )
363365 if not click_position_percent :
364- raise Exception ("Failed to get click position in percent" )
366+ print (
367+ f"{ ANSI_GREEN } [Self-Operating Computer]{ ANSI_RED } [Error] Failed to get click position in percent. Trying another method { ANSI_RESET } "
368+ )
369+ return call_gpt_4_v (messages , objective )
365370
366371 x_percent = f"{ click_position_percent [0 ]:.2f} %"
367372 y_percent = f"{ click_position_percent [1 ]:.2f} %"
368373 click_action = f'CLICK {{ "x": "{ x_percent } ", "y": "{ y_percent } ", "description": "{ label_data ["decision" ]} ", "reason": "{ label_data ["reason" ]} " }}'
374+
375+ else :
369376 print (
370- f"[app.py][click] returning click precentages: y - { y_percent } , x - { x_percent } "
377+ f"{ ANSI_GREEN } [Self-Operating Computer] { ANSI_RED } [Error] No label found. Trying another method { ANSI_RESET } "
371378 )
372- else :
373- print ("[app.py][click][error] no label found" )
374- print ("[app.py][click][error] label_data" , label_data )
375- raise Exception ("Failed to get click position in percent" )
379+ return call_gpt_4_v (messages , objective )
376380
377381 return click_action
378382
379383 except Exception as e :
380- print (f"Error parsing JSON: { e } " )
381- return "Failed take action after looking at the screenshot"
384+ print (
385+ f"{ ANSI_GREEN } [Self-Operating Computer]{ ANSI_RED } [Error] Something went wrong. Trying another method { ANSI_RESET } "
386+ )
387+ return call_gpt_4_v (messages , objective )
382388
383389
384390async def fetch_openai_response_async (messages ):
0 commit comments