104104from ..spec_writer import SpecWriter
105105from ..utils import (
106106 add_metadata_to_request ,
107+ instance_base_url ,
107108 query_billing_projects_with_cost ,
108109 query_billing_projects_without_cost ,
109110 regions_to_bits_rep ,
@@ -379,7 +380,7 @@ async def _get_job_record(app, batch_id, job_id):
379380
380381 record = await db .select_and_fetchone (
381382 """
382- SELECT jobs.state, jobs.spec, ip_address, format_version, jobs.attempt_id, t.attempt_id AS last_cancelled_attempt_id
383+ SELECT jobs.state, jobs.spec, ip_address, format_version, jobs.attempt_id, t.attempt_id AS last_cancelled_attempt_id, instances.version as instance_version
383384FROM jobs
384385INNER JOIN batches
385386 ON jobs.batch_id = batches.id
@@ -438,11 +439,12 @@ def attempt_id_from_spec(record) -> Optional[str]:
438439 return record ['attempt_id' ] or record ['last_cancelled_attempt_id' ]
439440
440441
441- async def _get_job_container_log_from_worker (client_session , batch_id , job_id , container , ip_address ) -> bytes :
442+ async def _get_job_container_log_from_worker (client_session , batch_id , job_id , container , job_record ) -> bytes :
443+ base_url = instance_base_url (job_record ['instance_version' ], job_record ['ip_address' ])
442444 try :
443445 return await retry_transient_errors (
444446 client_session .get_read ,
445- f'http:// { ip_address } :5000 /api/v1alpha/batches/{ batch_id } /jobs/{ job_id } /log/{ container } ' ,
447+ f'{ base_url } /api/v1alpha/batches/{ batch_id } /jobs/{ job_id } /log/{ container } ' ,
446448 )
447449 except aiohttp .ClientResponseError :
448450 log .exception (f'while getting log for { (batch_id , job_id )} ' )
@@ -467,7 +469,11 @@ async def _get_job_container_log(app, batch_id, job_id, container, job_record) -
467469 state = job_record ['state' ]
468470 if state == 'Running' :
469471 return await _get_job_container_log_from_worker (
470- app [CommonAiohttpAppKeys .CLIENT_SESSION ], batch_id , job_id , container , job_record ['ip_address' ]
472+ app [CommonAiohttpAppKeys .CLIENT_SESSION ],
473+ batch_id ,
474+ job_id ,
475+ container ,
476+ job_record ,
471477 )
472478
473479 attempt_id = attempt_id_from_spec (job_record )
@@ -502,7 +508,7 @@ async def _get_job_resource_usage_from_record(
502508 batch_format_version = BatchFormatVersion (record ['format_version' ])
503509
504510 state = record ['state' ]
505- ip_address = record ['ip_address' ]
511+ base_url = instance_base_url ( record ['instance_version' ], record [ ' ip_address' ])
506512 tasks = job_tasks_from_spec (record )
507513 attempt_id = attempt_id_from_spec (record )
508514
@@ -513,7 +519,7 @@ async def _get_job_resource_usage_from_record(
513519 try :
514520 data = await retry_transient_errors (
515521 client_session .get_read_json ,
516- f'http:// { ip_address } :5000 /api/v1alpha/batches/{ batch_id } /jobs/{ job_id } /resource_usage' ,
522+ f'{ base_url } /api/v1alpha/batches/{ batch_id } /jobs/{ job_id } /resource_usage' ,
517523 )
518524 return {
519525 task : ResourceUsageMonitor .decode_to_df (base64 .b64decode (encoded_df ))
@@ -639,11 +645,11 @@ async def _get_full_job_status(app, record):
639645 assert state == 'Running'
640646 assert record ['status' ] is None
641647
642- ip_address = record ['ip_address' ]
648+ base_url = instance_base_url ( record ['instance_version' ], record [ ' ip_address' ])
643649 try :
644650 return await retry_transient_errors (
645651 client_session .get_read_json ,
646- f'http:// { ip_address } :5000 /api/v1alpha/batches/{ batch_id } /jobs/{ job_id } /status' ,
652+ f'{ base_url } /api/v1alpha/batches/{ batch_id } /jobs/{ job_id } /status' ,
647653 )
648654 except aiohttp .ClientResponseError as e :
649655 if e .status == 404 :
0 commit comments