Skip to content

Commit

Permalink
Transition resilience improvements
Browse files Browse the repository at this point in the history
- Wait until the deletion is reflected in the metadata
- Refresh the directory as well if there is a connection failure
  • Loading branch information
geoffxy committed Nov 16, 2023
1 parent cacf3b9 commit ab803cd
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 1 deletion.
4 changes: 4 additions & 0 deletions src/brad/front_end/front_end.py
Original file line number Diff line number Diff line change
Expand Up @@ -263,6 +263,10 @@ async def start_session(self) -> SessionId:
)
raise
await asyncio.sleep(time_to_wait)
# Defensively refresh the blueprint and directory before
# retrying. Maybe we are getting outdated endpoint information
# from AWS.
await self._blueprint_mgr.load()

async def end_session(self, session_id: SessionId) -> None:
await self._sessions.end_session(session_id)
Expand Down
23 changes: 22 additions & 1 deletion src/brad/provisioning/rds.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,9 @@ def do_create_replica():
await asyncio.sleep(20)
await self.wait_until_instance_is_available(instance_id)

async def delete_replica(self, instance_id: str) -> None:
async def delete_replica(
self, instance_id: str, wait_until_status_updated: bool = True
) -> None:
def do_delete():
self._rds.delete_db_instance(
DBInstanceIdentifier=instance_id,
Expand All @@ -94,6 +96,11 @@ def do_delete():
loop = asyncio.get_running_loop()
await loop.run_in_executor(None, do_delete)

# Will poll until the instance's status is no longer "available".
if wait_until_status_updated:
await asyncio.sleep(10)
await self.wait_until_instance_is_not_available(instance_id)

async def wait_until_instance_is_available(
self, instance_id: str, polling_interval: float = 20
) -> None:
Expand All @@ -108,6 +115,20 @@ async def wait_until_instance_is_available(
)
await asyncio.sleep(polling_interval)

async def wait_until_instance_is_not_available(
    self, instance_id: str, polling_interval: float = 20
) -> None:
    """Block until the instance's reported status is anything but "available".

    Repeatedly describes the instance via ``self._describe_db_instance`` and
    inspects the ``DBInstanceStatus`` of the first entry in ``DBInstances``.
    Returns as soon as that status differs from ``"available"``; otherwise it
    logs a debug message, sleeps, and checks again.

    There is no timeout: the loop keeps polling for as long as the status
    stays ``"available"``.

    Args:
        instance_id: Identifier of the instance to watch.
        polling_interval: Delay passed to ``asyncio.sleep`` between
            consecutive status checks.
    """
    while True:
        described = await self._describe_db_instance(instance_id)
        current_status = described["DBInstances"][0]["DBInstanceStatus"]
        if current_status != "available":
            # The status change has been observed; nothing left to wait for.
            return
        logger.debug(
            "Waiting for Aurora instance %s to be NOT available...", instance_id
        )
        await asyncio.sleep(polling_interval)

async def wait_until_cluster_is_available(
self, cluster_id: str, polling_interval: float = 20
) -> None:
Expand Down

0 comments on commit ab803cd

Please sign in to comment.