Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 35 additions & 0 deletions python/tests/repro_shutdown_loop.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary.

"""Hot loop repro for test_shutdown_host_mesh.

Usage (single iteration):
buck run @fbcode//mode/opt fbcode//monarch/python/tests:repro_shutdown_loop

To loop until failure, see run_shutdown_loop.sh (note: as written it loops the
test_shutdown_host_mesh buck test rather than invoking this script).
"""

from __future__ import annotations

from monarch._src.actor.actor_mesh import Actor, context
from monarch._src.actor.endpoint import endpoint
from monarch._src.job.process import ProcessJob
from scoped_state import scoped_state


class RankActor(Actor):
    """Actor exposing a single endpoint that reports its own rank."""

    @endpoint
    async def get_rank(self) -> int:
        """Return the ``rank.rank`` value of the current actor instance."""
        # Look up the actor instance from the ambient context, then read its rank.
        instance = context().actor_instance
        return instance.rank.rank


def main() -> None:
    """Run one spawn / call / shutdown cycle on a two-host ``ProcessJob``.

    Inside a ``scoped_state`` context (``cached_path=None``) this spawns procs
    on the job's hosts with ``per_host={"gpus": 2}``, spawns ``RankActor`` under
    the name ``"actor"``, performs one ``get_rank`` call via ``choose()``, and
    finally shuts the host mesh down, waiting on the result of each step.
    """
    job = ProcessJob({"hosts": 2})
    with scoped_state(job, cached_path=None) as state:
        host_mesh = state.hosts
        proc_mesh = host_mesh.spawn_procs(per_host={"gpus": 2})
        rank_actors = proc_mesh.spawn("actor", RankActor)
        # Make one get_rank call and fetch its result before shutting down.
        rank_actors.get_rank.choose().get()
        # The shutdown under test: wait for it to complete.
        host_mesh.shutdown().get()


# Script entry point: run a single iteration when executed directly.
if __name__ == "__main__":
    main()
29 changes: 29 additions & 0 deletions python/tests/run_shutdown_loop.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#!/bin/bash
# (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary.
#
# Runs test_shutdown_host_mesh in a loop until failure.
# Usage: ./run_shutdown_loop.sh
#
# The stderr of every iteration is captured to a temporary file and printed
# only when that iteration fails. This way the diagnostics shown belong to the
# failing run itself; re-running the test to obtain stderr is unreliable for an
# intermittent failure because the second run may pass.
#
# Exit status: 1 as soon as an iteration fails; otherwise loops forever.

set -euo pipefail

TARGET="fbcode//monarch/python/tests:test_host_mesh"
TEST="test_shutdown_host_mesh"

# Scratch file holding the stderr of the current iteration; removed on exit.
stderr_log=$(mktemp)
trap 'rm -f "$stderr_log"' EXIT

# Print the milliseconds elapsed since the nanosecond timestamp passed as $1.
# NOTE: `date +%s%N` relies on %N (nanoseconds), which is a GNU date extension.
elapsed_ms() {
    local now
    now=$(date +%s%N)
    echo $(( (now - $1) / 1000000 ))
}

i=0
while true; do
    i=$((i + 1))
    t0=$(date +%s%N)
    # Redirecting with `>` truncates the log, so it only ever holds this run.
    if buck test @fbcode//mode/opt "$TARGET" -- "$TEST" 2>"$stderr_log"; then
        echo "[${i}] OK ($(elapsed_ms "$t0")ms)"
    else
        echo "[${i}] FAIL ($(elapsed_ms "$t0")ms)"
        echo ""
        echo "Failed on iteration $i. stderr from the failing run:"
        cat "$stderr_log" >&2
        exit 1
    fi
done
Loading