Allow multiprocessshared to spawn process and delete directly with obj #37112
```diff
@@ -25,6 +25,10 @@
 import logging
 import multiprocessing.managers
 import os
+import time
+import traceback
+import atexit
+import sys
 import tempfile
 import threading
 from typing import Any
```
```diff
@@ -79,6 +83,10 @@ def singletonProxy_release(self):
     assert self._SingletonProxy_valid
     self._SingletonProxy_valid = False

+  def unsafe_hard_delete(self):
+    assert self._SingletonProxy_valid
+    self._SingletonProxy_entry.unsafe_hard_delete()
+
   def __getattr__(self, name):
     if not self._SingletonProxy_valid:
       raise RuntimeError('Entry was released.')
```
```diff
@@ -105,13 +113,16 @@ def __dir__(self):
     dir = self._SingletonProxy_entry.obj.__dir__()
     dir.append('singletonProxy_call__')
     dir.append('singletonProxy_release')
+    dir.append('unsafe_hard_delete')
     return dir


 class _SingletonEntry:
   """Represents a single, refcounted entry in this process."""
-  def __init__(self, constructor, initialize_eagerly=True):
+  def __init__(
+      self, constructor, initialize_eagerly=True, hard_delete_callback=None):
     self.constructor = constructor
+    self._hard_delete_callback = hard_delete_callback
     self.refcount = 0
     self.lock = threading.Lock()
     if initialize_eagerly:
```
```diff
@@ -141,14 +152,28 @@ def unsafe_hard_delete(self):
     if self.initialied:
       del self.obj
       self.initialied = False
+    if self._hard_delete_callback:
+      self._hard_delete_callback()


 class _SingletonManager:
   entries: Dict[Any, Any] = {}

-  def register_singleton(self, constructor, tag, initialize_eagerly=True):
+  def __init__(self):
+    self._hard_delete_callback = None
+
+  def set_hard_delete_callback(self, callback):
+    self._hard_delete_callback = callback
+
+  def register_singleton(
+      self,
+      constructor,
+      tag,
+      initialize_eagerly=True,
+      hard_delete_callback=None):
     assert tag not in self.entries, tag
-    self.entries[tag] = _SingletonEntry(constructor, initialize_eagerly)
+    self.entries[tag] = _SingletonEntry(
+        constructor, initialize_eagerly, hard_delete_callback)

   def has_singleton(self, tag):
     return tag in self.entries
```
```diff
@@ -160,7 +185,8 @@ def release_singleton(self, tag, obj):
     return self.entries[tag].release(obj)

   def unsafe_hard_delete_singleton(self, tag):
-    return self.entries[tag].unsafe_hard_delete()
+    self.entries[tag].unsafe_hard_delete()
+    self._hard_delete_callback()


 _process_level_singleton_manager = _SingletonManager()
```
```diff
@@ -200,9 +226,99 @@ def __call__(self, *args, **kwargs):
   def __getattr__(self, name):
     return getattr(self._proxyObject, name)

+  def __setstate__(self, state):
+    self.__dict__.update(state)
+
+  def __getstate__(self):
+    return self.__dict__
```
Contributor: I assume this is so that this is pickleable, but is it valid? Normally I'd expect this not to be pickleable, since the proxy objects aren't necessarily valid in another context.

Author: Yeah, this is exactly what was needed for the pickling. It does seem to be valid in testing with the custom-built Beam version loaded on a custom container.

Contributor: I think it would only be valid if you unpickle onto the same machine (and maybe even in the same process). Could you remind me what unpickling issues you ran into?
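One likely reason explicit `__getstate__`/`__setstate__` are needed here (an inference from Python's pickling rules, not something stated in the PR): the wrapper forwards unknown attributes through `__getattr__`, so pickle's lookup of `__setstate__` during unpickling is forwarded too, and at that point the instance `__dict__` is still empty, which makes the forwarding recurse before `_proxyObject` exists. A minimal sketch with toy classes (hypothetical `_Forwarder`/`_SafeForwarder`, not Beam types):

```python
# Toy illustration of why a __getattr__-forwarding wrapper needs explicit
# __getstate__/__setstate__; these classes are stand-ins, not Beam code.
import pickle


class _Forwarder:
  def __init__(self, target):
    self._target = target

  def __getattr__(self, name):
    # Called whenever normal lookup fails. During unpickling the instance
    # __dict__ is empty, so looking up '_target' re-enters __getattr__.
    return getattr(self._target, name)


class _SafeForwarder(_Forwarder):
  def __getstate__(self):
    return self.__dict__

  def __setstate__(self, state):
    # Restore the instance dict directly, bypassing __getattr__ forwarding.
    self.__dict__.update(state)


if __name__ == '__main__':
  try:
    restored = pickle.loads(pickle.dumps(_Forwarder([1, 2, 3])))
    print('plain forwarder survived:', restored.__dict__)
  except RecursionError:
    # Typical outcome: pickle's __setstate__ lookup recurses via __getattr__.
    print('plain forwarder recursed during unpickling')

  safe = pickle.loads(pickle.dumps(_SafeForwarder([1, 2, 3])))
  print(safe.count(2))  # forwarding still works after the round trip -> 1
```

Note this only fixes the mechanics of (un)pickling the wrapper itself; it does not by itself make the wrapped AutoProxy usable in a process that cannot reach the original manager server, which is the validity concern raised above.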
```diff
+  def get_auto_proxy_object(self):
+    return self._proxyObject
+
+  def unsafe_hard_delete(self):
+    try:
+      self._proxyObject.unsafe_hard_delete()
+    except (EOFError, ConnectionResetError, BrokenPipeError):
+      pass
+    except Exception as e:
+      logging.warning(
+          "Exception %s when trying to hard delete shared object proxy", e)
+
+
+def _run_server_process(address_file, tag, constructor, authkey):
+  """
+  Runs in a separate process.
+  Includes a 'Suicide Pact' monitor: If parent dies, I die.
+  """
+  parent_pid = os.getppid()
+
+  def cleanup_files():
+    logging.info("Server process exiting. Deleting files for %s", tag)
+    try:
+      if os.path.exists(address_file):
+        os.remove(address_file)
+      if os.path.exists(address_file + ".error"):
+        os.remove(address_file + ".error")
+    except Exception:
+      pass
+
+  def handle_unsafe_hard_delete():
+    cleanup_files()
+    os._exit(0)
+
+  def _monitor_parent():
+    """Checks if parent is alive every second."""
+    while True:
+      try:
+        os.kill(parent_pid, 0)
```
Contributor: Why are we sending a kill signal to the parent process? Isn't this the opposite of what we want?

Author: This is not actually a kill signal; it uses that interface to perform a check. It fails with OSError if parent_pid is dead, and nothing happens if the parent is alive.

Contributor: Could you help me understand why this happens? https://www.geeksforgeeks.org/python/python-os-kill-method/ seems to say this will actually send the kill signal. Does the parent just ignore it?
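For reference on the signal-0 idiom discussed above: on POSIX, `kill(pid, 0)` performs the existence and permission checks of a kill call but delivers no signal, so `os.kill(pid, 0)` only raises if the target process is gone (or not signalable). A small standalone sketch, not part of the PR (note this relies on POSIX semantics; `os.kill` behaves differently on Windows):

```python
# Liveness probe using "signal 0"; no signal is actually delivered (POSIX).
import os


def pid_is_alive(pid: int) -> bool:
  try:
    os.kill(pid, 0)  # checks existence/permissions only; sends nothing
  except ProcessLookupError:
    return False     # no process with this PID
  except PermissionError:
    return True      # process exists but is owned by another user
  return True


if __name__ == '__main__':
  print(pid_is_alive(os.getpid()))   # True: this process is running
  print(pid_is_alive(os.getppid()))  # True while the parent is still alive
```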
```diff
+      except OSError:
+        logging.warning(
+            "Process %s detected Parent %s died. Self-destructing.",
+            os.getpid(),
+            parent_pid)
+        cleanup_files()
+        os._exit(0)
+      time.sleep(0.5)
+
+  atexit.register(cleanup_files)
+
+  try:
+    t = threading.Thread(target=_monitor_parent, daemon=True)
+    t.start()
+
+    logging.getLogger().setLevel(logging.INFO)
+    multiprocessing.current_process().authkey = authkey
+
+    serving_manager = _SingletonRegistrar(
+        address=('localhost', 0), authkey=authkey)
+    _process_level_singleton_manager.set_hard_delete_callback(
+        handle_unsafe_hard_delete)
+    _process_level_singleton_manager.register_singleton(
+        constructor,
+        tag,
+        initialize_eagerly=True,
+        hard_delete_callback=handle_unsafe_hard_delete)
+
+    server = serving_manager.get_server()
+    logging.info(
+        'Process %s: Proxy serving %s at %s', os.getpid(), tag, server.address)
+
+    with open(address_file + '.tmp', 'w') as fout:
+      fout.write('%s:%d' % server.address)
+    os.rename(address_file + '.tmp', address_file)
+
+    server.serve_forever()
+
+  except Exception:
+    tb = traceback.format_exc()
+    try:
+      with open(address_file + ".error.tmp", 'w') as fout:
+        fout.write(tb)
+      os.rename(address_file + ".error.tmp", address_file + ".error")
+    except Exception:
+      print(f"CRITICAL ERROR IN SHARED SERVER:\n{tb}", file=sys.stderr)
+    os._exit(1)
+
+
 class MultiProcessShared(Generic[T]):
   """MultiProcessShared is used to share a single object across processes.
```
```diff
@@ -252,7 +368,8 @@ def __init__(
       tag: Any,
       *,
       path: str = tempfile.gettempdir(),
-      always_proxy: Optional[bool] = None):
+      always_proxy: Optional[bool] = None,
+      spawn_process: bool = False):
     self._constructor = constructor
     self._tag = tag
     self._path = path
```
```diff
@@ -262,6 +379,7 @@ def __init__(
     self._rpc_address = None
     self._cross_process_lock = fasteners.InterProcessLock(
         os.path.join(self._path, self._tag) + '.lock')
+    self._spawn_process = spawn_process

   def _get_manager(self):
     if self._manager is None:
```
```diff
@@ -301,6 +419,10 @@ def acquire(self):
     # Caveat: They must always agree, as they will be ignored if the object
     # is already constructed.
     singleton = self._get_manager().acquire_singleton(self._tag)
+    # Trigger a sweep of zombie processes.
+    # Calling active_children() has the side effect of joining any finished
+    # processes, effectively reaping zombies from previous unsafe_hard_deletes.
+    if self._spawn_process: multiprocessing.active_children()
```
Contributor: Style nit, to be consistent with the rest of the repo:

```diff
-    if self._spawn_process: multiprocessing.active_children()
+    if self._spawn_process:
+      multiprocessing.active_children()
```

Author: Updated.
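As background for the comment in the hunk above: `multiprocessing.active_children()` is documented to join any child processes that have already finished as a side effect of building its list, which is what reaps the defunct server processes left behind by a hard delete. A standalone sketch (not PR code) of that behavior:

```python
# Demonstrates that active_children() implicitly joins finished children,
# so defunct ("zombie") child processes do not accumulate.
import multiprocessing
import time


def _quick_task():
  pass  # child exits immediately


if __name__ == '__main__':
  child = multiprocessing.Process(target=_quick_task)
  child.start()
  time.sleep(1.0)  # child has exited but has not been joined yet
  # active_children() polls every known child and joins the finished ones.
  print(multiprocessing.active_children())  # [] once the child is reaped
  print(child.exitcode)                     # 0
```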
Contributor (outdated): I'd typically expect the caller to catch/handle this. As it is, there is no indication passed back that this call failed.

Author: Sounds good! Updated.
Contributor: Could you help me understand why we need the unsafe_hard_delete changes? It's not really clear to me what behavior this enables that we can't already do.

Author: It's mainly because the way models are passed around is directly a _SingletonProxy instead of a _SingletonEntry, so we need a way to call delete directly with the _SingletonProxy.

Contributor: Ok, but let's at least give it a name like singletonProxy_unsafe_hard_delete. Otherwise we will run into issues if someone has an object with a function or property called unsafe_hard_delete, which seems like it could happen.
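To make the intended use concrete, here is a hedged usage sketch of what this PR appears to enable, based only on the diff above. `ExpensiveModel` and the `'my_model'` tag are placeholder names, and the method may end up renamed to `singletonProxy_unsafe_hard_delete` per the comment above:

```python
# Hypothetical usage of the new spawn_process flag and proxy-level hard delete.
from apache_beam.utils.multi_process_shared import MultiProcessShared


class ExpensiveModel:  # placeholder for e.g. a large ML model
  def predict(self, x):
    return x * 2


if __name__ == '__main__':
  shared = MultiProcessShared(
      ExpensiveModel,
      tag='my_model',
      always_proxy=True,    # hand out proxies rather than the raw object
      spawn_process=True)   # new in this PR: serve from a spawned process
  model = shared.acquire()  # proxy to the singleton living in the server
  print(model.predict(21))  # computed in the serving process

  # New in this PR: tear the shared object down directly through the proxy,
  # without needing the _SingletonEntry or a manager lookup by tag.
  model.unsafe_hard_delete()
```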