From 5af073e8a6f5371e24aedd0b5b4b2db59b7e9dcc Mon Sep 17 00:00:00 2001
From: Thomas Proisl
Date: Tue, 25 Oct 2022 16:22:21 +0200
Subject: [PATCH] Try to use multiprocessing start method 'fork' if available (issue #14)

---
 someweta/cli.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/someweta/cli.py b/someweta/cli.py
index b2c10f9..85ecb36 100644
--- a/someweta/cli.py
+++ b/someweta/cli.py
@@ -216,6 +216,10 @@ def main():
         t0 = time.perf_counter()
         corpus_size = 0
         if args.parallel > 1:
+            try:
+                multiprocessing.set_start_method("fork")
+            except ValueError:
+                logging.warning(f"Multiprocessing start method 'fork' is not available on your operating system. Using method '{multiprocessing.get_start_method()}' instead. Note that this can lead to a massive overhead when creating the worker processes and to an increased memory usage.")
             tagged = parallel_tagging(args.CORPUS, asptagger, args.parallel, xml=args.xml, sentence_tag=args.sentence_tag)
         else:
             tagged = single_core_tagging(args.CORPUS, asptagger, xml=args.xml, sentence_tag=args.sentence_tag)