Skip to content
Open
23 changes: 14 additions & 9 deletions test/test_other.py
Original file line number Diff line number Diff line change
Expand Up @@ -15169,16 +15169,21 @@ def test_empath_split(self):
#include <iostream>
void foo() { std::cout << "foo" << std::endl; }
''')
create_file('path_list', r'''
create_file('path_list.txt', r'''
myapp
main.cpp
foo.cpp

lib1
/emsdk/emscripten/system

lib2
/emsdk/emscripten/system/lib/libc/musl
/emsdk/emscripten/system/lib/libcxx
''')

self.run_process([EMCC, 'main.cpp', 'foo.cpp', '-gsource-map', '-g2', '-o', 'test.js'])
self.run_process([empath_split, 'test.wasm', 'path_list', '-g', '-o', 'test_primary.wasm', '--out-prefix=test_'])
self.run_process([empath_split, 'test.wasm', 'path_list.txt', '-g', '-o', 'test_primary.wasm', '--out-prefix=test_'])

# Check if functions are correctly assigned and split with the specified
# paths. When one path contains another, the inner path should take its
Expand All @@ -15190,17 +15195,17 @@ def has_defined_function(file, func):
return pattern.search(f.read()) is not None

# main.cpp
self.assertTrue(has_defined_function('test_0.wasm', '__original_main'))
self.assertTrue(has_defined_function('test_myapp.wasm', '__original_main'))
# foo.cpp
self.assertTrue(has_defined_function('test_1.wasm', r'foo\\28\\29'))
self.assertTrue(has_defined_function('test_myapp.wasm', r'foo\\28\\29'))
# /emsdk/emscripten/system
self.assertTrue(has_defined_function('test_2.wasm', '__abort_message'))
self.assertTrue(has_defined_function('test_2.wasm', 'pthread_cond_wait'))
self.assertTrue(has_defined_function('test_lib1.wasm', '__abort_message'))
self.assertTrue(has_defined_function('test_lib1.wasm', 'pthread_cond_wait'))
# /emsdk/emscripten/system/lib/libc/musl
self.assertTrue(has_defined_function('test_3.wasm', 'strcmp'))
self.assertTrue(has_defined_function('test_lib2.wasm', 'strcmp'))
# /emsdk/emscripten/system/lib/libcxx
self.assertTrue(has_defined_function('test_4.wasm', r'std::__2::ios_base::getloc\\28\\29\\20const'))
self.assertTrue(has_defined_function('test_4.wasm', r'std::uncaught_exceptions\\28\\29'))
self.assertTrue(has_defined_function('test_lib2.wasm', r'std::__2::ios_base::getloc\\28\\29\\20const'))
self.assertTrue(has_defined_function('test_lib2.wasm', r'std::uncaught_exceptions\\28\\29'))

# Check --print-sources option
out = self.run_process([empath_split, 'test.wasm', '--print-sources'], stdout=PIPE).stdout
Expand Down
114 changes: 90 additions & 24 deletions tools/empath-split.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,26 @@
$ emcc -g2 -gsource-map a.o b.o -o result.js
See https://emscripten.org/docs/porting/Debugging.html for more details.

This takes a wasm file and a paths file, which is a text file containing a list
of paths as inputs. The paths file should contain a single path per line. A
single split module will be generated per specified path. If a specified path
contains another specified path, functions contained in the inner path will be
split as the inner path's module, and the rest of the functions will be split as
the outer path's module. Functions that do not belong to any of the specified
paths will remain in the primary module.
This takes a wasm file and a paths file as inputs. The paths file defines how
to split modules. The format is similar to the manifest file for wasm-split, but
with paths instead of function names. A module is defined by a name on a line,
followed by paths on subsequent lines. Modules are separated by empty lines.
For example:
module1
path/to/a
path/to/b

module2
path/to/c

This will create two modules, 'module1' and 'module2'. 'module1' will contain
functions from source files under path/to/a and path/to/b. 'module2' will
contain functions from source files under path/to/c.

If a specified path contains another specified path, functions contained in the
inner path will be split as the inner path's module, and the rest of the
functions will be split as the outer path's module. Functions that do not belong
to any of the specified paths will remain in the primary module.

The paths in the paths file can be either absolute or relative, but they should
match those of the 'sources' field in the source map file. Sometimes a source map's
Expand Down Expand Up @@ -238,6 +251,50 @@ def is_synthesized_func(func):
return path_to_funcs


def normalize_path(path):
    """Return a canonical form of `path` for comparing paths-file entries.

    Three steps are applied in order:
      1. Leading and trailing whitespace is removed.
      2. The result is passed through utils.normalize_path().
      3. Trailing os.sep characters are dropped, so that /a/b/c and /a/b/c/
         are treated as the same path.
    """
    trimmed = path.strip()
    normalized = utils.normalize_path(trimmed)
    return normalized.rstrip(os.sep)


def parse_paths_file(paths_file_content):
    """Parse the text of a paths file into a {module name: [paths]} dict.

    The content is a sequence of groups separated by blank lines. Within a
    group, the first non-blank line is the module name and every following
    line is a path assigned to that module. Each path is canonicalized with
    normalize_path() before being stored.

    Diagnostics:
      * A module with no paths produces a warning; it is still recorded with
        an empty path list.
      * A (normalized) path that was already assigned to a module is reported
        through exit_with_error().
      * Content that defines no modules at all is reported through
        exit_with_error().

    Returns:
      A dict mapping each module name to its list of normalized paths, in
      the order the modules appear in the file.
    """
    module_to_paths = {}
    path_to_module = {}
    cur_module = None
    cur_paths = []

    # A trailing empty line acts as a sentinel so that the last module is
    # recorded by the same code path as modules followed by a blank line.
    for line in [*paths_file_content.splitlines(), '']:
        line = line.strip()

        if not line:
            # A blank line ends the current module, if there is one.
            if cur_module:
                if not cur_paths:
                    diagnostics.warn(f"Module '{cur_module}' has no paths specified.")
                # NOTE(review): a module name that appears in more than one
                # group overwrites the earlier group's paths here - confirm
                # whether that should be an error instead.
                module_to_paths[cur_module] = cur_paths
                cur_module = None
                cur_paths = []
            continue

        if not cur_module:
            # The first non-blank line of a group is the module name.
            cur_module = line
            continue

        path = normalize_path(line)
        if path in path_to_module:
            # This must be an f-string so the offending path and module names
            # are actually interpolated into the message.
            exit_with_error(f"Path '{path}' cannot be assigned to module '{cur_module}'; it is already assigned to module '{path_to_module[path]}'")
        cur_paths.append(path)
        path_to_module[path] = cur_module

    if not module_to_paths:
        exit_with_error('The paths file is empty or invalid.')

    return module_to_paths


def main():
args, forwarded_args = parse_args()
check_errors(args)
Expand All @@ -247,32 +304,41 @@ def main():
print_sources(sourcemap)
return

paths = utils.read_file(args.paths_file).splitlines()
paths = [utils.normalize_path(path.strip()) for path in paths if path.strip()]
# To make /a/b/c and /a/b/c/ equivalent
paths = [path.rstrip(os.sep) for path in paths]
# Remove duplicates
paths = list(dict.fromkeys(paths))
content = utils.read_file(args.paths_file)
module_to_paths = parse_paths_file(content)

# Compute {path: list of functions} map
path_to_funcs = get_path_to_functions_map(args.wasm, sourcemap, paths)
all_paths = []
for paths in module_to_paths.values():
all_paths.extend(paths)
path_to_funcs = get_path_to_functions_map(args.wasm, sourcemap, all_paths)

# Write .manifest file
with tempfile.NamedTemporaryFile(suffix=".manifest", mode='w+', delete=args.preserve_manifest) as f:
manifest = f.name
for i, path in enumerate(paths):
f.write(f'{i}\n')
if not path_to_funcs[path]:
diagnostics.warn(f'{path} does not match any functions')
for i, (module, paths) in enumerate(module_to_paths.items()):
if i != 0: # Unless we are the first entry add a newline separator
f.write('\n')
funcs = []
for path in paths:
if not path_to_funcs[path]:
diagnostics.warn(f'{path} does not match any functions')
funcs += path_to_funcs[path]
if not funcs:
diagnostics.warn(f"Module '{module}' does not match any functions")

if args.verbose:
print(f'{path}: {len(path_to_funcs[path])} functions')
for func in path_to_funcs[path]:
print(' ' + func)
print(f'{module}: {len(funcs)} functions')
for path in paths:
if path in path_to_funcs:
print(f' {path}: {len(path_to_funcs[path])} functions')
for func in path_to_funcs[path]:
print(' ' + func)
print()
for func in path_to_funcs[path]:

f.write(f'{module}\n')
for func in funcs:
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Annoyingly, if you want this to be deterministic then I think you need to make this a dict rather than a set.

The dict in Python became insertion-ordered in 3.7, but the set remains unordered. Annoying...

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Changed it and also one more place from set to list: 7eedf6d
Apparently they didn't even need to be sets after all...

f.write(func + '\n')
if i < len(paths) - 1:
f.write('\n')
f.flush()

cmd = [args.wasm_split, '--multi-split', args.wasm, '--manifest', manifest]
Expand Down