Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions site/source/docs/tools_reference/settings_reference.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2823,6 +2823,18 @@ then you can safely ignore this warning.

Default value: false

.. _single_file_binary_encode:

SINGLE_FILE_BINARY_ENCODE
=========================

If true, binary Wasm content is encoded using a custom UTF-8 embedding
instead of base64. This generates a smaller binary that compresses well.
Set this to false to revert back to earlier base64 encoding if you run into
issues with the binary encoding. (and please let us know of any such issues)

Default value: true

.. _auto_js_libraries:

AUTO_JS_LIBRARIES
Expand Down
10 changes: 10 additions & 0 deletions src/binaryDecode.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
// Decodes the string produced by the SINGLE_FILE binary encoding back into
// bytes: every UTF-16 code unit of the input holds (byte value + 1), so each
// unit is read and shifted back down by one.
//
// The function is marked @noinline so that Closure keeps it as a separate
// function; otherwise Closure may analyze through the WASM_BINARY_DATA
// placeholder string into this function, leading to incorrect results.
/** @noinline */
function binaryDecode(bin) {
  var length = bin.length;
  var bytes = new Uint8Array(length);
  var pos = 0;
  while (pos < length) {
    bytes[pos] = bin.charCodeAt(pos) - 1;
    ++pos;
  }
  return bytes;
}
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since this is only 5 lines, I wonder if it's worth creating a completely new file? Maybe just inline it into runtime_common.js?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

check

29 changes: 21 additions & 8 deletions src/preamble.js
Original file line number Diff line number Diff line change
Expand Up @@ -410,6 +410,10 @@ function instrumentWasmTableWithAbort() {
}
#endif

#if SINGLE_FILE && SINGLE_FILE_BINARY_ENCODE && !WASM2JS
#include "binaryDecode.js"
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we not want to support this in MINIMAL_RUNTIME too? If so maybe this could go in runtime_common.js instead?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks, we do. I lost this when resurrecting the code. Added back, and updated test to cover minimal runtime as well.

#endif

#if !SOURCE_PHASE_IMPORTS && !WASM_ESM_INTEGRATION
var wasmBinaryFile;

Expand All @@ -423,27 +427,35 @@ function getWasmBinary(file) {}
#else

function findWasmBinary() {
#if SINGLE_FILE
#if SINGLE_FILE && SINGLE_FILE_BINARY_ENCODE && !WASM2JS
return binaryDecode('<<< WASM_BINARY_DATA >>>');
#elif SINGLE_FILE
return base64Decode('<<< WASM_BINARY_DATA >>>');
#elif AUDIO_WORKLET || !EXPORT_ES6
// For an Audio Worklet, we cannot use `new URL()`.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's hard to see why the locateFile path is only used in EXPORT_ES6 mode... do you know why EXPORT_ES6 is here?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't know. It is indeed a little bit odd code flow, which is why I cleaned the ifdefs a bit here as a drive-by. Something to ponder further in a separate PR.

return locateFile('{{{ WASM_BINARY_FILE }}}');
#else
#if EXPORT_ES6 && !AUDIO_WORKLET
if (Module['locateFile']) {
#endif
return locateFile('{{{ WASM_BINARY_FILE }}}');
#if EXPORT_ES6 && !AUDIO_WORKLET // For an Audio Worklet, we cannot use `new URL()`.
}

#if ENVIRONMENT_MAY_BE_SHELL
if (ENVIRONMENT_IS_SHELL) {
return '{{{ WASM_BINARY_FILE }}}';
}
#endif

if (Module['locateFile']) {
return locateFile('{{{ WASM_BINARY_FILE }}}');
}

// Use bundler-friendly `new URL(..., import.meta.url)` pattern; works in browsers too.
return new URL('{{{ WASM_BINARY_FILE }}}', import.meta.url).href;
#endif

#endif
}

function getBinarySync(file) {
#if SINGLE_FILE && SINGLE_FILE_BINARY_ENCODE
return file;
#else
#if SINGLE_FILE
if (ArrayBuffer.isView(file)) {
return file;
Expand All @@ -464,6 +476,7 @@ function getBinarySync(file) {
#else
throw 'sync fetching of the wasm failed: you can preload it to Module["wasmBinary"] manually, or emcc.py will do that for you when generating HTML (but not JS)';
#endif
#endif
}

async function getWasmBinary(binaryFile) {
Expand Down
7 changes: 7 additions & 0 deletions src/settings.js
Original file line number Diff line number Diff line change
Expand Up @@ -1853,6 +1853,13 @@ var WASMFS = false;
// [link]
var SINGLE_FILE = false;

// If true, binary Wasm content is encoded using a custom UTF-8 embedding
// instead of base64. This generates a smaller binary that compresses well.
// Set this to false to revert back to earlier base64 encoding if you run into
// issues with the binary encoding. (and please let us know of any such issues)
// [link]
var SINGLE_FILE_BINARY_ENCODE = true;

// If set to 1, all JS libraries will be automatically available at link time.
// This gets set to 0 in STRICT mode (or with MINIMAL_RUNTIME) which mean you
// need to explicitly specify -lfoo.js in at link time in order to access
Expand Down
13 changes: 13 additions & 0 deletions test/test_codesize.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ class codesize(RunnerCore):
'random_printf_wasm2js': ('random_printf', True),
'hello_webgl_wasm': ('hello_webgl', False),
'hello_webgl_wasm2js': ('hello_webgl', True),
'hello_webgl2_wasm_singlefile': ('hello_webgl2_wasm_singlefile', False),
'hello_webgl2_wasm': ('hello_webgl2', False),
'hello_webgl2_wasm2js': ('hello_webgl2', True),
'math': ('math', False),
Expand Down Expand Up @@ -87,6 +88,7 @@ def test_minimal_runtime_code_size(self, test_name, wasm2js, compare_js_output=F
'-lGL',
'-sMODULARIZE']
hello_webgl2_sources = hello_webgl_sources + ['-sMAX_WEBGL_VERSION=2']
hello_webgl2_wasm_singlefile_sources = hello_webgl2_sources + ['-sSINGLE_FILE']
hello_wasm_worker_sources = [test_file('wasm_worker/wasm_worker_code_size.c'), '-sWASM_WORKERS', '-sENVIRONMENT=web']
audio_worklet_sources = [test_file('webaudio/audioworklet.c'), '-sWASM_WORKERS', '-sAUDIO_WORKLET', '-sENVIRONMENT=web', '-sTEXTDECODER=1']
embind_hello_sources = [test_file('codesize/embind_hello_world.cpp'), '-lembind']
Expand All @@ -98,6 +100,7 @@ def test_minimal_runtime_code_size(self, test_name, wasm2js, compare_js_output=F
'hello_webgl': hello_webgl_sources,
'math': math_sources,
'hello_webgl2': hello_webgl2_sources,
'hello_webgl2_wasm_singlefile': hello_webgl2_wasm_singlefile_sources,
'hello_wasm_worker': hello_wasm_worker_sources,
'audio_worklet': audio_worklet_sources,
'embind_val': embind_val_sources,
Expand Down Expand Up @@ -411,3 +414,13 @@ def test_codesize_file_preload(self):
def test_small_js_flags(self):
self.emcc('browser_test_hello_world.c', ['-O3', '--closure=1', '-sINCOMING_MODULE_JS_API=[]', '-sENVIRONMENT=web', '--output-eol=linux'])
self.check_output_sizes('a.out.js')

# This test verifies that a gzipped, binary-encoded SINGLE_FILE build is
# smaller than the gzipped, base64-encoded build of the same program.
def test_binary_encode_is_smaller_than_base64_encode(self):
  def gzipped_output_size(encode_flag):
    # Build hello_world.c as a single file with the given encoding flag and
    # measure the gzip-compressed size of the resulting a.out.js.
    self.emcc('hello_world.c', ['-O2', '-sSINGLE_FILE', encode_flag])
    return len(gzip.compress(read_binary('a.out.js')))

  size_binary_encode = gzipped_output_size('-sSINGLE_FILE_BINARY_ENCODE')
  size_base64 = gzipped_output_size('-sSINGLE_FILE_BINARY_ENCODE=0')
  print(f'Binary encoded file size: {size_binary_encode}, base64 encoded file size: {size_base64}')
  self.assertLess(size_binary_encode, size_base64)
9 changes: 6 additions & 3 deletions test/test_other.py
Original file line number Diff line number Diff line change
Expand Up @@ -9475,10 +9475,11 @@ def test_standalone_system_headers(self, prefix):
'closure': (False, True),
})
@parameterized({
'': (True,),
'disabled': (False,),
'': (True,False),
'disabled': (False,False),
'binary_encode': (True,True),
})
def test_single_file(self, debug_enabled, closure_enabled, single_file_enabled):
def test_single_file(self, debug_enabled, closure_enabled, single_file_enabled, single_file_binary_encoded):
cmd = [EMCC, test_file('hello_world.c')] + self.get_cflags()

if single_file_enabled:
Expand All @@ -9487,6 +9488,8 @@ def test_single_file(self, debug_enabled, closure_enabled, single_file_enabled):
else:
expect_wasm = self.is_wasm()

cmd += [f'-sSINGLE_FILE_BINARY_ENCODE={int(single_file_binary_encoded)}']

if debug_enabled:
cmd += ['-g']
if closure_enabled:
Expand Down
41 changes: 40 additions & 1 deletion tools/link.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,10 @@ def base64_encode(filename):
return b64.decode('ascii')


def base64_or_binary_encode(b):
  """Encode the file `b` with the encoding selected by the link settings.

  `b` is forwarded unchanged to `binary_encode` when both SINGLE_FILE and
  SINGLE_FILE_BINARY_ENCODE are enabled, and to `base64_encode` otherwise.
  """
  if settings.SINGLE_FILE and settings.SINGLE_FILE_BINARY_ENCODE:
    return binary_encode(b)
  return base64_encode(b)


def align_to_wasm_page_boundary(address):
  """Round `address` up to the nearest multiple of the Wasm page size.

  An address that is already a multiple of the page size is returned as is.
  """
  page_size = webassembly.WASM_PAGE_SIZE
  # Ceiling division written via floor division of the negated address.
  page_count = -(-address // page_size)
  return page_count * page_size
Expand Down Expand Up @@ -2435,7 +2439,7 @@ def phase_binaryen(target, options, wasm_target):
if final_js and settings.SINGLE_FILE and not settings.WASM2JS:
js = read_file(final_js)

js = do_replace(js, '<<< WASM_BINARY_DATA >>>', base64_encode(wasm_target))
js = do_replace(js, '<<< WASM_BINARY_DATA >>>', base64_or_binary_encode(wasm_target))
delete_file(wasm_target)
write_file(final_js, js)

Expand Down Expand Up @@ -2945,9 +2949,44 @@ def move_file(src, dst):
shutil.move(src, dst)


def binary_encode(filename):
  """Read the binary file `filename` and encode its bytes as a string.

  Each byte [0, 255] is shifted up by one to a value in [1, 256] and emitted
  as the character with that code point, so that zero (a very common value)
  becomes 0x01 and no null character ever appears inside the string. When the
  result is written out as UTF-8 each value takes one or two bytes. A handful
  of shifted values that cannot appear verbatim inside a quoted JS string are
  emitted as two-character backslash escapes instead.

  This kind of encoding results in a string that will compress well by both
  gzip and brotli, unlike base64 encoding binary data would do.
  """
  # Shifted values that need a backslash escape, and the text to emit for them.
  escapes = {
    # The string is written inside single quotes.
    ord("'"): "\\'",
    # The optimizer may turn the string into a double-quoted one.
    ord('"'): '\\"',
    # Carriage return 0x0D is written as \r.
    ord('\r'): '\\r',
    # Newline 0x0A is written as \n.
    ord('\n'): '\\n',
    # A literal backslash is written as \\.
    ord('\\'): '\\\\',
  }

  # Offset all bytes up by +1; this is possible since 255 can be represented
  # as the code point 0x100.
  shifted = (byte + 1 for byte in utils.read_binary(filename))
  return ''.join(escapes.get(code, chr(code)) for code in shifted)


# Returns the subresource location for run-time access.
#
# With SINGLE_FILE enabled the file contents are embedded in the returned
# string: binary-encoded when SINGLE_FILE_BINARY_ENCODE is set, otherwise as a
# base64 `data:` URI using `mimetype`. Without SINGLE_FILE only the file's
# base name is returned.
def get_subresource_location(path, mimetype='application/octet-stream'):
  if settings.SINGLE_FILE:
    if settings.SINGLE_FILE_BINARY_ENCODE:
      # binary_encode() takes a filename and reads the file itself, so pass
      # the path rather than the already-read bytes.
      return binary_encode(path)
    return f'data:{mimetype};base64,{base64_encode(path)}'
  else:
    return os.path.basename(path)
Expand Down
Loading