Skip to content

Commit d054202

Browse files
authored
feat: experimental Cross-Origin Storage cache backend (#1549)
* added CrossOriginStorage implementation
* added references to COS extension
* added fallback cache
* refactored CrossOriginStorage
* clean-up
* some improvements
* added types
* added type references
1 parent c782cac commit d054202

File tree

4 files changed

+297
-0
lines changed

4 files changed

+297
-0
lines changed

packages/transformers/src/env.js

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -224,6 +224,11 @@ export const LogLevel = Object.freeze({
224224
* @property {boolean} useWasmCache Whether to pre-load and cache WASM binaries and the WASM factory (.mjs) for ONNX Runtime.
225225
* Defaults to `true` when cache is available. This can improve performance and enables offline usage by avoiding repeated downloads.
226226
* @property {string} cacheKey The cache key to use for storing models and WASM binaries. Defaults to 'transformers-cache'.
227+
* @property {boolean} experimental_useCrossOriginStorage Whether to use the Cross-Origin Storage API to cache model files
228+
* across origins, allowing different sites to share the same cached model weights. Defaults to `false`.
229+
* Requires the Cross-Origin Storage Chrome extension: {@link https://chromewebstore.google.com/detail/cross-origin-storage/denpnpcgjgikjpoglpjefakmdcbmlgih}.
230+
* The `experimental_` prefix indicates that the underlying browser API is not yet standardised and may change or be
231+
* removed without a major version bump. For more information, see {@link https://github.com/WICG/cross-origin-storage}.
227232
* @property {(input: string | URL, init?: any) => Promise<any>} fetch The fetch function to use. Defaults to `fetch`.
228233
*/
229234

@@ -270,6 +275,8 @@ export const env = {
270275
useWasmCache: IS_WEB_CACHE_AVAILABLE || IS_FS_AVAILABLE,
271276
cacheKey: 'transformers-cache',
272277

278+
experimental_useCrossOriginStorage: false,
279+
273280
/////////////////// Custom fetch /////////////////////
274281
fetch: DEFAULT_FETCH,
275282

packages/transformers/src/utils/cache.js

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import { apis, env } from '../env.js';
22
import { FileCache } from './cache/FileCache.js';
33
import { logger } from './logger.js';
4+
import { CrossOriginStorage } from './cache/CrossOriginStorageCache.js';
45

56
/**
67
* @typedef {Object} CacheInterface
@@ -38,6 +39,10 @@ export async function getCache(file_cache_dir = null) {
3839
cache = env.customCache;
3940
}
4041

42+
if (!cache && env.experimental_useCrossOriginStorage && CrossOriginStorage.isAvailable()) {
43+
cache = new CrossOriginStorage();
44+
}
45+
4146
if (!cache && env.useBrowserCache) {
4247
if (typeof caches === 'undefined') {
4348
throw Error('Browser cache is not available in this environment.');
packages/transformers/src/utils/cache/CrossOriginStorageCache.js

Lines changed: 247 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,247 @@
1+
/// <reference path="./cross-origin-storage.d.ts" />
2+
3+
/**
 * Digest algorithm used both for hashing blobs and in the descriptors sent to
 * the cross-origin storage API.
 */
const HASH_ALGORITHM = 'SHA-256';

/**
 * Cache API bucket in which the url→hash mapping is persisted.
 */
const HASH_CACHE_NAME = 'experimental_transformers-hash-cache';

/**
 * Wraps a hex digest in the descriptor shape the cross-origin storage API takes.
 *
 * @param {string} value Hex-encoded SHA-256 hash.
 * @returns {{ algorithm: string, value: string }} Descriptor pairing the digest with `HASH_ALGORITHM`.
 */
const makeHashDescriptor = (value) => {
    const descriptor = { algorithm: HASH_ALGORITHM, value: value };
    return descriptor;
};
17+
18+
/**
19+
* A cache implementation backed by the experimental `navigator.crossOriginStorage` API,
20+
* which allows sharing cached files (identified by content hash) across origins.
21+
*
22+
* Implements {@link import('../cache.js').CacheInterface}.
23+
*
24+
* @see https://github.com/explainers-by-googlers/cross-origin-storage
25+
*/
26+
export class CrossOriginStorage {
27+
/** @type {Promise<Cache> | null} */
28+
#hashCache = null;
29+
30+
/**
31+
* Returns (and lazily opens) the hash cache, reusing the same promise across concurrent callers.
32+
* @returns {Promise<Cache>}
33+
*/
34+
_getHashCache = () => {
35+
this.#hashCache ??= caches.open(HASH_CACHE_NAME);
36+
return this.#hashCache;
37+
};
38+
39+
/**
40+
* Returns whether the `navigator.crossOriginStorage` API is available in the current environment.
41+
* @returns {boolean}
42+
*/
43+
static isAvailable = () => typeof navigator !== 'undefined' && 'crossOriginStorage' in navigator;
44+
45+
/**
46+
* Looks up a cached response for the given URL by resolving its SHA-256 hash and requesting
47+
* the corresponding file handle from cross-origin storage.
48+
*
49+
* Implements `CacheInterface.match`.
50+
*
51+
* @param {string} request The URL of the resource to look up.
52+
* @returns {Promise<Response|undefined>} The cached `Response`, or `undefined` if not found.
53+
*/
54+
match = async (request) => {
55+
const hashValue = await this._getFileHash(request);
56+
if (!hashValue) {
57+
return undefined;
58+
}
59+
try {
60+
const [handle] = await navigator.crossOriginStorage.requestFileHandles([makeHashDescriptor(hashValue)]);
61+
const blob = await handle.getFile();
62+
return new Response(blob);
63+
} catch {
64+
return undefined;
65+
}
66+
};
67+
68+
/**
69+
* Stores a response in cross-origin storage, keyed by its SHA-256 hash.
70+
*
71+
* For LFS-backed URLs the hash is resolved cheaply via `_getFileHash` (which checks
72+
* `HASH_CACHE_NAME` first, then falls back to fetching the Git LFS pointer file)
73+
* without reading the response body a second time.
74+
*
75+
* For non-LFS resources the hash is unknown upfront. In that case the body is consumed
76+
* in the background: the stream is read to compute the content hash, the file is written
77+
* into cross-origin storage, and the computed hash is persisted to `HASH_CACHE_NAME`
78+
* so that future `match` calls can resolve the file without a network round-trip.
79+
*
80+
* Implements `CacheInterface.put`.
81+
*
82+
* @param {string} request The URL of the resource (used as the hash-cache key).
83+
* @param {Response} response The response whose body will be written to the cache.
84+
* @returns {Promise<void>}
85+
*/
86+
put = async (request, response) => {
87+
const hashValue = await this._getFileHash(request);
88+
89+
if (hashValue) {
90+
// Fast path: LFS hash already known. Consume the body and store directly.
91+
const blob = await response.blob();
92+
await this._storeBlobInCOS(blob, hashValue);
93+
} else {
94+
// Slow path: hash unknown. Process in the background so put() returns promptly.
95+
// The caller already holds a reference to the original response; we receive it
96+
// here only to buffer and hash its body.
97+
this._processAndStore(request, response.body);
98+
}
99+
};
100+
101+
/**
102+
* Writes a blob into cross-origin storage using the given pre-computed hex hash string.
103+
*
104+
* @param {Blob} blob
105+
* @param {string} hashHex Hex-encoded SHA-256 hash of `blob`.
106+
* @returns {Promise<void>}
107+
*/
108+
_storeBlobInCOS = async (blob, hashHex) => {
109+
const [handle] = await navigator.crossOriginStorage.requestFileHandles([makeHashDescriptor(hashHex)], {
110+
create: true,
111+
});
112+
const writableStream = await handle.createWritable();
113+
await writableStream.write(blob);
114+
await writableStream.close();
115+
};
116+
117+
/**
118+
* Background task for non-LFS resources: consumes `stream`, computes the SHA-256 hash
119+
* of the resulting blob, stores it in cross-origin storage, and persists the computed
120+
* hash to `HASH_CACHE_NAME` keyed by `request` so future `match` calls can resolve the
121+
* file without a network round-trip.
122+
*
123+
* Called fire-and-forget from `put` — errors are swallowed so failures never surface to
124+
* the caller.
125+
*
126+
* @param {string} request The original resource URL.
127+
* @param {ReadableStream} stream The response body stream to consume.
128+
* @returns {Promise<void>}
129+
*/
130+
_processAndStore = async (request, stream) => {
131+
try {
132+
const chunks = [];
133+
for await (const chunk of stream) {
134+
chunks.push(chunk);
135+
}
136+
const blob = new Blob(chunks);
137+
const hashHex = await this._getBlobHash(blob);
138+
139+
await this._storeBlobInCOS(blob, hashHex);
140+
141+
// Persist the computed hash so future match() calls resolve without the network.
142+
try {
143+
const hashCache = await this._getHashCache();
144+
await hashCache.put(request, new Response(hashHex));
145+
} catch {
146+
// Cache API unavailable (e.g. non-secure context): COS entry still written.
147+
}
148+
} catch {
149+
// Non-fatal: background store failure must not affect the caller.
150+
}
151+
};
152+
153+
/**
154+
* Deletes the cache entry for the given request.
155+
*
156+
* Removes the hash entry from `HASH_CACHE_NAME`. Note: cross-origin storage itself does not
157+
* expose a delete API, so only the local hash mapping is removed. For non-LFS URLs this
158+
* permanently prevents `match` from resolving the file. For LFS-backed URLs, `match` will
159+
* re-fetch the LFS pointer file on the next call and repopulate the hash cache automatically.
160+
*
161+
* Implements `CacheInterface.delete`.
162+
*
163+
* @param {string} request
164+
* @returns {Promise<boolean>} Resolves to `true` if the hash entry was deleted, `false` otherwise.
165+
*/
166+
delete = async (request) => {
167+
try {
168+
const hashCache = await this._getHashCache();
169+
return await hashCache.delete(request);
170+
} catch {
171+
return false;
172+
}
173+
};
174+
175+
/**
176+
* Resolves the SHA-256 hash for a given URL.
177+
*
178+
* Returns the cached hash immediately if one has been persisted to `HASH_CACHE_NAME`.
179+
* Otherwise falls back to `_getLfsFileHash` to retrieve the hash from the Hugging Face
180+
* LFS pointer file, persisting the result to `HASH_CACHE_NAME` for future lookups.
181+
*
182+
* Returns `null` if the hash cannot be determined (e.g. non-LFS URL with no cached entry).
183+
*
184+
* @param {string} url The resource URL to resolve a hash for.
185+
* @returns {Promise<string|null>} The hex-encoded SHA-256 hash, or `null` if unavailable.
186+
*/
187+
_getFileHash = async (url) => {
188+
try {
189+
const hashCache = await this._getHashCache();
190+
const cached = await hashCache.match(url);
191+
if (cached) {
192+
return cached.text();
193+
}
194+
195+
const hash = await this._getLfsFileHash(url);
196+
if (hash) {
197+
await hashCache.put(url, new Response(hash));
198+
return hash;
199+
}
200+
201+
return null;
202+
} catch {
203+
return null;
204+
}
205+
};
206+
207+
/**
208+
* Attempts to retrieve the SHA-256 hash for a Hugging Face resource URL from its raw
209+
* Git LFS pointer file.
210+
*
211+
* Only applicable to URLs containing `/resolve/` (i.e. Hugging Face resolved file URLs).
212+
* The `/resolve/` segment is rewritten to `/raw/` to fetch the LFS pointer directly.
213+
* Returns `null` for non-LFS URLs or when the network request fails.
214+
*
215+
* @see https://huggingface.co/docs/hub/en/storage-backends#xet
216+
* @param {string} url The resolved Hugging Face URL of the resource.
217+
* @returns {Promise<string|null>} The hex-encoded SHA-256 hash, or `null` if unavailable.
218+
*/
219+
_getLfsFileHash = async (url) => {
220+
if (!url.includes('/resolve/')) {
221+
return null;
222+
}
223+
224+
const rawUrl = url.replace('/resolve/', '/raw/');
225+
226+
try {
227+
const text = await fetch(rawUrl).then((r) => r.text());
228+
const match = text.match(/^oid sha256:([0-9a-f]+)$/m);
229+
return match ? match[1] : null;
230+
} catch {
231+
return null;
232+
}
233+
};
234+
235+
/**
236+
* Computes the SHA-256 hash of a `Blob`'s contents.
237+
*
238+
* @param {Blob} blob The blob to hash.
239+
* @returns {Promise<string>} The lowercase hex-encoded SHA-256 hash.
240+
*/
241+
_getBlobHash = async (blob) => {
242+
const arrayBuffer = await blob.arrayBuffer();
243+
const hashBuffer = await crypto.subtle.digest(HASH_ALGORITHM, arrayBuffer);
244+
const hashArray = Array.from(new Uint8Array(hashBuffer));
245+
return hashArray.map((byte) => byte.toString(16).padStart(2, '0')).join('');
246+
};
247+
}
packages/transformers/src/utils/cache/cross-origin-storage.d.ts

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
/**
2+
* Type definitions for the Cross-Origin Storage API
3+
* Source: https://github.com/WICG/cross-origin-storage/blob/main/cross-origin-storage.d.ts
4+
* @see https://github.com/WICG/cross-origin-storage
5+
*/
6+
7+
/**
 * Dictionary identifying a file by content hash: the algorithm name together
 * with the hash value.
 */
interface CrossOriginStorageRequestFileHandleHash {
  /** Name of the hash algorithm. */
  algorithm: string;
  /** The hash value. */
  value: string;
}
14+
15+
/**
 * Options dictionary accepted when requesting file handles.
 */
interface CrossOriginStorageRequestFileHandleOptions {
  /** Optional creation flag. */
  create?: boolean;
}
21+
22+
/**
 * The CrossOriginStorageManager interface.
 * [SecureContext]
 */
interface CrossOriginStorageManager {
  /**
   * Requests file handles for the given hash descriptors.
   *
   * @param hashes Hash descriptors of the requested files.
   * @param options Optional request options.
   * @returns A promise for an array of file handles.
   */
  requestFileHandles(
    hashes: CrossOriginStorageRequestFileHandleHash[],
    options?: CrossOriginStorageRequestFileHandleOptions,
  ): Promise<FileSystemFileHandle[]>;
}
32+
33+
/**
 * Augments the standard `Navigator` interface (via declaration merging) with
 * the cross-origin storage entry point.
 */
interface Navigator {
  /** Cross-origin storage manager exposed on `navigator`. */
  readonly crossOriginStorage: CrossOriginStorageManager;
}

0 commit comments

Comments
 (0)