Skip to content

Commit

Permalink
Merge pull request #3480 from cloudflare/dominik/load-dynlib-to-usr-lib
Browse files Browse the repository at this point in the history
Fixes to load dynlib on newer Python versions correctly.
  • Loading branch information
dom96 authored Feb 7, 2025
2 parents 8e9ea2e + 5a60ecf commit bc2f014
Show file tree
Hide file tree
Showing 6 changed files with 114 additions and 50 deletions.
12 changes: 12 additions & 0 deletions src/pyodide/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,14 @@ if (typeof FinalizationRegistry === "undefined") {
unregister(){}
};
}
function patchDynlibLookup(Module, libName) {
try {
return Module.FS.readFile("/usr/lib/" + libName);
} catch(e) {
console.error("Failed to read ", libName, e);
}
}
"""

REPLACEMENTS = [
Expand Down Expand Up @@ -169,6 +177,10 @@ REPLACEMENTS = [
"eval(UTF8ToString(ptr))",
"(() => {throw new Error('Internal Emscripten code tried to eval, this should not happen, please file a bug report with your requirements.txt file\\'s contents')})()",
],
[
"!libData&&flags.fs",
"!(libData ??= patchDynlibLookup(Module, libName))&&flags.fs",
],
]

expand_template(
Expand Down
25 changes: 15 additions & 10 deletions src/pyodide/internal/loadPackage.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@ import {
USING_OLDEST_PACKAGES_VERSION,
} from 'pyodide-internal:metadata';
import {
SITE_PACKAGES,
DYNLIB_PATH,
VIRTUALIZED_DIR,
STDLIB_PACKAGES,
getSitePackagesPath,
} from 'pyodide-internal:setupPackages';
Expand All @@ -40,20 +41,20 @@ async function decompressArrayBuffer(
}
}

function getFilenameOfPackage(requirement: string): string {
function getPackageMetadata(requirement: string): PackageDeclaration {
const obj = LOCKFILE['packages'][requirement];
if (!obj) {
throw new Error('Requirement ' + requirement + ' not found in lockfile');
}

return obj.file_name;
return obj;
}

// loadBundleFromR2 loads the package from the internet (through fetch) and uses the DiskCache as
// a backing store. This is only used in local dev.
async function loadBundleFromR2(requirement: string): Promise<Reader> {
// first check if the disk cache has what we want
const filename = getFilenameOfPackage(requirement);
const filename = getPackageMetadata(requirement).file_name;
let original = DiskCache.get(filename);
if (!original) {
// we didn't find it in the disk cache, continue with original fetch
Expand Down Expand Up @@ -85,7 +86,7 @@ async function loadBundleFromR2(requirement: string): Promise<Reader> {
async function loadBundleFromArtifactBundler(
requirement: string
): Promise<Reader> {
const filename = getFilenameOfPackage(requirement);
const filename = getPackageMetadata(requirement).file_name;
const fullPath = 'python-package-bucket/' + PACKAGES_VERSION + '/' + filename;
const reader = ArtifactBundler.getPackage(fullPath);
if (!reader) {
Expand Down Expand Up @@ -127,7 +128,7 @@ async function loadPackagesImpl(
if (req === 'test') {
continue; // Skip the test package, it is only useful for internal Python regression testing.
}
if (SITE_PACKAGES.loadedRequirements.has(req)) {
if (VIRTUALIZED_DIR.hasRequirementLoaded(req)) {
continue;
}
loadPromises.push(loadBundle(req).then((r) => [req, r]));
Expand All @@ -139,15 +140,19 @@ async function loadPackagesImpl(
const buffers = await Promise.all(loadPromises);
for (const [requirement, reader] of buffers) {
const [tarInfo, soFiles] = parseTarInfo(reader);
SITE_PACKAGES.addSmallBundle(tarInfo, soFiles, requirement);
const pkg = getPackageMetadata(requirement);
VIRTUALIZED_DIR.addSmallBundle(
tarInfo,
soFiles,
requirement,
pkg.install_dir
);
}

console.log('Loaded ' + loading.join(', '));

const tarFS = createTarFS(Module);
const path = getSitePackagesPath(Module);
const info = SITE_PACKAGES.rootInfo;
Module.FS.mount(tarFS, { info }, path);
VIRTUALIZED_DIR.mount(Module, tarFS);
}

/**
Expand Down
6 changes: 3 additions & 3 deletions src/pyodide/internal/python.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import { enterJaegerSpan } from 'pyodide-internal:jaeger';
import {
SITE_PACKAGES,
VIRTUALIZED_DIR,
TRANSITIVE_REQUIREMENTS,
adjustSysPath,
mountSitePackages,
Expand Down Expand Up @@ -80,10 +80,10 @@ export async function loadPyodide(
Module.setUnsafeEval(UnsafeEval);
Module.setGetRandomValues(getRandomValues);

mountSitePackages(Module, SITE_PACKAGES.rootInfo);
mountSitePackages(Module, VIRTUALIZED_DIR);
entropyMountFiles(Module);
await enterJaegerSpan('load_packages', () =>
// NB. loadPackages adds the packages to the `SITE_PACKAGES` global which then gets used in
// NB. loadPackages adds the packages to the `VIRTUALIZED_DIR` global which then gets used in
// preloadDynamicLibs.
loadPackages(Module, TRANSITIVE_REQUIREMENTS)
);
Expand Down
106 changes: 73 additions & 33 deletions src/pyodide/internal/setupPackages.ts
Original file line number Diff line number Diff line change
Expand Up @@ -33,26 +33,33 @@ export const STDLIB_PACKAGES: string[] = Object.values(LOCKFILE.packages)
// `folder/file.txt` -> `["folder", "file.txt"]`
export type FilePath = string[];

/**
 * Builds a fresh, empty root node for a virtualized directory tree.
 *
 * Every call returns a new object with its own empty `children` map, so the
 * result can be populated independently of any other root.
 *
 * @returns A node with no children, no reader, an empty name/path, mode 0o777
 *   and type '5' (the tar typeflag for a directory entry).
 */
function createTarFsInfo(): TarFSInfo {
  const emptyRoot: TarFSInfo = {
    children: new Map(),
    mode: 0o777,
    type: '5',
    modtime: 0,
    size: 0,
    path: '',
    name: '',
    parts: [],
    reader: null,
  };
  return emptyRoot;
}

/**
* SitePackagesDir keeps track of the virtualized view of the site-packages
* directory generated for each worker.
* VirtualizedDir keeps track of the virtualized view of the site-packages
* directory generated for each worker as well as a virtualized view of the dynamic libraries stored
* in /usr/lib.
*/
class SitePackagesDir {
public rootInfo: TarFSInfo;
public soFiles: FilePath[];
public loadedRequirements: Set<string>;
class VirtualizedDir {
private rootInfo: TarFSInfo; // site-packages directory
private dynlibTarFs: TarFSInfo; // /usr/lib directory
private soFiles: FilePath[];
private loadedRequirements: Set<string>;
constructor() {
this.rootInfo = {
children: new Map(),
mode: 0o777,
type: '5',
modtime: 0,
size: 0,
path: '',
name: '',
parts: [],
reader: null,
};
this.rootInfo = createTarFsInfo();
this.dynlibTarFs = createTarFsInfo();
this.soFiles = [];
this.loadedRequirements = new Set();
}
Expand All @@ -63,33 +70,38 @@ class SitePackagesDir {
* If a file or directory already exists, an error is thrown.
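* @param {TarInfo} overlayInfo The directory that is to be "copied" into site-packages or /usr/lib
* @param {InstallDir} dir Selects the destination: 'dynlib' targets /usr/lib, any other value targets site-packages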
*/
mountOverlay(overlayInfo: TarFSInfo): void {
mountOverlay(overlayInfo: TarFSInfo, dir: InstallDir): void {
const dest = dir == 'dynlib' ? this.dynlibTarFs : this.rootInfo;
overlayInfo.children!.forEach((val, key) => {
if (this.rootInfo.children!.has(key)) {
if (dest.children!.has(key)) {
throw new Error(
`File/folder ${key} being written by multiple packages`
);
}
this.rootInfo.children!.set(key, val);
dest.children!.set(key, val);
});
}

/**
* A small bundle contains just a single package. The entire bundle will be overlaid onto site-packages.
* A small bundle can basically be thought of as a wheel.
* A small bundle contains just a single package, it can be thought of as a wheel.
*
* The entire bundle will be overlaid onto site-packages or /usr/lib depending on its install_dir.
*
* @param {TarInfo} tarInfo The root tarInfo for the small bundle (See tar.js)
* @param {List<String>} soFiles A list of .so files contained in the small bundle
* @param {String} requirement The canonicalized package name this small bundle corresponds to
* @param {InstallDir} installDir The `install_dir` field from the metadata about the package taken from the lockfile
*/
addSmallBundle(
tarInfo: TarFSInfo,
soFiles: string[],
requirement: string
requirement: string,
installDir: InstallDir
): void {
for (const soFile of soFiles) {
this.soFiles.push(soFile.split('/'));
}
this.mountOverlay(tarInfo);
this.mountOverlay(tarInfo, installDir);
this.loadedRequirements.add(requirement);
}

Expand Down Expand Up @@ -119,10 +131,32 @@ class SitePackagesDir {
if (!child) {
throw new Error(`Requirement ${req} not found in pyodide packages tar`);
}
this.mountOverlay(child);
this.mountOverlay(child, 'site');
this.loadedRequirements.add(req);
}
}

/**
 * Returns the root node of the virtualized site-packages tree.
 * This is the live internal object, not a copy.
 */
getSitePackagesRoot(): TarFSInfo {
return this.rootInfo;
}

/**
 * Returns the root node of the virtualized /usr/lib (dynamic library) tree.
 * This is the live internal object, not a copy.
 */
getDynlibRoot(): TarFSInfo {
return this.dynlibTarFs;
}

/**
 * Returns the .so file paths recorded so far, each already split into its
 * path components. The internal array itself is returned, not a copy.
 */
getSoFilesToLoad(): FilePath[] {
return this.soFiles;
}

/**
 * Reports whether a bundle for the given requirement has already been added
 * to this virtualized directory.
 *
 * @param req The requirement name to look up.
 * @returns true if `req` is in the set of loaded requirements.
 */
hasRequirementLoaded(req: string): boolean {
return this.loadedRequirements.has(req);
}

/**
 * Mounts both virtualized trees on the given module using the same tarFS:
 * the site-packages tree at the path returned by getSitePackagesPath(Module)
 * and the dynamic library tree at DYNLIB_PATH.
 *
 * NOTE(review): this does not create the mount points itself; presumably it
 * relies on mountSitePackages having called mkdirTree for both — confirm.
 *
 * @param Module The Emscripten module whose FS receives the mounts.
 * @param tarFS The filesystem implementation used for both mounts.
 */
mount(Module: Module, tarFS: EmscriptenFS<TarFSInfo>) {
const path = getSitePackagesPath(Module);
// Python packages.
Module.FS.mount(tarFS, { info: this.rootInfo }, path);
// Dynamic libraries.
Module.FS.mount(tarFS, { info: this.dynlibTarFs }, DYNLIB_PATH);
}
}

/**
Expand All @@ -136,11 +170,11 @@ class SitePackagesDir {
*
* TODO(later): This needs to be removed when external package loading is enabled.
*/
export function buildSitePackages(requirements: Set<string>): SitePackagesDir {
export function buildVirtualizedDir(requirements: Set<string>): VirtualizedDir {
if (EmbeddedPackagesTarReader.read === undefined) {
// Package retrieval is enabled, so the embedded tar reader isn't initialized.
// All packages, including STDLIB_PACKAGES, are loaded in `loadPackages`.
return new SitePackagesDir();
return new VirtualizedDir();
}

const [bigTarInfo, bigTarSoFiles] = parseTarInfo(EmbeddedPackagesTarReader);
Expand All @@ -157,7 +191,7 @@ export function buildSitePackages(requirements: Set<string>): SitePackagesDir {
requirements.forEach((r) => requirementsInBigBundle.add(r));
}

const res = new SitePackagesDir();
const res = new VirtualizedDir();
res.addBigBundle(bigTarInfo, bigTarSoFiles, requirementsInBigBundle);

return res;
Expand Down Expand Up @@ -215,26 +249,32 @@ export function getSitePackagesPath(Module: Module): string {
return `/session/lib/python${pymajor}.${pyminor}/site-packages`;
}

/**
 * Directory where the virtualized dynamic library tree is mounted.
 * The patchDynlibLookup helper in src/pyodide/BUILD.bazel hard-codes the same
 * "/usr/lib/" prefix, so the two must be kept in sync.
 */
export const DYNLIB_PATH = '/usr/lib';

/**
* This mounts the tarFS (which contains the packages) and metadataFS (which
* contains user code).
* This mounts a TarFS representing the site-packages directory (which contains the Python packages)
* and another TarFS representing the dynlib directory (where dynlibs like libcrypto.so live).
*
* This has to work before the runtime is initialized because of memory snapshot
* details, so even though we want these directories to be on sys.path, we
* handle that separately in adjustSysPath.
*/
export function mountSitePackages(Module: Module, info: TarFSInfo): void {
export function mountSitePackages(Module: Module, pkgs: VirtualizedDir): void {
const tarFS = createTarFS(Module);
const site_packages = getSitePackagesPath(Module);
Module.FS.mkdirTree(site_packages);
Module.FS.mkdirTree(DYNLIB_PATH);
if (!LOAD_WHEELS_FROM_R2 && !LOAD_WHEELS_FROM_ARTIFACT_BUNDLER) {
// if we are not loading additional wheels, then we're done
// with site-packages and we can mount it here. Otherwise, we must mount it in
// loadPackages().
Module.FS.mount(tarFS, { info }, site_packages);
pkgs.mount(Module, tarFS);
}
}

/**
* This mounts the metadataFS (which contains user code).
*/
export function mountWorkerFiles(Module: Module) {
Module.FS.mkdirTree('/session/metadata');
const mdFS = createMetadataFS(Module);
Expand Down Expand Up @@ -301,4 +341,4 @@ function addPackageToLoad(

export { REQUIREMENTS };
export const TRANSITIVE_REQUIREMENTS = getTransitiveRequirements();
export const SITE_PACKAGES = buildSitePackages(TRANSITIVE_REQUIREMENTS);
export const VIRTUALIZED_DIR = buildVirtualizedDir(TRANSITIVE_REQUIREMENTS);
12 changes: 9 additions & 3 deletions src/pyodide/internal/snapshot.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ import { default as UnsafeEval } from 'internal:unsafe-eval';
import { default as DiskCache } from 'pyodide-internal:disk_cache';
import {
FilePath,
SITE_PACKAGES,
VIRTUALIZED_DIR,
getSitePackagesPath,
} from 'pyodide-internal:setupPackages';
import { default as EmbeddedPackagesTarReader } from 'pyodide-internal:packages_tar_reader';
Expand Down Expand Up @@ -131,7 +131,7 @@ const PRELOADED_SO_FILES: string[] = [];
* there.
*/
export function preloadDynamicLibs(Module: Module): void {
let SO_FILES_TO_LOAD = SITE_PACKAGES.soFiles;
let SO_FILES_TO_LOAD = VIRTUALIZED_DIR.getSoFilesToLoad();
if (IS_CREATING_BASELINE_SNAPSHOT || LOADED_BASELINE_SNAPSHOT) {
SO_FILES_TO_LOAD = [['_lzma.so'], ['_ssl.so']];
}
Expand All @@ -143,10 +143,16 @@ export function preloadDynamicLibs(Module: Module): void {
try {
const sitePackages = getSitePackagesPath(Module);
for (const soFile of SO_FILES_TO_LOAD) {
let node: TarFSInfo | undefined = SITE_PACKAGES.rootInfo;
let node: TarFSInfo | undefined = VIRTUALIZED_DIR.getSitePackagesRoot();
for (const part of soFile) {
node = node?.children?.get(part);
}
if (!node) {
node = VIRTUALIZED_DIR.getDynlibRoot();
for (const part of soFile) {
node = node?.children?.get(part);
}
}
if (!node) {
throw Error('fs node could not be found for ' + soFile);
}
Expand Down
3 changes: 2 additions & 1 deletion src/pyodide/types/pyodide-lock.d.ts
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
/**
 * Allowed values of a lockfile package's `install_dir` field.
 *
 * NOTE(review): in setupPackages.ts only 'dynlib' is routed to /usr/lib; every
 * other value is overlaid onto site-packages — confirm that is intended for
 * 'stdlib'.
 */
type InstallDir = 'site' | 'stdlib' | 'dynlib';
interface PackageDeclaration {
depends: string[];
file_name: string;
imports: string[];
install_dir: 'site' | 'stdlib';
install_dir: InstallDir;
name: string;
package_type: string;
sha256: string;
Expand Down

0 comments on commit bc2f014

Please sign in to comment.