From 8dd8cbaa3e581c2af68612de227fa451d91f7b31 Mon Sep 17 00:00:00 2001 From: Son Luong Ngoc Date: Thu, 29 May 2025 16:23:08 +0200 Subject: [PATCH] RBE: Build buck2 with BuildBuddy RBE Provide an example to show building Buck2 using Buck2 on BuildBuddy RBE. --- TLDR: 1. Step one, build buck2 with Cargo. This embeds a version of the patched prelude inside the newly built buck2 binary. ``` cargo install --locked --git https://github.com/facebookincubator/reindeer reindeer reindeer --third-party-dir shim/third-party/rust buckify cargo install --path app/buck2 -Z unstable-options buck2 --version ``` 2. Create an account on https://app.buildbuddy.io and obtain an API key ``` export BUILDBUDDY_API_KEY= ``` 3. Build Buck2 with RBE ``` buck2 build :buck2 ``` --- With that said, here are some notes: First, it is worth noting that Buck2 is currently using the system toolchains in shim/BUCK, with the most important ones being @shim//:cxx and @shim//:rust. When using these toolchains to build buck2, we need to ensure the same versions are available in the execution environment. That means the same clang, the same rustc, the same libc, etc. On most RBE server implementations today, including BuildBuddy, a container image is how users customize the execution environment. For this reason, we use the Dockerfile in this PR to build a container image with clang, git, and unzip inside. We also take extra caution to make sure that the rustc installed in the container image is the same as the one declared in the //:rust-toolchain file. If the clang version or the //:rust-toolchain file is updated, the container image needs to be rebuilt (!!!) before the remote build can run successfully. Secondly, the repo currently depends on 3 git_fetch targets generated by reindeer. git_fetch actions require networking, which is why they are currently marked as local_only and are thus not RBE compatible. 
Since BuildBuddy RBE workers can have external networking enabled, we patch git_fetch to remove the local_only clause. The Execution Platform is also tuned accordingly to have local and hybrid execution disabled. Finally, as all actions are now RBE compatible, we can also turn on all the deferred materialization flags that Buck2 provides. This means that no intermediary artifacts are downloaded throughout the build. Only the final "buck2" binary artifact should be downloaded from the BuildBuddy server, keeping the network impact relatively minimal. A successful RBE build without Action Cache hits should look something like this. ```bash > buck2 log summary Showing summary from: buck2 build :buck2 build ID: 7ffdd713-27fd-4a19-9033-946b8b2afe83 total files materialized: 1 total bytes materialized: 341721472 total bytes uploaded: 4539317 local actions: 0 remote actions: 4140 cached actions: 0 other actions: 4893 targets analysed: 1889 peak process memory: 1.1GiB out of 16GiB peak used disk space: 167GiB out of 234GiB max download speed: 0B/s max upload speed: 0B/s duration: 3:35.6s has local changes: unknown ``` and this is how it should look with full cache hits ``` > buck2 log summary Showing summary from: buck2 build :buck2 build ID: 5372b752-f88c-4485-8bb0-c8ecccc7a76d total files materialized: 1 total bytes materialized: 341721472 total bytes uploaded: 0 local actions: 0 remote actions: 0 cached actions: 4173 other actions: 4860 targets analysed: 1915 peak process memory: 727MiB out of 16GiB peak used disk space: 167GiB out of 234GiB max download speed: 0B/s max upload speed: 0B/s duration: 42.8s has local changes: unknown ``` Note the 3m35.6s duration vs the 42.8s duration. 
--- .buckconfig | 22 ++++++++++++++ Dockerfile | 35 +++++++++++++++++++++ platforms/BUCK | 3 ++ platforms/defs.bzl | 64 +++++++++++++++++++++++++++++++++++++++ prelude/git/git_fetch.bzl | 2 +- 5 files changed, 125 insertions(+), 1 deletion(-) create mode 100644 Dockerfile create mode 100644 platforms/BUCK create mode 100644 platforms/defs.bzl diff --git a/.buckconfig b/.buckconfig index de91633a3201..699185e6d27d 100644 --- a/.buckconfig +++ b/.buckconfig @@ -19,3 +19,25 @@ ignore = \ [rust] default_edition = 2024 + +[buck2] +digest_algorithms = SHA256 +materializations = deferred +sqlite_materializer_state = true +defer_write_actions = true +clean_stale_enabled = true +restarter = true +hash_all_commands = true + +[build] +execution_platforms = //platforms:platforms + +[buck2_re_client] +use_fbcode_metadata = false +capabilities = true +tls = true +engine_address = grpc://remote.buildbuddy.io +action_cache_address = grpc://remote.buildbuddy.io +cas_address = grpc://remote.buildbuddy.io +http_headers = x-buildbuddy-api-key:$BUILDBUDDY_API_KEY +instance_name = $REMOTE_INSTANCE_NAME diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 000000000000..e4568052f501 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,35 @@ +FROM ubuntu:24.04 + +# Note that the system_rust_toolchain assumes that "rustc" is available in the PATH +ENV DEBIAN_FRONTEND=noninteractive \ + PATH=/root/.cargo/bin:$PATH + +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + ca-certificates \ + build-essential \ + gpg \ + git \ + curl \ + wget \ + unzip \ + libssl-dev \ + libzstd-dev \ + python3 \ + clang \ + && rm -rf /var/lib/apt/lists/* + +# The rust toolchain version in the container should be consistent with +# the one declared in the //:rust-toolchain file. 
+COPY rust-toolchain /tmp/rust-toolchain +RUN set -e; \ + RUST_CHANNEL="$(grep -E '^[[:space:]]*channel' /tmp/rust-toolchain \ + | head -1 \ + | cut -d'=' -f2 \ + | tr -d ' \"')" && \ + echo "Installing Rust toolchain ${RUST_CHANNEL}" && \ + curl -sSf https://sh.rustup.rs \ + | sh -s -- -y --profile minimal --default-toolchain "${RUST_CHANNEL}" && \ + rustup component add clippy + +CMD [ "bash" ] diff --git a/platforms/BUCK b/platforms/BUCK new file mode 100644 index 000000000000..63f852afecbd --- /dev/null +++ b/platforms/BUCK @@ -0,0 +1,3 @@ +load(":defs.bzl", "platforms") + +platforms(name = "platforms") diff --git a/platforms/defs.bzl b/platforms/defs.bzl new file mode 100644 index 000000000000..83070342ca36 --- /dev/null +++ b/platforms/defs.bzl @@ -0,0 +1,64 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under both the MIT license found in the +# LICENSE-MIT file in the root directory of this source tree and the Apache +# License, Version 2.0 found in the LICENSE-APACHE file in the root directory +# of this source tree. + +# See //:Dockerfile for more information +_DEFAULT_IMAGE = "docker://ghcr.io/sluongng/buck2-toolchains@sha256:51dbccd962018fe862322362d884b340e7386c6485d437e562b5f3c5f0e7cf80" + +def _platforms(ctx): + constraints = dict() + constraints.update(ctx.attrs.cpu_configuration[ConfigurationInfo].constraints) + constraints.update(ctx.attrs.os_configuration[ConfigurationInfo].constraints) + cfg = ConfigurationInfo(constraints = constraints, values = {}) + name = ctx.label.raw_target() + + platform = ExecutionPlatformInfo( + label = ctx.label.raw_target(), + configuration = cfg, + executor_config = CommandExecutorConfig( + # Note: There are 3 git_fetch targets with `local_only = True` set in their actions. + # Currently we patch prelude and build a new buck2 binary with cargo to make them + # remote-compatible. However, that does require external network access in the RBE + # worker. 
In BuildBuddy, that's controlled by the `dockerNetwork` exec property below. + # + # If the RBE worker cannot access the external network, we will + # need to run the git_fetch targets locally. + local_enabled = False, + remote_enabled = True, + use_limited_hybrid = False, + remote_cache_enabled = True, + allow_cache_uploads = True, + remote_execution_properties = { + "OSFamily": "Linux", + "Arch": "amd64", + "container-image": ctx.attrs.container_image, + # Prefer BuildBuddy-managed executors + "use-self-hosted-executors": "false", + # Typically we disable external network access with the "off" value for a performance boost. + # However, we want to enable external network access here so that git_fetch actions + # can run remotely. + "dockerNetwork": "bridge", + }, + remote_execution_use_case = "buck2-default", + remote_output_paths = "output_paths", + ), + ) + + return [ + DefaultInfo(), + platform, + PlatformInfo(label = str(name), configuration = cfg), + ExecutionPlatformRegistrationInfo(platforms = [platform]), + ] + +platforms = rule( + impl = _platforms, + attrs = { + "container_image": attrs.string(default = _DEFAULT_IMAGE), + "cpu_configuration": attrs.dep(providers = [ConfigurationInfo], default = "prelude//cpu:x86_64"), + "os_configuration": attrs.dep(providers = [ConfigurationInfo], default = "prelude//os:linux"), + }, +) diff --git a/prelude/git/git_fetch.bzl b/prelude/git/git_fetch.bzl index f6c832d543eb..0d14bb76c6ea 100644 --- a/prelude/git/git_fetch.bzl +++ b/prelude/git/git_fetch.bzl @@ -38,7 +38,7 @@ def git_fetch_impl(ctx: AnalysisContext) -> list[Provider]: ctx.actions.run( cmd, category = "git_fetch", - local_only = True, + # Intentionally disabled so git_fetch actions can run remotely on network-enabled RBE workers: local_only = True, allow_cache_upload = ctx.attrs.allow_cache_upload, )