Skip to content

Commit 952149b

Browse files
committed
Merge remote-tracking branch 'intel/sycl' into steffen/coverity_111125
2 parents a8692bf + 74ffed8 commit 952149b

File tree

214 files changed

+1236
-36618
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

214 files changed

+1236
-36618
lines changed

clang/include/clang/Driver/Options.td

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1341,12 +1341,12 @@ def fgpu_sanitize : Flag<["-"], "fgpu-sanitize">, Group<f_Group>,
13411341
def fno_gpu_sanitize : Flag<["-"], "fno-gpu-sanitize">, Group<f_Group>;
13421342

13431343
def offload_compress : Flag<["--"], "offload-compress">,
1344-
HelpText<"Compress offload device binaries (HIP only)">;
1344+
HelpText<"Compress offload device binaries (HIP and SYCL only)">;
13451345
def no_offload_compress : Flag<["--"], "no-offload-compress">;
13461346

13471347
def offload_compression_level_EQ : Joined<["--"], "offload-compression-level=">,
13481348
Flags<[HelpHidden]>,
1349-
HelpText<"Compression level for offload device binaries (HIP only)">;
1349+
HelpText<"Compression level for offload device binaries (HIP and SYCL only)">;
13501350

13511351
def offload_jobs_EQ : Joined<["--"], "offload-jobs=">,
13521352
HelpText<"Specify the number of threads to use for device offloading tasks "

clang/lib/Driver/ToolChains/Clang.cpp

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10260,6 +10260,18 @@ void OffloadPackager::ConstructJob(Compilation &C, const JobAction &JA,
1026010260
"kind=" + Kind.str(),
1026110261
};
1026210262

10263+
// When compiling with, e.g., -fsycl-targets=spir64_gen -Xsycl-target-backend
10264+
// "-device pvc,bdw", the offloading arch will be "pvc,bdw", which
10265+
// contains a comma. Because the comma is used to separate fields
10266+
// within the --image option, we cannot pass arch=pvc,bdw directly.
10267+
// Instead, we pass it like arch=pvc,arch=bdw, then
10268+
// llvm-offload-binary joins them back to arch=pvc,bdw.
10269+
SmallVector<StringRef> Archs;
10270+
Arch.split(Archs, ',');
10271+
if (Archs.size() > 1) {
10272+
Parts[2] = "arch=" + llvm::join(Archs, ",arch=");
10273+
}
10274+
1026310275
if (TC->getDriver().isUsingOffloadLTO())
1026410276
for (StringRef Feature : FeatureArgs)
1026510277
Parts.emplace_back("feature=" + Feature.str());
@@ -10283,7 +10295,13 @@ void OffloadPackager::ConstructJob(Compilation &C, const JobAction &JA,
1028310295
AL += " ";
1028410296
AL += A;
1028510297
}
10286-
Parts.emplace_back(C.getArgs().MakeArgString(Twine(Opt) + AL));
10298+
// As mentioned earlier, we cannot pass a value with commas directly,
10299+
// but llvm-offload-binary joins multiple occurrences of the same
10300+
// option with commas, so we split the value on
10301+
// all commas and pass them as separate arguments.
10302+
for (StringRef Split : llvm::split(AL, ',')) {
10303+
Parts.emplace_back(C.getArgs().MakeArgString(Twine(Opt) + Split));
10304+
}
1028710305
};
1028810306
const ArgList &Args =
1028910307
C.getArgsForToolChain(nullptr, StringRef(), Action::OFK_SYCL);
@@ -10292,10 +10310,10 @@ void OffloadPackager::ConstructJob(Compilation &C, const JobAction &JA,
1029210310
static_cast<const toolchains::SYCLToolChain &>(*TC);
1029310311
SYCLTC.AddImpliedTargetArgs(TC->getTriple(), Args, BuildArgs, JA, *HostTC,
1029410312
Arch);
10295-
SYCLTC.TranslateBackendTargetArgs(TC->getTriple(), Args, BuildArgs, Arch);
10313+
SYCLTC.TranslateBackendTargetArgs(TC->getTriple(), Args, BuildArgs);
1029610314
createArgString("compile-opts=");
1029710315
BuildArgs.clear();
10298-
SYCLTC.TranslateLinkerTargetArgs(TC->getTriple(), Args, BuildArgs, Arch);
10316+
SYCLTC.TranslateLinkerTargetArgs(TC->getTriple(), Args, BuildArgs);
1029910317
createArgString("link-opts=");
1030010318
}
1030110319

clang/test/Driver/clang-linker-wrapper.cpp

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,13 +63,20 @@
6363
// CHK-CMDS-AOT-GEN: spirv-to-ir-wrapper{{.*}} -o [[FIRSTLLVMLINKIN:.*]].bc --llvm-spirv-opts --spirv-preserve-auxdata --spirv-target-env=SPV-IR --spirv-builtin-format=global
6464
// CHK-CMDS-AOT-GEN-NEXT: llvm-link{{.*}} --suppress-warnings [[FIRSTLLVMLINKIN]].bc -o [[FIRSTLLVMLINKOUT:.*]].bc
6565
// CHK-CMDS-AOT-GEN-NEXT: llvm-link{{.*}} -only-needed --suppress-warnings [[FIRSTLLVMLINKOUT]].bc {{.*}}.bc -o [[SECONDLLVMLINKOUT:.*]].bc
66-
// CHK-CMDS-AOT-GEN-NEXT: sycl-post-link{{.*}} SYCL_POST_LINK_OPTIONS -o [[SYCLPOSTLINKOUT:.*]].table [[SECONDLLVMLINKOUT]].bc
66+
// Check that the target specified by -fsycl-targets is passed to sycl-post-link for filtering.
67+
// CHK-CMDS-AOT-GEN-NEXT: sycl-post-link{{.*}} SYCL_POST_LINK_OPTIONS -o intel_gpu_pvc,[[SYCLPOSTLINKOUT:.*]].table [[SECONDLLVMLINKOUT]].bc
6768
// CHK-CMDS-AOT-GEN-NEXT: llvm-spirv{{.*}} LLVM_SPIRV_OPTIONS -o {{.*}}
6869
// CHK-CMDS-AOT-GEN-NEXT: ocloc{{.*}} -output_no_suffix -spirv_input -device pvc{{.*}} -output {{.*}} -file {{.*}}
6970
// CHK-CMDS-AOT-GEN-NEXT: offload-wrapper: output: [[WRAPPEROUT:.*]].bc, input: {{.*}}, compile-opts: , link-opts:
7071
// CHK-CMDS-AOT-GEN-NEXT: clang{{.*}} -c -o [[LLCOUT:.*]].o [[WRAPPEROUT]].bc
7172
// CHK-CMDS-AOT-GEN-NEXT: "{{.*}}/ld" -- HOST_LINKER_FLAGS -dynamic-linker HOST_DYN_LIB -o a.out [[LLCOUT]].o HOST_LIB_PATH HOST_STAT_LIB {{.*}}.o
7273

74+
// Check that when --gpu-tool-arg is specified in clang-linker-wrapper
75+
// (which happens when the AOT device is specified via -Xsycl-target-backend '-device pvc' in clang),
76+
// the target is not passed to sycl-post-link for filtering.
77+
// RUN: clang-linker-wrapper -sycl-embed-ir -sycl-device-libraries=%t1.devicelib.o -sycl-post-link-options="SYCL_POST_LINK_OPTIONS" -llvm-spirv-options="LLVM_SPIRV_OPTIONS" "--host-triple=x86_64-unknown-linux-gnu" "--gpu-tool-arg=-device pvc" "--linker-path=/usr/bin/ld" "--" HOST_LINKER_FLAGS "-dynamic-linker" HOST_DYN_LIB "-o" "a.out" HOST_LIB_PATH HOST_STAT_LIB %t1.o --dry-run 2>&1 | FileCheck -check-prefix=CHK-NO-CMDS-AOT-GEN %s
78+
// CHK-NO-CMDS-AOT-GEN-NOT: sycl-post-link{{.*}} -o intel_gpu_pvc,{{.*}}
79+
7380
/// Check for list of commands for standalone clang-linker-wrapper run for sycl (AOT for Intel CPU)
7481
// -------
7582
// Generate .o file as linker wrapper input.

clang/test/Driver/sycl-ftarget-compile-fast.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
// RUN: | FileCheck -check-prefix=TARGET_COMPILE_FAST_GEN %s
99

1010
// TARGET_COMPILE_FAST_GEN: llvm-offload-binary
11-
// TARGET_COMPILE_FAST_GEN: compile-opts={{.*}}-options -igc_opts 'PartitionUnit=1,SubroutineThreshold=50000'
11+
// TARGET_COMPILE_FAST_GEN: compile-opts={{.*}}-options -igc_opts 'PartitionUnit=1,compile-opts=SubroutineThreshold=50000'
1212

1313
// RUN: %clang -### -target x86_64-unknown-linux-gnu -fsycl --offload-new-driver \
1414
// RUN: -ftarget-compile-fast %s 2>&1 \

clang/test/Driver/sycl-offload-new-driver.c

Lines changed: 47 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -188,7 +188,7 @@
188188
// RUN: %clangxx -fsycl -### -fsycl-targets=spir64_gen --offload-new-driver \
189189
// RUN: -Xsycl-target-backend=spir64_gen "-device pvc,bdw" %s 2>&1 \
190190
// RUN: | FileCheck -check-prefix COMMA_FILE %s
191-
// COMMA_FILE: llvm-offload-binary{{.*}} "--image=file={{.*}}pvc@bdw{{.*}},triple=spir64_gen-unknown-unknown,arch=pvc,bdw,kind=sycl,compile-opts=-device_options pvc -ze-intel-enable-auto-large-GRF-mode"
191+
// COMMA_FILE: llvm-offload-binary{{.*}} "--image=file={{.*}}pvc@bdw{{.*}},triple=spir64_gen-unknown-unknown,arch=pvc,arch=bdw,kind=sycl,compile-opts=-device_options pvc -ze-intel-enable-auto-large-GRF-mode -device pvc,compile-opts=bdw"
192192

193193
/// Verify the arch value for the packager is populated with different
194194
/// scenarios for spir64_gen
@@ -212,6 +212,52 @@
212212
// RUN: | FileCheck -check-prefix ARCH_CHECK %s
213213
// ARCH_CHECK: llvm-offload-binary{{.*}} "--image=file={{.*}}triple=spir64_gen-unknown-unknown,arch=bdw,kind=sycl{{.*}}"
214214

215+
// Verify that when a comma-separated list of architectures is provided in -device, they are
216+
// passed to llvm-offload-binary correctly.
217+
// RUN: %clangxx -fsycl -### -fsycl-targets=spir64_gen --offload-new-driver \
218+
// RUN: -Xsycl-target-backend "-device pvc,bdw" %s 2>&1 \
219+
// RUN: | FileCheck -check-prefix MULTI_ARCH %s
220+
// RUN: %clangxx -fsycl -### -fsycl-targets=spir64_gen --offload-new-driver \
221+
// RUN: -Xsycl-target-backend=spir64_gen "-device pvc,bdw" %s 2>&1 \
222+
// RUN: | FileCheck -check-prefix MULTI_ARCH %s
223+
// RUN: %clangxx -fsycl -### -fsycl-targets=spir64_gen --offload-new-driver \
224+
// RUN: -Xs "-device pvc,bdw" %s 2>&1 \
225+
// RUN: | FileCheck -check-prefix MULTI_ARCH %s
226+
// MULTI_ARCH: llvm-offload-binary{{.*}} "--image=file={{.*}}triple=spir64_gen-unknown-unknown,arch=pvc,arch=bdw,kind=sycl
227+
// MULTI_ARCH-SAME: compile-opts=-device_options pvc -ze-intel-enable-auto-large-GRF-mode -device pvc,compile-opts=bdw"
228+
229+
// Verify that when an object is produced by llvm-offload-binary with multiple Intel GPU architectures,
230+
// clang-linker-wrapper will call ocloc with -device listing all architectures.
231+
// RUN: %clangxx -fsycl -fsycl-targets=spir64_gen --offload-new-driver \
232+
// RUN: -Xsycl-target-backend=spir64_gen "-device pvc,bdw" -c %s -o %t_multiarch_test.o
233+
// RUN: clang-linker-wrapper --dry-run --linker-path=/usr/bin/ld \
234+
// RUN: --host-triple=x86_64-unknown-linux-gnu %t_multiarch_test.o 2>&1 \
235+
// RUN: | FileCheck -check-prefix=OCLOC_MULTI_ARCH %s
236+
// OCLOC_MULTI_ARCH: ocloc{{.*}}-device pvc,bdw
237+
238+
// Verify that, for multiple targets, -Xsycl-target-backend= values containing commas
239+
// are passed correctly to llvm-offload-binary.
240+
// RUN: %clangxx -fsycl -### --offload-new-driver \
241+
// RUN: -fsycl-targets=nvptx64-nvidia-cuda,amdgcn-amd-amdhsa,spir64_gen \
242+
// RUN: -Xsycl-target-backend=amdgcn-amd-amdhsa --offload-arch=gfx908,gfx1010 \
243+
// RUN: -Xsycl-target-backend=nvptx64-nvidia-cuda --offload-arch=sm_86,sm_87,sm_89 \
244+
// RUN: -Xsycl-target-backend=spir64_gen "-device pvc,bdw" \
245+
// RUN: -Xsycl-target-linker=spir64_gen "-DFOO,BAR" \
246+
// RUN: -nogpulib %s 2>&1 | FileCheck -check-prefix=MULTI_ARCH2 %s
247+
// MULTI_ARCH2: llvm-offload-binary{{.*}} "--image=file={{.*}}triple=amdgcn-amd-amdhsa,arch=gfx1010,kind=sycl,compile-opts=--offload-arch=gfx908,compile-opts=gfx1010"
248+
// MULTI_ARCH2-SAME: "--image=file={{.*}}triple=amdgcn-amd-amdhsa,arch=gfx908,kind=sycl,compile-opts=--offload-arch=gfx908,compile-opts=gfx1010"
249+
// MULTI_ARCH2-SAME: "--image=file={{.*}}triple=nvptx64-nvidia-cuda,arch=sm_86,kind=sycl,compile-opts=--offload-arch=sm_86,compile-opts=sm_87,compile-opts=sm_89"
250+
// MULTI_ARCH2-SAME: "--image=file={{.*}}triple=nvptx64-nvidia-cuda,arch=sm_87,kind=sycl,compile-opts=--offload-arch=sm_86,compile-opts=sm_87,compile-opts=sm_89"
251+
// MULTI_ARCH2-SAME: "--image=file={{.*}}triple=nvptx64-nvidia-cuda,arch=sm_89,kind=sycl,compile-opts=--offload-arch=sm_86,compile-opts=sm_87,compile-opts=sm_89"
252+
// MULTI_ARCH2-SAME: "--image=file={{.*}}triple=spir64_gen-unknown-unknown,arch=pvc,arch=bdw,kind=sycl,compile-opts=-device_options pvc -ze-intel-enable-auto-large-GRF-mode -device pvc,compile-opts=bdw,link-opts=-DFOO,link-opts=BAR"
253+
254+
// Verify that the driver correctly handles link-opt and compile-opt values with commas
255+
// RUN: %clangxx -fsycl -### -fsycl-targets=spir64_gen --offload-new-driver \
256+
// RUN: -Xsycl-target-backend "-device bdw -FOO a,b" \
257+
// RUN: -Xsycl-target-linker "-BAR x,y" %s 2>&1 \
258+
// RUN: | FileCheck -check-prefix COMMA_OPTS %s
259+
// COMMA_OPTS: llvm-offload-binary{{.*}} "--image=file={{.*}}triple=spir64_gen-unknown-unknown,arch=bdw,kind=sycl,compile-opts=-device bdw -FOO a,compile-opts=b,link-opts=-BAR x,link-opts=y"
260+
215261
/// Verify that --cuda-path is passed to clang-linker-wrapper for SYCL offload
216262
// RUN: %clangxx -fsycl -### -fsycl-targets=nvptx64-nvidia-cuda -fno-sycl-libspirv \
217263
// RUN: --cuda-gpu-arch=sm_20 --cuda-path=%S/Inputs/CUDA_80/usr/local/cuda %s \

clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp

Lines changed: 33 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -727,10 +727,24 @@ runSYCLPostLinkTool(ArrayRef<StringRef> InputFiles, const ArgList &Args) {
727727
createOutputFile(sys::path::filename(ExecutableName), "table");
728728
if (!TempFileOrErr)
729729
return TempFileOrErr.takeError();
730+
std::string OutputPathWithArch = TempFileOrErr->str();
731+
732+
// Enable the driver to invoke sycl-post-link with the device architecture
733+
// when Intel GPU targets are passed in -fsycl-targets.
734+
// OPT_gpu_tool_arg_EQ is checked to ensure the device architecture is not
735+
// passed via the -Xsycl-target-backend=spir64_gen "-device <arch>" form.
736+
const llvm::Triple Triple(Args.getLastArgValue(OPT_triple_EQ));
737+
StringRef Arch = Args.getLastArgValue(OPT_arch_EQ);
738+
StringRef IsGPUTool = Args.getLastArgValue(OPT_gpu_tool_arg_EQ);
739+
740+
if (Triple.getSubArch() == llvm::Triple::SPIRSubArch_gen && !Arch.empty() &&
741+
IsGPUTool.empty() && Arch != "*")
742+
OutputPathWithArch = "intel_gpu_" + Arch.str() + "," + OutputPathWithArch;
743+
else if (Triple.getSubArch() == llvm::Triple::SPIRSubArch_x86_64)
744+
OutputPathWithArch = "spir64_x86_64," + OutputPathWithArch;
730745

731746
SmallVector<StringRef, 8> CmdArgs;
732747
CmdArgs.push_back(*SYCLPostLinkPath);
733-
const llvm::Triple Triple(Args.getLastArgValue(OPT_triple_EQ));
734748
Arg *SYCLDeviceLibLoc = Args.getLastArg(OPT_sycl_device_library_location_EQ);
735749
if (SYCLDeviceLibLoc && !Triple.isSPIRAOT()) {
736750
std::string SYCLDeviceLibLocParam = SYCLDeviceLibLoc->getValue();
@@ -748,7 +762,7 @@ runSYCLPostLinkTool(ArrayRef<StringRef> InputFiles, const ArgList &Args) {
748762
SYCLPostLinkOptions.split(CmdArgs, " ", /* MaxSplit = */ -1,
749763
/* KeepEmpty = */ false);
750764
CmdArgs.push_back("-o");
751-
CmdArgs.push_back(*TempFileOrErr);
765+
CmdArgs.push_back(Args.MakeArgString(OutputPathWithArch));
752766
for (auto &File : InputFiles)
753767
CmdArgs.push_back(File);
754768
if (Error Err = executeCommands(*SYCLPostLinkPath, CmdArgs))
@@ -943,24 +957,29 @@ static void addSYCLBackendOptions(const ArgList &Args,
943957
if (IsCPU) {
944958
BackendOptions.split(CmdArgs, " ", /*MaxSplit=*/-1, /*KeepEmpty=*/false);
945959
} else {
946-
// ocloc -options args need to be comma separated, e.g. `-options
947-
// "-g,-cl-opt-disable"`. Otherwise, only the first arg is processed by
948-
// ocloc as an arg for -options, and the rest are processed as standalone
949-
// flags, possibly leading to errors.
960+
// ocloc -options takes arguments in the form of '-options "-g
961+
// -cl-opt-disable"' where each argument is separated with spaces.
950962
// split function here returns a pair with everything before the separator
951963
// ("-options") in the first member of the pair, and everything after the
952964
// separator in the second part of the pair. The separator is not included
953965
// in any of them.
954966
auto [BeforeOptions, AfterOptions] = BackendOptions.split("-options ");
955967
// Only add if not empty, an empty arg can lead to ocloc errors.
956-
if (!BeforeOptions.empty())
957-
CmdArgs.push_back(BeforeOptions);
968+
if (!BeforeOptions.empty()) {
969+
SmallVector<StringRef, 8> BeforeArgs;
970+
BeforeOptions.split(BeforeArgs, " ", /*MaxSplit=*/-1,
971+
/*KeepEmpty=*/false);
972+
for (const auto &string : BeforeArgs) {
973+
CmdArgs.push_back(string);
974+
}
975+
}
958976
if (!AfterOptions.empty()) {
959-
// Separator not included by the split function, so explicitly added here.
960977
CmdArgs.push_back("-options");
961-
std::string Replace = AfterOptions.str();
962-
std::replace(Replace.begin(), Replace.end(), ' ', ',');
963-
CmdArgs.push_back(Args.MakeArgString(Replace));
978+
// Split the options string by spaces and rejoin to normalize whitespace
979+
SmallVector<StringRef, 8> AfterArgs;
980+
AfterOptions.split(AfterArgs, " ", /*MaxSplit=*/-1, /*KeepEmpty=*/false);
981+
std::string JoinedOptions = llvm::join(AfterArgs, " ");
982+
CmdArgs.push_back(Args.MakeArgString(JoinedOptions));
964983
}
965984
}
966985

@@ -1441,9 +1460,8 @@ static Expected<StringRef> linkDevice(ArrayRef<StringRef> InputFiles,
14411460
if (ExtractedDeviceLibFiles.empty()) {
14421461
// TODO: Add NVPTX when ready
14431462
if (Triple.isSPIROrSPIRV())
1444-
return createStringError(
1445-
inconvertibleErrorCode(),
1446-
" SYCL device library file list cannot be empty.");
1463+
WithColor::warning(errs(), LinkerExecutable)
1464+
<< "SYCL device library file list is empty\n";
14471465
return *LinkedFile;
14481466
}
14491467

devops/actions/run-tests/benchmark/action.yml

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -261,13 +261,14 @@ runs:
261261
${{ inputs.dry_run == 'true' && '--dry-run' || '' }} \
262262

263263
echo "::endgroup::"
264-
265-
# Run integration tests
266-
# NOTE: Each integration test prints its own group name as part of test script
267-
export LLVM_BENCHMARKS_UNIT_TESTING=1
268-
export COMPUTE_BENCHMARKS_BUILD_PATH=$WORKDIR/compute-benchmarks-build
269-
python3 ./devops/scripts/benchmarks/tests/test_integration.py
270264

265+
# Run benchmarks' integration tests
266+
# NOTE: Each integration test prints its own group name as part of test script
267+
if [ '${{ github.event_name == 'pull_request' }}' = 'true' ]; then
268+
export LLVM_BENCHMARKS_UNIT_TESTING=1
269+
export COMPUTE_BENCHMARKS_BUILD_PATH=$WORKDIR/compute-benchmarks-build
270+
python3 ./devops/scripts/benchmarks/tests/test_integration.py
271+
fi
271272
- name: Cache changes and upload github summary
272273
if: always()
273274
shell: bash

devops/scripts/benchmarks/benches/compute.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -61,8 +61,8 @@ def git_url(self) -> str:
6161
return "https://github.com/intel/compute-benchmarks.git"
6262

6363
def git_hash(self) -> str:
64-
# Nov 7, 2025
65-
return "d985da634fc1a9416ca0bd067cfb9886b02d0211"
64+
# Nov 17, 2025
65+
return "932ae79f7cca7e156285fc10a59610927c769e89"
6666

6767
def setup(self) -> None:
6868
if options.sycl is None:
@@ -860,6 +860,7 @@ def _bin_args(self, run_trace: TracingType = TracingType.NONE) -> list[str]:
860860
"--multiplier=1",
861861
"--vectorSize=1",
862862
"--lws=256",
863+
"--prefetch=0",
863864
]
864865

865866

0 commit comments

Comments
 (0)