
Commit 8f578a1

Author: AMD
Fix merge conflicts
1 parent 35615a5

15 files changed: 6 additions, 1,517 deletions


.ci/docker/build.sh

Lines changed: 1 addition & 13 deletions
@@ -288,7 +288,7 @@ case "$tag" in
     ;;
   *)
     # Catch-all for builds that are not hardcoded.
-    PROTOBUF=yes
+    PROTOBUF=yes
     VISION=yes
     echo "image '$image' did not match an existing build configuration"
     if [[ "$image" == *py* ]]; then
@@ -460,15 +460,3 @@ elif [ "$HAS_TRITON" = "yes" ]; then
   echo "expecting triton to not be installed, but it is"
   exit 0
 fi
-<<<<<<< HEAD
-
-# Sanity check cmake version. Executorch reinstalls cmake and I'm not sure if
-# they support 4.0.0 yet, so exclude them from this check.
-CMAKE_VERSION=$(drun cmake --version)
-if [[ "$EXECUTORCH" != *yes* && "$CMAKE_VERSION" != *4.* ]]; then
-  echo "CMake version is not 4.0.0:"
-  drun cmake --version
-  exit 0
-fi
-=======
->>>>>>> upstream/main
Lines changed: 0 additions & 4 deletions
@@ -1,5 +1 @@
-<<<<<<< HEAD
 d704bc6e69c1a588c8edd3cbb67505d554ed65f6
-=======
-7416ffcb92cdbe98d9f97e4e6f95247e46dfc9fd
->>>>>>> upstream/main

.ci/docker/libtorch/build.sh

Lines changed: 0 additions & 4 deletions
@@ -50,15 +50,11 @@ case ${DOCKER_TAG_PREFIX} in
     BASE_TARGET=rocm
     GPU_IMAGE=rocm/dev-ubuntu-22.04:${GPU_ARCH_VERSION}-complete
     PYTORCH_ROCM_ARCH="gfx900;gfx906;gfx908;gfx90a;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201"
-<<<<<<< HEAD
-    DOCKER_GPU_BUILD_ARG="--build-arg PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH}"
-=======
     # add gfx950, gfx115x conditionally starting in ROCm 7.0
     if [[ "$GPU_ARCH_VERSION" == *"7.0"* ]]; then
       PYTORCH_ROCM_ARCH="${PYTORCH_ROCM_ARCH};gfx950;gfx1150;gfx1151"
     fi
     DOCKER_GPU_BUILD_ARG="--build-arg PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH} --build-arg ROCM_VERSION=${GPU_ARCH_VERSION}"
->>>>>>> upstream/main
     ;;
   *)
     echo "ERROR: Unrecognized DOCKER_TAG_PREFIX: ${DOCKER_TAG_PREFIX}"

CMakeLists.txt

Lines changed: 0 additions & 5 deletions
@@ -903,13 +903,8 @@ cmake_dependent_option(
   USE_FBGEMM_GENAI
   "Whether to build FBGEMM GenAI quantized GEMM kernels.\
   Will be disabled if not supported by the platform"
-<<<<<<< HEAD
-  OFF
-  "USE_CUDA OR USE_ROCM"
-=======
   ${USE_FBGEMM_GENAI_DEFAULT}
   "(USE_CUDA AND NOT MSVC) OR USE_ROCM"
->>>>>>> upstream/main
   OFF)

aten/src/ATen/native/sparse/cuda/SparseMatMul.cu

Lines changed: 0 additions & 22 deletions
@@ -40,28 +40,6 @@
 #include <thrust/iterator/discard_iterator.h>


-<<<<<<< HEAD
-#if defined(__CUDACC__) && ((CUSPARSE_VERSION >= 11000) || (defined(USE_ROCM) && ROCM_VERSION >= 60300))
-#define IS_CUSPARSE11_AVAILABLE() 1
-#else
-#define IS_CUSPARSE11_AVAILABLE() 0
-#endif
-
-#if defined(USE_ROCM) && (ROCM_VERSION >= 70000)
-#define HIPSPARSE_FP16_SUPPORT 1
-#else
-#define HIPSPARSE_FP16_SUPPORT 0
-#endif
-
-#if defined(USE_ROCM) && (ROCM_VERSION >= 70100)
-#define HIPSPARSE_FP16_BF16_SUPPORT 1
-#else
-#define HIPSPARSE_FP16_BF16_SUPPORT 0
-#endif
-
-#if IS_CUSPARSE11_AVAILABLE()
-=======
->>>>>>> upstream/main
 #include <library_types.h>

 namespace at::native {

requirements-build.txt

Lines changed: 0 additions & 12 deletions
@@ -1,5 +1,4 @@
 # Build System requirements
-<<<<<<< HEAD
 setuptools>=70.1.0,<80.0 # setuptools develop deprecated on 80.0
 cmake>=3.31.4
 ninja==1.11.1.3
@@ -10,15 +9,4 @@ pyyaml==6.0.2
 requests==2.32.4
 six==1.17.0 # dependency chain: NNPACK -> PeachPy -> six
 typing-extensions==4.14.1
-=======
-setuptools>=70.1.0
-cmake>=3.27
-ninja
-numpy
-packaging
-pyyaml
-requests
-six # dependency chain: NNPACK -> PeachPy -> six
-typing-extensions>=4.10.0
->>>>>>> upstream/main
 pip # not technically needed, but this makes setup.py invocation work

test/dynamo/test_structured_trace.py

Lines changed: 0 additions & 4 deletions
@@ -21,11 +21,7 @@
 from torch._inductor.test_case import TestCase
 from torch._logging._internal import TorchLogsFormatter
 from torch.nn.parallel import DistributedDataParallel as DDP
-<<<<<<< HEAD
-from torch.testing._internal.common_utils import find_free_port, skipIfRocm
-=======
 from torch.testing._internal.common_utils import find_free_port, xfailIfS390X
->>>>>>> upstream/main
 from torch.testing._internal.triton_utils import requires_cuda_and_triton


test/inductor/test_cuda_repro.py

Lines changed: 0 additions & 4 deletions
@@ -39,11 +39,7 @@
     DeterministicGuard,
     freeze_rng_state,
     IS_FBCODE,
-<<<<<<< HEAD
-    skipIfRocm,
-=======
     MI350_ARCH,
->>>>>>> upstream/main
     skipIfRocmArch,
     TEST_WITH_ASAN,
     TEST_WITH_ROCM,

test/inductor/test_decompose_mem_bound_mm.py

Lines changed: 0 additions & 40 deletions
@@ -12,13 +12,7 @@
 from torch.testing import FileCheck
 from torch.testing._internal.common_utils import (
     instantiate_parametrized_tests,
-<<<<<<< HEAD
-    patch_test_members,
-    NAVI3_ARCH,
-    is_arch,
-=======
     is_navi3_arch,
->>>>>>> upstream/main
     parametrize,
     patch_test_members,
     TEST_XPU,
@@ -79,11 +73,7 @@ def forward(
 )
 @instantiate_parametrized_tests
 class TestDecomposeMemMM(TestCase):
-<<<<<<< HEAD
-    def __init__(self, method_name='runTest', methodName='runTest'):
-=======
     def __init__(self, method_name="runTest", methodName="runTest"):
->>>>>>> upstream/main
         super().__init__(method_name, methodName)
         self.atol = 1e-3
         self.rtol = 1e-3
@@ -92,13 +82,9 @@ def setup_tolerance(self, rtol=None, atol=None):
         if rtol is None:
             rtol = self.rtol
         if atol is None:
-<<<<<<< HEAD
-            atol = self.rtol
-=======
             atol = self.atol
         self.rtol = rtol
         self.atol = atol
->>>>>>> upstream/main

     def compare_dict_tensors(self, ref_dict, res_dict, rtol=None, atol=None):
         self.setup_tolerance(rtol, atol)
@@ -107,13 +93,9 @@ def compare_dict_tensors(self, ref_dict, res_dict, rtol=None, atol=None):
         for key1 in ref_dict.keys():
             key2 = "_orig_mod." + key1
             assert key2 in res_dict, f"{key1} does not exist in traced module"
-<<<<<<< HEAD
-            if not torch.allclose(ref_dict[key1], res_dict[key2], rtol=self.rtol, atol=self.atol):
-=======
             if not torch.allclose(
                 ref_dict[key1], res_dict[key2], rtol=self.rtol, atol=self.atol
             ):
->>>>>>> upstream/main
                 return False
         return True

@@ -127,28 +109,20 @@ def compare_parameters(self, module, traced, rtol=None, atol=None):
         self.setup_tolerance(rtol, atol)
         ref_params = dict(module.named_parameters())
         res_params = dict(traced.named_parameters())
-<<<<<<< HEAD
-        self.assertTrue(self.compare_dict_tensors(ref_params, res_params, rtol=self.rtol, atol=self.atol))
-=======
         self.assertTrue(
             self.compare_dict_tensors(
                 ref_params, res_params, rtol=self.rtol, atol=self.atol
             )
         )
->>>>>>> upstream/main

     def compare_gradients(self, module, traced, rtol=None, atol=None):
         self.setup_tolerance(rtol, atol)
         ref_grad = {key: param.grad for key, param in module.named_parameters()}
         res_grad = {key: param.grad for key, param in traced.named_parameters()}
         self.assertTrue(
-<<<<<<< HEAD
-            self.compare_dict_tensors(ref_grad, res_grad, rtol=self.rtol, atol=self.atol)
-=======
             self.compare_dict_tensors(
                 ref_grad, res_grad, rtol=self.rtol, atol=self.atol
            )
->>>>>>> upstream/main
         )

     @parametrize(
@@ -257,19 +231,12 @@ def test_decompose_linear(self, m, n, k, has_bias, should_decompose):

     # We have to increase tolerance for navi3 because all fp16, bf16
     # GEMMs operations have an accuracy issue caused by hardware limitation
-<<<<<<< HEAD
-    @patch_test_members({
-        "atol": 2e-3 if is_arch(NAVI3_ARCH) else 1e-3,
-        "rtol": 2e-3 if is_arch(NAVI3_ARCH) else 1e-3
-    })
-=======
     @patch_test_members(
         {
             "atol": 2e-3 if is_navi3_arch() else 1e-3,
             "rtol": 2e-3 if is_navi3_arch() else 1e-3,
         }
     )
->>>>>>> upstream/main
     @parametrize(
         "m,k,n, should_decompose",
         [(20480, 5, 2, True), (20480, 32, 2, False), (2048, 2, 2, False)],
@@ -380,19 +347,12 @@ def test_decompose_mm_cpu(self, m, n, k, should_decompose):

     # We have to increase tolerance for navi3 because all fp16, bf16
     # GEMMs operations have an accuracy issue caused by hardware limitation
-<<<<<<< HEAD
-    @patch_test_members({
-        "atol": 3e-3 if is_arch(NAVI3_ARCH) else 1e-3,
-        "rtol": 4e-3 if is_arch(NAVI3_ARCH) else 1e-3
-    })
-=======
     @patch_test_members(
         {
             "atol": 3e-3 if is_navi3_arch() else 1e-3,
             "rtol": 4e-3 if is_navi3_arch() else 1e-3,
         }
     )
->>>>>>> upstream/main
     @parametrize(
         "m,k,n, should_decompose",
         [(20480, 5, 2, True), (20480, 32, 2, False), (2048, 2, 2, False)],
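
For readers skimming the resolution above: the upstream-side helpers that were kept compare dicts of tensors with torch.allclose under per-test rtol/atol, loosening the tolerances on Navi3 hardware. Below is a minimal, self-contained Python sketch of that comparison pattern, for illustration only; the standalone function and the example values are hypothetical, not the test file's actual code.

# Minimal illustrative sketch (assumption: not the PyTorch test code itself) of the
# tolerance-aware dict-of-tensors comparison kept from upstream/main above.
import torch


def compare_dict_tensors(ref_dict, res_dict, rtol=1e-3, atol=1e-3):
    # torch.compile-traced modules prefix parameter names with "_orig_mod.",
    # so map each reference key onto its traced counterpart before comparing.
    for key, ref in ref_dict.items():
        traced_key = "_orig_mod." + key
        if traced_key not in res_dict:
            return False
        if not torch.allclose(ref, res_dict[traced_key], rtol=rtol, atol=atol):
            return False
    return True


if __name__ == "__main__":
    ref = {"weight": torch.randn(4, 4)}
    res = {"_orig_mod.weight": ref["weight"] + 5e-4}
    # Looser tolerances (e.g. 2e-3, as the diff uses on Navi3) absorb the small drift.
    print(compare_dict_tensors(ref, res, rtol=2e-3, atol=2e-3))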

test/inductor/test_max_autotune.py

Lines changed: 0 additions & 10 deletions
@@ -1302,12 +1302,7 @@ def test_conv_backend(self):

         self.assertIn("NoValidChoicesError", str(context.exception))

-<<<<<<< HEAD
-    # Some ROCm GPUs don't have enough VRAM to run all autotune configurations and padding benchmarks
-    @skipIfRocmNotEnoughMemory(30)
-=======
     @skipIfRocmArch(NAVI_ARCH)
->>>>>>> upstream/main
     def test_non_contiguous_input_mm(self):
         """
         Make sure the triton template can work with non-contiguous inputs without crash.
@@ -1362,15 +1357,10 @@ def f(x, y):
     # TODO: fix accuracy failure of the triton template on XPU.
     # and enable this test case.
     @skipIfXpu
-<<<<<<< HEAD
-    # Some ROCm GPUs don't have enough VRAM to run all autotune configurations and padding benchmarks
-    @skipIfRocmNotEnoughMemory(30)
-=======
     @unittest.skipIf(
         config.triton.native_matmul,
         "native matmul and Triton template both have accuracy fail (2.2%)",
     )
->>>>>>> upstream/main
     def test_non_contiguous_input_mm_plus_mm(self):
         x1 = rand_strided((50257, 2048), (1, 50304), device=GPU_TYPE)
         y1 = rand_strided((2048, 768), (768, 1), device=GPU_TYPE)
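
Both hunks above drop the HEAD-side @skipIfRocmNotEnoughMemory(30) guard, whose implementation is not part of this diff. Purely as a hypothetical sketch of how such a VRAM-threshold skip could look, here is a standalone decorator; the name, the GiB reading of the argument, and the use of torch.cuda.mem_get_info are assumptions, not the repository's actual helper.

# Hypothetical sketch only: the real skipIfRocmNotEnoughMemory helper is not shown
# in this commit. Assumption: its argument is a minimum amount of free VRAM in GiB.
import functools
import unittest

import torch


def skip_if_not_enough_gpu_memory(min_free_gib):
    """Skip a test unless the visible GPU reports at least min_free_gib GiB free."""

    def decorator(fn):
        @functools.wraps(fn)
        def wrapper(*args, **kwargs):
            if torch.cuda.is_available():
                free_bytes, _total_bytes = torch.cuda.mem_get_info()
                if free_bytes < min_free_gib * (1 << 30):
                    raise unittest.SkipTest(
                        f"requires >= {min_free_gib} GiB of free GPU memory"
                    )
            return fn(*args, **kwargs)

        return wrapper

    return decorator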
