@@ -141,7 +141,6 @@ class CodeGen_D3D12Compute_Dev : public CodeGen_GPU_Dev {
141141 void visit (const Free *op) override ;
142142 void visit (const Cast *op) override ;
143143 void visit (const Atomic *op) override ;
144- void visit (const Shuffle *op) override ;
145144 void visit (const AssertStmt *op) override ;
146145 void visit (const FloatImm *op) override ;
147146
@@ -1369,80 +1368,6 @@ void CodeGen_D3D12Compute_Dev::CodeGen_D3D12Compute_C::visit(const AssertStmt *o
13691368 user_warning << " Ignoring assertion inside D3D12Compute kernel: " << op->condition << " \n " ;
13701369}
13711370
// Emits the assignment for a Shuffle node, handling three cases in order:
//   1. interleave      - output lane i is element (i / num_vectors) of
//                        source vector (i % num_vectors);
//   2. extract element - a single subscript into the first source vector;
//   3. anything else   - each output lane is looked up by its index into the
//                        concatenation of all source vectors.
// In every case the right-hand side is built as text and handed to
// print_assignment together with the node's result type.
// NOTE(review): the whitespace inside the string literals below (" (", " [",
// " , ", ...) may be an artifact of how this diff was captured - confirm
// against the repository before relying on the exact emitted text.
1372- void CodeGen_D3D12Compute_Dev::CodeGen_D3D12Compute_C::visit (const Shuffle *op) {
1373- if (op->is_interleave ()) {
1374- int op_lanes = op->type .lanes ();
1375- int num_vectors = (int )op->vectors .size ();
// Lane count is read from the first source only and reused for every source
// in the loop below.
1376- int arg_lanes = op->vectors [0 ].type ().lanes ();
// Interleaving a single vector is the vector itself: assign it directly.
1377- if (num_vectors == 1 ) {
1378- print_assignment (op->type , print_expr (op->vectors [0 ]));
1379- return ;
1380- }
// Print every source expression once, up front, so the loop that builds the
// result only concatenates the already-printed strings.
1381- vector<string> arg_exprs (num_vectors);
1382- for (int i = 0 ; i < num_vectors; i++) {
1383- arg_exprs[i] = print_expr (op->vectors [i]);
1384- }
// Build "<type>(a[0], b[0], ..., a[1], b[1], ...)": a constructor-style
// expression listing one element per output lane.
1385- ostringstream rhs;
1386- rhs << print_type (op->type ) << " (" ;
1387- for (int i = 0 ; i < op_lanes; i++) {
1388- int src = i % num_vectors;
1389- int lane = i / num_vectors;
1390- rhs << arg_exprs[src];
// Sources with a single lane are written without a subscript.
1391- if (arg_lanes > 1 ) {
1392- rhs << " [" << lane << " ]" ;
1393- }
// Separator between elements, omitted after the last one.
1394- if (i < op_lanes - 1 ) {
1395- rhs << " , " ;
1396- }
1397- }
1398- rhs << " )" ;
1399- print_assignment (op->type , rhs.str ());
1400- } else if (op->is_extract_element ()) {
1401- // Extract a single scalar from a vector using subscript.
1402- ostringstream rhs;
1403- rhs << print_expr (op->vectors [0 ]) << " [" << op->indices [0 ] << " ]" ;
1404- print_assignment (op->type , rhs.str ());
1405- } else {
1406- // Generic shuffle or slice: build output vector element by element.
// src_offsets[k] is the position of source k's first lane within the
// concatenation of all sources; total_src_lanes is the running lane count.
1407- int total_src_lanes = 0 ;
1408- vector<string> src_exprs;
1409- vector<int > src_offsets;
1410- for (const auto &v : op->vectors ) {
1411- src_offsets.push_back (total_src_lanes);
1412- src_exprs.push_back (print_expr (v));
1413- total_src_lanes += v.type ().lanes ();
1414- }
1415- int out_lanes = op->type .lanes ();
1416- ostringstream rhs;
// The "<type>(...)" wrapper is emitted only for multi-lane results; a
// single-lane result is written as the bare element expression.
1417- if (out_lanes > 1 ) {
1418- rhs << print_type (op->type ) << " (" ;
1419- }
1420- for (int i = 0 ; i < out_lanes; i++) {
1421- int idx = op->indices [i];
1422- // Find source vector for this index
// Start from the last source and switch to the first source whose successor
// begins beyond idx; if none matches, the last source is kept.
1423- int src = (int )src_exprs.size () - 1 ;
1424- for (int s = 0 ; s + 1 < (int )op->vectors .size (); s++) {
1425- if (idx < src_offsets[s + 1 ]) {
1426- src = s;
1427- break ;
1428- }
1429- }
// Convert the index in the concatenation into a lane of the chosen source.
1430- int lane = idx - src_offsets[src];
1431- rhs << src_exprs[src];
// Here the lane count is checked per source, so single-lane sources are
// written without a subscript.
1432- if (op->vectors [src].type ().lanes () > 1 ) {
1433- rhs << " [" << lane << " ]" ;
1434- }
1435- if (i < out_lanes - 1 ) {
1436- rhs << " , " ;
1437- }
1438- }
1439- if (out_lanes > 1 ) {
1440- rhs << " )" ;
1441- }
1442- print_assignment (op->type , rhs.str ());
1443- }
1444- }
1445-
14461371void CodeGen_D3D12Compute_Dev::CodeGen_D3D12Compute_C::visit (const Atomic *op) {
14471372 user_assert (op->mutex_name .empty ())
14481373 << " D3D12Compute does not support atomic operations that require a mutex lock.\n " ;
0 commit comments