Fix reshape for custom logpdf: flatten position to 1-D before the call and reshape the gradient back (partial 1-D position convention)

sbrantq · sbrantq · commit 7f4aa5a260d0 · 2026-03-26T15:49:12.000-05:00
diff --git a/enzyme/Enzyme/MLIR/Interfaces/HMCUtils.cpp b/enzyme/Enzyme/MLIR/Interfaces/HMCUtils.cpp
@@ -361,9 +361,21 @@ GradientResult MCMC::computePotentialAndGradient(OpBuilder &builder,
       builder, loc, scalarType,
       DenseElementsAttr::get(scalarType, builder.getFloatAttr(elemType, 1.0)));
 
-  SmallVector<Value> autodiffInputs{position, gradSeed};
+  bool isCustomLogpdf = ctx.hasCustomLogpdf();
+  auto flatType = RankedTensorType::get({ctx.positionSize}, elemType);
+  Value autodiffPosition = position;
+  auto autodiffPositionType = positionType;
+  auto autodiffGradType = positionType;
+  if (isCustomLogpdf) {
+    autodiffPosition =
+        enzyme::ReshapeOp::create(builder, loc, flatType, position);
+    autodiffPositionType = flatType;
+    autodiffGradType = flatType;
+  }
+
+  SmallVector<Value> autodiffInputs{autodiffPosition, gradSeed};
   auto autodiffOp = enzyme::AutoDiffRegionOp::create(
-      builder, loc, TypeRange{scalarType, rng.getType(), positionType},
+      builder, loc, TypeRange{scalarType, rng.getType(), autodiffGradType},
       autodiffInputs,
       builder.getArrayAttr({enzyme::ActivityAttr::get(
           builder.getContext(), enzyme::Activity::enzyme_active)}),
@@ -376,12 +388,12 @@ GradientResult MCMC::computePotentialAndGradient(OpBuilder &builder,
       nullptr);
 
   Block *autodiffBlock = builder.createBlock(&autodiffOp.getBody());
-  autodiffBlock->addArgument(positionType, loc);
+  autodiffBlock->addArgument(autodiffPositionType, loc);
 
   builder.setInsertionPointToStart(autodiffBlock);
   Value qArg = autodiffBlock->getArgument(0);
 
-  if (ctx.hasCustomLogpdf()) {
+  if (isCustomLogpdf) {
     SmallVector<Value> callArgs;
     callArgs.push_back(qArg);
     callArgs.append(ctx.fnInputs.begin(), ctx.fnInputs.end());
@@ -425,9 +437,14 @@ GradientResult MCMC::computePotentialAndGradient(OpBuilder &builder,
 
   builder.setInsertionPointAfter(autodiffOp);
 
+  Value grad = autodiffOp.getResult(2);
+  if (isCustomLogpdf) {
+    grad = enzyme::ReshapeOp::create(builder, loc, positionType, grad);
+  }
+
   return {
       autodiffOp.getResult(0), // U
-      autodiffOp.getResult(2), // grad
+      grad,                    // grad
       autodiffOp.getResult(1)  // rng
   };
 }
@@ -691,8 +708,10 @@ InitialHMCState MCMC::InitHMC(OpBuilder &builder, Location loc, Value rng,
 
   if (ctx.hasCustomLogpdf()) {
     q0 = initialPosition;
+    auto flatType = RankedTensorType::get({ctx.positionSize}, elemType);
+    auto q0Flat = enzyme::ReshapeOp::create(builder, loc, flatType, q0);
     SmallVector<Value> callArgs;
-    callArgs.push_back(q0);
+    callArgs.push_back(q0Flat);
     callArgs.append(ctx.fnInputs.begin(), ctx.fnInputs.end());
     auto callOp = func::CallOp::create(builder, loc, ctx.logpdfFn,
                                        TypeRange{scalarType}, callArgs);
@@ -735,13 +754,24 @@ InitialHMCState MCMC::InitHMC(OpBuilder &builder, Location loc, Value rng,
   }
 
   // 4. Compute initial gradient at q0
+  bool isCustomLogpdf = ctx.hasCustomLogpdf();
+  auto flatType = RankedTensorType::get({ctx.positionSize}, elemType);
+  Value autodiffQ0 = q0;
+  auto autodiffQ0Type = positionType;
+  auto autodiffGradType = positionType;
+  if (isCustomLogpdf) {
+    autodiffQ0 = enzyme::ReshapeOp::create(builder, loc, flatType, q0);
+    autodiffQ0Type = flatType;
+    autodiffGradType = flatType;
+  }
+
   auto gradSeedInit = arith::ConstantOp::create(
       builder, loc, scalarType,
       DenseElementsAttr::get(scalarType, builder.getFloatAttr(elemType, 1.0)));
-  SmallVector<Value> autodiffInputs{q0, gradSeedInit};
+  SmallVector<Value> autodiffInputs{autodiffQ0, gradSeedInit};
   auto autodiffInit = enzyme::AutoDiffRegionOp::create(
       builder, loc,
-      TypeRange{scalarType, rngForAutodiff.getType(), positionType},
+      TypeRange{scalarType, rngForAutodiff.getType(), autodiffGradType},
       autodiffInputs,
       builder.getArrayAttr({enzyme::ActivityAttr::get(
           builder.getContext(), enzyme::Activity::enzyme_active)}),
@@ -754,12 +784,12 @@ InitialHMCState MCMC::InitHMC(OpBuilder &builder, Location loc, Value rng,
       nullptr);
 
   Block *autodiffInitBlock = builder.createBlock(&autodiffInit.getBody());
-  autodiffInitBlock->addArgument(positionType, loc);
+  autodiffInitBlock->addArgument(autodiffQ0Type, loc);
 
   builder.setInsertionPointToStart(autodiffInitBlock);
   auto q0Arg = autodiffInitBlock->getArgument(0);
 
-  if (ctx.hasCustomLogpdf()) {
+  if (isCustomLogpdf) {
     SmallVector<Value> callArgs;
     callArgs.push_back(q0Arg);
     callArgs.append(ctx.fnInputs.begin(), ctx.fnInputs.end());
@@ -803,8 +833,10 @@ InitialHMCState MCMC::InitHMC(OpBuilder &builder, Location loc, Value rng,
   }
   builder.setInsertionPointAfter(autodiffInit);
 
-  // (U, rng, grad)
-  auto grad0 = autodiffInit.getResult(2);
+  Value grad0 = autodiffInit.getResult(2);
+  if (isCustomLogpdf) {
+    grad0 = enzyme::ReshapeOp::create(builder, loc, positionType, grad0);
+  }
 
   return {q0, U0, grad0, rngForSampleKernel};
 }
diff --git a/enzyme/Enzyme/MLIR/Passes/ProbProgMLIRPass.cpp b/enzyme/Enzyme/MLIR/Passes/ProbProgMLIRPass.cpp
@@ -655,8 +655,8 @@ struct ProbProgPass : public enzyme::impl::ProbProgPassBase<ProbProgPass> {
         logpdfFnAttr = mcmcOp.getLogpdfFnAttr();
         fnInputs.assign(inputs.begin() + 1, inputs.end());
         auto initialPos = mcmcOp.getInitialPosition();
-        positionSize =
-            cast<RankedTensorType>(initialPos.getType()).getShape()[1];
+        auto initPosType = cast<RankedTensorType>(initialPos.getType());
+        positionSize = initPosType.getNumElements();
         selection = mcmcOp.getSelectionAttr();
         allAddresses = mcmcOp.getAllAddressesAttr();
       } else {
diff --git a/enzyme/test/MLIR/ProbProg/mcmc_custom_logpdf.mlir b/enzyme/test/MLIR/ProbProg/mcmc_custom_logpdf.mlir
@@ -1,8 +1,8 @@
 // RUN: %eopt --probprog %s | FileCheck %s
 
 module {
-  func.func @logpdf(%x : tensor<1x2xf64>) -> tensor<f64> {
-    %sum_sq = enzyme.dot %x, %x {lhs_batching_dimensions = array<i64>, rhs_batching_dimensions = array<i64>, lhs_contracting_dimensions = array<i64: 0, 1>, rhs_contracting_dimensions = array<i64: 0, 1>} : (tensor<1x2xf64>, tensor<1x2xf64>) -> tensor<f64>
+  func.func @logpdf(%x : tensor<2xf64>) -> tensor<f64> {
+    %sum_sq = enzyme.dot %x, %x {lhs_batching_dimensions = array<i64>, rhs_batching_dimensions = array<i64>, lhs_contracting_dimensions = array<i64: 0>, rhs_contracting_dimensions = array<i64: 0>} : (tensor<2xf64>, tensor<2xf64>) -> tensor<f64>
     %neg_half = arith.constant dense<-5.000000e-01> : tensor<f64>
     %result = arith.mulf %neg_half, %sum_sq : tensor<f64>
     return %result : tensor<f64>
@@ -65,9 +65,9 @@ module {
     return %res#0, %res#1, %res#2 : tensor<1x2xf64>, tensor<1xi1>, tensor<2xui64>
   }
 
-  func.func @shifted_logpdf(%x : tensor<1x2xf64>, %mu : tensor<1x2xf64>) -> tensor<f64> {
-    %diff = arith.subf %x, %mu : tensor<1x2xf64>
-    %sum_sq = enzyme.dot %diff, %diff {lhs_batching_dimensions = array<i64>, rhs_batching_dimensions = array<i64>, lhs_contracting_dimensions = array<i64: 0, 1>, rhs_contracting_dimensions = array<i64: 0, 1>} : (tensor<1x2xf64>, tensor<1x2xf64>) -> tensor<f64>
+  func.func @shifted_logpdf(%x : tensor<2xf64>, %mu : tensor<2xf64>) -> tensor<f64> {
+    %diff = arith.subf %x, %mu : tensor<2xf64>
+    %sum_sq = enzyme.dot %diff, %diff {lhs_batching_dimensions = array<i64>, rhs_batching_dimensions = array<i64>, lhs_contracting_dimensions = array<i64: 0>, rhs_contracting_dimensions = array<i64: 0>} : (tensor<2xf64>, tensor<2xf64>) -> tensor<f64>
     %neg_half = arith.constant dense<-5.000000e-01> : tensor<f64>
     %result = arith.mulf %neg_half, %sum_sq : tensor<f64>
     return %result : tensor<f64>
@@ -80,12 +80,7 @@ module {
   // CHECK: func.call @shifted_logpdf
   // CHECK-NEXT: %[[NEG:.+]] = arith.negf
   // CHECK-NEXT: enzyme.yield
-  // CHECK: enzyme.for_loop
-  // CHECK: enzyme.autodiff_region
-  // CHECK: func.call @shifted_logpdf
-  // CHECK-NEXT: %{{.+}} = arith.negf
-  // CHECK-NEXT: enzyme.yield
-  func.func @nuts_shifted_logpdf(%rng : tensor<2xui64>, %mu : tensor<1x2xf64>) -> (tensor<1x2xf64>, tensor<1xi1>, tensor<2xui64>) {
+  func.func @nuts_shifted_logpdf(%rng : tensor<2xui64>, %mu : tensor<2xf64>) -> (tensor<1x2xf64>, tensor<1xi1>, tensor<2xui64>) {
     %init_pos = arith.constant dense<[[0.5, -0.5]]> : tensor<1x2xf64>
     %step_size = arith.constant dense<0.1> : tensor<f64>
     %res:8 = "enzyme.mcmc"(%rng, %mu, %step_size, %init_pos) {
@@ -97,7 +92,7 @@ module {
       num_warmup = 0,
       num_samples = 1,
       operand_segment_sizes = array<i32: 2, 0, 0, 1, 1, 0, 0>
-    } : (tensor<2xui64>, tensor<1x2xf64>, tensor<f64>, tensor<1x2xf64>) -> (tensor<1x2xf64>, tensor<1xi1>, tensor<2xui64>, tensor<1x2xf64>, tensor<1x2xf64>, tensor<f64>, tensor<f64>, tensor<1x2xf64>)
+    } : (tensor<2xui64>, tensor<2xf64>, tensor<f64>, tensor<1x2xf64>) -> (tensor<1x2xf64>, tensor<1xi1>, tensor<2xui64>, tensor<1x2xf64>, tensor<1x2xf64>, tensor<f64>, tensor<f64>, tensor<1x2xf64>)
     return %res#0, %res#1, %res#2 : tensor<1x2xf64>, tensor<1xi1>, tensor<2xui64>
   }
 
@@ -108,12 +103,7 @@ module {
   // CHECK: func.call @shifted_logpdf
   // CHECK-NEXT: %{{.+}} = arith.negf
   // CHECK-NEXT: enzyme.yield
-  // CHECK: enzyme.for_loop
-  // CHECK: enzyme.autodiff_region
-  // CHECK: func.call @shifted_logpdf
-  // CHECK-NEXT: %{{.+}} = arith.negf
-  // CHECK-NEXT: enzyme.yield
-  func.func @hmc_shifted_logpdf(%rng : tensor<2xui64>, %mu : tensor<1x2xf64>) -> (tensor<1x2xf64>, tensor<1xi1>, tensor<2xui64>) {
+  func.func @hmc_shifted_logpdf(%rng : tensor<2xui64>, %mu : tensor<2xf64>) -> (tensor<1x2xf64>, tensor<1xi1>, tensor<2xui64>) {
     %init_pos = arith.constant dense<[[0.5, -0.5]]> : tensor<1x2xf64>
     %step_size = arith.constant dense<0.1> : tensor<f64>
     %res:8 = "enzyme.mcmc"(%rng, %mu, %step_size, %init_pos) {
@@ -125,16 +115,16 @@ module {
       num_warmup = 0,
       num_samples = 1,
       operand_segment_sizes = array<i32: 2, 0, 0, 1, 1, 0, 0>
-    } : (tensor<2xui64>, tensor<1x2xf64>, tensor<f64>, tensor<1x2xf64>) -> (tensor<1x2xf64>, tensor<1xi1>, tensor<2xui64>, tensor<1x2xf64>, tensor<1x2xf64>, tensor<f64>, tensor<f64>, tensor<1x2xf64>)
+    } : (tensor<2xui64>, tensor<2xf64>, tensor<f64>, tensor<1x2xf64>) -> (tensor<1x2xf64>, tensor<1xi1>, tensor<2xui64>, tensor<1x2xf64>, tensor<1x2xf64>, tensor<f64>, tensor<f64>, tensor<1x2xf64>)
     return %res#0, %res#1, %res#2 : tensor<1x2xf64>, tensor<1xi1>, tensor<2xui64>
   }
 
-  func.func @anisotropic_logpdf(%x : tensor<1x2xf64>, %mu : tensor<1x2xf64>, %precision : tensor<1x2xf64>) -> tensor<f64> {
-    %diff = arith.subf %x, %mu : tensor<1x2xf64>
-    %diff_sq = arith.mulf %diff, %diff : tensor<1x2xf64>
-    %weighted = arith.mulf %precision, %diff_sq : tensor<1x2xf64>
-    %ones = arith.constant dense<1.0> : tensor<1x2xf64>
-    %sum = enzyme.dot %ones, %weighted {lhs_batching_dimensions = array<i64>, rhs_batching_dimensions = array<i64>, lhs_contracting_dimensions = array<i64: 0, 1>, rhs_contracting_dimensions = array<i64: 0, 1>} : (tensor<1x2xf64>, tensor<1x2xf64>) -> tensor<f64>
+  func.func @anisotropic_logpdf(%x : tensor<2xf64>, %mu : tensor<2xf64>, %precision : tensor<2xf64>) -> tensor<f64> {
+    %diff = arith.subf %x, %mu : tensor<2xf64>
+    %diff_sq = arith.mulf %diff, %diff : tensor<2xf64>
+    %weighted = arith.mulf %precision, %diff_sq : tensor<2xf64>
+    %ones = arith.constant dense<1.0> : tensor<2xf64>
+    %sum = enzyme.dot %ones, %weighted {lhs_batching_dimensions = array<i64>, rhs_batching_dimensions = array<i64>, lhs_contracting_dimensions = array<i64: 0>, rhs_contracting_dimensions = array<i64: 0>} : (tensor<2xf64>, tensor<2xf64>) -> tensor<f64>
     %neg_half = arith.constant dense<-5.000000e-01> : tensor<f64>
     %result = arith.mulf %neg_half, %sum : tensor<f64>
     return %result : tensor<f64>
@@ -147,12 +137,7 @@ module {
   // CHECK: func.call @anisotropic_logpdf
   // CHECK-NEXT: %[[NEG:.+]] = arith.negf
   // CHECK-NEXT: enzyme.yield
-  // CHECK: enzyme.for_loop
-  // CHECK: enzyme.autodiff_region
-  // CHECK: func.call @anisotropic_logpdf
-  // CHECK-NEXT: %{{.+}} = arith.negf
-  // CHECK-NEXT: enzyme.yield
-  func.func @nuts_anisotropic_logpdf(%rng : tensor<2xui64>, %mu : tensor<1x2xf64>, %precision : tensor<1x2xf64>) -> (tensor<1x2xf64>, tensor<1xi1>, tensor<2xui64>) {
+  func.func @nuts_anisotropic_logpdf(%rng : tensor<2xui64>, %mu : tensor<2xf64>, %precision : tensor<2xf64>) -> (tensor<1x2xf64>, tensor<1xi1>, tensor<2xui64>) {
     %init_pos = arith.constant dense<[[0.5, -0.5]]> : tensor<1x2xf64>
     %step_size = arith.constant dense<0.1> : tensor<f64>
     %res:8 = "enzyme.mcmc"(%rng, %mu, %precision, %step_size, %init_pos) {
@@ -164,7 +149,7 @@ module {
       num_warmup = 0,
       num_samples = 1,
       operand_segment_sizes = array<i32: 3, 0, 0, 1, 1, 0, 0>
-    } : (tensor<2xui64>, tensor<1x2xf64>, tensor<1x2xf64>, tensor<f64>, tensor<1x2xf64>) -> (tensor<1x2xf64>, tensor<1xi1>, tensor<2xui64>, tensor<1x2xf64>, tensor<1x2xf64>, tensor<f64>, tensor<f64>, tensor<1x2xf64>)
+    } : (tensor<2xui64>, tensor<2xf64>, tensor<2xf64>, tensor<f64>, tensor<1x2xf64>) -> (tensor<1x2xf64>, tensor<1xi1>, tensor<2xui64>, tensor<1x2xf64>, tensor<1x2xf64>, tensor<f64>, tensor<f64>, tensor<1x2xf64>)
     return %res#0, %res#1, %res#2 : tensor<1x2xf64>, tensor<1xi1>, tensor<2xui64>
   }
 
@@ -175,12 +160,7 @@ module {
   // CHECK: func.call @anisotropic_logpdf
   // CHECK-NEXT: %{{.+}} = arith.negf
   // CHECK-NEXT: enzyme.yield
-  // CHECK: enzyme.for_loop
-  // CHECK: enzyme.autodiff_region
-  // CHECK: func.call @anisotropic_logpdf
-  // CHECK-NEXT: %{{.+}} = arith.negf
-  // CHECK-NEXT: enzyme.yield
-  func.func @hmc_anisotropic_logpdf(%rng : tensor<2xui64>, %mu : tensor<1x2xf64>, %precision : tensor<1x2xf64>) -> (tensor<1x2xf64>, tensor<1xi1>, tensor<2xui64>) {
+  func.func @hmc_anisotropic_logpdf(%rng : tensor<2xui64>, %mu : tensor<2xf64>, %precision : tensor<2xf64>) -> (tensor<1x2xf64>, tensor<1xi1>, tensor<2xui64>) {
     %init_pos = arith.constant dense<[[0.5, -0.5]]> : tensor<1x2xf64>
     %step_size = arith.constant dense<0.1> : tensor<f64>
     %res:8 = "enzyme.mcmc"(%rng, %mu, %precision, %step_size, %init_pos) {
@@ -192,7 +172,7 @@ module {
       num_warmup = 0,
       num_samples = 1,
       operand_segment_sizes = array<i32: 3, 0, 0, 1, 1, 0, 0>
-    } : (tensor<2xui64>, tensor<1x2xf64>, tensor<1x2xf64>, tensor<f64>, tensor<1x2xf64>) -> (tensor<1x2xf64>, tensor<1xi1>, tensor<2xui64>, tensor<1x2xf64>, tensor<1x2xf64>, tensor<f64>, tensor<f64>, tensor<1x2xf64>)
+    } : (tensor<2xui64>, tensor<2xf64>, tensor<2xf64>, tensor<f64>, tensor<1x2xf64>) -> (tensor<1x2xf64>, tensor<1xi1>, tensor<2xui64>, tensor<1x2xf64>, tensor<1x2xf64>, tensor<f64>, tensor<f64>, tensor<1x2xf64>)
     return %res#0, %res#1, %res#2 : tensor<1x2xf64>, tensor<1xi1>, tensor<2xui64>
   }
 }
diff --git a/enzyme/test/MLIR/ProbProg/mcmc_strong_zero.mlir b/enzyme/test/MLIR/ProbProg/mcmc_strong_zero.mlir
@@ -1,8 +1,8 @@
 // RUN: %eopt --probprog %s | FileCheck %s
 
 module {
-  func.func @logpdf(%x : tensor<1x2xf64>) -> tensor<f64> {
-    %sum_sq = enzyme.dot %x, %x {lhs_batching_dimensions = array<i64>, rhs_batching_dimensions = array<i64>, lhs_contracting_dimensions = array<i64: 0, 1>, rhs_contracting_dimensions = array<i64: 0, 1>} : (tensor<1x2xf64>, tensor<1x2xf64>) -> tensor<f64>
+  func.func @logpdf(%x : tensor<2xf64>) -> tensor<f64> {
+    %sum_sq = enzyme.dot %x, %x {lhs_batching_dimensions = array<i64>, rhs_batching_dimensions = array<i64>, lhs_contracting_dimensions = array<i64: 0>, rhs_contracting_dimensions = array<i64: 0>} : (tensor<2xf64>, tensor<2xf64>) -> tensor<f64>
     %neg_half = arith.constant dense<-5.000000e-01> : tensor<f64>
     %result = arith.mulf %neg_half, %sum_sq : tensor<f64>
     return %result : tensor<f64>