Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Project 1: Austin Eng #8

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
uniform and coherent grid mostly completed; likely a grid indexing problem causing boids to move towards one corner
austinEng committed Sep 8, 2016
commit d969cc7bb923863685ef39b0b158c98d0d61232d
260 changes: 249 additions & 11 deletions src/kernel.cu
Original file line number Diff line number Diff line change
@@ -85,6 +85,7 @@ int *dev_gridCellEndIndices; // to this cell?

// TODO-2.3 - consider what additional buffers you might need to reshuffle
// the position and velocity data to be coherent within cells.
glm::vec3 *dev_pos2;

// LOOK-2.1 - Grid parameters based on simulation parameters.
// These are automatically computed for you in Boids::initSimulation
@@ -162,13 +163,29 @@ void Boids::initSimulation(int N) {
gridSideCount = 2 * halfSideCount;

gridCellCount = gridSideCount * gridSideCount * gridSideCount;
printf("%i CELLS\n", gridCellCount);
gridInverseCellWidth = 1.0f / gridCellWidth;
float halfGridWidth = gridCellWidth * halfSideCount;
gridMinimum.x -= halfGridWidth;
gridMinimum.y -= halfGridWidth;
gridMinimum.z -= halfGridWidth;

// TODO-2.1 TODO-2.3 - Allocate additional buffers here.
cudaMalloc((void**)&dev_particleArrayIndices, N * sizeof(int));
checkCUDAErrorWithLine("cudaMalloc dev_particleArrayIndices failed!");

cudaMalloc((void**)&dev_particleGridIndices, N * sizeof(int));
checkCUDAErrorWithLine("cudaMalloc dev_particleGridIndices failed!");

cudaMalloc((void**)&dev_gridCellStartIndices, gridCellCount * sizeof(int));
checkCUDAErrorWithLine("cudaMalloc dev_gridCellStartIndices failed!");

cudaMalloc((void**)&dev_gridCellEndIndices, gridCellCount * sizeof(int));
checkCUDAErrorWithLine("cudaMalloc dev_gridCellEndIndices failed!");

cudaMalloc((void**)&dev_pos2, N * sizeof(glm::vec3));
checkCUDAErrorWithLine("cudaMalloc dev_pos2 failed!");

cudaThreadSynchronize();
}

@@ -318,10 +335,23 @@ __device__ int gridIndex3Dto1D(int x, int y, int z, int gridResolution) {
// Label each boid with the flattened index of the uniform-grid cell that
// contains it, and record a parallel identity index array so the boid data in
// pos/vel1/vel2 can still be located after the grid indices are sorted:
//   gridIndices[i] <- flattened cell index of boid i
//   indices[i]     <- i  (pointer back into the pos/vel arrays)
__global__ void kernComputeIndices(int N, int gridResolution,
  glm::vec3 gridMin, float inverseCellWidth,
  glm::vec3 *pos, int *indices, int *gridIndices) {
  int index = (blockIdx.x * blockDim.x) + threadIdx.x;
  if (index >= N) {
    return;
  }

  // Convert the world-space position to integer 3D cell coordinates.
  // NOTE(review): positions are assumed to lie inside the grid bounds; no
  // clamping is performed here — confirm kernUpdatePos wraps positions.
  glm::ivec3 index3D = (pos[index] - gridMin) * inverseCellWidth;
  gridIndices[index] = gridIndex3Dto1D(index3D.x, index3D.y, index3D.z, gridResolution);

  // Identity mapping; thrust::sort_by_key permutes this alongside gridIndices.
  indices[index] = index;
}

// LOOK-2.1 Consider how this could be useful for indicating that a cell
@@ -336,9 +366,51 @@ __global__ void kernResetIntBuffer(int N, int *intBuffer, int value) {
// Given the cell-sorted particleGridIndices array, record for each grid cell
// the [start, end) span of sorted entries that fall inside it:
//   gridCellStartIndices[c] = first sorted position whose cell index is c
//   gridCellEndIndices[c]   = one past the last such position (exclusive)
// Cells containing no boids are never written by this kernel, so both buffers
// must be cleared (e.g. via kernResetIntBuffer) before it is launched.
__global__ void kernIdentifyCellStartEnd(int N, int *particleGridIndices,
  int *gridCellStartIndices, int *gridCellEndIndices) {
  int index = (blockIdx.x * blockDim.x) + threadIdx.x;
  if (index >= N) {
    return;
  }

  int gIdx = particleGridIndices[index];

  // A cell starts wherever the sorted cell index differs from its predecessor.
  // BUG FIX: the original returned early for index 0, so the first occupied
  // cell's start index was never written.
  if (index == 0) {
    gridCellStartIndices[gIdx] = 0;
  } else {
    int gIdxPrev = particleGridIndices[index - 1];
    if (gIdxPrev != gIdx) {
      gridCellStartIndices[gIdx] = index;
      gridCellEndIndices[gIdxPrev] = index;
    }
  }

  // BUG FIX: the last boid must terminate its cell; the original never wrote
  // the final occupied cell's end index, so its boids were skipped entirely.
  if (index == N - 1) {
    gridCellEndIndices[gIdx] = N;
  }
}

// Compute the integer 3D cell index of pos, plus a 3-bit octant mask telling
// which half of its cell the boid occupies per axis. Bit 0/1/2 set means the
// boid sits in the upper half along x/y/z (so +direction neighbor cells must
// be searched); a clear bit means the lower half (-direction).
__device__ void getGridIndexAndQuadrantMask(glm::vec3 pos, glm::vec3 gridMin, float inverseCellWidth, glm::ivec3 &gIdx, unsigned char &mask) {
  glm::vec3 partialIndex3D = (pos - gridMin) * inverseCellWidth;
  gIdx = partialIndex3D;
  // Fractional position within the cell, each component in [0, 1).
  partialIndex3D = partialIndex3D - glm::floor(partialIndex3D);

  // BUG FIX: the original combined the three flags with logical OR (||), which
  // collapses the whole expression to 0 or 1 instead of building a bit mask.
  // Every boid then searched the same biased neighborhood, which is consistent
  // with the reported "boids drift toward one corner" behavior. Bitwise OR (|)
  // assembles the intended per-axis bits.
  mask = ((partialIndex3D.x >= 0.5f) << 0) |
         ((partialIndex3D.y >= 0.5f) << 1) |
         ((partialIndex3D.z >= 0.5f) << 2);
}

// Collect the flattened indices of the (at most 8) grid cells that could hold
// neighbors of a boid, given its cell gIdx and its octant mask (see
// getGridIndexAndQuadrantMask). Returns the number of cells written into
// neighborCells. Cells outside [0, gridResolution) are skipped, not wrapped —
// NOTE(review): if the simulation wraps boid positions at the boundary, edge
// boids will miss wrapped neighbors; confirm against kernUpdatePos.
__device__ int getNeighborCells(int neighborCells[8], const glm::ivec3 &gIdx, const unsigned char &mask, int gridResolution) {
int nCells = 0;
// Walk the 3x3x3 neighborhood but keep only offsets on the boid's side of its
// cell: per axis, offset -1 is allowed only when the mask bit is clear (boid
// in the lower half), +1 only when it is set, and offset 0 always passes.
for (int k = -1; k <= 1; ++k) {
for (int j = -1; j <= 1; ++j) {
for (int i = -1; i <= 1; ++i) {
if (!(
(i == -1 && (mask & 0x1)) || (i == 1 && !(mask & 0x1)) ||
(j == -1 && (mask & 0x2)) || (j == 1 && !(mask & 0x2)) ||
(k == -1 && (mask & 0x4)) || (k == 1 && !(mask & 0x4))
)) {
glm::ivec3 idx = gIdx + glm::ivec3(i, j, k);
// Discard candidate cells that fall outside the grid on any axis.
if (!glm::any(glm::lessThan(idx, glm::ivec3(0, 0, 0))) &&
!glm::any(glm::greaterThanEqual(idx, glm::ivec3(gridResolution, gridResolution, gridResolution)))) {
neighborCells[nCells++] = gridIndex3Dto1D(idx.x, idx.y, idx.z, gridResolution);
}
}
}
}
}
return nCells;
}

// Compute each boid's new velocity using the scattered uniform grid: candidate
// neighbors are located through gridCellStart/EndIndices, and every sorted
// entry must be dereferenced through particleArrayIndices to reach the actual
// boid data. Reads vel1, writes vel2 (ping-pong buffers).
// NOTE(review): the parameter list is reconstructed to match the launch in
// stepSimulationScatteredGrid (the diff elided it) — confirm against header.
__global__ void kernUpdateVelNeighborSearchScattered(
  int N, int gridResolution, glm::vec3 gridMin,
  float inverseCellWidth, float cellWidth,
  int *gridCellStartIndices, int *gridCellEndIndices,
  int *particleArrayIndices,
  glm::vec3 *pos, glm::vec3 *vel1, glm::vec3 *vel2) {
  int index = (blockIdx.x * blockDim.x) + threadIdx.x;
  if (index >= N) {
    return;
  }

  // Identify the grid cell containing this boid and the octant of the cell it
  // occupies, then the (<= 8) cells that may contain neighbors.
  glm::ivec3 index3D;
  unsigned char mask;
  getGridIndexAndQuadrantMask(pos[index], gridMin, inverseCellWidth, index3D, mask);

  int neighborCells[8];
  int nCells = getNeighborCells(neighborCells, index3D, mask, gridResolution);

  glm::vec3 posSelf = pos[index];
  glm::vec3 cmass(0.f, 0.f, 0.f);      // rule 1: perceived center of mass
  int cmassCount = 0;
  glm::vec3 collisionV(0.f, 0.f, 0.f); // rule 2: separation impulse
  glm::vec3 cvel(0.f, 0.f, 0.f);       // rule 3: neighbor velocity sum
  int cvelCount = 0;

  for (int c = 0; c < nCells; ++c) {
    // [start, end) span of sorted boid entries belonging to this cell.
    int n = neighborCells[c];
    int start = gridCellStartIndices[n];
    int end = gridCellEndIndices[n];

    for (int b = start; b < end; ++b) {
      // One level of indirection: b indexes the sorted arrays, particleIndex
      // indexes the original pos/vel buffers.
      int particleIndex = particleArrayIndices[b];
      if (particleIndex == index) continue;

      // Hoisted: the original recomputed glm::distance three times per boid.
      glm::vec3 posOther = pos[particleIndex];
      float dist = glm::distance(posOther, posSelf);

      // Rule 1: fly towards the local perceived center of mass (excl. self).
      if (dist < rule1Distance) {
        cmass += posOther;
        cmassCount++;
      }
      // Rule 2: keep a minimum distance from nearby boids.
      if (dist < rule2Distance) {
        collisionV -= posOther - posSelf;
      }
      // Rule 3: match velocity with nearby boids.
      if (dist < rule3Distance) {
        cvel += vel1[particleIndex];
        cvelCount++;
      }
    }
  }

  cmass = cmassCount > 0 ? (cmass / (float)cmassCount) - posSelf : cmass;
  cvel = cvelCount > 0 ? cvel / (float)cvelCount : cvel;

  glm::vec3 vel = vel1[index] + cmass * rule1Scale + collisionV * rule2Scale + cvel * rule3Scale;

  // Clamp the speed before writing to vel2. BUG FIX: the original
  // unconditionally normalized, so a zero-length velocity produced NaN (0/0)
  // that then propagated through the flock. Only rescale when the speed
  // actually exceeds maxSpeed (matching the coherent kernel's guard).
  float speed = glm::length(vel);
  if (speed > maxSpeed) {
    vel *= maxSpeed / speed;
  }
  vel2[index] = vel;
}

// Gather boid data into cell-sorted order: entry i of the target buffers
// receives the pos/vel of the boid whose original index is tgtIndices[i].
__global__ void kernShuffleParticleBuffers(int N,
  glm::vec3* srcPos, glm::vec3* srcVel,
  int* tgtIndices, glm::vec3* tgtPos, glm::vec3* tgtVel) {
  int i = (blockIdx.x * blockDim.x) + threadIdx.x;
  if (i < N) {
    // Read the source slot once; both copies use the same indirection.
    int src = tgtIndices[i];
    tgtPos[i] = srcPos[src];
    tgtVel[i] = srcVel[src];
  }
}

// Compute each boid's new velocity using the cell-coherent uniform grid: pos
// and vel1 have already been reshuffled into cell order, so
// gridCellStart/EndIndices index them directly — one less indirection than the
// scattered variant. Reads vel1, writes vel2.
// NOTE(review): the parameter list is reconstructed to match the launch in
// stepSimulationCoherentGrid (the diff elided it) — confirm against header.
__global__ void kernUpdateVelNeighborSearchCoherent(
  int N, int gridResolution, glm::vec3 gridMin,
  float inverseCellWidth, float cellWidth,
  int *gridCellStartIndices, int *gridCellEndIndices,
  glm::vec3 *pos, glm::vec3 *vel1, glm::vec3 *vel2) {
  int index = (blockIdx.x * blockDim.x) + threadIdx.x;
  if (index >= N) {
    return;
  }

  // Identify this boid's grid cell and cell octant, then the (<= 8) cells
  // that may contain neighbors.
  glm::ivec3 index3D;
  unsigned char mask;
  getGridIndexAndQuadrantMask(pos[index], gridMin, inverseCellWidth, index3D, mask);

  int neighborCells[8];
  int nCells = getNeighborCells(neighborCells, index3D, mask, gridResolution);

  glm::vec3 posSelf = pos[index];
  glm::vec3 cmass(0.f, 0.f, 0.f);      // rule 1: perceived center of mass
  int cmassCount = 0;
  glm::vec3 collisionV(0.f, 0.f, 0.f); // rule 2: separation impulse
  glm::vec3 cvel(0.f, 0.f, 0.f);       // rule 3: neighbor velocity sum
  int cvelCount = 0;

  for (int c = 0; c < nCells; ++c) {
    // [start, end) span of boids in this cell — indexes pos/vel1 directly
    // because the data is coherent.
    int n = neighborCells[c];
    int start = gridCellStartIndices[n];
    int end = gridCellEndIndices[n];

    for (int particleIndex = start; particleIndex < end; ++particleIndex) {
      if (particleIndex == index) continue;

      // Hoisted: the original recomputed glm::distance three times per boid.
      glm::vec3 posOther = pos[particleIndex];
      float dist = glm::distance(posOther, posSelf);

      // Rule 1: fly towards the local perceived center of mass (excl. self).
      if (dist < rule1Distance) {
        cmass += posOther;
        cmassCount++;
      }
      // Rule 2: keep a minimum distance from nearby boids.
      if (dist < rule2Distance) {
        collisionV -= posOther - posSelf;
      }
      // Rule 3: match velocity with nearby boids.
      if (dist < rule3Distance) {
        cvel += vel1[particleIndex];
        cvelCount++;
      }
    }
  }

  cmass = cmassCount > 0 ? (cmass / (float)cmassCount) - posSelf : cmass;
  cvel = cvelCount > 0 ? cvel / (float)cvelCount : cvel;

  glm::vec3 vel = vel1[index] + cmass * rule1Scale + collisionV * rule2Scale + cvel * rule3Scale;

  // Clamp the speed before writing to vel2. Inside the branch len > maxSpeed
  // holds, so the clamped magnitude is exactly maxSpeed; the original
  // redundantly recomputed normalize(vel) * min(maxSpeed, length(vel)) here.
  float len = glm::length(vel);
  if (len > maxSpeed) {
    vel *= maxSpeed / len;
  }
  vel2[index] = vel;
}

/**
@@ -392,43 +581,92 @@ void Boids::stepSimulationNaive(float dt) {

// One simulation step using the scattered uniform grid: boid data stays in its
// original order and the search kernel reaches it through
// dev_particleArrayIndices.
void Boids::stepSimulationScatteredGrid(float dt) {
  dim3 fullBlocksPerGrid((numObjects + blockSize - 1) / blockSize);
  dim3 cellBlocksPerGrid((gridCellCount + blockSize - 1) / blockSize);

  // Label each boid with its array index and its grid-cell index.
  kernComputeIndices<<< fullBlocksPerGrid, blockSize >>>(numObjects, gridSideCount, gridMinimum,
    gridInverseCellWidth, dev_pos, dev_particleArrayIndices,
    dev_particleGridIndices);

  // Unstable key sort using Thrust: order the boid index array by cell index.
  dev_thrust_particleGridIndices = thrust::device_ptr<int>(dev_particleGridIndices);
  dev_thrust_particleArrayIndices = thrust::device_ptr<int>(dev_particleArrayIndices);
  thrust::sort_by_key(dev_thrust_particleGridIndices, dev_thrust_particleGridIndices + numObjects, dev_thrust_particleArrayIndices);

  // BUG FIX: clear last frame's cell ranges before rebuilding them. Cells that
  // are empty this frame are never written by kernIdentifyCellStartEnd, so
  // stale start/end values would send the search kernel through garbage
  // neighbor lists. Resetting both to 0 leaves empty cells with a harmless
  // empty [0, 0) range.
  kernResetIntBuffer<<< cellBlocksPerGrid, blockSize >>>(gridCellCount, dev_gridCellStartIndices, 0);
  kernResetIntBuffer<<< cellBlocksPerGrid, blockSize >>>(gridCellCount, dev_gridCellEndIndices, 0);

  // Find the start/end of each cell's span in the sorted boid index array.
  kernIdentifyCellStartEnd<<< fullBlocksPerGrid, blockSize >>>(numObjects, dev_particleGridIndices,
    dev_gridCellStartIndices, dev_gridCellEndIndices);

  // Velocity update via grid-limited neighbor search (reads vel1, writes vel2).
  kernUpdateVelNeighborSearchScattered<<< fullBlocksPerGrid, blockSize >>>(numObjects, gridSideCount, gridMinimum,
    gridInverseCellWidth, gridCellWidth, dev_gridCellStartIndices,
    dev_gridCellEndIndices, dev_particleArrayIndices, dev_pos,
    dev_vel1, dev_vel2);

  // Integrate positions with the freshly computed velocities.
  kernUpdatePos<<< fullBlocksPerGrid, blockSize >>>(numObjects, dt, dev_pos, dev_vel2);

  // Ping-pong the velocity buffers for the next step.
  std::swap(dev_vel1, dev_vel2);
}

// One simulation step using the cell-coherent uniform grid: boid pos/vel are
// physically reshuffled into cell order each frame so the search kernel needs
// no indirection through dev_particleArrayIndices.
void Boids::stepSimulationCoherentGrid(float dt) {
  dim3 fullBlocksPerGrid((numObjects + blockSize - 1) / blockSize);
  dim3 cellBlocksPerGrid((gridCellCount + blockSize - 1) / blockSize);

  // Label each boid with its array index and its grid-cell index.
  kernComputeIndices<<< fullBlocksPerGrid, blockSize >>>(numObjects, gridSideCount, gridMinimum,
    gridInverseCellWidth, dev_pos, dev_particleArrayIndices,
    dev_particleGridIndices);

  // Unstable key sort using Thrust: order the boid index array by cell index.
  dev_thrust_particleGridIndices = thrust::device_ptr<int>(dev_particleGridIndices);
  dev_thrust_particleArrayIndices = thrust::device_ptr<int>(dev_particleArrayIndices);
  thrust::sort_by_key(dev_thrust_particleGridIndices, dev_thrust_particleGridIndices + numObjects, dev_thrust_particleArrayIndices);

  // BUG FIX: clear last frame's cell ranges before rebuilding them, so cells
  // that are empty this frame keep a harmless empty [0, 0) range instead of
  // stale indices from a previous step.
  kernResetIntBuffer<<< cellBlocksPerGrid, blockSize >>>(gridCellCount, dev_gridCellStartIndices, 0);
  kernResetIntBuffer<<< cellBlocksPerGrid, blockSize >>>(gridCellCount, dev_gridCellEndIndices, 0);

  // Find the start/end of each cell's span in the sorted boid index array.
  kernIdentifyCellStartEnd<<< fullBlocksPerGrid, blockSize >>>(numObjects, dev_particleGridIndices,
    dev_gridCellStartIndices, dev_gridCellEndIndices);

  // Reshuffle the simulation data into cell order: dev_pos2/dev_vel2 receive
  // coherent copies of dev_pos/dev_vel1.
  kernShuffleParticleBuffers<<< fullBlocksPerGrid, blockSize >>>(
    numObjects, dev_pos, dev_vel1,
    dev_particleArrayIndices, dev_pos2, dev_vel2);

  // Velocity update on the coherent buffers: reads the shuffled old velocities
  // (dev_vel2) and writes the new velocities straight into dev_vel1, which is
  // therefore already current for the next frame — no velocity swap needed.
  kernUpdateVelNeighborSearchCoherent<<< fullBlocksPerGrid, blockSize >>>(numObjects, gridSideCount, gridMinimum,
    gridInverseCellWidth, gridCellWidth, dev_gridCellStartIndices,
    dev_gridCellEndIndices, dev_pos2, dev_vel2, dev_vel1);

  // Integrate the coherent positions with the new velocities.
  kernUpdatePos<<< fullBlocksPerGrid, blockSize >>>(numObjects, dt, dev_pos2, dev_vel1);

  // dev_pos2 now holds the current positions; swap so dev_pos is the
  // authoritative position buffer for rendering and the next step.
  std::swap(dev_pos, dev_pos2);
}

// Release all device buffers allocated in Boids::initSimulation.
void Boids::endSimulation() {
  cudaFree(dev_vel1);
  cudaFree(dev_vel2);
  cudaFree(dev_pos);

  // Free the additional 2.1/2.3 buffers. BUG FIX: the four grid bookkeeping
  // buffers were allocated in initSimulation but never freed (device memory
  // leak); only dev_pos2 was released.
  cudaFree(dev_particleArrayIndices);
  cudaFree(dev_particleGridIndices);
  cudaFree(dev_gridCellStartIndices);
  cudaFree(dev_gridCellEndIndices);
  cudaFree(dev_pos2);
}

void Boids::unitTest() {
6 changes: 3 additions & 3 deletions src/main.cpp
Original file line number Diff line number Diff line change
@@ -14,11 +14,11 @@

// LOOK-2.1 LOOK-2.3 - toggles for UNIFORM_GRID and COHERENT_GRID
#define VISUALIZE 1
#define UNIFORM_GRID 0
#define COHERENT_GRID 0
#define UNIFORM_GRID 1
#define COHERENT_GRID 1

// LOOK-1.2 - change this to adjust particle count in the simulation
const int N_FOR_VIS = 5000;
const int N_FOR_VIS = 20000;
const float DT = 0.2f;

/**