'Cuda kernel not running on if statements c++
I have a small program that has two arrays of vectors (pos, vel) representing particles floating in space, and I'm using the GPU to update them and will soon run the GPU to render them as well. The kernel adding the vectors together runs as it is supposed to and updates the particles, however running the kernel that does a simple check to see if the particle is on screen or not, and wraps around the edge accordingly, simply doesn't change the particle's position even if the conditions are met. Not going to include the screen drawing since it mostly clutters up the code and I've already narrowed the problem to the kernel functions.
Vec is just a struct with a float x, y variables and has the basic operator() functions for adding, subtracting, multiplying with scalars, etc. Every functions is made to be compatible with CUDA with the CUDA_CALLABLE_MEMBER macro
Includes and CUDA kernels:
#include <cuda_runtime.h>
#include <device_launch_parameters.h>
#define CUDA_CALLABLE_MEMBER __host__ __device__
CUDA_CALLABLE_MEMBER
void kernel(vec* a , vec* b, vec* c, int i)
{
int idx = blockIdx.x * blockDim.x + threadIdx.x;
if (idx < i)
{
c[idx] = a[idx] + b[idx];
}
}
CUDA_CALLABLE_MEMBER
void check_positions(vec* pos, vec* res) {
int idx = blockIdx.x * blockDim.x + threadIdx.x;
if (idx < sizeof(pos) / sizeof(vec)) {
// Wrap around
if (pos[idx].x < 0) {
pos[idx].x = pos[idx].x + res->x;
}
if (pos[idx].x > res->x) {
pos[idx].x = pos[idx].x - res->x;
}
if (pos[idx].y < 0) {
pos[idx].y = pos[idx].y + res->y;
}
if (pos[idx].y > res->y) {
pos[idx].y = pos[idx].y - res->y;
}
}
}
The function that runs the kernels and does the memory allocation and copying from host to device and vice versa:
vec* res = new vec(800, 800); // Resolution vector for use by the gpu
vec* addWithCuda(vec* a, vec* b, int n) {
// Making and allocating space for arrays
vec *c = new vec[n];
vec *da = new vec[n];
vec *db = new vec[n];
vec *d_res = new vec;
vec *dc = new vec[n];
cudaMalloc((void**)&da, n * sizeof(vec));
cudaMalloc((void**)&db, n * sizeof(vec));
cudaMalloc((void**)&dc, n * sizeof(vec));
cudaMalloc((void**)&d_res, sizeof(vec));
cudaMemcpy(da, a, n * sizeof(vec), cudaMemcpyHostToDevice);
cudaMemcpy(db, b, n * sizeof(vec), cudaMemcpyHostToDevice);
cudaMemcpy(d_res, res, sizeof(vec), cudaMemcpyHostToDevice);
// Run the kernels
int blockSize = 256;
int gridSize = (n + blockSize - 1) / blockSize;
check_positions << <gridSize, blockSize >> > (da, d_res);
kernel<<<gridSize, blockSize>>>(da, db, dc, n);
// Copy data back to host and free the data
cudaMemcpy(c, dc, n * sizeof(vec), cudaMemcpyDeviceToHost);
cudaFree(da);
cudaFree(db);
cudaFree(dc);
cudaFree(d_res);
return c;
}
Main code:
int main(){
// Make an array of vectors
printf("Making an array of vectors\n");
int n = 1<<15;
vec* pos = new vec[n];
vec* vel = new vec[n];
for (int i = 0; i < n; i++) {
pos[i].x = rand() % 800;
pos[i].y = rand() % 800;
vel[i].x = rand() % 5 - 2.5f;
vel[i].y = rand() % 5 - 2.5f;
}
printf("Done making an array of vectors\n");
vec* newPos;
while(not_exited){ // Sample loop
newPos = addWithCuda(pos, vel, n);
// Copy pos to newPos
for (int i = 0; i < n; i++) {
pos[i] = newPos[i];
}
}
// Print resulting positions:
int i = -1;
while(++i < n)
printf("%d: (%f, %f)\n", i, pos[i].x, pos[i].y);
}
Once run, you can see that after a certain period of time every position will be outside of the "window":
32746: (-3262.000000, 989.000000)
32747: (2953.000000, -267.000000)
32748: (-2002.000000, -2314.000000)
32749: (-3400.000000, 2725.000000)
32750: (-2051.000000, 1165.000000)
32751: (1065.000000, 3004.000000)
32752: (-3700.000000, -223.000000)
32753: (-3904.000000, -440.000000)
32754: (2691.000000, -301.000000)
32755: (3065.000000, -470.000000)
32756: (-3647.000000, -703.000000)
32757: (-224.000000, -80.000000)
32758: (-2237.000000, 888.000000)
32759: (-585.000000, -620.000000)
32760: (-3926.000000, -426.000000)
32761: (-3453.000000, -1921.000000)
Any suggestions?
Sources
This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.
Source: Stack Overflow
| Solution | Source |
|---|
