'Segmentation Fault at runtime in a simple C++ CUDA code

I got a cuda Segmentation fault after running this code.

The reason for this code: I wanted to know the maximum size ian array can declare in register memory, maximum array for each thread per block:

#include "common.h"
#include <cuda_runtime.h>
#include "stdio.h"

#define N 10
#define Nblock 10

__global__ void add(int *c)
{
   int X[N];
   int tID = blockIdx.x * blockDim.x + threadIdx.x;
   for(int o = 0; o < N;o++) {
      X[o]=1;
      c[tID] +=X[o];
 }
}
int main(int argc, char **argv)
{

 // set up device
 int dev = 0;
 cudaDeviceProp deviceProp;
 CHECK(cudaGetDeviceProperties(&deviceProp, dev));
 printf("%s test struct of array at ", argv[0]);
 printf("device %d: %s \n", dev, deviceProp.name);
 CHECK(cudaSetDevice(dev));

 int c[N*Nblock];
 int *dev_c;

 cudaMalloc((void **) &dev_c, N*Nblock*sizeof(int));

 add<<<Nblock,N>>>(dev_c);
 cudaMemcpy(c, dev_c, N*Nblock*sizeof(int), cudaMemcpyDeviceToHost);
 int sum = 0 ;
 for (int i = 0; i < N*Nblock; i++)
 {
 sum +=c[i];
 }
 printf("sum= %d\n", sum);
 free(dev_c);
    // reset device
    CHECK(cudaDeviceReset());
    return EXIT_SUCCESS;
}

Got this message after running the code:

line 4: 18244 Segmentation error (memory stack flushed to disk)



Sources

This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.

Source: Stack Overflow

Solution Source