CUDA Seg Fault for Int Device to Host Copy

Why does the following simple program (24 lines) lead to a segmentation fault at the shrinked_size_host int variable?

#include <stdio.h>
#include <cuda_runtime.h>
#include <curand_kernel.h>


// Kernel: stores the constant 12 into the int that device_var points to,
// then reads the stored value back and prints it from the device.
// device_var must point to memory the device can write.
__global__ void cuda_set(int* device_var){
   device_var[0] = 12;
   printf("Set device variable to: %d\n", device_var[0]);
}

// Host entry point: allocates one int on the device, zeroes it, launches
// cuda_set to overwrite it, then attempts to copy the value back to the host
// and print it.
// NOTE(review): none of the CUDA runtime return codes below are checked, and
// the device allocation is never released with cudaFree.
int main() {
    printf("Hello world CPU\n");

    // Device-side storage for a single int.
    int* shrinked_size_device;
    cudaMalloc((void**)&shrinked_size_device, sizeof(int));
    cudaDeviceSynchronize();

    
    // Clear the device int to 0 before the kernel writes to it.
    cudaMemset(shrinked_size_device, 0, sizeof(int));
    cudaDeviceSynchronize();

    // Launch one block of one thread; the synchronize call waits for the
    // kernel to finish before the host continues.
    cuda_set<<<1,1>>>(shrinked_size_device); 
    cudaDeviceSynchronize();

    // BUG: shrinked_size_host is initialized to 0, i.e. a null pointer, so
    // there is no host int for the device-to-host copy to write into. The
    // copy's return code is ignored, so any error it reports goes unnoticed.
    int* shrinked_size_host = 0;
    cudaMemcpy(shrinked_size_host, shrinked_size_device, sizeof(int), cudaMemcpyDeviceToHost);
    cudaDeviceSynchronize();

    // Dereferences the still-null shrinked_size_host; this is the point that
    // matches the reported segmentation fault.
    printf("shrinked_size_host=%d\n", *shrinked_size_host);
    return 0;
}

This is the output produced by the program:

  1. Hello world CPU

  2. Set device variable to: 12

  3. Segmentation fault (core dumped)

I am not sure why there is a segmentation fault.



Solution 1:[1]

I figured out the answer to this question.

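Memory for shrinked_size_host must be allocated before it is used. In the program it is initialized to 0 (a null pointer), so cudaMemcpy has no valid host destination to write to, and dereferencing the pointer in printf causes the segmentation fault. So, do either of the following: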

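  1. Heap allocation: use malloc or new int to allocate storage for a single integer. Remember to release the allocated memory at the end with the matching call (free for malloc, delete for new).
  2. Stack allocation: use int shrinked_size_host[1]; and pass the array itself as the destination of cudaMemcpy.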

Sources

This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.

Source: Stack Overflow

Solution Source
Solution 1 Hossam Amer