CUDA Seg Fault for Int Device to Host Copy
Why does the following simple program (24 lines) lead to a segmentation fault at the shrinked_size_host int variable?
#include <stdio.h>
#include <cuda_runtime.h>
#include <curand_kernel.h>
// Kernel: stores the constant 12 into the int that device_var points to,
// then prints the stored value from device code. The host code in this
// file launches it as <<<1,1>>>, i.e. with a single thread.
__global__ void cuda_set(int* device_var)
{
    int& slot = *device_var;  // alias for the target int
    slot = 12;
    printf("Set device variable to: %d\n", slot);
}
// Host entry point: allocates one int on the device, zeroes it, launches
// cuda_set to store 12 in it, then attempts to copy the value back to the
// host and print it.
// NOTE: none of the CUDA runtime calls below have their return status
// checked, and the device allocation is never released with cudaFree.
int main() {
printf("Hello world CPU\n");
// Device-side storage for a single int.
int* shrinked_size_device;
cudaMalloc((void**)&shrinked_size_device, sizeof(int));
cudaDeviceSynchronize();
// Zero the device int before the kernel overwrites it.
cudaMemset(shrinked_size_device, 0, sizeof(int));
cudaDeviceSynchronize();
// One block of one thread; the kernel writes 12 and prints it.
cuda_set<<<1,1>>>(shrinked_size_device);
cudaDeviceSynchronize();
// BUG: the host pointer is initialized to 0 (null) and is never pointed at
// any storage, so the copy below is given a null destination.
int* shrinked_size_host = 0;
cudaMemcpy(shrinked_size_host, shrinked_size_device, sizeof(int), cudaMemcpyDeviceToHost);
cudaDeviceSynchronize();
// Dereferences the still-null host pointer; this matches the reported
// segmentation fault, which appears right after the kernel's output.
printf("shrinked_size_host=%d\n", *shrinked_size_host);
return 0;
}
This is the output produced by the program:
Hello world CPU
Set device variable to: 12
Segmentation fault (core dumped)
Not sure why there is a segmentation fault.
Solution 1:[1]
I figured out the answer to this question.
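shrinked_size_host is declared as a pointer and initialized to 0 (null), so cudaMemcpy has no host memory to write into and the later dereference of the null pointer crashes. Host memory for shrinked_size_host must be allocated before the copy. So, either use: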
- Heap allocation: use malloc(sizeof(int)) or new int to allocate a single integer. Remember to release the allocated memory at the end (free for malloc, delete for new).
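- Stack allocation: Use int shrinked_size_host[1]; (or declare a plain int shrinked_size_host; and pass &shrinked_size_host to cudaMemcpy, printing the variable directly).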
Sources
This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.
Source: Stack Overflow
| Solution | Source |
|---|---|
| Solution 1 | Hossam Amer |
