'How to access dynamically allocated array in CUDA
Here I'm trying to access a dynamically allocated array in CUDA. However, after running the output is c[0][0] = 0. Am I accessing the allocated array correctly? I think the way I'm copying the arrays is probably correct and for some reason, the value of C has not been changed on the device.
#include<iostream>
using namespace std;
__global__ void add_matrix(float *A, float *B, float *C, int n) {
int j = blockIdx.x * blockDim.x + threadIdx.x;
int i = blockIdx.y * blockDim.y + threadIdx.y;
if ((i < n) && (j < n)){
C[i*n+j] = A[i*n+j] + B[i*n+j];
}
}
int main(){
const size_t N = 1024;
const size_t size = N * N * sizeof(float);
float *A, *B, *C;
A = (float*) malloc(size);
B = (float*) malloc(size);
C = (float*) malloc(size);
for (size_t i=0; i<N*N; i++){
A[i] = 5.0;
B[i] = 6.0;
}
float *A_d, *B_d, *C_d;
cudaMalloc((void**)&A_d, size);
cudaMalloc((void**)&B_d, size);
cudaMalloc((void**)&C_d, size);
auto code = cudaMemcpy(A_d, A, size, cudaMemcpyHostToDevice);
if (code != cudaSuccess){
cout << "Error copying A to device" << endl;
}
code = cudaMemcpy(B_d, B, size, cudaMemcpyHostToDevice);
if (code != cudaSuccess){
cout << "Error copying B to device" << endl;
}
dim3 threads(N, N);
dim3 blocks(1,1);
add_matrix<<<blocks, threads>>>(A_d, B_d, C_d, N);
code = cudaMemcpy(C, C_d, size, cudaMemcpyDeviceToHost);
if (code != cudaSuccess){
cout << "Error copying C from device" << endl;
}
std::cout << "C[0][0] : " << C[0] << std::endl;
free(A); free(B); free(C);
cudaFree(A_d); cudaFree(B_d); cudaFree(C_d);
return 0;
}
Solution 1:[1]
The problem was arranging the blocks. I totally forgot each block can have a limited number of threads. we can obtain the maximum threads per block by getting maxThreadsPerBlock property using cudaDeviceGetAttribute. It seems the Colab GPU supports 1024 threads in each block. so I changed the arrangement this way:
dim3 threads(32,32);
dim3 blocks(32,32);
And it worked
Sources
This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.
Source: Stack Overflow
| Solution | Source |
|---|---|
| Solution 1 | Amirabbas asadi |
