'what is the proper way to compile cuda with g++ [duplicate]

the code files as follow:

a.h

void warperFoo();

a.cu

//---------- a.cu ----------
#include <cuda.h>
#include <cuda_runtime.h>
#include <stdio.h>
#include "a.h"


__global__ void foo (void) {
  printf("calling from kernel foo: %d\n", threadIdx.x);
  // bar();
}

void warperFoo() {
    printf("calling from warperFoo\n");
    dim3 gdim(1,1,1);
    dim3 bdim(4,4,4);
    foo<<<gdim, bdim>>>();
}

main.cpp

#include <iostream>
#include <cuda_runtime_api.h>
#include "a.h"

using namespace std;


int main() {
    warperFoo();   
    return 0;
}

makefile

.PHONY: clean
all: a.o
    g++ -m64 -Wall a.o main.cpp -lcudart -L/usr/local/cuda-11.2/lib64/ -I/usr/local/cuda-11.2/include -lcudadevrt -lcuda

a.o:
    nvcc --gpu-architecture=sm_70 -ccbin /usr/bin/gcc -c a.cu
    
clean:
    rm -rf *.o a.out

make output

nvcc --gpu-architecture=sm_70 -ccbin /usr/bin/gcc -c a.cu
g++ -m64 -Wall a.o main.cpp -lcudart -L/usr/local/cuda-11.2/lib64/ -I/usr/local/cuda-11.2/include -lcudadevrt -lcuda

a.out output

calling from warperFoo

i want compile .cu with nvcc first and then compile c++ host code with g++.

it supposed to print "calling from kernel foo"...

SO why kernel didn't output?

c++cuda

Sources

This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.

Source: Stack Overflow

Solution	Source