'Change variable in device during kernel execution

I was trying to change some value during kernel execution and am stuck to a situation I don't understand.

Here's the code I was testing.

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

__global__ void func(int *var) {
  printf("var = %d\n", *var);
  while (*var == 0) {
    printf("1");
  }
  printf("\n");
  printf("var = %d\n", *var);
}

int main() {
  cudaDeviceProp deviceProp;
  cudaGetDeviceProperties(&deviceProp, 0);
  printf("setting device %d with name %s\n", 0, deviceProp.name);

  int h_var;

  int *d_var;
  cudaMalloc((void **)&d_var, sizeof(int));

  cudaStream_t stream_0, stream_1;
  cudaStreamCreate(&stream_0);
  cudaStreamCreate(&stream_1);

  h_var = 0;

  cudaMemcpyAsync(d_var, &h_var, sizeof(int), cudaMemcpyHostToDevice, stream_0);

  func<<<1, 1, 0, stream_0>>>(d_var);

  h_var = 1;

  usleep(200);

  cudaMemcpyAsync(d_var, &h_var, sizeof(int), cudaMemcpyHostToDevice, stream_1);
  cudaDeviceSynchronize();

  cudaFree(d_var);

  return 0;
}

I just changed the value in the kernel memory during kernel execution. This works fine, but when I comment printf("1"); in kernel code (so nothing inside while), the kernel runs forever.

Also, if I comment both printf("1"); and usleep(200);, it works as expected.

What would be the cause? Am I missing something?



Sources

This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.

Source: Stack Overflow

Solution Source