Want some guidance on how to use nvjpegEncodeYUV()

I am trying to implement some JPEG encoding CUDA code based on the sample code here: https://docs.nvidia.com/cuda/nvjpeg/index.html#nvjpeg-encode-examples

I am posting all the code, the test result, the Makefile, and the input file.

I am testing on an NVIDIA 2080 with CUDA 11.2.

But as you can see from the results below, it fails. The first failure is from nvjpegEncodeYUV(), which returns error code 2, i.e. it complains about an invalid parameter. I have no idea what is wrong. Could you please help me find what went wrong?
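For reference, status 2 maps to NVJPEG_STATUS_INVALID_PARAMETER in nvjpeg.h. The posted code just prints the raw status numbers; a small helper like the one below (my own macro, not part of nvjpeg) is an equivalent way to check each call:

// My own convenience macro (not part of nvjpeg): stop with a message when a
// call does not return NVJPEG_STATUS_SUCCESS; status 2 is
// NVJPEG_STATUS_INVALID_PARAMETER. Requires <iostream> and <cstdlib>.
#define CHECK_NVJPEG(call)                                              \
  do {                                                                  \
    nvjpegStatus_t s_ = (call);                                         \
    if (s_ != NVJPEG_STATUS_SUCCESS) {                                  \
      std::cerr << #call << " -> status " << s_ << std::endl;           \
      std::exit(1);                                                     \
    }                                                                   \
  } while (0)

The full test program: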

#include <iterator>
#include <fstream>
#include <iostream>
#include <vector>

#include <cassert>
#include <unistd.h>

#include "nppdefs.h"
#include "nppi_support_functions.h"
#include "nppi_color_conversion.h"
#include "nvjpeg.h"

#include "cuda_runtime.h"


#define DEFAULT_RAWFILE  "./uyvy422.raw"

//file >>> buff_UYVY
int read_raw(const char *file2read, unsigned char *buff_UYVY)
{
  if (!file2read) {
    std::cout << "file2read empty!!" << std::endl;
    return 1;
  }

  if (!buff_UYVY) {
    std::cout << "buff_UYVY empty!!" << std::endl;
    return 1;
  }

  std::string   file_uyvy(file2read);
  std::ifstream stream_uyvy;
  stream_uyvy.open(file_uyvy, std::ios::in | std::ios::binary);

  if (!stream_uyvy.is_open())
  {
    std::cerr << "[ERROR] cannot open the raw file " << file_uyvy
      << std::endl;
    std::cerr << std::endl;
    assert(0);
  }
  stream_uyvy.read((char*)buff_UYVY, 1920*1080*2);
  stream_uyvy.close();

  return 0;
}


int main(int argc, char*argv[])
{
  unsigned char *buff_UYVY =
    new unsigned char[1920 * 1080 * 2];

  //file >>> buff_UYVY
  int ret;
  if (argv[1]) {
    ret = read_raw(argv[1], buff_UYVY);
  } else {
    ret = read_raw(DEFAULT_RAWFILE, buff_UYVY);
  }
  if (ret != 0) {
    std::cout << "read_raw() failed!!" << std::endl;
    return ret;
  }
  if (!buff_UYVY) {
    std::cout << "buff_UYVY empty!!" << std::endl;
    return 1;
  }

  cudaError_t err_cu_api;
  Npp8u* gpu_buff_CbYCr422;

  err_cu_api = cudaMalloc((void**)&gpu_buff_CbYCr422,
                          1920*1080*2);
  err_cu_api = cudaMemcpy((void*)gpu_buff_CbYCr422,
                          (const void*)buff_UYVY,
                          1920*1080*2,
                          cudaMemcpyHostToDevice);

  //////////////////////////////////////////////////////////////////////////////
  //https://docs.nvidia.com/cuda/nvjpeg/index.html#nvjpeg-encode-examples

  nvjpegStatus_t status;
  nvjpegHandle_t nv_handle;
  nvjpegEncoderState_t nv_enc_state;
  nvjpegEncoderParams_t nv_enc_params;
  cudaStream_t stream = 0;

  // initialize nvjpeg structures
  status = nvjpegCreateSimple(&nv_handle);
  std::cout << "nvjpegCreateSimple : " << status << std::endl;
  status = nvjpegEncoderStateCreate(nv_handle, &nv_enc_state, stream);
  std::cout << "nvjpegEncoderStateCreate : " << status << std::endl;
  status = nvjpegEncoderParamsCreate(nv_handle, &nv_enc_params, stream);
  std::cout << "nvjpegEncoderParamsCreate : " << status << std::endl;

  nvjpegImage_t imgdesc =
  {
    {
      gpu_buff_CbYCr422,
      gpu_buff_CbYCr422 + 1920*1080,
      gpu_buff_CbYCr422 + 1920*1080*2,
      gpu_buff_CbYCr422 + 1920*1080*3
    },
    {
      1920,
      1920,
      1920,
      1920
    }
  };

  // Compress image
  status = nvjpegEncodeYUV(nv_handle, nv_enc_state, nv_enc_params,
                  &imgdesc, NVJPEG_CSS_422, 1920, 1080,
                  stream);
  std::cout << "nvjpegEncodeYUV : " << status << std::endl;

  // get compressed stream size
  size_t length;
  status = nvjpegEncodeRetrieveBitstream(nv_handle, nv_enc_state, NULL,
                                                &length, stream);
  std::cout << "nvjpegEncodeRetrieveBitstream : " << status << std::endl;
  // get stream itself
  cudaStreamSynchronize(stream);
  std::vector<char> jpeg(length);
  status = nvjpegEncodeRetrieveBitstream(nv_handle, nv_enc_state,
                                (unsigned char*)jpeg.data(), &length, 0);
  std::cout << "nvjpegEncodeRetrieveBitstream : " << status << std::endl;

  // write stream to file
  cudaStreamSynchronize(stream);
  std::ofstream output_file("test.jpg", std::ios::out | std::ios::binary);
  output_file.write(jpeg.data(), length);
  output_file.close();

  //https://docs.nvidia.com/cuda/nvjpeg/index.html#nvjpeg-encode-examples
  //////////////////////////////////////////////////////////////////////////////

  cudaFree(gpu_buff_CbYCr422);
  (void)err_cu_api;  // silence the unused-variable warning

  delete[] buff_UYVY;

  return 0;
}
$ ./test
nvjpegCreateSimple : 0
nvjpegEncoderStateCreate : 0
nvjpegEncoderParamsCreate : 0
nvjpegEncodeYUV : 2
nvjpegEncodeRetrieveBitstream : 2
nvjpegEncodeRetrieveBitstream : 2
CC = g++
CFLAGS = -v -Wall -I/usr/local/cuda/include -g
LDFLAGS += -L/usr/local/cuda/lib64
SRCS = main_gpu.cpp
PROG = test

OPENCV = `pkg-config opencv4 --cflags --libs`
LIBS = $(OPENCV) \
       -lcudart \
       -lnppisu \
       -lnpps \
       -lnppc \
       -lnppial \
       -lnppicc \
       -lnppidei \
       -lnppif \
       -lnppig \
       -lnppim \
       -lnppist \
       -lnppitc \
       -lnvjpeg

.PHONY: all clean

all: $(PROG)

$(PROG):$(SRCS)
        $(CC) $(CFLAGS) $(LDFLAGS) -o $(PROG) $(SRCS) $(LIBS)

clean:
        rm -f $(OBJS) $(PROG) *.jpg *.bmp

Getting the input file:

git clone https://github.com/jumogehn/Jumogehn.git

uyvy422.raw in that repository is the input file.



Solution 1:

Based on what I see in your code, I'm guessing your input storage format is ordinary YUV422:

U0 Y0 V0 Y1 U2 Y2 V2 Y3 U4 Y4 V4…

That is an interleaved (packed) storage format. However, the docs for nvjpegEncodeYUV state:

The source argument should be filled with the corresponding YUV planar data.

So you would need to convert your interleaved input into planar storage: a Y plane, followed by a U plane, followed by a V plane.
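For example, a minimal host-side repack could look like this. It is only a sketch: the helper name is my own, it assumes the U0 Y0 V0 Y1 byte order shown above and an even width, and it would run on buff_UYVY before the cudaMemcpy. (NPP also has packed-to-planar color conversion routines if you would rather do this on the device.)

// Sketch: repack interleaved UYVY (U0 Y0 V0 Y1 ...) into planar 4:2:2.
// Output is a full-resolution Y plane plus half-width U and V planes.
void uyvy_to_planar_422(const unsigned char *uyvy,
                        unsigned char *y, unsigned char *u, unsigned char *v,
                        int width, int height)
{
  for (int row = 0; row < height; ++row) {
    const unsigned char *src = uyvy + (size_t)row * width * 2;
    unsigned char *yrow = y + (size_t)row * width;
    unsigned char *urow = u + (size_t)row * (width / 2);
    unsigned char *vrow = v + (size_t)row * (width / 2);
    for (int col = 0; col < width; col += 2) {
      urow[col / 2] = src[col * 2 + 0];  // U shared by this pixel pair
      yrow[col + 0] = src[col * 2 + 1];  // Y0
      vrow[col / 2] = src[col * 2 + 2];  // V shared by this pixel pair
      yrow[col + 1] = src[col * 2 + 3];  // Y1
    }
  }
}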

As a result, your imgdesc would need to change, because the pitch of the U and V planes is half that of the Y plane:

  nvjpegImage_t imgdesc =
  {
    {
      gpu_buff_CbYCr422,                         // pointer to start of Y plane
      gpu_buff_CbYCr422 + 1920*1080,             // pointer to start of U plane
      gpu_buff_CbYCr422 + 1920*1080 + 960*1080,  // pointer to start of V plane
      NULL
    },
    {
      1920,  // pitch of Y plane
      960,   // pitch of U plane
      960,   // pitch of V plane
      0
    }
  };
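To spell out the arithmetic behind those offsets and pitches for a 1920x1080 frame:

// Plane sizes for planar 4:2:2 at 1920x1080 (chroma subsampled horizontally only):
const int W = 1920, H = 1080;
const size_t y_size = (size_t)W * H;        // 2,073,600 bytes
const size_t c_size = (size_t)(W / 2) * H;  // 1,036,800 bytes each for U and V
// y_size + 2 * c_size == (size_t)W * H * 2, so the existing 1920*1080*2
// allocation is exactly large enough for all three planes after the repack:
// Y starts at offset 0, U at y_size, V at y_size + c_size.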

Finally, it seems you need to set the Sampling Factors in the Params:

$ cat t2017.cpp
#include "nvjpeg.h"

#include "cuda_runtime.h"
#include <iostream>

#define DEFAULT_RAWFILE  "./uyvy422.raw"

int main(int argc, char*argv[])
{
  unsigned char *buff_UYVY =
    new unsigned char[1920 * 1080 * 2];

  cudaError_t err_cu_api;
  unsigned char* gpu_buff_CbYCr422;

  err_cu_api = cudaMalloc((void**)&gpu_buff_CbYCr422,
                          1920*1080*2);
  err_cu_api = cudaMemcpy((void*)gpu_buff_CbYCr422,
                          (const void*)buff_UYVY,
                          1920*1080*2,
                          cudaMemcpyHostToDevice);

  //////////////////////////////////////////////////////////////////////////////
  //https://docs.nvidia.com/cuda/nvjpeg/index.html#nvjpeg-encode-examples

  nvjpegStatus_t status;
  nvjpegHandle_t nv_handle;
  nvjpegEncoderState_t nv_enc_state;
  nvjpegEncoderParams_t nv_enc_params;
  cudaStream_t stream = 0;

  // initialize nvjpeg structures
  status = nvjpegCreateSimple(&nv_handle);
  std::cout << "nvjpegCreateSimple : " << status << std::endl;
  status = nvjpegEncoderStateCreate(nv_handle, &nv_enc_state, stream);
  std::cout << "nvjpegEncoderStateCreate : " << status << std::endl;
  status = nvjpegEncoderParamsCreate(nv_handle, &nv_enc_params, stream);
  std::cout << "nvjpegEncoderParamsCreate : " << status << std::endl;

  nvjpegImage_t imgdesc =
  {
    {
      gpu_buff_CbYCr422,
      gpu_buff_CbYCr422 + 1920*1080,
      gpu_buff_CbYCr422 + 1920*1080 + 960*1080,
      NULL
    },
    {
      1920,
      960,
      960,
      0
    }
  };
  status = nvjpegEncoderParamsSetSamplingFactors(nv_enc_params, NVJPEG_CSS_422, stream);
  std::cout << "nvjpegEncoderParamsSetSamplingFactors: " << status << std::endl;
  // Compress image
  status = nvjpegEncodeYUV(nv_handle, nv_enc_state, nv_enc_params,
                  &imgdesc, NVJPEG_CSS_422, 1920, 1080,
                  stream);
  std::cout << "nvjpegEncodeYUV : " << status << std::endl;
}
$ g++ t2017.cpp -I/usr/local/cuda/include -L/usr/local/cuda/lib64 -lnvjpeg -lcudart -o t2017
$ ./t2017
nvjpegCreateSimple : 0
nvjpegEncoderStateCreate : 0
nvjpegEncoderParamsCreate : 0
nvjpegEncoderParamsSetSamplingFactors: 0
nvjpegEncodeYUV : 0
$

I'm not suggesting this fixes every possible error in your code, merely that it seems to address this question:

The first failure is from nvjpegEncodeYUV(), which returns error code 2, i.e. it complains about an invalid parameter. I have no idea what is wrong. Could you please help me find what went wrong?
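As a side note, beyond the invalid-parameter error itself: if you also want to control the output quality, the encoder params object has nvjpegEncoderParamsSetQuality. A minimal addition before the encode call would be (the value 90 is just an arbitrary example):

  // Optional: set the JPEG quality (1..100) before calling nvjpegEncodeYUV.
  status = nvjpegEncoderParamsSetQuality(nv_enc_params, 90, stream);
  std::cout << "nvjpegEncoderParamsSetQuality : " << status << std::endl;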

Source: Stack Overflow, licensed under CC BY-SA 3.0.