How to retrieve raw data from YUV2 streaming

I am interfacing a QVGA sensor that streams out YUV2-format data to a host application on Windows (USB). How can I use an OpenCV-Python example application to stream or capture the raw data from the YUV2 format?

How can I do that? Is there any test example to do so?

//opencv-python (host appl)
import cv2
import numpy as np

# Open the default camera (index 0) through the Media Foundation backend (Windows).
cap = cv2.VideoCapture(0, cv2.CAP_MSMF)
# Request frame size 340x240 and 30 fps (the driver may ignore these hints).
cap.set(cv2.CAP_PROP_FRAME_WIDTH, 340)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 240)
cap.set(cv2.CAP_PROP_FPS, 30)
while(True):
    # Capture frame-by-frame
    ret, frame = cap.read()
    # Bug fix: cap.read() returns ret=False (and frame=None) when grabbing fails;
    # passing None to cv2.imshow raises an error, so stop the loop instead.
    if not ret:
        break
    # Display the resulting frame
    cv2.imshow('frame', frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# When everything is done, release the capture
cap.release()
cv2.destroyAllWindows()

Code sample for grabbing video frames without decoding:

import cv2
import numpy as np

# Open camera 0.
# NOTE: try cv2.CAP_FFMPEG here in place of cv2.CAP_MSMF.
cap = cv2.VideoCapture(0, cv2.CAP_FFMPEG)

# Ask the driver for 340x240 at 30 fps (the driver may ignore the request).
for prop, value in ((cv2.CAP_PROP_FRAME_WIDTH, 340),
                    (cv2.CAP_PROP_FRAME_HEIGHT, 240),
                    (cv2.CAP_PROP_FPS, 30)):
    cap.set(prop, value)

# CAP_PROP_FORMAT = -1 requests the undecoded RAW stream (delivered as Mat 8UC1).
cap.set(cv2.CAP_PROP_FORMAT, -1)

# Grab ten frames and report the shape/dtype OpenCV hands back.
for _ in range(10):
    grabbed, frame = cap.read()

    if not grabbed:
        break

    print('frame.shape = {}    frame.dtype = {}'.format(frame.shape, frame.dtype))

cap.release()

In case cv2.CAP_FFMPEG is not working, try the following code sample:

import cv2
import numpy as np

# Open camera 0 through the Media Foundation backend.
cap = cv2.VideoCapture(0, cv2.CAP_MSMF)

# Ask the driver for 340x240 at 30 fps.
cap.set(cv2.CAP_PROP_FRAME_WIDTH, 340)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 240)
cap.set(cv2.CAP_PROP_FPS, 30)

# NOTE: set the FOURCC to 'Y16 ' and disable the automatic RGB conversion:
#########################################################
y16 = cv2.VideoWriter.fourcc('Y', '1', '6', ' ')
cap.set(cv2.CAP_PROP_FOURCC, y16)
cap.set(cv2.CAP_PROP_CONVERT_RGB, 0)
#########################################################

# CAP_PROP_FORMAT = -1 requests the undecoded RAW stream (delivered as Mat 8UC1).
cap.set(cv2.CAP_PROP_FORMAT, -1)

# Grab ten frames and report the shape/dtype OpenCV hands back.
grabbed_count = 0
while grabbed_count < 10:
    grabbed, frame = cap.read()

    if not grabbed:
        break

    print('frame.shape = {}    frame.dtype = {}'.format(frame.shape, frame.dtype))
    grabbed_count += 1

cap.release()

Reshape the uint8 frame to 680x240 and save as img.png:

import cv2
import numpy as np

# Open camera 0 (Media Foundation backend).
cap = cv2.VideoCapture(0, cv2.CAP_MSMF)

# Request 340x240 at 30 fps.
cap.set(cv2.CAP_PROP_FRAME_WIDTH, 340)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 240)
cap.set(cv2.CAP_PROP_FPS, 30)

# Turn off the automatic conversion to BGR: FOURCC 'Y16 ' plus CAP_PROP_CONVERT_RGB = 0.
cap.set(cv2.CAP_PROP_FOURCC, cv2.VideoWriter.fourcc('Y', '1', '6', ' '))
cap.set(cv2.CAP_PROP_CONVERT_RGB, 0)

# CAP_PROP_FORMAT = -1 delivers the undecoded RAW stream as a flat uint8 Mat.
cap.set(cv2.CAP_PROP_FORMAT, -1)

# Two raw bytes per pixel -> 680 uint8 columns per 240 rows (loop invariants, hoisted).
rows = 240
cols = 340 * 2

for _ in range(10):
    # Capture frame-by-frame
    grabbed, frame = cap.read()

    if not grabbed:
        break

    # View the flat raw buffer as a 240x680 uint8 image and save it.
    img = frame.reshape(rows, cols)

    cv2.imwrite('img.png', img)

cap.release()

//680x240 img.png enter image description here

//in presence of hot object (img1.png) enter image description here

//processed image (hot object)

enter image description here

//with little-endian (test)

enter image description here

//test image (captured) with CAP_DSHOW

enter image description here

//test image (saved) with CAP_DSHOW

enter image description here

//680x240 (hand.png)

enter image description here

//680x240 (hand1.png)

enter image description here

//fing preview

enter image description here

//fing.png

enter image description here

//fing.png

enter image description here



Solution 1:[1]

The true format of the pixels in your video is int16 grayscale pixel, but it is marked as YUV2 format (probably for compatibility with grabbers that do not support 16 bit).

I have seen the same technique used by the RAVI format.

The default behavior of OpenCV is to convert the frames from YUV2 to BGR format.
Since the format has no color (and is just marked as YUV2), the conversion messes up your data.

I could be wrong here... but it looks like the format is "big endian" and signed 16 bits.


Here is a complete code sample for grabbing and displaying the video:

# Open video0 through the Windows Media Foundation backend.
cap = cv2.VideoCapture(0, cv2.CAP_MSMF)

# Set requested width and height (the driver may ignore these hints).
cols, rows = 340, 240
cap.set(cv2.CAP_PROP_FRAME_WIDTH, cols)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, rows)
cap.set(cv2.CAP_PROP_FPS, 30) # set fps

# Disable the conversion to BGR by setting FOURCC to Y16 and `CAP_PROP_CONVERT_RGB` to 0.
cap.set(cv2.CAP_PROP_FOURCC, cv2.VideoWriter.fourcc('Y','1','6',' ')) 
cap.set(cv2.CAP_PROP_CONVERT_RGB, 0)    

# Fetch undecoded RAW video streams
cap.set(cv2.CAP_PROP_FORMAT, -1)  # Format of the Mat objects. Set value -1 to fetch undecoded RAW video streams (as Mat 8UC1)

while True:
    # Capture frame-by-frame
    ret, frame = cap.read()

    if not ret:
        break

    # Reassemble the raw uint8 byte stream into signed 16-bit pixels (data is big-endian).
    frame = frame.reshape(rows, cols*2) # Two raw bytes per pixel -> 240 rows x 680 uint8 columns
    frame = frame.astype(np.uint16) # Widen uint8 to uint16 so the shift below does not overflow
    frame = (frame[:, 0::2] << 8) + frame[:, 1::2]  # Combine each byte pair as a big-endian word (byte swap); result is 340x240
    frame = frame.view(np.int16)  # The data is actually signed 16 bits - reinterpret it as int16

    # Apply some processing for display (this part is just "cosmetics"):
    frame_roi = frame[:, 10:-10]  # Crop to 320x240 (the left and right margins are not meant to be displayed)
    # frame_roi = cv2.medianBlur(frame_roi, 3)  # Clean the dead pixels (just for better viewing the image)
    frame_roi = frame_roi << 3  # Drop the 3 most significant bits - presumably flags/padding, TODO confirm with the sensor datasheet
    normed = cv2.normalize(frame_roi, None, 0, 255, cv2.NORM_MINMAX, cv2.CV_8U)  # Convert to uint8 with min-max normalization (just for viewing the image)

    cv2.imshow('normed', normed)  # Show the normalized video frame

    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

    # cv2.imwrite('normed.png', normed)

cap.release()
cv2.destroyAllWindows()

Left shifting each pixel by 3 (frame_roi = frame_roi << 3) fixes most of the issues.

It could be that the upper 3 bits are not in place, or have some different meaning?

The ROI cropping, and normalizing are just "cosmetics", so you can see something.

Here is the processed image you have posted (with the hot object):

enter image description here


For little endian, replace the following lines:

frame = frame.reshape(rows, cols*2) # Reshape to 680x240
frame = frame.astype(np.uint16) # Convert uint8 elements to uint16 elements
frame = (frame[:, 0::2] << 8) + frame[:, 1::2]  # Convert from little endian to big endian (apply byte swap), the result is 340x240.
frame = frame.view(np.int16)  # The data is actually signed 16 bits - view it as int16 (16 bits singed).

With:

frame = frame.view(np.int16).reshape(rows, cols)

In case the values are all positive (uint16 type), try:

frame = frame.view(np.uint16).reshape(rows, cols)

Sketch code for processing the image for display:

# Sketch: load a previously saved raw frame dump and enhance it for display.
frame = cv2.imread('hand1.png', cv2.IMREAD_UNCHANGED)  # Read input image (saved 240x680 uint8 raw dump)

# Fix: `rows` and `cols` were used below without being defined in this snippet.
cols, rows = 340, 240

# create a CLAHE object for local contrast enhancement (Arguments are optional).
clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))


# Convert the frame from uint8 elements to big-endian signed int16 format.
frame = frame.reshape(rows, cols * 2)  # Two raw bytes per pixel -> 240 rows x 680 uint8 columns
frame = frame.astype(np.uint16)  # Widen uint8 to uint16 so the shift below does not overflow
frame = (frame[:, 0::2] << 8) + frame[:, 1::2]  # Combine each byte pair as a big-endian word (byte swap); result is 340x240
frame = frame.view(np.int16)  # The data is actually signed 16 bits - reinterpret it as int16

# Apply some processing for display (this part is just "cosmetics"):
frame_roi = frame[:, 10:-10]  # Crop to 320x240 (the left and right margins are not meant to be displayed).
# frame_roi = cv2.medianBlur(frame_roi, 3)  # Clean the dead pixels (just for better viewing the image).

#frame_roi = frame_roi << 3  # Remove the 3 most significant bits ???
frame_roi = frame_roi << 1  # Remove the most significant bit ???

# Fix the offset difference between the odd and even columns (note: this is not a good solution).
#frame_as_uint16 = (frame_roi.astype(np.int32) + 32768).astype(np.uint16)
frame_as_uint16 = frame_roi.view(np.uint16)  # Try to interpret the data as unsigned
frame_as_float = frame_as_uint16.astype(np.float32) / 2  # Divide by 2 for avoiding overflow
med_odd = np.median(frame_as_float[:, 0::2])
med_evn = np.median(frame_as_float[:, 1::2])
med_dif = med_odd - med_evn
frame_as_float[:, 0::2] -= med_dif/2
frame_as_float[:, 1::2] += med_dif/2
frame_as_uint16 = np.round(frame_as_float).clip(0, 2**16-1).astype(np.uint16)

cl1 = clahe.apply(frame_as_uint16)  # Apply contrast enhancement.
normed = cv2.normalize(cl1, None, 0, 255, cv2.NORM_MINMAX, cv2.CV_8U)  # Convert to uint8 with min-max normalization (just for viewing the image).

cv2.imwrite('normed.png', normed)

cv2.imshow('normed', normed)
cv2.waitKey()
cv2.destroyAllWindows()

Sources

This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.

Source: Stack Overflow

Solution Source
Solution 1