Python - How to process a binary image to align sparse letters in a row

I'm trying to align letters from an image in order to obtain the full word with tesseract OCR:

import cv2
import numpy as np

img = cv2.imread("captcha.png", 0)  # flag 0: load as a single-channel grayscale image
h1, w1 = img.shape
img = cv2.resize(img, (w1*5, h1*5))  # enlarge five times along both axes
# Threshold the image and find the contours
_, thresh = cv2.threshold(img, 123, 255, cv2.THRESH_BINARY_INV)
contours, hierarchy = cv2.findContours(
    thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

# Create a white background image to paste the letters on
bg = np.zeros((200, 200), np.uint8)  # fixed 200x200 canvas, independent of the resized image size
bg[:] = 255
left = 5  # x offset on bg at which the next letter is pasted

# Iterate through the contours
for contour, h in zip(contours, hierarchy[0]):
    # Ignore inside parts (circle in a 'p' or 'b')
    if h[3] == -1:  # h[3] is the parent index; -1 means the contour has no parent
        # Get the bounding rectangle
        x, y, w, h = cv2.boundingRect(contour)  # note: rebinds h (previously the hierarchy row)
        # Paste it onto the background
        bg[5:5+h, left:left+w] = img[y:y+h, x:x+w]  # ValueError when the box does not fit inside bg
        left += (w + 5)
cv2.imshow('thresh', bg)  # the window is titled 'thresh' but shows the bg canvas
cv2.waitKey()

And the image that I want to process is this one

However, I got this message:

>Traceback (most recent call last):
  File ".\img.py", line 24, in <module>
    bg[5:5+h, left:left+w] = img[y:y+h, x:x+w]
ValueError: could not broadcast input array from shape (72,750) into shape (72,195)

Using tesseract OCR on its own I got "acba", without the zero and the four, so I need to reorder the letters to obtain the full word. Any suggestions?

Solution 1:^[1]

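You are trying to assign a larger image region to a smaller target area, but both slices must have exactly the same shape. In the traceback the source slice has shape (72, 750), while `bg` is only 200 pixels wide, so the target slice starting at `left = 5` is clipped to shape (72, 195).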

You can get the shapes of both slices, take the `min()` of their heights and of their widths, and copy only that common area:

        # Shapes of the destination slice (clipped at the edges of bg) and of the source slice
        h1, w1 = bg[5:5+h, left:left+w].shape
        h2, w2 = img[y:y+h, x:x+w].shape
        
        # Largest height and width that fit into both slices
        min_h = min(h1, h2)
        min_w = min(w1, w2)
        
        # Copy only the common area so both sides have the same shape;
        # a letter that does not fit into bg is cropped instead of raising ValueError
        bg[5:5+min_h, left:left+min_w] = img[y:y+min_h, x:x+min_w]

EDIT:

Or maybe you should paste each letter at its original position, using `x` and `y` instead of `left` and the fixed top offset `5`:

        # Same index range on both sides; the shapes match as long as bg is at least as large as img
        bg[y:y+h, x:x+w] = img[y:y+h, x:x+w]

You should probably also create `bg` with the same size as `img` (after resizing):

# Height and width of img (expected to be the already resized 2-D grayscale image)
h1, w1 = img.shape

# Canvas of the same size as img; np.zeros yields an all-black canvas,
# so fill it with 255 afterwards if a white background is wanted
bg = np.zeros((h1, w1), np.uint8)

EDIT:

Full working code with other changes.

I read the image in color so that I can display the contours that were found, because it seems to find something different than you might expect.

import cv2
import numpy as np

print('CV:', cv2.__version__)

# Load the file as it is stored on disk and report its size.
img_color = cv2.imread("ZzSgt.png", cv2.IMREAD_UNCHANGED)
height, width = img_color.shape[:2]
print('original shape (W,H):', width, height)

# Enlarge the image five times along both axes.
img_color = cv2.resize(img_color, (width * 5, height * 5))
height, width = img_color.shape[:2]
print('resized shape (W,H) :', width, height)

# Grayscale copy: used for thresholding and as the source of the pasted letters.
img = cv2.cvtColor(img_color, cv2.COLOR_BGR2GRAY)

# Inverted binary threshold, then contours together with their hierarchy.
thresh = cv2.threshold(img, 123, 255, cv2.THRESH_BINARY_INV)[1]
contours, hierarchy = cv2.findContours(
    thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE
)

# White single-channel canvas that receives the letters side by side.
bg = np.full((height, width), 255, np.uint8)

# White three-channel canvas used to display the detected contours.
img_contours = np.full((height, width, 3), 255, np.uint8)

# x offset on bg at which the next letter is pasted.
left = 5

for contour, node in zip(contours, hierarchy[0]):
    # Skip contours that have a parent (e.g. the hole in a 'p' or 'b').
    if node[3] != -1:
        continue

    x, y, box_w, box_h = cv2.boundingRect(contour)
    print('contour (X,Y,W,H):', x, y, box_w, box_h)

    # The destination window may be clipped at the edges of bg, so copy
    # only the area that exists in both the destination and the source.
    target = bg[5:5 + box_h, left:left + box_w]
    source = img[y:y + box_h, x:x + box_w]
    rows = min(target.shape[0], source.shape[0])
    cols = min(target.shape[1], source.shape[1])
    bg[5:5 + rows, left:left + cols] = img[y:y + rows, x:x + cols]

    # Advance past this letter plus a 5-pixel gap.
    left += box_w + 5

    # Copy the color region and outline the contour in red (BGR order).
    img_contours[y:y + box_h, x:x + box_w] = img_color[y:y + box_h, x:x + box_w]
    img_contours = cv2.drawContours(img_contours, [contour], 0, (0, 0, 255))

cv2.imshow('contours', img_contours)
cv2.imshow('background', bg)
cv2.waitKey()

cv2.destroyAllWindows()

contours

background

EDIT:

I get a better result if I invert the image (`img = ~img`) and change the threshold from 123 to 30.

thresh

contours

background (and now I see that the background could be as small as (75, 255), or, to be safer, (100, 300))

import cv2
import numpy as np

print('CV:', cv2.__version__)

# Load the file as it is stored on disk and report its size.
# (Swap in "captcha.png" to use the file name from the question.)
img_color = cv2.imread("ZzSgt.png", cv2.IMREAD_UNCHANGED)
height, width = img_color.shape[:2]
print('original shape (W,H):', width, height)

# Enlarge the image five times along both axes.
img_color = cv2.resize(img_color, (width * 5, height * 5))
height, width = img_color.shape[:2]
print('resized shape (W,H) :', width, height)

# Grayscale copy, inverted before thresholding; it is also the source of
# the pasted letters.
img = cv2.cvtColor(img_color, cv2.COLOR_BGR2GRAY)
img = ~img

# Inverted binary threshold at 30; show the mask for inspection.
thresh = cv2.threshold(img, 30, 255, cv2.THRESH_BINARY_INV)[1]
cv2.imshow('thresh', thresh)

contours, hierarchy = cv2.findContours(
    thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE
)

# White single-channel canvas that receives the letters side by side.
bg = np.full((height, width), 255, np.uint8)

# White three-channel canvas used to display the detected contours.
img_contours = np.full((height, width, 3), 255, np.uint8)

# x offset on bg at which the next letter is pasted.
left = 5

for contour, node in zip(contours, hierarchy[0]):
    # Skip contours that have a parent (e.g. the hole in a 'p' or 'b').
    if node[3] != -1:
        continue

    x, y, box_w, box_h = cv2.boundingRect(contour)
    print('contour (X,Y,W,H):', x, y, box_w, box_h)

    # The destination window may be clipped at the edges of bg, so copy
    # only the area that exists in both the destination and the source.
    target = bg[5:5 + box_h, left:left + box_w]
    source = img[y:y + box_h, x:x + box_w]
    rows = min(target.shape[0], source.shape[0])
    cols = min(target.shape[1], source.shape[1])
    bg[5:5 + rows, left:left + cols] = img[y:y + rows, x:x + cols]

    # Advance past this letter plus a 5-pixel gap.
    left += box_w + 5

    # Copy the color region and outline the contour in red (BGR order).
    img_contours[y:y + box_h, x:x + box_w] = img_color[y:y + box_h, x:x + box_w]
    img_contours = cv2.drawContours(img_contours, [contour], 0, (0, 0, 255))

cv2.imshow('contours', img_contours)
cv2.imshow('background', bg)
cv2.waitKey()

cv2.destroyAllWindows()

Sources

This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.

Source: Stack Overflow

Solution	Source
Solution 1

'Python - How to process a binary image to align sparse letters in a row

Solution 1:[1]

Sources

Related Questions

Solution 1:^[1]