Import Import As From Import As: "Test/testing - JPG"

10/4/21, 8:03 PM OCR_
In [369…
import cv2
import numpy as np
from matplotlib import pyplot as plt
test_image = "test/testing.jpg"
img = cv2.imread(test_image,0)
In [ ]:

In [370…
def display(img_path):
dpi = 80
img_data = plt.imread(img_path)
height, width = img_data.shape[:2]
figsize = width / float(dpi), height / float(dpi)
fig = plt.figure(figsize=figsize)
ax = fig.add_axes([0, 0, 1, 1])
ax.axis('off')
ax.imshow(img_data, cmap='gray')
plt.show()
In [371…
display(test_image)
In [372…
#def grayscale(image):
# return cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
In [373…
#gray_image = grayscale(img)
#cv2.imwrite("result/gray.jpg", gray_image)
In [374…
#display("result/gray.jpg")
localhost:8888/nbconvert/html/OCR_.ipynb?download=false 1/7
10/4/21, 8:03 PM OCR_
In [375…
no_noise = cv2.GaussianBlur(img,(5,5),0)
In [376…
cv2.imwrite("result/no_noise.jpg", no_noise)
True
Out[376…
In [377…
display("result/no_noise.jpg")
In [378…
ret , bw_image = cv2.threshold(no_noise,0,255,cv2.THRESH_BINARY_INV+cv2.THRESH_OTSU)
In [379…
cv2.imwrite("result/binarized.jpg", bw_image)
True
Out[379…
In [380…
display("result/binarized.jpg")
10/4/21, 8:03 PM OCR_
In [381…
kernel = np.ones((2,2),np.uint8)
bw_img = cv2.dilate(bw_image,kernel)
In [382…
cv2.imwrite("result/binarized_new.jpg", bw_img)
True
Out[382…
In [383…
display("result/binarized_new.jpg")
In [384…
def getTransformationMatrix(img):
#input should be a binarized image - text white, bg black
#Find all white pixels
pts = np.empty([0,0])
pts = cv2.findNonZero(img)
#Get rotated rect of white pixels
rect = cv2.minAreaRect(pts)
# rect[0] has the center of rectangle, rect[1] has width and height, rect[2] ha
# To draw the rotated box and save the png image, uncomment below
drawrect = img.copy()
drawrect = cv2.cvtColor(drawrect, cv2.COLOR_GRAY2BGR)
box = cv2.cv2.boxPoints(rect)
box = np.int0(box) # box now has four vertices of rotated rectangle
cv2.drawContours(drawrect,[box],0,(0,0,255),10)
cv2.imwrite('result/rotated_rect.png', drawrect)
#Change rotation angle if the tilt is in another direction
rect = list(rect)
if (rect[1][0] < rect[1][1]): # rect.size.width > rect.size.height
temp = list(rect[1])
temp[0], temp[1] = temp[1], temp[0]
rect[1] = tuple(temp)
rect[2] = rect[2] +90.0
#convert rect back to numpy/tuple
rect = np.asarray(rect)
10/4/21, 8:03 PM OCR_
rect[2] = 357.22
#Rotate the image according to the found angle
rotated_image = np.empty([0,0])
M = cv2.getRotationMatrix2D(rect[0], rect[2], 1.0)
# img = cv2.warpAffine(img, M, (img.shape[1],img.shape[0]))
#returns the transformation matrix for this rotation
return M
In [385…
def rotate(image, M):
return cv2.warpAffine(image, M, (image.shape[1],image.shape[0]))
display("result/rotated_rect.png")
M = getTransformationMatrix(bw_img)
C:\Users\Admin\AppData\Local\Temp/ipykernel_19692/3204302291.py:28: VisibleDeprecationWa
rning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lis
ts-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant t
o do this, you must specify 'dtype=object' when creating the ndarray.
rect = np.asarray(rect)
In [386…
img_for_det = rotate(bw_img,M)
In [387…
cv2.imwrite("result/rotated_img.jpg", img_for_det)
True
Out[387…
In [388…
display("result/rotated_img.jpg")
10/4/21, 8:03 PM OCR_
In [389…
def findLines(bw_image, LinesThres):
# making horizontal projections
horProj = cv2.reduce(bw_image, 1, cv2.cv2.REDUCE_AVG)
# make hist - same dimension as horProj - if 0 (space), then True, else False
th = 0; # black pixels threshold value. this represents the space lines
hist = horProj <= th;
#Get mean coordinate of white white pixels groups
ycoords = []
y = 0
count = 0
isSpace = False
for i in range(0, bw_image.shape[0]):
if (not isSpace):
if (hist[i]): #if space is detected, get the first starting y-c
isSpace = True
count = 1
y = i
else:
if (not hist[i]):
isSpace = False
#when smoothing, thin letters will breakdown, creating

if (count >=LinesThres):
ycoords.append(y / count)
else:
y = y + i
count = count + 1
ycoords.append(y / count)
#returns y-coordinates of the lines found
return ycoords
def LinesMedian(bw_image):
# making horizontal projections
horProj = cv2.reduce(bw_image, 1, cv2.cv2.REDUCE_AVG)
10/4/21, 8:03 PM OCR_
# make hist - same dimension as horProj - if 0 (space), then True, else False
th = 0; # black pixels threshold value. this represents the space lines
hist = horProj <= th;
#Get mean coordinate of white white pixels groups
ycoords = []
y = 0
count = 0
isSpace = False
median_count = []
for i in range(0, bw_image.shape[0]):
if (not isSpace):
if (hist[i]): #if space is detected, get the first starting y-c
isSpace = True
count = 1
#y = i
else:
if (not hist[i]):
isSpace = False
median_count.append(count)
else:
#y = y + i
count = count + 1
median_count.append(count)
#ycoords.append(y / count)
#returns counts of each blank rows of each of the lines found
return median_count
def get_lines_threshold(percent, img_for_det):
ThresPercent = percent
LinMed = LinesMedian(img_for_det)
LinMed = sorted(LinMed)
LinesThres = LinMed[len(LinMed)//3]*(ThresPercent/100.0)
LinesThres = int(LinesThres)
return LinesThres
def image_for_extraction(raw_image):
raw_image = cv2.GaussianBlur(raw_image,(3,3),0)
ret,no_sm_bw_image = cv2.threshold(raw_image,0,255,cv2.THRESH_BINARY_INV+cv2.TH

return no_sm_bw_image
In [390…
img_for_ext = image_for_extraction(img)
img_for_ext = rotate(img_for_ext,M)
LinesThres = get_lines_threshold(40, img_for_det)
ycoords = findLines(img_for_det, LinesThres)
# save image with lines printed ==========
img_with_lines = img_for_ext.copy()
for i in ycoords:
cv2.line(img_with_lines,(int(0),int(i)),(int(img_with_lines.shape[1]),int(i)),2
cv2.imwrite('result/img_with_lines.png', img_with_lines)
10/4/21, 8:03 PM OCR_
True
Out[390…
In [391…
display("result/img_with_lines.png")
In [ ]:

In [ ]:


Import Import As From Import As: "Test/testing - JPG"

Uploaded by

Document Information

Original Description:

Original Title

Copyright

Available Formats

Share this document

Share or Embed Document

Sharing Options

Did you find this document useful?

Is this content inappropriate?

Copyright:

Available Formats

Import Import As From Import As: "Test/testing - JPG"

Uploaded by

Copyright:

Available Formats

10/4/21, 8:03 PM OCR_

from matplotlib import pyplot as plt

height, width = img_data.shape[:2]

figsize = width / float(dpi), height / float(dpi)

# return cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

#input should be a binarized image - text white, bg black

#Find all white pixels

#Get rotated rect of white pixels

drawrect = cv2.cvtColor(drawrect, cv2.COLOR_GRAY2BGR)

box = np.int0(box) # box now has four vertices of rotated rectangle

#Change rotation angle if the tilt is in another direction

if (rect[1][0] < rect[1][1]): # rect.size.width > rect.size.height

temp[0], temp[1] = temp[1], temp[0]

rect[2] = rect[2] +90.0

#convert rect back to numpy/tuple

#Rotate the image according to the found angle

M = cv2.getRotationMatrix2D(rect[0], rect[2], 1.0)

# img = cv2.warpAffine(img, M, (img.shape[1],img.shape[0]))

#returns the transformation matrix for this rotation

return cv2.warpAffine(image, M, (image.shape[1],image.shape[0]))

# making horizontal projections

horProj = cv2.reduce(bw_image, 1, cv2.cv2.REDUCE_AVG)

th = 0; # black pixels threshold value. this represents the space lines

hist = horProj <= th;

#Get mean coordinate of white white pixels groups

for i in range(0, bw_image.shape[0]):

#when smoothing, thin letters will breakdown, creating

#returns y-coordinates of the lines found

# making horizontal projections

horProj = cv2.reduce(bw_image, 1, cv2.cv2.REDUCE_AVG)

th = 0; # black pixels threshold value. this represents the space lines

hist = horProj <= th;

#Get mean coordinate of white white pixels groups

for i in range(0, bw_image.shape[0]):

#returns counts of each blank rows of each of the lines found

def get_lines_threshold(percent, img_for_det):

LinesThres = get_lines_threshold(40, img_for_det)

ycoords = findLines(img_for_det, LinesThres)

# save image with lines printed ==========

You might also like