Professional Documents
Culture Documents
In [369…
import cv2
import numpy as np
test_image = "test/testing.jpg"
img = cv2.imread(test_image,0)
In [ ]:
In [370…
def display(img_path):
dpi = 80
img_data = plt.imread(img_path)
fig = plt.figure(figsize=figsize)
ax = fig.add_axes([0, 0, 1, 1])
ax.axis('off')
ax.imshow(img_data, cmap='gray')
plt.show()
In [371…
display(test_image)
In [372…
#def grayscale(image):
In [373…
#gray_image = grayscale(img)
#cv2.imwrite("result/gray.jpg", gray_image)
In [374…
#display("result/gray.jpg")
localhost:8888/nbconvert/html/OCR_.ipynb?download=false 1/7
10/4/21, 8:03 PM OCR_
In [375…
no_noise = cv2.GaussianBlur(img,(5,5),0)
In [376…
cv2.imwrite("result/no_noise.jpg", no_noise)
True
Out[376…
In [377…
display("result/no_noise.jpg")
In [378…
ret , bw_image = cv2.threshold(no_noise,0,255,cv2.THRESH_BINARY_INV+cv2.THRESH_OTSU)
In [379…
cv2.imwrite("result/binarized.jpg", bw_image)
True
Out[379…
In [380…
display("result/binarized.jpg")
localhost:8888/nbconvert/html/OCR_.ipynb?download=false 2/7
10/4/21, 8:03 PM OCR_
In [381…
kernel = np.ones((2,2),np.uint8)
bw_img = cv2.dilate(bw_image,kernel)
In [382…
cv2.imwrite("result/binarized_new.jpg", bw_img)
True
Out[382…
In [383…
display("result/binarized_new.jpg")
In [384…
def getTransformationMatrix(img):
pts = np.empty([0,0])
pts = cv2.findNonZero(img)
rect = cv2.minAreaRect(pts)
# rect[0] has the center of rectangle, rect[1] has width and height, rect[2] ha
# To draw the rotated box and save the png image, uncomment below
drawrect = img.copy()
box = cv2.cv2.boxPoints(rect)
cv2.drawContours(drawrect,[box],0,(0,0,255),10)
cv2.imwrite('result/rotated_rect.png', drawrect)
rect = list(rect)
temp = list(rect[1])
rect[1] = tuple(temp)
rect = np.asarray(rect)
localhost:8888/nbconvert/html/OCR_.ipynb?download=false 3/7
10/4/21, 8:03 PM OCR_
rect[2] = 357.22
rotated_image = np.empty([0,0])
return M
In [385…
def rotate(image, M):
display("result/rotated_rect.png")
M = getTransformationMatrix(bw_img)
C:\Users\Admin\AppData\Local\Temp/ipykernel_19692/3204302291.py:28: VisibleDeprecationWa
rning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lis
ts-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant t
o do this, you must specify 'dtype=object' when creating the ndarray.
rect = np.asarray(rect)
In [386…
img_for_det = rotate(bw_img,M)
In [387…
cv2.imwrite("result/rotated_img.jpg", img_for_det)
True
Out[387…
In [388…
display("result/rotated_img.jpg")
localhost:8888/nbconvert/html/OCR_.ipynb?download=false 4/7
10/4/21, 8:03 PM OCR_
In [389…
def findLines(bw_image, LinesThres):
# make hist - same dimension as horProj - if 0 (space), then True, else False
ycoords = []
y = 0
count = 0
isSpace = False
if (not isSpace):
if (hist[i]): #if space is detected, get the first starting y-c
isSpace = True
count = 1
y = i
else:
if (not hist[i]):
isSpace = False
ycoords.append(y / count)
else:
y = y + i
count = count + 1
ycoords.append(y / count)
return ycoords
def LinesMedian(bw_image):
localhost:8888/nbconvert/html/OCR_.ipynb?download=false 5/7
10/4/21, 8:03 PM OCR_
# make hist - same dimension as horProj - if 0 (space), then True, else False
ycoords = []
y = 0
count = 0
isSpace = False
median_count = []
if (not isSpace):
if (hist[i]): #if space is detected, get the first starting y-c
isSpace = True
count = 1
#y = i
else:
if (not hist[i]):
isSpace = False
median_count.append(count)
else:
#y = y + i
count = count + 1
median_count.append(count)
#ycoords.append(y / count)
return median_count
ThresPercent = percent
LinMed = LinesMedian(img_for_det)
LinMed = sorted(LinMed)
LinesThres = LinMed[len(LinMed)//3]*(ThresPercent/100.0)
LinesThres = int(LinesThres)
return LinesThres
def image_for_extraction(raw_image):
raw_image = cv2.GaussianBlur(raw_image,(3,3),0)
ret,no_sm_bw_image = cv2.threshold(raw_image,0,255,cv2.THRESH_BINARY_INV+cv2.TH
return no_sm_bw_image
In [390…
img_for_ext = image_for_extraction(img)
img_for_ext = rotate(img_for_ext,M)
img_with_lines = img_for_ext.copy()
for i in ycoords:
cv2.line(img_with_lines,(int(0),int(i)),(int(img_with_lines.shape[1]),int(i)),2
cv2.imwrite('result/img_with_lines.png', img_with_lines)
localhost:8888/nbconvert/html/OCR_.ipynb?download=false 6/7
10/4/21, 8:03 PM OCR_
True
Out[390…
In [391…
display("result/img_with_lines.png")
In [ ]:
In [ ]:
localhost:8888/nbconvert/html/OCR_.ipynb?download=false 7/7