# Load the EAST model and run text detection.
# Model download: https://codeload.github.com/oyyd/frozen_east_text_detection.pb/zip/refs/heads/master
#coding:utf-8
import cv2
import math
############ Utility functions ############
def decode(scores, geometry, scoreThresh):
    """Turn the raw score and geometry maps into rotated boxes.

    Args:
        scores: 4-D array of shape (1, 1, H, W) with one score per map cell.
        geometry: 4-D array of shape (1, 5, H, W). Channels 0 and 2 are
            summed into the box height, channels 1 and 3 into the box width,
            and channel 4 is the rotation angle in radians.
        scoreThresh: cells whose score is below this value are skipped.

    Returns:
        A two-element list ``[detections, confidences]``. Each detection is
        ``((center_x, center_y), (width, height), angle_in_degrees)`` and
        ``confidences`` holds the matching scores as Python floats, in the
        same order.

    Raises:
        AssertionError: if ``scores`` or ``geometry`` does not have the
            shape described above.
    """
    detections = []
    confidences = []

    ############ CHECK DIMENSIONS AND SHAPES OF geometry AND scores ############
    assert len(scores.shape) == 4, "Incorrect dimensions of scores"
    assert len(geometry.shape) == 4, "Incorrect dimensions of geometry"
    assert scores.shape[0] == 1, "Invalid dimensions of scores"
    assert geometry.shape[0] == 1, "Invalid dimensions of geometry"
    assert scores.shape[1] == 1, "Invalid dimensions of scores"
    assert geometry.shape[1] == 5, "Invalid dimensions of geometry"
    assert scores.shape[2] == geometry.shape[2], "Invalid dimensions of scores and geometry"
    assert scores.shape[3] == geometry.shape[3], "Invalid dimensions of scores and geometry"

    height = scores.shape[2]
    width = scores.shape[3]
    for y in range(height):
        # Pull out the current row of every map once, outside the inner loop.
        scoresData = scores[0][0][y]
        x0_data = geometry[0][0][y]
        x1_data = geometry[0][1][y]
        x2_data = geometry[0][2][y]
        x3_data = geometry[0][3][y]
        anglesData = geometry[0][4][y]

        for x in range(width):
            score = scoresData[x]

            # Skip cells that do not reach the score threshold.
            if score < scoreThresh:
                continue

            # Map cell -> pixel coordinates; the factor 4.0 is presumably the
            # down-sampling ratio of the output maps (TODO confirm for the
            # model in use).
            offsetX = x * 4.0
            offsetY = y * 4.0
            angle = anglesData[x]

            cosA = math.cos(angle)
            sinA = math.sin(angle)
            h = x0_data[x] + x2_data[x]
            w = x1_data[x] + x3_data[x]

            # Reference corner of the rotated box.
            offset = (
                offsetX + cosA * x1_data[x] + sinA * x2_data[x],
                offsetY - sinA * x1_data[x] + cosA * x2_data[x],
            )

            # Two opposite corners; their midpoint is the box centre.
            p1 = (-sinA * h + offset[0], -cosA * h + offset[1])
            p3 = (-cosA * w + offset[0], sinA * w + offset[1])
            center = (0.5 * (p1[0] + p3[0]), 0.5 * (p1[1] + p3[1]))

            # Angle is negated and converted from radians to degrees.
            detections.append((center, (w, h), -1 * angle * 180.0 / math.pi))
            confidences.append(float(score))

    # Return detections and confidences
    return [detections, confidences]
# ---- Script: run the detector on one image and display the result ----
modelpath = "d:/downloads/frozen_east_text_detection.pb"
imagepath = 'd:/ocr.png'

net = cv2.dnn.readNetFromTensorflow(modelpath)
# Layers fetched from the network: the first is used as the score map and
# the second as the geometry map when calling decode() below.
outNames = ['feature_fusion/Conv_7/Sigmoid', 'feature_fusion/concat_3']
inputsize = (320, 320)

# The input needs 3 channels, so load the image as colour (flag 1).
img = cv2.imread(imagepath, 1)
if img is None:
    # cv2.imread reports failure by returning None rather than raising;
    # fail here with a clear message instead of an AttributeError below.
    raise IOError("could not read image: " + imagepath)

height = img.shape[0]
width = img.shape[1]
# Ratios used to map box coordinates from network input size back onto the
# original image.
rW = width / float(inputsize[0])
rH = height / float(inputsize[1])

confThreshold = 0.5
nmsThreshold = 0.4
scalefactor = 1.0
meanval = (123.68, 116.78, 103.94)

# Pre-processing: resize to inputsize, subtract the mean, swap R and B,
# no cropping.
blob = cv2.dnn.blobFromImage(img, scalefactor, inputsize, meanval, swapRB=True, crop=False)
net.setInput(blob)
out = net.forward(outNames)

t, _ = net.getPerfProfile()
label = "inference time: %.2f ms" % (t * 1000.0 / cv2.getTickFrequency())
print(label)
print(out[0].shape, out[1].shape)

scores = out[0]
geometry = out[1]
[boxes, confidences] = decode(scores, geometry, confThreshold)

frame = img

# Apply NMS
indices = cv2.dnn.NMSBoxesRotated(boxes, confidences, confThreshold, nmsThreshold)
print(indices)

for i in indices:
    # Depending on the OpenCV version each entry is either a plain integer
    # or a one-element array; normalise to a Python int before indexing.
    idx = int(i[0]) if hasattr(i, "__len__") else int(i)

    # Get the 4 corners of the rotated rect.
    vertices = cv2.boxPoints(boxes[idx])
    print("vertices:", vertices)

    # Scale the corners back to the original image size and round them to
    # integer pixel coordinates for drawing.
    corners = [(int(round(vx * rW)), int(round(vy * rH))) for vx, vy in vertices]

    # Draw the closed quadrilateral, one edge at a time.
    for j in range(4):
        cv2.line(frame, corners[j], corners[(j + 1) % 4], (0, 255, 0), 2, cv2.LINE_AA)

# Put efficiency information
cv2.putText(frame, label, (0, 15), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255))

# Display the frame for up to 3 seconds (or until a key is pressed).
cv2.imshow("result", frame)
cv2.waitKey(3000)
cv2.destroyAllWindows()
# Tags: OCR, dimensions, geometry, Invalid, assert, opencv, shape, scores, EAST
# From: https://www.cnblogs.com/hakula/p/18007548