import io
import os
import re
from google.cloud import vision_v1
from google.cloud.vision_v1 import types
# Header word whose bounding boxes anchor the two digit columns
# (Korean for "employee number").
_HEADER_LABEL = '사번'

# Dates are recognised in YYYY/MM/DD form.
_DATE_PATTERN = re.compile(r'\d{4}/\d{2}/\d{2}')

# Vertical pixel band (top-left y of a word) that is searched for digits.
_BAND_TOP, _BAND_BOTTOM = 190, 1000

# Horizontal search window of each column, as (left offset, right offset)
# relative to the x of the matching header's first vertex.
_COLUMN_WINDOWS = ((-30, 30), (-50, 0))


def _corners(annotation):
    """Return the annotation's bounding-polygon vertices as (x, y) tuples."""
    return [(vertex.x, vertex.y) for vertex in annotation.bounding_poly.vertices]


def _format_corners(corners):
    """Render corner tuples as '(x, y),(x, y),...' for the printed report."""
    return ','.join('({}, {})'.format(x, y) for x, y in corners)


def _print_digit_rows(annotations, left, right, top, bottom):
    """Print all-digit annotations inside the window, one row per distinct y.

    An annotation is selected when its first vertex lies inside the inclusive
    rectangle and its text consists only of digits. Selected annotations are
    ordered by the y of their first vertex; annotations sharing exactly the
    same y are concatenated into one printed row, numbered from 1. Nothing is
    printed when the window contains no digits.
    """
    digits = []
    for annotation in annotations:
        first = annotation.bounding_poly.vertices[0]
        if (left <= first.x <= right and top <= first.y <= bottom
                and annotation.description.isdigit()):
            digits.append(annotation)
    digits.sort(key=lambda a: a.bounding_poly.vertices[0].y)

    # Consecutive annotations with an identical y form one row.
    rows = []
    for annotation in digits:
        y = annotation.bounding_poly.vertices[0].y
        if rows and rows[-1][0] == y:
            rows[-1][1].append(annotation)
        else:
            rows.append((y, [annotation]))

    for number, (_, row) in enumerate(rows, start=1):
        text = ''.join(a.description for a in row)
        print(number, text, 'bounds: {}'.format(_format_corners(_corners(row[0]))))


def detect_text(path, target_strings):
    """Run Vision text detection on an image and print a route/date/digit report.

    The function prints, in order:
      1. every occurrence of the header word with its bounding box;
      2. a summary line with the last matching route code and the last
         YYYY/MM/DD date whose box lies entirely above the lowest header
         (either is an empty string when nothing matched);
      3. for each of the two columns, the anchor x followed by the digit rows
         found in that column's search window.

    Args:
        path: Path of the image file to send to the Vision API.
        target_strings: Collection of route codes; an annotation whose text
            is a member is taken as the route.

    Returns:
        None. All results are written to standard output.

    Raises:
        RuntimeError: If the Vision API reports an error for the image.
        OSError: If the image file cannot be read.
    """
    client = vision_v1.ImageAnnotatorClient()
    with io.open(path, 'rb') as image_file:
        content = image_file.read()
    image = types.Image(content=content)
    response = client.text_detection(image=image)
    if response.error.message:
        # Without this check an API failure looks like a page with no text.
        raise RuntimeError(
            'Vision API error for {}: {}'.format(path, response.error.message))
    annotations = response.text_annotations

    # Locate the column headers: remember the x of the first two and the
    # lowest bottom edge of all of them.
    column_x = [0, 0]
    headers_seen = 0
    header_bottom = 0
    for annotation in annotations:
        if annotation.description != _HEADER_LABEL:
            continue
        corners = _corners(annotation)
        print('"{}"'.format(annotation.description),
              'bounds: {}'.format(_format_corners(corners)))
        # Only two columns are reported; extra header hits must not overflow.
        if headers_seen < len(column_x):
            column_x[headers_seen] = corners[0][0]
        headers_seen += 1
        header_bottom = max(header_bottom, corners[2][1])

    # Pick up the route code and the date printed above the headers.
    route = ""
    olist_date = ""  # stays empty when no qualifying date is found
    for annotation in annotations:
        description = annotation.description
        if description in target_strings:
            # NOTE(review): "66408" looks like an OCR misread of "6640B"; it
            # only reaches this branch if the caller lists it in
            # target_strings - confirm.
            route = "6640B" if description == "66408" else description
        match = _DATE_PATTERN.search(description)
        if match and max(v.y for v in annotation.bounding_poly.vertices) < header_bottom:
            olist_date = match.group(0)
    print ( "===== ", route, olist_date, "==================>" )

    # Report the digits found under each header.
    # NOTE(review): when fewer than two headers were found the missing anchor
    # stays 0, so that column's window sits at the left image edge.
    for anchor_x, (offset_left, offset_right) in zip(column_x, _COLUMN_WINDOWS):
        print ( "apm", anchor_x)
        _print_digit_rows(annotations,
                          anchor_x + offset_left, anchor_x + offset_right,
                          _BAND_TOP, _BAND_BOTTOM)
# Service-account key picked up by the Vision client; the path is relative
# to the current working directory.
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = 'doc/sbus-407c7-62869bfd7e72.json'

# Folder whose files are sent through detect_text one by one.
path = "/home/pi/ofile/upload/"
# os.listdir returns entries in arbitrary order; sort for a reproducible report.
files = sorted(os.listdir(path))
# Route codes that detect_text looks for in the recognised text.
target_strings = ['6613', '6614', '6615', '6640A', '6640B', '6616', '6638', '6716', '6713']

for file in files:
    file_name = os.path.join(path, file)
    # Sub-directories cannot be opened as images; skip them instead of
    # aborting the whole batch.
    if not os.path.isfile(file_name):
        continue
    print ()
    print(file)
    detect_text(file_name, target_strings)