Merge pull request #19 from tienthienhd/feature/ocr-custom

fix: fix image size
This commit is contained in:
Trần Minh Đức 2024-05-28 16:40:14 +07:00 committed by GitHub
commit 816f16667a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -12,7 +12,7 @@ from typing import Optional
from django.conf import settings from django.conf import settings
import requests import requests
from PyPDF2 import PdfReader from PyPDF2 import PdfReader, PdfWriter
from reportlab.pdfgen import canvas from reportlab.pdfgen import canvas
from reportlab.lib.pagesizes import letter from reportlab.lib.pagesizes import letter
from PIL import Image from PIL import Image
@ -258,9 +258,10 @@ class RasterisedDocumentParser(DocumentParser):
first_page=1, first_page=1,
last_page=input_pdf.getNumPages()+1) last_page=input_pdf.getNumPages()+1)
can = canvas.Canvas(str(output_path), pagesize=letter) can = canvas.Canvas(str(output_path), pagesize=letter)
for page_num, page in enumerate(input_pdf.pages): for page_num, image in enumerate(images):
page_height = input_pdf.pages[page_num].mediabox[3] page_width, page_height = image.size
page_width = input_pdf.pages[page_num].mediabox[2] # page_height = input_pdf.pages[page_num].mediabox[3]
# page_width = input_pdf.pages[page_num].mediabox[2]
# set size new page # set size new page
can.setPageSize((page_width, page_height)) can.setPageSize((page_width, page_height))
byte_image = io.BytesIO() byte_image = io.BytesIO()
@ -274,13 +275,14 @@ class RasterisedDocumentParser(DocumentParser):
pdfmetrics.registerFont(TTFont('Arial', font_path)) pdfmetrics.registerFont(TTFont('Arial', font_path))
width_api_img = data["pages"][page_num]["dimensions"][1] width_api_img = data["pages"][page_num]["dimensions"][1]
height_api_img = data["pages"][page_num]["dimensions"][0] height_api_img = data["pages"][page_num]["dimensions"][0]
# print(f'kich thuoc goc: height{page_height}, width{page_width}, kich thuoc api: height{height_api_img} width{width_api_img}')
rolate_height = height_api_img /page_height rolate_height = height_api_img /page_height
rolate_width = width_api_img /page_width rolate_width = width_api_img /page_width
for block in data["pages"][page_num]["blocks"]: for block in data["pages"][page_num]["blocks"]:
for line in block.get("lines", []): for line in block.get("lines", []):
y1 = (line.get("bbox")[0][1] / float(rolate_height)) y1 = (line.get("bbox")[0][1] / float(rolate_height))
y2 = (line.get("bbox")[1][1] / float(rolate_height)) y2 = (line.get("bbox")[1][1] / float(rolate_height))
font_size = (y2 - y1) * 72 / 96 font_size = math.floor((y2 - y1) * 72 / 96)-2
y_center_coordinates = y2 - (y2 - y1)/2 y_center_coordinates = y2 - (y2 - y1)/2
for word in line.get("words", []): for word in line.get("words", []):
x1 = word["bbox"][0][0] / float(rolate_width) x1 = word["bbox"][0][0] / float(rolate_width)
@ -294,7 +296,7 @@ class RasterisedDocumentParser(DocumentParser):
w = can.stringWidth(value, font_name, font_size) w = can.stringWidth(value, font_name, font_size)
can.setFont('Arial', font_size) can.setFont('Arial', font_size)
can.drawString(x_center_coordinates - w/2, can.drawString(x_center_coordinates - w/2,
int(page_height) - y_center_coordinates - (font_size/3), int(page_height) - y_center_coordinates - (font_size/2),
value) value)
can.drawImage(ImageReader(io.BytesIO(jpg_image)), can.drawImage(ImageReader(io.BytesIO(jpg_image)),
0, 0, 0, 0,