Rename remote parser settings to remote OCR

This commit is contained in:
shamoon
2024-02-29 14:08:07 -08:00
parent fba4ce9147
commit 24c40bbc5e
5 changed files with 108 additions and 42 deletions

View File

@@ -39,9 +39,9 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
)
with override_settings(
-REMOTE_PARSER_ENGINE="azureaivision",
-REMOTE_PARSER_API_KEY="somekey",
-REMOTE_PARSER_ENDPOINT="https://endpoint.cognitiveservices.azure.com/",
+REMOTE_OCR_ENGINE="azureaivision",
+REMOTE_OCR_API_KEY="somekey",
+REMOTE_OCR_ENDPOINT="https://endpoint.cognitiveservices.azure.com/",
):
parser = RemoteDocumentParser(uuid.uuid4())
parser.parse(
@@ -66,10 +66,64 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
}
with override_settings(
-REMOTE_PARSER_ENGINE="awstextract",
-REMOTE_PARSER_API_KEY="somekey",
-REMOTE_PARSER_API_KEY_ID="somekeyid",
-REMOTE_PARSER_REGION="us-west-2",
+REMOTE_OCR_ENGINE="awstextract",
+REMOTE_OCR_API_KEY="somekey",
+REMOTE_OCR_API_KEY_ID="somekeyid",
+REMOTE_OCR_REGION="us-west-2",
):
parser = RemoteDocumentParser(uuid.uuid4())
parser.parse(
self.SAMPLE_FILES / "simple-digital.pdf",
"application/pdf",
)
self.assertContainsStrings(
parser.text.strip(),
["This is a test document."],
)
@mock.patch("google.cloud.vision.ImageAnnotatorClient")
@mock.patch("google.cloud.storage.Client")
@mock.patch("google.oauth2.service_account.Credentials.from_service_account_file")
def test_get_text_with_googlecloudvision(
self,
mock_credentials_from_file,
mock_gcs_client,
mock_gcv_client,
):
credentials = mock.Mock()
credentials.project_id = "someproject"
mock_credentials_from_file.return_value = credentials
blob_mock0 = mock.Mock()
blob_mock0.name = "somefile.pdf"
blob_mock1 = mock.Mock()
blob_mock1.name = "somefile.json"
blob_mock1.download_as_bytes.return_value.decode.return_value = json.dumps(
{
"responses": [
{
"fullTextAnnotation": {
"text": "This is a test document.",
},
},
],
},
)
mock_gcs_client.return_value.lookup_bucket.return_value.list_blobs.return_value = [
blob_mock0,
blob_mock1,
]
result = mock.Mock()
result.result = mock.Mock()
mock_gcv_client.return_value.async_batch_annotate_files.return_value = result
with override_settings(
REMOTE_OCR_ENGINE="googlecloudvision",
REMOTE_OCR_CREDENTIALS_FILE="somefile.json",
):
parser = RemoteDocumentParser(uuid.uuid4())
parser.parse(