-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathazure_function_sample
More file actions
93 lines (75 loc) · 4.32 KB
/
Copy pathazure_function_sample
File metadata and controls
93 lines (75 loc) · 4.32 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
import azure.functions as func
import requests
from requests import get, post
from azure.core.credentials import AzureKeyCredential
from azure.ai.formrecognizer import FormRecognizerClient
import os
from azure.identity import DefaultAzureCredential
import asyncio
from azure.storage.blob import BlobServiceClient, BlobClient
def format_bounding_box(bounding_box) -> str:
    """Render a bounding box as a comma-separated list of ``[x, y]`` points.

    Args:
        bounding_box: Iterable of point-like objects exposing ``x`` and ``y``
            attributes, or a falsy value (``None`` / empty) when the service
            returned no box.

    Returns:
        ``"N/A"`` when ``bounding_box`` is falsy, otherwise a string such as
        ``"[0.0, 1.0], [2.0, 1.0]"`` with one ``[x, y]`` entry per point.
    """
    if not bounding_box:
        return "N/A"
    # Generator expression: no intermediate list is needed just to join.
    return ", ".join(f"[{point.x}, {point.y}]" for point in bounding_box)
class RecognizeContentSampleAsync(object):
    """Sample that runs Form Recognizer content (layout) recognition on blobs.

    Every blob in a storage container is downloaded, sent to the Form
    Recognizer service, and the recognized pages (tables, lines, words and
    selection marks) are printed to stdout.
    """

    async def recognize_content(self):
        """Recognize and print the layout of every blob in the container.

        The storage connection string, container name, endpoint and key below
        are placeholders and must be replaced with real values before running
        (prefer loading secrets from application settings over hard-coding).

        Returns:
            None. Results are written to stdout.
        """
        # Imported locally on purpose: the module-level ``FormRecognizerClient``
        # is the synchronous client, which supports neither ``async with`` nor
        # awaiting ``begin_recognize_content`` / ``poller.result()``.
        from azure.ai.formrecognizer.aio import (
            FormRecognizerClient as AsyncFormRecognizerClient,
        )

        # Blob storage connection (placeholder values).
        # The container name must be a plain string, not a list.
        container_name = 'YOUR CONTAINER with original docs'
        blob_service_client = BlobServiceClient.from_connection_string("DefaultEndpointsProtocol=https;AccountName=[Storage Account Name];AccountKey=[Storage Account Key];EndpointSuffix=core.windows.net")
        container_client = blob_service_client.get_container_client(container_name)

        # Form Recognizer credentials (placeholder values).
        # AzureKeyCredential only accepts a string key.
        endpoint = r"https://YOUR_ENDPOINT.cognitiveservices.azure.com/"
        key = "YOUR KEY"

        # One client for the whole run; ``async with`` closes it on exit.
        async with AsyncFormRecognizerClient(
            endpoint=endpoint, credential=AzureKeyCredential(key)
        ) as form_recognizer_client:
            # NOTE: the blob client used here is the synchronous one, so the
            # listing and downloads block the event loop while they run.
            for blob in container_client.list_blobs():
                blob_client = blob_service_client.get_blob_client(
                    container=container_name, blob=blob.name
                )
                # download_blob() returns a stream downloader; the recognizer
                # needs the document payload itself, so read it fully.
                document = blob_client.download_blob().readall()
                # No content_type override: the SDK auto-detects the media
                # type, which also lets the container hold mixed file types.
                poller = await form_recognizer_client.begin_recognize_content(form=document)
                form_pages = await poller.result()
                for idx, content in enumerate(form_pages):
                    self._print_page(idx, content)
    # [END recognize_content_async]

    @staticmethod
    def _print_page(idx, content):
        """Print tables, lines, words and selection marks of one recognized page.

        Args:
            idx: Zero-based position of the page in the result list.
            content: Recognized page object exposing ``width``, ``height``,
                ``unit``, ``tables``, ``lines`` and ``selection_marks``.
        """
        print("----Recognizing content from page #{}----".format(idx+1))
        print("Page has width: {} and height: {}, measured with unit: {}".format(
            content.width,
            content.height,
            content.unit
        ))
        for table_idx, table in enumerate(content.tables):
            print("Table # {} has {} rows and {} columns".format(table_idx, table.row_count, table.column_count))
            print("Table # {} location on page: {}".format(table_idx, format_bounding_box(table.bounding_box)))
            for cell in table.cells:
                print("...Cell[{}][{}] has text '{}' within bounding box '{}'".format(
                    cell.row_index,
                    cell.column_index,
                    cell.text,
                    format_bounding_box(cell.bounding_box)
                ))

        for line_idx, line in enumerate(content.lines):
            print("Line # {} has word count '{}' and text '{}' within bounding box '{}'".format(
                line_idx,
                len(line.words),
                line.text,
                format_bounding_box(line.bounding_box)
            ))
            appearance = line.appearance
            # Flag confidently-handwritten lines as possible signatures.
            if (
                appearance
                and appearance.style_name == "handwriting"
                and appearance.style_confidence > 0.8
            ):
                print("Text line '{}' is handwritten and might be a signature.".format(line.text))
            for word in line.words:
                print("...Word '{}' has a confidence of {}".format(word.text, word.confidence))

        for selection_mark in content.selection_marks:
            print("Selection mark is '{}' within bounding box '{}' and has a confidence of {}".format(
                selection_mark.state,
                format_bounding_box(selection_mark.bounding_box),
                selection_mark.confidence
            ))
        print("----------------------------------------")
async def main():
    """Create the content-recognition sample and await its single run."""
    await RecognizeContentSampleAsync().recognize_content()