json textract response
# Call Amazon Textract and get JSON response
# client = boto3.client('textract')
# response = client.analyze_document(Document={...}, FeatureTypes=[...])
# Parse JSON response from Textract
doc = Document(response)
# Iterate over elements in the document
for page in doc.pages:
# Print lines and words
for line in page.lines:
print("Line: {}--{}".format(line.text, line.confidence))
for word in line.words:
print("Word: {}--{}".format(word.text, word.confidence))
# Print tables
for table in page.tables:
for r, row in enumerate(table.rows):
for c, cell in enumerate(row.cells):
print("Table[{}][{}] = {}-{}".format(r, c, cell.text, cell.confidence))
# Print fields
for field in page.form.fields:
print("Field: Key: {}, Value: {}".format(field.key.text, field.value.text))
# Get field by key
key = "Phone Number:"
field = page.form.getFieldByKey(key)
if(field):
print("Field: Key: {}, Value: {}".format(field.key, field.value))
# Search fields by key
key = "address"
fields = page.form.searchFieldsByKey(key)
for field in fields:
print("Field: Key: {}, Value: {}".format(field.key, field.value))