import boto3
import textractcaller as tc
from textractcaller.t_call import call_textract, Textract_Features
from textractprettyprinter.t_pretty_print import get_text_from_layout_json

# Where the input PDF lives: S3 bucket name and object key.
s3_bucket = 'docs.scbbs.com'
s3_document_key = 'docs/test/2022_Local_161_MOA_09.pdf'
document_uri = f's3://{s3_bucket}/{s3_document_key}'

# Build the Textract client from a boto3 session pinned to us-west-2.
session = boto3.Session(region_name='us-west-2')
textract_client = session.client('textract')

# Ask Textract for the LAYOUT feature only, reusing the client created above.
requested_features = [Textract_Features.LAYOUT]
layout_textract_json = call_textract(
    input_document=document_uri,
    features=requested_features,
    boto3_textract_client=textract_client,
)

# Turn the Textract response into layout-ordered text, then keep and print
# the entry stored under key 1.
# NOTE(review): key 1 is presumably the first page, in which case any
# further pages of the PDF are not printed -- confirm this is intended.
text_by_key = get_text_from_layout_json(textract_json=layout_textract_json)
layout_text = text_by_key[1]
print(layout_text)
