import boto3
import textractcaller as tc
from textractcaller.t_call import call_textract, Textract_Features
from textractprettyprinter.t_pretty_print import get_text_from_layout_json

# Script: run Amazon Textract (LAYOUT + TABLES) on a PDF stored in S3 and
# print the extracted text of the whole document in reading order.

# Location of the document to analyze.
s3_bucket = 'docs.scbbs.com'
s3_document_key = 'docs/test/2022_Local_161_MOA_09.pdf'

# Pin the Textract client to us-west-2 instead of relying on whatever
# default region the environment happens to provide.
session = boto3.Session(region_name='us-west-2')
textract_client = session.client('textract')

# Analyze the S3 object, requesting the LAYOUT and TABLES feature types.
textract_json = call_textract(
    input_document=f's3://{s3_bucket}/{s3_document_key}',
    features=[Textract_Features.LAYOUT, Textract_Features.TABLES],
    boto3_textract_client=textract_client,
)

# Convert the Textract response into layout-ordered text. Every exclusion is
# switched off, so figure text, page headers, page footers and page numbers
# are all kept in the output.
layout = get_text_from_layout_json(
    textract_json=textract_json,
    exclude_figure_text=False,
    exclude_page_header=False,
    exclude_page_footer=False,
    exclude_page_number=False,
)

# Per the textractprettyprinter documentation the result is a dict of
# page number -> page text. Reading only layout[1] silently dropped every
# page after the first (and raised KeyError when page 1 had no entry), so
# join all pages in ascending page order instead.
full_text = '\n'.join(layout[page_number] for page_number in sorted(layout))
print(full_text)
