Reports

from pdf2image import convert_from_path
import pytesseract
from PIL import Image

# Direct text extraction failed for this PDF, so fall back to OCR: convert
# the pages of the PDF at `presentation_path` into images first.
# NOTE(review): `presentation_path` is not defined in this snippet —
# presumably it is set earlier in the file/session; confirm.
images = convert_from_path(presentation_path)

# Run Tesseract (English language data) on every page image, keeping the
# results in the same order as `images`.
page_texts = [pytesseract.image_to_string(image, lang='eng') for image in images]

# Terminate each page's text with a newline and assemble the result with a
# single join rather than repeated `+=`, which can re-copy the growing string
# on every iteration.
presentation_ocr_text = "".join(text + "\n" for text in page_texts)

presentation_ocr_text[:1500]  # Preview the first 1500 characters of OCR text
79648581