from pdf2image import convert_from_path
import pytesseract
from PIL import Image
# OCR fallback: rasterize the PDF and run Tesseract on each page image,
# since direct text extraction failed.
# `presentation_path` is expected to be defined earlier in the session.
images = convert_from_path(presentation_path)

# Gather the OCR text of each page, then join once at the end instead of
# growing a string with `+=` on every iteration. An explicit loop (rather
# than a comprehension) keeps `image` and `text` bound at top level, as
# they were before.
page_texts = []
for image in images:
    text = pytesseract.image_to_string(image, lang='eng')
    page_texts.append(text + "\n")  # every page's text ends with a newline
presentation_ocr_text = "".join(page_texts)

presentation_ocr_text[:1500]  # Preview the first 1500 characters of OCR text