79774211

Date: 2025-09-24 21:47:29
Score: 0.5
Natty:
Report link

I ended up trying the other solutions and comments, but I always found I was getting an accuracy of maybe 95%, which is not great for what I want to do.

I am now using EasyOCR, with a seemingly 100% pass rate:

from PyQt5.QtWidgets import QApplication, QMainWindow, QHBoxLayout, QWidget
from PyQt5.QtWebEngineWidgets import QWebEngineView, QWebEnginePage
from PyQt5.QtCore import QUrl, QTimer
import sys
import mss
from PIL import Image
from datetime import datetime
import easyocr
import numpy as np

class CustomWebEnginePage(QWebEnginePage):
    """Web engine page that discards JavaScript console messages."""

    def javaScriptConsoleMessage(self, level, message, lineNumber, sourceID):
        """Ignore the message so page console output is not sent to the terminal."""
        return None

class ScreenMonitorApp:
    """Two-pane web viewer that reloads when OCR text in a screen region changes.

    Constructing an instance builds a window holding two web views, starts a
    timer that OCRs ``self.region`` every ``POLL_INTERVAL_MS`` milliseconds,
    shows the window and then runs the Qt event loop.  The constructor does
    not return normally: it ends by calling ``sys.exit`` with the event
    loop's exit code.
    """

    # Index into QApplication.screens() of the monitor to centre the window on.
    MONITOR_INDEX = 3
    # Milliseconds between OCR checks of the watched region.
    POLL_INTERVAL_MS = 2000
    # Upscale factor applied to the capture before OCR (78x30 -> 234x90).
    OCR_SCALE = 3

    def __init__(self):
        """Build the UI, start polling, show the window and run the event loop."""
        self.app = QApplication(sys.argv)
        self.window = QMainWindow()
        self.window.setGeometry(100, 100, 1400, 800)

        central_widget = QWidget()
        layout = QHBoxLayout(central_widget)

        self.left_web = QWebEngineView()
        self.left_web.setPage(CustomWebEnginePage(self.left_web))
        self.right_web = QWebEngineView()
        self.right_web.setPage(CustomWebEnginePage(self.right_web))

        # Equal stretch factors so both views share the width evenly.
        layout.addWidget(self.left_web, 1)
        layout.addWidget(self.right_web, 1)

        self.window.setCentralWidget(central_widget)

        # Last non-empty OCR result; used to detect changes between polls.
        self.previous_text = ""
        self.reader = easyocr.Reader(['en'])  # Initialize EasyOCR reader for English

        # Screen rectangle (in the dict form mss.grab accepts) that is OCR'd.
        self.region = {"top": 80, "left": 80, "width": 78, "height": 30}

        self.timer = QTimer()
        self.timer.timeout.connect(self.check_region)
        self.timer.start(self.POLL_INTERVAL_MS)

        screens = self.app.screens()

        if self.MONITOR_INDEX < len(screens):
            # Centre the window on the requested monitor.
            geometry = screens[self.MONITOR_INDEX].geometry()
            x = geometry.x() + (geometry.width() - self.window.width()) // 2
            y = geometry.y() + (geometry.height() - self.window.height()) // 2
            self.window.move(x, y)
        else:
            # The window keeps the position given by setGeometry above.
            print("Monitor index out of range. Opening on the primary monitor.")
        self.window.show()
        sys.exit(self.app.exec_())

    def load_url(self, url_l, url_r):
        """Load the left and right web views.

        NOTE(review): ``url_l`` and ``url_r`` are accepted but not used; both
        views are pointed at a fixed URL.  Confirm the intended URL scheme
        before wiring the arguments in.
        """
        print("URLs loaded")
        self.left_web.setUrl(QUrl("https://example.com/"))
        self.right_web.setUrl(QUrl("https://example.com/"))

    def _ocr_image_size(self):
        """Return the (width, height) the captured region is resized to for OCR."""
        return (
            self.region["width"] * self.OCR_SCALE,
            self.region["height"] * self.OCR_SCALE,
        )

    def perform_ocr(self):
        """Capture the watched screen region and return the text EasyOCR reads.

        The capture is upscaled by ``OCR_SCALE`` (3x) with Lanczos resampling,
        converted to grayscale, saved as a timestamped PNG in the current
        working directory for debugging, and then passed to EasyOCR.

        Returns:
            The first text fragment EasyOCR reports, or ``""`` if it found none.
        """
        with mss.mss() as sct:
            shot = sct.grab(self.region)
            pil_img = Image.frombytes("RGB", shot.size, shot.bgra, "raw", "BGRX")

        # Derive the target size from the region so the aspect ratio is kept
        # if the region changes (~300 DPI based on an assumed 96 DPI source).
        pil_resized = pil_img.resize(self._ocr_image_size(), Image.LANCZOS)

        # Convert to grayscale
        pil_gray = pil_resized.convert('L')

        # Save the processed image with a timestamp (one file per poll).
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        pil_gray.save(f"ocr_capture_{timestamp}.png", dpi=(300, 300))  # Set DPI to 300

        # Convert PIL image to NumPy array for EasyOCR
        img_np = np.array(pil_gray)

        # detail=0 returns only the text strings, no bounding box/confidence.
        result = self.reader.readtext(img_np, detail=0)
        return result[0] if result else ""

    def check_region(self):
        """Timer callback: reload the views when the OCR text changes.

        Empty OCR results are ignored, so a failed read neither clears
        ``previous_text`` nor triggers a reload.
        """
        current_text = self.perform_ocr()
        if not current_text or current_text == self.previous_text:
            return
        self.previous_text = current_text
        self.load_url(current_text, current_text)
        print(f"Updated search for: {current_text}")

if __name__ == "__main__":
    # Constructing the app shows the window and runs the Qt event loop; the
    # constructor finishes by calling sys.exit, so no reference is kept.
    ScreenMonitorApp()
Reasons:
  • RegEx Blacklisted phrase (1): I want
  • Long answer (-1):
  • Has code block (-0.5):
  • Self-answer (0.5):
  • Low reputation (0.5):
Posted by: RvBVakama