79795411

Date: 2025-10-21 01:48:55
Score: 4
Natty:
Report link
# compare_icon_fmt.py
import cv2
import numpy as np
from dataclasses import dataclass
from typing import Tuple, List

# ===================== T H A M  S Ố  &  C Ấ U  H Ì N H =====================

@dataclass
class RedMaskParams:
    """Settings consumed by ``red_mask_on_dashboard`` to isolate red pixels.

    Each (lower, upper) pair is handed to ``cv2.inRange`` on the HSV image and
    the two resulting masks are OR-ed together.
    """
    # Dual HSV red range: hue [0..10] U [170..180]
    lower1: Tuple[int, int, int] = (0, 80, 50)      # lower bound of the first band
    upper1: Tuple[int, int, int] = (10, 255, 255)   # upper bound of the first band
    lower2: Tuple[int, int, int] = (170, 80, 50)    # lower bound of the second band
    upper2: Tuple[int, int, int] = (180, 255, 255)  # upper bound of the second band
    open_ksize: int = 3   # elliptical kernel size for MORPH_OPEN; values <= 0 skip the step
    close_ksize: int = 5  # elliptical kernel size for MORPH_CLOSE; values <= 0 skip the step

@dataclass
class CCParams:
    """Settings consumed by ``find_candidate_boxes`` (dilation + connected components)."""
    dilate_ksize: int = 3     # side of the square kernel used to dilate the mask
    min_area: int = 150       # components with a smaller area are rejected
    max_area: int = 200000    # components with a larger area are rejected
    aspect_min: float = 0.5   # minimum accepted width / height ratio
    aspect_max: float = 2.5   # maximum accepted width / height ratio
    pad: int = 2              # pixels added on every side of a box (clipped to the image)

@dataclass
class FMTParams:
    """Settings consumed by ``fourier_mellin_register``."""
    hann: bool = True        # forwarded as ``use_hann`` to ``_fft_magnitude``
    eps: float = 1e-3        # added to the DFT magnitude before taking the log
    min_scale: float = 0.5   # lower clip bound for the estimated scale
    max_scale: float = 2.0   # upper clip bound for the estimated scale

@dataclass
class MatchParams:
    """Settings for edge extraction and the final accept/reject decision."""
    ncc_threshold: float = 0.45  # best score must reach this for "pass" to be True
    canny_low: int = 60          # first threshold passed to cv2.Canny in edge_preprocess
    canny_high: int = 120        # second threshold passed to cv2.Canny in edge_preprocess

# ===================== 1) LOAD & BINARIZE =====================

def load_and_binarize(path: str):
    """Read an image from disk and derive its RGB and Otsu-binarized versions.

    Args:
        path: Location of the image file.

    Returns:
        A ``(bgr, rgb, binary)`` tuple: the image as loaded by OpenCV, the
        same image converted to RGB, and the grayscale image thresholded
        with Otsu's method (values 0 or 255).

    Raises:
        FileNotFoundError: If ``cv2.imread`` cannot load the file.
    """
    bgr = cv2.imread(path, cv2.IMREAD_COLOR)
    if bgr is None:
        raise FileNotFoundError(f"Không thể đọc ảnh: {path}")

    otsu_mode = cv2.THRESH_BINARY + cv2.THRESH_OTSU
    grayscale = cv2.cvtColor(bgr, cv2.COLOR_BGR2GRAY)
    binary = cv2.threshold(grayscale, 0, 255, otsu_mode)[1]
    return bgr, cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB), binary

# ===================== 2) TEMPLATE BIN + INVERT =====================

def binarize_and_invert_template(tpl_bgr):
    """Otsu-threshold a BGR template and also return its bitwise inverse.

    Args:
        tpl_bgr: Template image in BGR channel order.

    Returns:
        A ``(binary, inverted)`` pair, where ``inverted`` is the bitwise NOT
        of ``binary``.
    """
    grayscale = cv2.cvtColor(tpl_bgr, cv2.COLOR_BGR2GRAY)
    otsu_mode = cv2.THRESH_BINARY + cv2.THRESH_OTSU
    binary = cv2.threshold(grayscale, 0, 255, otsu_mode)[1]
    return binary, cv2.bitwise_not(binary)

# ===================== 3) RED MASK =====================

def red_mask_on_dashboard(dash_bgr, red_params: RedMaskParams):
    """Build a binary mask of the red pixels in a BGR dashboard image.

    The image is converted to HSV, thresholded against the two ranges in
    ``red_params``, and the union is cleaned with a morphological open
    followed by a close. Either cleanup step is skipped when its kernel size
    is not positive.

    Args:
        dash_bgr: Dashboard image in BGR channel order.
        red_params: HSV bounds and morphology kernel sizes.

    Returns:
        The cleaned mask produced by ``cv2.inRange`` / ``cv2.morphologyEx``.
    """
    hsv = cv2.cvtColor(dash_bgr, cv2.COLOR_BGR2HSV)
    low_band = cv2.inRange(hsv, red_params.lower1, red_params.upper1)
    high_band = cv2.inRange(hsv, red_params.lower2, red_params.upper2)
    mask = cv2.bitwise_or(low_band, high_band)

    # Open first, then close, each with its own elliptical kernel.
    cleanup_steps = (
        (red_params.open_ksize, cv2.MORPH_OPEN),
        (red_params.close_ksize, cv2.MORPH_CLOSE),
    )
    for ksize, operation in cleanup_steps:
        if ksize <= 0:
            continue
        kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (ksize, ksize))
        mask = cv2.morphologyEx(mask, operation, kernel)
    return mask

def apply_mask_to_binarized(binarized, mask):
    """Keep ``binarized`` only where ``mask`` is non-zero; other pixels become 0."""
    masked = cv2.bitwise_and(binarized, binarized, mask=mask)
    return masked

# ===================== 4) DILATE + CONNECTED COMPONENTS =====================

def find_candidate_boxes(masked_bin, cc_params: CCParams) -> List[Tuple[int,int,int,int]]:
    """Return padded bounding boxes of plausible connected components.

    The mask is dilated once with a square kernel, labelled with
    8-connectivity, and every component (the background label 0 excluded)
    is kept only if both its area and its width/height ratio fall inside the
    limits in ``cc_params``.

    Args:
        masked_bin: Single-channel image; non-zero pixels are foreground.
        cc_params: Dilation size, area/aspect limits and box padding.

    Returns:
        A list of ``(x, y, w, h)`` boxes, padded by ``cc_params.pad`` and
        clipped to the image bounds.
    """
    side = cc_params.dilate_ksize
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (side, side))
    dilated = cv2.dilate(masked_bin, kernel, iterations=1)
    foreground = (dilated > 0).astype(np.uint8)

    num_labels, _, stats, _ = cv2.connectedComponentsWithStats(foreground, connectivity=8)

    img_h, img_w = masked_bin.shape[:2]
    pad = cc_params.pad
    candidates = []
    for label in range(1, num_labels):
        left, top, width, height, area = stats[label]

        area_ok = not (area < cc_params.min_area or area > cc_params.max_area)
        # The small epsilon guards the division for zero-height components.
        ratio = width / (height + 1e-6)
        ratio_ok = cc_params.aspect_min <= ratio <= cc_params.aspect_max
        if not (area_ok and ratio_ok):
            continue

        x0 = max(0, left - pad)
        y0 = max(0, top - pad)
        x1 = min(img_w, left + width + pad)
        y1 = min(img_h, top + height + pad)
        candidates.append((x0, y0, x1 - x0, y1 - y0))
    return candidates

# ===================== 5) CROP CHẶT TEMPLATE =====================

def tight_crop_template(tpl_inv):
    """Crop the template to the bounding box of its largest external contour.

    Args:
        tpl_inv: Single-channel binary template.

    Returns:
        The cropped view of ``tpl_inv``, or ``tpl_inv`` unchanged when no
        contour is found.
    """
    contours, _ = cv2.findContours(tpl_inv, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if len(contours) == 0:
        return tpl_inv

    largest = max(contours, key=cv2.contourArea)
    left, top, width, height = cv2.boundingRect(largest)
    return tpl_inv[top:top + height, left:left + width]

# ===================== 6) FOURIER–MELLIN (scale, rotation) =====================

def _fft_magnitude(img: np.ndarray, use_hann=True, eps=1e-3) -> np.ndarray:
    """Return the centered, log-scaled DFT magnitude of ``img`` scaled to [0, 1].

    Args:
        img: Single-channel floating-point image (the caller passes float32).
        use_hann: When true, multiply the image by a 2-D Hanning window before
            the transform to reduce edge artifacts.
        eps: Added to the magnitude before the logarithm so ``log(0)`` never
            occurs.

    Returns:
        The min-max normalized log-magnitude spectrum with the zero frequency
        moved to the image center.

    Raises:
        cv2.error: If OpenCV rejects the input, e.g. an image with a side of
            one pixel when ``use_hann`` is true.
    """
    if use_hann:
        rows, cols = img.shape[:2]
        # cv2.createHanningWindow requires both width and height to be > 1, so
        # building two 1-pixel-thick windows and multiplying them always raised
        # cv2.error. Ask OpenCV for the full 2-D window instead (size is
        # given as (width, height)).
        window = cv2.createHanningWindow((cols, rows), cv2.CV_32F)
        img = img * window
    dft = cv2.dft(img, flags=cv2.DFT_COMPLEX_OUTPUT)
    # Move the zero-frequency term to the center; the last axis holds re/im.
    dft_shift = np.fft.fftshift(dft, axes=(0, 1))
    mag = cv2.magnitude(dft_shift[:, :, 0], dft_shift[:, :, 1])
    mag = np.log(mag + eps)
    mag = cv2.normalize(mag, None, 0, 1, cv2.NORM_MINMAX)
    return mag

def _log_polar(mag: np.ndarray) -> Tuple[np.ndarray, float]:
    """Remap ``mag`` to log-polar coordinates around its center.

    Args:
        mag: 2-D image (here a magnitude spectrum).

    Returns:
        ``(log_polar_image, M)`` where ``M`` is the magnitude scale passed to
        ``cv2.logPolar``: the image width divided by the log of the largest
        radius that fits inside the image.
    """
    rows, cols = mag.shape[0], mag.shape[1]
    center = (cols // 2, rows // 2)
    max_radius = min(center)
    # The epsilon keeps the logarithm defined when max_radius is 0.
    M = cols / np.log(max_radius + 1e-6)
    warp_flags = cv2.WARP_FILL_OUTLIERS + cv2.INTER_LINEAR
    return cv2.logPolar(mag, center, M, warp_flags), M

def fourier_mellin_register(img_ref: np.ndarray, img_mov: np.ndarray, fmt_params: FMTParams):
    """Estimate the scale and rotation that map ``img_ref`` onto ``img_mov``.

    Both images are min-max normalized, converted to log-magnitude spectra,
    remapped to log-polar coordinates and compared with phase correlation.

    Args:
        img_ref: Reference image (single channel). Must have the same size as
            ``img_mov`` for ``cv2.phaseCorrelate``.
        img_mov: Image to be explained as a scaled/rotated ``img_ref``.
        fmt_params: Windowing, epsilon and scale-clipping settings.

    Returns:
        ``(scale, rotation_deg, response)``: the scale clipped to
        ``[min_scale, max_scale]``, the rotation wrapped to ``[-180, 180)``
        in the counter-clockwise-positive convention of
        ``cv2.getRotationMatrix2D``, and the phase-correlation peak response.
        A magnitude spectrum is point-symmetric, so the rotation is only
        determined up to 180 degrees.
    """
    a = cv2.normalize(img_ref.astype(np.float32), None, 0, 1, cv2.NORM_MINMAX)
    b = cv2.normalize(img_mov.astype(np.float32), None, 0, 1, cv2.NORM_MINMAX)

    amag = _fft_magnitude(a, use_hann=fmt_params.hann, eps=fmt_params.eps)
    bmag = _fft_magnitude(b, use_hann=fmt_params.hann, eps=fmt_params.eps)

    alp, M = _log_polar(amag)
    blp, _ = _log_polar(bmag)

    # phaseCorrelate returns (shift_x, shift_y): the displacement of the
    # second image relative to the first.
    shift, response = cv2.phaseCorrelate(alp, blp)
    shift_x, shift_y = shift

    # In a cv2.logPolar image the columns (x) hold M * log(radius) and the
    # rows (y) hold the angle spread over the full height. The previous code
    # read scale from the y shift and rotation from the x shift.
    rows = alp.shape[0]
    # Enlarging an image by s compresses its spectrum by s, i.e. moves it
    # towards smaller log-radius: shift_x = -M * ln(s).
    scale = np.exp(-shift_x / (M + 1e-9))
    # Image y points down, so the log-polar angle grows clockwise while
    # getRotationMatrix2D counts counter-clockwise as positive.
    rotation = -360.0 * (shift_y / (rows + 1e-9))
    scale = float(np.clip(scale, fmt_params.min_scale, fmt_params.max_scale))
    rotation = float(((rotation + 180) % 360) - 180)
    return scale, rotation, float(response)

def warp_template_by(scale: float, rotation_deg: float, tpl_gray: np.ndarray, target_size: Tuple[int, int]):
    """Rotate and scale the template about its center, then resize it.

    Args:
        scale: Isotropic scale factor given to ``cv2.getRotationMatrix2D``.
        rotation_deg: Rotation angle in degrees for the same call.
        tpl_gray: Single-channel template image.
        target_size: ``(width, height)`` of the returned image.

    Returns:
        The warped template resized to ``target_size``. The warp keeps the
        template's original canvas size and fills uncovered pixels with 0.
    """
    height, width = tpl_gray.shape[:2]
    affine = cv2.getRotationMatrix2D((width / 2, height / 2), rotation_deg, scale)
    rotated = cv2.warpAffine(
        tpl_gray,
        affine,
        (width, height),
        flags=cv2.INTER_LINEAR,
        borderMode=cv2.BORDER_CONSTANT,
        borderValue=0,
    )
    out_w, out_h = target_size[0], target_size[1]
    return cv2.resize(rotated, (out_w, out_h), interpolation=cv2.INTER_LINEAR)

# ===================== 7) MATCH SCORE (robust) =====================

def edge_preprocess(img_gray: np.ndarray, mp: MatchParams):
    """Turn a grayscale image into an edge-like feature map for matching.

    The image is contrast-equalized with CLAHE and passed through Canny.
    When fewer than 0.1% of the pixels are edges, the normalized Sobel
    gradient magnitude is returned instead; otherwise the Canny edges are
    returned after one 3x3 dilation.

    Args:
        img_gray: Single-channel image accepted by ``cv2.createCLAHE().apply``.
        mp: Supplies the two Canny thresholds.

    Returns:
        A uint8 feature image of the same size as ``img_gray``.
    """
    # CLAHE counters flat, low-contrast images.
    equalized = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8)).apply(img_gray)
    edge_map = cv2.Canny(equalized, mp.canny_low, mp.canny_high)

    enough_edges = not (np.count_nonzero(edge_map) < 0.001 * edge_map.size)
    if enough_edges:
        # Slightly thicken the edges.
        kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
        return cv2.dilate(edge_map, kernel, iterations=1)

    # Too few edges: fall back to the gradient magnitude.
    grad_x = cv2.Sobel(equalized, cv2.CV_32F, 1, 0, ksize=3)
    grad_y = cv2.Sobel(equalized, cv2.CV_32F, 0, 1, ksize=3)
    gradient = cv2.magnitude(grad_x, grad_y)
    return cv2.normalize(gradient, None, 0, 255, cv2.NORM_MINMAX).astype(np.uint8)

def _nan_to_val(x: float, val: float = -1.0) -> float:
    """Return ``x`` as a Python float, substituting ``val`` for ``None`` or NaN.

    The value is converted to ``float`` before the NaN test so that NumPy
    scalars are handled too: ``np.float32`` (what ``ndarray.max()`` yields for
    a float32 array) is not a subclass of ``float``, so an ``isinstance``
    check lets its NaN slip through.

    Args:
        x: Number to sanitize; may be ``None`` or a NumPy scalar.
        val: Replacement used when ``x`` is ``None`` or NaN.

    Returns:
        ``float(val)`` for ``None``/NaN input, otherwise ``float(x)``.
    """
    if x is None:
        return float(val)
    number = float(x)
    # NaN is the only float value that is not equal to itself.
    return float(val) if number != number else number

def ncc_score(scene: np.ndarray, templ: np.ndarray) -> float:
    """Score how well ``templ`` matches ``scene`` with layered fallbacks.

    1. Best ``TM_CCOEFF_NORMED`` response.
    2. If that is <= -0.5, best ``TM_CCORR_NORMED`` response.
    3. If both are unusable (first <= -0.5 and second <= 0), the IoU of the
       Otsu-binarized images.

    Args:
        scene: Single-channel image to search in. It is zero-padded at the
            bottom/right when smaller than the template in either dimension.
        templ: Single-channel template.

    Returns:
        The larger of the two correlation scores, or the IoU in the final
        fallback.
    """
    scene_h, scene_w = scene.shape[:2]
    templ_h, templ_w = templ.shape[:2]
    if scene_h < templ_h or scene_w < templ_w:
        canvas = np.zeros((max(scene_h, templ_h), max(scene_w, templ_w)), dtype=scene.dtype)
        canvas[:scene_h, :scene_w] = scene
        scene = canvas

    # 1) TM_CCOEFF_NORMED
    coeff = _nan_to_val(cv2.matchTemplate(scene, templ, cv2.TM_CCOEFF_NORMED).max())
    if not (coeff <= -0.5):
        return max(coeff, -1.0)

    # 2) Fallback: TM_CCORR_NORMED
    corr = _nan_to_val(cv2.matchTemplate(scene, templ, cv2.TM_CCORR_NORMED).max())
    if not (corr <= 0):
        return max(coeff, corr)

    # 3) Last resort: IoU between the two binary masks
    scene_img = scene
    if scene_img.shape != templ.shape:
        scene_img = cv2.resize(
            scene_img, (templ.shape[1], templ.shape[0]), interpolation=cv2.INTER_NEAREST
        )
    otsu_mode = cv2.THRESH_BINARY + cv2.THRESH_OTSU
    templ_bin = cv2.threshold(templ, 0, 255, otsu_mode)[1]
    scene_bin = cv2.threshold(scene_img, 0, 255, otsu_mode)[1]
    overlap = np.count_nonzero(cv2.bitwise_and(templ_bin, scene_bin))
    combined = np.count_nonzero(cv2.bitwise_or(templ_bin, scene_bin))
    if combined > 0:
        return float(overlap / combined)
    return float(0.0)

def thicken_binary(img: np.ndarray, ksize: int = 3, iters: int = 1) -> np.ndarray:
    """Dilate ``img`` with a ``ksize`` x ``ksize`` rectangular kernel, ``iters`` times."""
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (ksize,) * 2)
    thickened = cv2.dilate(img, kernel, iterations=iters)
    return thickened

# ===================== P I P E L I N E  C H Í N H =====================

def find_icon_with_fmt(
    dashboard_path: str,
    template_path: str,
    red_params=None,
    cc_params=None,
    fmt_params=None,
    match_params=None,
):
    """Locate the template icon among the red regions of a dashboard image.

    Pipeline: binarize the dashboard, keep only red pixels, extract candidate
    boxes from connected components, then for every box estimate scale and
    rotation with Fourier-Mellin registration, warp the template accordingly
    and score it against the box on edge features.

    Args:
        dashboard_path: Path of the dashboard image.
        template_path: Path of the icon template image.
        red_params: ``RedMaskParams``; a fresh default instance when ``None``.
        cc_params: ``CCParams``; a fresh default instance when ``None``.
        fmt_params: ``FMTParams``; a fresh default instance when ``None``.
        match_params: ``MatchParams``; a fresh default instance when ``None``.

    Returns:
        A dict with keys ``best_score``, ``best_box`` (``(x, y, w, h)`` on the
        dashboard, or ``None``), ``best_scale``, ``best_rotation_deg``,
        ``pass`` (best score reached ``match_params.ncc_threshold``) and
        ``num_candidates``.

    Raises:
        FileNotFoundError: If either image cannot be read.
    """
    import logging  # local import keeps this block self-contained

    logger = logging.getLogger(__name__)

    # Create defaults per call rather than sharing instances built at import.
    red_params = RedMaskParams() if red_params is None else red_params
    cc_params = CCParams() if cc_params is None else cc_params
    fmt_params = FMTParams() if fmt_params is None else fmt_params
    match_params = MatchParams() if match_params is None else match_params

    # 1) Dashboard: BGR + Otsu binary (the RGB copy is not needed here)
    dash_bgr, _, dash_bin = load_and_binarize(dashboard_path)

    # 2) Template: only the inverted binary is used downstream
    tpl_bgr = cv2.imread(template_path, cv2.IMREAD_COLOR)
    if tpl_bgr is None:
        raise FileNotFoundError(f"Không thể đọc template: {template_path}")
    _, tpl_inv = binarize_and_invert_template(tpl_bgr)

    # 3) Red filter applied to the binarized dashboard
    redmask = red_mask_on_dashboard(dash_bgr, red_params)
    dash_masked = apply_mask_to_binarized(dash_bin, redmask)

    # 4) Dilate + connected components -> candidate boxes
    boxes = find_candidate_boxes(dash_masked, cc_params)

    # 5) Tight-crop the template and soften it slightly
    tpl_tight = tight_crop_template(tpl_inv)
    tpl_tight_gray = cv2.GaussianBlur(tpl_tight, (3, 3), 0)

    best_score = -1.0
    best_box = None
    best_scale = None
    best_rotation = None

    dash_gray = cv2.cvtColor(dash_bgr, cv2.COLOR_BGR2GRAY)

    for (x, y, w, h) in boxes:
        roi = dash_gray[y:y+h, x:x+w]
        if roi.size == 0 or w < 8 or h < 8:
            continue

        # Bring both images to the same size for FMT
        tpl_norm = cv2.resize(tpl_tight_gray, (w, h), interpolation=cv2.INTER_LINEAR)
        roi_norm = cv2.resize(roi, (w, h), interpolation=cv2.INTER_LINEAR)

        # 6) FMT scale/rotation estimate. Only OpenCV failures fall back to
        # the identity transform, and the fallback is reported instead of
        # being swallowed silently.
        try:
            scale, rotation, _ = fourier_mellin_register(tpl_norm, roi_norm, fmt_params)
        except cv2.error as exc:
            logger.warning(
                "FMT registration failed for box %s; using scale=1.0, rotation=0.0: %s",
                (x, y, w, h), exc,
            )
            scale, rotation = 1.0, 0.0

        warped = warp_template_by(scale, rotation, tpl_tight_gray, target_size=(w, h))

        # Thicken the template strokes a little
        warped = thicken_binary(warped, ksize=3, iters=1)

        # 7) Score on edge features
        roi_feat = edge_preprocess(roi, match_params)
        warped_feat = edge_preprocess(warped, match_params)
        score = ncc_score(roi_feat, warped_feat)

        # A NaN score compares False here and therefore never becomes best.
        if score > best_score:
            best_score = score
            best_box = (x, y, w, h)
            best_scale = scale
            best_rotation = rotation

    return {
        "best_score": best_score,
        "best_box": best_box,              # (x, y, w, h) on the dashboard
        "best_scale": best_scale,
        "best_rotation_deg": best_rotation,
        "pass": best_score >= match_params.ncc_threshold,
        "num_candidates": len(boxes),
    }

# ===================== V Í  D Ụ  C H Ạ Y =====================

if __name__ == "__main__":
    # CHANGE THESE TWO PATHS TO MATCH YOUR MACHINE
    DASHBOARD = r"\Icon\dashboard.jpg"
    TEMPLATE  = r"\Icon\ID01.jpg"

    # Settings for this run; widen the red range in RedMaskParams if needed.
    run_settings = dict(
        red_params=RedMaskParams(),
        cc_params=CCParams(min_area=60, max_area=120000, pad=3),
        fmt_params=FMTParams(min_scale=0.6, max_scale=1.8),
        match_params=MatchParams(ncc_threshold=0.55, canny_low=50, canny_high=130),
    )
    result = find_icon_with_fmt(
        dashboard_path=DASHBOARD,
        template_path=TEMPLATE,
        **run_settings,
    )

    print("=== KẾT QUẢ ===")
    for key, value in result.items():
        print(f"{key}: {value}")

    # Draw the best-match box for a quick visual check
    best_box = result["best_box"]
    if best_box is not None:
        canvas = cv2.imread(DASHBOARD)
        left, top, width, height = best_box
        green = (0, 255, 0)
        cv2.rectangle(canvas, (left, top), (left + width, top + height), green, 2)
        label = f"NCC={result['best_score']:.2f}"
        cv2.putText(canvas, label, (left, max(0, top - 8)),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, green, 2, cv2.LINE_AA)
        cv2.imshow("Best match", canvas)
        cv2.waitKey(0)
        cv2.destroyAllWindows()
 Hi, I am using this code, but it doesn't find the correct image. Please help me check it.
Reasons:
  • Blacklisted phrase (1): help me
  • RegEx Blacklisted phrase (3): Please help me
  • Long answer (-1):
  • Has code block (-0.5):
  • Self-answer (0.5):
  • Low reputation (1):
Posted by: Dũng Hoàng