# compare_icon_fmt.py
import cv2
import numpy as np
from dataclasses import dataclass
from typing import Tuple, List
# ===================== T H A M S Ố & C Ấ U H Ì N H =====================
@dataclass
class RedMaskParams:
    """Settings for building a red-colour mask in HSV space.

    Two HSV bands are configured, hue [0..10] and hue [170..180], together
    with the kernel sizes for morphological clean-up of the mask.
    """

    # First HSV band: lower / upper (H, S, V) bounds.
    lower1: Tuple[int, int, int] = (0, 80, 50)
    upper1: Tuple[int, int, int] = (10, 255, 255)

    # Second HSV band: lower / upper (H, S, V) bounds.
    lower2: Tuple[int, int, int] = (170, 80, 50)
    upper2: Tuple[int, int, int] = (180, 255, 255)

    # Kernel sizes for the morphological opening and closing steps.
    open_ksize: int = 3
    close_ksize: int = 5
@dataclass
class CCParams:
    """Settings for the dilation + connected-component candidate search."""

    # Side length of the square dilation kernel.
    dilate_ksize: int = 3

    # Accepted component area range, in pixels.
    min_area: int = 150
    max_area: int = 200000

    # Accepted width / height ratio range of a component's bounding box.
    aspect_min: float = 0.5
    aspect_max: float = 2.5

    # Margin, in pixels, added around each accepted bounding box.
    pad: int = 2
@dataclass
class FMTParams:
    """Settings for the Fourier-Mellin scale / rotation estimation."""

    # Whether to apply a Hanning window before the FFT.
    hann: bool = True

    # Small constant added to the spectrum magnitude before taking the log.
    eps: float = 1e-3

    # Range the estimated scale factor is clipped to.
    min_scale: float = 0.5
    max_scale: float = 2.0
@dataclass
class MatchParams:
    """Settings for the final template-matching score."""

    # Minimum score for a match to be reported as a pass.
    ncc_threshold: float = 0.45

    # Lower / upper hysteresis thresholds passed to the Canny edge detector.
    canny_low: int = 60
    canny_high: int = 120
# ===================== 1) LOAD & BINARIZE =====================
def load_and_binarize(path: str):
    """Read a colour image from disk and derive an Otsu-binarised copy.

    Args:
        path: Location of the image file.

    Returns:
        A tuple ``(img_bgr, rgb, binarized)``: the image as returned by
        ``cv2.imread`` in colour mode, the same image converted to RGB, and
        the grayscale image thresholded with Otsu's method.

    Raises:
        FileNotFoundError: If ``cv2.imread`` returns ``None`` for ``path``.
    """
    image = cv2.imread(path, cv2.IMREAD_COLOR)
    if image is None:
        raise FileNotFoundError(f"Không thể đọc ảnh: {path}")

    as_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    otsu_flags = cv2.THRESH_BINARY + cv2.THRESH_OTSU
    _threshold, binary = cv2.threshold(grayscale, 0, 255, otsu_flags)
    return image, as_rgb, binary
# ===================== 2) TEMPLATE BIN + INVERT =====================
def binarize_and_invert_template(tpl_bgr):
    """Otsu-threshold a BGR template and also return its bitwise inverse.

    Args:
        tpl_bgr: Template image in BGR channel order.

    Returns:
        A tuple ``(tpl_bin, tpl_inv)``: the binarised template and the same
        image with every bit flipped.
    """
    grayscale = cv2.cvtColor(tpl_bgr, cv2.COLOR_BGR2GRAY)
    otsu_flags = cv2.THRESH_BINARY + cv2.THRESH_OTSU
    binary = cv2.threshold(grayscale, 0, 255, otsu_flags)[1]
    return binary, cv2.bitwise_not(binary)
# ===================== 3) RED MASK =====================
def red_mask_on_dashboard(dash_bgr, red_params: RedMaskParams):
    """Build a mask of red pixels from a BGR dashboard image.

    The image is converted to HSV, thresholded against the two HSV bands in
    ``red_params`` and the two results are OR-ed. The mask is then cleaned
    with a morphological opening followed by a closing; each step is skipped
    when its kernel size is not positive.

    Args:
        dash_bgr: Dashboard image in BGR channel order.
        red_params: HSV bounds and morphology kernel sizes.

    Returns:
        The single-channel mask produced by ``cv2.inRange`` and the clean-up.
    """
    hsv_img = cv2.cvtColor(dash_bgr, cv2.COLOR_BGR2HSV)
    low_band = cv2.inRange(hsv_img, red_params.lower1, red_params.upper1)
    high_band = cv2.inRange(hsv_img, red_params.lower2, red_params.upper2)
    red = cv2.bitwise_or(low_band, high_band)

    # Opening first, then closing, each with its own elliptical kernel.
    cleanup_steps = (
        (cv2.MORPH_OPEN, red_params.open_ksize),
        (cv2.MORPH_CLOSE, red_params.close_ksize),
    )
    for operation, size in cleanup_steps:
        if size > 0:
            kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (size, size))
            red = cv2.morphologyEx(red, operation, kernel)
    return red
def apply_mask_to_binarized(binarized, mask):
    """Restrict ``binarized`` to the pixels selected by ``mask``.

    Args:
        binarized: Binarised image.
        mask: Mask handed to ``cv2.bitwise_and`` as its ``mask`` argument.

    Returns:
        The result of AND-ing ``binarized`` with itself under ``mask``.
    """
    restricted = cv2.bitwise_and(binarized, binarized, mask=mask)
    return restricted
# ===================== 4) DILATE + CONNECTED COMPONENTS =====================
def find_candidate_boxes(masked_bin, cc_params: CCParams) -> List[Tuple[int,int,int,int]]:
    """Return padded bounding boxes of plausible icon blobs.

    The input is optionally dilated, split into 8-connected components, and
    each component is kept only if its pixel area and bounding-box aspect
    ratio fall inside the ranges given by ``cc_params``.

    Args:
        masked_bin: Single-channel image; every non-zero pixel is foreground.
        cc_params: Dilation size, area / aspect filters and padding.

    Returns:
        A list of ``(x, y, w, h)`` boxes made of plain Python ints, clipped to
        the image bounds.
    """
    # A non-positive kernel size means "no dilation", mirroring how the
    # red-mask morphology steps treat their kernel sizes.
    work = masked_bin
    if cc_params.dilate_ksize > 0:
        kernel = cv2.getStructuringElement(
            cv2.MORPH_RECT, (cc_params.dilate_ksize,) * 2
        )
        work = cv2.dilate(masked_bin, kernel, iterations=1)

    foreground = (work > 0).astype(np.uint8)
    num_labels, _labels, stats, _centroids = cv2.connectedComponentsWithStats(
        foreground, connectivity=8
    )

    img_h, img_w = masked_bin.shape[:2]
    boxes = []
    # Label 0 is skipped, as in the label loop this replaces. ``tolist()``
    # turns the NumPy scalars into Python ints so the boxes match the
    # annotated return type.
    for x, y, w, h, area in stats[1:num_labels].tolist():
        if area < cc_params.min_area or area > cc_params.max_area:
            continue
        aspect = w / (h + 1e-6)
        if not (cc_params.aspect_min <= aspect <= cc_params.aspect_max):
            continue
        x0 = max(0, x - cc_params.pad)
        y0 = max(0, y - cc_params.pad)
        x1 = min(img_w, x + w + cc_params.pad)
        y1 = min(img_h, y + h + cc_params.pad)
        boxes.append((x0, y0, x1 - x0, y1 - y0))
    return boxes
# ===================== 5) CROP CHẶT TEMPLATE =====================
def tight_crop_template(tpl_inv):
    """Crop the template to the bounding box of its largest external contour.

    Args:
        tpl_inv: Single-channel template image.

    Returns:
        The cropped view of ``tpl_inv``, or ``tpl_inv`` itself when no contour
        is found.
    """
    contours, _hierarchy = cv2.findContours(
        tpl_inv, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )
    if len(contours) == 0:
        return tpl_inv

    # NOTE(review): only the largest contour is kept, so a template made of
    # several disconnected parts is cropped to one part - confirm intended.
    largest = max(contours, key=cv2.contourArea)
    left, top, width, height = cv2.boundingRect(largest)
    bottom, right = top + height, left + width
    return tpl_inv[top:bottom, left:right]
# ===================== 6) FOURIER–MELLIN (scale, rotation) =====================
def _fft_magnitude(img: np.ndarray, use_hann=True, eps=1e-3) -> np.ndarray:
    """Return the centred log-magnitude spectrum of ``img`` scaled to [0, 1].

    Args:
        img: 2-D single-channel image of any numeric dtype.
        use_hann: Multiply the image by a 2-D Hanning window before the FFT.
        eps: Constant added to the magnitude before taking the logarithm.

    Returns:
        A ``float32`` array with the same shape as ``img``. The zero-frequency
        term sits at ``[rows // 2, cols // 2]``. A constant spectrum maps to
        all zeros.

    Raises:
        ValueError: If ``img`` is not two-dimensional.
    """
    data = np.asarray(img, dtype=np.float32)
    if data.ndim != 2:
        raise ValueError(f"expected a 2-D image, got shape {data.shape}")

    if use_hann:
        rows, cols = data.shape
        # Built with NumPy as a separable outer product:
        # cv2.createHanningWindow does not accept the 1-pixel-thick windows
        # that a row-times-column construction would need.
        window = np.outer(np.hanning(rows), np.hanning(cols))
        data = data * window.astype(np.float32)

    spectrum = np.fft.fftshift(np.fft.fft2(data))
    mag = np.log(np.abs(spectrum) + eps)

    # Min-max normalisation to [0, 1]; a flat spectrum has no range to scale.
    lo = float(mag.min())
    span = float(mag.max()) - lo
    if span <= 0:
        return np.zeros(data.shape, dtype=np.float32)
    return ((mag - lo) / span).astype(np.float32)
def _log_polar(mag: np.ndarray) -> Tuple[np.ndarray, float]:
    """Resample ``mag`` onto a log-polar grid centred on the image.

    Args:
        mag: 2-D array, typically a centred magnitude spectrum.

    Returns:
        A tuple ``(lp, M)``: the output of ``cv2.logPolar`` and the magnitude
        scale ``M`` that was passed to it, chosen as
        ``cols / log(min(cols // 2, rows // 2))``.
    """
    n_rows, n_cols = mag.shape[0], mag.shape[1]
    cx, cy = n_cols // 2, n_rows // 2
    radius_limit = min(cx, cy)
    log_gain = n_cols / np.log(radius_limit + 1e-6)
    warp_flags = cv2.WARP_FILL_OUTLIERS + cv2.INTER_LINEAR
    remapped = cv2.logPolar(mag, (cx, cy), log_gain, warp_flags)
    return remapped, log_gain
def fourier_mellin_register(img_ref: np.ndarray, img_mov: np.ndarray, fmt_params: FMTParams):
    """Estimate the scale and rotation that map ``img_ref`` onto ``img_mov``.

    Both images are min-max normalised, converted to log-polar magnitude
    spectra and aligned with phase correlation.

    Args:
        img_ref: Reference image (2-D, same shape as ``img_mov``).
        img_mov: Image assumed to be a scaled / rotated version of the
            reference.
        fmt_params: Windowing, log epsilon and scale clipping limits.

    Returns:
        A tuple ``(scale, rotation_deg, response)``. ``scale`` is clipped to
        ``[min_scale, max_scale]``, ``rotation_deg`` is wrapped to
        ``[-180, 180)`` and ``response`` is the phase-correlation peak
        response. The magnitude spectrum of a real image is point-symmetric,
        so the rotation is only determined up to 180 degrees.
    """
    ref = cv2.normalize(img_ref.astype(np.float32), None, 0, 1, cv2.NORM_MINMAX)
    mov = cv2.normalize(img_mov.astype(np.float32), None, 0, 1, cv2.NORM_MINMAX)

    ref_mag = _fft_magnitude(ref, use_hann=fmt_params.hann, eps=fmt_params.eps)
    mov_mag = _fft_magnitude(mov, use_hann=fmt_params.hann, eps=fmt_params.eps)
    ref_lp, log_gain = _log_polar(ref_mag)
    mov_lp, _ = _log_polar(mov_mag)

    # phaseCorrelate returns ((shift_x, shift_y), response).
    (shift_x, shift_y), response = cv2.phaseCorrelate(ref_lp, mov_lp)

    # In the log-polar image the COLUMNS (x) hold log-radius and the ROWS (y)
    # hold the angle over a full turn, so scale comes from the x shift and
    # rotation from the y shift.
    rows = ref_lp.shape[0]

    # Enlarging an image compresses its spectrum, moving it towards smaller
    # log-radius, hence the negative sign.
    # NOTE(review): signs follow the derivation for cv2.logPolar and
    # cv2.phaseCorrelate conventions - confirm on a synthetic pair.
    scale = np.exp(-shift_x / (log_gain + 1e-9))
    # Negated so the angle can be passed to cv2.getRotationMatrix2D, whose
    # positive angles are counter-clockwise.
    rotation = -360.0 * (shift_y / (rows + 1e-9))

    scale = float(np.clip(scale, fmt_params.min_scale, fmt_params.max_scale))
    rotation = float(((rotation + 180) % 360) - 180)
    return scale, rotation, float(response)
def warp_template_by(scale: float, rotation_deg: float, tpl_gray: np.ndarray, target_size: Tuple[int, int]):
    """Rotate and scale the template about its centre, then resize it.

    Args:
        scale: Scale factor given to ``cv2.getRotationMatrix2D``.
        rotation_deg: Rotation angle in degrees for the same call.
        tpl_gray: Template image.
        target_size: ``(width, height)`` of the returned image.

    Returns:
        The transformed template resized to ``target_size``. The affine warp
        keeps the template's own canvas size and fills uncovered pixels
        with 0.
    """
    height, width = tpl_gray.shape[:2]
    pivot = (width / 2, height / 2)
    affine = cv2.getRotationMatrix2D(pivot, rotation_deg, scale)
    transformed = cv2.warpAffine(
        tpl_gray,
        affine,
        (width, height),
        flags=cv2.INTER_LINEAR,
        borderMode=cv2.BORDER_CONSTANT,
        borderValue=0,
    )
    out_w, out_h = target_size[0], target_size[1]
    return cv2.resize(transformed, (out_w, out_h), interpolation=cv2.INTER_LINEAR)
# ===================== 7) MATCH SCORE (robust) =====================
def edge_preprocess(img_gray: np.ndarray, mp: MatchParams):
    """Turn a grayscale image into an edge-like feature map.

    The image is contrast-equalised with CLAHE and passed through Canny. When
    fewer than 0.1% of the pixels are edges, the Sobel gradient magnitude
    (rescaled to 0..255, ``uint8``) is returned instead; otherwise the Canny
    edges are dilated once with a 3x3 kernel and returned.

    Args:
        img_gray: Single-channel image.
        mp: Provides the Canny thresholds.

    Returns:
        The edge map or the gradient-magnitude image described above.
    """
    # CLAHE counteracts flat, low-contrast images.
    equalized = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8)).apply(img_gray)
    canny = cv2.Canny(equalized, mp.canny_low, mp.canny_high)

    enough_edges = np.count_nonzero(canny) >= 0.001 * canny.size
    if enough_edges:
        # Thicken the edges slightly.
        kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3,3))
        return cv2.dilate(canny, kernel, iterations=1)

    # Too few edges: fall back to the gradient magnitude.
    grad_x = cv2.Sobel(equalized, cv2.CV_32F, 1, 0, ksize=3)
    grad_y = cv2.Sobel(equalized, cv2.CV_32F, 0, 1, ksize=3)
    strength = cv2.magnitude(grad_x, grad_y)
    strength = cv2.normalize(strength, None, 0, 255, cv2.NORM_MINMAX)
    return strength.astype(np.uint8)
def _nan_to_val(x: float, val: float = -1.0) -> float:
    """Return ``x`` as a float, substituting ``val`` for ``None`` or NaN.

    Args:
        x: Number to sanitise; may be ``None`` or a NumPy scalar.
        val: Replacement used when ``x`` is ``None`` or NaN.

    Returns:
        ``float(val)`` when ``x`` is ``None`` or NaN, otherwise ``float(x)``.
    """
    if x is None:
        return float(val)
    # Convert first: NumPy scalars such as ``np.float32`` are not instances of
    # ``float``, so a NaN check guarded by ``isinstance(x, float)`` misses them.
    number = float(x)
    # NaN is the only value that compares unequal to itself.
    return float(val) if number != number else number
def _finite_peak(response_map: np.ndarray, default: float = -1.0) -> float:
    """Return the largest finite value in ``response_map``, or ``default``."""
    finite = response_map[np.isfinite(response_map)]
    return float(finite.max()) if finite.size else float(default)


def ncc_score(scene: np.ndarray, templ: np.ndarray) -> float:
    """Score how well ``templ`` matches somewhere inside ``scene``.

    The score is the peak of ``TM_CCOEFF_NORMED``. If that peak is ``<= -0.5``
    the peak of ``TM_CCORR_NORMED`` is tried, and if that is ``<= 0`` as well,
    the IoU of the two Otsu-binarised images is returned.

    NaN or infinite entries in a response map are ignored when picking the
    peak, so one degenerate location cannot turn the whole score into NaN
    or inf.

    Args:
        scene: Image searched in. It is zero-padded at the bottom / right if
            it is smaller than the template.
        templ: Template image, same dtype as ``scene``.

    Returns:
        The matching score as a Python float.
    """
    scene_h, scene_w = scene.shape[:2]
    templ_h, templ_w = templ.shape[:2]
    if scene_h < templ_h or scene_w < templ_w:
        # matchTemplate needs the scene to be at least as large as the template.
        padded = np.zeros(
            (max(scene_h, templ_h), max(scene_w, templ_w)), dtype=scene.dtype
        )
        padded[:scene_h, :scene_w] = scene
        scene = padded

    # 1) Zero-mean normalised cross-correlation.
    s1 = _finite_peak(cv2.matchTemplate(scene, templ, cv2.TM_CCOEFF_NORMED))
    if s1 > -0.5:
        return s1

    # 2) Fallback: plain normalised cross-correlation.
    s2 = _finite_peak(cv2.matchTemplate(scene, templ, cv2.TM_CCORR_NORMED))
    if s2 > 0:
        return max(s1, s2)

    # 3) Last resort: IoU between the two binarised images.
    resized_scene = scene
    if resized_scene.shape != templ.shape:
        resized_scene = cv2.resize(
            resized_scene,
            (templ.shape[1], templ.shape[0]),
            interpolation=cv2.INTER_NEAREST,
        )
    otsu_flags = cv2.THRESH_BINARY + cv2.THRESH_OTSU
    _, templ_bin = cv2.threshold(templ, 0, 255, otsu_flags)
    _, scene_bin = cv2.threshold(resized_scene, 0, 255, otsu_flags)
    inter = np.count_nonzero(cv2.bitwise_and(templ_bin, scene_bin))
    union = np.count_nonzero(cv2.bitwise_or(templ_bin, scene_bin))
    return float(inter / union) if union > 0 else 0.0
def thicken_binary(img: np.ndarray, ksize: int = 3, iters: int = 1) -> np.ndarray:
    """Dilate ``img`` with a square kernel.

    Args:
        img: Image to dilate.
        ksize: Side length of the rectangular structuring element.
        iters: Number of dilation iterations.

    Returns:
        The dilated image.
    """
    structuring = cv2.getStructuringElement(cv2.MORPH_RECT, (ksize,) * 2)
    thickened = cv2.dilate(img, structuring, iterations=iters)
    return thickened
# ===================== P I P E L I N E C H Í N H =====================
def _score_alignment(roi_feat, tpl_gray, scale, rotation, size, match_params):
    """Warp the template with one (scale, rotation) guess and score it against the ROI features."""
    warped = warp_template_by(scale, rotation, tpl_gray, target_size=size)
    # Thicken the template strokes slightly before extracting edges.
    warped = thicken_binary(warped, ksize=3, iters=1)
    return ncc_score(roi_feat, edge_preprocess(warped, match_params))


def find_icon_with_fmt(
    dashboard_path: str,
    template_path: str,
    red_params=RedMaskParams(),
    cc_params=CCParams(),
    fmt_params=FMTParams(),
    match_params=MatchParams(),
):
    """Locate the red dashboard region that best matches a template icon.

    Pipeline: load and binarise the dashboard, build a red mask, extract
    candidate boxes from connected components, estimate scale / rotation of
    the template against each candidate with Fourier-Mellin registration, and
    score the warped template against the candidate on edge features.

    Args:
        dashboard_path: Path of the dashboard image.
        template_path: Path of the template icon image.
        red_params: Red-mask settings.
        cc_params: Candidate-box settings.
        fmt_params: Fourier-Mellin settings.
        match_params: Scoring settings, including the pass threshold.

    Returns:
        A dict with keys ``best_score``, ``best_box`` (``(x, y, w, h)`` on the
        dashboard, or ``None``), ``best_scale``, ``best_rotation_deg``,
        ``pass`` and ``num_candidates``.

    Raises:
        FileNotFoundError: If either image cannot be read.
    """
    import warnings

    # 1) Dashboard: colour image + Otsu binarisation.
    dash_bgr, _dash_rgb, dash_bin = load_and_binarize(dashboard_path)

    # 2) Template: binarise + invert.
    tpl_bgr = cv2.imread(template_path, cv2.IMREAD_COLOR)
    if tpl_bgr is None:
        raise FileNotFoundError(f"Không thể đọc template: {template_path}")
    _tpl_bin, tpl_inv = binarize_and_invert_template(tpl_bgr)

    # 3) Red filter, applied to the binarised dashboard.
    redmask = red_mask_on_dashboard(dash_bgr, red_params)
    dash_masked = apply_mask_to_binarized(dash_bin, redmask)

    # 4) Candidate boxes from connected components.
    boxes = find_candidate_boxes(dash_masked, cc_params)
    if not boxes:
        # Red pixels can fall below the global Otsu threshold, in which case
        # the AND above removes them; fall back to the red mask itself rather
        # than reporting no candidates.
        boxes = find_candidate_boxes(redmask, cc_params)

    # 5) Tight-crop the template and soften it slightly.
    tpl_tight = tight_crop_template(tpl_inv)
    tpl_tight_gray = cv2.GaussianBlur(tpl_tight, (3,3), 0)

    best_score = -1.0
    best_box = None
    best_scale = None
    best_rotation = None

    dash_gray = cv2.cvtColor(dash_bgr, cv2.COLOR_BGR2GRAY)
    for box in boxes:
        x, y, w, h = (int(v) for v in box)
        roi = dash_gray[y:y+h, x:x+w]
        if roi.size == 0 or w < 8 or h < 8:
            continue

        # Bring the template to the ROI size for registration. The ROI is
        # only resized if the slice was clipped.
        tpl_norm = cv2.resize(tpl_tight_gray, (w, h), interpolation=cv2.INTER_LINEAR)
        roi_norm = roi
        if roi.shape[:2] != (h, w):
            roi_norm = cv2.resize(roi, (w, h), interpolation=cv2.INTER_LINEAR)

        # 6) Estimate scale / rotation; on failure fall back to identity but
        # report the problem instead of hiding it.
        try:
            scale, rotation, _resp = fourier_mellin_register(tpl_norm, roi_norm, fmt_params)
        except (cv2.error, ValueError) as exc:
            warnings.warn(
                f"Fourier-Mellin registration failed, using identity: {exc}",
                RuntimeWarning,
            )
            scale, rotation = 1.0, 0.0

        # Magnitude-spectrum registration cannot tell a rotation from the same
        # rotation plus 180 degrees, so both are scored. The identity
        # transform is scored too, so a bad estimate can never do worse than
        # no registration.
        flipped = rotation + 180.0 if rotation <= 0 else rotation - 180.0
        hypotheses = dict.fromkeys(
            [(scale, rotation), (scale, flipped), (1.0, 0.0)]
        )

        # 7) Score every hypothesis on edge features.
        roi_feat = edge_preprocess(roi, match_params)
        for hyp_scale, hyp_rotation in hypotheses:
            score = _score_alignment(
                roi_feat, tpl_tight_gray, hyp_scale, hyp_rotation, (w, h), match_params
            )
            if score > best_score:
                best_score = score
                best_box = (x, y, w, h)
                best_scale = hyp_scale
                best_rotation = hyp_rotation

    return {
        "best_score": best_score,
        "best_box": best_box,  # (x, y, w, h) on the dashboard
        "best_scale": best_scale,
        "best_rotation_deg": best_rotation,
        "pass": bool(best_score >= match_params.ncc_threshold),
        "num_candidates": len(boxes),
    }
# ===================== V Í D Ụ C H Ạ Y =====================
if __name__ == "__main__":
    # CHANGE THESE TWO PATHS TO MATCH YOUR MACHINE
    DASHBOARD = r"\Icon\dashboard.jpg"
    TEMPLATE = r"\Icon\ID01.jpg"

    demo_settings = dict(
        red_params=RedMaskParams(),  # widen the red range if needed
        cc_params=CCParams(min_area=60, max_area=120000, pad=3),
        fmt_params=FMTParams(min_scale=0.6, max_scale=1.8),
        match_params=MatchParams(ncc_threshold=0.55, canny_low=50, canny_high=130),
    )
    result = find_icon_with_fmt(
        dashboard_path=DASHBOARD,
        template_path=TEMPLATE,
        **demo_settings,
    )

    print("=== KẾT QUẢ ===")
    for key in result:
        print(f"{key}: {result[key]}")

    # Draw the best match for a quick visual check.
    best_box = result["best_box"]
    if best_box is not None:
        canvas = cv2.imread(DASHBOARD)
        left, top, width, height = best_box
        green = (0,255,0)
        label = f"NCC={result['best_score']:.2f}"
        cv2.rectangle(canvas, (left, top), (left + width, top + height), green, 2)
        cv2.putText(canvas, label, (left, max(0, top - 8)),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, green, 2, cv2.LINE_AA)
        cv2.imshow("Best match", canvas)
        cv2.waitKey(0)
        cv2.destroyAllWindows()
# Hi, I am using this code, but it does not find the correct image. Please help me check it.