In particular, I trained a YOLOv11 segmentation model to detect the position of Rubik's cubes.
First of all, the data has to be prepared in the YOLOv11 dataset format, and a data.yaml file has to be created:
train: ../train/images
val: ../valid/images
test: ../test/images
nc: 1
names: ['Cube']
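Each image also needs a matching label file in the YOLO segmentation format: one line per object, consisting of the class index followed by the normalized polygon coordinates of the mask outline. An illustrative labels/*.txt line for a cube (coordinates are made up) would look like:

0 0.412 0.305 0.587 0.298 0.603 0.486 0.421 0.497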
Then, install ultralytics and train the model:

!pip install ultralytics

from ultralytics import YOLO

# Start from a pretrained segmentation checkpoint;
# 'best.pt' is what training produces, not what it starts from.
model = YOLO('yolo11n-seg.pt')
model.train(data='./data/data.yaml', epochs=100, batch=64, device='cuda')
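By default, training writes checkpoints under runs/segment/train/weights/, so the best weights can then be loaded for inference (the test image name here is hypothetical):

model = YOLO('runs/segment/train/weights/best.pt')  # adjust if you changed project/name
results = model('frame.jpg')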
After running the segmentation model on a frame, I do some checks to see whether the detected object is actually a Rubik's cube:
import cv2
import numpy as np
from ultralytics import YOLO


def is_patch_cube(patch, epsilon=0.2):
    # A cube face seen roughly head-on should be close to square, so
    # reject patches whose aspect ratio deviates too much from 1.
    h, w = patch.shape[:2]
    ratio, inverse = h / w, w / h
    if ratio < 1 - epsilon or ratio > 1 + epsilon:
        return False
    if inverse < 1 - epsilon or inverse > 1 + epsilon:
        return False
    return True


def is_patch_mostly_colored(patch, threshold=0.85):
    # The patch is the masked object on a black background; require that
    # most of its values are non-zero, i.e. the mask fills the bounding box.
    h, w, c = patch.shape
    num_pixels = h * w * c
    num_colored_pixels = np.sum(patch > 0)
    return num_colored_pixels / num_pixels > threshold


def check_homogenous_color(patch, color, threshold):
    # Check that at least `threshold` of the patch falls inside the HSV
    # range of the predicted color. `color_ranges` maps color names to
    # (lower, upper) HSV bounds (see the sketch after this block).
    if color not in color_ranges:
        return False
    h, w = patch.shape[:2]
    patch = cv2.cvtColor(patch, cv2.COLOR_BGR2HSV)
    lower, upper = color_ranges[color]
    thres = cv2.inRange(patch, np.array(lower), np.array(upper))
    return (np.count_nonzero(thres) / (h * w)) > threshold
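check_homogenous_color relies on a color_ranges dict that I haven't shown. A minimal sketch, assuming OpenCV's HSV convention (H in 0-179, S/V in 0-255), with illustrative bounds that you would need to calibrate for your camera and lighting:

# Hypothetical HSV ranges per sticker color; values are illustrative only.
color_ranges = {
    'white':  ((0,   0, 160), (179,  60, 255)),
    'yellow': ((20, 100, 100), (35, 255, 255)),
    'green':  ((40, 100, 100), (85, 255, 255)),
    'blue':   ((90, 100, 100), (130, 255, 255)),
    'orange': ((8,  100, 100), (20, 255, 255)),
    'red':    ((0,  100, 100), (8, 255, 255)),  # red also wraps around H ~ 170-179
}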
def find_segments(seg_model: YOLO, image):
    return seg_model(image, verbose=False)


def get_face(results, n, homogenity_thres=0.6):
    for r in results:
        original_img = r.orig_img
        img_h, img_w, c = original_img.shape
        if r.masks is not None:
            for mask_tensor in r.masks.data:
                mask_np = (mask_tensor.cpu().numpy() * 255).astype(np.uint8)
                # YOLO masks can come back at model resolution; resize to the frame.
                if mask_np.shape[0] != img_h or mask_np.shape[1] != img_w:
                    mask_np = cv2.resize(mask_np, (img_w, img_h), interpolation=cv2.INTER_NEAREST)
                mask_np, box = simplify_mask(mask_np, eps=0.005)  # helper shown below
                obj = cv2.bitwise_and(original_img, original_img, mask=mask_np)
                # Crop the object with its bounding box, clamped to the frame.
                x1, y1, w, h = box
                x2, y2 = x1 + w, y1 + h
                x1, y1 = max(0, x1), max(0, y1)
                x2, y2 = min(img_w, x2), min(img_h, y2)
                cropped_object = obj[y1:y2, x1:x2]
                if not is_patch_cube(cropped_object):
                    continue
                if not is_patch_mostly_colored(cropped_object):
                    continue
                colors, homogenity = find_colors(cropped_object, n)
                # Require that enough of the n x n cells have a homogeneous color.
                if sum([sum(row) for row in homogenity]) < homogenity_thres * len(homogenity) * len(homogenity[0]):
                    continue
                return colors, cropped_object, mask_np, box
    return None, None, None, None
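The simplify_mask helper used above isn't shown. A minimal sketch, assuming it keeps the largest contour, simplifies its polygon with cv2.approxPolyDP, and returns the cleaned mask together with its bounding box (x, y, w, h):

def simplify_mask(mask, eps=0.005):
    # Hypothetical reconstruction: keep the largest contour, simplify it,
    # and redraw it as a filled mask; also return its bounding box.
    contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if not contours:
        return mask, (0, 0, mask.shape[1], mask.shape[0])
    largest = max(contours, key=cv2.contourArea)
    approx = cv2.approxPolyDP(largest, eps * cv2.arcLength(largest, True), True)
    simplified = np.zeros_like(mask)
    cv2.fillPoly(simplified, [approx], 255)
    return simplified, cv2.boundingRect(approx)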
def find_colors(patch, n):
    # Split the face into an n x n grid and classify the color of each cell.
    h, w, c = patch.shape
    hh, ww = h // n, w // n
    colors = [['' for _ in range(n)] for __ in range(n)]
    homogenity = [[False for _ in range(n)] for __ in range(n)]
    for i in range(n):
        for j in range(n):
            pp = patch[i*hh:(i+1)*hh, j*ww:(j+1)*ww]
            colors[i][j] = find_best_matching_color_legacy(
                get_median_color(pp), tpe='bgr')  # whatever function you want to detect colors
            homogenity[i][j] = check_homogenous_color(pp, colors[i][j], threshold=0.5)
    return colors, homogenity
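find_best_matching_color_legacy and get_median_color are left up to you, as the comment says. For reference, a hypothetical get_median_color that takes the per-channel median over the non-black (unmasked) pixels could look like:

def get_median_color(patch):
    # Hypothetical helper: per-channel median over non-background pixels.
    pixels = patch.reshape(-1, 3)
    pixels = pixels[np.any(pixels > 0, axis=1)]  # ignore the black background
    if len(pixels) == 0:
        return np.zeros(3, dtype=np.uint8)
    return np.median(pixels, axis=0).astype(np.uint8)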
We can use this as follows (inside a class where self.current_frame is the latest camera frame and self.n is the cube dimension):
results = find_segments(model, self.current_frame)
face, obj, mask, box = get_face(results, n=self.n, homogenity_thres=0.6)
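For a quick sanity check you can draw the returned box on the frame (a hypothetical snippet; box is (x, y, w, h)):

if face is not None:
    x, y, w, h = box
    cv2.rectangle(self.current_frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
    print(face)  # n x n grid of detected sticker colors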
Thanks to @ChristophRackwitz for recommending the use of semantic segmentation models.