Hey brotha, I threw this into FixitAPI.dev and got the following response. I think it may be helpful for ya:
# NOTE(review): the result of this call is not bound to any name in the
# snippet as pasted (an assignment target may have been lost) — confirm.
LayoutLMv3Tokenizer.from_pretrained("microsoft/layoutlmv3-base")

# apply_ocr=False because words and boxes come from the dataset; the
# processor refuses caller-supplied boxes when its image processor is
# configured to run OCR itself.
processor = LayoutLMv3Processor.from_pretrained(
    "microsoft/layoutlmv3-base", apply_ocr=False
)

...  # code elided in the original snippet


def preprocess(example):
    """Encode one dataset example into LayoutLMv3 model inputs.

    Args:
        example: Mapping with the keys read below: ``"image_path"`` (path of
            the page image), ``"words"``, ``"bboxes"`` and ``"labels"``.
            Assumes the three lists are aligned one-to-one — TODO confirm.

    Returns:
        dict with ``input_ids``, ``attention_mask``, ``bbox``,
        ``pixel_values`` and ``labels``; each value has its leading
        dimension removed with ``squeeze(0)``.
    """
    # convert() returns a new, fully loaded image, so the underlying file
    # can be closed as soon as the conversion is done.
    with Image.open(example["image_path"]) as raw_image:
        image = raw_image.convert("RGB")
    image_width, image_height = image.size

    # normalize_bbox is defined outside this snippet; presumably it rescales
    # pixel coordinates to the range the model expects — verify.
    normalized_bboxes = [
        normalize_bbox(bbox, image_width, image_height)
        for bbox in example["bboxes"]
    ]

    # Call the processor itself, not processor.tokenizer: only the processor
    # accepts the image and emits "pixel_values" next to the token fields.
    # Words are passed as a list together with `boxes`, so no
    # is_split_into_words flag is needed.
    encoding = processor(
        image,
        example["words"],
        boxes=normalized_bboxes,
        word_labels=[label2id[label] for label in example["labels"]],
        truncation=True,
        padding="max_length",
        return_tensors="pt",
    )

    # return_tensors="pt" adds a leading batch dimension of size 1; drop it
    # so each example is stored unbatched.
    return {
        "input_ids": encoding["input_ids"].squeeze(0),
        "attention_mask": encoding["attention_mask"].squeeze(0),
        "bbox": encoding["bbox"].squeeze(0),
        "pixel_values": encoding["pixel_values"].squeeze(0),
        "labels": encoding["labels"].squeeze(0),
    }


tokenized_dataset = dataset.map(preprocess, remove_columns=dataset.column_names)