Is this issue fixed ?
I am getting the same error
for this
import torch
from transformers import (
pipeline,
BitsAndBytesConfig,
)
# 1) Build your 4-bit config.
bnb_config = BitsAndBytesConfig(
load_in_4bit=True,
llm_int8_enable_fp32_cpu_offload=True, # keeps some weights in FP32 on CPU
bnb_4bit_quant_type="nf4", # or "fp4", "fp4-dq", etc.
bnb_4bit_compute_dtype=torch.float16, # compute in fp16 on GPU
)
# 2) Create the pipeline, passing quantization_config:
pipe = pipeline(
"image-text-to-text",
model="unsloth/gemma-3-27b-it-unsloth-bnb-4bit",
cache_dir="/mnt/models/gemma3",
trust_remote_code=True,
device_map="auto",
quantization_config=bnb_config, # ← here’s the key
)
messages = [
{
"role": "user",
"content": [
{"type": "image", "url": "https://…/candy.JPG"},
{"type": "text", "text": "What animal is on the candy?"}
]
},
]
print(pipe(text=messages))