[Bug] Qwen3.5 vision training fails with TypeError "UnslothVisionDataCollator is only for image models!"
#4202 opened on Mar 11, 2026
Description
I followed the notebook "https://colab.research.google.com/github/unslothai/notebooks/blob/main/nb/Qwen3_5_(2B)_Vision.ipynb" exactly, except that I downloaded the model beforehand (the Unsloth version), so the `model_name` parameter passed to the `from_pretrained` function is a local path.
Then this error occurs:
```
Traceback (most recent call last):
  File "/home/git/DDESora.new/DDESora/seismic/sftseismic/test1.py", line 68, in <module>
    data_collator = UnslothVisionDataCollator(model, tokenizer), # Must use!
  File "/nas/py310llava/lib/python3.10/site-packages/unsloth_zoo/vision_utils.py", line 665, in __init__
    raise TypeError("Unsloth: UnslothVisionDataCollator is only for image models!")
TypeError: Unsloth: UnslothVisionDataCollator is only for image models!
```
Code: `from unsloth import FastLanguageModel from unsloth import FastVisionModel import torch from datasets import load_dataset from trl import SFTTrainer, SFTConfig from unsloth.trainer import UnslothVisionDataCollator # 视觉数据整理器 from transformers import DataCollatorForSeq2Seq from trl import SFTTrainer, SFTConfig
import dataP1
max_seq_length = 16384 # 可根据你的显存和需求调整 dtype = None # None 表示自动检测(T4 上为 Float16,A100 上为 BFloat16) load_in_4bit = False # 使用 4bit 量化大幅降低显存占用 batch_size = 4
Seed = 3407
ModelPath = "/home/git/Qwen3.5-2B.new" # it is a unsloth version of Qwen3.5-2b
model, tokenizer = FastVisionModel.from_pretrained( model_name = ModelPath,
load_in_4bit = load_in_4bit,
use_gradient_checkpointing = "unsloth",
)
print(tokenizer.dict)
model = FastVisionModel.get_peft_model( model, finetune_vision_layers = True, # False if not finetuning vision layers finetune_language_layers = True, # False if not finetuning language layers finetune_attention_modules = True, # False if not finetuning attention layers finetune_mlp_modules = True, # False if not finetuning MLP layers
r = 16, # The larger, the higher the accuracy, but might overfit
lora_alpha = 16, # Recommended alpha == r at least
lora_dropout = 0,
bias = "none",
random_state = Seed,
use_rslora = False, # We support rank stabilized LoRA
loftq_config = None, # And LoftQ
# target_modules = "all-linear", # Optional now! Can specify a list if needed
use_gradient_checkpointing = "unsloth",
)
print("==="*20) print(model)
DataPath = "dataset.json" dataset = load_dataset("json", data_files=DataPath, split="train") print(dataset[0]) converted_dataset = [dataP1.format_data_for_vlm(sample) for sample in dataset] print("转换后数据示例:", converted_dataset[0])
FastVisionModel.for_training(model) # Enable for training!
trainer = SFTTrainer( model = model, tokenizer = tokenizer, data_collator = UnslothVisionDataCollator(model, tokenizer), # Must use! #data_collator = DataCollatorForSeq2Seq(model, tokenizer), # Must use! train_dataset = converted_dataset, args = SFTConfig( per_device_train_batch_size = 2, gradient_accumulation_steps = 4, warmup_steps = 5, max_steps = 30, # num_train_epochs = 1, # Set this instead of max_steps for full training runs learning_rate = 2e-4, logging_steps = 1, optim = "adamw_8bit", weight_decay = 0.001, lr_scheduler_type = "linear", seed = Seed, output_dir = "outputs", report_to = "none", # For Weights and Biases
# You MUST put the below items for vision finetuning:
remove_unused_columns = False,
dataset_text_field = "",
dataset_kwargs = {"skip_prepare_dataset": True},
max_length = max_seq_length,
),
)
trainer_stats = trainer.train() `
🦥 You can also ask via our Reddit page: https://reddit.com/r/unsloth/