Description
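Somehow, I cannot load my models now: `from unsloth import FastLanguageModel` fails with an ImportError raised by vLLM's compiled extension (`vllm/_C.abi3.so: undefined symbol`). The install cells, the loading code, and the full error output are below.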
%%capture import os if "COLAB_" not in "".join(os.environ.keys()): !pip install unsloth vllm else: # [NOTE] Do the below ONLY in Colab! Use [[pip install unsloth vllm]] !pip install --no-deps unsloth vllm==0.8.5.post1
%%capture import os if "COLAB_" not in "".join(os.environ.keys()): !pip install unsloth vllm else: # [NOTE] Do the below ONLY in Colab! Use [[pip install unsloth vllm]] !pip install --no-deps unsloth vllm==0.8.5.post1
#@title Colab Extra Install { display-mode: "form" } %%capture import os if "COLAB_" not in "".join(os.environ.keys()): !pip install unsloth vllm else: !pip install --no-deps unsloth vllm==0.8.5.post1 # [NOTE] Do the below ONLY in Colab! Use [[pip install unsloth vllm]] # Skip restarting message in Colab import sys, re, requests; modules = list(sys.modules.keys()) for x in modules: sys.modules.pop(x) if "PIL" in x or "google" in x else None !pip install --no-deps bitsandbytes accelerate xformers==0.0.29.post3 peft "trl==0.15.2" triton cut_cross_entropy unsloth_zoo !pip install sentencepiece protobuf "datasets>=3.4.1" huggingface_hub hf_transfer !pip install transformers==4.51.3
# vLLM requirements - vLLM breaks Colab due to reinstalling numpy
f = requests.get("https://raw.githubusercontent.com/vllm-project/vllm/refs/heads/main/requirements/common.txt").content
with open("vllm_requirements.txt", "wb") as file:
file.write(re.sub(rb"(transformers|numpy|xformers)[^\n]{1,}\n", b"", f))
!pip install -r vllm_requirements.txt
from unsloth import FastLanguageModel, is_bfloat16_supported import torch
# load your already merged 16-bit model
model_name = "niklasm222/qwen2.5-3b-grpo-gsm8k-sp-struct-rwd1-full" max_seq_length = 2048
model, tokenizer = FastLanguageModel.from_pretrained( model_name = model_name, max_seq_length = max_seq_length, load_in_4bit = False, # If you merged into 16-bit, just load in normal float16 or CPU fast_inference = True, # If you want to use vLLM for fast generation gpu_memory_utilization = 0.7, )
model.eval()
INFO 05-31 21:27:28 [__init__.py:243] Automatically detected platform cuda. :1: UserWarning: WARNING: Unsloth should be imported before trl, transformers, peft to ensure all optimizations are applied. Your code may run slower or encounter memory issues without these optimizations.
Please restructure your imports with 'import unsloth' at the top of your file. from unsloth import FastLanguageModel, is_bfloat16_supported
ImportError Traceback (most recent call last) in <cell line: 0>() ----> 1 from unsloth import FastLanguageModel, is_bfloat16_supported 2 import torch 3 4 # Optionally: pip install unsloth==2025.3.6 unsloth_zoo==2025.3.4 vllm 5 # Then load your already merged 16-bit model
16 frames /usr/local/lib/python3.11/dist-packages/vllm/platforms/cuda.py in <module> 13 14 # import custom ops, trigger op registration ---> 15 import vllm._C # noqa 16 import vllm.envs as envs 17 from vllm.logger import init_logger
ImportError: /usr/local/lib/python3.11/dist-packages/vllm/_C.abi3.so: undefined symbol: _ZNK3c1011StorageImpl27throw_data_ptr_access_errorEv
NOTE: If your import is failing due to a missing package, you can manually install dependencies using either !pip or !apt.