Monday, February 3, 2025

Using accelerate to offload some model parameters to the CPU

Sometimes the GPU at hand does not have enough memory to run a model (for example, EVA-CLIP-8B). In that case, accelerate can offload part of the parameters to the CPU.

https://github.com/baaivision/EVA/issues/147

https://blog.csdn.net/qq_42363032/article/details/139597486
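The idea behind the two accelerate calls used below: infer_auto_device_map walks the model's modules and plans a placement that fits the per-device memory budgets you pass in max_memory, while dispatch_model then attaches hooks that move weights and activations between devices during the forward pass. A minimal sketch of inspecting the plan before dispatching (budgets mirror the code below; the printed module names depend on the model):

import torch
from transformers import AutoModel
from accelerate import infer_auto_device_map

model = AutoModel.from_pretrained(
    "BAAI/EVA-CLIP-8B", torch_dtype=torch.float16, trust_remote_code=True)

# Plan only -- nothing is moved yet. Keys are module names, values are the
# devices they are assigned to (a GPU index or "cpu").
device_map = infer_auto_device_map(model, max_memory={1: "10GiB", "cpu": "30GiB"})
print(device_map)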

Original code:

https://github.com/baaivision/EVA/tree/master/EVA-CLIP-18B#usage


After the change:


from PIL import Image
import torch
from transformers import AutoModel, CLIPImageProcessor, CLIPTokenizer
from accelerate import infer_auto_device_map, dispatch_model

image_path = "/images.jpg"
model_name_or_path = "BAAI/EVA-CLIP-8B"  # or /path/to/local/EVA-CLIP-8B
image_size = 224

processor = CLIPImageProcessor.from_pretrained("openai/clip-vit-large-patch14")

# Load the fp16 weights on the CPU first. Unlike the original example, there is
# no .to('cuda') here -- the full model does not fit on the GPU.
model = AutoModel.from_pretrained(
    model_name_or_path,
    torch_dtype=torch.float16,
    trust_remote_code=True).eval()

# Plan a per-module placement: up to 10 GiB on GPU 1 (use 0 on a single-GPU
# machine) and up to 30 GiB of CPU RAM for the offloaded modules.
device_map = infer_auto_device_map(model, max_memory={1: "10GiB", "cpu": "30GiB"})
# Attach hooks that move weights and activations between devices at runtime.
model = dispatch_model(model, device_map=device_map)

image = Image.open(image_path)
captions = ["a diagram", "a dog", "a cat"]
tokenizer = CLIPTokenizer.from_pretrained(model_name_or_path)

# The dispatch hooks forward inputs to whichever device each module lives on.
input_ids = tokenizer(captions, return_tensors="pt", padding=True).input_ids.to('cuda')
input_pixels = processor(images=image, return_tensors="pt", padding=True).pixel_values.to('cuda')

with torch.no_grad(), torch.cuda.amp.autocast():
    image_features = model.encode_image(input_pixels)
    text_features = model.encode_text(input_ids)
    image_features /= image_features.norm(dim=-1, keepdim=True)
    text_features /= text_features.norm(dim=-1, keepdim=True)

label_probs = (100.0 * image_features @ text_features.T).softmax(dim=-1)
print(f"Label probs: {label_probs}")
