Part1前言

隨着大語言模型的流行，如何讓大模型在消費級GPU上進行微調訓練成爲了熱點。掌握參數有效微調（Parameter-Efficient Fine-Tuning，PEFT）成爲每個自然語言處理工程師必不可少的技能，正好Hugging Face開源了PEFT庫，讓我們也能夠自己動手去了解參數有效微調。接下來以中文情感分析（二分類）爲例，了解一下參數有效微調。

使用的方法來自這些論文：

LoRA: LoRA: Low-Rank Adaptation of Large Language Models
Prefix Tuning: Prefix-Tuning: Optimizing Continuous Prompts for Generation, P-Tuning v2: Prompt Tuning Can Be Comparable to Fine-tuning Universally Across Scales and Tasks
P-Tuning: GPT Understands, Too
Prompt Tuning: The Power of Scale for Parameter-Efficient Prompt Tuning

Part2結果

接下來是一些基礎設置：

數據：ChnSentiCorp_htl_all
模型：hfl/chinese-roberta-wwm-ext
顯存：Tesla T4 15G
batch_size：64
epoch：3
max_length：86
lr：3e-4

以下是結果，各位自行分析吧：

	全參數微調	prefix-tuning	prompt-tuning	p-tuning	LoRA
總參數	102269186	102637826	102284546	102498562	102564098
可訓練參數	102269186	370178	16898	230914	296450
可訓練參數佔比(%)	100	0.3606	0.0165	0.2252	0.2890
佔用GPU(15G)	5.5G	4.5G	5.0G	5.1G	4.8G
特有參數	/	num_virtual_tokens=20	num_virtual_tokens=20	num_virtual_tokens=20 encoder_hidden_size=128	inference_mode=False, r=8, lora_alpha=16, lora_dropout=0.1
訓練速度	1.13it/s	1.55 it/s	1.35 it/s	1.28 it/s	1.53 it/s
驗證速度	3.36it/s	3.26 it/s	2.70 it/s	2.72 it/s	3.11 it/s
訓練時長(分鐘)	4.6838	4.3513	4.1768	4.1798	3.6353
驗證loss	12.2706	12.1903	13.1484	9.1823	6.3543
準確率	0.6941	0.7617	0.7044	0.8461	0.8976
備註

Part3代碼

最後附上所有代碼：

#!pip install peft==0.2.0
#!pip install transformers==4.28.1
#!pip install accelerate
#!pip install loralib
#!pip install evaluate
#!pip install tqdm
#!pip install datasets

import argparse
import os

import torch
from torch.optim import AdamW
from torch.utils.data import DataLoader
from peft import (
    get_peft_config,
    get_peft_model,
    get_peft_model_state_dict,
    set_peft_model_state_dict,
    PeftType,
    PrefixTuningConfig,
    PromptEncoderConfig,
    PromptTuningConfig,
    LoraConfig,
)

import evaluate
from datasets import load_dataset
from transformers import AutoModelForSequenceClassification, AutoTokenizer, get_linear_schedule_with_warmup, set_seed
from tqdm import tqdm

import peft
print(peft.__version__)

#!wget https://raw.githubusercontent.com/SophonPlus/ChineseNlpCorpus/master/datasets/ChnSentiCorp_htl_all/ChnSentiCorp_htl_all.csv

# Path to the CSV corpus; the file has to be downloaded beforehand.
data_file = "./ChnSentiCorp_htl_all.csv"

# Load the CSV, drop rows whose "review" field is missing, then split the
# single "train" split into train/test with a 0.2 test share and a fixed seed.
dataset = load_dataset("csv", data_files=data_file).filter(
    lambda row: row["review"] is not None
)
datasets = dataset["train"].train_test_split(0.2, seed=123)

model_name_or_path = "hfl/chinese-roberta-wwm-ext"
max_length = 86

# Pad on the left when the checkpoint name contains "gpt", "opt" or "bloom";
# every other checkpoint is padded on the right.
padding_side = (
    "left"
    if any(family in model_name_or_path for family in ("gpt", "opt", "bloom"))
    else "right"
)

tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, padding_side=padding_side)
# Fall back to the EOS token id when the tokenizer defines no pad token id.
if tokenizer.pad_token_id is None:
    tokenizer.pad_token_id = tokenizer.eos_token_id

def process_function(examples):
    """Tokenize a batch of reviews and attach the labels.

    Args:
        examples: A batch from the dataset; its "review" entry is passed to
            the tokenizer and its "label" entry is copied over as "labels".

    Returns:
        The tokenizer output (truncated to ``max_length`` tokens) with an
        extra "labels" entry.
    """
    encoded = tokenizer(
        examples["review"],
        truncation=True,
        max_length=max_length,
    )
    encoded["labels"] = examples["label"]
    return encoded

# Tokenize every split in batches and drop the original CSV columns, so only
# the fields produced by process_function remain.
tokenized_datasets = datasets.map(
    process_function,
    batched=True,
    remove_columns=datasets["train"].column_names,
)
# Accuracy metric used by compute_metrics.
accuracy_metric = evaluate.load("accuracy")

def compute_metrics(eval_pred):
    """Compute accuracy from a ``(predictions, labels)`` pair.

    Args:
        eval_pred: A two-element sequence. The first element holds per-class
            scores and is reduced with ``argmax`` over the last axis; the
            second holds the reference labels.

    Returns:
        Whatever ``accuracy_metric.compute`` returns for the predicted
        classes and the references.
    """
    scores, references = eval_pred
    predicted_classes = scores.argmax(axis=-1)
    return accuracy_metric.compute(
        predictions=predicted_classes,
        references=references,
    )


def collate_fn(examples):
    """Pad a list of tokenized examples to the longest one, as PyTorch tensors."""
    padded_batch = tokenizer.pad(examples, padding="longest", return_tensors="pt")
    return padded_batch


# Build the data loaders: the training split is shuffled, the evaluation
# split keeps its order. Both pad each batch through collate_fn.
batch_size = 64
train_dataloader = DataLoader(
    tokenized_datasets["train"],
    batch_size=batch_size,
    shuffle=True,
    collate_fn=collate_fn,
)
eval_dataloader = DataLoader(
    tokenized_datasets["test"],
    batch_size=batch_size,
    shuffle=False,
    collate_fn=collate_fn,
)

# Trainer configuration.
# Set p_type to "prefix-tuning", "prompt-tuning", "p-tuning" or "lora" to pick
# a PEFT method, or to None for full-parameter fine-tuning (no PEFT wrapper).
p_type = "lora"
if p_type is None:
    # Full fine-tuning: nothing to configure; the model is used as loaded.
    peft_type = None
    peft_config = None
elif p_type == "prefix-tuning":
    peft_type = PeftType.PREFIX_TUNING
    peft_config = PrefixTuningConfig(task_type="SEQ_CLS", num_virtual_tokens=20)
elif p_type == "prompt-tuning":
    peft_type = PeftType.PROMPT_TUNING
    peft_config = PromptTuningConfig(task_type="SEQ_CLS", num_virtual_tokens=20)
elif p_type == "p-tuning":
    peft_type = PeftType.P_TUNING
    peft_config = PromptEncoderConfig(task_type="SEQ_CLS", num_virtual_tokens=20, encoder_hidden_size=128)
elif p_type == "lora":
    peft_type = PeftType.LORA
    peft_config = LoraConfig(task_type="SEQ_CLS", inference_mode=False, r=8, lora_alpha=16, lora_dropout=0.1)
else:
    # Fail here with a clear message instead of a NameError on peft_config
    # further down when the method name is misspelled.
    raise ValueError(
        f"Unknown p_type {p_type!r}; expected None, 'prefix-tuning', "
        "'prompt-tuning', 'p-tuning' or 'lora'."
    )
# print(peft_type)

model = AutoModelForSequenceClassification.from_pretrained(model_name_or_path, num_labels=2)
if p_type is None:
    # Full fine-tuning: the plain model is reported through the local helper
    # below, which prints the same summary line format.
    def print_trainable_parameters(model):
        """Print the trainable and total parameter counts of ``model``."""
        total_count = 0
        trainable_count = 0
        for _name, weight in model.named_parameters():
            size = weight.numel()
            # If using DS Zero 3 and the weights are initialized empty, the
            # size is read from ds_numel instead.
            if size == 0 and hasattr(weight, "ds_numel"):
                size = weight.ds_numel

            total_count += size
            if weight.requires_grad:
                trainable_count += size
        print(
            f"trainable params: {trainable_count} || all params: {total_count} || trainable%: {100 * trainable_count / total_count}"
        )

    print_trainable_parameters(model)
else:
    # Wrap the base model with the selected PEFT method and report its
    # parameter counts.
    model = get_peft_model(model, peft_config)
    model.print_trainable_parameters()

# NOTE(review): the same learning rate is used whatever p_type is, including
# full-parameter fine-tuning; it may be too high for that case - confirm.
lr = 3e-4
num_epochs = 3
optimizer = AdamW(params=model.parameters(), lr=lr)

# Linear schedule with warmup: the warmup covers 6% of all optimizer steps
# (one step per training batch, over every epoch).
total_training_steps = len(train_dataloader) * num_epochs
lr_scheduler = get_linear_schedule_with_warmup(
    optimizer=optimizer,
    num_warmup_steps=0.06 * total_training_steps,
    num_training_steps=total_training_steps,
)

# Train for num_epochs epochs, evaluating on the held-out split after each
# epoch, and report the total wall-clock time in minutes at the end.

# Use the GPU when one is available; otherwise fall back to the CPU so the
# script still runs (slowly) on machines without CUDA.
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)
metric = evaluate.load("accuracy")
import time
start = time.time()
for epoch in range(num_epochs):
    # ---- training pass ----
    model.train()
    for batch in tqdm(train_dataloader):
        batch = batch.to(device)
        outputs = model(**batch)
        loss = outputs.loss
        loss.backward()
        optimizer.step()
        lr_scheduler.step()  # the schedule advances once per batch
        optimizer.zero_grad()

    # ---- evaluation pass ----
    model.eval()
    # Sum (not mean) of the per-batch losses over the evaluation split.
    total_loss = 0.0
    for batch in tqdm(eval_dataloader):
        batch = batch.to(device)
        with torch.no_grad():
            outputs = model(**batch)
        # .item() turns the loss into a plain Python float, so the running
        # total is a number rather than a device tensor and prints cleanly.
        total_loss += outputs.loss.item()
        metric.add_batch(
            predictions=outputs.logits.argmax(dim=-1),
            references=batch["labels"],
        )

    # compute() aggregates the batches added above for this epoch.
    eval_metric = metric.compute()
    print(f"epoch {epoch} loss {total_loss}:", eval_metric)
end = time.time()

print("耗時：{}分鐘".format((end-start) / 60))

參考：

https://github.com/huggingface/peft/

你也可以動手參數有效微調：LoRA、Prefix Tuning、P-Tuning、Prompt Tuning

Part1前言

Part2結果

Part3代碼

Python多線程編程深度探索：從入門到實戰

《期貨-市場技術分析》讀書筆記

《日本蠟燭圖》讀書筆記 & 技術分析回測

mongodb處理json數據很好

35K*14 薪，入職了！這公司只要不裁員，我能一直呆下去！

怎麼讓英文大語言模型支持中文？（三）進行指令微調

怎麼讓英文大語言模型支持中文？（一）繼續預訓練

怎麼讓英文大語言模型支持中文？（一）構建自己的tokenization

anaconda和python之間的對應關係

【python】linux下安裝python的一般方法

https://yachay.unat.edu.pe/blog/index.php?comment_area=format_blog&comment_component=blog&comment_co

linux以太網驅動總結