Mirror of https://github.com/leigest519/ScreenCoder.git
synced 2026-02-13 02:02:48 +00:00
Add post-training folder
@@ -0,0 +1,65 @@
# Copyright 2025 the LlamaFactory team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os

from openai import OpenAI
from transformers.utils.versions import require_version


require_version("openai>=1.5.0", "To fix: pip install openai>=1.5.0")


def main():
    client = OpenAI(
        api_key="{}".format(os.getenv("API_KEY", "0")),
        base_url="http://localhost:{}/v1".format(os.getenv("API_PORT", 8000)),
    )
    messages = []
    messages.append(
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "Output the color and number of each box."},
                {
                    "type": "image_url",
                    "image_url": {"url": "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen2-VL/boxes.png"},
                },
            ],
        }
    )
    result = client.chat.completions.create(messages=messages, model="test")
    messages.append(result.choices[0].message)
    print("Round 1:", result.choices[0].message.content)
    # The image shows a pyramid of colored blocks with numbers on them. Here are the colors and numbers of ...
    messages.append(
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "What kind of flower is this?"},
                {
                    "type": "image_url",
                    "image_url": {"url": "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen2-VL/flowers.jpg"},
                },
            ],
        }
    )
    result = client.chat.completions.create(messages=messages, model="test")
    messages.append(result.choices[0].message)
    print("Round 2:", result.choices[0].message.content)
    # The image shows a cluster of forget-me-not flowers. Forget-me-nots are small ...


if __name__ == "__main__":
    main()
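Note: this demo (and the tool-calling demo below) assumes an OpenAI-compatible server is already running locally, with its port taken from the API_PORT environment variable (default 8000) and the key from API_KEY; the model="test" argument is a placeholder because the server serves whichever model it was launched with. With LLaMA-Factory this is typically the `llamafactory-cli api` entry point; the exact launch configuration is not part of this diff.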
@@ -0,0 +1,77 @@
# Copyright 2025 the LlamaFactory team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import json
import os

from openai import OpenAI
from transformers.utils.versions import require_version


require_version("openai>=1.5.0", "To fix: pip install openai>=1.5.0")


def calculate_gpa(grades: list[str], hours: list[int]) -> float:
    grade_to_score = {"A": 4, "B": 3, "C": 2}
    total_score, total_hour = 0, 0
    for grade, hour in zip(grades, hours):
        total_score += grade_to_score[grade] * hour
        total_hour += hour
    return round(total_score / total_hour, 2)


def main():
    client = OpenAI(
        api_key="{}".format(os.getenv("API_KEY", "0")),
        base_url="http://localhost:{}/v1".format(os.getenv("API_PORT", 8000)),
    )
    tools = [
        {
            "type": "function",
            "function": {
                "name": "calculate_gpa",
                "description": "Calculate the Grade Point Average (GPA) based on grades and credit hours",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "grades": {"type": "array", "items": {"type": "string"}, "description": "The grades"},
                        "hours": {"type": "array", "items": {"type": "integer"}, "description": "The credit hours"},
                    },
                    "required": ["grades", "hours"],
                },
            },
        }
    ]
    tool_map = {"calculate_gpa": calculate_gpa}

    messages = []
    messages.append({"role": "user", "content": "My grades are A, A, B, and C. The credit hours are 3, 4, 3, and 2."})
    result = client.chat.completions.create(messages=messages, model="test", tools=tools)
    if result.choices[0].message.tool_calls is None:
        raise ValueError("Cannot retrieve function call from the response.")

    messages.append(result.choices[0].message)
    tool_call = result.choices[0].message.tool_calls[0].function
    print(tool_call)
    # Function(arguments='{"grades": ["A", "A", "B", "C"], "hours": [3, 4, 3, 2]}', name='calculate_gpa')
    name, arguments = tool_call.name, json.loads(tool_call.arguments)
    tool_result = tool_map[name](**arguments)
    messages.append({"role": "tool", "content": json.dumps({"gpa": tool_result}, ensure_ascii=False)})
    result = client.chat.completions.create(messages=messages, model="test", tools=tools)
    print(result.choices[0].message.content)
    # Based on the grades and credit hours you provided, your Grade Point Average (GPA) is 3.42.


if __name__ == "__main__":
    main()
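As a quick sanity check on the expected output above: with grades A, A, B, C (scores 4, 4, 3, 2) and credit hours 3, 4, 3, 2, the weighted sum is 4*3 + 4*4 + 3*3 + 2*2 = 41 over 12 credit hours, so 41 / 12 ≈ 3.42, which matches the GPA quoted in the final comment.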
@@ -0,0 +1,112 @@
# Copyright 2025 the LlamaFactory team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import json
import os
from collections import OrderedDict
from typing import Any

import fire
import torch
from huggingface_hub import split_torch_state_dict_into_shards
from safetensors.torch import save_file
from tqdm import tqdm
from transformers.modeling_utils import SAFE_WEIGHTS_INDEX_NAME, SAFE_WEIGHTS_NAME, WEIGHTS_INDEX_NAME, WEIGHTS_NAME


CONFIG_NAME = "config.json"


def save_weight(input_dir: str, output_dir: str, shard_size: str, save_safetensors: bool):
    baichuan2_state_dict: dict[str, torch.Tensor] = OrderedDict()
    for filepath in tqdm(os.listdir(input_dir), desc="Load weights"):
        if os.path.isfile(os.path.join(input_dir, filepath)) and filepath.endswith(".bin"):
            shard_weight = torch.load(os.path.join(input_dir, filepath), map_location="cpu")
            baichuan2_state_dict.update(shard_weight)

    llama_state_dict: dict[str, torch.Tensor] = OrderedDict()
    for key, value in tqdm(baichuan2_state_dict.items(), desc="Convert format"):
        if "W_pack" in key:
            proj_size = value.size(0) // 3
            llama_state_dict[key.replace("W_pack", "q_proj")] = value[:proj_size, :]
            llama_state_dict[key.replace("W_pack", "k_proj")] = value[proj_size : 2 * proj_size, :]
            llama_state_dict[key.replace("W_pack", "v_proj")] = value[2 * proj_size :, :]
        elif "lm_head" in key:
            llama_state_dict[key] = torch.nn.functional.normalize(value)
        else:
            llama_state_dict[key] = value

    weights_name = SAFE_WEIGHTS_NAME if save_safetensors else WEIGHTS_NAME
    filename_pattern = weights_name.replace(".bin", "{suffix}.bin").replace(".safetensors", "{suffix}.safetensors")
    state_dict_split = split_torch_state_dict_into_shards(
        llama_state_dict, filename_pattern=filename_pattern, max_shard_size=shard_size
    )
    for shard_file, tensors in tqdm(state_dict_split.filename_to_tensors.items(), desc="Save weights"):
        shard = {tensor: llama_state_dict[tensor].contiguous() for tensor in tensors}
        if save_safetensors:
            save_file(shard, os.path.join(output_dir, shard_file), metadata={"format": "pt"})
        else:
            torch.save(shard, os.path.join(output_dir, shard_file))

    if not state_dict_split.is_sharded:
        print(f"Model weights saved in {os.path.join(output_dir, weights_name)}.")
    else:
        index = {
            "metadata": state_dict_split.metadata,
            "weight_map": state_dict_split.tensor_to_filename,
        }
        index_name = SAFE_WEIGHTS_INDEX_NAME if save_safetensors else WEIGHTS_INDEX_NAME
        with open(os.path.join(output_dir, index_name), "w", encoding="utf-8") as f:
            json.dump(index, f, indent=2, sort_keys=True)

        print(f"Model weights saved in {output_dir}.")


def save_config(input_dir: str, output_dir: str):
    with open(os.path.join(input_dir, CONFIG_NAME), encoding="utf-8") as f:
        llama2_config_dict: dict[str, Any] = json.load(f)

    llama2_config_dict["architectures"] = ["LlamaForCausalLM"]
    llama2_config_dict.pop("auto_map", None)
    llama2_config_dict.pop("tokenizer_class", None)
    llama2_config_dict["model_type"] = "llama"

    with open(os.path.join(output_dir, CONFIG_NAME), "w", encoding="utf-8") as f:
        json.dump(llama2_config_dict, f, indent=2)

    print(f"Model config saved in {os.path.join(output_dir, CONFIG_NAME)}")


def llamafy_baichuan2(
    input_dir: str,
    output_dir: str,
    shard_size: str = "2GB",
    save_safetensors: bool = True,
):
    r"""Convert the Baichuan2-7B model into the same format as LLaMA2-7B.

    Usage: python llamafy_baichuan2.py --input_dir input --output_dir output
    Converted model: https://huggingface.co/hiyouga/Baichuan2-7B-Base-LLaMAfied
    """
    try:
        os.makedirs(output_dir, exist_ok=False)
    except Exception as e:
        raise RuntimeError("Output dir already exists.") from e

    save_weight(input_dir, output_dir, shard_size, save_safetensors)
    save_config(input_dir, output_dir)


if __name__ == "__main__":
    fire.Fire(llamafy_baichuan2)
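Note on the conversion above: Baichuan2 stores the query, key and value projections packed row-wise in a single W_pack matrix, so slicing it into three equal row blocks recovers q_proj, k_proj and v_proj; the lm_head row normalization mirrors Baichuan2's normalized output head. A minimal sketch of the slicing, assuming a hypothetical hidden size of 4:

    import torch
    w_pack = torch.randn(3 * 4, 4)  # rows stacked as [q | k | v]
    q, k, v = w_pack[:4], w_pack[4:8], w_pack[8:]  # same slicing as in save_weight()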
post-training/LLaMA-Factory/scripts/convert_ckpt/llamafy_qwen.py (new file, 165 lines)
@@ -0,0 +1,165 @@
# Copyright 2025 the LlamaFactory team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import json
import os
from collections import OrderedDict
from typing import Any

import fire
import torch
from huggingface_hub import split_torch_state_dict_into_shards
from safetensors import safe_open
from safetensors.torch import save_file
from tqdm import tqdm
from transformers.modeling_utils import SAFE_WEIGHTS_INDEX_NAME, SAFE_WEIGHTS_NAME, WEIGHTS_INDEX_NAME, WEIGHTS_NAME
from transformers.utils import check_min_version


try:
    check_min_version("4.34.0")
except Exception:
    raise ValueError("Please upgrade `transformers` to 4.34.0")


CONFIG_NAME = "config.json"


def save_weight(input_dir: str, output_dir: str, shard_size: str, save_safetensors: bool) -> str:
    qwen_state_dict: dict[str, torch.Tensor] = OrderedDict()
    for filepath in tqdm(os.listdir(input_dir), desc="Load weights"):
        if os.path.isfile(os.path.join(input_dir, filepath)) and filepath.endswith(".safetensors"):
            with safe_open(os.path.join(input_dir, filepath), framework="pt", device="cpu") as f:
                for key in f.keys():
                    qwen_state_dict[key] = f.get_tensor(key)

    llama_state_dict: dict[str, torch.Tensor] = OrderedDict()
    torch_dtype = None
    for key, value in tqdm(qwen_state_dict.items(), desc="Convert format"):
        if torch_dtype is None:
            torch_dtype = value.dtype
        if "wte" in key:
            llama_state_dict["model.embed_tokens.weight"] = value
        elif "ln_f" in key:
            llama_state_dict["model.norm.weight"] = value
        else:
            key = key.replace("transformer.h", "model.layers")
            if "attn.c_attn" in key:
                proj_size = value.size(0) // 3
                llama_state_dict[key.replace("attn.c_attn", "self_attn.q_proj")] = value[:proj_size, ...]
                llama_state_dict[key.replace("attn.c_attn", "self_attn.k_proj")] = value[
                    proj_size : 2 * proj_size, ...
                ]
                llama_state_dict[key.replace("attn.c_attn", "self_attn.v_proj")] = value[2 * proj_size :, ...]
            elif "attn.c_proj" in key:
                llama_state_dict[key.replace("attn.c_proj", "self_attn.o_proj")] = value
                llama_state_dict[key.replace("attn.c_proj.weight", "self_attn.o_proj.bias")] = torch.zeros_like(
                    value[:, 0]
                ).squeeze()
            elif "ln_1" in key:
                llama_state_dict[key.replace("ln_1", "input_layernorm")] = value
            elif "ln_2" in key:
                llama_state_dict[key.replace("ln_2", "post_attention_layernorm")] = value
            elif "mlp.w1" in key:
                llama_state_dict[key.replace("mlp.w1", "mlp.up_proj")] = value
            elif "mlp.w2" in key:
                llama_state_dict[key.replace("mlp.w2", "mlp.gate_proj")] = value
            elif "mlp.c_proj" in key:
                llama_state_dict[key.replace("mlp.c_proj", "mlp.down_proj")] = value
            elif "lm_head" in key:
                llama_state_dict[key] = value
            else:
                raise KeyError(f"Unable to process key {key}")

    weights_name = SAFE_WEIGHTS_NAME if save_safetensors else WEIGHTS_NAME
    filename_pattern = weights_name.replace(".bin", "{suffix}.bin").replace(".safetensors", "{suffix}.safetensors")
    state_dict_split = split_torch_state_dict_into_shards(
        llama_state_dict, filename_pattern=filename_pattern, max_shard_size=shard_size
    )
    for shard_file, tensors in tqdm(state_dict_split.filename_to_tensors.items(), desc="Save weights"):
        shard = {tensor: llama_state_dict[tensor].contiguous() for tensor in tensors}
        if save_safetensors:
            save_file(shard, os.path.join(output_dir, shard_file), metadata={"format": "pt"})
        else:
            torch.save(shard, os.path.join(output_dir, shard_file))

    if not state_dict_split.is_sharded:
        print(f"Model weights saved in {os.path.join(output_dir, weights_name)}.")
    else:
        index = {
            "metadata": state_dict_split.metadata,
            "weight_map": state_dict_split.tensor_to_filename,
        }
        index_name = SAFE_WEIGHTS_INDEX_NAME if save_safetensors else WEIGHTS_INDEX_NAME
        with open(os.path.join(output_dir, index_name), "w", encoding="utf-8") as f:
            json.dump(index, f, indent=2, sort_keys=True)

        print(f"Model weights saved in {output_dir}.")

    return str(torch_dtype).replace("torch.", "")


def save_config(input_dir: str, output_dir: str, torch_dtype: str):
    with open(os.path.join(input_dir, CONFIG_NAME), encoding="utf-8") as f:
        qwen_config_dict: dict[str, Any] = json.load(f)

    llama2_config_dict: dict[str, Any] = OrderedDict()
    llama2_config_dict["architectures"] = ["LlamaForCausalLM"]
    llama2_config_dict["hidden_act"] = "silu"
    llama2_config_dict["hidden_size"] = qwen_config_dict["hidden_size"]
    llama2_config_dict["initializer_range"] = qwen_config_dict["initializer_range"]
    llama2_config_dict["intermediate_size"] = qwen_config_dict["intermediate_size"] // 2
    llama2_config_dict["max_position_embeddings"] = qwen_config_dict["max_position_embeddings"]
    llama2_config_dict["model_type"] = "llama"
    llama2_config_dict["num_attention_heads"] = qwen_config_dict["num_attention_heads"]
    llama2_config_dict["num_hidden_layers"] = qwen_config_dict["num_hidden_layers"]
    llama2_config_dict["num_key_value_heads"] = qwen_config_dict["hidden_size"] // qwen_config_dict["kv_channels"]
    llama2_config_dict["pretraining_tp"] = 1
    llama2_config_dict["rms_norm_eps"] = qwen_config_dict["layer_norm_epsilon"]
    llama2_config_dict["rope_scaling"] = None
    llama2_config_dict["tie_word_embeddings"] = qwen_config_dict["tie_word_embeddings"]
    llama2_config_dict["torch_dtype"] = torch_dtype
    llama2_config_dict["transformers_version"] = "4.34.0"
    llama2_config_dict["use_cache"] = True
    llama2_config_dict["vocab_size"] = qwen_config_dict["vocab_size"]
    llama2_config_dict["attention_bias"] = True

    with open(os.path.join(output_dir, CONFIG_NAME), "w", encoding="utf-8") as f:
        json.dump(llama2_config_dict, f, indent=2)

    print(f"Model config saved in {os.path.join(output_dir, CONFIG_NAME)}")


def llamafy_qwen(
    input_dir: str,
    output_dir: str,
    shard_size: str = "2GB",
    save_safetensors: bool = False,
):
    r"""Convert the Qwen models into the same format as LLaMA2.

    Usage: python llamafy_qwen.py --input_dir input --output_dir output
    Converted model: https://huggingface.co/hiyouga/Qwen-14B-Chat-LLaMAfied
    """
    try:
        os.makedirs(output_dir, exist_ok=False)
    except Exception as e:
        raise RuntimeError("Output dir already exists.") from e

    torch_dtype = save_weight(input_dir, output_dir, shard_size, save_safetensors)
    save_config(input_dir, output_dir, torch_dtype)


if __name__ == "__main__":
    fire.Fire(llamafy_qwen)
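Note: the config mapping above halves Qwen's reported intermediate_size, which appears to be because the original Qwen config counts both halves of the gated MLP (w1 and w2) while the LLaMA config expects the size of a single projection, and num_key_value_heads is derived as hidden_size // kv_channels because the Qwen config does not store it directly. These are inferences from the code above rather than statements taken from the Qwen documentation.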
@@ -0,0 +1,39 @@
# Copyright 2025 the LlamaFactory team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from transformers import Llama4Config, Llama4ForConditionalGeneration, Llama4TextConfig, Llama4VisionConfig


if __name__ == "__main__":
    vision_config = Llama4VisionConfig(
        hidden_size=1408,
        image_size=336,
        intermediate_size=5632,
        num_attention_heads=16,
        num_hidden_layers=4,
        vision_output_dim=4096,
    )
    text_config = Llama4TextConfig(
        hidden_size=512,
        intermediate_size=1024,
        intermediate_size_mlp=1024,
        num_hidden_layers=4,
        num_attention_heads=8,
        num_key_value_heads=2,
        head_dim=512 // 8,
        num_local_experts=2,
    )
    config = Llama4Config(vision_config=vision_config, text_config=text_config)
    model = Llama4ForConditionalGeneration._from_config(config)
    model.save_pretrained("tiny-llama4")
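The script above only builds a tiny, randomly initialized Llama 4 checkpoint, which is useful for tests and CI rather than inference. A minimal sketch of loading it back, assuming the default "tiny-llama4" output directory:

    from transformers import Llama4ForConditionalGeneration
    model = Llama4ForConditionalGeneration.from_pretrained("tiny-llama4")
    print(sum(p.numel() for p in model.parameters()))  # small parameter count, random weights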
post-training/LLaMA-Factory/scripts/eval_bleu_rouge.py (new file, 79 lines)
@@ -0,0 +1,79 @@
# Copyright 2025 the LlamaFactory team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import json
import logging
import time

import fire
from datasets import load_dataset


try:
    import jieba  # type: ignore
    from nltk.translate.bleu_score import SmoothingFunction, sentence_bleu  # type: ignore
    from rouge_chinese import Rouge  # type: ignore

    jieba.setLogLevel(logging.CRITICAL)
    jieba.initialize()
except ImportError:
    print("Please install llamafactory with `pip install -e .[metrics]`.")
    raise


def compute_metrics(sample):
    hypothesis = list(jieba.cut(sample["predict"]))
    reference = list(jieba.cut(sample["label"]))

    bleu_score = sentence_bleu(
        [list(sample["label"])],
        list(sample["predict"]),
        smoothing_function=SmoothingFunction().method3,
    )

    if len(" ".join(hypothesis).split()) == 0 or len(" ".join(reference).split()) == 0:
        result = {"rouge-1": {"f": 0.0}, "rouge-2": {"f": 0.0}, "rouge-l": {"f": 0.0}}
    else:
        rouge = Rouge()
        scores = rouge.get_scores(" ".join(hypothesis), " ".join(reference))
        result = scores[0]

    metric_result = {}
    for k, v in result.items():
        metric_result[k] = round(v["f"] * 100, 4)

    metric_result["bleu-4"] = round(bleu_score * 100, 4)

    return metric_result


def main(filename: str):
    start_time = time.time()
    dataset = load_dataset("json", data_files=filename, split="train")
    dataset = dataset.map(compute_metrics, num_proc=8, remove_columns=dataset.column_names)
    score_dict = dataset.to_dict()

    average_score = {}
    for task, scores in sorted(score_dict.items(), key=lambda x: x[0]):
        print(f"{task}: {sum(scores) / len(scores):.4f}")
        average_score[task] = sum(scores) / len(scores)

    with open("predictions_score.json", "w", encoding="utf-8") as f:
        json.dump(average_score, f, indent=4)

    print(f"\nDone in {time.time() - start_time:.3f}s.\nScore file saved to predictions_score.json")


if __name__ == "__main__":
    fire.Fire(main)
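The evaluation script above expects a JSON/JSONL prediction file with one record per sample containing "predict" and "label" fields, for example (an illustrative record, not taken from the diff):

    {"predict": "The capital of France is Paris.", "label": "Paris is the capital of France."}

It reports a character-level BLEU-4 score and jieba-segmented ROUGE F1 scores, averaged over the whole file.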
post-training/LLaMA-Factory/scripts/llama_pro.py (new file, 129 lines)
@@ -0,0 +1,129 @@
# Copyright 2025 Tencent Inc. and the LlamaFactory team.
#
# This code is inspired by Tencent's LLaMA-Pro library.
# https://github.com/TencentARC/LLaMA-Pro/blob/main/scripts/block_expansion.py
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import json
import os
from collections import OrderedDict
from typing import TYPE_CHECKING

import fire
import torch
from huggingface_hub import split_torch_state_dict_into_shards
from safetensors.torch import save_file
from tqdm import tqdm
from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer, PreTrainedModel
from transformers.modeling_utils import SAFE_WEIGHTS_INDEX_NAME, SAFE_WEIGHTS_NAME, WEIGHTS_INDEX_NAME, WEIGHTS_NAME


if TYPE_CHECKING:
    from transformers import PretrainedConfig


def change_name(name: str, old_index: int, new_index: int) -> str:
    return name.replace(f".{old_index:d}.", f".{new_index:d}.")


def block_expansion(
    model_name_or_path: str,
    output_dir: str,
    num_expand: int,
    shard_size: str = "5GB",
    save_safetensors: bool = True,
):
    r"""Perform block expansion for LLaMA, Mistral, Qwen2 or Yi models.

    Usage: python llama_pro.py --model_name_or_path meta-llama/Llama-2-7b-hf --output_dir llama2_pro --num_expand 8
    """
    config: PretrainedConfig = AutoConfig.from_pretrained(model_name_or_path, trust_remote_code=True)
    num_layers = getattr(config, "num_hidden_layers")
    if num_layers % num_expand != 0:
        raise ValueError(f"`num_layers` {num_layers} should be divisible by `num_expand` {num_expand}.")

    setattr(config, "num_hidden_layers", num_layers + num_expand)
    config.save_pretrained(output_dir)

    tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, trust_remote_code=True)
    tokenizer.save_pretrained(output_dir)

    print(f"Expanding model of {num_layers} layers to {num_layers + num_expand} layers.")
    model = AutoModelForCausalLM.from_pretrained(
        model_name_or_path, torch_dtype="auto", device_map="cpu", trust_remote_code=True, low_cpu_mem_usage=True
    )
    assert isinstance(model, PreTrainedModel)  # type hint
    if save_safetensors and getattr(model.config, "tie_word_embeddings", False):
        del model.lm_head  # safetensors does not allow shared weights

    split = num_layers // num_expand
    layer_cnt = 0
    state_dict = model.state_dict()
    output_state_dict: dict[str, torch.Tensor] = OrderedDict()
    for i in range(num_layers):
        for key, value in state_dict.items():
            if f".{i:d}." in key:
                output_state_dict[change_name(key, i, layer_cnt)] = value

        print(f"Add layer {layer_cnt} copied from layer {i}.")
        layer_cnt += 1
        if (i + 1) % split == 0:
            for key, value in state_dict.items():
                if f".{i:d}." in key:
                    if "down_proj" in key or "o_proj" in key:
                        output_state_dict[change_name(key, i, layer_cnt)] = torch.zeros_like(value)
                    else:
                        output_state_dict[change_name(key, i, layer_cnt)] = torch.clone(value)

            print(f"Add layer {layer_cnt} expanded from layer {i}.")
            layer_cnt += 1

    for key, value in state_dict.items():
        if key not in output_state_dict:
            output_state_dict[key] = value

    weights_name = SAFE_WEIGHTS_NAME if save_safetensors else WEIGHTS_NAME
    filename_pattern = weights_name.replace(".bin", "{suffix}.bin").replace(".safetensors", "{suffix}.safetensors")
    state_dict_split = split_torch_state_dict_into_shards(
        output_state_dict, filename_pattern=filename_pattern, max_shard_size=shard_size
    )
    for shard_file, tensors in tqdm(state_dict_split.filename_to_tensors.items(), desc="Save weights"):
        shard = {tensor: output_state_dict[tensor].contiguous() for tensor in tensors}
        if save_safetensors:
            save_file(shard, os.path.join(output_dir, shard_file), metadata={"format": "pt"})
        else:
            torch.save(shard, os.path.join(output_dir, shard_file))

    if not state_dict_split.is_sharded:
        print(f"Model weights saved in {os.path.join(output_dir, weights_name)}.")
    else:
        index = {
            "metadata": state_dict_split.metadata,
            "weight_map": state_dict_split.tensor_to_filename,
        }
        index_name = SAFE_WEIGHTS_INDEX_NAME if save_safetensors else WEIGHTS_INDEX_NAME
        with open(os.path.join(output_dir, index_name), "w", encoding="utf-8") as f:
            json.dump(index, f, indent=2, sort_keys=True)

        print(f"Model weights saved in {output_dir}.")

    print("- Fine-tune this model with:")
    print(f"model_name_or_path: {output_dir}")
    print("finetuning_type: freeze")
    print(f"freeze_trainable_layers: {num_expand}")
    print("use_llama_pro: true")


if __name__ == "__main__":
    fire.Fire(block_expansion)
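A concrete example of the interleaving performed by the block-expansion script above: with num_layers = 32 and num_expand = 8, split = 4, so one new layer is appended after every 4 original layers, giving 40 layers in total. Each new layer is a copy of the preceding layer whose o_proj and down_proj weights are zeroed, so at initialization it contributes nothing beyond the residual connection and the expanded model reproduces the original model's outputs.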
post-training/LLaMA-Factory/scripts/loftq_init.py (new file, 88 lines)
@@ -0,0 +1,88 @@
# Copyright 2025 HuggingFace Inc. and the LlamaFactory team.
#
# This code is based on HuggingFace's PEFT library.
# https://github.com/huggingface/peft/blob/v0.10.0/examples/loftq_finetuning/quantize_save_load.py
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
from typing import TYPE_CHECKING

import fire
from peft import LoftQConfig, LoraConfig, TaskType, get_peft_model
from transformers import AutoModelForCausalLM, AutoTokenizer


if TYPE_CHECKING:
    from transformers import PreTrainedModel


def quantize_loftq(
    model_name_or_path: str,
    output_dir: str,
    loftq_bits: int = 4,
    loftq_iter: int = 4,
    lora_alpha: int = None,
    lora_rank: int = 16,
    lora_dropout: float = 0,
    lora_target: tuple = ("q_proj", "v_proj"),
    save_safetensors: bool = True,
):
    r"""Initialize LoRA weights with LoRA-fine-tuning-aware Quantization (LoftQ).

    Usage: python loftq_init.py --model_name_or_path path_to_model --output_dir output_dir
    """
    if isinstance(lora_target, str):
        lora_target = [name.strip() for name in lora_target.split(",")]

    tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, trust_remote_code=True)
    model = AutoModelForCausalLM.from_pretrained(model_name_or_path, trust_remote_code=True, torch_dtype="auto")

    loftq_config = LoftQConfig(loftq_bits=loftq_bits, loftq_iter=loftq_iter)
    lora_config = LoraConfig(
        task_type=TaskType.CAUSAL_LM,
        inference_mode=True,
        r=lora_rank,
        lora_alpha=lora_alpha if lora_alpha is not None else lora_rank * 2,
        lora_dropout=lora_dropout,
        target_modules=lora_target,
        init_lora_weights="loftq",
        loftq_config=loftq_config,
    )

    # Init LoftQ model
    print("Initializing LoftQ weights; this may take several minutes, please wait patiently.")
    peft_model = get_peft_model(model, lora_config)
    loftq_dir = os.path.join(output_dir, "loftq_init")

    # Save LoftQ model
    setattr(peft_model.peft_config["default"], "base_model_name_or_path", os.path.abspath(output_dir))
    setattr(peft_model.peft_config["default"], "init_lora_weights", True)  # don't apply loftq again
    peft_model.save_pretrained(loftq_dir, safe_serialization=save_safetensors)
    print(f"Adapter weights saved in {loftq_dir}")

    # Save base model
    base_model: PreTrainedModel = peft_model.unload()
    base_model.save_pretrained(output_dir, safe_serialization=save_safetensors)
    tokenizer.save_pretrained(output_dir)
    print(f"Model weights saved in {output_dir}")

    print("- Fine-tune this model with:")
    print(f"model_name_or_path: {output_dir}")
    print(f"adapter_name_or_path: {loftq_dir}")
    print("finetuning_type: lora")
    print(f"quantization_bit: {loftq_bits}")


if __name__ == "__main__":
    fire.Fire(quantize_loftq)
post-training/LLaMA-Factory/scripts/pissa_init.py (new file, 86 lines)
@@ -0,0 +1,86 @@
# Copyright 2025 HuggingFace Inc. and the LlamaFactory team.
#
# This code is based on HuggingFace's PEFT library.
# https://github.com/huggingface/peft/blob/v0.11.0/examples/pissa_finetuning/preprocess.py
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
from typing import TYPE_CHECKING

import fire
from peft import LoraConfig, TaskType, get_peft_model
from transformers import AutoModelForCausalLM, AutoTokenizer


if TYPE_CHECKING:
    from transformers import PreTrainedModel


def quantize_pissa(
    model_name_or_path: str,
    output_dir: str,
    pissa_iter: int = 16,
    lora_alpha: int = None,
    lora_rank: int = 16,
    lora_dropout: float = 0,
    lora_target: tuple = ("q_proj", "v_proj"),
    save_safetensors: bool = True,
):
    r"""Initialize LoRA weights with Principal Singular values and Singular vectors Adaptation (PiSSA).

    Usage: python pissa_init.py --model_name_or_path path_to_model --output_dir output_dir
    """
    if isinstance(lora_target, str):
        lora_target = [name.strip() for name in lora_target.split(",")]

    tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, trust_remote_code=True)
    model = AutoModelForCausalLM.from_pretrained(model_name_or_path, trust_remote_code=True, torch_dtype="auto")

    lora_config = LoraConfig(
        task_type=TaskType.CAUSAL_LM,
        r=lora_rank,
        lora_alpha=lora_alpha if lora_alpha is not None else lora_rank * 2,
        lora_dropout=lora_dropout,
        target_modules=lora_target,
        init_lora_weights="pissa" if pissa_iter == -1 else f"pissa_niter_{pissa_iter}",
    )

    # Init PiSSA model
    peft_model = get_peft_model(model, lora_config)
    pissa_dir = os.path.join(output_dir, "pissa_init")

    # Save PiSSA model
    setattr(peft_model.peft_config["default"], "base_model_name_or_path", os.path.abspath(output_dir))
    setattr(peft_model.peft_config["default"], "init_lora_weights", True)  # don't apply pissa again
    peft_model.save_pretrained(pissa_dir, safe_serialization=save_safetensors)
    print(f"Adapter weights saved in {pissa_dir}")

    # Save base model
    base_model: PreTrainedModel = peft_model.unload()
    base_model.save_pretrained(output_dir, safe_serialization=save_safetensors)
    tokenizer.save_pretrained(output_dir)
    print(f"Model weights saved in {output_dir}")

    print("- Fine-tune this model with:")
    print(f"model_name_or_path: {output_dir}")
    print(f"adapter_name_or_path: {pissa_dir}")
    print("finetuning_type: lora")
    print("pissa_init: false")
    print("pissa_convert: true")
    print("- and optionally with:")
    print("quantization_bit: 4")


if __name__ == "__main__":
    fire.Fire(quantize_pissa)
post-training/LLaMA-Factory/scripts/qwen_omni_merge.py (new file, 118 lines)
@@ -0,0 +1,118 @@
# Copyright 2025 the LlamaFactory team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import shutil

import fire
from peft import PeftModel
from transformers import AutoModel, AutoProcessor, Qwen2_5OmniThinkerForConditionalGeneration  # type: ignore


def merge_lora(
    base_model_path: str,
    lora_checkpoint_path: str,
    extra_file: str = "spk_dict.pt",
    submodule_name: str = "thinker",
    save_path: str = "./merged_model_checkpoint",
):
    """Load the original model, tokenizer, and processor configuration, merge the LoRA weights
    for a specified submodule, and save the final merged model along with its configurations.

    Args:
        base_model_path (str): Path to the original model directory.
        lora_checkpoint_path (str): Path to the directory containing LoRA weights.
        extra_file (str): Name of the extra file to be copied (default: "spk_dict.pt").
        submodule_name (str): Name of the submodule to merge (default: "thinker").
        save_path (str): Directory where the merged model and configurations will be saved.
    """
    # 1. Load the original model, tokenizer, and processor
    model = AutoModel.from_pretrained(base_model_path, torch_dtype="auto", device_map="cpu")
    processor = AutoProcessor.from_pretrained(base_model_path)
    print("Successfully loaded the original model and tokenizer.")

    # 2. Extract the submodule to be merged (e.g., model.thinker)
    if not hasattr(model, submodule_name):
        raise AttributeError(f"The model does not have a submodule named '{submodule_name}'.")

    base_submodule = getattr(model, submodule_name)
    print(f"Successfully extracted submodule: {submodule_name}.")

    # 3. Load the LoRA weights onto the extracted submodule
    lora_model = PeftModel.from_pretrained(base_submodule, lora_checkpoint_path)
    print("LoRA weights loaded successfully.")

    # 4. Merge the LoRA weights into the submodule and unload the LoRA modules
    merged_submodule = lora_model.merge_and_unload()
    print("LoRA weights merged successfully.")

    # 5. Replace the original submodule with the merged submodule in the model
    setattr(model, submodule_name, merged_submodule)

    # 6. Save the final merged model along with the tokenizer and processor configuration
    model.save_pretrained(save_path)
    processor.save_pretrained(save_path)
    print(f"Merged model and tokenizer saved to {save_path}.")

    source_file = os.path.join(base_model_path, extra_file)
    target_file = os.path.join(save_path, extra_file)
    if os.path.exists(source_file):
        shutil.copy(source_file, target_file)
        print(f"File '{extra_file}' copied from {base_model_path} to {save_path}.")
    else:
        print(f"File '{extra_file}' not found in {base_model_path}, skipping copy.")


def save_full_model(
    saved_thinker_path: str,
    base_model_path: str,
    save_path: str = "./merged_model_checkpoint",
    extra_file: str = "spk_dict.pt",
):
    """Load the saved thinker module and the original model, replace the thinker in the original model,
    then save the complete model along with its tokenizer and processor configuration.

    Args:
        saved_thinker_path (str): Path to the saved thinker weights.
        base_model_path (str): Directory path of the original model.
        save_path (str): Directory where the merged model and configurations will be saved.
        extra_file (str): Name of the extra file to be copied (default: "spk_dict.pt").
    """
    # 1. Load the saved thinker module and the original model
    thinker = Qwen2_5OmniThinkerForConditionalGeneration.from_pretrained(
        saved_thinker_path, torch_dtype="auto", device_map="cpu"
    )
    base_model = AutoModel.from_pretrained(base_model_path, torch_dtype="auto", device_map="cpu")
    base_model.thinker = thinker

    # 2. Save the complete model along with its tokenizer and processor configuration
    processor = AutoProcessor.from_pretrained(base_model_path)
    base_model.save_pretrained(save_path)
    processor.save_pretrained(save_path)
    print(f"Merged model and tokenizer saved to {save_path}.")

    # 3. Copy the extra file from the base model directory to the save_path
    source_file = os.path.join(base_model_path, extra_file)
    target_file = os.path.join(save_path, extra_file)
    if os.path.exists(source_file):
        shutil.copy(source_file, target_file)
        print(f"File '{extra_file}' copied from {base_model_path} to {save_path}.")
    else:
        print(f"File '{extra_file}' not found in {base_model_path}, skipping copy.")


if __name__ == "__main__":
    fire.Fire({"save_full": save_full_model, "merge_lora": merge_lora})
post-training/LLaMA-Factory/scripts/stat_utils/cal_flops.py (new file, 49 lines)
@@ -0,0 +1,49 @@
# Copyright 2025 Microsoft Corporation and the LlamaFactory team.
#
# This code is inspired by Microsoft's DeepSpeed library.
# https://www.deepspeed.ai/tutorials/flops-profiler/
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import fire
import torch
from deepspeed.accelerator import get_accelerator  # type: ignore
from deepspeed.profiling.flops_profiler import get_model_profile  # type: ignore

from llamafactory.chat import ChatModel


def calculate_flops(
    model_name_or_path: str,
    batch_size: int = 1,
    seq_length: int = 512,
    flash_attn: str = "auto",
):
    r"""Calculate the FLOPs of pre-trained models.

    Usage: python cal_flops.py --model_name_or_path path_to_model --batch_size 1 --seq_length 512
    """
    with get_accelerator().device(0):
        chat_model = ChatModel(dict(model_name_or_path=model_name_or_path, template="empty", flash_attn=flash_attn))
        fake_input = torch.ones((batch_size, seq_length), dtype=torch.long, device=chat_model.engine.model.device)
        input_dict = {"input_ids": fake_input, "labels": fake_input.clone()}
        flops, macs, params = get_model_profile(
            chat_model.engine.model, kwargs=input_dict, print_profile=True, detailed=True
        )
        print("FLOPs:", flops)
        print("MACs:", macs)
        print("Params:", params)


if __name__ == "__main__":
    fire.Fire(calculate_flops)
post-training/LLaMA-Factory/scripts/stat_utils/cal_lr.py (new file, 98 lines)
@@ -0,0 +1,98 @@
# Copyright 2025 imoneoi and the LlamaFactory team.
#
# This code is inspired by imoneoi's OpenChat library.
# https://github.com/imoneoi/openchat/blob/3.6.0/ochat/training_deepspeed/train.py
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import math
from typing import Literal

import fire
import torch
from torch.utils.data import DataLoader
from tqdm import tqdm
from transformers import DataCollatorForLanguageModeling

from llamafactory.data import MultiModalDataCollatorForSeq2Seq, get_dataset, get_template_and_fix_tokenizer
from llamafactory.extras.constants import IGNORE_INDEX
from llamafactory.hparams import get_train_args
from llamafactory.model import load_tokenizer


BASE_LR = 3e-4  # 1.5e-4 for 30B-70B models
BASE_BS = 4_000_000  # from llama paper


def calculate_lr(
    model_name_or_path: str,
    batch_size: int,  # total batch size, namely (batch size * gradient accumulation * world size)
    stage: Literal["pt", "sft"] = "sft",
    dataset: str = "alpaca_en_demo",
    dataset_dir: str = "data",
    template: str = "default",
    cutoff_len: int = 2048,  # i.e. maximum input length during training
    is_mistral_or_gemma: bool = False,  # mistral and gemma models opt for a smaller learning rate
    packing: bool = False,
):
    r"""Calculate the optimal learning rate for 7B/13B models using LLaMA's hyper-parameters.

    Usage:
    python cal_lr.py --model_name_or_path path_to_model --dataset alpaca_en_demo --cutoff_len 1024 --batch_size 16
    """
    model_args, data_args, training_args, _, _ = get_train_args(
        dict(
            stage=stage,
            model_name_or_path=model_name_or_path,
            dataset=dataset,
            dataset_dir=dataset_dir,
            template=template,
            cutoff_len=cutoff_len,
            packing=packing,
            preprocessing_num_workers=16,
            output_dir="dummy_dir",
            overwrite_cache=True,
            do_train=True,
        )
    )
    tokenizer_module = load_tokenizer(model_args)
    tokenizer = tokenizer_module["tokenizer"]
    template = get_template_and_fix_tokenizer(tokenizer, data_args)
    trainset = get_dataset(template, model_args, data_args, training_args, stage, **tokenizer_module)["train_dataset"]
    if stage == "pt":
        data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)
    elif stage == "sft":
        data_collator = MultiModalDataCollatorForSeq2Seq(
            template=template, tokenizer=tokenizer, label_pad_token_id=IGNORE_INDEX
        )
    else:
        raise NotImplementedError(f"Stage is not supported: {stage}.")

    dataloader = DataLoader(trainset, batch_size, shuffle=False, collate_fn=data_collator, pin_memory=True)
    valid_tokens, total_tokens = 0, 0
    for batch in tqdm(dataloader, desc="Collecting valid tokens"):
        valid_tokens += torch.sum(batch["labels"] != IGNORE_INDEX).item()
        total_tokens += torch.numel(batch["labels"])

    valid_ratio = valid_tokens / total_tokens
    token_batch_size = cutoff_len * batch_size * valid_ratio
    lr = BASE_LR * math.sqrt(token_batch_size / BASE_BS)  # lr ~ sqrt(batch_size)
    lr = lr / 6.0 if is_mistral_or_gemma else lr
    print(
        f"Optimal learning rate is {lr:.2e} for a valid token ratio of {valid_ratio * 100:.2f}% "
        f"and an effective token batch size of {token_batch_size:.2f}."
    )


if __name__ == "__main__":
    fire.Fire(calculate_lr)
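A worked example of the scaling rule in cal_lr.py, using illustrative numbers: with cutoff_len = 2048, batch_size = 16 and a measured valid-token ratio of 0.60, the effective token batch size is 2048 * 16 * 0.60 ≈ 19,661 tokens, so lr = 3e-4 * sqrt(19661 / 4,000,000) ≈ 2.1e-5, and one-sixth of that value if is_mistral_or_gemma is set.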
post-training/LLaMA-Factory/scripts/stat_utils/cal_mfu.py (new file, 161 lines)
@@ -0,0 +1,161 @@
# Copyright 2025 the LlamaFactory team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import json
import os

import fire
import torch
import torch.distributed as dist
from transformers import AutoConfig

from llamafactory.train.tuner import run_exp


BASE = 2  # gemm (add + mul)


def compute_model_flops(
    model_name_or_path: str,
    total_batch_size: int,
    seq_length: int,
    include_backward: bool = True,
    include_recompute: bool = False,
    include_flashattn: bool = False,
) -> int:
    r"""Calculate the FLOPs of model per forward/backward pass."""
    config = AutoConfig.from_pretrained(model_name_or_path)
    hidden_size = getattr(config, "hidden_size", None)
    vocab_size = getattr(config, "vocab_size", None)
    intermediate_size = getattr(config, "intermediate_size", None)
    num_attention_heads = getattr(config, "num_attention_heads", None)
    num_key_value_heads = getattr(config, "num_key_value_heads", None)
    num_hidden_layers = getattr(config, "num_hidden_layers", None)
    tie_word_embeddings = getattr(config, "tie_word_embeddings", False)

    # mlp module
    mlp_flops_per_token = 3 * BASE * hidden_size * intermediate_size  # up, gate, down
    mlp_flops = total_batch_size * seq_length * num_hidden_layers * mlp_flops_per_token

    # attn projector module
    q_flops_per_token = BASE * hidden_size * hidden_size
    o_flops_per_token = BASE * hidden_size * hidden_size
    k_flops_per_token = BASE * hidden_size * hidden_size * num_key_value_heads // num_attention_heads
    v_flops_per_token = BASE * hidden_size * hidden_size * num_key_value_heads // num_attention_heads
    attn_proj_flops_per_token = q_flops_per_token + o_flops_per_token + k_flops_per_token + v_flops_per_token
    attn_proj_flops = total_batch_size * seq_length * num_hidden_layers * attn_proj_flops_per_token

    # attn sdpa module
    sdpa_flops_per_layer = 2 * BASE * hidden_size * seq_length * seq_length  # (q * k^T) * v
    sdpa_flops = total_batch_size * num_hidden_layers * sdpa_flops_per_layer

    # embedding module
    embedding_flops_per_token = hidden_size * vocab_size
    embedding_flops = total_batch_size * seq_length * embedding_flops_per_token
    if tie_word_embeddings is False:
        embedding_flops *= 2

    non_embedding_flops = mlp_flops + attn_proj_flops + sdpa_flops
    non_embedding_coeff, embedding_coeff = 1, 1
    if include_backward:
        non_embedding_coeff += 2
        embedding_coeff += 2

    if include_recompute:
        non_embedding_coeff += 1

    total_flops = non_embedding_coeff * non_embedding_flops + embedding_coeff * embedding_flops

    if include_flashattn:
        total_flops += sdpa_flops

    return total_flops


def compute_device_flops(world_size: int) -> float:
    r"""Calculate the FLOPs of the device capability per second."""
    device_name = torch.cuda.get_device_name()
    if "H100" in device_name or "H800" in device_name:
        return 989 * 1e12 * world_size
    elif "A100" in device_name or "A800" in device_name:
        return 312 * 1e12 * world_size
    elif "V100" in device_name:
        return 125 * 1e12 * world_size
    elif "4090" in device_name:
        return 98 * 1e12 * world_size
    else:
        raise NotImplementedError(f"Device not supported: {device_name}.")


def calculate_mfu(
    model_name_or_path: str,
    batch_size: int = 1,
    seq_length: int = 1024,
    num_steps: int = 100,
    finetuning_type: str = "lora",
    flash_attn: str = "auto",
    deepspeed_stage: int = 0,
    disable_gc: bool = False,
    liger_kernel: bool = False,
    unsloth_gc: bool = False,
) -> float:
    r"""Calculate MFU for given model and hyper-params.

    Usage: python cal_mfu.py --model_name_or_path path_to_model --batch_size 1 --seq_length 1024
    """
    args = {
        "model_name_or_path": model_name_or_path,
        "flash_attn": flash_attn,
        "disable_gradient_checkpointing": disable_gc,
        "enable_liger_kernel": liger_kernel,
        "use_unsloth_gc": unsloth_gc,
        "stage": "pt",
        "do_train": True,
        "finetuning_type": finetuning_type,
        "dataset": "c4_demo",
        "cutoff_len": seq_length,
        "output_dir": os.path.join("saves", "test_mfu"),
        "logging_strategy": "no",
        "save_strategy": "no",
        "save_only_model": True,
        "overwrite_output_dir": True,
        "per_device_train_batch_size": batch_size,
        "max_steps": num_steps,
        "bf16": True,
    }
    if deepspeed_stage in [2, 3]:
        args["deepspeed"] = f"examples/deepspeed/ds_z{deepspeed_stage}_config.json"

    run_exp(args)
    if dist.is_initialized():
        dist.barrier()
        world_size = dist.get_world_size()
    else:
        world_size = 1

    if int(os.getenv("LOCAL_RANK", "0")) == 0:
        with open(os.path.join("saves", "test_mfu", "all_results.json"), encoding="utf-8") as f:
            result = json.load(f)

        total_batch_size = batch_size * world_size
        mfu_value = (
            result["train_steps_per_second"]
            * compute_model_flops(model_name_or_path, total_batch_size, seq_length)
            / compute_device_flops(world_size)
        )
        print(f"MFU: {mfu_value * 100:.2f}%")


if __name__ == "__main__":
    fire.Fire(calculate_mfu)
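The MFU reported by cal_mfu.py is achieved throughput divided by peak throughput: MFU = train_steps_per_second * model FLOPs per step / aggregate peak device FLOPs per second. As an illustrative calculation with made-up numbers: a step costing 1.0e15 FLOPs running at 0.25 steps/s on a single A100 (peak taken as 312e12 FLOPs/s above) gives MFU = 0.25 * 1e15 / 3.12e14 ≈ 0.80, printed as roughly 80%.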
post-training/LLaMA-Factory/scripts/stat_utils/cal_ppl.py (new file, 134 lines)
@@ -0,0 +1,134 @@
|
||||
# Copyright 2025 the LlamaFactory team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import json
from dataclasses import dataclass
from typing import Any, Literal, Optional

import fire
import torch
from torch.utils.data import DataLoader
from tqdm import tqdm
from transformers import DataCollatorForLanguageModeling

from llamafactory.data import MultiModalDataCollatorForSeq2Seq, get_dataset, get_template_and_fix_tokenizer
from llamafactory.extras.constants import IGNORE_INDEX
from llamafactory.hparams import get_train_args
from llamafactory.model import load_model, load_tokenizer


@dataclass
class PairwiseDataCollatorWithPadding(MultiModalDataCollatorForSeq2Seq):
    r"""Data collator for pairwise data."""

    train_on_prompt: bool = False

    def __call__(self, features: list[dict[str, Any]]) -> dict[str, torch.Tensor]:
        r"""Pad batched data to the longest sequence in the batch."""
        chosen_features = []
        for feature in features:
            chosen_features.append(
                {
                    "input_ids": feature["chosen_input_ids"],
                    "attention_mask": feature["chosen_attention_mask"],
                    "labels": feature["chosen_input_ids"] if self.train_on_prompt else feature["chosen_labels"],
                    "images": feature["images"],
                    "videos": feature["videos"],
                    "audios": feature["audios"],
                }
            )

        return super().__call__(chosen_features)


def calculate_ppl(
    model_name_or_path: str,
    save_name: str = "ppl.json",
    batch_size: int = 4,
    stage: Literal["pt", "sft", "rm"] = "sft",
    dataset: str = "alpaca_en_demo",
    dataset_dir: str = "data",
    template: str = "default",
    cutoff_len: int = 2048,
    max_samples: Optional[int] = None,
    train_on_prompt: bool = False,
):
    r"""Calculate the perplexity of a pre-trained model on a dataset.

    Usage: export CUDA_VISIBLE_DEVICES=0
    python cal_ppl.py --model_name_or_path path_to_model --dataset alpaca_en_demo --save_name ppl.json
    """
    model_args, data_args, training_args, finetuning_args, _ = get_train_args(
        dict(
            stage=stage,
            model_name_or_path=model_name_or_path,
            dataset=dataset,
            dataset_dir=dataset_dir,
            template=template,
            cutoff_len=cutoff_len,
            max_samples=max_samples,
            train_on_prompt=train_on_prompt,
            preprocessing_num_workers=16,
            output_dir="dummy_dir",
            overwrite_cache=True,
            do_train=True,
        )
    )
    tokenizer_module = load_tokenizer(model_args)
    tokenizer = tokenizer_module["tokenizer"]
    template = get_template_and_fix_tokenizer(tokenizer, data_args)
    trainset = get_dataset(template, model_args, data_args, training_args, stage, **tokenizer_module)["train_dataset"]
    model = load_model(tokenizer, model_args, finetuning_args, is_trainable=False)
    if stage == "pt":
        data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)
    elif stage == "sft":
        data_collator = MultiModalDataCollatorForSeq2Seq(
            template=template, tokenizer=tokenizer, label_pad_token_id=IGNORE_INDEX
        )
    elif stage == "rm":
        data_collator = PairwiseDataCollatorWithPadding(
            template=template, tokenizer=tokenizer, label_pad_token_id=IGNORE_INDEX, train_on_prompt=train_on_prompt
        )
    else:
        raise NotImplementedError(f"Stage is not supported: {stage}.")

    dataloader = DataLoader(trainset, batch_size, shuffle=False, collate_fn=data_collator, pin_memory=True)
    criterion = torch.nn.CrossEntropyLoss(reduction="none")
    total_ppl = 0
    perplexities = []
    batch: dict[str, torch.Tensor]
    with torch.no_grad():
        for batch in tqdm(dataloader, desc="Computing perplexities"):
            batch = batch.to(model.device)
            outputs = model(**batch)
            shift_logits: torch.Tensor = outputs["logits"][..., :-1, :]
            shift_labels: torch.Tensor = batch["labels"][..., 1:]
            loss_mask = shift_labels != IGNORE_INDEX
            flatten_logits = shift_logits.contiguous().view(shift_labels.size(0) * shift_labels.size(1), -1)
            flatten_labels = shift_labels.contiguous().view(-1)
            token_logps: torch.Tensor = criterion(flatten_logits, flatten_labels)
            token_logps = token_logps.contiguous().view(shift_logits.size(0), -1)
            sentence_logps = (token_logps * loss_mask).sum(-1) / loss_mask.sum(-1)
            total_ppl += sentence_logps.exp().sum().item()
            perplexities.extend(sentence_logps.exp().tolist())

    with open(save_name, "w", encoding="utf-8") as f:
        json.dump(perplexities, f, indent=2)

    print(f"Average perplexity is {total_ppl / len(perplexities):.2f}")
    print(f"Perplexities have been saved at {save_name}.")


if __name__ == "__main__":
    fire.Fire(calculate_ppl)
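The per-sample perplexity above is the exponential of the mean cross-entropy over the non-masked (non-IGNORE_INDEX) tokens. The sketch below reproduces just that masked reduction on random dummy tensors; the shapes, vocabulary size, and mask pattern are illustrative assumptions, not tied to any particular model or dataset.

# Minimal sketch of the masked per-sentence perplexity reduction, on dummy
# logits/labels. IGNORE_INDEX is assumed to be -100 here.
import torch

IGNORE_INDEX = -100
batch, seq_len, vocab = 2, 8, 32
logits = torch.randn(batch, seq_len, vocab)
labels = torch.randint(0, vocab, (batch, seq_len))
labels[:, :3] = IGNORE_INDEX  # pretend the prompt tokens are masked out

shift_logits = logits[..., :-1, :]
shift_labels = labels[..., 1:]
loss_mask = shift_labels != IGNORE_INDEX
criterion = torch.nn.CrossEntropyLoss(reduction="none", ignore_index=IGNORE_INDEX)
token_nll = criterion(shift_logits.reshape(-1, vocab), shift_labels.reshape(-1)).view(batch, -1)
sentence_ppl = (token_nll * loss_mask).sum(-1).div(loss_mask.sum(-1)).exp()
print(sentence_ppl)  # one perplexity value per sample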
post-training/LLaMA-Factory/scripts/stat_utils/length_cdf.py (new file, 69 lines)
@@ -0,0 +1,69 @@
# Copyright 2025 the LlamaFactory team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from collections import defaultdict

import fire
from tqdm import tqdm

from llamafactory.data import get_dataset, get_template_and_fix_tokenizer
from llamafactory.hparams import get_train_args
from llamafactory.model import load_tokenizer


def length_cdf(
    model_name_or_path: str,
    dataset: str = "alpaca_en_demo",
    dataset_dir: str = "data",
    template: str = "default",
    interval: int = 1000,
):
    r"""Calculate the distribution of the input lengths in the dataset.

    Usage: export CUDA_VISIBLE_DEVICES=0
    python length_cdf.py --model_name_or_path path_to_model --dataset alpaca_en_demo --template default
    """
    model_args, data_args, training_args, _, _ = get_train_args(
        dict(
            stage="sft",
            model_name_or_path=model_name_or_path,
            dataset=dataset,
            dataset_dir=dataset_dir,
            template=template,
            cutoff_len=1_000_000,
            preprocessing_num_workers=16,
            output_dir="dummy_dir",
            overwrite_cache=True,
            do_train=True,
        )
    )
    tokenizer_module = load_tokenizer(model_args)
    template = get_template_and_fix_tokenizer(tokenizer_module["tokenizer"], data_args)
    trainset = get_dataset(template, model_args, data_args, training_args, "sft", **tokenizer_module)["train_dataset"]
    total_num = len(trainset)
    length_dict = defaultdict(int)
    for sample in tqdm(trainset["input_ids"], desc="Collecting lengths"):
        length_dict[len(sample) // interval * interval] += 1

    length_tuples = list(length_dict.items())
    length_tuples.sort()
    count_accu, prob_accu = 0, 0
    for length, count in length_tuples:
        count_accu += count
        prob_accu += count / total_num * 100
        print(f"{count_accu:d} ({prob_accu:.2f}%) samples have length < {length + interval}.")


if __name__ == "__main__":
    fire.Fire(length_cdf)
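The bucketing above is a histogram over floor-rounded lengths that is then accumulated into a CDF. A standalone sketch of the same logic on a hypothetical list of token counts (no tokenizer or dataset needed):

# Minimal sketch of the length-CDF bucketing, using made-up sequence lengths
# instead of a tokenized dataset.
from collections import defaultdict

lengths = [120, 480, 950, 1030, 2048, 2100, 4096]  # hypothetical token counts
interval = 1000
length_dict = defaultdict(int)
for n in lengths:
    length_dict[n // interval * interval] += 1

count_accu, prob_accu = 0, 0.0
for bucket, count in sorted(length_dict.items()):
    count_accu += count
    prob_accu += count / len(lengths) * 100
    print(f"{count_accu:d} ({prob_accu:.2f}%) samples have length < {bucket + interval}.")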
post-training/LLaMA-Factory/scripts/vllm_infer.py (new file, 162 lines)
@@ -0,0 +1,162 @@
# Copyright 2025 the LlamaFactory team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import json
from typing import Optional

import fire
from transformers import Seq2SeqTrainingArguments

from llamafactory.data import get_dataset, get_template_and_fix_tokenizer
from llamafactory.extras.constants import IGNORE_INDEX
from llamafactory.extras.misc import get_device_count
from llamafactory.extras.packages import is_vllm_available
from llamafactory.hparams import get_infer_args
from llamafactory.model import load_tokenizer


if is_vllm_available():
    from vllm import LLM, SamplingParams
    from vllm.lora.request import LoRARequest


def vllm_infer(
    model_name_or_path: str,
    adapter_name_or_path: Optional[str] = None,
    dataset: str = "alpaca_en_demo",
    dataset_dir: str = "data",
    template: str = "default",
    cutoff_len: int = 2048,
    max_samples: Optional[int] = None,
    vllm_config: str = "{}",
    save_name: str = "generated_predictions.jsonl",
    temperature: float = 0.95,
    top_p: float = 0.7,
    top_k: int = 50,
    max_new_tokens: int = 1024,
    repetition_penalty: float = 1.0,
    skip_special_tokens: bool = True,
    seed: Optional[int] = None,
    pipeline_parallel_size: int = 1,
    image_max_pixels: int = 768 * 768,
    image_min_pixels: int = 32 * 32,
):
    r"""Perform batch generation using the vLLM engine, which supports tensor parallelism.

    Usage: python vllm_infer.py --model_name_or_path meta-llama/Llama-2-7b-hf --template llama --dataset alpaca_en_demo
    """
    if pipeline_parallel_size > get_device_count():
        raise ValueError("Pipeline parallel size should not be larger than the number of GPUs.")

    model_args, data_args, _, generating_args = get_infer_args(
        dict(
            model_name_or_path=model_name_or_path,
            adapter_name_or_path=adapter_name_or_path,
            dataset=dataset,
            dataset_dir=dataset_dir,
            template=template,
            cutoff_len=cutoff_len,
            max_samples=max_samples,
            preprocessing_num_workers=16,
            vllm_config=vllm_config,
            temperature=temperature,
            top_p=top_p,
            top_k=top_k,
            max_new_tokens=max_new_tokens,
            repetition_penalty=repetition_penalty,
        )
    )

    training_args = Seq2SeqTrainingArguments(output_dir="dummy_dir")
    tokenizer_module = load_tokenizer(model_args)
    tokenizer = tokenizer_module["tokenizer"]
    template_obj = get_template_and_fix_tokenizer(tokenizer, data_args)
    template_obj.mm_plugin.expand_mm_tokens = False  # for vllm generate
    dataset_module = get_dataset(template_obj, model_args, data_args, training_args, "ppo", **tokenizer_module)

    inputs, prompts, labels = [], [], []
    for sample in dataset_module["train_dataset"]:
        if sample["images"]:
            multi_modal_data = {
                "image": template_obj.mm_plugin._regularize_images(
                    sample["images"], image_max_pixels=image_max_pixels, image_min_pixels=image_min_pixels
                )["images"]
            }
        elif sample["videos"]:
            multi_modal_data = {
                "video": template_obj.mm_plugin._regularize_videos(
                    sample["videos"], image_max_pixels=image_max_pixels, image_min_pixels=image_min_pixels
                )["videos"]
            }
        elif sample["audios"]:
            audio_data = template_obj.mm_plugin._regularize_audios(
                sample["audios"],
                sampling_rate=16000,
            )
            multi_modal_data = {"audio": zip(audio_data["audios"], audio_data["sampling_rates"])}
        else:
            multi_modal_data = None

        inputs.append({"prompt_token_ids": sample["input_ids"], "multi_modal_data": multi_modal_data})
        prompts.append(tokenizer.decode(sample["input_ids"], skip_special_tokens=skip_special_tokens))
        labels.append(
            tokenizer.decode(
                list(filter(lambda x: x != IGNORE_INDEX, sample["labels"])), skip_special_tokens=skip_special_tokens
            )
        )

    sampling_params = SamplingParams(
        repetition_penalty=generating_args.repetition_penalty or 1.0,  # repetition_penalty must be > 0
        temperature=generating_args.temperature,
        top_p=generating_args.top_p or 1.0,  # top_p must be > 0
        top_k=generating_args.top_k or -1,  # top_k must be > 0
        stop_token_ids=template_obj.get_stop_token_ids(tokenizer),
        max_tokens=generating_args.max_new_tokens,
        skip_special_tokens=skip_special_tokens,
        seed=seed,
    )
    if model_args.adapter_name_or_path is not None:
        lora_request = LoRARequest("default", 1, model_args.adapter_name_or_path[0])
    else:
        lora_request = None

    engine_args = {
        "model": model_args.model_name_or_path,
        "trust_remote_code": True,
        "dtype": model_args.infer_dtype,
        "max_model_len": cutoff_len + max_new_tokens,
        "tensor_parallel_size": (get_device_count() // pipeline_parallel_size) or 1,
        "pipeline_parallel_size": pipeline_parallel_size,
        "disable_log_stats": True,
        "enable_lora": model_args.adapter_name_or_path is not None,
    }
    if template_obj.mm_plugin.__class__.__name__ != "BasePlugin":
        engine_args["limit_mm_per_prompt"] = {"image": 4, "video": 2, "audio": 2}

    if isinstance(model_args.vllm_config, dict):
        engine_args.update(model_args.vllm_config)

    results = LLM(**engine_args).generate(inputs, sampling_params, lora_request=lora_request)
    preds = [result.outputs[0].text for result in results]
    with open(save_name, "w", encoding="utf-8") as f:
        for text, pred, label in zip(prompts, preds, labels):
            f.write(json.dumps({"prompt": text, "predict": pred, "label": label}, ensure_ascii=False) + "\n")

    print("*" * 70)
    print(f"{len(prompts)} generated results have been saved at {save_name}.")
    print("*" * 70)


if __name__ == "__main__":
    fire.Fire(vllm_infer)
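Each line of the saved JSONL file holds a prompt, the model prediction, and the reference label. The snippet below is a minimal sketch of consuming that file afterwards; the exact-match rate is only an illustrative post-processing step and is not part of the script itself.

# Minimal sketch of reading generated_predictions.jsonl after vllm_infer runs.
import json

matches, total = 0, 0
with open("generated_predictions.jsonl", encoding="utf-8") as f:
    for line in f:
        record = json.loads(line)  # keys: "prompt", "predict", "label"
        matches += int(record["predict"].strip() == record["label"].strip())
        total += 1

if total:
    print(f"{total} records, exact match rate: {matches / total:.2%}")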