Add post-training folder

This commit is contained in:
leigest519
2025-10-22 15:38:32 +08:00
parent d59a5c14bd
commit 1f1933636d
326 changed files with 45070 additions and 0 deletions

View File

@@ -0,0 +1,117 @@
#!/bin/bash
set -euo pipefail
# bootstrap_envs.sh -- prepare the conda environments and demo data for this repo.
#
# Invocation:
#   bash scripts/bootstrap_envs.sh         # everything (rl, vllm, data, sft)
#   bash scripts/bootstrap_envs.sh rl      # RL env only
#   bash scripts/bootstrap_envs.sh vllm    # vLLM env only
#   bash scripts/bootstrap_envs.sh sft     # SFT env only
#   bash scripts/bootstrap_envs.sh data    # demo data only
#
# Environment overrides:
#   CONDA_ROOT   conda installation prefix (default: $HOME/miniconda3)
: "${CONDA_ROOT:=$HOME/miniconda3}"

# Absolute directory of this script, and its parent directory.
SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" >/dev/null 2>&1 && pwd)"
REPO_ROOT="$(cd -- "${SCRIPT_DIR}/.." && pwd)"

# Use the conda binary under CONDA_ROOT when it is executable; otherwise
# fall back to whatever `conda` resolves to on PATH.
CONDA_BIN="conda"
if [[ -x "${CONDA_ROOT}/bin/conda" ]]; then
  CONDA_BIN="${CONDA_ROOT}/bin/conda"
fi
# Returns 0 when `uname -s` prints exactly "Linux", 1 otherwise.
os_is_linux() {
  case "$(uname -s)" in
    Linux) return 0 ;;
    *) return 1 ;;
  esac
}
# Reassemble a split environment archive when split parts are present.
#
# Looks for "$REPO_ROOT/conda_envs/<name>.tar.gz.part.*". When at least one
# part exists, the parts are concatenated in glob order into
# "$REPO_ROOT/conda_envs/<name>.tar.gz". Does nothing when no parts exist.
#
# Globals:   REPO_ROOT (read)
# Arguments: $1 - environment name (e.g. rl, vllm)
# Outputs:   a progress line on stdout when parts are concatenated
# Returns:   0 on success or when there is nothing to do; 1 if the
#            concatenation fails (no partial archive is left behind)
concat_parts_if_needed() {
  local name="$1"
  local archive="$REPO_ROOT/conda_envs/${name}.tar.gz"
  # The temp name must not match the ".part.*" glob used below.
  local tmp="${archive}.tmp.$$"

  # Remember the caller's nullglob setting so it is restored, not clobbered.
  local had_nullglob=0
  if shopt -q nullglob; then
    had_nullglob=1
  fi
  shopt -s nullglob
  local matches=("${archive}.part."*)
  if [[ "$had_nullglob" -eq 0 ]]; then
    shopt -u nullglob
  fi

  if [[ ${#matches[@]} -eq 0 ]]; then
    return 0
  fi

  echo "Concatenating ${name} env parts..."
  # Write to a temp file and rename: the install_* callers invoke this with
  # `|| true`, so a failed cat must not leave a truncated archive that the
  # extraction step would then pick up.
  if ! cat -- "${matches[@]}" > "$tmp"; then
    rm -f -- "$tmp"
    return 1
  fi
  mv -f -- "$tmp" "$archive"
}
# Unpack an environment archive into the conda envs directory.
#
# If "$REPO_ROOT/conda_envs/<name>.tar.gz" exists, it is extracted into
# "$CONDA_ROOT/envs/<name>", and "<env>/bin/conda-unpack" is then run when it
# is present and executable.
#
# Globals:   CONDA_ROOT (read), REPO_ROOT (read)
# Arguments: $1 - environment name
# Outputs:   progress (including tar's verbose listing) on stdout;
#            failure messages on stderr
# Returns:   0 if the archive was extracted (and unpacked, when applicable);
#            1 if there is no archive; otherwise the exit status of the
#            step that failed
extract_env_archive() {
  local name="$1"
  local env_path="$CONDA_ROOT/envs/$name"
  local archive="$REPO_ROOT/conda_envs/${name}.tar.gz"
  local rc=0

  if [[ ! -f "$archive" ]]; then
    return 1
  fi

  echo "Installing $name environment from archive..."
  # Every step is checked explicitly: the install_* callers run this function
  # as `extract_env_archive ... || true`, which makes bash ignore errexit
  # inside the function body, so failures would otherwise go unnoticed.
  mkdir -p -- "$env_path" || return
  tar -xzvf "$archive" -C "$env_path" || rc=$?
  if [[ "$rc" -ne 0 ]]; then
    echo "Failed to extract $archive (tar exit status $rc)" >&2
    return "$rc"
  fi

  if [[ -x "$env_path/bin/conda-unpack" ]]; then
    "$env_path/bin/conda-unpack" || rc=$?
    if [[ "$rc" -ne 0 ]]; then
      echo "conda-unpack failed for $name (exit status $rc)" >&2
      return "$rc"
    fi
  fi
  return 0
}
# Set up the "rl" conda environment.
#
# Best-effort restores the env from a packed archive (failures of the two
# archive helpers are deliberately ignored), then, from inside
# "$REPO_ROOT/VLM-R1/src/open-r1-multimodal", pip-installs the package in
# editable mode with the "dev" extra and runs `playwright install`
# (plus `playwright install-deps` on Linux).
#
# Globals: REPO_ROOT (read), CONDA_BIN (read)
install_rl() {
  local project_dir="$REPO_ROOT/VLM-R1/src/open-r1-multimodal"
  # Common prefix for every command executed inside the rl environment.
  local -a in_env=("$CONDA_BIN" run -n rl)

  concat_parts_if_needed rl || true
  extract_env_archive rl || true

  echo "Configuring RL environment (pip installs)..."
  pushd "$project_dir" >/dev/null
  "${in_env[@]}" pip install -e ".[dev]"
  "${in_env[@]}" playwright install
  if os_is_linux; then
    "${in_env[@]}" playwright install-deps
  fi
  popd >/dev/null
  echo "RL environment ready."
}
# Set up the "vllm" conda environment.
#
# Best-effort restores the env from a packed archive (failures of the two
# archive helpers are deliberately ignored), then pip-installs
# "$REPO_ROOT/LLaMA-Factory" in editable mode with the "metrics" extra.
#
# Globals: REPO_ROOT (read), CONDA_BIN (read)
install_vllm() {
  local env_name="vllm"
  local factory_dir="$REPO_ROOT/LLaMA-Factory"

  concat_parts_if_needed "$env_name" || true
  extract_env_archive "$env_name" || true

  echo "Configuring vLLM environment (metrics only editable LLaMA-Factory)..."
  pushd "$factory_dir" >/dev/null
  "$CONDA_BIN" run -n "$env_name" pip install -e ".[metrics]"
  popd >/dev/null
  echo "vLLM environment ready."
}
# Configure the "sft" conda environment: from inside
# "$REPO_ROOT/LLaMA-Factory", pip-install the package in editable mode with
# the "metrics" and "torch" extras, then pip-install deepspeed.
# No archive restore is attempted here; the commands target an env named "sft".
#
# Globals: REPO_ROOT (read), CONDA_BIN (read)
install_sft() {
  local factory_dir="$REPO_ROOT/LLaMA-Factory"
  # Shared prefix: `pip install` executed inside the sft environment.
  local -a pip_in_sft=("$CONDA_BIN" run -n sft pip install)

  echo "Configuring SFT environment..."
  pushd "$factory_dir" >/dev/null
  "${pip_in_sft[@]}" -e ".[metrics,torch]"
  "${pip_in_sft[@]}" deepspeed
  popd >/dev/null
  echo "SFT environment ready."
}
# Run get_data.sh with bash from inside "$REPO_ROOT/LLaMA-Factory/data",
# then return to the previous working directory.
#
# Globals: REPO_ROOT (read)
fetch_data() {
  local data_dir="$REPO_ROOT/LLaMA-Factory/data"

  echo "Fetching example data for LLaMA-Factory..."
  pushd "$data_dir" >/dev/null
  bash get_data.sh
  popd >/dev/null
  echo "Data prepared."
}
# Dispatch to the installer(s) for the requested target.
#
# Arguments: $1 - one of rl, vllm, sft, data, all (default: all). "all" runs
#                 install_rl, install_vllm, fetch_data, install_sft in that order.
# Outputs:   for an unknown target, a usage line on stderr
# Returns:   exits the shell with status 1 for an unknown target
run_target() {
  local target="${1:-all}"
  local -a steps=()
  local step

  case "$target" in
    all) steps=(install_rl install_vllm fetch_data install_sft) ;;
    rl) steps=(install_rl) ;;
    vllm) steps=(install_vllm) ;;
    sft) steps=(install_sft) ;;
    data) steps=(fetch_data) ;;
    *)
      echo "Usage: $0 [rl|vllm|sft|data|all]" >&2
      exit 1
      ;;
  esac

  for step in "${steps[@]}"; do
    "$step"
  done
}
# Script entry point: dispatch on the first command-line argument, defaulting to "all".
run_target "${1:-all}"

View File

@@ -0,0 +1,21 @@
#!/bin/bash
set -euo pipefail
# run_rl.sh: launch an RL job from inside the VLM-R1 checkout.
# Usage:
#   bash scripts/run_rl.sh <VLM-R1/run_scripts/*.sh> [script args...]
#   bash scripts/run_rl.sh -- <cmd> [args...]   # direct torchrun/python entrypoints
#
# The working directory is switched to "$REPO_ROOT/VLM-R1" before anything is
# run. A relative script path is first tried against the caller's directory
# (so the path shown in the usage line works from the repo root); if no such
# file exists there it is left as-is, i.e. resolved relative to VLM-R1.

# Print the usage line on stderr and exit with status 1.
usage() {
  echo "Usage: $0 <VLM-R1/run_scripts/*.sh | -- <cmd...>>" >&2
  exit 1
}

if [[ $# -lt 1 ]]; then
  usage
fi

SCRIPT_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd)
REPO_ROOT=$(cd -- "$SCRIPT_DIR/.." && pwd)

if [[ "$1" == "--" ]]; then
  shift
  # A bare "--" would otherwise expand to an empty command and exit 0
  # without running anything.
  if [[ $# -lt 1 ]]; then
    usage
  fi
  cd "$REPO_ROOT/VLM-R1"
  "$@"
else
  run_script="$1"
  shift
  # Pin a caller-relative path to an absolute one before changing directory.
  if [[ "$run_script" != /* && -f "$run_script" ]]; then
    run_script="$(cd -- "$(dirname -- "$run_script")" && pwd)/$(basename -- "$run_script")"
  fi
  cd "$REPO_ROOT/VLM-R1"
  # Forward any remaining arguments to the run script instead of dropping them.
  bash "$run_script" "$@"
fi

View File

@@ -0,0 +1,20 @@
#!/bin/bash
set -euo pipefail
# run_sft.sh: run llamafactory-cli from inside the LLaMA-Factory checkout.
# Usage:
#   bash scripts/run_sft.sh <examples/<...>.yaml> [extra train args...]
#   bash scripts/run_sft.sh <llamafactory-cli subcommand and args...>
#
# When the first argument ends in .yaml, `llamafactory-cli train` is run on it;
# otherwise all arguments are handed to llamafactory-cli unchanged.
# A relative YAML path is first tried against the caller's directory; if no
# such file exists there it is left as-is, i.e. relative to LLaMA-Factory.
if [[ $# -lt 1 ]]; then
  # Usage errors belong on stderr so they do not pollute captured stdout.
  echo "Usage: $0 <examples/<...>.yaml | <custom args>>" >&2
  exit 1
fi

SCRIPT_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd)
REPO_ROOT=$(cd -- "$SCRIPT_DIR/.." && pwd)

if [[ "$1" == *.yaml ]]; then
  train_yaml="$1"
  shift
  # Pin a caller-relative path to an absolute one before changing directory.
  if [[ "$train_yaml" != /* && -f "$train_yaml" ]]; then
    train_yaml="$(cd -- "$(dirname -- "$train_yaml")" && pwd)/$(basename -- "$train_yaml")"
  fi
  cd "$REPO_ROOT/LLaMA-Factory"
  # Forward trailing arguments rather than silently discarding them.
  # NOTE(review): whether `train` accepts overrides after the YAML depends on
  # the LLaMA-Factory version in use -- confirm against the vendored copy.
  llamafactory-cli train "$train_yaml" "$@"
else
  cd "$REPO_ROOT/LLaMA-Factory"
  llamafactory-cli "$@"
fi

View File

@@ -0,0 +1,18 @@
#!/bin/bash
set -euo pipefail
# run_vllm.sh: start `llamafactory-cli api` from inside the LLaMA-Factory checkout.
# Usage: bash scripts/run_vllm.sh <examples/inference/*.yaml> [API_PORT]
#
# The port (default 8000) is passed to the command via the API_PORT
# environment variable. A relative YAML path is first tried against the
# caller's directory; if no such file exists there it is left as-is, i.e.
# relative to LLaMA-Factory.
if [[ $# -lt 1 ]]; then
  # Usage errors belong on stderr so they do not pollute captured stdout.
  echo "Usage: $0 <inference_yaml> [API_PORT]" >&2
  exit 1
fi

YAML="$1"
PORT="${2:-8000}"

# Fail fast on a malformed port instead of handing it to the server.
# The 5-digit cap keeps the arithmetic below well inside integer range;
# 10# forces base 10 so a leading zero is not read as octal.
if [[ ! "$PORT" =~ ^[0-9]{1,5}$ ]] || (( 10#$PORT < 1 || 10#$PORT > 65535 )); then
  echo "Invalid API_PORT: $PORT (expected an integer in 1-65535)" >&2
  exit 1
fi

SCRIPT_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd)
REPO_ROOT=$(cd -- "$SCRIPT_DIR/.." && pwd)

# Pin a caller-relative path to an absolute one before changing directory.
if [[ "$YAML" != /* && -f "$YAML" ]]; then
  YAML="$(cd -- "$(dirname -- "$YAML")" && pwd)/$(basename -- "$YAML")"
fi

cd "$REPO_ROOT/LLaMA-Factory"
API_PORT="$PORT" llamafactory-cli api "$YAML"