#!/bin/bash
#
# Build the rkllm builder image and compile a GGUF model for the Rockchip NPU.
# Expects a Dockerfile and entrypoint.py next to this script, and the source
# model placed under $MODELS_PATH on the host.

# Fail fast: exit on error, on unset variables, and on any failed pipe stage.
set -euo pipefail

# Host directories, mounted into the container as /models and /output.
readonly MODELS_PATH="models"
readonly OUTPUT_DIR="compiled-models"

# Builder image tag and a per-run container name ($$ = this shell's PID,
# so concurrent runs do not collide).
readonly IMAGE_NAME="git.home/drholy/builder_rkllm:latest"
readonly CONTAINER_NAME="rkllm-compile-$$"

# Model to compile and the conversion parameters passed to entrypoint.py.
readonly MODEL_NAME="Qwen3-4B-Instruct-2507-Q4_0"
readonly MODEL_PATH="/models/$MODEL_NAME/$MODEL_NAME.gguf"  # container-side path
readonly MODEL_OUTPUT="/output/"                            # container-side output dir
readonly MODEL_FORMAT="gguf"
readonly MODEL_QUANT_ENABLE="True"
readonly MODEL_QUANT="w8a8"

# Make sure both host-side mount points exist before docker run.
mkdir -p "$MODELS_PATH" "$OUTPUT_DIR"
# Build (or rebuild) the compiler image from the local Dockerfile.
echo "🏗️ Сборка образа для компиляции..."
docker build --file Dockerfile --tag "$IMAGE_NAME" .
# Clone the model onto the host (if not already cloned).
# NOTE(review): intentionally disabled — the source model is expected to be
# placed under $MODELS_PATH manually. Uncomment to pull it from Hugging Face.
# if [ ! -d "$MODELS_PATH/Qwen3-4B-Instruct-2507-gptq-w4a16-g128" ]; then
# echo "📥 Клонирование модели из Hugging Face..."
# cd "$MODELS_PATH"
# git clone https://huggingface.co/kaitchup/Qwen3-4B-Instruct-2507-gptq-w4a16-g128
# cd "Qwen3-4B-Instruct-2507-gptq-w4a16-g128" && git lfs pull || true
# cd ../..
# else
# echo "✅ Модель уже клонирована в $MODELS_PATH/Qwen3-4B-Instruct-2507-gptq-w4a16-g128"
# fi
echo "⚙️ Запуск компиляции (может занять 30-60 минут)..."
# Run the converter inside the builder image. entrypoint.py reads the GGUF
# from /models and writes the compiled artifact into /output (host: $OUTPUT_DIR).
# All entrypoint arguments are quoted to prevent word-splitting/globbing (SC2086).
docker run --rm \
  --name "$CONTAINER_NAME" \
  -v "$(pwd)/$MODELS_PATH:/models" \
  -v "$(pwd)/$OUTPUT_DIR:/output" \
  -v "$(pwd)/entrypoint.py:/work/entrypoint.py:ro" \
  "$IMAGE_NAME" \
  python3 /work/entrypoint.py \
    --model "$MODEL_PATH" \
    --output "$MODEL_OUTPUT" \
    --format "$MODEL_FORMAT" \
    --enable-q "$MODEL_QUANT_ENABLE" \
    --quant "$MODEL_QUANT"

echo "✅ Модель сохранена в: $OUTPUT_DIR"