From c7887b70b30229de5a9e29c6243a6ab30242cb88 Mon Sep 17 00:00:00 2001
From: drholy
Date: Wed, 4 Feb 2026 18:34:47 +0700
Subject: [PATCH] fix code to work

---
Reviewer notes (text between the three-dash line above and the first
"diff --git" line is not part of the commit message):

  * .gitignore: new file that ignores /models and /compiled-models.
  * Dockerfile: base image ubuntu:22.04 -> nvidia/cuda:12.1.0-runtime-ubuntu22.04;
    apt package python3 -> python3.10; the EXTERNALLY-MANAGED marker is removed;
    torch==2.6.0, numpy==1.26.4 and auto-gptq==0.7.1 are installed; the
    rkllm_toolkit wheel is taken from /rknn-llm (cp310) instead of
    /ezrknn-llm (cp312).
  * build.sh: the "./" prefix is dropped from MODELS_PATH and OUTPUT_DIR; both
    directories are created before the image build; the model clone step is
    replaced by a commented-out block; $MODELS_PATH is moved inside the quotes
    of the /models volume mount.
  * entrypoint.py: the model is loaded with load_gguf() from a .gguf path
    instead of load_huggingface(); quantized_dtype w4a16 -> w8a8; the output
    file name changes.

NOTE(review): this patch had been flattened onto three physical lines. Blank
lines and leading indentation could not be recovered and are restored here by
convention; the hunk line counts are kept verbatim. If a hunk is rejected as
corrupt or does not match the tree, try
`git apply --recount --ignore-whitespace`, or regenerate the patch from the
commit named in the first line.

 .gitignore    |  2 ++
 Dockerfile    | 14 +++++++++++---
 build.sh      | 22 ++++++++++++++--------
 entrypoint.py | 10 ++++++----
 4 files changed, 33 insertions(+), 15 deletions(-)
 create mode 100644 .gitignore

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..0775a32
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+/models
+/compiled-models
\ No newline at end of file
diff --git a/Dockerfile b/Dockerfile
index 44e5277..f278e82 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,16 +1,24 @@
-FROM ubuntu:22.04 AS builder
+FROM nvidia/cuda:12.1.0-runtime-ubuntu22.04 AS builder
 
 # Установка зависимостей
 RUN apt update && apt-get install -y \
-    python3 pip git curl wget nano sudo apt-utils && \
+    python3.10 pip git curl wget nano sudo apt-utils && \
     rm -rf /var/lib/apt/lists/*
 
 # Установка RKLLM Toolkit
 RUN git clone https://github.com/airockchip/rknn-llm
 
+# Удаляем защиту PEP 668
+RUN rm -f /usr/lib/python3.*/EXTERNALLY-MANAGED || true
+
+RUN pip3 install torch==2.6.0 numpy==1.26.4
+
+# 2. Установите auto_gptq БЕЗ CUDA (игнорируя суффиксы версий)
+RUN pip install auto-gptq==0.7.1 --no-build-isolation
+
 # Установка Python-пакета
-RUN pip3 install /ezrknn-llm/rkllm-toolkit/packages/rkllm_toolkit-1.2.3-cp312-cp312-linux_x86_64.whl
+RUN pip3 install /rknn-llm/rkllm-toolkit/packages/rkllm_toolkit-1.2.3-cp310-cp310-linux_x86_64.whl
 
 # Клонирование модели
 WORKDIR /models
diff --git a/build.sh b/build.sh
index 0769816..3174f4f 100644
--- a/build.sh
+++ b/build.sh
@@ -1,25 +1,31 @@
 #!/bin/bash
 set -e
 
-MODELS_PATH="./models"
-OUTPUT_DIR="./compiled-models"
+MODELS_PATH="models"
+OUTPUT_DIR="compiled-models"
 IMAGE_NAME="rkllm-builder"
 CONTAINER_NAME="rkllm-compile-$$"
 
-mkdir -p "$OUTPUT_DIR"
+mkdir -p "$MODELS_PATH" "$OUTPUT_DIR"
 
 echo "🏗️ Сборка образа для компиляции..."
 docker build -f Dockerfile -t "$IMAGE_NAME" .
 
-mkdir -p "$MODELS_PATH"
-cd "$MODELS_PATH"
-git clone https://huggingface.co/simaai/Qwen3-4B-Instruct-2507-a16w4 && \
-    cd Qwen3-4B-Instruct-2507-a16w4 && git lfs pull
+# Клонируем модель на хост (если ещё не клонирована)
+# if [ ! -d "$MODELS_PATH/Qwen3-4B-Instruct-2507-gptq-w4a16-g128" ]; then
+#     echo "📥 Клонирование модели из Hugging Face..."
+#     cd "$MODELS_PATH"
+#     git clone https://huggingface.co/kaitchup/Qwen3-4B-Instruct-2507-gptq-w4a16-g128
+#     cd "Qwen3-4B-Instruct-2507-gptq-w4a16-g128" && git lfs pull || true
+#     cd ../..
+# else
+#     echo "✅ Модель уже клонирована в $MODELS_PATH/Qwen3-4B-Instruct-2507-gptq-w4a16-g128"
+# fi
 
 echo "⚙️ Запуск компиляции (может занять 30-60 минут)..."
 docker run --rm \
     --name "$CONTAINER_NAME" \
-    -v "$(pwd)/"$MODELS_PATH":/models" \
+    -v "$(pwd)/$MODELS_PATH:/models" \
     -v "$(pwd)/$OUTPUT_DIR:/output" \
     "$IMAGE_NAME"
 
diff --git a/entrypoint.py b/entrypoint.py
index 120487b..aedb2e4 100644
--- a/entrypoint.py
+++ b/entrypoint.py
@@ -4,23 +4,25 @@ import torch
 from torch import nn
 import os
 
-modelpath = '/models/Qwen3-4B-Instruct-2507-a16w4/devkit'
-output = '/output/Qwen3-4B-Instruct-2507-a16w4.rkllm'
+modelpath = '/models/Qwen3-4B-Instruct-2507-F16/Qwen3-4B-Instruct-2507-F16.gguf'
+output = '/output/Qwen3-4B-Instruct-2507-F16-Q.rkllm'
 
 os.makedirs("/output", exist_ok=True)
 
 print(f"Загрузка модели из: {modelpath}")
 llm = RKLLM()
-ret = llm.load_huggingface(model=modelpath, model_lora = None, device='cpu')
+ret = llm.load_gguf(model=modelpath)
+# ret = llm.load_huggingface(model=modelpath, model_lora = None, device='cpu')
 if ret != 0:
     print(f"❌ Ошибка загрузки модели: {ret}")
     exit(ret)
 
 print("Компиляция для RK3588 (NPU)...")
-ret = llm.build(do_quantization=True, optimization_level=1, quantized_dtype='w4a16',
+ret = llm.build(do_quantization=True, optimization_level=1, quantized_dtype='w8a8',
                 quantized_algorithm='normal', target_platform='rk3588', num_npu_core=3, extra_qparams=None)
+
 if ret != 0:
     print(f"❌ Ошибка компиляции: {ret}")
     exit(ret)