# Mirror of https://github.com/DrHo1y/rkllm-gradio-server.git
# Synced 2026-01-22 19:16:20 +07:00
# 131 lines, 3.8 KiB, Python
import ctypes

# Module-level state written by the inference callback so that its output can
# be displayed in the Gradio interface.
global_text = []        # text pieces collected from the callback
global_state = -1       # last state reported by the callback; -1 until one is stored
split_byte_data = b""   # holds segmented byte data between callback invocations

# Load the RKLLM runtime shared library from its fixed install path.
# The bindings in this file default to runtime v1.1.2.
# ctypes.CDLL raises OSError at import time when the library cannot be loaded.
rkllm_lib = ctypes.CDLL('/usr/lib/librkllmrt.so')

# Define the structures from the library.

# Opaque handle type, represented as a plain void pointer.
RKLLM_Handle_t = ctypes.c_void_p

# NULL void pointer.
# NOTE(review): presumably passed as the opaque user-data argument of the
# native API — confirm against the call sites.
userdata = ctypes.c_void_p(None)

# Call-state codes.
# NOTE: LLMCallState is an alias of ctypes.c_int, not a new type, so the
# constants below are stored as attributes on ctypes.c_int itself.
LLMCallState = ctypes.c_int
LLMCallState.RKLLM_RUN_NORMAL = 0
LLMCallState.RKLLM_RUN_WAITING = 1
LLMCallState.RKLLM_RUN_FINISH = 2
LLMCallState.RKLLM_RUN_ERROR = 3
LLMCallState.RKLLM_RUN_GET_LAST_HIDDEN_LAYER = 4

# Input-mode codes; they select which kind of input a request carries.
# NOTE: RKLLMInputMode is an alias of ctypes.c_int, not a new type, so the
# constants below are stored as attributes on ctypes.c_int itself.
RKLLMInputMode = ctypes.c_int
RKLLMInputMode.RKLLM_INPUT_PROMPT = 0
RKLLMInputMode.RKLLM_INPUT_TOKEN = 1
RKLLMInputMode.RKLLM_INPUT_EMBED = 2
RKLLMInputMode.RKLLM_INPUT_MULTIMODAL = 3

# Inference-mode codes.
# NOTE: RKLLMInferMode is an alias of ctypes.c_int, not a new type, so the
# constants below are stored as attributes on ctypes.c_int itself.
RKLLMInferMode = ctypes.c_int
RKLLMInferMode.RKLLM_INFER_GENERATE = 0
RKLLMInferMode.RKLLM_INFER_GET_LAST_HIDDEN_LAYER = 1

class RKLLMExtendParam(ctypes.Structure):
    """ctypes layout for the library's extended-parameter structure.

    The order and width of the fields define the binary layout shared with
    the native library, so they must not be reordered or resized.
    NOTE(review): confirm the layout against the header of the installed
    runtime version.
    """

    _fields_ = [
        ("base_domain_id", ctypes.c_int32),
        # 112 reserved bytes; together with the int32 the struct is 116 bytes.
        ("reserved", ctypes.c_uint8 * 112),
    ]


class RKLLMParam(ctypes.Structure):
    """ctypes layout for the library's model/sampling parameter structure.

    The order and types of the fields define the binary layout shared with
    the native library, so they must not be reordered or retyped.
    NOTE(review): field semantics are inferred from their names only; confirm
    them and the layout against the header of the installed runtime version.
    """

    _fields_ = [
        ("model_path", ctypes.c_char_p),
        ("max_context_len", ctypes.c_int32),
        ("max_new_tokens", ctypes.c_int32),
        # Sampling-related numeric settings.
        ("top_k", ctypes.c_int32),
        ("top_p", ctypes.c_float),
        ("temperature", ctypes.c_float),
        ("repeat_penalty", ctypes.c_float),
        ("frequency_penalty", ctypes.c_float),
        ("presence_penalty", ctypes.c_float),
        ("mirostat", ctypes.c_int32),
        ("mirostat_tau", ctypes.c_float),
        ("mirostat_eta", ctypes.c_float),
        # Boolean switches.
        ("skip_special_token", ctypes.c_bool),
        ("is_async", ctypes.c_bool),
        # Image-related strings (C char pointers).
        ("img_start", ctypes.c_char_p),
        ("img_end", ctypes.c_char_p),
        ("img_content", ctypes.c_char_p),
        # Embedded by value, not by pointer.
        ("extend_param", RKLLMExtendParam),
    ]


class RKLLMLoraAdapter(ctypes.Structure):
    """ctypes layout for the library's LoRA adapter description.

    Holds two C strings (adapter path and adapter name) and a float scale.
    The field order defines the binary layout shared with the native library.
    """

    _fields_ = [
        ("lora_adapter_path", ctypes.c_char_p),
        ("lora_adapter_name", ctypes.c_char_p),
        ("scale", ctypes.c_float),
    ]


class RKLLMEmbedInput(ctypes.Structure):
    """ctypes layout for an embedding input: a float pointer plus a count.

    The field order defines the binary layout shared with the native library.
    NOTE(review): the shape of the buffer behind ``embed`` is not visible
    here — confirm against the runtime header.
    """

    _fields_ = [
        ("embed", ctypes.POINTER(ctypes.c_float)),
        ("n_tokens", ctypes.c_size_t),
    ]


class RKLLMTokenInput(ctypes.Structure):
    """ctypes layout for a token-id input: an int32 pointer plus a count.

    The field order defines the binary layout shared with the native library.
    """

    _fields_ = [
        ("input_ids", ctypes.POINTER(ctypes.c_int32)),
        ("n_tokens", ctypes.c_size_t),
    ]


class RKLLMMultiModelInput(ctypes.Structure):
    """ctypes layout for a multimodal input.

    Holds a C-string prompt, a float pointer to an image embedding and a
    count of image tokens. The field order defines the binary layout shared
    with the native library.
    """

    _fields_ = [
        ("prompt", ctypes.c_char_p),
        ("image_embed", ctypes.POINTER(ctypes.c_float)),
        ("n_image_tokens", ctypes.c_size_t),
    ]


class RKLLMInputUnion(ctypes.Union):
    """ctypes union of the possible input payloads.

    All members overlay the same memory, so only one of them is meaningful
    at a time.
    NOTE(review): presumably the active member is selected by the
    ``input_mode`` field of the enclosing input structure — confirm against
    the runtime header.
    """

    _fields_ = [
        ("prompt_input", ctypes.c_char_p),
        ("embed_input", RKLLMEmbedInput),
        ("token_input", RKLLMTokenInput),
        ("multimodal_input", RKLLMMultiModelInput),
    ]


class RKLLMInput(ctypes.Structure):
    """ctypes layout for an inference input: a mode tag plus a payload union.

    The field order defines the binary layout shared with the native library.
    NOTE(review): ``input_mode`` presumably takes one of the
    RKLLM_INPUT_* codes and decides which union member is read — confirm
    against the runtime header.
    """

    _fields_ = [
        ("input_mode", ctypes.c_int),
        ("input_data", RKLLMInputUnion),
    ]


class RKLLMLoraParam(ctypes.Structure):
    """ctypes layout for per-inference LoRA settings: one C-string name."""

    _fields_ = [
        ("lora_adapter_name", ctypes.c_char_p),
    ]


class RKLLMPromptCacheParam(ctypes.Structure):
    """ctypes layout for prompt-cache settings.

    Holds an int flag and a C-string path. The field order defines the
    binary layout shared with the native library.
    NOTE(review): ``save_prompt_cache`` looks like a 0/1 switch — confirm
    against the runtime header.
    """

    _fields_ = [
        ("save_prompt_cache", ctypes.c_int),
        ("prompt_cache_path", ctypes.c_char_p),
    ]


class RKLLMInferParam(ctypes.Structure):
    """ctypes layout for per-inference parameters.

    Holds the inference mode and two pointers to optional parameter
    structures (LoRA and prompt cache). The field order defines the binary
    layout shared with the native library.
    """

    _fields_ = [
        # RKLLMInferMode is an alias of ctypes.c_int, so this is a plain int.
        ("mode", RKLLMInferMode),
        ("lora_params", ctypes.POINTER(RKLLMLoraParam)),
        ("prompt_cache_params", ctypes.POINTER(RKLLMPromptCacheParam)),
    ]


class RKLLMResultLastHiddenLayer(ctypes.Structure):
    """ctypes layout for the last-hidden-layer part of a result.

    Holds a float pointer and two int sizes. The field order defines the
    binary layout shared with the native library.
    NOTE(review): presumably ``hidden_states`` points at
    ``num_tokens * embd_size`` floats — confirm against the runtime header.
    """

    _fields_ = [
        ("hidden_states", ctypes.POINTER(ctypes.c_float)),
        ("embd_size", ctypes.c_int),
        ("num_tokens", ctypes.c_int),
    ]


class RKLLMResult(ctypes.Structure):
    """ctypes layout for an inference result.

    Holds a C-string text, an int size and an embedded last-hidden-layer
    structure. The field order defines the binary layout shared with the
    native library.
    """

    _fields_ = [
        ("text", ctypes.c_char_p),
        ("size", ctypes.c_int),
        # Embedded by value, not by pointer.
        ("last_hidden_layer", RKLLMResultLastHiddenLayer),
    ]