rkllm-gradio-server/app/rkllm_server_gradio.py
2025-03-25 23:08:13 +07:00

112 lines
4.8 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# --- Module setup: imports, environment, and resource limits ---
# Fix: `os` was used below but never explicitly imported (it was only
# available via one of the star imports) — import it explicitly.
import os
import resource
import sys

import gradio as gr

from ctypes_bindings import *
from model_class import *
from mesh_utils import *

# Bind the Gradio server to all interfaces on port 8080, and set the
# RKLLM runtime's log level.
os.environ["GRADIO_SERVER_NAME"] = "0.0.0.0"
os.environ["GRADIO_SERVER_PORT"] = "8080"
os.environ["RKLLM_LOG_LEVEL"] = "1"

# Raise the open-file-descriptor limit for this process; presumably the
# RKLLM runtime holds many descriptors open at once — TODO confirm.
resource.setrlimit(resource.RLIMIT_NOFILE, (102400, 102400))

# Module-level chat history list (kept for compatibility with the original).
history = []
# NOTE(review): the original nested everything below under
# `if __name__ == "__main__":`. The helper is defined at module level here,
# which is behaviorally identical when the file is executed as a script.
# This also lets the model be selected/initialized before any class instance
# exists, as the original comments intended.
def initialize_model(model):
    """Load (or reload) the RKLLM model selected in the Gradio dropdown.

    The server runs the model in single-threaded mode, so any previously
    loaded model must be released before a new one is initialized.

    Parameters
    ----------
    model : str
        Model identifier chosen from the model dropdown.

    Returns
    -------
    list
        A single [message, None] chatbot pair announcing successful
        initialization (rendered as HTML in the status box).
    """
    global rkllm_model
    # Release the previous model if one exists. On the very first call
    # `rkllm_model` is not yet defined, which is expected — the original
    # used a bare `except:`; narrowed to Exception so SystemExit and
    # KeyboardInterrupt still propagate.
    try:
        rkllm_model.release()
    except Exception:
        print("No model loaded! Continuing with initialization...")
    print("=========INITIALIZING===========")
    sys.stdout.flush()
    rkllm_model = RKLLMLoaderClass(model=model)
    model_init = f"RKLLM Model, {rkllm_model.model_name} has been initialized successfully"
    print(model_init)
    print("==============================")
    output = [[f"<h4 style=\"text-align:center;\">{model_init}\n</h4>", None]]
    sys.stdout.flush()
    return output
def get_RKLLM_output(message, history):
    """Stream output from the loaded RKLLM model into the chat box.

    Parameters
    ----------
    message : str
        The user's latest chat message.
    history : list
        Prior chat turns, as supplied by gr.ChatInterface.

    Yields
    ------
    Whatever items the model's own ``get_RKLLM_output`` generator produces.
    """
    try:
        # `rkllm_model` is a module global set by initialize_model().
        yield from rkllm_model.get_RKLLM_output(message, history)
    except RuntimeError as e:
        print(f"ERROR: {e}")
        # Fix: the original ended with `return history`; a generator's return
        # value is discarded by callers, so a bare `return` expresses the
        # real intent (stop streaming) without implying history is delivered.
        return
# --- Build the Gradio interface ---
with gr.Blocks(title="Chat with RKLLM") as chatRKLLM:
    # Fix: the original did `available_models = available_models()`, rebinding
    # the imported helper function to its own result. Use a distinct name so
    # the helper stays callable.
    model_choices = available_models()
    gr.Markdown("<div align='center'><font size='10'> RKLLM Chat </font></div>")
    with gr.Tabs():
        with gr.TabItem("Select Model"):
            model_dropdown = gr.Dropdown(
                choices=model_choices,
                label="Select Model",
                value="None",
                allow_custom_value=True,
            )
            statusBox = gr.Chatbot(height=100)
            # Selecting a model (re)initializes it and reports into statusBox.
            model_dropdown.input(initialize_model, [model_dropdown], [statusBox])
        with gr.TabItem("Txt2Txt"):
            txt2txt = gr.ChatInterface(fn=get_RKLLM_output, type="messages")
            txt2txt.chatbot.height = "70vh"
            txt2txt.chatbot.resizeable = True
        with gr.TabItem("Txt2HTML"):
            # Text input field (labels/placeholders are user-facing Russian
            # strings and are kept byte-for-byte).
            input_text = gr.Textbox(label="Введите ваш запрос", placeholder="Введите текст...", lines=3)
            # Submit button.
            submit_button = gr.Button("Отправить")
            # Component that renders the response as HTML.
            output_html = gr.HTML(label="Ответ LLM")

            def process_txt2txt(input_text):
                # Currently echoes the input verbatim; presumably a placeholder
                # until LLM-backed HTML generation is wired in — TODO confirm.
                return input_text

            submit_button.click(
                process_txt2txt,
                inputs=input_text,
                outputs=output_html,
            )
        # NOTE(review): a fully commented-out "Txt2Mesh" tab (3D mesh input,
        # gr.Model3D output wired to apply_gradient_color) was removed here as
        # dead code; recover it from version control if it is to be revived.
print("\nNo model loaded yet!\n")
# Enable the event queue system.
chatRKLLM.queue()
# Start the Gradio application; launch() blocks until the server shuts down.
chatRKLLM.launch()

print("====================")
print("RKLLM model inference completed, releasing RKLLM model resources...")
# Fix: `rkllm_model` only exists if a model was selected at least once, so an
# unguarded release() raised NameError on shutdown without a loaded model.
try:
    rkllm_model.release()
except NameError:
    print("No model was loaded; nothing to release.")
print("====================")