# Coretex Private Engine — Hugging Face Space app (app.py).
# NOTE(review): the original paste carried Spaces page residue ("Build error")
# and table pipes around every line; reconstructed as runnable Python below.
import gradio as gr
from llama_cpp import Llama
import json

# 1. LOAD THE ENGINE
# We are using an 8-bit (Q8_0) quantized GGUF build of Llama 3.2 3B Instruct.
# This is the "Owner's Engine" - it runs locally on your Space.
print("Loading Coretex Engine...")
llm = Llama.from_pretrained(
    repo_id="hugging-quants/Llama-3.2-3B-Instruct-Q8_0-GGUF",
    filename="llama-3.2-3b-instruct-q8_0.gguf",
    n_ctx=2048,   # Context window
    n_threads=2,  # Matches the 2 vCPUs on Hugging Face Free
)
def load_knowledge():
    """Read custom knowledge entries from ``knowledge.jsonl``.

    Returns:
        list[dict]: one dict per JSON line in the file. Returns ``[]``
        when the file is missing or unreadable, or when a line is not
        valid JSON, so the chat keeps working without custom knowledge.
    """
    try:
        with open("knowledge.jsonl", "r", encoding="utf-8") as f:
            # Skip blank lines so a trailing newline doesn't abort the
            # whole load (the old bare except silently dropped everything).
            return [json.loads(line) for line in f if line.strip()]
    except (OSError, json.JSONDecodeError):
        # Narrowed from a bare except: only missing/unreadable file or
        # malformed JSON fall back to "no custom knowledge".
        return []
def coretex_chat(user_input):
    """Answer ``user_input`` with the local Llama engine.

    Every entry from ``knowledge.jsonl`` is inlined into the system
    prompt so the model can ground its reply in the owner's custom
    knowledge.

    Args:
        user_input (str): the user's message from the Gradio textbox.

    Returns:
        str: the model's completion text.
    """
    knowledge = load_knowledge()
    # Format your custom knowledge into the prompt.
    # NOTE(review): assumes each entry has "context" and "response"
    # keys — a malformed entry raises KeyError; confirm the writer side.
    context_str = "\n".join(
        f"Info: {k['context']} -> {k['response']}" for k in knowledge
    )
    # Hand-built Llama 3 chat template: system turn with the knowledge,
    # user turn, then an open assistant header for the model to fill.
    prompt = (
        "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n"
        f"You are Coretex. Use this custom knowledge:\n{context_str}<|eot_id|>"
        f"<|start_header_id|>user<|end_header_id|>\n\n{user_input}<|eot_id|>"
        "<|start_header_id|>assistant<|end_header_id|>\n\n"
    )
    # THE ENGINE THINKS HERE
    output = llm(prompt, max_tokens=150, stop=["<|eot_id|>"], echo=False)
    return output["choices"][0]["text"]
# 2. THE UI — minimal text-in / text-out Gradio interface wired to the engine.
demo = gr.Interface(
    fn=coretex_chat,
    inputs="text",
    outputs="text",
    title="Coretex Private Engine",
)
# queue() serializes requests — required on CPU Spaces where only one
# generation can run at a time.
demo.queue().launch()