# Coretex-API / app.py
# Hugging Face Space by itamar11 (commit b5870f4, "Update app.py").
import gradio as gr
from llama_cpp import Llama
import json
# 1. LOAD THE ENGINE
# We are using an 8-bit (Q8_0) quantized GGUF build of Llama 3.2 3B Instruct.
# This is the "Owner's Engine" - it runs locally on your Space.
print("Loading Coretex Engine...")
# from_pretrained downloads the GGUF weights from the Hub on first run,
# then loads them into a llama.cpp context for CPU inference.
llm = Llama.from_pretrained(
    repo_id="hugging-quants/Llama-3.2-3B-Instruct-Q8_0-GGUF",
    filename="llama-3.2-3b-instruct-q8_0.gguf",
    n_ctx=2048, # Context window (tokens) — prompt + completion must fit here
    n_threads=2 # Matches the 2 vCPUs on Hugging Face Free
)
def load_knowledge():
    """Load custom knowledge entries from ``knowledge.jsonl``.

    Each non-blank line of the file is parsed as one JSON object
    (expected to carry ``context`` and ``response`` keys — see the
    prompt builder).

    Returns:
        list[dict]: One dict per JSON line, or an empty list when the
        file is missing, unreadable, or contains invalid JSON.
    """
    try:
        with open("knowledge.jsonl", "r", encoding="utf-8") as f:
            # Skip blank lines so a trailing newline doesn't blow up parsing.
            return [json.loads(line) for line in f if line.strip()]
    except (OSError, json.JSONDecodeError):
        # Narrowed from a bare `except:` — a missing or malformed file
        # simply means "no custom knowledge"; the chat still works with
        # the base model alone.
        return []
def coretex_chat(user_input):
    """Answer one user message with the local Llama engine.

    Reloads knowledge.jsonl on every call, folds each entry into the
    system prompt, and returns the model's completion text.
    """
    entries = load_knowledge()
    # Render each knowledge entry as an "Info: context -> response" line.
    fact_lines = []
    for entry in entries:
        fact_lines.append(f"Info: {entry['context']} -> {entry['response']}")
    context_str = "\n".join(fact_lines)
    # Llama 3 chat template: system turn carrying the knowledge, the user
    # turn, then an open assistant header for the model to complete.
    prompt = (
        f"<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n"
        f"You are Coretex. Use this custom knowledge:\n{context_str}<|eot_id|>"
        f"<|start_header_id|>user<|end_header_id|>\n\n{user_input}<|eot_id|>"
        f"<|start_header_id|>assistant<|end_header_id|>\n\n"
    )
    # THE ENGINE THINKS HERE — generation stops at the end-of-turn token.
    completion = llm(prompt, max_tokens=150, stop=["<|eot_id|>"], echo=False)
    return completion['choices'][0]['text']
# Minimal Gradio UI: one text box in, the model's text answer out.
demo = gr.Interface(fn=coretex_chat, inputs="text", outputs="text", title="Coretex Private Engine")
# queue() serializes requests so concurrent users don't hit the single
# CPU-bound llama.cpp context at the same time; launch() starts the server.
demo.queue().launch()