| | import * as readline from 'node:readline' |
| | import { stdin, stdout } from 'node:process' |
| | import { readFileSync } from 'node:fs' |
| | import { SchemaConverter } from './public_legacy/json-schema-to-grammar.mjs' |
| |
|
const args = process.argv.slice(2);

// Value that immediately follows `flag` on the command line,
// or undefined when the flag is absent (or is the last token).
const argAfter = (flag) => {
    const at = args.indexOf(flag);
    return at !== -1 ? args[at + 1] : undefined;
};

const grammarJsonSchemaFile = argAfter("--grammar-json-schema");

// String flag on purpose: compared against "false" when building the request.
const no_cached_prompt = argAfter("--no-cache-prompt") ?? "false";

const grammarFile = argAfter("--grammar");

// Optional comma-separated property order, e.g. "a,b,c" -> { a: 0, b: 1, c: 2 }.
const grammarJsonSchemaPropOrder = argAfter("--grammar-json-schema-prop-order");
const propOrder = {};
if (grammarJsonSchemaPropOrder) {
    grammarJsonSchemaPropOrder.split(",").forEach((name, i) => {
        propOrder[name] = i;
    });
}
| |
|
// Optional grammar used to constrain the model's output.
// Built either from a JSON schema (--grammar-json-schema) or read verbatim
// from a GBNF file (--grammar); when both flags are given, --grammar wins
// because this second `if` overwrites `grammar`.
let grammar = null
if (grammarJsonSchemaFile) {
    let schema = JSON.parse(readFileSync(grammarJsonSchemaFile, 'utf-8'))
    const converter = new SchemaConverter({prop_order: propOrder, allow_fetch: true})
    // Inline any $ref pointers first; allow_fetch permits resolving remote refs.
    schema = await converter.resolveRefs(schema, grammarJsonSchemaFile)
    // visit() must run before formatGrammar() — it populates the converter's rules.
    converter.visit(schema, '')
    grammar = converter.formatGrammar()
}
if (grammarFile) {
    grammar = readFileSync(grammarFile, 'utf-8')
}
| |
|
| | |
// Server-assigned slot id for prompt caching; -1 lets the server pick one.
// Updated from each streamed message in chat_completion().
let slot_id = -1;

const API_URL = 'http://127.0.0.1:8080'

// Seed conversation used as few-shot context for every prompt.
// chat_completion() appends each finished turn, and shifts the oldest
// turn off when the server reports truncation.
const chat = [
    {
        human: "Hello, Assistant.",
        assistant: "Hello. How may I help you today?"
    },
    {
        human: "Please tell me the largest city in Europe.",
        assistant: "Sure. The largest city in Europe is Moscow, the capital of Russia."
    },
]

// System instruction prepended to every prompt (see format_prompt).
const instruction = `A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.`
| |
|
// Render the full prompt: system instruction, all prior turns from `chat`,
// then the new question with a trailing "### Assistant:" cue for the model.
function format_prompt(question) {
    const turns = [];
    for (const { human, assistant } of chat) {
        turns.push(`### Human: ${human}\n### Assistant: ${assistant}`);
    }
    const history = turns.join("\n");
    return `${instruction}\n${history}\n### Human: ${question}\n### Assistant:`;
}
| |
|
// Ask the server to tokenize `content`.
// Returns the array of token ids, or [] when the request fails or the
// response carries no tokens.
async function tokenize(content) {
    const result = await fetch(`${API_URL}/tokenize`, {
        method: 'POST',
        body: JSON.stringify({ content })
    })

    if (!result.ok) {
        return []
    }

    // BUG FIX: the original wrote `await result.json().tokens`, which reads
    // `.tokens` off the *pending Promise* and therefore always returned
    // undefined. Await the body first, then pick the field.
    const data = await result.json()
    return data.tokens ?? []
}
| |
|
| | const n_keep = await tokenize(instruction).length |
| |
|
// Stream one completion for `question` from the server, echoing content to
// stdout as it arrives. On success, appends the finished turn to `chat`;
// if the server reports truncation, drops the oldest turn first. Silently
// returns on a non-OK HTTP response (best-effort, matching the REPL loop).
async function chat_completion(question) {
    const result = await fetch(`${API_URL}/completion`, {
        method: 'POST',
        body: JSON.stringify({
            prompt: format_prompt(question),
            temperature: 0.2,
            top_k: 40,
            top_p: 0.9,
            n_keep: n_keep,
            n_predict: 256,
            cache_prompt: no_cached_prompt === "false",
            slot_id: slot_id,
            stop: ["\n### Human:"],
            grammar,
            stream: true,
        })
    })

    if (!result.ok) {
        return
    }

    let answer = ''

    // NOTE(review): this assumes each network chunk contains exactly one
    // complete `data: {...}` SSE event. Events split across chunks (or
    // several coalesced into one) would be dropped/mis-parsed — confirm
    // against the server's flushing behavior before hardening.
    for await (const chunk of result.body) {  // was `var chunk` — no rebinding needed
        const t = Buffer.from(chunk).toString('utf8')
        if (t.startsWith('data: ')) {
            const message = JSON.parse(t.substring(6))
            // Remember the slot so follow-up requests reuse the cached prompt.
            slot_id = message.slot_id
            answer += message.content
            process.stdout.write(message.content)
            if (message.stop) {
                if (message.truncated) {
                    chat.shift()
                }
                break
            }
        }
    }

    process.stdout.write('\n')
    chat.push({ human: question, assistant: answer.trimStart() })
}
| |
|
const rl = readline.createInterface({ input: stdin, output: stdout });

// Promise-based wrapper around rl.question so the loop below can await it.
// (rl.question never reports errors through its callback, so no reject path.)
const readlineQuestion = (rl, query, options) =>
    new Promise((resolve) => {
        rl.question(query, options, resolve);
    });

// Interactive REPL: prompt, stream the assistant's reply, repeat forever.
for (;;) {
    const question = await readlineQuestion(rl, '> ');
    await chat_completion(question);
}
| |
|