// Request defaults merged into every completion call;
// per-call `params` override any of these.
const paramDefaults = {
  stream: true,
  temperature: 0.2,
};

// Lazily populated cache of the server's generation settings: filled in
// from the final streamed chunk (see llama()) or fetched by llamaModelInfo().
let generation_settings = null;
| |
|
/**
 * Error thrown when the completion endpoint responds with a failure.
 *
 * @param {string} message - human-readable error message (becomes `.message`).
 * @param {string} name - error name, e.g. 'ServerError' (becomes `.name`).
 * @param {*} [data] - optional structured payload describing the failure.
 */
export class CompletionError extends Error {
  constructor(message, name, data) {
    super(message);
    this.name = name;
    // Bug fix: `data` was accepted but silently discarded; keep it so
    // callers can inspect the server's structured error payload.
    this.data = data;
  }
}
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
/**
 * Stream a completion from a llama.cpp server.
 *
 * POSTs `prompt` (merged with `paramDefaults` and `params`) to
 * `config.endpoint` (default `/completion`) and yields one event object per
 * server-sent-event line. Each yielded object carries the parsed payload on
 * `.data`. The full generated text is also accumulated and returned when the
 * stream ends.
 *
 * @param {string} prompt - prompt text sent to the server.
 * @param {object} [params] - completion parameters; `params.api_key`, if set,
 *   is sent as a Bearer token.
 * @param {object} [config] - `api_url`, `endpoint`, and optional `controller`
 *   (AbortController) to cancel the request.
 * @returns {AsyncGenerator<object, string>} yields event objects; returns the
 *   accumulated content string.
 * @throws {CompletionError} on a non-200 response.
 */
export async function* llama(prompt, params = {}, config = {}) {
  let controller = config.controller;
  const api_url = config.api_url?.replace(/\/+$/, '') || "";

  if (!controller) {
    controller = new AbortController();
  }

  const completionParams = { ...paramDefaults, ...params, prompt };

  const response = await fetch(`${api_url}${config.endpoint || '/completion'}`, {
    method: 'POST',
    body: JSON.stringify(completionParams),
    headers: {
      'Connection': 'keep-alive',
      'Content-Type': 'application/json',
      'Accept': 'text/event-stream',
      ...(params.api_key ? {'Authorization': `Bearer ${params.api_key}`} : {})
    },
    signal: controller.signal,
  });

  if (response.status !== 200) {
    // Bug fix: the original threw inside its own try block (so the
    // CompletionError was caught and rewrapped) and, when the JSON body had
    // no error.message, fell through and tried to read an already-consumed
    // body. Always throw here; prefer the server's message when available.
    let message = `server returned HTTP status ${response.status}`;
    try {
      const body = await response.json();
      if (body?.error?.message) {
        message = body.error.message;
      }
    } catch (_) {
      // Body was not valid JSON; keep the generic status message.
    }
    throw new CompletionError(message, 'ServerError');
  }

  const reader = response.body.getReader();
  const decoder = new TextDecoder();

  let content = ""; // full generated text, returned at the end
  let leftover = ""; // partial line carried over between reads

  try {
    let cont = true;

    while (cont) {
      const result = await reader.read();
      if (result.done) {
        break;
      }

      // Bug fix: `{ stream: true }` keeps multi-byte UTF-8 sequences that
      // straddle chunk boundaries intact across decode() calls.
      const text = leftover + decoder.decode(result.value, { stream: true });

      // A chunk may end mid-line; keep the incomplete tail for the next read.
      const endsWithLineBreak = text.endsWith('\n');
      const lines = text.split('\n');
      if (!endsWithLineBreak) {
        leftover = lines.pop();
      } else {
        leftover = "";
      }

      for (const line of lines) {
        // Bug fix: the original used a single /g regex across exec() calls;
        // a global regex carries `lastIndex` state between calls and
        // silently skips matches on subsequent lines. Use a fresh,
        // non-global match per line.
        const match = /^(\S+):\s(.*)$/.exec(line);
        if (!match) {
          continue;
        }
        // Attach the SSE field (e.g. `data`, `error`) to the event object
        // that will be yielded to the consumer.
        result[match[1]] = match[2];

        // OpenAI-compatible servers signal end-of-stream with "[DONE]".
        if (result.data === '[DONE]') {
          cont = false;
          break;
        }

        if (result.data) {
          result.data = JSON.parse(result.data);
          content += result.data.content;

          // Yield one event per parsed data line.
          yield result;

          // If we got a stop token from the server, note the settings and
          // finish.
          if (result.data.stop) {
            if (result.data.generation_settings) {
              generation_settings = result.data.generation_settings;
            }
            cont = false;
            break;
          }
        }

        if (result.error) {
          let parsedError = null;
          try {
            parsedError = JSON.parse(result.error);
          } catch (_) {
            console.error(`llama.cpp error ${result.error}`);
          }
          if (parsedError) {
            result.error = parsedError;
            if (parsedError.message?.includes('slot unavailable')) {
              // Bug fix: this throw was previously caught by its own local
              // catch and swallowed; it must propagate so upstream callers
              // can retry on another slot.
              throw new Error('slot unavailable');
            }
            console.error(`llama.cpp error [${parsedError.code} - ${parsedError.type}]: ${parsedError.message}`);
          }
        }
      }
    }
  } catch (e) {
    if (e.name !== 'AbortError') {
      console.error("llama error: ", e);
    }
    throw e;
  } finally {
    // Release the connection whether we finished, errored, or the consumer
    // stopped iterating.
    controller.abort();
  }

  return content;
}
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
/**
 * Run a completion and surface the stream as DOM-style events.
 *
 * Dispatches:
 *  - "message" with `detail` = parsed chunk data, per streamed chunk;
 *  - "generation_settings" / "timings" when those fields appear in a chunk;
 *  - "done" with `detail.content` = full generated text;
 *  - "error" with `detail` = the thrown error (new; the original left the
 *    async IIFE's rejection unhandled).
 *
 * @param {string} prompt
 * @param {object} [params]
 * @param {object} [config]
 * @returns {EventTarget}
 */
export const llamaEventTarget = (prompt, params = {}, config = {}) => {
  const eventTarget = new EventTarget();
  (async () => {
    let content = "";
    try {
      for await (const chunk of llama(prompt, params, config)) {
        // Bug fix: the original dereferenced chunk.data.generation_settings
        // and chunk.data.timings outside this guard, throwing a TypeError on
        // any chunk without a data payload.
        if (chunk.data) {
          content += chunk.data.content;
          eventTarget.dispatchEvent(new CustomEvent("message", { detail: chunk.data }));
          if (chunk.data.generation_settings) {
            eventTarget.dispatchEvent(new CustomEvent("generation_settings", { detail: chunk.data.generation_settings }));
          }
          if (chunk.data.timings) {
            eventTarget.dispatchEvent(new CustomEvent("timings", { detail: chunk.data.timings }));
          }
        }
      }
      eventTarget.dispatchEvent(new CustomEvent("done", { detail: { content } }));
    } catch (err) {
      // Bug fix: without this, any stream failure became an unhandled
      // promise rejection with no way for listeners to observe it.
      eventTarget.dispatchEvent(new CustomEvent("error", { detail: err }));
    }
  })();
  return eventTarget;
}
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
/**
 * Run a completion and resolve with the full generated text.
 *
 * Bug fix: the original wrapped an async executor in `new Promise(...)`
 * (the explicit-construction anti-pattern); a plain async function returns
 * an identical promise, resolving with the content and rejecting with any
 * error thrown by the stream.
 *
 * @param {string} prompt
 * @param {object} [params]
 * @param {object} [config]
 * @returns {Promise<string>} the concatenated completion text.
 */
export const llamaPromise = async (prompt, params = {}, config = {}) => {
  let content = "";
  for await (const chunk of llama(prompt, params, config)) {
    content += chunk.data.content;
  }
  return content;
};
| |
|
| | |
| | |
| | |
/**
 * Drive a completion and hand every streamed chunk to `callback`.
 *
 * @param {object} params - completion parameters; `params.prompt` is the prompt.
 * @param {AbortController} controller - used to cancel the request.
 * @param {(chunk: object) => void} callback - invoked once per streamed chunk.
 */
export const llamaComplete = async (params, controller, callback) => {
  const stream = llama(params.prompt, params, { controller });
  for await (const part of stream) {
    callback(part);
  }
};
| |
|
| | |
/**
 * Return the server's default generation settings, fetching `/props` once
 * and caching the result in the module-level `generation_settings`.
 *
 * Bug fix: the original mixed `await` with `.then()` in a single expression;
 * use sequential awaits instead.
 *
 * @param {object} [config] - optional `api_url` (trailing slashes stripped).
 * @returns {Promise<object>} the cached or freshly-fetched settings.
 */
export const llamaModelInfo = async (config = {}) => {
  if (!generation_settings) {
    const api_url = config.api_url?.replace(/\/+$/, '') || "";
    const response = await fetch(`${api_url}/props`);
    const props = await response.json();
    generation_settings = props.default_generation_settings;
  }
  return generation_settings;
};
| |
|