diff --git a/package.json b/package.json index 032c606..80df44a 100644 --- a/package.json +++ b/package.json @@ -15,6 +15,7 @@ "@types/markdown-it": "^13.0.7", "@vueuse/core": "^10.5.0", "autoprefixer": "^10.4.16", + "can-ndjson-stream": "^1.0.2", "date-fns": "^3.0.1", "dexie": "^3.2.4", "gravatar-url": "^4.0.1", diff --git a/src/services/api.ts b/src/services/api.ts index b644248..d88a3a0 100644 --- a/src/services/api.ts +++ b/src/services/api.ts @@ -1,6 +1,6 @@ import { ref } from 'vue' import { baseUrl } from './appConfig.ts' -import { Message } from './database.ts' +import ndjsonStream from "can-ndjson-stream" export type GenerateCompletionRequest = { model: string @@ -127,6 +127,8 @@ export const useApi = () => { request: GenerateCompletionRequest, onDataReceived: (data: GenerateCompletionResponse) => void, ): Promise => { + request.options ??= {} + request.options["num_thread"] ??= 2 const res = await fetch(getApiUrl('/generate'), { method: 'POST', headers: { @@ -140,19 +142,23 @@ export const useApi = () => { throw new Error('Network response was not ok') } - const reader = res.body?.getReader() + const reader = ndjsonStream(res.body).getReader() let results: GenerateCompletionResponse[] = [] if (reader) { while (true) { + const start = Date.now(); const { done, value } = await reader.read() + const end = Date.now(); + const elapsed = end - start; if (done) { break } - - const chunk = new TextDecoder().decode(value) - const parsedChunk: GenerateCompletionPartResponse = JSON.parse(chunk) - + if (elapsed < 100) { + // rate limit to ~10 requests per second otherwise the chat gets fragmented in the chat window + await new Promise(resolve => setTimeout(resolve, 100 - elapsed)); + } + const parsedChunk: GenerateCompletionPartResponse = value onDataReceived(parsedChunk) results.push(parsedChunk) } @@ -291,4 +297,4 @@ export const useApi = () => { generateEmbeddings, abort, } -} +} \ No newline at end of file