From 4f8750ae396071bc09a1010d16eb84ddc2b3f83c Mon Sep 17 00:00:00 2001 From: felixphixer Date: Mon, 26 Feb 2024 16:28:56 -0500 Subject: [PATCH 1/2] Response parsing logic hacks. It works! --- src/services/api.ts | 62 ++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 56 insertions(+), 6 deletions(-) diff --git a/src/services/api.ts b/src/services/api.ts index b644248..5d697f8 100644 --- a/src/services/api.ts +++ b/src/services/api.ts @@ -142,7 +142,7 @@ export const useApi = () => { const reader = res.body?.getReader() let results: GenerateCompletionResponse[] = [] - + let context: GenerateCompletionCompletedResponse | null = null if (reader) { while (true) { const { done, value } = await reader.read() @@ -150,14 +150,64 @@ export const useApi = () => { break } - const chunk = new TextDecoder().decode(value) - const parsedChunk: GenerateCompletionPartResponse = JSON.parse(chunk) + let chunk = new TextDecoder().decode(value) + + // Hacks galore! + // If a square bracket is found in the chunk, read the stream to the end + // since this signifies the end and we dont want to deal with partial JSON + if (chunk.includes('[')) { + let restValue; + while (!done) { + const { done: restDone, value: restValue } = await reader.read(); + if (restDone) { + break; + } + chunk += new TextDecoder().decode(restValue); + } + } - onDataReceived(parsedChunk) - results.push(parsedChunk) + // console.log(chunk) + // More hacks! + // Split the chunk between trailing and starting curly braces + const jsonStrings = chunk.split(/(?<=})\s*(?={)/) + // console.log(jsonStrings) + let combinedResponse: GenerateCompletionResponse | null = null + + for (const jsonString of jsonStrings) { + // Skip empty strings + if (jsonString.trim() === '') continue + + //console.log("before- " + jsonString) + const parsedChunk: GenerateCompletionPartResponse = JSON.parse(jsonString) + + if (!parsedChunk.context) { + if (!combinedResponse) { + combinedResponse = { + model: parsedChunk.model, + created_at: parsedChunk.created_at, + response: parsedChunk.response, + done: parsedChunk.done, + } + } else { + combinedResponse.created_at = parsedChunk.created_at + combinedResponse.response += parsedChunk.response + } + } else { + context = parsedChunk as GenerateCompletionCompletedResponse + } + } + + if (combinedResponse) { + onDataReceived(combinedResponse) + results.push(combinedResponse) + } } } - + if (context) { + onDataReceived(context) + results.push(context) + } + //console.log(results) return results } From 7f55536a86df2c9225493c4498a80e58f0dd4549 Mon Sep 17 00:00:00 2001 From: felixphixer Date: Sat, 9 Mar 2024 12:29:40 -0500 Subject: [PATCH 2/2] Add can-ndjson-stream package and rate limit the reader loop --- package.json | 1 + src/services/api.ts | 76 ++++++++++----------------------------------- 2 files changed, 17 insertions(+), 60 deletions(-) diff --git a/package.json b/package.json index 032c606..80df44a 100644 --- a/package.json +++ b/package.json @@ -15,6 +15,7 @@ "@types/markdown-it": "^13.0.7", "@vueuse/core": "^10.5.0", "autoprefixer": "^10.4.16", + "can-ndjson-stream": "^1.0.2", "date-fns": "^3.0.1", "dexie": "^3.2.4", "gravatar-url": "^4.0.1", diff --git a/src/services/api.ts b/src/services/api.ts index 5d697f8..d88a3a0 100644 --- a/src/services/api.ts +++ b/src/services/api.ts @@ -1,6 +1,6 @@ import { ref } from 'vue' import { baseUrl } from './appConfig.ts' -import { Message } from './database.ts' +import ndjsonStream from "can-ndjson-stream" export type GenerateCompletionRequest = { model: string @@ -127,6 +127,8 @@ export const useApi = () => { request: GenerateCompletionRequest, onDataReceived: (data: GenerateCompletionResponse) => void, ): Promise => { + request.options ??= {} + request.options["num_thread"] ??= 2 const res = await fetch(getApiUrl('/generate'), { method: 'POST', headers: { @@ -140,74 +142,28 @@ export const useApi = () => { throw new Error('Network response was not ok') } - const reader = res.body?.getReader() + const reader = ndjsonStream(res.body).getReader() let results: GenerateCompletionResponse[] = [] - let context: GenerateCompletionCompletedResponse | null = null + if (reader) { while (true) { + const start = Date.now(); const { done, value } = await reader.read() + const end = Date.now(); + const elapsed = end - start; if (done) { break } - - let chunk = new TextDecoder().decode(value) - - // Hacks galore! - // If a square bracket is found in the chunk, read the stream to the end - // since this signifies the end and we dont want to deal with partial JSON - if (chunk.includes('[')) { - let restValue; - while (!done) { - const { done: restDone, value: restValue } = await reader.read(); - if (restDone) { - break; - } - chunk += new TextDecoder().decode(restValue); - } - } - - // console.log(chunk) - // More hacks! - // Split the chunk between trailing and starting curly braces - const jsonStrings = chunk.split(/(?<=})\s*(?={)/) - // console.log(jsonStrings) - let combinedResponse: GenerateCompletionResponse | null = null - - for (const jsonString of jsonStrings) { - // Skip empty strings - if (jsonString.trim() === '') continue - - //console.log("before- " + jsonString) - const parsedChunk: GenerateCompletionPartResponse = JSON.parse(jsonString) - - if (!parsedChunk.context) { - if (!combinedResponse) { - combinedResponse = { - model: parsedChunk.model, - created_at: parsedChunk.created_at, - response: parsedChunk.response, - done: parsedChunk.done, - } - } else { - combinedResponse.created_at = parsedChunk.created_at - combinedResponse.response += parsedChunk.response - } - } else { - context = parsedChunk as GenerateCompletionCompletedResponse - } - } - - if (combinedResponse) { - onDataReceived(combinedResponse) - results.push(combinedResponse) + if (elapsed < 100) { + // rate limit to ~10 requests per second otherwise the chat gets fragmented in the chat window + await new Promise(resolve => setTimeout(resolve, 100 - elapsed)); } + const parsedChunk: GenerateCompletionPartResponse = value + onDataReceived(parsedChunk) + results.push(parsedChunk) } } - if (context) { - onDataReceived(context) - results.push(context) - } - //console.log(results) + return results } @@ -341,4 +297,4 @@ export const useApi = () => { generateEmbeddings, abort, } -} +} \ No newline at end of file