diff --git a/README.md b/README.md
index 3718ade..d867129 100644
--- a/README.md
+++ b/README.md
@@ -28,6 +28,7 @@ USAGE
 * [`heroku ai:docs`](#heroku-aidocs)
 * [`heroku ai:models`](#heroku-aimodels)
 * [`heroku ai:models:attach MODEL_RESOURCE`](#heroku-aimodelsattach-model_resource)
+* [`heroku ai:models:call MODEL_RESOURCE`](#heroku-aimodelscall-model_resource)
 * [`heroku ai:models:create MODEL_NAME`](#heroku-aimodelscreate-model_name)
 * [`heroku ai:models:list`](#heroku-aimodelslist)
@@ -94,6 +95,38 @@ EXAMPLES

_See code: [dist/commands/ai/models/attach.ts](https://github.com/heroku/heroku-cli-plugin-integration/blob/v0.0.0/dist/commands/ai/models/attach.ts)_

+## `heroku ai:models:call MODEL_RESOURCE`
+
+make an inference request to a specific AI model resource
+
+```
+USAGE
+  $ heroku ai:models:call [MODEL_RESOURCE] -a <value> -p <value> [-j] [--optfile <value>] [--opts <value>] [-o
+    <value>] [-r <value>]
+
+ARGUMENTS
+  MODEL_RESOURCE  The resource ID or alias of the model to call.
+
+FLAGS
+  -a, --app=<value>      (required) app to run command against
+  -j, --json             Output response as JSON
+  -o, --output=<value>   The file path where the command writes the model response.
+  -p, --prompt=<value>   (required) The input prompt for the model.
+  -r, --remote=<value>   git remote of app to use
+      --optfile=<value>  Additional options for model inference, provided as a JSON config file.
+      --opts=<value>     Additional options for model inference, provided as a JSON string.
+
+DESCRIPTION
+  make an inference request to a specific AI model resource
+
+EXAMPLES
+  $ heroku ai:models:call my_llm --prompt "What is the meaning of life?"
+
+  $ heroku ai:models:call sdxl --prompt "Generate an image of a sunset" --opts '{"quality": "hd"}'
+```
+
+_See code: [dist/commands/ai/models/call.ts](https://github.com/heroku/heroku-cli-plugin-integration/blob/v0.0.0/dist/commands/ai/models/call.ts)_
+
## `heroku ai:models:create MODEL_NAME`

provision access to an AI model

diff --git a/src/commands/ai/models/call.ts b/src/commands/ai/models/call.ts
new file mode 100644
index 0000000..6bcc15e
--- /dev/null
+++ b/src/commands/ai/models/call.ts
@@ -0,0 +1,158 @@
+import color from '@heroku-cli/color'
+import {flags} from '@heroku-cli/command'
+import {Args, ux} from '@oclif/core'
+import fs from 'node:fs'
+// import path from 'node:path'
+import {ChatCompletionResponse, ModelList} from '../../../lib/ai/types'
+import Command from '../../../lib/base'
+
+export default class Call extends Command {
+  static args = {
+    model_resource: Args.string({
+      description: 'The resource ID or alias of the model to call.',
+      required: true,
+    }),
+  }
+
+  static description = 'make an inference request to a specific AI model resource'
+  static examples = [
+    'heroku ai:models:call my_llm --prompt "What is the meaning of life?"',
+    'heroku ai:models:call sdxl --prompt "Generate an image of a sunset" --opts \'{"quality": "hd"}\'',
+  ]
+
+  static flags = {
+    app: flags.app({required: true}),
+    // interactive: flags.boolean({
+    //   char: 'i',
+    //   description: 'Use interactive mode for conversation beyond the initial prompt (not available on all models)',
+    //   default: false,
+    // }),
+    json: flags.boolean({char: 'j', description: 'Output response as JSON'}),
+    optfile: flags.string({
+      description: 'Additional options for model inference, provided as a JSON config file.',
+      required: false,
+    }),
+    opts: flags.string({
+      description: 'Additional options for model inference, provided as a JSON string.',
+      required: false,
+    }),
+    output: flags.string({
+      char: 'o',
+      // description: 'The file path where the command writes the model
response. If used with --interactive, this flag writes the entire exchange when the session closes.',
+      description: 'The file path where the command writes the model response.',
+      required: false,
+    }),
+    prompt: flags.string({
+      char: 'p',
+      description: 'The input prompt for the model.',
+      required: true,
+    }),
+    remote: flags.remote(),
+  }
+
+  public async run(): Promise<void> {
+    const {args, flags} = await this.parse(Call)
+    const {model_resource: modelResource} = args
+    const {app, json, optfile, opts, output, prompt} = flags
+
+    // Initially, configure the default client to fetch the available model classes
+    await this.configureHerokuAIClient()
+    const {body: availableModels} = await this.herokuAI.get<ModelList>('/available-models')
+
+    // Now, configure the client to send a request for the target model resource
+    await this.configureHerokuAIClient(modelResource, app)
+    const options = this.parseOptions(optfile, opts)
+    // Not sure why `type` is an array in ModelListItem; we use the type from the first entry.
+    const modelType = availableModels.find(m => m.model_id === this.apiModelId)?.type[0]
+
+    switch (modelType) {
+      case 'Embedding':
+        break
+
+      case 'Text-to-Image':
+        break
+
+      case 'Text-to-Text': {
+        const completion = await this.createChatCompletion(prompt, options)
+        this.displayChatCompletion(completion, output, json)
+        break
+      }
+
+      default:
+        throw new Error(`Unsupported model type: ${modelType}`)
+    }
+  }
+
+  /**
+   * Parse the model call request options from the command flags.
+   *
+   * @param optfile Path to a JSON file containing options.
+   * @param opts JSON string containing options.
+   * @returns The parsed options as an object.
+   */
+  private parseOptions(optfile?: string, opts?: string) {
+    const options = {}
+
+    if (optfile) {
+      const optfileContents = fs.readFileSync(optfile)
+
+      try {
+        Object.assign(options, JSON.parse(optfileContents.toString()))
+      } catch (error: unknown) {
+        if (error instanceof SyntaxError) {
+          const {message} = error as SyntaxError
+          return ux.error(
+            `Invalid JSON in ${color.yellow(optfile)}. Check the formatting in your file.\n${message}`,
+            {exit: 1},
+          )
+        }
+
+        throw error
+      }
+    }
+
+    if (opts) {
+      try {
+        Object.assign(options, JSON.parse(opts))
+      } catch (error: unknown) {
+        if (error instanceof SyntaxError) {
+          const {message} = error as SyntaxError
+          return ux.error(
+            `Invalid JSON. Check the formatting in your ${color.yellow('--opts')} value.\n${message}`,
+            {exit: 1},
+          )
+        }
+
+        throw error
+      }
+    }
+
+    return options
+  }
+
+  private async createChatCompletion(prompt: string, options = {}) {
+    const {body: chatCompletionResponse} = await this.herokuAI.post<ChatCompletionResponse>('/v1/chat/completions', {
+      body: {
+        ...options,
+        model: this.apiModelId,
+        messages: [{
+          role: 'user',
+          content: prompt,
+        }],
+      },
+      headers: {authorization: `Bearer ${this.apiKey}`},
+    })
+
+    return chatCompletionResponse
+  }
+
+  private displayChatCompletion(completion: ChatCompletionResponse, output?: string, json = false) {
+    const content = json ? JSON.stringify(completion, null, 2) : completion.choices[0].message.content || ''
+
+    if (output) {
+      fs.writeFileSync(output, content)
+    } else {
+      json ?
ux.styledJSON(completion) : ux.log(content)
+    }
+  }
+}
diff --git a/src/lib/ai/types.ts b/src/lib/ai/types.ts
index da19f90..56409fb 100644
--- a/src/lib/ai/types.ts
+++ b/src/lib/ai/types.ts
@@ -14,13 +14,13 @@ export type ModelName =
   'cohere-embed-english' |
   'cohere-embed-multilingual'

-export type ModelType = 'Text to Text' | 'Embedding'
+export type ModelType = 'Text-to-Image' | 'Text-to-Text' | 'Embedding'

 /**
  * Object schema for each collection item returned by the Model List endpoint.
  */
 export type ModelListItem = {
-  name: ModelName
+  model_id: ModelName
   type: Array<ModelType>
 }

@@ -50,6 +50,103 @@ export type ModelResource = {
 }

 /**
- * Types returned for `ai:models:call` will be added after the description gets refined in the
- * API reference document.
+ * OpenAI compatible response schemas for model calls
+ */
+
+/**
+ * Tool call schema
  */
+export type ToolCall = {
+  /** The ID of the tool call. Currently, only function is supported */
+  id: string
+  /** The type of the tool call */
+  type: string
+  /** The function that the model called */
+  function: {
+    /** The name of the function to call */
+    name: string
+    /** The arguments to call the function with, as generated by the model in JSON format */
+    arguments: string
+  }
+}
+
+/**
+ * Log probability token schema
+ */
+export type LogProbToken = {
+  /** The token */
+  token: string
+  /** The log probability of this token */
+  logprob: number
+  /** The encoded bytes representing the token */
+  bytes: Array<number> | null
+}
+
+/**
+ * Log probability schema
+ */
+export type LogProb = LogProbToken & {
+  /** List of the most likely tokens and their log probability */
+  top_logprobs: Array<LogProbToken> | null
+}
+
+/**
+ * Chat completion choice schema
+ */
+export type ChatCompletionChoice = {
+  /** The reason the model stopped generating tokens */
+  readonly finish_reason: 'stop' | 'length' | 'content_filter' | 'tool_calls'
+  /** The index of the choice in the list of choices */
+  readonly index: number
+  /** A chat completion message generated by the model */
+  readonly message: {
+    /** The contents of the message */
+    readonly content: string | null
+    /** The refusal message generated by the model */
+    readonly refusal: string | null
+    readonly tool_calls?: Array<ToolCall> | null
+    /** The role of the author of this message */
+    readonly role: string
+  }
+  /** Log probability information for the choice */
+  readonly logprobs?: {
+    /** A list of message content tokens with log probability information */
+    content: Array<LogProb> | null
+    /** A list of message refusal tokens with log probability information */
+    refusal: Array<LogProb> | null
+  } | null
+}
+
+/**
+ * Chat completion response schema.
+ */
+export type ChatCompletionResponse = {
+  /** A unique identifier for the chat completion */
+  readonly id: string
+  /** A list of chat completion choices.
Can be more than one if n is greater than 1 */
+  readonly choices: Array<ChatCompletionChoice>
+  /** The Unix timestamp (in seconds) of when the chat completion was created */
+  readonly created: number
+  /** The model used for the chat completion */
+  readonly model: ModelName
+  /** The service tier used for processing the request */
+  readonly service_tier?: string | null
+  /** This fingerprint represents the backend configuration that the model runs with */
+  readonly system_fingerprint: string
+  /** The object type, which is always chat.completion */
+  readonly object: string
+  /** Usage statistics for the completion request */
+  readonly usage: {
+    /** Number of tokens in the generated completion */
+    readonly completion_tokens: number
+    /** Number of tokens in the prompt */
+    readonly prompt_tokens: number
+    /** Total number of tokens used in the request (prompt + completion) */
+    readonly total_tokens: number
+    /** Breakdown of tokens used in a completion */
+    readonly completion_tokens_details?: {
+      /** Tokens generated by the model for reasoning */
+      readonly reasoning_tokens: number
+    } | null
+  }
+}
diff --git a/src/lib/base.ts b/src/lib/base.ts
index f5a835c..7868913 100644
--- a/src/lib/base.ts
+++ b/src/lib/base.ts
@@ -3,7 +3,6 @@ import {APIClient, Command} from '@heroku-cli/command'
 import * as Heroku from '@heroku-cli/schema'
 import {ux} from '@oclif/core'
 import heredoc from 'tsheredoc'
-// import {inspect} from 'node:util'
 import {HerokuAPIError} from '@heroku-cli/command/lib/api-client'

 export class NotFound extends Error {
@@ -48,7 +47,7 @@ export default abstract class extends Command {
     const defaultHeaders = {
       ...this.heroku.defaults.headers,
       accept: 'application/json',
-      'user-agent': `heroku-cli-plugin-ai/${this.config.version} ${this.config.platform}`,
+      'user-agent': `heroku-cli-plugin-ai/${process.env.npm_package_version} ${this.config.platform}`,
     }

     delete defaultHeaders.authorization
diff --git a/test/commands/ai/models/call.test.ts b/test/commands/ai/models/call.test.ts
new file mode 100644
index 0000000..0c0e2f7
--- /dev/null
+++ b/test/commands/ai/models/call.test.ts
@@ -0,0 +1,260 @@
+import fs from 'node:fs'
+import {stdout, stderr} from 'stdout-stderr'
+import {expect} from 'chai'
+import nock from 'nock'
+import sinon from 'sinon'
+import Cmd from '../../../../src/commands/ai/models/call'
+import stripAnsi from '../../../helpers/strip-ansi'
+import {runCommand} from '../../../run-command'
+import {addon3, addon3Attachment1, availableModels, chatCompletionResponse} from '../../../helpers/fixtures'
+import heredoc from 'tsheredoc'
+
+describe('ai:models:call', function () {
+  const {env} = process
+  let api: nock.Scope
+  let defaultInferenceApi: nock.Scope
+  let inferenceApi: nock.Scope
+  let sandbox: sinon.SinonSandbox
+
+  beforeEach(async function () {
+    process.env = {}
+    sandbox = sinon.createSandbox()
+    api = nock('https://api.heroku.com')
+    defaultInferenceApi = nock('https://inference.heroku.com')
+      .get('/available-models')
+      .reply(200, availableModels)
+  })
+
+  afterEach(function () {
+    process.env = env
+    api.done()
+    defaultInferenceApi.done()
+    inferenceApi.done()
+    nock.cleanAll()
+    sandbox.restore()
+  })
+
+  context('when targeting an LLM (Text-to-Text) model resource', function () {
+    beforeEach(async function () {
+      api.post('/actions/addons/resolve', {addon: addon3.name, app: addon3Attachment1.app?.name})
+        .reply(200, [addon3])
+        .post('/actions/addon-attachments/resolve', {addon_attachment: addon3.name, app: addon3Attachment1.app?.name})
+        .reply(200,
[addon3Attachment1])
+        .get(`/apps/${addon3Attachment1.app?.id}/config-vars`)
+        .reply(200, {
+          INFERENCE_MAROON_KEY: 's3cr3t_k3y',
+          INFERENCE_MAROON_MODEL_ID: 'claude-3-sonnet',
+          INFERENCE_MAROON_URL: 'inference-eu.heroku.com',
+        })
+    })
+
+    context('without any optional flags', function () {
+      it('sends the prompt to the service and displays the response content', async function () {
+        const prompt = 'Hello, who are you?'
+        inferenceApi = nock('https://inference-eu.heroku.com', {
+          reqheaders: {authorization: 'Bearer s3cr3t_k3y'},
+        }).post('/v1/chat/completions', {
+          model: 'claude-3-sonnet',
+          messages: [{role: 'user', content: prompt}],
+        }).reply(200, chatCompletionResponse)
+
+        await runCommand(Cmd, [
+          'inference-animate-91825',
+          '--app=app1',
+          `--prompt=${prompt}`,
+        ])
+
+        expect(stdout.output).to.eq(heredoc`
+          Hello! I'm an AI assistant created by a company called Anthropic. It's nice to meet you.
+        `)
+        expect(stripAnsi(stderr.output)).to.eq('')
+      })
+    })
+
+    context('with --json flag', function () {
+      it('sends the prompt to the service and shows the JSON response', async function () {
+        const prompt = 'Hello, who are you?'
+        inferenceApi = nock('https://inference-eu.heroku.com', {
+          reqheaders: {authorization: 'Bearer s3cr3t_k3y'},
+        }).post('/v1/chat/completions', {
+          model: 'claude-3-sonnet',
+          messages: [{role: 'user', content: prompt}],
+        }).reply(200, chatCompletionResponse)
+
+        await runCommand(Cmd, [
+          'inference-animate-91825',
+          '--app=app1',
+          `--prompt=${prompt}`,
+          '--json',
+        ])
+
+        expect(JSON.parse(stdout.output)).to.deep.equal(chatCompletionResponse)
+        expect(stripAnsi(stderr.output)).to.eq('')
+      })
+    })
+
+    context('with --optfile option', function () {
+      it('shows an error if the file contents isn’t valid JSON', async function () {
+        const prompt = 'Hello, who are you?'
+        const readFileSyncMock = sandbox.stub(fs, 'readFileSync').returns('invalid json')
+
+        try {
+          await runCommand(Cmd, [
+            'inference-animate-91825',
+            '--app=app1',
+            `--prompt=${prompt}`,
+            '--optfile=model-options.json',
+          ])
+        } catch (error: unknown) {
+          const {message} = error as SyntaxError
+          expect(stripAnsi(message)).to.eq(heredoc`
+            Invalid JSON in model-options.json. Check the formatting in your file.
+            Unexpected token i in JSON at position 0
+          `.trim())
+        }
+
+        expect(readFileSyncMock.calledWith('model-options.json')).to.be.true
+        expect(stripAnsi(stderr.output)).to.eq('')
+      })
+
+      it('sends the prompt to the service with the specified options', async function () {
+        const prompt = 'Hello, who are you?'
+        const readFileSyncMock = sandbox
+          .stub(fs, 'readFileSync')
+          .returns(JSON.stringify({
+            stream: false,
+            temperature: 0.7,
+          }))
+        inferenceApi = nock('https://inference-eu.heroku.com', {
+          reqheaders: {authorization: 'Bearer s3cr3t_k3y'},
+        }).post('/v1/chat/completions', {
+          model: 'claude-3-sonnet',
+          messages: [{role: 'user', content: prompt}],
+          stream: false,
+          temperature: 0.7,
+        }).reply(200, chatCompletionResponse)
+
+        await runCommand(Cmd, [
+          'inference-animate-91825',
+          '--app=app1',
+          `--prompt=${prompt}`,
+          '--optfile=model-options.json',
+        ])
+
+        expect(readFileSyncMock.calledWith('model-options.json')).to.be.true
+        expect(stdout.output).to.eq(heredoc`
+          Hello! I'm an AI assistant created by a company called Anthropic. It's nice to meet you.
+        `)
+        expect(stripAnsi(stderr.output)).to.eq('')
+      })
+    })
+
+    context('with --opts option', function () {
+      it('shows an error if the string contents isn’t valid JSON', async function () {
+        const prompt = 'Hello, who are you?'
+
+        try {
+          await runCommand(Cmd, [
+            'inference-animate-91825',
+            '--app=app1',
+            `--prompt=${prompt}`,
+            '--opts=invalid json',
+          ])
+        } catch (error: unknown) {
+          const {message} = error as SyntaxError
+          expect(stripAnsi(message)).to.eq(heredoc`
+            Invalid JSON. Check the formatting in your --opts value.
+            Unexpected token i in JSON at position 0
+          `.trim())
+        }
+
+        expect(stripAnsi(stderr.output)).to.eq('')
+      })
+
+      it('sends the prompt to the service with the specified options', async function () {
+        const prompt = 'Hello, who are you?'
+        inferenceApi = nock('https://inference-eu.heroku.com', {
+          reqheaders: {authorization: 'Bearer s3cr3t_k3y'},
+        }).post('/v1/chat/completions', {
+          model: 'claude-3-sonnet',
+          messages: [{role: 'user', content: prompt}],
+          stream: false,
+          temperature: 0.7,
+        }).reply(200, chatCompletionResponse)
+
+        await runCommand(Cmd, [
+          'inference-animate-91825',
+          '--app=app1',
+          `--prompt=${prompt}`,
+          '--opts={"stream":false,"temperature":0.7}',
+        ])
+
+        expect(stdout.output).to.eq(heredoc`
+          Hello! I'm an AI assistant created by a company called Anthropic. It's nice to meet you.
+        `)
+        expect(stripAnsi(stderr.output)).to.eq('')
+      })
+    })
+
+    context('with both --optfile and --opts options', function () {
+      it('honors property values from --opts over the ones specified through --optfile', async function () {
+        const prompt = 'Hello, who are you?'
+        const readFileSyncMock = sandbox
+          .stub(fs, 'readFileSync')
+          .returns(JSON.stringify({
+            stream: false,
+            temperature: 0.7,
+          }))
+        inferenceApi = nock('https://inference-eu.heroku.com', {
+          reqheaders: {authorization: 'Bearer s3cr3t_k3y'},
+        }).post('/v1/chat/completions', {
+          model: 'claude-3-sonnet',
+          messages: [{role: 'user', content: prompt}],
+          stream: false,
+          temperature: 0.5,
+        }).reply(200, chatCompletionResponse)
+
+        await runCommand(Cmd, [
+          'inference-animate-91825',
+          '--app=app1',
+          `--prompt=${prompt}`,
+          '--opts={"temperature":0.5}',
+          '--optfile=model-options.json',
+        ])
+
+        expect(readFileSyncMock.calledWith('model-options.json')).to.be.true
+        expect(stdout.output).to.eq(heredoc`
+          Hello! I'm an AI assistant created by a company called Anthropic. It's nice to meet you.
+        `)
+        expect(stripAnsi(stderr.output)).to.eq('')
+      })
+    })
+
+    context('with --output option', function () {
+      it('writes to the indicated file', async function () {
+        const prompt = 'Hello, who are you?'
+        const writeFileSyncMock = sandbox.stub(fs, 'writeFileSync')
+        inferenceApi = nock('https://inference-eu.heroku.com', {
+          reqheaders: {authorization: 'Bearer s3cr3t_k3y'},
+        }).post('/v1/chat/completions', {
+          model: 'claude-3-sonnet',
+          messages: [{role: 'user', content: prompt}],
+        }).reply(200, chatCompletionResponse)
+
+        await runCommand(Cmd, [
+          'inference-animate-91825',
+          '--app=app1',
+          `--prompt=${prompt}`,
+          '--output=model-output.txt',
+        ])
+
+        expect(writeFileSyncMock.calledWith(
+          'model-output.txt',
+          "Hello! I'm an AI assistant created by a company called Anthropic.
It's nice to meet you.", + )).to.be.true + expect(stdout.output).to.eq('') + expect(stripAnsi(stderr.output)).to.eq('') + }) + }) + }) +}) diff --git a/test/commands/ai/models/list.test.ts b/test/commands/ai/models/list.test.ts index 4171268..8b70820 100644 --- a/test/commands/ai/models/list.test.ts +++ b/test/commands/ai/models/list.test.ts @@ -28,13 +28,13 @@ describe('ai:models:list', function () { .reply(200, availableModels) await runCommand(Cmd) - expect(stdout.output).to.contain('stable-diffusion-xl Text to image') - expect(stdout.output).to.contain('claude-3-5-sonnet Text to text') - expect(stdout.output).to.contain('claude-3-opus Text to text') - expect(stdout.output).to.contain('claude-3-sonnet Text to text') - expect(stdout.output).to.contain('claude-3-haiku Text to text') - expect(stdout.output).to.contain('cohere-embed-english Text to text, Embedding') - expect(stdout.output).to.contain('cohere-embed-multilingual Text to text, Embedding') + expect(stdout.output).to.match(/cohere-embed-english\s+Embedding/) + expect(stdout.output).to.match(/cohere-embed-multilingual\s+Embedding/) + expect(stdout.output).to.match(/stable-diffusion-xl\s+Text to Image/) + expect(stdout.output).to.match(/claude-3-5-sonnet\s+Text to Text/) + expect(stdout.output).to.match(/claude-3-opus\s+Text to Text/) + expect(stdout.output).to.match(/claude-3-sonnet\s+Text to Text/) + expect(stdout.output).to.match(/claude-3-haiku\s+Text to Text/) expect(stdout.output).to.contain('See https://devcenter.heroku.com/articles/rainbow-unicorn-princess-models for more info') expect(stderr.output).to.eq('') }) diff --git a/test/helpers/fixtures.ts b/test/helpers/fixtures.ts index 9a7156c..ad5e4ac 100644 --- a/test/helpers/fixtures.ts +++ b/test/helpers/fixtures.ts @@ -1,33 +1,34 @@ import * as Heroku from '@heroku-cli/schema' +import {ChatCompletionResponse} from '../../src/lib/ai/types' export const availableModels = [ { - model_id: 'stable-diffusion-xl', - type: ['Text-to-image'], + model_id: 'claude-3-sonnet', + type: ['Text-to-Text'], }, { - model_id: 'claude-3-5-sonnet', - type: ['Text-to-text'], + model_id: 'claude-3-haiku', + type: ['Text-to-Text'], }, { - model_id: 'claude-3-opus', - type: ['Text-to-text'], + model_id: 'cohere-embed-english', + type: ['Embedding'], }, { - model_id: 'claude-3-sonnet', - type: ['Text-to-text'], + model_id: 'cohere-embed-multilingual', + type: ['Embedding'], }, { - model_id: 'claude-3-haiku', - type: ['Text-to-text'], + model_id: 'stable-diffusion-xl', + type: ['Text-to-Image'], }, { - model_id: 'cohere-embed-english', - type: ['Text-to-text', 'Embedding'], + model_id: 'claude-3-5-sonnet', + type: ['Text-to-Text'], }, { - model_id: 'cohere-embed-multilingual', - type: ['Text-to-text', 'Embedding'], + model_id: 'claude-3-opus', + type: ['Text-to-Text'], }, ] @@ -236,3 +237,28 @@ export const addon1ProvisionedWithAttachmentName: Heroku.AddOn = { 'CLAUDE_HAIKU_URL', ], } + +export const chatCompletionResponse: ChatCompletionResponse = { + id: 'chatcmpl-17f8f365f941de720ad38', + object: 'chat.completion', + created: 1727398076, + model: 'claude-3-sonnet', + system_fingerprint: 'heroku-inf-zzuqrd', + choices: [ + { + index: 0, + message: { + role: 'assistant', + content: "Hello! I'm an AI assistant created by a company called Anthropic. It's nice to meet you.", + refusal: null, + }, + finish_reason: 'stop', + }, + ], + usage: { + prompt_tokens: 13, + completion_tokens: 26, + total_tokens: 39, + }, +} +
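A note for reviewers: the precedence asserted by the "with both --optfile and --opts options" test falls out of `parseOptions()` calling `Object.assign` twice, file first, string second. Below is a minimal sketch of that merge, outside the diff; the helper name `mergeInferenceOptions` is ours, and the invalid-JSON error handling that `parseOptions()` performs is omitted here.

```ts
import fs from 'node:fs'

// Options from --optfile are applied first, then options from --opts,
// so keys present in both resolve to the --opts value.
function mergeInferenceOptions(optfile?: string, opts?: string): Record<string, unknown> {
  const options: Record<string, unknown> = {}

  if (optfile) {
    Object.assign(options, JSON.parse(fs.readFileSync(optfile, 'utf8')))
  }

  if (opts) {
    Object.assign(options, JSON.parse(opts))
  }

  return options
}

// With model-options.json containing {"stream": false, "temperature": 0.7}:
// mergeInferenceOptions('model-options.json', '{"temperature": 0.5}')
// → {stream: false, temperature: 0.5}, matching the precedence test above.
```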