diff --git a/README.md b/README.md
index 504fc25..96b24be 100644
--- a/README.md
+++ b/README.md
@@ -8,7 +8,7 @@ Speed and pricing at 2024-04-21. Also see their [changelog](https://console.groq
 ## Groq Cloud API
 
-You can interact with their API using any Ruby HTTP library by following their documentation at <https://console.groq.com/docs/quickstart>
+You can interact with their API using any Ruby HTTP library by following their documentation at <https://console.groq.com/docs/quickstart>. Also use their [Playground](https://console.groq.com/playground) and watch the API traffic in the browser's developer tools.
 
 The Groq Cloud API looks to be copying a subset of the OpenAI API. For example, you perform chat completions at `https://api.groq.com/openai/v1/chat/completions` with the same POST body schema as OpenAI.
 
 The Tools support looks to have the same schema for defining tools/functions.
@@ -244,6 +244,34 @@ messages << T("25 degrees celcius", tool_call_id: tool_call_id, name: "get_weath
 # => {"role"=>"assistant", "content"=> "I'm glad you called the function!\n\nAs of your current location, the weather in Paris is indeed 25°C (77°F)..."}
 ```
 
+### Max Tokens & Temperature
+
+Max tokens is the maximum number of tokens the model can generate in a single response. This limit ensures computational efficiency and resource management.
+
+The temperature setting for each API call controls the randomness of responses. A lower temperature leads to more predictable outputs, while a higher temperature results in more varied and sometimes more creative outputs. The range of values is 0 to 2.
+
+Each API call accepts `max_tokens:` and `temperature:` values.
+
+The defaults are:
+
+```ruby
+@client.max_tokens
+=> 1024
+@client.temperature
+=> 1
+```
+
+You can override them in the `Groq.configuration` block, or with each `chat()` call:
+
+```ruby
+Groq.configuration do |config|
+  config.max_tokens = 512
+  config.temperature = 0.5
+end
+# or
+@client.chat("Hello, world!", max_tokens: 512, temperature: 0.5)
+```
+
 ## Development
 
 After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake test` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
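A quick sketch of how the documented defaults and overrides interact, assuming a valid `GROQ_API_KEY` in the environment (the prompt strings here are illustrative only):

```ruby
require "groq"

Groq.configuration do |config|
  config.api_key = ENV["GROQ_API_KEY"]
  config.max_tokens = 512   # client-wide default for this process
  config.temperature = 0.5
end

client = Groq::Client.new

# Per-call keyword arguments take precedence over the configured defaults:
client.chat("Name a colour", max_tokens: 64, temperature: 1.2)

# Omitted values fall back to the configuration (512 / 0.5 here), which in
# turn falls back to DEFAULT_MAX_TOKENS (1024) / DEFAULT_TEMPERATURE (1):
client.chat("Name a colour")
```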
diff --git a/lib/groq/client.rb b/lib/groq/client.rb
index b5d58f0..50bb692 100644
--- a/lib/groq/client.rb
+++ b/lib/groq/client.rb
@@ -5,6 +5,8 @@ class Groq::Client
     api_key
     api_url
     model_id
+    max_tokens
+    temperature
   ].freeze
 
   attr_reader(*CONFIG_KEYS, :faraday_middleware)
@@ -20,7 +22,7 @@ def initialize(config = {}, &faraday_middleware)
   end
 
   # TODO: support stream: true; or &stream block
-  def chat(messages, model_id: nil, tools: nil)
+  def chat(messages, model_id: nil, tools: nil, max_tokens: nil, temperature: nil)
     unless messages.is_a?(Array) || messages.is_a?(String)
       raise ArgumentError, "require messages to be an Array or String"
     end
@@ -34,7 +36,9 @@ def chat(messages, model_id: nil, tools: nil)
     body = {
       model: model_id,
       messages: messages,
-      tools: tools
+      tools: tools,
+      max_tokens: max_tokens || @max_tokens,
+      temperature: temperature || @temperature
     }.compact
     response = post(path: "/openai/v1/chat/completions", body: body)
     if response.status == 200
diff --git a/lib/groq/configuration.rb b/lib/groq/configuration.rb
index c783f6e..4aca6e2 100644
--- a/lib/groq/configuration.rb
+++ b/lib/groq/configuration.rb
@@ -1,18 +1,24 @@
 class Groq::Configuration
   attr_writer :api_key
-  attr_accessor :model_id, :api_url, :request_timeout, :extra_headers
+  attr_accessor :model_id, :max_tokens, :temperature
+  attr_accessor :api_url, :request_timeout, :extra_headers
 
   DEFAULT_API_URL = "https://api.groq.com"
   DEFAULT_REQUEST_TIMEOUT = 5
+  DEFAULT_MAX_TOKENS = 1024
+  DEFAULT_TEMPERATURE = 1
 
   class Error < StandardError; end
 
   def initialize
     @api_key = ENV["GROQ_API_KEY"]
-    @model_id = Groq::Model.default_model_id
     @api_url = DEFAULT_API_URL
     @request_timeout = DEFAULT_REQUEST_TIMEOUT
     @extra_headers = {}
+
+    @model_id = Groq::Model.default_model_id
+    @max_tokens = DEFAULT_MAX_TOKENS
+    @temperature = DEFAULT_TEMPERATURE
   end
 
   def api_key
diff --git a/test/fixtures/vcr_cassettes/llama3-8b-8192/chat_max_tokens.yml b/test/fixtures/vcr_cassettes/llama3-8b-8192/chat_max_tokens.yml
new file mode 100644
index 0000000..b2dc50a
--- /dev/null
+++ b/test/fixtures/vcr_cassettes/llama3-8b-8192/chat_max_tokens.yml
@@ -0,0 +1,72 @@
+---
+http_interactions:
+- request:
+    method: post
+    uri: https://api.groq.com/openai/v1/chat/completions
+    body:
+      encoding: UTF-8
+      string: '{"model":"llama3-8b-8192","messages":[{"role":"user","content":"What''s
+        the next day after Wednesday? Answer with its first letter."}],"max_tokens":1,"temperature":1}'
+    headers:
+      User-Agent:
+      - Faraday v2.9.0
+      Authorization:
+      - Bearer
+      Content-Type:
+      - application/json
+      Accept-Encoding:
+      - gzip;q=1.0,deflate;q=0.6,identity;q=0.3
+      Accept:
+      - "*/*"
+  response:
+    status:
+      code: 200
+      message: OK
+    headers:
+      Date:
+      - Sat, 20 Apr 2024 21:11:31 GMT
+      Content-Type:
+      - application/json
+      Transfer-Encoding:
+      - chunked
+      Connection:
+      - keep-alive
+      Cache-Control:
+      - private, max-age=0, no-store, no-cache, must-revalidate
+      Vary:
+      - Origin, Accept-Encoding
+      X-Ratelimit-Limit-Requests:
+      - '14400'
+      X-Ratelimit-Limit-Tokens:
+      - '15000'
+      X-Ratelimit-Remaining-Requests:
+      - '14399'
+      X-Ratelimit-Remaining-Tokens:
+      - '14979'
+      X-Ratelimit-Reset-Requests:
+      - 6s
+      X-Ratelimit-Reset-Tokens:
+      - 84ms
+      X-Request-Id:
+      - req_01hvynk65hejetdcgwbn15s8h5
+      Via:
+      - 1.1 google
+      Alt-Svc:
+      - h3=":443"; ma=86400
+      Cf-Cache-Status:
+      - DYNAMIC
+      Set-Cookie:
+      - __cf_bm=TQrkPimL3C9A0lJ6i3WyvhmBkspFsGtY9LqNSHxSwBQ-1713647491-1.0.1.1-tiQVMBbv3FggJLSa5uj_IWrAnnBq5YM1FJv9CXDl2eb2vM3twI996FW1BvUUdMMMkgkfIa1.eIefTY3Blp0PyA;
+        path=/; expires=Sat, 20-Apr-24 21:41:31 GMT; domain=.groq.com; HttpOnly; Secure;
+        SameSite=None
+      Server:
+      - cloudflare
+      Cf-Ray:
+      - 877820931adaa813-SYD
+    body:
+      encoding: ASCII-8BIT
+      string: '{"id":"chatcmpl-7777b0f0-c67f-403a-bb6c-650f24af92f5","object":"chat.completion","created":1713647491,"model":"llama3-8b-8192","choices":[{"index":0,"message":{"role":"assistant","content":"The"},"logprobs":null,"finish_reason":"length"}],"usage":{"prompt_tokens":24,"prompt_time":0.011,"completion_tokens":1,"completion_time":0,"total_tokens":25,"total_time":0.011},"system_fingerprint":"fp_dadc9d6142","x_groq":{"id":"req_01hvynk65hejetdcgwbn15s8h5"}}
+
+        '
+  recorded_at: Sat, 20 Apr 2024 21:11:31 GMT
+recorded_with: VCR 6.2.0
diff --git a/test/fixtures/vcr_cassettes/llama3-8b-8192/chat_messages.yml b/test/fixtures/vcr_cassettes/llama3-8b-8192/chat_messages.yml
index 1e3bd9a..8896970 100644
--- a/test/fixtures/vcr_cassettes/llama3-8b-8192/chat_messages.yml
+++ b/test/fixtures/vcr_cassettes/llama3-8b-8192/chat_messages.yml
@@ -5,10 +5,10 @@ http_interactions:
     uri: https://api.groq.com/openai/v1/chat/completions
     body:
      encoding: UTF-8
-      string: '{"model":"llama3-8b-8192","messages":[{"role":"system","content":"I
-        am an obedient AI."},{"role":"user","content":"What''s the next day after
-        Wednesday?"},{"role":"assistant","content":"The next day after Wednesday is
-        Thursday."},{"role":"user","content":"What''s the next day after that?"}]}'
+      string: '{"model":"llama3-8b-8192","messages":[{"role":"user","content":"What''s
+        the next day after Wednesday?"},{"role":"assistant","content":"The next day
+        after Wednesday is Thursday."},{"role":"user","content":"What''s the next
+        day after that?"}],"max_tokens":1024,"temperature":1}'
     headers:
       User-Agent:
       - Faraday v2.9.0
@@ -26,7 +26,7 @@ http_interactions:
     message: OK
     headers:
       Date:
-      - Sat, 20 Apr 2024 21:05:14 GMT
+      - Sat, 20 Apr 2024 21:11:24 GMT
       Content-Type:
       - application/json
       Transfer-Encoding:
@@ -44,13 +44,13 @@ http_interactions:
       X-Ratelimit-Remaining-Requests:
       - '14399'
       X-Ratelimit-Remaining-Tokens:
-      - '14949'
+      - '14959'
       X-Ratelimit-Reset-Requests:
      - 6s
       X-Ratelimit-Reset-Tokens:
-      - 204ms
+      - 164ms
       X-Request-Id:
-      - req_01hvyn7p1vef08pyxh4f4rwckh
+      - req_01hvynjz99ejdb0apt5j7v7w4q
       Via:
       - 1.1 google
       Alt-Svc:
@@ -58,18 +58,18 @@ http_interactions:
       Cf-Cache-Status:
       - DYNAMIC
       Set-Cookie:
-      - __cf_bm=i5D44.YoS7Oaw7.esHDZPQYkvmWn8KRxX2OC_aVUV4E-1713647114-1.0.1.1-8jMHyVbeoEAM619NPYVSrU1wErvw.OpcVGJZlszrTAm4mBMpIQakcxPd.4a8Gd1ys2APrsmITKsKGiXP0w64kA;
-        path=/; expires=Sat, 20-Apr-24 21:35:14 GMT; domain=.groq.com; HttpOnly; Secure;
+      - __cf_bm=d9NBIpFcnrkEqTLIn3ZusU4q3hzxHo.xBZofaGW.wDM-1713647484-1.0.1.1-TbAUs8TMqSsg6ud5bZUeXRZaH6WtD1C0gHykT50_O2YTi14X7Kcw10AIopckBARs5LG3XIwF_2tjnH2Q2VIr5w;
+        path=/; expires=Sat, 20-Apr-24 21:41:24 GMT; domain=.groq.com; HttpOnly; Secure;
         SameSite=None
       Server:
       - cloudflare
       Cf-Ray:
-      - 8778175f2ff35bf6-SYD
+      - 87782067cc095d25-SYD
     body:
       encoding: ASCII-8BIT
-      string: '{"id":"chatcmpl-2267a059-68c5-4060-b03c-44034d26c36b","object":"chat.completion","created":1713647114,"model":"llama3-8b-8192","choices":[{"index":0,"message":{"role":"assistant","content":"The
-        next day after Thursday is Friday."},"logprobs":null,"finish_reason":"stop"}],"usage":{"prompt_tokens":55,"prompt_time":0.024,"completion_tokens":8,"completion_time":0.011,"total_tokens":63,"total_time":0.035},"system_fingerprint":"fp_dadc9d6142","x_groq":{"id":"req_01hvyn7p1vef08pyxh4f4rwckh"}}
+      string: '{"id":"chatcmpl-4db7334c-ace9-4540-b954-496a65bead9b","object":"chat.completion","created":1713647484,"model":"llama3-8b-8192","choices":[{"index":0,"message":{"role":"assistant","content":"The
+        next day after Thursday is Friday."},"logprobs":null,"finish_reason":"stop"}],"usage":{"prompt_tokens":44,"prompt_time":0.029,"completion_tokens":8,"completion_time":0.009,"total_tokens":52,"total_time":0.038},"system_fingerprint":"fp_dadc9d6142","x_groq":{"id":"req_01hvynjz99ejdb0apt5j7v7w4q"}}
 
         '
-  recorded_at: Sat, 20 Apr 2024 21:05:14 GMT
+  recorded_at: Sat, 20 Apr 2024 21:11:24 GMT
 recorded_with: VCR 6.2.0
diff --git a/test/groq/test_client.rb b/test/groq/test_client.rb
index 17d6eed..fefdae9 100644
--- a/test/groq/test_client.rb
+++ b/test/groq/test_client.rb
@@ -3,6 +3,13 @@
 require "test_helper"
 
 class TestGroqClient < Minitest::Test
+  def test_defaults
+    client = Groq::Client.new
+    assert_equal "llama3-8b-8192", client.model_id
+    assert_equal 1024, client.max_tokens
+    assert_equal 1, client.temperature
+  end
+
   # define "say hello world" for each model, such as: test_hello_world_llama3_8b et al
   Groq::Model::MODELS.each do |model|
     model_id = model[:model_id]
@@ -103,4 +110,15 @@ def test_tools_weather_report
       assert_equal response, {"role" => "assistant", "content" => "The weather in Brisbane, QLD is 25 degrees Celsius."}
     end
   end
+
+  def test_max_tokens
+    VCR.use_cassette("llama3-8b-8192/chat_max_tokens") do
+      client = Groq::Client.new(model_id: "llama3-8b-8192")
+      response = client.chat("What's the next day after Wednesday?", max_tokens: 1)
+      assert_equal response, {
+        "role" => "assistant", "content" => "The"
+      }
+      # Yeah, max_tokens: 1 still returns a full word, because it's a single token.
+    end
+  end
 end
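One detail worth noting about the `max_tokens || @max_tokens` / `temperature || @temperature` fallbacks in `Groq::Client#chat`: in Ruby only `nil` and `false` are falsy, so an explicit `temperature: 0` is honoured rather than silently replaced by the client default. A minimal plain-Ruby sketch of that behaviour:

```ruby
default_temperature = 1

# Only nil (and false) trigger the right-hand side of ||:
0 || default_temperature    # => 0  (an explicit temperature of 0 survives)
nil || default_temperature  # => 1  (an omitted keyword, defaulting to nil, falls back)

# This mirrors the body construction in Groq::Client#chat:
[0, 0.5, nil].map { |t| t || default_temperature }
# => [0, 0.5, 1]
```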