diff --git a/README.md b/README.md
index 504fc25..96b24be 100644
--- a/README.md
+++ b/README.md
@@ -8,7 +8,7 @@ Speed and pricing at 2024-04-21. Also see their [changelog](https://console.groq
 ## Groq Cloud API
 
-You can interact with their API using any Ruby HTTP library by following their documentation at <https://console.groq.com/docs/quickstart>
+You can interact with their API using any Ruby HTTP library by following their documentation at <https://console.groq.com/docs/quickstart>. Also use their [Playground](https://console.groq.com/playground) and watch the API traffic in the browser's developer tools.
 
 The Groq Cloud API looks to be copying a subset of the OpenAI API. For example, you perform chat completions at `https://api.groq.com/openai/v1/chat/completions` with the same POST body schema as OpenAI.
 
 The Tools support looks to have the same schema for defining tools/functions.
@@ -244,6 +244,34 @@ messages << T("25 degrees celcius", tool_call_id: tool_call_id, name: "get_weath
 # => {"role"=>"assistant", "content"=> "I'm glad you called the function!\n\nAs of your current location, the weather in Paris is indeed 25°C (77°F)..."}
 ```
 
+### Max Tokens & Temperature
+
+Max tokens is the maximum number of tokens the model can generate in a single response. This limit ensures computational efficiency and resource management.
+
+The temperature setting for each API call controls the randomness of responses. A lower temperature leads to more predictable outputs, while a higher temperature results in more varied and sometimes more creative outputs. The range of values is 0 to 2.
+
+Each API call accepts `max_tokens:` and `temperature:` values.
+
+The defaults are:
+
+```ruby
+@client.max_tokens
+=> 1024
+@client.temperature
+=> 1
+```
+
+You can override them in the `Groq.configuration` block, or with each `chat()` call:
+
+```ruby
+Groq.configuration do |config|
+  config.max_tokens = 512
+  config.temperature = 0.5
+end
+# or
+@client.chat("Hello, world!", max_tokens: 512, temperature: 0.5)
+```
+
 ## Development
 
 After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake test` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
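A quick sketch of how the documented defaults and overrides interact, assuming a valid `GROQ_API_KEY` in the environment (the prompt strings here are illustrative only):

```ruby
require "groq"

Groq.configuration do |config|
  config.api_key = ENV["GROQ_API_KEY"]
  config.max_tokens = 512   # client-wide default for this process
  config.temperature = 0.5
end

client = Groq::Client.new

# Per-call keyword arguments take precedence over the configured defaults:
client.chat("Name a colour", max_tokens: 64, temperature: 1.2)

# Omitted values fall back to the configuration (512 / 0.5 here), which in
# turn falls back to DEFAULT_MAX_TOKENS (1024) / DEFAULT_TEMPERATURE (1):
client.chat("Name a colour")
```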
diff --git a/lib/groq/client.rb b/lib/groq/client.rb
index b5d58f0..50bb692 100644
--- a/lib/groq/client.rb
+++ b/lib/groq/client.rb
@@ -5,6 +5,8 @@ class Groq::Client
     api_key
     api_url
     model_id
+    max_tokens
+    temperature
   ].freeze
 
   attr_reader(*CONFIG_KEYS, :faraday_middleware)
@@ -20,7 +22,7 @@ def initialize(config = {}, &faraday_middleware)
   end
 
   # TODO: support stream: true; or &stream block
-  def chat(messages, model_id: nil, tools: nil)
+  def chat(messages, model_id: nil, tools: nil, max_tokens: nil, temperature: nil)
     unless messages.is_a?(Array) || messages.is_a?(String)
       raise ArgumentError, "require messages to be an Array or String"
     end
@@ -34,7 +36,9 @@ def chat(messages, model_id: nil, tools: nil)
     body = {
       model: model_id,
       messages: messages,
-      tools: tools
+      tools: tools,
+      max_tokens: max_tokens || @max_tokens,
+      temperature: temperature || @temperature
     }.compact
     response = post(path: "/openai/v1/chat/completions", body: body)
     if response.status == 200
diff --git a/lib/groq/configuration.rb b/lib/groq/configuration.rb
index c783f6e..4aca6e2 100644
--- a/lib/groq/configuration.rb
+++ b/lib/groq/configuration.rb
@@ -1,18 +1,24 @@
 class Groq::Configuration
   attr_writer :api_key
-  attr_accessor :model_id, :api_url, :request_timeout, :extra_headers
+  attr_accessor :model_id, :max_tokens, :temperature
+  attr_accessor :api_url, :request_timeout, :extra_headers
 
   DEFAULT_API_URL = "https://api.groq.com"
   DEFAULT_REQUEST_TIMEOUT = 5
+  DEFAULT_MAX_TOKENS = 1024
+  DEFAULT_TEMPERATURE = 1
 
   class Error < StandardError; end
 
   def initialize
     @api_key = ENV["GROQ_API_KEY"]
-    @model_id = Groq::Model.default_model_id
     @api_url = DEFAULT_API_URL
     @request_timeout = DEFAULT_REQUEST_TIMEOUT
     @extra_headers = {}
+
+    @model_id = Groq::Model.default_model_id
+    @max_tokens = DEFAULT_MAX_TOKENS
+    @temperature = DEFAULT_TEMPERATURE
   end
 
   def api_key
diff --git a/test/fixtures/vcr_cassettes/llama3-8b-8192/chat_max_tokens.yml b/test/fixtures/vcr_cassettes/llama3-8b-8192/chat_max_tokens.yml
new file mode 100644
index 0000000..b2dc50a
--- /dev/null
+++ b/test/fixtures/vcr_cassettes/llama3-8b-8192/chat_max_tokens.yml
@@ -0,0 +1,72 @@
+---
+http_interactions:
+- request:
+    method: post
+    uri: https://api.groq.com/openai/v1/chat/completions
+    body:
+      encoding: UTF-8
+      string: '{"model":"llama3-8b-8192","messages":[{"role":"user","content":"What''s
+        the next day after Wednesday? Answer with its first letter."}],"max_tokens":1,"temperature":1}'
+    headers:
+      User-Agent:
+      - Faraday v2.9.0
+      Authorization:
+      - Bearer
+      Content-Type:
+      - application/json
+      Accept-Encoding:
+      - gzip;q=1.0,deflate;q=0.6,identity;q=0.3
+      Accept:
+      - "*/*"
+  response:
+    status:
+      code: 200
+      message: OK
+    headers:
+      Date:
+      - Sat, 20 Apr 2024 21:11:31 GMT
+      Content-Type:
+      - application/json
+      Transfer-Encoding:
+      - chunked
+      Connection:
+      - keep-alive
+      Cache-Control:
+      - private, max-age=0, no-store, no-cache, must-revalidate
+      Vary:
+      - Origin, Accept-Encoding
+      X-Ratelimit-Limit-Requests:
+      - '14400'
+      X-Ratelimit-Limit-Tokens:
+      - '15000'
+      X-Ratelimit-Remaining-Requests:
+      - '14399'
+      X-Ratelimit-Remaining-Tokens:
+      - '14979'
+      X-Ratelimit-Reset-Requests:
+      - 6s
+      X-Ratelimit-Reset-Tokens:
+      - 84ms
+      X-Request-Id:
+      - req_01hvynk65hejetdcgwbn15s8h5
+      Via:
+      - 1.1 google
+      Alt-Svc:
+      - h3=":443"; ma=86400
+      Cf-Cache-Status:
+      - DYNAMIC
+      Set-Cookie:
+      - __cf_bm=TQrkPimL3C9A0lJ6i3WyvhmBkspFsGtY9LqNSHxSwBQ-1713647491-1.0.1.1-tiQVMBbv3FggJLSa5uj_IWrAnnBq5YM1FJv9CXDl2eb2vM3twI996FW1BvUUdMMMkgkfIa1.eIefTY3Blp0PyA;
+        path=/; expires=Sat, 20-Apr-24 21:41:31 GMT; domain=.groq.com; HttpOnly; Secure;
+        SameSite=None
+      Server:
+      - cloudflare
+      Cf-Ray:
+      - 877820931adaa813-SYD
+    body:
+      encoding: ASCII-8BIT
+      string: '{"id":"chatcmpl-7777b0f0-c67f-403a-bb6c-650f24af92f5","object":"chat.completion","created":1713647491,"model":"llama3-8b-8192","choices":[{"index":0,"message":{"role":"assistant","content":"The"},"logprobs":null,"finish_reason":"length"}],"usage":{"prompt_tokens":24,"prompt_time":0.011,"completion_tokens":1,"completion_time":0,"total_tokens":25,"total_time":0.011},"system_fingerprint":"fp_dadc9d6142","x_groq":{"id":"req_01hvynk65hejetdcgwbn15s8h5"}}
+
+        '
+  recorded_at: Sat, 20 Apr 2024 21:11:31 GMT
+recorded_with: VCR 6.2.0
diff --git a/test/fixtures/vcr_cassettes/llama3-8b-8192/chat_messages.yml b/test/fixtures/vcr_cassettes/llama3-8b-8192/chat_messages.yml
index 1e3bd9a..8896970 100644
--- a/test/fixtures/vcr_cassettes/llama3-8b-8192/chat_messages.yml
+++ b/test/fixtures/vcr_cassettes/llama3-8b-8192/chat_messages.yml
@@ -5,10 +5,10 @@ http_interactions:
     uri: https://api.groq.com/openai/v1/chat/completions
     body:
      encoding: UTF-8
-      string: '{"model":"llama3-8b-8192","messages":[{"role":"system","content":"I
-        am an obedient AI."},{"role":"user","content":"What''s the next day after
-        Wednesday?"},{"role":"assistant","content":"The next day after Wednesday is
-        Thursday."},{"role":"user","content":"What''s the next day after that?"}]}'
+      string: '{"model":"llama3-8b-8192","messages":[{"role":"user","content":"What''s
+        the next day after Wednesday?"},{"role":"assistant","content":"The next day
+        after Wednesday is Thursday."},{"role":"user","content":"What''s the next
+        day after that?"}],"max_tokens":1024,"temperature":1}'
     headers:
       User-Agent:
       - Faraday v2.9.0
@@ -26,7 +26,7 @@ http_interactions:
     message: OK
     headers:
       Date:
-      - Sat, 20 Apr 2024 21:05:14 GMT
+      - Sat, 20 Apr 2024 21:11:24 GMT
       Content-Type:
       - application/json
       Transfer-Encoding:
@@ -44,13 +44,13 @@ http_interactions:
       X-Ratelimit-Remaining-Requests:
       - '14399'
       X-Ratelimit-Remaining-Tokens:
-      - '14949'
+      - '14959'
       X-Ratelimit-Reset-Requests:
      - 6s
       X-Ratelimit-Reset-Tokens:
-      - 204ms
+      - 164ms
       X-Request-Id:
-      - req_01hvyn7p1vef08pyxh4f4rwckh
+      - req_01hvynjz99ejdb0apt5j7v7w4q
       Via:
       - 1.1 google
       Alt-Svc:
@@ -58,18 +58,18 @@ http_interactions:
       Cf-Cache-Status:
       - DYNAMIC
       Set-Cookie:
-      - __cf_bm=i5D44.YoS7Oaw7.esHDZPQYkvmWn8KRxX2OC_aVUV4E-1713647114-1.0.1.1-8jMHyVbeoEAM619NPYVSrU1wErvw.OpcVGJZlszrTAm4mBMpIQakcxPd.4a8Gd1ys2APrsmITKsKGiXP0w64kA;
-        path=/; expires=Sat, 20-Apr-24 21:35:14 GMT; domain=.groq.com; HttpOnly; Secure;
+      - __cf_bm=d9NBIpFcnrkEqTLIn3ZusU4q3hzxHo.xBZofaGW.wDM-1713647484-1.0.1.1-TbAUs8TMqSsg6ud5bZUeXRZaH6WtD1C0gHykT50_O2YTi14X7Kcw10AIopckBARs5LG3XIwF_2tjnH2Q2VIr5w;
+        path=/; expires=Sat, 20-Apr-24 21:41:24 GMT; domain=.groq.com; HttpOnly; Secure;
         SameSite=None
       Server:
       - cloudflare
       Cf-Ray:
-      - 8778175f2ff35bf6-SYD
+      - 87782067cc095d25-SYD
     body:
       encoding: ASCII-8BIT
-      string: '{"id":"chatcmpl-2267a059-68c5-4060-b03c-44034d26c36b","object":"chat.completion","created":1713647114,"model":"llama3-8b-8192","choices":[{"index":0,"message":{"role":"assistant","content":"The
-        next day after Thursday is Friday."},"logprobs":null,"finish_reason":"stop"}],"usage":{"prompt_tokens":55,"prompt_time":0.024,"completion_tokens":8,"completion_time":0.011,"total_tokens":63,"total_time":0.035},"system_fingerprint":"fp_dadc9d6142","x_groq":{"id":"req_01hvyn7p1vef08pyxh4f4rwckh"}}
+      string: '{"id":"chatcmpl-4db7334c-ace9-4540-b954-496a65bead9b","object":"chat.completion","created":1713647484,"model":"llama3-8b-8192","choices":[{"index":0,"message":{"role":"assistant","content":"The
+        next day after Thursday is Friday."},"logprobs":null,"finish_reason":"stop"}],"usage":{"prompt_tokens":44,"prompt_time":0.029,"completion_tokens":8,"completion_time":0.009,"total_tokens":52,"total_time":0.038},"system_fingerprint":"fp_dadc9d6142","x_groq":{"id":"req_01hvynjz99ejdb0apt5j7v7w4q"}}
 
         '
-  recorded_at: Sat, 20 Apr 2024 21:05:14 GMT
+  recorded_at: Sat, 20 Apr 2024 21:11:24 GMT
 recorded_with: VCR 6.2.0
diff --git a/test/groq/test_client.rb b/test/groq/test_client.rb
index 17d6eed..fefdae9 100644
--- a/test/groq/test_client.rb
+++ b/test/groq/test_client.rb
@@ -3,6 +3,13 @@
 require "test_helper"
 
 class TestGroqClient < Minitest::Test
+  def test_defaults
+    client = Groq::Client.new
+    assert_equal "llama3-8b-8192", client.model_id
+    assert_equal 1024, client.max_tokens
+    assert_equal 1, client.temperature
+  end
+
   # define "say hello world" for each model, such as: test_hello_world_llama3_8b et al
   Groq::Model::MODELS.each do |model|
     model_id = model[:model_id]
@@ -103,4 +110,15 @@ def test_tools_weather_report
       assert_equal response, {"role" => "assistant", "content" => "The weather in Brisbane, QLD is 25 degrees Celsius."}
     end
   end
+
+  def test_max_tokens
+    VCR.use_cassette("llama3-8b-8192/chat_max_tokens") do
+      client = Groq::Client.new(model_id: "llama3-8b-8192")
+      response = client.chat("What's the next day after Wednesday?", max_tokens: 1)
+      assert_equal response, {
+        "role" => "assistant", "content" => "The"
+      }
+      # Yeah, max_tokens: 1 still returns a full word, because it's a single token.
+    end
+  end
 end
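One detail worth noting about the `max_tokens || @max_tokens` / `temperature || @temperature` fallbacks in `Groq::Client#chat`: in Ruby only `nil` and `false` are falsy, so an explicit `temperature: 0` is honoured rather than silently replaced by the client default. A minimal plain-Ruby sketch of that behaviour:

```ruby
default_temperature = 1

# Only nil (and false) trigger the right-hand side of ||:
0 || default_temperature    # => 0  (an explicit temperature of 0 survives)
nil || default_temperature  # => 1  (an omitted keyword, defaulting to nil, falls back)

# This mirrors the body construction in Groq::Client#chat:
[0, 0.5, nil].map { |t| t || default_temperature }
# => [0, 0.5, 1]
```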