Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Compare agent bet histories with kelly strategy #419

Merged
merged 12 commits into from
Sep 23, 2024
230 changes: 190 additions & 40 deletions examples/monitor/match_bets_with_langfuse_traces.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,20 @@
from datetime import datetime
from typing import Any

import pandas as pd
from langfuse import Langfuse
from web3 import Web3
from pydantic import BaseModel

from prediction_market_agent_tooling.config import APIKeys
from prediction_market_agent_tooling.deploy.betting_strategy import (
BettingStrategy,
KellyBettingStrategy,
MaxAccuracyBettingStrategy,
MaxAccuracyWithKellyScaledBetsStrategy,
MaxExpectedValueBettingStrategy,
ProbabilisticAnswer,
TradeType,
)
from prediction_market_agent_tooling.markets.data_models import ResolvedBet
from prediction_market_agent_tooling.markets.omen.omen import OmenAgentMarket
from prediction_market_agent_tooling.tools.langfuse_client_utils import (
Expand All @@ -12,49 +23,188 @@
get_trace_for_bet,
get_traces_for_agent,
)
from prediction_market_agent_tooling.tools.utils import get_private_key_from_gcp_secret

if __name__ == "__main__":
api_keys = APIKeys()
assert api_keys.bet_from_address == Web3.to_checksum_address(
"0xA8eFa5bb5C6ad476c9E0377dbF66cC41CB6D5bdD" # prophet_gpt4_final
)
start_time = datetime(2024, 9, 13)
langfuse = Langfuse(
secret_key=api_keys.langfuse_secret_key.get_secret_value(),
public_key=api_keys.langfuse_public_key,
host=api_keys.langfuse_host,

class SimulatedOutcome(BaseModel):
    """Result of replaying one resolved market with a simulated betting strategy.

    Instances are built by `get_outcome_for_trace` from the single buy trade
    the strategy proposes for that market.
    """

    # Amount staked by the simulated buy trade.
    size: float
    # Outcome the simulated trade bought.
    direction: bool
    # True when `direction` equals the market's resolved outcome.
    correct: bool
    # Outcome tokens received minus the stake when `correct`; otherwise the
    # negated stake.
    profit: float


def get_outcome_for_trace(
strategy: BettingStrategy,
trace: ProcessMarketTrace,
market_outcome: bool,
) -> SimulatedOutcome | None:
market = trace.market
answer = trace.answer

trades = strategy.calculate_trades(
existing_position=None,
answer=ProbabilisticAnswer(
p_yes=answer.p_yes,
confidence=answer.confidence,
),
market=market,
)
# For example, when our predicted p_yes is 95%, but market is already trading at 99%, and we don't have anything to sell, Kelly will yield no trades.
if not trades:
return None
assert (
len(trades) == 1
), f"Should be always one trade if no existing position is given: {trades=}; {answer=}; {market=}"
assert (
trades[0].trade_type == TradeType.BUY
), "Can only buy without previous position."
buy_trade = trades[0]

traces = get_traces_for_agent(
agent_name="DeployablePredictionProphetGPT4TurboFinalAgent",
trace_name="process_market",
from_timestamp=start_time,
has_output=True,
client=langfuse,
received_outcome_tokens = market.get_buy_token_amount(
bet_amount=market.get_bet_amount(buy_trade.amount.amount),
direction=buy_trade.outcome,
).amount

correct = buy_trade.outcome == market_outcome
profit = (
received_outcome_tokens - buy_trade.amount.amount
if correct
else -buy_trade.amount.amount
)
print(f"All traces: {len(traces)}")
process_market_traces = []
for trace in traces:
if process_market_trace := ProcessMarketTrace.from_langfuse_trace(trace):
process_market_traces.append(process_market_trace)
print(f"All process_market_traces: {len(process_market_traces)}")

bets: list[ResolvedBet] = OmenAgentMarket.get_resolved_bets_made_since(
better_address=api_keys.bet_from_address,
start_time=start_time,
end_time=None,

return SimulatedOutcome(
size=buy_trade.amount.amount,
direction=buy_trade.outcome,
correct=correct,
profit=profit,
)

# All bets should have a trace, but not all traces should have a bet
# (e.g. if all markets are deemed unpredictable), so iterate over bets
bets_with_traces: list[ResolvedBetWithTrace] = []
for bet in bets:
trace = get_trace_for_bet(bet, process_market_traces)
if trace:
bets_with_traces.append(ResolvedBetWithTrace(bet=bet, trace=trace))

print(f"Number of bets since {start_time}: {len(bets_with_traces)}")
if len(bets_with_traces) != len(bets):
raise ValueError(
f"{len(bets) - len(bets_with_traces)} bets do not have a corresponding trace"

if __name__ == "__main__":
# Get the private keys for the agents from GCP Secret Manager
agent_gcp_secret_map = {
"DeployablePredictionProphetGPT4TurboFinalAgent": "pma-prophetgpt4turbo-final",
"DeployablePredictionProphetGPT4TurboPreviewAgent": "pma-prophetgpt4",
"DeployablePredictionProphetGPT4oAgent": "pma-prophetgpt3",
"DeployableOlasEmbeddingOAAgent": "pma-evo-olas-embeddingoa",
# "DeployableThinkThoroughlyAgent": "pma-think-thoroughly", # no bets!
# "DeployableThinkThoroughlyProphetResearchAgent": "pma-think-thoroughly-prophet-research", # no bets!
"DeployableKnownOutcomeAgent": "pma-knownoutcome",
}
agent_pkey_map = {
k: get_private_key_from_gcp_secret(v) for k, v in agent_gcp_secret_map.items()
}
# Define strategies we want to test out
strategies = [
MaxAccuracyBettingStrategy(bet_amount=1),
MaxAccuracyBettingStrategy(bet_amount=2),
MaxAccuracyBettingStrategy(bet_amount=25),
KellyBettingStrategy(max_bet_amount=1),
KellyBettingStrategy(max_bet_amount=2),
KellyBettingStrategy(max_bet_amount=25),
MaxAccuracyWithKellyScaledBetsStrategy(max_bet_amount=1),
MaxAccuracyWithKellyScaledBetsStrategy(max_bet_amount=2),
MaxAccuracyWithKellyScaledBetsStrategy(max_bet_amount=25),
MaxExpectedValueBettingStrategy(bet_amount=1),
MaxExpectedValueBettingStrategy(bet_amount=2),
MaxExpectedValueBettingStrategy(bet_amount=25),
]

print("# Agent Bet vs Simulated Bet Comparison")
for agent_name, private_key in agent_pkey_map.items():
print(f"\n## {agent_name}\n")
api_keys = APIKeys(BET_FROM_PRIVATE_KEY=private_key)

# Pick a time after pool token number is stored in OmenAgentMarket
start_time = datetime(2024, 9, 13)

langfuse = Langfuse(
secret_key=api_keys.langfuse_secret_key.get_secret_value(),
public_key=api_keys.langfuse_public_key,
host=api_keys.langfuse_host,
)

traces = get_traces_for_agent(
agent_name=agent_name,
trace_name="process_market",
from_timestamp=start_time,
has_output=True,
client=langfuse,
)
process_market_traces: list[ProcessMarketTrace] = []
for trace in traces:
if process_market_trace := ProcessMarketTrace.from_langfuse_trace(trace):
process_market_traces.append(process_market_trace)

bets: list[ResolvedBet] = OmenAgentMarket.get_resolved_bets_made_since(
better_address=api_keys.bet_from_address,
start_time=start_time,
end_time=None,
)

# All bets should have a trace, but not all traces should have a bet
# (e.g. if all markets are deemed unpredictable), so iterate over bets
bets_with_traces: list[ResolvedBetWithTrace] = []
for bet in bets:
trace = get_trace_for_bet(bet, process_market_traces)
if trace:
bets_with_traces.append(ResolvedBetWithTrace(bet=bet, trace=trace))

print(f"Number of bets since {start_time}: {len(bets_with_traces)}\n")
if len(bets_with_traces) != len(bets):
raise ValueError(
f"{len(bets) - len(bets_with_traces)} bets do not have a corresponding trace"
)

evangriffiths marked this conversation as resolved.
Show resolved Hide resolved
Comment on lines +113 to +158
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM! Consider improving error handling for mismatched bets and traces.

The data retrieval and processing logic is well-implemented. Matching bets with corresponding traces and checking for data integrity are crucial steps.

The current error handling for mismatched bets and traces could be improved. Instead of raising a ValueError, consider logging a warning and continuing with the available data. This approach would allow the script to process as much data as possible, even if some bets are missing traces.

Replace the following code:

if len(bets_with_traces) != len(bets):
    raise ValueError(
        f"{len(bets) - len(bets_with_traces)} bets do not have a corresponding trace"
    )

with:

import logging

# ... (earlier in the file, set up logging)

if len(bets_with_traces) != len(bets):
    missing_traces = len(bets) - len(bets_with_traces)
    logging.warning(f"{missing_traces} bets do not have a corresponding trace. Continuing with available data.")
    print(f"Warning: {missing_traces} bets do not have a corresponding trace. Continuing with available data.")

This change will allow the script to continue processing even if some bets are missing traces, providing more robust behavior.

# Rows of the comparison table printed at the end of this section: first the
# bets the agent really placed, then one row per simulated strategy.
simulations: list[dict[str, Any]] = []

# The agent's real bets do not depend on the simulated strategy, so their
# totals are computed once, outside the strategy loop.
total_bet_amount = sum(bt.bet.amount.amount for bt in bets_with_traces)
total_bet_profit = sum(bt.bet.profit.amount for bt in bets_with_traces)
# ROI is undefined when nothing was staked (e.g. the agent has no matched
# bets); report it as None, like the other unknown cells, instead of letting
# a ZeroDivisionError abort the whole report.
roi = 100 * total_bet_profit / total_bet_amount if total_bet_amount else None

simulations.append(
    {
        "strategy": "original",
        "bet_amount": total_bet_amount,
        "bet_profit": total_bet_profit,
        "roi": roi,
        # We don't know these for the original run.
        "start_balance": None,
        "end_balance": None,
    }
)

for strategy in strategies:
    # Give every strategy the same hypothetical starting funds and replay the
    # bets one by one. The balance is only tracked for reporting; it is not
    # passed to the strategy and therefore does not limit bet sizes.
    starting_balance = 50.0
    agent_balance = starting_balance
    simulated_outcomes: list[SimulatedOutcome] = []

    for bet_with_trace in bets_with_traces:
        simulated_outcome = get_outcome_for_trace(
            strategy=strategy,
            trace=bet_with_trace.trace,
            market_outcome=bet_with_trace.bet.market_outcome,
        )
        # None means the strategy proposed no trade for this market.
        if simulated_outcome is None:
            continue
        simulated_outcomes.append(simulated_outcome)
        agent_balance += simulated_outcome.profit

    total_simulated_amount = sum(so.size for so in simulated_outcomes)
    total_simulated_profit = sum(so.profit for so in simulated_outcomes)
    # A strategy that never trades has staked nothing, so its ROI is
    # undefined rather than a division by zero.
    simulated_roi = (
        100 * total_simulated_profit / total_simulated_amount
        if total_simulated_amount
        else None
    )

    simulations.append(
        {
            "strategy": repr(strategy),
            "bet_amount": total_simulated_amount,
            "bet_profit": total_simulated_profit,
            "roi": simulated_roi,
            "start_balance": starting_balance,
            "end_balance": agent_balance,
        }
    )

print(pd.DataFrame.from_records(simulations).to_markdown(index=False))
Comment on lines +159 to +210
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM! Consider adding summary statistics for easier comparison.

The simulation logic and results presentation are well-implemented. The comparison of ROI between actual bets and various simulated strategies provides valuable insights.

To enhance the analysis of results, consider adding summary statistics to compare the performance of different strategies more easily. You could add the following code after the DataFrame is created:

df = pd.DataFrame.from_records(simulations)
print(df.to_markdown(index=False))

# Add summary statistics
print("\nSummary Statistics:")
summary = df.groupby('strategy').agg({
    'roi': ['mean', 'std', 'min', 'max'],
    'bet_amount': 'sum',
    'bet_profit': 'sum'
}).reset_index()
summary.columns = ['strategy', 'mean_roi', 'std_roi', 'min_roi', 'max_roi', 'total_bet_amount', 'total_profit']
print(summary.to_markdown(index=False))

This addition will provide a concise overview of each strategy's performance, making it easier to compare and identify the most effective approaches.

67 changes: 67 additions & 0 deletions prediction_market_agent_tooling/deploy/betting_strategy.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,9 @@ def calculate_trades(
def calculate_direction(market_p_yes: float, estimate_p_yes: float) -> bool:
return estimate_p_yes >= 0.5

def __repr__(self) -> str:
return f"{self.__class__.__name__}(bet_amount={self.bet_amount})"


class MaxExpectedValueBettingStrategy(MaxAccuracyBettingStrategy):
@staticmethod
Expand Down Expand Up @@ -202,3 +205,67 @@ def calculate_trades(
existing_position, target_position, market=market
)
return trades

def __repr__(self) -> str:
return f"{self.__class__.__name__}(max_bet_amount={self.max_bet_amount})"


class MaxAccuracyWithKellyScaledBetsStrategy(BettingStrategy):
def __init__(self, max_bet_amount: float = 10):
self.max_bet_amount = max_bet_amount

def adjust_bet_amount(
    self, existing_position: Position | None, market: AgentMarket
) -> float:
    """Return `max_bet_amount` increased by the size of any existing position.

    `existing_position` may be None (or otherwise falsy), in which case
    nothing is added. `market` is accepted but not used by this method.
    """
    already_held = 0
    if existing_position:
        already_held = existing_position.total_amount.amount
    return self.max_bet_amount + already_held

Comment on lines +217 to +224
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ensure max_bet_amount enforces the intended maximum

In the adjust_bet_amount method, adding existing_position_total_amount to self.max_bet_amount might result in a total bet exceeding max_bet_amount when there's an existing position. If max_bet_amount is intended to cap the total bet amount, consider revising the calculation to enforce this limit.

def calculate_trades(
self,
existing_position: Position | None,
answer: ProbabilisticAnswer,
market: AgentMarket,
) -> list[Trade]:
adjusted_bet_amount = self.adjust_bet_amount(existing_position, market)
outcome_token_pool = check_not_none(market.outcome_token_pool)

# Fixed direction of bet, only use Kelly to adjust the bet size based on market's outcome pool size.
estimated_p_yes = float(answer.p_yes > 0.5)
confidence = 1.0

kelly_bet = (
Comment on lines +235 to +238
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Consider using actual estimated probabilities and confidence

Currently, estimated_p_yes is set to 1.0 or 0.0 based on whether answer.p_yes > 0.5, and confidence is hardcoded to 1.0. This may not fully utilize the model's probability estimates and confidence levels. To leverage the Kelly criterion effectively, consider using the actual answer.p_yes and answer.confidence values.

get_kelly_bet_full(
yes_outcome_pool_size=outcome_token_pool[
market.get_outcome_str_from_bool(True)
],
no_outcome_pool_size=outcome_token_pool[
market.get_outcome_str_from_bool(False)
],
estimated_p_yes=estimated_p_yes,
max_bet=adjusted_bet_amount,
confidence=confidence,
)
if market.has_token_pool()
else get_kelly_bet_simplified(
adjusted_bet_amount,
market.current_p_yes,
estimated_p_yes,
confidence,
)
)

amounts = {
market.get_outcome_str_from_bool(kelly_bet.direction): TokenAmount(
amount=kelly_bet.size, currency=market.currency
),
}
target_position = Position(market_id=market.id, amounts=amounts)
trades = self._build_rebalance_trades_from_positions(
existing_position, target_position, market=market
)
return trades
Comment on lines +231 to +268
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Refactor shared logic into a common method

The calculate_trades method contains logic similar to that in KellyBettingStrategy. To promote code reusability and reduce duplication, consider refactoring the common code for calculating the Kelly bet into a shared method or utility function.


def __repr__(self) -> str:
return f"{self.__class__.__name__}(max_bet_amount={self.max_bet_amount})"
18 changes: 18 additions & 0 deletions prediction_market_agent_tooling/tools/utils.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import json
import os
import subprocess
import typing as t
Expand All @@ -6,12 +7,14 @@

import pytz
import requests
from google.cloud import secretmanager
from pydantic import BaseModel, ValidationError
from scipy.optimize import newton
from scipy.stats import entropy

from prediction_market_agent_tooling.gtypes import (
DatetimeWithTimezone,
PrivateKey,
Probability,
SecretStr,
)
Expand Down Expand Up @@ -210,3 +213,18 @@ def f(r: float) -> float:

amount_to_sell = newton(f, 0)
return float(amount_to_sell) * 0.999999 # Avoid rounding errors


def get_private_key_from_gcp_secret(
secret_id: str,
project_id: str = "582587111398", # Gnosis AI default project_id
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Consider Removing Hardcoded Default project_id

Hardcoding the project_id as a default parameter reduces the function's flexibility and may lead to issues in different environments or projects. It's better to require project_id as a mandatory parameter or retrieve it from an environment variable or configuration file.

Apply this diff to make project_id a required parameter:

-    project_id: str = "582587111398",  # Gnosis AI default project_id
+    project_id: str,
Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
project_id: str = "582587111398", # Gnosis AI default project_id
project_id: str,

version_id: str = "latest",
) -> PrivateKey:
client = secretmanager.SecretManagerServiceClient()
name = f"projects/{project_id}/secrets/{secret_id}/versions/{version_id}"
response = client.access_secret_version(request={"name": name})
Comment on lines +223 to +225
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add Exception Handling for Secret Manager Operations

Accessing secrets from GCP Secret Manager can raise exceptions due to network errors, authentication issues, or missing permissions. To improve robustness, add exception handling to gracefully manage potential errors.

Wrap the secret access code in a try-except block:

try:
    client = secretmanager.SecretManagerServiceClient()
    name = f"projects/{project_id}/secrets/{secret_id}/versions/{version_id}"
    response = client.access_secret_version(request={"name": name})
except Exception as e:
    raise ValueError(f"Failed to access secret: {e}")

secret_payload = response.payload.data.decode("UTF-8")
secret_json = json.loads(secret_payload)
if "private_key" not in secret_json:
raise ValueError(f"Private key not found in gcp secret {secret_id}")
return PrivateKey(SecretStr(secret_json["private_key"]))
Loading