diff --git a/.DS_Store b/.DS_Store
new file mode 100644
index 0000000..6552c39
Binary files /dev/null and b/.DS_Store differ
diff --git a/setup.py b/setup.py
index 6540fd6..505e9cb 100644
--- a/setup.py
+++ b/setup.py
@@ -13,7 +13,7 @@ install_requires = [
     'numpy', 'openai', 'pandas', 'orion',
     'matplotlib',
     'scikit-learn',
-    'tiktoken',
+    'tiktoken', 'transformers', 'torch'
 ]
 
 setup_requires = [
diff --git a/sigllm/data.py b/sigllm/data.py
index 7d28dd8..3bb4caf 100644
--- a/sigllm/data.py
+++ b/sigllm/data.py
@@ -12,7 +12,7 @@
 def rolling_window_sequences(X, index, window_size, step_size):
     """Create rolling window sequences out of time series data.
 
-    The function creates an array of sequences by rolling over the input sequence.
+    This function creates an array of sequences by rolling over the input sequence.
 
     Args:
         X (ndarray):
diff --git a/sigllm/gpt.py b/sigllm/gpt.py
index 3778336..65cdd13 100644
--- a/sigllm/gpt.py
+++ b/sigllm/gpt.py
@@ -8,54 +8,141 @@ import os
 
 from openai import OpenAI
+import tiktoken
 
-
-def load_system_prompt(file_path):
-    with open(file_path) as f:
-        system_prompt = f.read()
-    return system_prompt
-
-
-CURRENT_DIR = os.path.dirname(os.path.abspath(__file__))
-
-ZERO_SHOT_FILE = 'gpt_system_prompt_zero_shot.txt'
-ONE_SHOT_FILE = 'gpt_system_prompt_one_shot.txt'
-
-ZERO_SHOT_DIR = os.path.join(CURRENT_DIR, "..", "template", ZERO_SHOT_FILE)
-ONE_SHOT_DIR = os.path.join(CURRENT_DIR, "..", "template", ONE_SHOT_FILE)
-
-
-GPT_model = "gpt-4"  # "gpt-4-0125-preview" # # #"gpt-3.5-turbo" #
 client = OpenAI()
+
+VALID_NUMBERS = list("0123456789+- ")
+BIAS = 30
+SEP = ","
+GPT_MODEL = "gpt-3.5-turbo"
+TOKENIZER = tiktoken.encoding_for_model(GPT_MODEL)
+VALID_TOKENS = []
+for number in VALID_NUMBERS:
+    token = TOKENIZER.encode(number)[0]
+    VALID_TOKENS.append(token)
+
+VALID_TOKENS.append(TOKENIZER.encode(SEP)[0])
+LOGIT_BIAS = {token: BIAS for token in VALID_TOKENS}
+
+
+def get_gpt_model_response(
+    message,
+    # frequency_penalty=0,
+    # logit_bias=LOGIT_BIAS,
+    # logprobs=False,  (KeyError that can't be fixed as of now)
+    # top_logprobs=None,
+    # max_tokens=None,
+    # n=1,
+    # presence_penalty=0,
+    # response_format={"type": "text"},
+    # seed=None,
+    # stop=None,
+    # stream=False,
+    # temperature=1,
+    # top_p=1,
+    # tools=[],
+    # tool_choice='none',
+    # user=""
+):
+    """Return the GPT model response to a message prompt.
+
+    Args:
+        message (List[dict]):
+            Prompt written in template format.
+        gpt_model (str):
+            GPT model name. Defaults to `"gpt-3.5-turbo"`.
+        frequency_penalty (number or null):
+            Number between -2.0 and 2.0. Positive values penalize new tokens based on their
+            existing frequency in the text so far, decreasing the model's likelihood to repeat
+            the same line verbatim. Defaults to `0`.
+        logit_bias (map):
+            Modify the likelihood of specified tokens appearing in the completion.
+            Accepts a JSON object that maps tokens (specified by their token ID in the tokenizer)
+            to an associated bias value from -100 to 100. Mathematically, the bias is added to
+            the logits generated by the model prior to sampling. The exact effect will vary per
+            model, but values between -1 and 1 should decrease or increase likelihood of
+            selection; values like -100 or 100 should result in a ban or exclusive selection of
+            the relevant token. Defaults to `None`.
+        logprobs (bool or null):
+            Whether to return log probabilities of the output tokens or not. If true, returns
+            the log probabilities of each output token returned in the content of message.
+            This option is currently not available on the gpt-4-vision-preview model.
+            Defaults to `False`.
+        top_logprobs (int or null):
+            An integer between 0 and 20 specifying the number of most likely tokens to return
+            at each token position, each with an associated log probability. `logprobs` must be
+            set to `True` if this parameter is used. Defaults to `None`.
+        max_tokens (int or null):
+            The maximum number of tokens that can be generated in the chat completion.
+            Defaults to `None`.
+        n (int or null):
+            How many chat completion choices to generate for each input message. Note that you
+            will be charged based on the number of generated tokens across all of the choices.
+            Defaults to `1`.
+        presence_penalty (number or null):
+            Number between -2.0 and 2.0. Positive values penalize new tokens based on whether
+            they appear in the text so far, increasing the model's likelihood to talk about new
+            topics. Defaults to `0`.
+        response_format (object):
+            An object specifying the format that the model must output. Compatible with GPT-4
+            Turbo and all GPT-3.5 Turbo models newer than gpt-3.5-turbo-1106.
+            Setting to `{"type": "json_object"}` enables JSON mode, which guarantees the message
+            the model generates is valid JSON. Defaults to `{"type": "text"}`.
+        seed (int or null):
+            This feature is in Beta. If specified, our system will make a best effort to sample
+            deterministically, such that repeated requests with the same seed and parameters
+            should return the same result. Determinism is not guaranteed, and you should refer
+            to the system_fingerprint response parameter to monitor changes in the backend.
+            Defaults to `None`.
+        stop (str/array/null):
+            Up to 4 sequences where the API will stop generating further tokens.
+            Defaults to `None`.
+        stream (bool or null):
+            If set, partial message deltas will be sent, like in ChatGPT. Tokens will be sent
+            as data-only server-sent events as they become available, with the stream terminated
+            by a data: [DONE] message. Defaults to `False`.
+        temperature (number or null):
+            What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make
+            the output more random, while lower values like 0.2 will make it more focused and
+            deterministic. We generally recommend altering this or top_p but not both.
+            Defaults to `1`.
+        top_p (number or null):
+            An alternative to sampling with temperature, called nucleus sampling, where the
+            model considers the results of the tokens with top_p probability mass. So 0.1 means
+            only the tokens comprising the top 10% probability mass are considered.
+            We generally recommend altering this or temperature but not both. Defaults to `1`.
+        tools (array):
+            A list of tools the model may call. Currently, only functions are supported as a
+            tool. Use this to provide a list of functions the model may generate JSON inputs
+            for. A max of 128 functions are supported. Defaults to `[]`.
+        tool_choice (str or object):
+            Controls which (if any) function is called by the model.
+            `none` means the model will not call a function and instead generates a message.
+            `auto` means the model can pick between generating a message or calling a function.
+            Specifying a particular function via
+            `{"type": "function", "function": {"name": "my_function"}}` forces the model to
+            call that function. Defaults to `none`.
+        user (str):
+            A unique identifier representing your end-user, which can help OpenAI to monitor
+            and detect abuse.
+
+    Returns:
+        chat completion object:
+            GPT model response.
+ """ -def get_gpt_model_response(message, gpt_model=GPT_model): completion = client.chat.completions.create( - model=gpt_model, - messages=message, + model = GPT_MODEL, + messages = message, + # frequency_penalty = frequency_penalty, + logit_bias = LOGIT_BIAS, + # logprobs = logprobs, +# top_logprobs = top_logprobs, +# max_tokens = max_tokens, +# n = n, +# presence_penalty = presence_penalty, + # response_format = response_format, +# seed = seed, +# stop = stop, + # stream = stream, +# temperature = temperature, +# top_p = top_p, + # tools = tools, + # tool_choice = tool_choice, + # user = user ) - return completion.choices[0].message.content - - -def create_message_zero_shot(seq_query, system_prompt_file=ZERO_SHOT_DIR): - messages = [] - - messages.append({"role": "system", "content": load_system_prompt(system_prompt_file)}) - - # final prompt - messages.append({"role": "user", "content": f"Sequence: {seq_query}"}) - return messages - - -def create_message_one_shot(seq_query, seq_ex, ano_idx_ex, system_prompt_file=ONE_SHOT_DIR): - messages = [] - - messages.append({"role": "system", "content": load_system_prompt(system_prompt_file)}) - - # one shot - messages.append({"role": "user", "content": f"Sequence: {seq_ex}"}) - messages.append({"role": "assistant", "content": ano_idx_ex}) - - # final prompt - messages.append({"role": "user", "content": f"Sequence: {seq_query}"}) - return messages + return completion.choices[0].message.content \ No newline at end of file diff --git a/sigllm/mass_exp.py b/sigllm/mass_exp.py new file mode 100644 index 0000000..2bd2f57 --- /dev/null +++ b/sigllm/mass_exp.py @@ -0,0 +1,84 @@ +import pandas as pd +from data import rolling_window_sequences, sig2str +from orion.data import load_signal, load_anomalies +from sigllm import get_anomalies +from gpt import get_gpt_model_response +from prompt import create_message_zero_shot +from anomalies import merge_anomaly_seq +import numpy as np +from urllib.error import HTTPError +from orion.evaluation.utils import from_list_points_timestamps +from orion.evaluation.point import point_confusion_matrix, point_accuracy, point_f1_score, point_precision, point_recall +from orion.evaluation.contextual import contextual_accuracy, contextual_f1_score, contextual_precision, contextual_recall +import pickle + +df = pd.read_csv("data_summary.csv") #signal summary df + +#signal and result directory +try: + df_res = pd.read_csv("gpt-3.5-turbo-2-digits-res.csv") +except: + df_res = pd.DataFrame(columns=['signal', 'pkl_file_name']) + +computed = df_res['signal'] + +for i, row in df.iterrows(): + if row.signal in computed: + continue + true_ano = load_anomalies(row.signal) + try: + signal = load_signal(row.signal) + except HTTPError: + S3_URL = 'https://sintel-orion-benchmark.s3.amazonaws.com/{}' + signal = pd.read_csv(S3_URL.format(row.signal + '.csv')) + values = signal['value'].values + indices = signal.index.values + #make rolling windows + window_size = 2500 + step_size = 500 + windows, start_indices = rolling_window_sequences(values, indices, window_size, step_size) + #rolling window anomaly detection + final_ano = [] + i = 0 + error = dict() #to save error (if any) when running each window + for seq in windows: + try: + final_ano.append(get_anomalies(seq, create_message_zero_shot, get_gpt_model_response, space = True, decimal = 2)) + except Exception as e: + error[i] = e + i+= 1 + ano_idx = merge_anomaly_seq(final_ano, start_indices, window_size, step_size, beta = 0) + anomalies_pts = idx2time(signal, final_res) + 
diff --git a/sigllm/mass_exp.py b/sigllm/mass_exp.py
new file mode 100644
index 0000000..2bd2f57
--- /dev/null
+++ b/sigllm/mass_exp.py
@@ -0,0 +1,84 @@
+import pandas as pd
+from data import rolling_window_sequences, sig2str
+from orion.data import load_signal, load_anomalies
+from sigllm import get_anomalies
+from gpt import get_gpt_model_response
+from prompt import create_message_zero_shot
+from anomalies import merge_anomaly_seq, idx2time  # idx2time assumed to live alongside merge_anomaly_seq
+import numpy as np
+from urllib.error import HTTPError
+from orion.evaluation.utils import from_list_points_timestamps
+from orion.evaluation.point import point_confusion_matrix, point_accuracy, point_f1_score, point_precision, point_recall
+from orion.evaluation.contextual import contextual_accuracy, contextual_f1_score, contextual_precision, contextual_recall
+import pickle
+
+df = pd.read_csv("data_summary.csv")  # signal summary dataframe
+
+# result summary; resume from it if it already exists
+try:
+    df_res = pd.read_csv("gpt-3.5-turbo-2-digits-res.csv")
+except FileNotFoundError:
+    df_res = pd.DataFrame(columns=['signal', 'pkl_file_name'])
+
+computed = set(df_res['signal'])
+
+for _, row in df.iterrows():
+    if row.signal in computed:
+        continue
+    true_ano = load_anomalies(row.signal)
+    try:
+        signal = load_signal(row.signal)
+    except HTTPError:
+        S3_URL = 'https://sintel-orion-benchmark.s3.amazonaws.com/{}'
+        signal = pd.read_csv(S3_URL.format(row.signal + '.csv'))
+    values = signal['value'].values
+    indices = signal.index.values
+
+    # make rolling windows
+    window_size = 2500
+    step_size = 500
+    windows, start_indices = rolling_window_sequences(values, indices, window_size, step_size)
+
+    # rolling window anomaly detection
+    final_ano = []
+    error = dict()  # to save the error (if any) raised while running each window
+    for w, seq in enumerate(windows):
+        try:
+            final_ano.append(get_anomalies(seq, create_message_zero_shot, get_gpt_model_response,
+                                           space=True, decimal=2))
+        except Exception as e:
+            error[w] = e
+    ano_idx = merge_anomaly_seq(final_ano, start_indices, window_size, step_size, beta=0)
+    anomalies_pts = idx2time(signal, ano_idx)
+    anomalies_contextual = from_list_points_timestamps(anomalies_pts, gap=row.interval)
+
+    ground_truth_pts = []
+    ground_truth_context = []
+    for _, interval in true_ano.iterrows():
+        ground_truth_pts += range(interval.start, interval.end + 1)
+        ground_truth_context.append((interval.start, interval.end))
+
+    start, end = (int(signal.iloc[0].timestamp), int(signal.iloc[-1].timestamp))
+
+    # benchmark
+    tn, fp, fn, tp = point_confusion_matrix(ground_truth_pts, anomalies_pts, start=start, end=end)
+    precision = point_precision(ground_truth_pts, anomalies_pts, start=start, end=end)
+    recall = point_recall(ground_truth_pts, anomalies_pts, start=start, end=end)
+    accuracy = point_accuracy(ground_truth_pts, anomalies_pts, start=start, end=end)
+    f1 = point_f1_score(ground_truth_pts, anomalies_pts, start=start, end=end)
+
+    c_accuracy = contextual_accuracy(ground_truth_context, anomalies_contextual, start=start, end=end)
+    c_f1 = contextual_f1_score(ground_truth_context, anomalies_contextual, start=start, end=end)
+    c_precision = contextual_precision(ground_truth_context, anomalies_contextual, start=start, end=end)
+    c_recall = contextual_recall(ground_truth_context, anomalies_contextual, start=start, end=end)
+
+    result = {'signal': row.signal, 'error': error, 'anomalies': anomalies_pts, 'tp': tp, 'fp': fp,
+              'fn': fn, 'point_precision': precision, 'point_recall': recall,
+              'point_accuracy': accuracy, 'point_f1': f1,
+              'contextual_precision': c_precision, 'contextual_recall': c_recall,
+              'contextual_accuracy': c_accuracy, 'contextual_f1': c_f1}
+    file_name = row.signal + 'gpt-3.5-turbo.pickle'
+
+    with open(file_name, 'wb') as handle:
+        pickle.dump(result, handle, protocol=pickle.HIGHEST_PROTOCOL)
+    df_res = df_res.append({'signal': row.signal, 'pkl_file_name': file_name}, ignore_index=True)
+
+df_res.to_csv("gpt-3.5-turbo-2-digits-res.csv", index=False)
\ No newline at end of file
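After a benchmark run, the per-signal pickles indexed by the CSV above can be reloaded for inspection, roughly as follows (a sketch assuming at least one signal has been processed):

```python
import pickle

import pandas as pd

index = pd.read_csv("gpt-3.5-turbo-2-digits-res.csv")
with open(index.iloc[0].pkl_file_name, 'rb') as handle:
    result = pickle.load(handle)

# Each pickle stores the detected anomalies plus the point and contextual scores.
print(result['signal'], result['point_f1'], result['contextual_f1'])
```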
diff --git a/sigllm/prompt.py b/sigllm/prompt.py
new file mode 100644
index 0000000..6792ffa
--- /dev/null
+++ b/sigllm/prompt.py
@@ -0,0 +1,46 @@
+# -*- coding: utf-8 -*-
+
+"""
+Prompt module.
+
+This module contains functions that are specifically used to create prompts.
+"""
+import os
+
+
+def load_system_prompt(file_path):
+    with open(file_path) as f:
+        system_prompt = f.read()
+    return system_prompt
+
+
+CURRENT_DIR = os.path.dirname(os.path.abspath(__file__))
+
+ZERO_SHOT_FILE = 'gpt_system_prompt_zero_shot.txt'
+ONE_SHOT_FILE = 'gpt_system_prompt_one_shot.txt'
+
+ZERO_SHOT_DIR = os.path.join(CURRENT_DIR, "..", "template", ZERO_SHOT_FILE)
+ONE_SHOT_DIR = os.path.join(CURRENT_DIR, "..", "template", ONE_SHOT_FILE)
+
+
+def create_message_zero_shot(seq_query, system_prompt_file=ZERO_SHOT_DIR):
+    messages = []
+
+    messages.append({"role": "system", "content": load_system_prompt(system_prompt_file)})
+
+    # final prompt
+    messages.append({"role": "user", "content": f"Sequence: {seq_query}"})
+    return messages
+
+
+def create_message_one_shot(seq_query, seq_ex, ano_idx_ex, system_prompt_file=ONE_SHOT_DIR):
+    messages = []
+
+    messages.append({"role": "system", "content": load_system_prompt(system_prompt_file)})
+
+    # one shot
+    messages.append({"role": "user", "content": f"Sequence: {seq_ex}"})
+    messages.append({"role": "assistant", "content": ano_idx_ex})
+
+    # final prompt
+    messages.append({"role": "user", "content": f"Sequence: {seq_query}"})
+    return messages
+
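For reference, the one-shot helper produces a message list shaped as below (illustrative sequences; the system prompt content comes from the template file):

```python
from prompt import create_message_one_shot

messages = create_message_one_shot(seq_query="1 2 1 9 1", seq_ex="2 2 8 2", ano_idx_ex="2")
# [
#     {"role": "system", "content": "<contents of gpt_system_prompt_one_shot.txt>"},
#     {"role": "user", "content": "Sequence: 2 2 8 2"},
#     {"role": "assistant", "content": "2"},
#     {"role": "user", "content": "Sequence: 1 2 1 9 1"},
# ]
```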
diff --git a/sigllm/sigllm.py b/sigllm/sigllm.py
index 20df5d0..f0d0bf8 100644
--- a/sigllm/sigllm.py
+++ b/sigllm/sigllm.py
@@ -9,7 +9,34 @@
 from data import sig2str
 
 
-def get_anomalies(seq, msg_func, model_func, num_iters=1, alpha=0.5):
+def get_anomalies(
+    seq,
+    msg_func,
+    model_func,
+    sep=',',
+    space=False,
+    decimal=0,
+    rescale=True,
+    num_iters=1,
+    alpha=0.5,
+    # gpt_model="gpt-3.5-turbo",
+    # frequency_penalty=0,
+    # logit_bias=None,
+    # logprobs=False,
+    # top_logprobs=None,
+    # max_tokens=None,
+    # n=1,
+    # presence_penalty=0,
+    # response_format={"type": "text"},
+    # seed=None,
+    # stop=None,
+    # stream=False,
+    # temperature=1,
+    # top_p=1,
+    # tools=[],
+    # tool_choice='none',
+    # user=""
+):
     """Get LLM anomaly detection results.
 
     The function get the LLM's anomaly detection and converts them into an 1D array
@@ -23,15 +50,94 @@
             Function to get LLM answer.
         num_iters (int):
             Number of times to run the same query.
+        sep (str):
+            String to separate each element in values. Defaults to `","`.
+        space (bool):
+            Whether to add space between each digit in the result. Defaults to `False`.
+        decimal (int):
+            Number of decimal points to keep from the float representation. Defaults to `0`.
+        rescale (bool):
+            Whether to rescale the time series. Defaults to `True`.
         alpha (float):
             Percentage of total number of votes that an index needs to have to be
             considered anomalous. Default: 0.5
+        gpt_model (str):
+            GPT model name. Defaults to `"gpt-3.5-turbo"`.
+        frequency_penalty (number or null):
+            Number between -2.0 and 2.0. Positive values penalize new tokens based on their
+            existing frequency in the text so far, decreasing the model's likelihood to repeat
+            the same line verbatim. Defaults to `0`.
+        logit_bias (map):
+            Modify the likelihood of specified tokens appearing in the completion.
+            Accepts a JSON object that maps tokens (specified by their token ID in the tokenizer)
+            to an associated bias value from -100 to 100. Mathematically, the bias is added to
+            the logits generated by the model prior to sampling. The exact effect will vary per
+            model, but values between -1 and 1 should decrease or increase likelihood of
+            selection; values like -100 or 100 should result in a ban or exclusive selection of
+            the relevant token. Defaults to `None`.
+        logprobs (bool or null):
+            Whether to return log probabilities of the output tokens or not. If true, returns
+            the log probabilities of each output token returned in the content of message.
+            This option is currently not available on the gpt-4-vision-preview model.
+            Defaults to `False`.
+        top_logprobs (int or null):
+            An integer between 0 and 20 specifying the number of most likely tokens to return
+            at each token position, each with an associated log probability. `logprobs` must be
+            set to `True` if this parameter is used. Defaults to `None`.
+        max_tokens (int or null):
+            The maximum number of tokens that can be generated in the chat completion.
+            Defaults to `None`.
+        n (int or null):
+            How many chat completion choices to generate for each input message. Note that you
+            will be charged based on the number of generated tokens across all of the choices.
+            Defaults to `1`.
+        presence_penalty (number or null):
+            Number between -2.0 and 2.0. Positive values penalize new tokens based on whether
+            they appear in the text so far, increasing the model's likelihood to talk about new
+            topics. Defaults to `0`.
+        response_format (object):
+            An object specifying the format that the model must output. Compatible with GPT-4
+            Turbo and all GPT-3.5 Turbo models newer than gpt-3.5-turbo-1106.
+            Setting to `{"type": "json_object"}` enables JSON mode, which guarantees the message
+            the model generates is valid JSON. Defaults to `{"type": "text"}`.
+        seed (int or null):
+            This feature is in Beta. If specified, our system will make a best effort to sample
+            deterministically, such that repeated requests with the same seed and parameters
+            should return the same result. Determinism is not guaranteed, and you should refer
+            to the system_fingerprint response parameter to monitor changes in the backend.
+            Defaults to `None`.
+        stop (str/array/null):
+            Up to 4 sequences where the API will stop generating further tokens.
+            Defaults to `None`.
+        stream (bool or null):
+            If set, partial message deltas will be sent, like in ChatGPT. Tokens will be sent
+            as data-only server-sent events as they become available, with the stream terminated
+            by a data: [DONE] message. Defaults to `False`.
+        temperature (number or null):
+            What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make
+            the output more random, while lower values like 0.2 will make it more focused and
+            deterministic. We generally recommend altering this or top_p but not both.
+            Defaults to `1`.
+        top_p (number or null):
+            An alternative to sampling with temperature, called nucleus sampling, where the
+            model considers the results of the tokens with top_p probability mass. So 0.1 means
+            only the tokens comprising the top 10% probability mass are considered.
+            We generally recommend altering this or temperature but not both. Defaults to `1`.
+        tools (array):
+            A list of tools the model may call. Currently, only functions are supported as a
+            tool. Use this to provide a list of functions the model may generate JSON inputs
+            for. A max of 128 functions are supported. Defaults to `[]`.
+        tool_choice (str or object):
+            Controls which (if any) function is called by the model.
+            `none` means the model will not call a function and instead generates a message.
+            `auto` means the model can pick between generating a message or calling a function.
+            Specifying a particular function via
+            `{"type": "function", "function": {"name": "my_function"}}` forces the model to
+            call that function. Defaults to `none`.
+        user (str):
+            A unique identifier representing your end-user, which can help OpenAI to monitor
+            and detect abuse.
 
     Returns:
         ndarray:
             1D array containing anomalous indices of the sequence.
     """
-    message = msg_func(sig2str(seq, space=True))
+    message = msg_func(sig2str(seq, sep=sep, space=space, decimal=decimal, rescale=rescale))
     res_list = []
     for i in range(num_iters):
         res = model_func(message)
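Taken together, the new serialization arguments can be exercised end to end much like `mass_exp.py` does (a hypothetical sketch; `num_iters` repeats the query and `alpha` sets the fraction of votes an index needs to be kept):

```python
import numpy as np

from gpt import get_gpt_model_response
from prompt import create_message_zero_shot
from sigllm import get_anomalies

# Toy window; the benchmark script rolls 2500-point windows with a 500-point step.
window = np.random.rand(200)
anomalous_idx = get_anomalies(
    window, create_message_zero_shot, get_gpt_model_response,
    space=True, decimal=2, num_iters=5, alpha=0.6)
```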