diff --git a/.DS_Store b/.DS_Store
new file mode 100644
index 0000000..6552c39
Binary files /dev/null and b/.DS_Store differ
diff --git a/setup.py b/setup.py
index 6540fd6..505e9cb 100644
--- a/setup.py
+++ b/setup.py
@@ -13,7 +13,7 @@ install_requires = [
     'numpy', 'openai', 'pandas', 'orion',
     'matplotlib',
     'scikit-learn',
-    'tiktoken',
+    'tiktoken', 'transformers', 'torch'
 ]
 
 setup_requires = [
diff --git a/sigllm/data.py b/sigllm/data.py
index 7d28dd8..3bb4caf 100644
--- a/sigllm/data.py
+++ b/sigllm/data.py
@@ -12,7 +12,7 @@
 def rolling_window_sequences(X, index, window_size, step_size):
     """Create rolling window sequences out of time series data.
 
-    The function creates an array of sequences by rolling over the input sequence.
+    This function creates an array of sequences by rolling over the input sequence.
 
     Args:
         X (ndarray):
diff --git a/sigllm/gpt.py b/sigllm/gpt.py
index 3778336..65cdd13 100644
--- a/sigllm/gpt.py
+++ b/sigllm/gpt.py
@@ -8,54 +8,141 @@ import os
 
 from openai import OpenAI
+import tiktoken
 
-
-def load_system_prompt(file_path):
-    with open(file_path) as f:
-        system_prompt = f.read()
-    return system_prompt
-
-
-CURRENT_DIR = os.path.dirname(os.path.abspath(__file__))
-
-ZERO_SHOT_FILE = 'gpt_system_prompt_zero_shot.txt'
-ONE_SHOT_FILE = 'gpt_system_prompt_one_shot.txt'
-
-ZERO_SHOT_DIR = os.path.join(CURRENT_DIR, "..", "template", ZERO_SHOT_FILE)
-ONE_SHOT_DIR = os.path.join(CURRENT_DIR, "..", "template", ONE_SHOT_FILE)
-
-
-GPT_model = "gpt-4"  # "gpt-4-0125-preview" # # #"gpt-3.5-turbo" #
 client = OpenAI()
+
+VALID_NUMBERS = list("0123456789+- ")
+BIAS = 30
+SEP = ","
+GPT_MODEL = "gpt-3.5-turbo"
+TOKENIZER = tiktoken.encoding_for_model(GPT_MODEL)
+VALID_TOKENS = []
+for number in VALID_NUMBERS:
+    token = TOKENIZER.encode(number)[0]
+    VALID_TOKENS.append(token)
+
+VALID_TOKENS.append(TOKENIZER.encode(SEP)[0])
+LOGIT_BIAS = {token: BIAS for token in VALID_TOKENS}
+
+
+def get_gpt_model_response(
+    message,
+    # frequency_penalty=0,
+    # logit_bias=LOGIT_BIAS,
+    # logprobs=False,  (KeyError that can't be fixed as of now)
+    # top_logprobs=None,
+    # max_tokens=None,
+    # n=1,
+    # presence_penalty=0,
+    # response_format={"type": "text"},
+    # seed=None,
+    # stop=None,
+    # stream=False,
+    # temperature=1,
+    # top_p=1,
+    # tools=[],
+    # tool_choice='none',
+    # user=""
+):
+    """Return the GPT model response to a message prompt.
+
+    Args:
+        message (List[dict]):
+            Prompt written in template format.
+        gpt_model (str):
+            GPT model name. Defaults to `"gpt-3.5-turbo"`.
+        frequency_penalty (number or null):
+            Number between -2.0 and 2.0. Positive values penalize new tokens based on their
+            existing frequency in the text so far, decreasing the model's likelihood to repeat
+            the same line verbatim. Defaults to `0`.
+        logit_bias (map):
+            Modify the likelihood of specified tokens appearing in the completion.
+            Accepts a JSON object that maps tokens (specified by their token ID in the tokenizer)
+            to an associated bias value from -100 to 100. Mathematically, the bias is added to
+            the logits generated by the model prior to sampling. The exact effect will vary per
+            model, but values between -1 and 1 should decrease or increase likelihood of
+            selection; values like -100 or 100 should result in a ban or exclusive selection of
+            the relevant token. Defaults to `None`.
+        logprobs (bool or null):
+            Whether to return log probabilities of the output tokens or not. If true, returns
+            the log probabilities of each output token returned in the content of message.
+            This option is currently not available on the gpt-4-vision-preview model.
+            Defaults to `False`.
+        top_logprobs (int or null):
+            An integer between 0 and 20 specifying the number of most likely tokens to return
+            at each token position, each with an associated log probability. `logprobs` must be
+            set to `True` if this parameter is used. Defaults to `None`.
+        max_tokens (int or null):
+            The maximum number of tokens that can be generated in the chat completion.
+            Defaults to `None`.
+        n (int or null):
+            How many chat completion choices to generate for each input message. Note that you
+            will be charged based on the number of generated tokens across all of the choices.
+            Defaults to `1`.
+        presence_penalty (number or null):
+            Number between -2.0 and 2.0. Positive values penalize new tokens based on whether
+            they appear in the text so far, increasing the model's likelihood to talk about new
+            topics. Defaults to `0`.
+        response_format (object):
+            An object specifying the format that the model must output. Compatible with GPT-4
+            Turbo and all GPT-3.5 Turbo models newer than gpt-3.5-turbo-1106.
+            Setting to `{"type": "json_object"}` enables JSON mode, which guarantees the message
+            the model generates is valid JSON. Defaults to `{"type": "text"}`.
+        seed (int or null):
+            This feature is in Beta. If specified, our system will make a best effort to sample
+            deterministically, such that repeated requests with the same seed and parameters
+            should return the same result. Determinism is not guaranteed, and you should refer
+            to the system_fingerprint response parameter to monitor changes in the backend.
+            Defaults to `None`.
+        stop (str/array/null):
+            Up to 4 sequences where the API will stop generating further tokens.
+            Defaults to `None`.
+        stream (bool or null):
+            If set, partial message deltas will be sent, like in ChatGPT. Tokens will be sent
+            as data-only server-sent events as they become available, with the stream terminated
+            by a data: [DONE] message. Defaults to `False`.
+        temperature (number or null):
+            What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make
+            the output more random, while lower values like 0.2 will make it more focused and
+            deterministic. We generally recommend altering this or top_p but not both.
+            Defaults to `1`.
+        top_p (number or null):
+            An alternative to sampling with temperature, called nucleus sampling, where the
+            model considers the results of the tokens with top_p probability mass. So 0.1 means
+            only the tokens comprising the top 10% probability mass are considered.
+            We generally recommend altering this or temperature but not both. Defaults to `1`.
+        tools (array):
+            A list of tools the model may call. Currently, only functions are supported as a
+            tool. Use this to provide a list of functions the model may generate JSON inputs
+            for. A max of 128 functions are supported. Defaults to `[]`.
+        tool_choice (str or object):
+            Controls which (if any) function is called by the model.
+            `none` means the model will not call a function and instead generates a message.
+            `auto` means the model can pick between generating a message or calling a function.
+            Specifying a particular function via
+            `{"type": "function", "function": {"name": "my_function"}}` forces the model to
+            call that function. Defaults to `none`.
+        user (str):
+            A unique identifier representing your end-user, which can help OpenAI to monitor
+            and detect abuse.
+
+    Returns:
+        chat completion object:
+            GPT model response.
+ """ -def get_gpt_model_response(message, gpt_model=GPT_model): completion = client.chat.completions.create( - model=gpt_model, - messages=message, + model = GPT_MODEL, + messages = message, + # frequency_penalty = frequency_penalty, + logit_bias = LOGIT_BIAS, + # logprobs = logprobs, +# top_logprobs = top_logprobs, +# max_tokens = max_tokens, +# n = n, +# presence_penalty = presence_penalty, + # response_format = response_format, +# seed = seed, +# stop = stop, + # stream = stream, +# temperature = temperature, +# top_p = top_p, + # tools = tools, + # tool_choice = tool_choice, + # user = user ) - return completion.choices[0].message.content - - -def create_message_zero_shot(seq_query, system_prompt_file=ZERO_SHOT_DIR): - messages = [] - - messages.append({"role": "system", "content": load_system_prompt(system_prompt_file)}) - - # final prompt - messages.append({"role": "user", "content": f"Sequence: {seq_query}"}) - return messages - - -def create_message_one_shot(seq_query, seq_ex, ano_idx_ex, system_prompt_file=ONE_SHOT_DIR): - messages = [] - - messages.append({"role": "system", "content": load_system_prompt(system_prompt_file)}) - - # one shot - messages.append({"role": "user", "content": f"Sequence: {seq_ex}"}) - messages.append({"role": "assistant", "content": ano_idx_ex}) - - # final prompt - messages.append({"role": "user", "content": f"Sequence: {seq_query}"}) - return messages + return completion.choices[0].message.content \ No newline at end of file diff --git a/sigllm/mass_exp.py b/sigllm/mass_exp.py new file mode 100644 index 0000000..2bd2f57 --- /dev/null +++ b/sigllm/mass_exp.py @@ -0,0 +1,84 @@ +import pandas as pd +from data import rolling_window_sequences, sig2str +from orion.data import load_signal, load_anomalies +from sigllm import get_anomalies +from gpt import get_gpt_model_response +from prompt import create_message_zero_shot +from anomalies import merge_anomaly_seq +import numpy as np +from urllib.error import HTTPError +from orion.evaluation.utils import from_list_points_timestamps +from orion.evaluation.point import point_confusion_matrix, point_accuracy, point_f1_score, point_precision, point_recall +from orion.evaluation.contextual import contextual_accuracy, contextual_f1_score, contextual_precision, contextual_recall +import pickle + +df = pd.read_csv("data_summary.csv") #signal summary df + +#signal and result directory +try: + df_res = pd.read_csv("gpt-3.5-turbo-2-digits-res.csv") +except: + df_res = pd.DataFrame(columns=['signal', 'pkl_file_name']) + +computed = df_res['signal'] + +for i, row in df.iterrows(): + if row.signal in computed: + continue + true_ano = load_anomalies(row.signal) + try: + signal = load_signal(row.signal) + except HTTPError: + S3_URL = 'https://sintel-orion-benchmark.s3.amazonaws.com/{}' + signal = pd.read_csv(S3_URL.format(row.signal + '.csv')) + values = signal['value'].values + indices = signal.index.values + #make rolling windows + window_size = 2500 + step_size = 500 + windows, start_indices = rolling_window_sequences(values, indices, window_size, step_size) + #rolling window anomaly detection + final_ano = [] + i = 0 + error = dict() #to save error (if any) when running each window + for seq in windows: + try: + final_ano.append(get_anomalies(seq, create_message_zero_shot, get_gpt_model_response, space = True, decimal = 2)) + except Exception as e: + error[i] = e + i+= 1 + ano_idx = merge_anomaly_seq(final_ano, start_indices, window_size, step_size, beta = 0) + anomalies_pts = idx2time(signal, final_res) + 
diff --git a/sigllm/mass_exp.py b/sigllm/mass_exp.py
new file mode 100644
index 0000000..2bd2f57
--- /dev/null
+++ b/sigllm/mass_exp.py
@@ -0,0 +1,84 @@
+import pandas as pd
+from data import rolling_window_sequences, sig2str
+from orion.data import load_signal, load_anomalies
+from sigllm import get_anomalies
+from gpt import get_gpt_model_response
+from prompt import create_message_zero_shot
+from anomalies import merge_anomaly_seq, idx2time  # idx2time assumed to live alongside merge_anomaly_seq
+import numpy as np
+from urllib.error import HTTPError
+from orion.evaluation.utils import from_list_points_timestamps
+from orion.evaluation.point import point_confusion_matrix, point_accuracy, point_f1_score, point_precision, point_recall
+from orion.evaluation.contextual import contextual_accuracy, contextual_f1_score, contextual_precision, contextual_recall
+import pickle
+
+df = pd.read_csv("data_summary.csv")  # signal summary dataframe
+
+# result summary; resume from it if it already exists
+try:
+    df_res = pd.read_csv("gpt-3.5-turbo-2-digits-res.csv")
+except FileNotFoundError:
+    df_res = pd.DataFrame(columns=['signal', 'pkl_file_name'])
+
+computed = set(df_res['signal'])
+
+for _, row in df.iterrows():
+    if row.signal in computed:
+        continue
+    true_ano = load_anomalies(row.signal)
+    try:
+        signal = load_signal(row.signal)
+    except HTTPError:
+        S3_URL = 'https://sintel-orion-benchmark.s3.amazonaws.com/{}'
+        signal = pd.read_csv(S3_URL.format(row.signal + '.csv'))
+    values = signal['value'].values
+    indices = signal.index.values
+
+    # make rolling windows
+    window_size = 2500
+    step_size = 500
+    windows, start_indices = rolling_window_sequences(values, indices, window_size, step_size)
+
+    # rolling window anomaly detection
+    final_ano = []
+    error = dict()  # to save the error (if any) raised while running each window
+    for w, seq in enumerate(windows):
+        try:
+            final_ano.append(get_anomalies(seq, create_message_zero_shot, get_gpt_model_response,
+                                           space=True, decimal=2))
+        except Exception as e:
+            error[w] = e
+    ano_idx = merge_anomaly_seq(final_ano, start_indices, window_size, step_size, beta=0)
+    anomalies_pts = idx2time(signal, ano_idx)
+    anomalies_contextual = from_list_points_timestamps(anomalies_pts, gap=row.interval)
+
+    ground_truth_pts = []
+    ground_truth_context = []
+    for _, interval in true_ano.iterrows():
+        ground_truth_pts += range(interval.start, interval.end + 1)
+        ground_truth_context.append((interval.start, interval.end))
+
+    start, end = (int(signal.iloc[0].timestamp), int(signal.iloc[-1].timestamp))
+
+    # benchmark
+    tn, fp, fn, tp = point_confusion_matrix(ground_truth_pts, anomalies_pts, start=start, end=end)
+    precision = point_precision(ground_truth_pts, anomalies_pts, start=start, end=end)
+    recall = point_recall(ground_truth_pts, anomalies_pts, start=start, end=end)
+    accuracy = point_accuracy(ground_truth_pts, anomalies_pts, start=start, end=end)
+    f1 = point_f1_score(ground_truth_pts, anomalies_pts, start=start, end=end)
+
+    c_accuracy = contextual_accuracy(ground_truth_context, anomalies_contextual, start=start, end=end)
+    c_f1 = contextual_f1_score(ground_truth_context, anomalies_contextual, start=start, end=end)
+    c_precision = contextual_precision(ground_truth_context, anomalies_contextual, start=start, end=end)
+    c_recall = contextual_recall(ground_truth_context, anomalies_contextual, start=start, end=end)
+
+    result = {'signal': row.signal, 'error': error, 'anomalies': anomalies_pts, 'tp': tp, 'fp': fp,
+              'fn': fn, 'point_precision': precision, 'point_recall': recall,
+              'point_accuracy': accuracy, 'point_f1': f1,
+              'contextual_precision': c_precision, 'contextual_recall': c_recall,
+              'contextual_accuracy': c_accuracy, 'contextual_f1': c_f1}
+    file_name = row.signal + 'gpt-3.5-turbo.pickle'
+
+    with open(file_name, 'wb') as handle:
+        pickle.dump(result, handle, protocol=pickle.HIGHEST_PROTOCOL)
+    df_res = df_res.append({'signal': row.signal, 'pkl_file_name': file_name}, ignore_index=True)
+
+df_res.to_csv("gpt-3.5-turbo-2-digits-res.csv", index=False)
\ No newline at end of file
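After a benchmark run, the per-signal pickles indexed by the CSV above can be reloaded for inspection, roughly as follows (a sketch assuming at least one signal has been processed):

```python
import pickle

import pandas as pd

index = pd.read_csv("gpt-3.5-turbo-2-digits-res.csv")
with open(index.iloc[0].pkl_file_name, 'rb') as handle:
    result = pickle.load(handle)

# Each pickle stores the detected anomalies plus the point and contextual scores.
print(result['signal'], result['point_f1'], result['contextual_f1'])
```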
diff --git a/sigllm/prompt.py b/sigllm/prompt.py
new file mode 100644
index 0000000..6792ffa
--- /dev/null
+++ b/sigllm/prompt.py
@@ -0,0 +1,46 @@
+# -*- coding: utf-8 -*-
+
+"""
+Prompt module.
+
+This module contains functions that are specifically used to create prompts.
+"""
+import os
+
+
+def load_system_prompt(file_path):
+    with open(file_path) as f:
+        system_prompt = f.read()
+    return system_prompt
+
+
+CURRENT_DIR = os.path.dirname(os.path.abspath(__file__))
+
+ZERO_SHOT_FILE = 'gpt_system_prompt_zero_shot.txt'
+ONE_SHOT_FILE = 'gpt_system_prompt_one_shot.txt'
+
+ZERO_SHOT_DIR = os.path.join(CURRENT_DIR, "..", "template", ZERO_SHOT_FILE)
+ONE_SHOT_DIR = os.path.join(CURRENT_DIR, "..", "template", ONE_SHOT_FILE)
+
+
+def create_message_zero_shot(seq_query, system_prompt_file=ZERO_SHOT_DIR):
+    messages = []
+
+    messages.append({"role": "system", "content": load_system_prompt(system_prompt_file)})
+
+    # final prompt
+    messages.append({"role": "user", "content": f"Sequence: {seq_query}"})
+    return messages
+
+
+def create_message_one_shot(seq_query, seq_ex, ano_idx_ex, system_prompt_file=ONE_SHOT_DIR):
+    messages = []
+
+    messages.append({"role": "system", "content": load_system_prompt(system_prompt_file)})
+
+    # one shot
+    messages.append({"role": "user", "content": f"Sequence: {seq_ex}"})
+    messages.append({"role": "assistant", "content": ano_idx_ex})
+
+    # final prompt
+    messages.append({"role": "user", "content": f"Sequence: {seq_query}"})
+    return messages
+
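For reference, the one-shot helper produces a message list shaped as below (illustrative sequences; the system prompt content comes from the template file):

```python
from prompt import create_message_one_shot

messages = create_message_one_shot(seq_query="1 2 1 9 1", seq_ex="2 2 8 2", ano_idx_ex="2")
# [
#     {"role": "system", "content": "<contents of gpt_system_prompt_one_shot.txt>"},
#     {"role": "user", "content": "Sequence: 2 2 8 2"},
#     {"role": "assistant", "content": "2"},
#     {"role": "user", "content": "Sequence: 1 2 1 9 1"},
# ]
```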
diff --git a/sigllm/sigllm.py b/sigllm/sigllm.py
index 20df5d0..f0d0bf8 100644
--- a/sigllm/sigllm.py
+++ b/sigllm/sigllm.py
@@ -9,7 +9,34 @@
 from data import sig2str
 
 
-def get_anomalies(seq, msg_func, model_func, num_iters=1, alpha=0.5):
+def get_anomalies(
+    seq,
+    msg_func,
+    model_func,
+    sep=',',
+    space=False,
+    decimal=0,
+    rescale=True,
+    num_iters=1,
+    alpha=0.5,
+    # gpt_model="gpt-3.5-turbo",
+    # frequency_penalty=0,
+    # logit_bias=None,
+    # logprobs=False,
+    # top_logprobs=None,
+    # max_tokens=None,
+    # n=1,
+    # presence_penalty=0,
+    # response_format={"type": "text"},
+    # seed=None,
+    # stop=None,
+    # stream=False,
+    # temperature=1,
+    # top_p=1,
+    # tools=[],
+    # tool_choice='none',
+    # user=""
+):
     """Get LLM anomaly detection results.
 
     The function get the LLM's anomaly detection and converts them into an 1D array
@@ -23,15 +50,94 @@
             Function to get LLM answer.
         num_iters (int):
             Number of times to run the same query.
+        sep (str):
+            String to separate each element in values. Defaults to `","`.
+        space (bool):
+            Whether to add space between each digit in the result. Defaults to `False`.
+        decimal (int):
+            Number of decimal points to keep from the float representation. Defaults to `0`.
+        rescale (bool):
+            Whether to rescale the time series. Defaults to `True`.
         alpha (float):
             Percentage of total number of votes that an index needs to have to be
             considered anomalous. Default: 0.5
+        gpt_model (str):
+            GPT model name. Defaults to `"gpt-3.5-turbo"`.
+        frequency_penalty (number or null):
+            Number between -2.0 and 2.0. Positive values penalize new tokens based on their
+            existing frequency in the text so far, decreasing the model's likelihood to repeat
+            the same line verbatim. Defaults to `0`.
+        logit_bias (map):
+            Modify the likelihood of specified tokens appearing in the completion.
+            Accepts a JSON object that maps tokens (specified by their token ID in the tokenizer)
+            to an associated bias value from -100 to 100. Mathematically, the bias is added to
+            the logits generated by the model prior to sampling. The exact effect will vary per
+            model, but values between -1 and 1 should decrease or increase likelihood of
+            selection; values like -100 or 100 should result in a ban or exclusive selection of
+            the relevant token. Defaults to `None`.
+        logprobs (bool or null):
+            Whether to return log probabilities of the output tokens or not. If true, returns
+            the log probabilities of each output token returned in the content of message.
+            This option is currently not available on the gpt-4-vision-preview model.
+            Defaults to `False`.
+        top_logprobs (int or null):
+            An integer between 0 and 20 specifying the number of most likely tokens to return
+            at each token position, each with an associated log probability. `logprobs` must be
+            set to `True` if this parameter is used. Defaults to `None`.
+        max_tokens (int or null):
+            The maximum number of tokens that can be generated in the chat completion.
+            Defaults to `None`.
+        n (int or null):
+            How many chat completion choices to generate for each input message. Note that you
+            will be charged based on the number of generated tokens across all of the choices.
+            Defaults to `1`.
+        presence_penalty (number or null):
+            Number between -2.0 and 2.0. Positive values penalize new tokens based on whether
+            they appear in the text so far, increasing the model's likelihood to talk about new
+            topics. Defaults to `0`.
+        response_format (object):
+            An object specifying the format that the model must output. Compatible with GPT-4
+            Turbo and all GPT-3.5 Turbo models newer than gpt-3.5-turbo-1106.
+            Setting to `{"type": "json_object"}` enables JSON mode, which guarantees the message
+            the model generates is valid JSON. Defaults to `{"type": "text"}`.
+        seed (int or null):
+            This feature is in Beta. If specified, our system will make a best effort to sample
+            deterministically, such that repeated requests with the same seed and parameters
+            should return the same result. Determinism is not guaranteed, and you should refer
+            to the system_fingerprint response parameter to monitor changes in the backend.
+            Defaults to `None`.
+        stop (str/array/null):
+            Up to 4 sequences where the API will stop generating further tokens.
+            Defaults to `None`.
+        stream (bool or null):
+            If set, partial message deltas will be sent, like in ChatGPT. Tokens will be sent
+            as data-only server-sent events as they become available, with the stream terminated
+            by a data: [DONE] message. Defaults to `False`.
+        temperature (number or null):
+            What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make
+            the output more random, while lower values like 0.2 will make it more focused and
+            deterministic. We generally recommend altering this or top_p but not both.
+            Defaults to `1`.
+        top_p (number or null):
+            An alternative to sampling with temperature, called nucleus sampling, where the
+            model considers the results of the tokens with top_p probability mass. So 0.1 means
+            only the tokens comprising the top 10% probability mass are considered.
+            We generally recommend altering this or temperature but not both. Defaults to `1`.
+        tools (array):
+            A list of tools the model may call. Currently, only functions are supported as a
+            tool. Use this to provide a list of functions the model may generate JSON inputs
+            for. A max of 128 functions are supported. Defaults to `[]`.
+        tool_choice (str or object):
+            Controls which (if any) function is called by the model.
+            `none` means the model will not call a function and instead generates a message.
+            `auto` means the model can pick between generating a message or calling a function.
+            Specifying a particular function via
+            `{"type": "function", "function": {"name": "my_function"}}` forces the model to
+            call that function. Defaults to `none`.
+        user (str):
+            A unique identifier representing your end-user, which can help OpenAI to monitor
+            and detect abuse.
 
     Returns:
         ndarray:
             1D array containing anomalous indices of the sequence.
     """
-    message = msg_func(sig2str(seq, space=True))
+    message = msg_func(sig2str(seq, sep=sep, space=space, decimal=decimal, rescale=rescale))
     res_list = []
     for i in range(num_iters):
         res = model_func(message)
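Taken together, the new serialization arguments can be exercised end to end much like `mass_exp.py` does (a hypothetical sketch; `num_iters` repeats the query and `alpha` sets the fraction of votes an index needs to be kept):

```python
import numpy as np

from gpt import get_gpt_model_response
from prompt import create_message_zero_shot
from sigllm import get_anomalies

# Toy window; the benchmark script rolls 2500-point windows with a 500-point step.
window = np.random.rand(200)
anomalous_idx = get_anomalies(
    window, create_message_zero_shot, get_gpt_model_response,
    space=True, decimal=2, num_iters=5, alpha=0.6)
```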