feat: load dataset from files #16

Merged · 5 commits · Mar 17, 2024 · Changes from 4 commits
14 changes: 14 additions & 0 deletions examples/humaneval/README.md
@@ -0,0 +1,14 @@
# HumanEval

This example runs OpenAI's [HumanEval](https://github.com/openai/human-eval) benchmark on Empirical.

## Usage

1. Download the dataset from [this link](https://github.com/openai/human-eval/blob/master/data/HumanEval.jsonl.gz)

1. Extract the archive and save the dataset as `HumanEval.jsonl` (a sample record is sketched after these steps)

1. Run with Empirical

   ```sh
   npx @empiricalrun/cli run
   ```
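
For reference, each line of the extracted `HumanEval.jsonl` is a self-contained JSON record. A representative record, with long fields abbreviated, looks roughly like this (sketched from the upstream dataset's published schema):

```json
{"task_id": "HumanEval/0", "prompt": "def has_close_elements(numbers: List[float], threshold: float) -> bool:\n    \"\"\"Check if ...\"\"\"\n", "entry_point": "has_close_elements", "canonical_solution": "...", "test": "..."}
```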
15 changes: 15 additions & 0 deletions examples/humaneval/empiricalrc.json
@@ -0,0 +1,15 @@
{
  "version": "0.0.1",
  "runs": [
    {
      "name": "gpt-3.5-turbo run",
      "model": "openai:gpt-3.5-turbo",
      "prompt": "I will give you an incomplete Python function. Complete the function body such that it follows the specifications in the docstring. Use the exact same function names and parameters. Do not import any third-party modules. Output the complete function with your additions.\n\n```python\n{{prompt}}\n```",
      "assert": []
    }
  ],
  "dataset": {
    "path": "HumanEval.jsonl"
  }
}
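
The `{{prompt}}` placeholder in the run's prompt string lines up with the `prompt` key of each `HumanEval.jsonl` record, which the loader (see `dataset.ts` below) exposes as a named input. A minimal sketch of how such a placeholder could be resolved, assuming simple `{{name}}` string substitution; the `renderPrompt` helper here is hypothetical, for illustration only, not Empirical's actual templating code:

```ts
// Hypothetical helper: replaces each {{name}} placeholder with the matching
// dataset input's value. Empirical's real templating may differ.
function renderPrompt(
  template: string,
  inputs: { name: string; value: string }[],
): string {
  return inputs.reduce(
    (acc, { name, value }) => acc.replaceAll(`{{${name}}}`, value),
    template,
  );
}
```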
62 changes: 62 additions & 0 deletions packages/cli/src/bin/dataset.ts
@@ -0,0 +1,62 @@
import { Dataset, DatasetSampleInput } from "@empiricalrun/types";
import { red, green } from "picocolors";
import { promises as fs } from "fs";

async function downloadDataset(path: string): Promise<Dataset | undefined> {
  if (path.startsWith("http")) {
    const response = await fetch(path);
    const body = await response.text();
    return JSON.parse(body);
  } else {
    if (path.endsWith("json")) {
      // This assumes the json is a well-formed Empirical dataset
      const data = await fs.readFile(path);
      return JSON.parse(data.toString());
    } else if (path.endsWith("jsonl")) {
      // This assumes the jsonl has 1 set of inputs per line
      // and builds up the Empirical dataset format
      const data = await fs.readFile(path);
      const lines = data.toString().split("\n");
      let samples = [];
      for (let [index, line] of lines.entries()) {
        if (line.length === 0) {
          continue;
        }
        let inputs: DatasetSampleInput[] = [];
        try {
          const parsedLine = JSON.parse(line);
          Object.keys(parsedLine).forEach((key) => {
            inputs.push({ name: key, value: parsedLine[key] });
          });
          samples.push({ id: index.toString(), inputs: inputs });
        } catch (error) {
          console.log(
            `${red("[Error]")} Failed to parse line in ${path}: ${line}`,
          );
        }
      }
      return { id: path, samples: samples };
    }
  }
}

export async function loadDataset(dsConfig: any): Promise<Dataset | undefined> {
  let dataset = dsConfig;
  if (dsConfig.path && !dsConfig.samples) {
    try {
      const downloaded = await downloadDataset(dsConfig.path);
      if (downloaded) {
        dataset.samples = downloaded.samples;
        console.log(
          `${green("[Success]")} Dataset fetched from ${dsConfig.path}`,
        );
      }
    } catch (error) {
      console.log(
        `${red("[Error]")} Failed to fetch dataset at ${dsConfig.path}`,
      );
      return;
    }
  }
  return dataset;
}
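
To make the JSONL branch concrete: each line's keys become named inputs, and the line index becomes the sample id. A sketch of the expected mapping for a single-line file (shape follows the code above; values are illustrative):

```ts
// Input file "HumanEval.jsonl" containing one line:
//   {"task_id": "HumanEval/0", "prompt": "def has_close_elements(...):"}
//
// downloadDataset("HumanEval.jsonl") should resolve to:
const expected = {
  id: "HumanEval.jsonl",
  samples: [
    {
      id: "0",
      inputs: [
        { name: "task_id", value: "HumanEval/0" },
        { name: "prompt", value: "def has_close_elements(...):" },
      ],
    },
  ],
};
```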
20 changes: 6 additions & 14 deletions packages/cli/src/bin/index.ts
@@ -6,8 +6,8 @@ import { program } from "commander";
 import packageJSON from "../../package.json";
 import { RunsConfig } from "../types";
 import { execute } from "@empiricalrun/core";
-import { Dataset } from "@empiricalrun/types";
-import { RunCompletion } from "@empiricalrun/types";
+import { loadDataset } from "./dataset";
+import { Dataset, RunCompletion } from "@empiricalrun/types";
 import cliProgress from "cli-progress";
 import express from "express";
 import path from "path";
@@ -17,13 +17,6 @@ const cwd = process.cwd();
 const configFileFullPath = `${cwd}/${configFileName}`;
 const config = getDefaultRunsConfig(DefaultRunsConfigType.DEFAULT);
 
-async function downloadDataset(path: string): Promise<Dataset | undefined> {
-  if (path.startsWith("http")) {
-    const response = await fetch(path);
-    const body = await response.text();
-    return JSON.parse(body);
-  }
-}
 const outputFileName = "output.json";
 const cacheDir = ".empiricalrun";
 const outputFilePath = `${cwd}/${cacheDir}/${outputFileName}`;
@@ -67,12 +60,11 @@ program
 
     console.log(`${green("[Success]")} - read ${configFileName} file`);
     const jsonStr = data.toString();
-    const { runs, dataset } = JSON.parse(jsonStr) as RunsConfig;
+    const { runs, dataset: datasetConfig } = JSON.parse(jsonStr) as RunsConfig;
     // TODO: add check here for empty runs config. Add validator of the file
 
-    if (dataset.path && !dataset.samples) {
-      const downloaded = await downloadDataset(dataset.path);
-      dataset.samples = downloaded?.samples;
+    const dataset = await loadDataset(datasetConfig);
+    if (!dataset) {
+      return;
     }
 
     const progressBar = setupProgressBar(
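
One behavioral note that falls out of the refactor: `downloadDataset` treats any path starting with `http` as a remote dataset and `JSON.parse`s the response body directly, so a remote file must already be in the Empirical `Dataset` shape. The JSONL-to-samples conversion only runs for local `.jsonl` paths; a `.jsonl` URL would fail to parse and surface as the fetch error in `loadDataset`. A config sketch exercising the remote branch (placeholder URL):

```json
{
  "dataset": {
    "path": "https://example.com/my-dataset.json"
  }
}
```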