feat: load dataset from files #16

Merged · 5 commits · Mar 17, 2024 · Changes from 4 commits
14 changes: 14 additions & 0 deletions examples/humaneval/README.md
@@ -0,0 +1,14 @@
# HumanEval

This example runs OpenAI's [HumanEval](https://github.com/openai/human-eval) benchmark on Empirical.

## Usage

1. Download the dataset from [this link](https://github.com/openai/human-eval/blob/master/data/HumanEval.jsonl.gz)

1. Extract the archive and save the dataset as `HumanEval.jsonl` (a sample record is sketched after these steps)

1. Run with Empirical

   ```sh
   npx @empiricalrun/cli run
   ```
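
For reference, each line of the extracted `HumanEval.jsonl` is a self-contained JSON record. A representative record, with long fields abbreviated, looks roughly like this (sketched from the upstream dataset's published schema):

```json
{"task_id": "HumanEval/0", "prompt": "def has_close_elements(numbers: List[float], threshold: float) -> bool:\n    \"\"\"Check if ...\"\"\"\n", "entry_point": "has_close_elements", "canonical_solution": "...", "test": "..."}
```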
15 changes: 15 additions & 0 deletions examples/humaneval/empiricalrc.json
@@ -0,0 +1,15 @@
{
  "version": "0.0.1",
  "runs": [
    {
      "name": "gpt-3.5-turbo run",
      "model": "openai:gpt-3.5-turbo",
      "prompt": "I will give you an incomplete Python function. Complete the function body such that it follows the specifications in the docstring. Use the exact same function names and parameters. Do not import any third-party modules. Output the complete function with your additions.\n\n```python\n{{prompt}}\n```",
      "assert": []
    }
  ],
  "dataset": {
    "path": "HumanEval.jsonl"
  }
}
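
The `{{prompt}}` placeholder in the run's prompt string lines up with the `prompt` key of each `HumanEval.jsonl` record, which the loader (see `dataset.ts` below) exposes as a named input. A minimal sketch of how such a placeholder could be resolved, assuming simple `{{name}}` string substitution; the `renderPrompt` helper here is hypothetical, for illustration only, not Empirical's actual templating code:

```ts
// Hypothetical helper: replaces each {{name}} placeholder with the matching
// dataset input's value. Empirical's real templating may differ.
function renderPrompt(
  template: string,
  inputs: { name: string; value: string }[],
): string {
  return inputs.reduce(
    (acc, { name, value }) => acc.replaceAll(`{{${name}}}`, value),
    template,
  );
}
```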
62 changes: 62 additions & 0 deletions packages/cli/src/bin/dataset.ts
@@ -0,0 +1,62 @@
import { Dataset, DatasetSampleInput } from "@empiricalrun/types";
import { red, green } from "picocolors";
import { promises as fs } from "fs";

async function downloadDataset(path: string): Promise<Dataset | undefined> {
  if (path.startsWith("http")) {
    const response = await fetch(path);
    const body = await response.text();
    return JSON.parse(body);
  } else {
    if (path.endsWith("json")) {
      // This assumes the json is a well-formed Empirical dataset
      const data = await fs.readFile(path);
      return JSON.parse(data.toString());
    } else if (path.endsWith("jsonl")) {
      // This assumes the jsonl has 1 set of inputs per line
      // and builds up the Empirical dataset format
      const data = await fs.readFile(path);
      const lines = data.toString().split("\n");
      let samples = [];
      for (let [index, line] of lines.entries()) {
        if (line.length === 0) {
          continue;
        }
        let inputs: DatasetSampleInput[] = [];
        try {
          const parsedLine = JSON.parse(line);
          Object.keys(parsedLine).forEach((key) => {
            inputs.push({ name: key, value: parsedLine[key] });
          });
          samples.push({ id: index.toString(), inputs: inputs });
        } catch (error) {
          console.log(
            `${red("[Error]")} Failed to parse line in ${path}: ${line}`,
          );
        }
      }
      return { id: path, samples: samples };
    }
  }
}

export async function loadDataset(dsConfig: any): Promise<Dataset | undefined> {
  let dataset = dsConfig;
  if (dsConfig.path && !dsConfig.samples) {
    try {
      const downloaded = await downloadDataset(dsConfig.path);
      if (downloaded) {
        dataset.samples = downloaded.samples;
        console.log(
          `${green("[Success]")} Dataset fetched from ${dsConfig.path}`,
        );
      }
    } catch (error) {
      console.log(
        `${red("[Error]")} Failed to fetch dataset at ${dsConfig.path}`,
      );
      return;
    }
  }
  return dataset;
}
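
To make the JSONL branch concrete: each line's keys become named inputs, and the line index becomes the sample id. A sketch of the expected mapping for a single-line file (shape follows the code above; values are illustrative):

```ts
// Input file "HumanEval.jsonl" containing one line:
//   {"task_id": "HumanEval/0", "prompt": "def has_close_elements(...):"}
//
// downloadDataset("HumanEval.jsonl") should resolve to:
const expected = {
  id: "HumanEval.jsonl",
  samples: [
    {
      id: "0",
      inputs: [
        { name: "task_id", value: "HumanEval/0" },
        { name: "prompt", value: "def has_close_elements(...):" },
      ],
    },
  ],
};
```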
20 changes: 6 additions & 14 deletions packages/cli/src/bin/index.ts
@@ -6,8 +6,8 @@ import { program } from "commander";
 import packageJSON from "../../package.json";
 import { RunsConfig } from "../types";
 import { execute } from "@empiricalrun/core";
-import { Dataset } from "@empiricalrun/types";
-import { RunCompletion } from "@empiricalrun/types";
+import { loadDataset } from "./dataset";
+import { Dataset, RunCompletion } from "@empiricalrun/types";
 import cliProgress from "cli-progress";
 import express from "express";
 import path from "path";
@@ -17,13 +17,6 @@ const cwd = process.cwd();
 const configFileFullPath = `${cwd}/${configFileName}`;
 const config = getDefaultRunsConfig(DefaultRunsConfigType.DEFAULT);
 
-async function downloadDataset(path: string): Promise<Dataset | undefined> {
-  if (path.startsWith("http")) {
-    const response = await fetch(path);
-    const body = await response.text();
-    return JSON.parse(body);
-  }
-}
 const outputFileName = "output.json";
 const cacheDir = ".empiricalrun";
 const outputFilePath = `${cwd}/${cacheDir}/${outputFileName}`;
@@ -67,12 +60,11 @@ program
 
     console.log(`${green("[Success]")} - read ${configFileName} file`);
     const jsonStr = data.toString();
-    const { runs, dataset } = JSON.parse(jsonStr) as RunsConfig;
+    const { runs, dataset: datasetConfig } = JSON.parse(jsonStr) as RunsConfig;
     // TODO: add check here for empty runs config. Add validator of the file
 
-    if (dataset.path && !dataset.samples) {
-      const downloaded = await downloadDataset(dataset.path);
-      dataset.samples = downloaded?.samples;
+    const dataset = await loadDataset(datasetConfig);
+    if (!dataset) {
+      return;
     }
 
     const progressBar = setupProgressBar(
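
One behavioral note that falls out of the refactor: `downloadDataset` treats any path starting with `http` as a remote dataset and `JSON.parse`s the response body directly, so a remote file must already be in the Empirical `Dataset` shape. The JSONL-to-samples conversion only runs for local `.jsonl` paths; a `.jsonl` URL would fail to parse and surface as the fetch error in `loadDataset`. A config sketch exercising the remote branch (placeholder URL):

```json
{
  "dataset": {
    "path": "https://example.com/my-dataset.json"
  }
}
```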