-
Notifications
You must be signed in to change notification settings - Fork 264
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
✨ [Tasks] JSON Schema spec for Inference types + TS type generation #449
Changes from 28 commits
7c50482
fd98112
352e7c5
5551f5b
fad594b
9a8f327
02ba10c
93c37f5
bbf72ec
16a9beb
b27846c
7d9a9f6
dbd0254
6d90348
d027115
224c039
b8dae86
b84825e
4484e39
a9c9ae1
f9fd4f9
d4ec535
00501a6
29fecc0
d220a9b
49a1d50
f4784bf
a33987f
6558af4
0724e26
29f5975
3a98f58
e0a4939
2d46399
c1151c0
077a88f
6b10c4d
c35fe85
6f1a8b3
9d25d28
499ed5f
bf48f5e
49a8151
e4f3d13
826181a
0000f02
8dc4d17
9ccb3a4
accdeff
3a3d4ba
4742c9e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -24,9 +24,10 @@ | |
"format": "prettier --write .", | ||
"format:check": "prettier --check .", | ||
"prepublishOnly": "pnpm run build", | ||
"build": "tsup src/index.ts --format cjs,esm --clean --dts", | ||
"build": "tsup src/index.ts src/scripts/**.ts --format cjs,esm --clean --dts", | ||
"prepare": "pnpm run build", | ||
"check": "tsc" | ||
"check": "tsc", | ||
"inference-codegen": "pnpm run build && node dist/scripts/inference-codegen.js" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Probably should include this directly in the build command |
||
}, | ||
"files": [ | ||
"dist", | ||
|
@@ -40,5 +41,8 @@ | |
], | ||
"author": "Hugging Face", | ||
"license": "MIT", | ||
"devDependencies": {} | ||
"devDependencies": { | ||
"@types/node": "^20.11.5", | ||
"quicktype-core": "https://github.com/huggingface/quicktype/raw/pack-18.0.15/packages/quicktype-core/quicktype-core-18.0.15.tgz" | ||
} | ||
} |
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Maybe the |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,112 @@ | ||
import type { SerializedRenderResult } from "quicktype-core"; | ||
import { quicktype, InputData, JSONSchemaInput, FetchingJSONSchemaStore } from "quicktype-core"; | ||
import * as fs from "fs/promises"; | ||
import { existsSync as pathExists } from "fs"; | ||
import * as path from "path"; | ||
|
||
const TYPESCRIPT_HEADER_FILE = ` | ||
/** | ||
* Inference code generated from the JSON schema spec in ./spec | ||
* | ||
* Using src/scripts/inference-codegen | ||
*/ | ||
|
||
`; | ||
|
||
/**
 * Locate the package root by walking up the directory tree until a directory
 * containing a `package.json` file is found.
 *
 * The search starts at the PARENT of `startDir` (the directory holding this
 * script is never considered itself) and stops at the filesystem root.
 *
 * @param startDir Directory whose ancestors are searched. Defaults to the directory of this script.
 * @returns The normalized path of the closest ancestor containing a `package.json`,
 *          or an empty string when no ancestor has one.
 */
const rootDirFinder = function (startDir: string = __dirname): string {
  let currentPath = path.dirname(path.resolve(startDir));
  for (;;) {
    // Probe the filesystem instead of `require`-ing the manifest: `require` is not
    // available in ES module output and would also execute a JSON parse for nothing.
    if (pathExists(path.join(currentPath, "package.json"))) {
      return path.normalize(currentPath);
    }
    const parentPath = path.dirname(currentPath);
    if (parentPath === currentPath) {
      // `dirname` is a fixed point at the filesystem root: nothing left to inspect.
      return "";
    }
    currentPath = parentPath;
  }
};
SBrandeis marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
/**
 * Assemble the quicktype input for a single task from its `input.json` and `output.json` schemas.
 *
 * @param taskId The ID of the task for which we are generating code
 * @param taskSpecDir The path to the directory where the input.json & output.json files are
 * @param allSpecFiles An array of paths to all the tasks specs. Allows resolving cross-file references ($ref).
 * @returns An `InputData` holding one JSON schema input with two sources: `<taskId>-input` and `<taskId>-output`
 */
async function buildInputData(taskId: string, taskSpecDir: string, allSpecFiles: string[]): Promise<InputData> {
  const schemaInput = new JSONSchemaInput(new FetchingJSONSchemaStore(), [], allSpecFiles);

  // Sources are registered one after the other, input first, then output.
  for (const kind of ["input", "output"]) {
    const rawSchema = await fs.readFile(`${taskSpecDir}/${kind}.json`, { encoding: "utf-8" });
    await schemaInput.addSource({ name: `${taskId}-${kind}`, schema: rawSchema });
  }

  const result = new InputData();
  result.addInput(schemaInput);
  return result;
}
|
||
/**
 * Render TypeScript type declarations for the given quicktype input.
 *
 * @param inputData The quicktype input to render
 * @returns The serialized quicktype render result
 */
async function generateTypescript(inputData: InputData): Promise<SerializedRenderResult> {
  // Renderer flags for the TypeScript target ("explicit-unions" is intentionally left disabled).
  const rendererOptions = {
    "just-types": true,
    "nice-property-names": true,
    "prefer-unions": true,
    "prefer-const-values": true,
    "prefer-unknown": true,
  };
  const rendered = await quicktype({
    inputData,
    lang: "typescript",
    alphabetizeProperties: true,
    rendererOptions,
  });
  return rendered;
}
|
||
/**
 * Generate an `inference.ts` file for every task that ships a JSON schema spec.
 *
 * Lists the sub-directories of `<root>/src/tasks` (ignoring `placeholder`), where
 * `<root>` is the value returned by `rootDirFinder()`. For each task that has both
 * `spec/input.json` and `spec/output.json`, the generated TypeScript lines are
 * written to `<task dir>/inference.ts`, prefixed with the shared header.
 * Tasks with an incomplete spec are skipped.
 */
async function main(): Promise<void> {
  const rootDir = rootDirFinder();
  const tasksDir = path.join(rootDir, "src", "tasks");

  const entries = await fs.readdir(tasksDir, { withFileTypes: true });
  // Build task paths from `tasksDir` rather than `Dirent.path`: that property only exists
  // on recent Node.js releases (and is deprecated), so relying on it throws on older runtimes.
  const allTasks = entries
    .filter((entry) => entry.isDirectory() && entry.name !== "placeholder")
    .map((entry) => ({ task: entry.name, dirPath: path.join(tasksDir, entry.name) }));

  // Every existing spec file is passed along so that cross-file references ($ref) can be resolved.
  const allSpecFiles = allTasks
    .flatMap(({ dirPath }) => [path.join(dirPath, "spec", "input.json"), path.join(dirPath, "spec", "output.json")])
    .filter((filepath) => pathExists(filepath));

  for (const { task, dirPath } of allTasks) {
    const taskSpecDir = path.join(dirPath, "spec");
    const hasInputSpec = pathExists(path.join(taskSpecDir, "input.json"));
    const hasOutputSpec = pathExists(path.join(taskSpecDir, "output.json"));
    if (!(hasInputSpec && hasOutputSpec)) {
      console.debug(`No spec found for task ${task} - skipping`);
      continue;
    }
    console.debug(`✨ Generating types for task`, task);

    console.debug(" 📦 Building input data");
    const inputData = await buildInputData(task, taskSpecDir, allSpecFiles);

    console.debug(" 🏭 Generating typescript code");
    const { lines } = await generateTypescript(inputData);
    // "w+" creates the file or truncates an existing one, so stale output is always replaced.
    await fs.writeFile(path.join(dirPath, "inference.ts"), [TYPESCRIPT_HEADER_FILE, ...lines].join("\n"), {
      flag: "w+",
      encoding: "utf-8",
    });
  }
  console.debug("✅ All done!");
}
|
||
// Run the generator, then force the process to exit: code 0 on success, 1 when `main` rejects.
void (async () => {
  let exitCode = 0;
  try {
    await main();
  } catch (err) {
    console.error("Failure", err);
    exitCode = 1;
  } finally {
    process.exit(exitCode);
  }
})();
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
/** | ||
Wauplin marked this conversation as resolved.
Show resolved
Hide resolved
|
||
* Inference code generated from the JSON schema spec in ./spec | ||
* | ||
* Using src/scripts/inference-codegen | ||
*/ | ||
|
||
/** | ||
* Inputs for Audio Classification inference | ||
*/ | ||
export interface AudioClassificationInput { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Audio data is usually passed through https://huggingface.co/docs/api-inference/detailed_parameters#audio-classification-task There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Re-flagging this comment in case it was lost There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same for images. The jsonschema cannot specify this since sending as raw data and sending as json are 2 different things. So for now it's kind of a blind spot. If we provide an openapi schema for our APIs in the future, then it will be possible to document it. Openapi easily integrates with jsonschema so having them is already a first good step. (difference between a jsonschema as in this PR and an openapi description is that this PR describes objects with their attributes while the openapi description will include stuff like server routes, accepted headers, etc.) (^ only my understanding of the specs, anyone feel free to correct me 😄) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes - sorry for the delay in answering Leaving the image/audio data as
Yes that is correct, there will be some additional work necessary to generate an OpenAPI spec for an inference API (including actually specifying how we expect the binary data to be represented) |
||
/** | ||
* One or several audio files to classify | ||
*/ | ||
inputs: unknown; | ||
/** | ||
* Additional inference parameters | ||
*/ | ||
parameters?: AudioClassificationParameters; | ||
[property: string]: unknown; | ||
} | ||
|
||
/** | ||
* Additional inference parameters for Audio Classification | ||
*/ | ||
export interface AudioClassificationParameters { | ||
/** | ||
* When specified, limits the output to the top K most probable classes. | ||
*/ | ||
topK?: number; | ||
/** | ||
* Any other property is accepted and left untyped. | ||
*/ | ||
[property: string]: unknown; | ||
} | ||
|
||
/** | ||
* Outputs for Audio Classification inference | ||
*/ | ||
export interface AudioClassificationOutput { | ||
/** | ||
* The predicted class label (model specific). | ||
*/ | ||
label: string; | ||
/** | ||
* The corresponding probability. | ||
*/ | ||
score: number; | ||
/** | ||
* Any other property is accepted and left untyped. | ||
*/ | ||
[property: string]: unknown; | ||
} | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
tsup is used for files to be published.
If you just want to run the script, you can take inspiration from `doc-internal`:
No need to add `ts-node`; it's already included in the root package.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
^ That command fails with a syntax error (`cannot use import outside of a module` or something similar).
Is it OK to use `tsc` to compile the script in `pnpm run inference-codegen`?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think we can add `type: "module"` to the `package.json`.