wip: spec generate parameters
Commit 826181a (parent e4f3d13), committed by SBrandeis on Jan 26, 2024
Showing 5 changed files with 387 additions and 6 deletions.
@@ -40,11 +40,91 @@ export interface AutomaticSpeechRecognitionParameters {
*/
export interface GenerationParameters {
/**
* I can be the papa you'd be the mama
* Whether to use sampling instead of greedy decoding when generating new tokens.
*/
doSample?: boolean;
/**
* Controls the stopping condition for beam-based methods.
*/
earlyStopping?: EarlyStoppingUnion;
/**
* If set to float strictly between 0 and 1, only tokens with a conditional probability
* greater than epsilon_cutoff will be sampled. In the paper, suggested values range from
* 3e-4 to 9e-4, depending on the size of the model. See [Truncation Sampling as Language
* Model Desmoothing](https://hf.co/papers/2210.15191) for more details.
*/
epsilonCutoff?: number;
/**
* Eta sampling is a hybrid of locally typical sampling and epsilon sampling. If set to
* float strictly between 0 and 1, a token is only considered if it is greater than either
* eta_cutoff or sqrt(eta_cutoff) * exp(-entropy(softmax(next_token_logits))). The latter
* term is intuitively the expected next token probability, scaled by sqrt(eta_cutoff). In
* the paper, suggested values range from 3e-4 to 2e-3, depending on the size of the model.
* See [Truncation Sampling as Language Model Desmoothing](https://hf.co/papers/2210.15191)
* for more details.
*/
etaCutoff?: number;
/**
* The maximum length (in tokens) of the generated text, including the input.
*/
maxLength?: number;
/**
* The maximum number of tokens to generate. Takes precedence over maxLength.
*/
maxNewTokens?: number;
/**
* The minimum length (in tokens) of the generated text, including the input.
*/
minLength?: number;
/**
* The minimum number of tokens to generate. Takes precedence over minLength.
*/
minNewTokens?: number;
/**
* Number of groups to divide num_beams into in order to ensure diversity among different
* groups of beams. See [this paper](https://hf.co/papers/1610.02424) for more details.
*/
numBeamGroups?: number;
/**
* Number of beams to use for beam search.
*/
numBeams?: number;
/**
* The value balances the model confidence and the degeneration penalty in contrastive
* search decoding.
*/
penaltyAlpha?: number;
/**
* The value used to modulate the next token probabilities.
*/
temperature?: number;
/**
* The number of highest probability vocabulary tokens to keep for top-k-filtering.
*/
topK?: number;
/**
* If set to float < 1, only the smallest set of most probable tokens with probabilities
* that add up to top_p or higher are kept for generation.
*/
topP?: number;
/**
* Local typicality measures how similar the conditional probability of predicting a target
* token next is to the expected conditional probability of predicting a random token next,
* given the partial text already generated. If set to float < 1, the smallest set of the
* most locally typical tokens with probabilities that add up to typical_p or higher are
* kept for generation. See [this paper](https://hf.co/papers/2202.00666) for more details.
*/
typicalP?: number;
/**
* Whether the model should use the past key/value attentions (cache) to speed up decoding.
*/
useCache?: boolean;
[property: string]: unknown;
}
/**
* Controls the stopping condition for beam-based methods.
*/
export type EarlyStoppingUnion = boolean | "never";
export interface AutomaticSpeechRecognitionOutputChunk {
/**
* A chunk of text identified by the model
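For context, a minimal sketch of how a client might fill in these fields once the types are generated. The import path is an assumption about where the generated module lives; the field names and types follow the GenerationParameters interface shown above.

// Sketch only: import path assumed; shape follows GenerationParameters above.
import type { GenerationParameters } from "./inference";

const samplingParams: GenerationParameters = {
	doSample: true, // sample instead of greedy decoding
	temperature: 0.7, // soften the next-token distribution
	topK: 50, // keep only the 50 most probable tokens
	topP: 0.95, // nucleus sampling: smallest token set whose probabilities sum to 0.95
	maxNewTokens: 128, // cap on newly generated tokens; takes precedence over maxLength
};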
82 changes: 81 additions & 1 deletion packages/tasks/src/tasks/image-to-text/inference.ts
@@ -40,11 +40,91 @@ export interface ImageToTextParameters {
*/
export interface GenerationParameters {
/**
* I can be the papa you'd be the mama
* Whether to use sampling instead of greedy decoding when generating new tokens.
*/
doSample?: boolean;
/**
* Controls the stopping condition for beam-based methods.
*/
earlyStopping?: EarlyStoppingUnion;
/**
* If set to float strictly between 0 and 1, only tokens with a conditional probability
* greater than epsilon_cutoff will be sampled. In the paper, suggested values range from
* 3e-4 to 9e-4, depending on the size of the model. See [Truncation Sampling as Language
* Model Desmoothing](https://hf.co/papers/2210.15191) for more details.
*/
epsilonCutoff?: number;
/**
* Eta sampling is a hybrid of locally typical sampling and epsilon sampling. If set to
* float strictly between 0 and 1, a token is only considered if it is greater than either
* eta_cutoff or sqrt(eta_cutoff) * exp(-entropy(softmax(next_token_logits))). The latter
* term is intuitively the expected next token probability, scaled by sqrt(eta_cutoff). In
* the paper, suggested values range from 3e-4 to 2e-3, depending on the size of the model.
* See [Truncation Sampling as Language Model Desmoothing](https://hf.co/papers/2210.15191)
* for more details.
*/
etaCutoff?: number;
/**
* The maximum length (in tokens) of the generated text, including the input.
*/
maxLength?: number;
/**
* The maximum number of tokens to generate. Takes precedence over maxLength.
*/
maxNewTokens?: number;
/**
* The minimum length (in tokens) of the generated text, including the input.
*/
minLength?: number;
/**
* The minimum number of tokens to generate. Takes precedence over minLength.
*/
minNewTokens?: number;
/**
* Number of groups to divide num_beams into in order to ensure diversity among different
* groups of beams. See [this paper](https://hf.co/papers/1610.02424) for more details.
*/
numBeamGroups?: number;
/**
* Number of beams to use for beam search.
*/
numBeams?: number;
/**
* The value balances the model confidence and the degeneration penalty in contrastive
* search decoding.
*/
penaltyAlpha?: number;
/**
* The value used to modulate the next token probabilities.
*/
temperature?: number;
/**
* The number of highest probability vocabulary tokens to keep for top-k-filtering.
*/
topK?: number;
/**
* If set to float < 1, only the smallest set of most probable tokens with probabilities
* that add up to top_p or higher are kept for generation.
*/
topP?: number;
/**
* Local typicality measures how similar the conditional probability of predicting a target
* token next is to the expected conditional probability of predicting a random token next,
* given the partial text already generated. If set to float < 1, the smallest set of the
* most locally typical tokens with probabilities that add up to typical_p or higher are
* kept for generation. See [this paper](https://hf.co/papers/2202.00666) for more details.
*/
typicalP?: number;
/**
* Whether the model should use the past key/value attentions (cache) to speed up decoding.
*/
useCache?: boolean;
[property: string]: unknown;
}
/**
* Controls the stopping condition for beam-based methods.
*/
export type EarlyStoppingUnion = boolean | "never";
export type ImageToTextOutput = ImageToTextOutputElement[];
/**
* Outputs of inference for the Image To Text task
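A second hedged sketch, this time for beam-based decoding, to illustrate how EarlyStoppingUnion is meant to be used. The import path is again an assumption; everything else mirrors the interface above.

// Sketch only: import path assumed.
import type { GenerationParameters } from "./inference";

const beamParams: GenerationParameters = {
	doSample: false, // beam decoding rather than sampling
	numBeams: 8, // total beams for beam search
	numBeamGroups: 4, // split the 8 beams into 4 diverse groups of 2
	earlyStopping: "never", // boolean | "never": keep searching until the canonical stopping condition
	minNewTokens: 16,
	maxNewTokens: 64,
};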
64 changes: 62 additions & 2 deletions packages/tasks/src/tasks/schema-utils.json
@@ -1,7 +1,7 @@
{
"$id": "/inference/schemas/schema-utils.json",
"$schema": "http://json-schema.org/draft-06/schema#",
"description": "Common type definitions shared by several tasks",
"description": "(Incomplete!) Common type definitions shared by several tasks",
"definitions": {
"GenerationParameters": {
"title": "GenerationParameters",
@@ -10,7 +10,67 @@
"properties": {
"temperature": {
"type": "number",
"description": "I can be the papa you'd be the mama"
"description": "The value used to modulate the next token probabilities."
},
"topK": {
"type": "integer",
"description": "The number of highest probability vocabulary tokens to keep for top-k-filtering."
},
"topP": {
"type": "number",
"description": "If set to float < 1, only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for generation."
},
"typicalP": {
"type": "number",
"description": " Local typicality measures how similar the conditional probability of predicting a target token next is to the expected conditional probability of predicting a random token next, given the partial text already generated. If set to float < 1, the smallest set of the most locally typical tokens with probabilities that add up to typical_p or higher are kept for generation. See [this paper](https://hf.co/papers/2202.00666) for more details."
},
"epsilonCutoff": {
"type": "number",
"description": "If set to float strictly between 0 and 1, only tokens with a conditional probability greater than epsilon_cutoff will be sampled. In the paper, suggested values range from 3e-4 to 9e-4, depending on the size of the model. See [Truncation Sampling as Language Model Desmoothing](https://hf.co/papers/2210.15191) for more details."
},
"etaCutoff": {
"type": "number",
"description": "Eta sampling is a hybrid of locally typical sampling and epsilon sampling. If set to float strictly between 0 and 1, a token is only considered if it is greater than either eta_cutoff or sqrt(eta_cutoff) * exp(-entropy(softmax(next_token_logits))). The latter term is intuitively the expected next token probability, scaled by sqrt(eta_cutoff). In the paper, suggested values range from 3e-4 to 2e-3, depending on the size of the model. See [Truncation Sampling as Language Model Desmoothing](https://hf.co/papers/2210.15191) for more details."
},
"maxLength": {
"type": "integer",
"description": "The maximum length (in tokens) of the generated text, including the input."
},
"maxNewTokens": {
"type": "integer",
"description": "The maximum number of tokens to generate. Takes precedence over maxLength."
},
"minLength": {
"type": "integer",
"description": "The minimum length (in tokens) of the generated text, including the input."
},
"minNewTokens": {
"type": "integer",
"description": "The minimum number of tokens to generate. Takes precedence over maxLength."
},
"doSample": {
"type": "boolean",
"description": "Whether to use sampling instead of greedy decoding when generating new tokens."
},
"earlyStopping": {
"description": "Controls the stopping condition for beam-based methods.",
"oneOf": [{ "type": "boolean" }, { "const": "never", "type": "string" }]
},
"numBeams": {
"type": "integer",
"description": "Number of beams to use for beam search."
},
"numBeamGroups": {
"type": "integer",
"description": "Number of groups to divide num_beams into in order to ensure diversity among different groups of beams. See [this paper](https://hf.co/papers/1610.02424) for more details."
},
"penaltyAlpha": {
"type": "number",
"description": "The value balances the model confidence and the degeneration penalty in contrastive search decoding."
},
"useCache": {
"type": "boolean",
"description": "Whether the model should use the past last key/values attentions to speed up decoding"
}
}
}
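Since the definitions above are plain JSON Schema (draft-06), they can also be used for runtime validation. A minimal sketch with Ajv, assuming a validator configured to accept draft-06 schemas and a build setup that can import JSON files:

// Sketch only: assumes Ajv set up for draft-06 schemas and resolveJsonModule enabled.
import Ajv from "ajv";
import schemaUtils from "./schema-utils.json";

const ajv = new Ajv();
ajv.addSchema(schemaUtils); // registered under its $id: /inference/schemas/schema-utils.json

const validate = ajv.getSchema(
	"/inference/schemas/schema-utils.json#/definitions/GenerationParameters"
);
const ok = validate?.({ temperature: 0.7, topK: 50, earlyStopping: "never" });
console.log(ok, validate?.errors ?? null);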
82 changes: 81 additions & 1 deletion packages/tasks/src/tasks/text-to-audio/inference.ts
@@ -36,11 +36,91 @@ export interface TextToAudioParameters {
*/
export interface GenerationParameters {
/**
* I can be the papa you'd be the mama
* Whether to use sampling instead of greedy decoding when generating new tokens.
*/
doSample?: boolean;
/**
* Controls the stopping condition for beam-based methods.
*/
earlyStopping?: EarlyStoppingUnion;
/**
* If set to float strictly between 0 and 1, only tokens with a conditional probability
* greater than epsilon_cutoff will be sampled. In the paper, suggested values range from
* 3e-4 to 9e-4, depending on the size of the model. See [Truncation Sampling as Language
* Model Desmoothing](https://hf.co/papers/2210.15191) for more details.
*/
epsilonCutoff?: number;
/**
* Eta sampling is a hybrid of locally typical sampling and epsilon sampling. If set to
* float strictly between 0 and 1, a token is only considered if it is greater than either
* eta_cutoff or sqrt(eta_cutoff) * exp(-entropy(softmax(next_token_logits))). The latter
* term is intuitively the expected next token probability, scaled by sqrt(eta_cutoff). In
* the paper, suggested values range from 3e-4 to 2e-3, depending on the size of the model.
* See [Truncation Sampling as Language Model Desmoothing](https://hf.co/papers/2210.15191)
* for more details.
*/
etaCutoff?: number;
/**
* The maximum length (in tokens) of the generated text, including the input.
*/
maxLength?: number;
/**
* The maximum number of tokens to generate. Takes precedence over maxLength.
*/
maxNewTokens?: number;
/**
* The minimum length (in tokens) of the generated text, including the input.
*/
minLength?: number;
/**
* The minimum number of tokens to generate. Takes precedence over minLength.
*/
minNewTokens?: number;
/**
* Number of groups to divide num_beams into in order to ensure diversity among different
* groups of beams. See [this paper](https://hf.co/papers/1610.02424) for more details.
*/
numBeamGroups?: number;
/**
* Number of beams to use for beam search.
*/
numBeams?: number;
/**
* The value balances the model confidence and the degeneration penalty in contrastive
* search decoding.
*/
penaltyAlpha?: number;
/**
* The value used to modulate the next token probabilities.
*/
temperature?: number;
/**
* The number of highest probability vocabulary tokens to keep for top-k-filtering.
*/
topK?: number;
/**
* If set to float < 1, only the smallest set of most probable tokens with probabilities
* that add up to top_p or higher are kept for generation.
*/
topP?: number;
/**
* Local typicality measures how similar the conditional probability of predicting a target
* token next is to the expected conditional probability of predicting a random token next,
* given the partial text already generated. If set to float < 1, the smallest set of the
* most locally typical tokens with probabilities that add up to typical_p or higher are
* kept for generation. See [this paper](https://hf.co/papers/2202.00666) for more details.
*/
typicalP?: number;
/**
* Whether the model should use the past key/value attentions (cache) to speed up decoding.
*/
useCache?: boolean;
[property: string]: unknown;
}
/**
* Controls the stopping condition for beam-based methods.
*/
export type EarlyStoppingUnion = boolean | "never";
export type TextToAudioOutput = TextToAudioOutputElement[];
/**
* Outputs of inference for the Text To Audio task
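One last hedged sketch combining the truncation-sampling knobs with the length controls, since their valid ranges are easy to get wrong. The import path is an assumption; the values follow the ranges suggested in the descriptions above.

// Sketch only: import path assumed; epsilon/eta cutoffs must be strictly
// between 0 and 1, typically around 3e-4 to 2e-3 depending on model size.
import type { GenerationParameters } from "./inference";

const truncationParams: GenerationParameters = {
	doSample: true,
	epsilonCutoff: 3e-4, // drop tokens whose conditional probability is below this
	etaCutoff: 1e-3, // hybrid of locally typical and epsilon sampling
	typicalP: 0.9, // keep the smallest locally typical set summing to 0.9
	maxNewTokens: 256, // takes precedence over maxLength
	useCache: true, // reuse past key/value attentions to speed up decoding
};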