-
Notifications
You must be signed in to change notification settings - Fork 1.9k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* ToknizersAPIsUpdate * Address the feedback
- Loading branch information
Showing
38 changed files
with
4,831 additions
and
2,817 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
// Licensed to the .NET Foundation under one or more agreements. | ||
// The .NET Foundation licenses this file to you under the MIT license. | ||
// See the LICENSE file in the project root for more information. | ||
|
||
using System.Collections.Generic; | ||
|
||
namespace Microsoft.ML.Tokenizers | ||
{ | ||
/// <summary> | ||
/// The result of encoding a text. | ||
/// </summary> | ||
/// <typeparam name="T">The type of the tokens.</typeparam> | ||
public struct EncodeResults<T> | ||
{ | ||
/// <summary> | ||
/// Gets or sets the list of tokens generated from the encoded text. | ||
/// </summary> | ||
public IReadOnlyList<T> Tokens { get; set; } | ||
|
||
/// <summary> | ||
/// Gets or sets the normalized text generated during the encoding process. This can be <see langword="null"/> if the encoding process does not normalize the input text. | ||
/// </summary> | ||
public string? NormalizedText { get; set; } | ||
|
||
/// <summary> | ||
/// Gets or sets the count of characters consumed from the input text. | ||
/// </summary> | ||
public int CharsConsumed { get; set; } | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
// Licensed to the .NET Foundation under one or more agreements. | ||
// The .NET Foundation licenses this file to you under the MIT license. | ||
// See the LICENSE file in the project root for more information. | ||
|
||
namespace Microsoft.ML.Tokenizers | ||
{ | ||
/// <summary> | ||
/// The settings used to encode a text. | ||
/// </summary> | ||
public struct EncodeSettings | ||
{ | ||
public EncodeSettings() { MaxTokenCount = int.MaxValue; } | ||
/// <summary> | ||
/// Gets or sets a value indicating whether to consider the input normalization during encoding. | ||
/// </summary> | ||
public bool ConsiderNormalization { get; set; } | ||
|
||
/// <summary> | ||
/// Gets or sets a value indicating whether to consider the pre-tokenization during encoding. | ||
/// </summary> | ||
public bool ConsiderPreTokenization { get; set; } | ||
|
||
/// <summary> | ||
/// Gets or sets the maximum number of tokens to generate. | ||
/// </summary> | ||
public int MaxTokenCount { get; set; } | ||
} | ||
} | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.