diff --git a/README.md b/README.md index 99bf2ae3..cae18e7e 100644 --- a/README.md +++ b/README.md @@ -100,6 +100,7 @@ Available backends: - `deepl` - [DeepL Pro](#deepl-translation-config) - `yandex` - [Yandex Translate](#yandex-translation-config) - `openai` - [OpenAI](#openai-translation-config) +- `watsonx` - [watsonx](#watsonx-translation-config) ### Find usages @@ -483,6 +484,28 @@ OPENAI_API_KEY= OPENAI_MODEL= ``` + +### watsonx Translate + +`i18n-tasks translate-missing` requires a watsonx project ID and API key; get them at [IBM watsonx](https://www.ibm.com/watsonx/). + +```yaml +# config/i18n-tasks.yml +translation: + backend: watsonx + watsonx_api_key: + watsonx_project_id: + watsonx_model: +``` + +or via environment variable: + +```bash +WATSONX_API_KEY= +WATSONX_PROJECT_ID= +WATSONX_MODEL= +``` + ### Contextual Rails Parser There is an experimental feature to parse Rails with more context. `i18n-tasks` will support: diff --git a/config/locales/en.yml b/config/locales/en.yml index 2e72475a..a464e5bc 100644 --- a/config/locales/en.yml +++ b/config/locales/en.yml @@ -130,6 +130,15 @@ en: none: Every translation is in use. usages: none: No key usages found. + watsonx_translate: + errors: + no_api_key: >- + Set watsonx API key via WATSONX_API_KEY environment variable or translation.watsonx_api_key + in config/i18n-tasks.yml. Get the key at https://www.ibm.com/products/watsonx-ai. + no_project_id: >- + Set watsonx Project ID via WATSONX_PROJECT_ID environment variable or translation.watsonx_project_id + in config/i18n-tasks.yml. Get the project ID at https://www.ibm.com/products/watsonx-ai. + no_results: watsonx returned no results. yandex_translate: errors: no_api_key: >- diff --git a/config/locales/ru.yml b/config/locales/ru.yml index c6e7e4b2..e928191d 100644 --- a/config/locales/ru.yml +++ b/config/locales/ru.yml @@ -131,6 +131,15 @@ ru: none: Все переводы используются. usages: none: Не найдено использований. 
+ watsonx_translate: + errors: + no_api_key: >- + Установите ключ API watsonx через переменную среды WATSONX_API_KEY или translation.watsonx_api_key + в config/i18n-tasks.yml. Получите ключ на https://www.ibm.com/products/watsonx-ai. + no_project_id: >- + Установите идентификатор проекта watsonx через переменную среды WATSONX_PROJECT_ID или translation.watsonx_project_id + в config/i18n-tasks.yml. Получите идентификатор проекта на https://www.ibm.com/products/watsonx-ai. + no_results: watsonx не вернул результатов. yandex_translate: errors: no_api_key: |- diff --git a/lib/i18n/tasks/command/options/locales.rb b/lib/i18n/tasks/command/options/locales.rb index 1d06127e..aadf5877 100644 --- a/lib/i18n/tasks/command/options/locales.rb +++ b/lib/i18n/tasks/command/options/locales.rb @@ -32,7 +32,7 @@ module Locales parser: OptionParsers::Locale::Parser, default: 'base' - TRANSLATION_BACKENDS = %w[google deepl yandex openai].freeze + TRANSLATION_BACKENDS = %w[google deepl yandex openai watsonx].freeze arg :translation_backend, '-b', '--backend BACKEND', diff --git a/lib/i18n/tasks/configuration.rb b/lib/i18n/tasks/configuration.rb index 1bc3c4a4..938bb1ab 100644 --- a/lib/i18n/tasks/configuration.rb +++ b/lib/i18n/tasks/configuration.rb @@ -60,7 +60,7 @@ def data_config # translation config # @return [Hash{String => String,Hash,Array}] - def translation_config # rubocop:disable Metrics/AbcSize + def translation_config # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity @config_sections[:translation] ||= begin conf = (config[:translation] || {}).with_indifferent_access conf[:backend] ||= DEFAULTS[:translation_backend] @@ -70,6 +70,9 @@ def translation_config # rubocop:disable Metrics/AbcSize conf[:deepl_version] = ENV['DEEPL_VERSION'] if ENV.key?('DEEPL_VERSION') conf[:openai_api_key] = ENV['OPENAI_API_KEY'] if ENV.key?('OPENAI_API_KEY') conf[:openai_model] = ENV['OPENAI_MODEL'] if ENV.key?('OPENAI_MODEL') + conf[:watsonx_api_key] = 
ENV['WATSONX_API_KEY'] if ENV.key?('WATSONX_API_KEY')
+      conf[:watsonx_project_id] = ENV['WATSONX_PROJECT_ID'] if ENV.key?('WATSONX_PROJECT_ID')
+      conf[:watsonx_model] = ENV['WATSONX_MODEL'] if ENV.key?('WATSONX_MODEL')
       conf[:yandex_api_key] = ENV['YANDEX_API_KEY'] if ENV.key?('YANDEX_API_KEY')
       conf
     end
diff --git a/lib/i18n/tasks/translation.rb b/lib/i18n/tasks/translation.rb
index b73bfcba..99c00abb 100644
--- a/lib/i18n/tasks/translation.rb
+++ b/lib/i18n/tasks/translation.rb
@@ -3,6 +3,7 @@
 require 'i18n/tasks/translators/deepl_translator'
 require 'i18n/tasks/translators/google_translator'
 require 'i18n/tasks/translators/openai_translator'
+require 'i18n/tasks/translators/watsonx_translator'
 require 'i18n/tasks/translators/yandex_translator'
 
 module I18n::Tasks
@@ -19,6 +20,8 @@ def translate_forest(forest, from:, backend:)
         Translators::GoogleTranslator.new(self).translate_forest(forest, from)
       when :openai
         Translators::OpenAiTranslator.new(self).translate_forest(forest, from)
+      when :watsonx
+        Translators::WatsonxTranslator.new(self).translate_forest(forest, from)
       when :yandex
         Translators::YandexTranslator.new(self).translate_forest(forest, from)
       else
diff --git a/lib/i18n/tasks/translators/watsonx_translator.rb b/lib/i18n/tasks/translators/watsonx_translator.rb
new file mode 100644
index 00000000..8ef01bcf
--- /dev/null
+++ b/lib/i18n/tasks/translators/watsonx_translator.rb
@@ -0,0 +1,190 @@
+# frozen_string_literal: true
+
+require 'json'
+require 'i18n/tasks/translators/base_translator'
+require 'active_support/core_ext/string/filters'
+
+module I18n::Tasks::Translators
+  # Translation backend that sends batches of values to a watsonx text generation model.
+  class WatsonxTranslator < BaseTranslator
+    # max allowed texts per request
+    BATCH_SIZE = 50
+    # Model used when translation.watsonx_model is not configured.
+    DEFAULT_MODEL = 'meta-llama/llama-3-2-90b-vision-instruct'
+    # Template expanded with Kernel#format (:from, :to), so a literal "%{" is written as "%%{".
+    # NOTE(review): the tag names in the "HTML tags like" bullet were blank in the reviewed copy;
+    # `<details>` / `<summary>` is a reconstruction - confirm against the original patch.
+    DEFAULT_SYSTEM_PROMPT = <<~PROMPT.squish
+      You are a helpful assistant that translates content from the %{from} locale
+      to the %{to} locale in an i18n locale array.
+      You always preserve the structure and formatting exactly as it is.
+
+      The array has a structured format and contains multiple strings. Your task is to translate
+      each of these strings and create a new array with the translated strings.
+
+      Reminder:
+      - Translate only the text, preserving the structure and formatting.
+      - Do not translate any URLs.
+      - Do not translate HTML tags like `<details>` and `<summary>`.
+      - HTML markups (enclosed in < and > characters) must not be changed under any circumstance.
+      - Variables (starting with %%{ and ending with }) must not be changed under any circumstance.
+      - Output only the result, without any additional information or comments.
+    PROMPT
+
+    # @return [Hash] +options+ with :from and :to merged in
+    def options_for_translate_values(from:, to:, **options)
+      options.merge(
+        from: from,
+        to: to
+      )
+    end
+
+    # @return [Hash] no extra options for HTML values
+    def options_for_html
+      {}
+    end
+
+    # @return [Hash] no extra options for plain-text values
+    def options_for_plain
+      {}
+    end
+
+    # @return [String] localized "no results" error message
+    def no_results_error_message
+      I18n.t('i18n_tasks.watsonx_translate.errors.no_results')
+    end
+
+    private
+
+    # @return [WatsonxClient] memoized client built from the configured API key
+    def translator
+      @translator ||= WatsonxClient.new(key: api_key)
+    end
+
+    # @return [String] translation.watsonx_api_key
+    # @raise [I18n::Tasks::CommandError] when the key is blank
+    def api_key
+      @api_key ||= begin
+        key = @i18n_tasks.translation_config[:watsonx_api_key]
+        fail ::I18n::Tasks::CommandError, I18n.t('i18n_tasks.watsonx_translate.errors.no_api_key') if key.blank?
+
+        key
+      end
+    end
+
+    # @return [String] translation.watsonx_project_id
+    # @raise [I18n::Tasks::CommandError] when the project id is blank
+    def project_id
+      @project_id ||= begin
+        project_id = @i18n_tasks.translation_config[:watsonx_project_id]
+        if project_id.blank?
+          fail ::I18n::Tasks::CommandError,
+               I18n.t('i18n_tasks.watsonx_translate.errors.no_project_id')
+        end
+
+        project_id
+      end
+    end
+
+    # @return [String] translation.watsonx_model, or DEFAULT_MODEL when blank
+    def model
+      @model ||= @i18n_tasks.translation_config[:watsonx_model].presence || DEFAULT_MODEL
+    end
+
+    # @return [String] translation.watsonx_system_prompt, or DEFAULT_SYSTEM_PROMPT when blank
+    def system_prompt
+      @system_prompt ||= @i18n_tasks.translation_config[:watsonx_system_prompt].presence || DEFAULT_SYSTEM_PROMPT
+    end
+
+    # Translates +list+ in slices of BATCH_SIZE, one request per slice.
+    #
+    # @return [Array] parsed model output of every slice, flattened into one array
+    def translate_values(list, from:, to:)
+      list.each_slice(BATCH_SIZE).map { |batch| parse_translations(translate(batch, from, to)) }.flatten
+    end
+
+    # Parses the JSON text generated for one batch.
+    #
+    # @param raw [String, nil] generated text
+    # @raise [I18n::Tasks::CommandError] when +raw+ is blank or not valid JSON
+    def parse_translations(raw)
+      fail ::I18n::Tasks::CommandError, no_results_error_message if raw.blank?
+
+      JSON.parse(raw)
+    rescue JSON::ParserError => e
+      fail ::I18n::Tasks::CommandError, "watsonx returned invalid JSON: #{e.message}"
+    end
+
+    # Builds the prompt for +values+ and returns the text generated by the model.
+    #
+    # @return [String, nil] results[0].generated_text of the response, nil when absent
+    def translate(values, from, to)
+      prompt = [
+        '<|eot_id|><|start_header_id|>system<|end_header_id|>',
+        format(system_prompt, from: from, to: to),
+        '<|eot_id|><|start_header_id|>user<|end_header_id|>Translate this array:',
+        "<|eot_id|><|start_header_id|>user<|end_header_id|>#{values.to_json}",
+        '<|eot_id|><|start_header_id|>assistant<|end_header_id|>'
+      ].join
+
+      response = translator.generate_text(
+        model_id: model,
+        project_id: project_id,
+        input: prompt,
+        parameters: {
+          decoding_method: :greedy,
+          max_new_tokens: 2048,
+          repetition_penalty: 1
+        }
+      )
+      response.dig('results', 0, 'generated_text')
+    end
+  end
+end
+
+# Small HTTP client for the watsonx text generation endpoint.
+class WatsonxClient
+  WATSONX_BASE_URL = 'https://us-south.ml.cloud.ibm.com/ml/'
+  IBM_CLOUD_IAM_URL = 'https://iam.cloud.ibm.com/identity/token'
+
+  # Exchanges +key+ for an IAM access token and builds the connection that sends it as a Bearer token.
+  #
+  # @param key [String] IBM Cloud API key
+  # @raise [I18n::Tasks::CommandError] when the faraday gem cannot be loaded
+  def initialize(key:)
+    begin
+      require 'faraday'
+    rescue LoadError
+      raise ::I18n::Tasks::CommandError, "Add gem 'faraday' to your Gemfile to use this command"
+    end
+
+    @http = Faraday.new(url: WATSONX_BASE_URL) do |conn|
+      conn.use Faraday::Response::RaiseError
+      conn.request :json
+      conn.response :json
+      conn.options.timeout = 600
+      conn.request :authorization, :Bearer, token(key)
+    end
+  end
+
+  # POSTs +opts+ as the JSON request body.
+  #
+  # @return [Object] response body as decoded by the :json response middleware
+  def generate_text(**opts)
+    @http.post('v1/text/generation?version=2024-05-20', opts).body
+  end
+
+  private
+
+  # Requests an IAM access token for +key+. The grant goes in a form-encoded request body
+  # so that the API key is not part of the request URL.
+  def token(key)
+    iam = Faraday.new(url: IBM_CLOUD_IAM_URL) do |conn|
+      conn.use Faraday::Response::RaiseError
+      conn.request :url_encoded
+      conn.response :json
+    end
+    grant = { grant_type: 'urn:ibm:params:oauth:grant-type:apikey', apikey: key }
+    iam.post { |req| req.body = grant }.body['access_token']
+  end
+end