From ccd2b72823ffe11828732e9405ef4d26ea702e6b Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 24 Mar 2026 09:09:41 +0000 Subject: [PATCH 01/14] Initial plan From feae6edc3e5ec9dde6d264cb855dc7ca664b21eb Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 24 Mar 2026 09:16:57 +0000 Subject: [PATCH 02/14] Add ol_openedx_ai_static_translations plugin from PR #758 Co-authored-by: asadali145 <52656433+asadali145@users.noreply.github.com> Agent-Logs-Url: https://github.com/mitodl/open-edx-plugins/sessions/eaab10e1-8468-4675-9118-cd9b1fdac54e --- .../CHANGELOG.rst | 11 + .../LICENSE.txt | 28 + .../README.rst | 54 + .../__init__.py | 3 + .../ol_openedx_ai_static_translations/apps.py | 24 + .../constants.py | 235 ++ .../glossaries/machine_learning/ar.txt | 175 ++ .../glossaries/machine_learning/de.txt | 175 ++ .../glossaries/machine_learning/el.txt | 988 ++++++ .../glossaries/machine_learning/es.txt | 175 ++ .../glossaries/machine_learning/es_419.txt | 175 ++ .../glossaries/machine_learning/fr.txt | 175 ++ .../glossaries/machine_learning/ja.txt | 175 ++ .../glossaries/machine_learning/pt_BR.txt | 175 ++ .../glossaries/machine_learning/ru.txt | 213 ++ .../management/__init__.py | 0 .../management/commands/__init__.py | 0 .../commands/sync_and_translate_language.py | 2673 +++++++++++++++++ .../settings/__init__.py | 40 + .../settings/cms.py | 10 + .../utils.py | 1869 ++++++++++++ .../pyproject.toml | 39 + .../setup.cfg | 41 + 23 files changed, 7453 insertions(+) create mode 100644 src/ol_openedx_ai_static_translations/CHANGELOG.rst create mode 100644 src/ol_openedx_ai_static_translations/LICENSE.txt create mode 100644 src/ol_openedx_ai_static_translations/README.rst create mode 100644 src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/__init__.py create mode 100644 
src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/apps.py create mode 100644 src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/constants.py create mode 100644 src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/glossaries/machine_learning/ar.txt create mode 100644 src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/glossaries/machine_learning/de.txt create mode 100644 src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/glossaries/machine_learning/el.txt create mode 100644 src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/glossaries/machine_learning/es.txt create mode 100644 src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/glossaries/machine_learning/es_419.txt create mode 100644 src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/glossaries/machine_learning/fr.txt create mode 100644 src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/glossaries/machine_learning/ja.txt create mode 100644 src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/glossaries/machine_learning/pt_BR.txt create mode 100644 src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/glossaries/machine_learning/ru.txt create mode 100644 src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/management/__init__.py create mode 100644 src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/management/commands/__init__.py create mode 100644 src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/management/commands/sync_and_translate_language.py create mode 100644 src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/settings/__init__.py create mode 100644 src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/settings/cms.py create mode 100644 
src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/utils.py create mode 100644 src/ol_openedx_ai_static_translations/pyproject.toml create mode 100644 src/ol_openedx_ai_static_translations/setup.cfg diff --git a/src/ol_openedx_ai_static_translations/CHANGELOG.rst b/src/ol_openedx_ai_static_translations/CHANGELOG.rst new file mode 100644 index 000000000..2e194af77 --- /dev/null +++ b/src/ol_openedx_ai_static_translations/CHANGELOG.rst @@ -0,0 +1,11 @@ +Change Log +---------- + +.. + All enhancements and patches to ol_openedx_ai_static_translations will be documented + in this file. It adheres to the structure of https://keepachangelog.com/ , + but in reStructuredText instead of Markdown (for ease of incorporation into + Sphinx documentation and the PyPI description). + + This project adheres to Semantic Versioning (https://semver.org/). +.. There should always be an "Unreleased" section for changes pending release. diff --git a/src/ol_openedx_ai_static_translations/LICENSE.txt b/src/ol_openedx_ai_static_translations/LICENSE.txt new file mode 100644 index 000000000..83284fb7e --- /dev/null +++ b/src/ol_openedx_ai_static_translations/LICENSE.txt @@ -0,0 +1,28 @@ +Copyright (C) 2022 MIT Open Learning + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +* Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/src/ol_openedx_ai_static_translations/README.rst b/src/ol_openedx_ai_static_translations/README.rst new file mode 100644 index 000000000..86a9fd232 --- /dev/null +++ b/src/ol_openedx_ai_static_translations/README.rst @@ -0,0 +1,54 @@ +OL Open edX AI Static Translations +==================================== + +An Open edX plugin that provides AI-powered static translation management. It syncs translation keys, translates them using LLM providers, and creates pull requests with translated content. + +Purpose +******* + +This plugin provides the ``sync_and_translate_language`` management command for syncing and translating Open edX static strings (frontend JSON and backend PO files) using LLM providers (OpenAI, Gemini, Mistral) with optional glossary support. + +Setup +===== + +For detailed installation instructions, please refer to the `plugin installation guide <../../docs#installation-guide>`_. + +Installation required in: + +* Studio (CMS) + +Configuration +============= + +This plugin shares settings with ``ol_openedx_course_translations``. Ensure the following settings are configured: + +.. 
code-block:: python + + TRANSLATIONS_PROVIDERS: { + "default_provider": "mistral", + "openai": {"api_key": "", "default_model": "gpt-5.2"}, + "gemini": {"api_key": "", "default_model": "gemini-3-pro-preview"}, + "mistral": {"api_key": "", "default_model": "mistral-large-latest"}, + } + TRANSLATIONS_GITHUB_TOKEN: + TRANSLATIONS_REPO_PATH: "" + TRANSLATIONS_REPO_URL: "https://github.com/mitodl/mitxonline-translations.git" + +Usage +===== + +.. code-block:: bash + + # Sync and translate a language + ./manage.py cms sync_and_translate_language el + + # With specific provider and model + ./manage.py cms sync_and_translate_language el --provider openai --model gpt-5.2 --glossary + +License +******* + +The code in this repository is licensed under the BSD 3-Clause license unless +otherwise noted. + +Please see `LICENSE.txt `_ for details. diff --git a/src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/__init__.py b/src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/__init__.py new file mode 100644 index 000000000..13e6bccff --- /dev/null +++ b/src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/__init__.py @@ -0,0 +1,3 @@ +""" +MIT's Open edX AI static translations plugin +""" diff --git a/src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/apps.py b/src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/apps.py new file mode 100644 index 000000000..e9757453b --- /dev/null +++ b/src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/apps.py @@ -0,0 +1,24 @@ +""" +ol_openedx_ai_static_translations Django application initialization. +""" + +from django.apps import AppConfig +from edx_django_utils.plugins import PluginSettings +from openedx.core.djangoapps.plugins.constants import ProjectType, SettingsType + + +class OLOpenedXAIStaticTranslationsConfig(AppConfig): + """ + Configuration for the ol_openedx_ai_static_translations Django application. 
+ """ + + name = "ol_openedx_ai_static_translations" + verbose_name = "OL AI Static Translations" + + plugin_app = { + PluginSettings.CONFIG: { + ProjectType.CMS: { + SettingsType.COMMON: {PluginSettings.RELATIVE_PATH: "settings.cms"}, + }, + }, + } diff --git a/src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/constants.py b/src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/constants.py new file mode 100644 index 000000000..c2f57f9e0 --- /dev/null +++ b/src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/constants.py @@ -0,0 +1,235 @@ +"""Constants for AI static translation synchronization.""" + +# LLM Provider names +PROVIDER_DEEPL = "deepl" +PROVIDER_GEMINI = "gemini" +PROVIDER_MISTRAL = "mistral" +PROVIDER_OPENAI = "openai" + +# Learner-facing frontend applications that require translation +LEARNER_FACING_APPS = [ + "frontend-app-learning", + "frontend-app-learner-dashboard", + "frontend-app-learner-record", + "frontend-app-account", + "frontend-app-profile", + "frontend-app-authn", + "frontend-app-catalog", + "frontend-app-discussions", + "frontend-component-header", + "frontend-component-footer", + "frontend-app-ora", + "frontend-platform", +] + +# Plural forms configuration for different languages +# Based on GNU gettext plural forms specification +# See: https://www.gnu.org/software/gettext/manual/html_node/Plural-forms.html +PLURAL_FORMS = { + # Languages with no plural forms (nplurals=1) + "ja": "nplurals=1; plural=0;", # Japanese + "ko": "nplurals=1; plural=0;", # Korean + "zh": "nplurals=1; plural=0;", # Chinese (all variants) + "th": "nplurals=1; plural=0;", # Thai + "vi": "nplurals=1; plural=0;", # Vietnamese + "id": "nplurals=1; plural=0;", # Indonesian + "ms": "nplurals=1; plural=0;", # Malay + "km": "nplurals=1; plural=0;", # Khmer + "bo": "nplurals=1; plural=0;", # Tibetan + # Languages with 2 plural forms: plural=(n != 1) + "en": "nplurals=2; plural=(n != 1);", # English + "es": 
"nplurals=2; plural=(n != 1);", # Spanish (all variants) + "de": "nplurals=2; plural=(n != 1);", # German + "el": "nplurals=2; plural=(n != 1);", # Greek + "it": "nplurals=2; plural=(n != 1);", # Italian + "pt": "nplurals=2; plural=(n != 1);", # Portuguese (all variants) + "nl": "nplurals=2; plural=(n != 1);", # Dutch + "sv": "nplurals=2; plural=(n != 1);", # Swedish + "da": "nplurals=2; plural=(n != 1);", # Danish + "no": "nplurals=2; plural=(n != 1);", # Norwegian + "nb": "nplurals=2; plural=(n != 1);", # Norwegian Bokmål + "nn": "nplurals=2; plural=(n != 1);", # Norwegian Nynorsk + "fi": "nplurals=2; plural=(n != 1);", # Finnish + "is": "nplurals=2; plural=(n != 1);", # Icelandic + "et": "nplurals=2; plural=(n != 1);", # Estonian + "lv": "nplurals=2; plural=(n != 1);", # Latvian + "he": "nplurals=2; plural=(n != 1);", # Hebrew + "hi": "nplurals=2; plural=(n != 1);", # Hindi + "bn": "nplurals=2; plural=(n != 1);", # Bengali + "gu": "nplurals=2; plural=(n != 1);", # Gujarati + "kn": "nplurals=2; plural=(n != 1);", # Kannada + "ml": "nplurals=2; plural=(n != 1);", # Malayalam + "ta": "nplurals=2; plural=(n != 1);", # Tamil + "te": "nplurals=2; plural=(n != 1);", # Telugu + "or": "nplurals=2; plural=(n != 1);", # Oriya + "si": "nplurals=2; plural=(n != 1);", # Sinhala + "ne": "nplurals=2; plural=(n != 1);", # Nepali + "mr": "nplurals=2; plural=(n != 1);", # Marathi + "ur": "nplurals=2; plural=(n != 1);", # Urdu + "az": "nplurals=2; plural=(n != 1);", # Azerbaijani + "uz": "nplurals=2; plural=(n != 1);", # Uzbek + "kk": "nplurals=2; plural=(n != 1);", # Kazakh + "mn": "nplurals=2; plural=(n != 1);", # Mongolian + "sq": "nplurals=2; plural=(n != 1);", # Albanian + "eu": "nplurals=2; plural=(n != 1);", # Basque + "ca": "nplurals=2; plural=(n != 1);", # Catalan + "gl": "nplurals=2; plural=(n != 1);", # Galician + "tr": "nplurals=2; plural=(n != 1);", # Turkish + "af": "nplurals=2; plural=(n != 1);", # Afrikaans + "fil": "nplurals=2; plural=(n != 1);", # Filipino + # 
Languages with 2 plural forms: plural=(n > 1) + "fr": "nplurals=2; plural=(n > 1);", # French + "br": "nplurals=2; plural=(n > 1);", # Breton + # Languages with 3 plural forms + "pl": ( + "nplurals=3; plural=(n==1 ? 0 : n%10>=2 && n%10<=4 && " + "(n%100<10 || n%100>=20) ? 1 : 2);" + ), # Polish + "ru": ( + "nplurals=3; plural=(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && " + "(n%100<10 || n%100>=20) ? 1 : 2);" + ), # Russian + "uk": ( + "nplurals=3; plural=(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && " + "(n%100<10 || n%100>=20) ? 1 : 2);" + ), # Ukrainian + "be": ( + "nplurals=3; plural=(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && " + "(n%100<10 || n%100>=20) ? 1 : 2);" + ), # Belarusian + "sr": ( + "nplurals=3; plural=(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && " + "(n%100<10 || n%100>=20) ? 1 : 2);" + ), # Serbian + "hr": ( + "nplurals=3; plural=(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && " + "(n%100<10 || n%100>=20) ? 1 : 2);" + ), # Croatian + "bs": ( + "nplurals=3; plural=(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && " + "(n%100<10 || n%100>=20) ? 1 : 2);" + ), # Bosnian + "cs": "nplurals=3; plural=(n==1 ? 0 : (n>=2 && n<=4) ? 1 : 2);", # Czech + "sk": "nplurals=3; plural=(n==1 ? 0 : (n>=2 && n<=4) ? 1 : 2);", # Slovak + "lt": ( + "nplurals=3; plural=(n%10==1 && n%100!=11 ? 0 : n%10>=2 && " + "(n%100<10 || n%100>=20) ? 1 : 2);" + ), # Lithuanian + "hy": "nplurals=3; plural=(n==1 ? 0 : n>=2 && n<=4 ? 1 : 2);", # Armenian + "ro": ( + "nplurals=3; plural=(n==1 ? 0 : (n==0 || (n%100 > 0 && n%100 < 20)) ? 1 : 2);" + ), # Romanian + # Languages with 4 plural forms + "cy": ( + "nplurals=4; plural=(n==1 ? 0 : n==2 ? 1 : (n==8 || n==11) ? 2 : 3);" + ), # Welsh + "ga": "nplurals=4; plural=(n==1 ? 0 : n==2 ? 1 : (n>2 && n<7) ? 2 : 3);", # Irish + "gd": ( + "nplurals=4; plural=(n==1 || n==11) ? 0 : (n==2 || n==12) ? 1 : " + "(n>2 && n<20) ? 2 : 3);" + ), # Scottish Gaelic + "mt": ( + "nplurals=4; plural=(n==1 ? 
0 : n==0 || (n%100>=2 && n%100<=10) ? 1 : " + "(n%100>=11 && n%100<=19) ? 2 : 3);" + ), # Maltese + # Languages with 6 plural forms + "ar": ( + "nplurals=6; plural=(n==0 ? 0 : n==1 ? 1 : n==2 ? 2 : n%100>=3 && " + "n%100<=10 ? 3 : n%100>=11 && n%100<=99 ? 4 : 5);" + ), # Arabic + # Other languages + "fa": "nplurals=2; plural=(n==0 || n==1 ? 0 : 1);", # Persian/Farsi + "hu": "nplurals=2; plural=(n != 1);", # Hungarian + "bg": "nplurals=2; plural=(n != 1);", # Bulgarian + "am": "nplurals=2; plural=(n > 1);", # Amharic +} + +# Default plural form fallback (English-style) +# Used when a language code is not found in PLURAL_FORMS +DEFAULT_PLURAL_FORM = "nplurals=2; plural=(n != 1);" + +# Typo patterns to fix in translation files +TYPO_PATTERNS = [ + ("Serch", "Search"), +] + +# Backend PO file names +BACKEND_PO_FILES = ["django.po", "djangojs.po"] + +# Backend plugin apps: (repo_dir, module_name) under translations/. +# Used by sync_and_translate_language to sync/translate at +# translations///conf/locale//LC_MESSAGES/django.po. +# When pulled in edx-platform (make pull_translations), these go to +# conf/plugins-locale/plugins//. 
+TRANSLATABLE_PLUGINS = [ + ("open-edx-plugins", "ol_openedx_chat"), +] + +# PO file header metadata +PO_HEADER_PROJECT_VERSION = "0.1a" +PO_HEADER_BUGS_EMAIL = "openedx-translation@googlegroups.com" +PO_HEADER_POT_CREATION_DATE = "2023-06-13 08:00+0000" +PO_HEADER_MIME_VERSION = "1.0" +PO_HEADER_CONTENT_TYPE = "text/plain; charset=UTF-8" +PO_HEADER_CONTENT_TRANSFER_ENCODING = "8bit" +PO_HEADER_TRANSIFEX_TEAM_BASE_URL = "https://app.transifex.com/open-edx/teams/6205" + +# File and directory names +TRANSLATION_FILE_NAMES = { + "transifex_input": "transifex_input.json", + "english": "en.json", + "messages_dir": "messages", + "i18n_dir": "i18n", + "locale_dir": "locale", + "lc_messages": "LC_MESSAGES", + "conf_dir": "conf", + "edx_platform": "edx-platform", +} + +# JSON file formatting +DEFAULT_JSON_INDENT = 2 + +# Language code to human-readable name mapping +# Used in PO file headers for Language-Team field +LANGUAGE_MAPPING = { + "ar": "Arabic", + "de": "German", + "el": "Greek", + "es": "Spanish", + "fr": "French", + "hi": "Hindi", + "id": "Indonesian", + "ja": "Japanese", + "kr": "Korean", + "pt": "Portuguese", + "ru": "Russian", + "sq": "Albanian", + "tr": "Turkish", + "zh": "Chinese", +} + +# Maximum number of retries for failed translation batches +MAX_RETRIES = 3 + +# Glossary parsing constants +EXPECTED_GLOSSARY_PARTS = 2 # English term and translation separated by "->" + +# HTTP Status Codes +HTTP_OK = 200 +HTTP_CREATED = 201 +HTTP_NOT_FOUND = 404 +HTTP_TOO_MANY_REQUESTS = 429 +HTTP_UNPROCESSABLE_ENTITY = 422 + +# Error message length limit +MAX_ERROR_MESSAGE_LENGTH = 200 + +# Maximum length for strings in log messages (truncate with "...") +MAX_LOG_STRING_LENGTH = 50 +MAX_LOG_ICU_STRING_LENGTH = 100 + +# Plural category counts (GNU gettext nplurals) +PLURAL_CATEGORIES_ARABIC = 6 # zero, one, two, few, many, other +PLURAL_CATEGORIES_FOUR = 4 # one, two, few, other +PLURAL_CATEGORIES_THREE = 3 # one, few, other +PLURAL_CATEGORIES_TWO = 2 # one, other (most 
languages) diff --git a/src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/glossaries/machine_learning/ar.txt b/src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/glossaries/machine_learning/ar.txt new file mode 100644 index 000000000..246ddba39 --- /dev/null +++ b/src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/glossaries/machine_learning/ar.txt @@ -0,0 +1,175 @@ +# AR HINTS +## TERM MAPPINGS +These are preferred terminology choices for this language. Use them whenever they sound natural; adapt freely if context requires. + +- 'accuracy' -> 'الدقة' +- 'activation function' -> 'دالّة التفعيل' +- 'artificial intelligence' -> 'الذكاء الاصطناعي' +- 'AUC' -> 'AUC' +- 'AUC (Area under the ROC curve)' -> 'المساحة تحت منحنى ROC' +- 'backpropagation' -> 'الانتشار العكسي' +- 'batch' -> 'دفعة' +- 'batch size' -> 'حجم الدفعة' +- 'bias (ethics/fairness)' -> 'التحيّز (الأخلاقيات/الإنصاف)' +- 'bias (math) or bias term' -> 'الانحياز (في الرياضيات) أو مصطلح الانحياز' +- 'bias in ethics and fairness' -> 'التحيز في الأخلاق والعدالة' +- 'bias term' -> 'مصطلح التحيز' +- 'binary classification' -> 'التصنيف الثنائي' +- 'bucketing' -> 'تصنيف البيانات' +- 'categorical' -> 'فئوية' +- 'categorical data' -> 'البيانات الفئوية' +- 'class' -> 'صنف' +- 'class-imbalanced dataset' -> 'مجموعة بيانات غير متوازنة الفئات' +- 'class-imbalanced datasets' -> 'مجموعات بيانات غير متوازنة الفئات' +- 'classification' -> 'التصنيف' +- 'classification model' -> 'نموذج التصنيف' +- 'classification threshold' -> 'عتبة التصنيف' +- 'classifier' -> 'مصنِّف' +- 'clipping' -> 'القص' +- 'confusion matrix' -> 'مصفوفة نجاح التوقعات' +- 'continuous feature' -> 'خاصية مستمرة' +- 'convergence' -> 'التقارب' +- 'data set or dataset' -> 'مجموعة البيانات' +- 'DataFrame' -> 'DataFrame' +- 'dataset' -> 'مجموعة بيانات' +- 'deep learning' -> 'التعلم العميق' +- 'deep model' -> 'نموذج عميق' +- 'dense feature' -> 'خاصية كثيفة' +- 'depth' -> 'العمق' +- 'discrete feature' 
-> 'خاصية محدّدة القيم' +- 'discrete features' -> 'الميزات المنفصلة' +- 'dynamic' -> 'ديناميكي' +- 'dynamic model' -> 'نموذج ديناميكي' +- 'early stopping' -> 'الإيقاف المبكر' +- 'embedding layer' -> 'طبقة التضمين' +- 'embedding layers' -> 'طبقات تضمين' +- 'epoch' -> 'حقبة' +- 'example' -> 'على سبيل المثال' +- 'false negative (FN)' -> 'سالب خاطئ (FN)' +- 'false negatives' -> 'الحالات السالبة الخاطئة' +- 'false positive (FP)' -> 'موجب خاطئ (FP)' +- 'false positive rate' -> 'معدّل الموجب الخاطئ' +- 'false positive rate (FPR)' -> 'معدّل الموجب الخاطئ' +- 'false positives' -> 'الحالات الموجبة الخاطئة' +- 'feature' -> 'ميزة' +- 'feature cross' -> 'مضروب مجموعات الخصائص' +- 'feature crosses' -> 'تقاطع الميزات' +- 'feature engineering' -> 'هندسة الميزات' +- 'feature set' -> 'مجموعة الميزات' +- 'feature vector' -> 'متّجه الميزات' +- 'feedback loop' -> 'حلقة الملاحظات' +- 'generalization' -> 'التعميم' +- 'generalization curve' -> 'منحنى التعميم' +- 'gradient descent' -> 'النزول المتدرّج' +- 'ground truth' -> 'معلومات فعلية' +- 'hidden layer' -> 'الطبقة المخفية' +- 'hidden layer(s)' -> 'الطبقات المخفية' +- 'hyperparameter' -> 'المعلَمة الفائقة' +- 'independently and identically distributed (i.i.d)' -> 'موزّعة بشكل مستقل ومتشابه' +- 'inference' -> 'الاستنتاج' +- 'input layer' -> 'طبقة الإدخال' +- 'interpretability' -> 'القابلية للتفسير' +- 'iteration' -> 'التكرار' +- 'L0regularization' -> 'التسوية من النوع L0' +- 'L1loss' -> 'L1' +- 'L1regularization' -> 'التسوية من النوع L1' +- 'L2loss' -> 'فقدانL2' +- 'L2regularization' -> 'التسوية من النوع L2' +- 'label' -> 'التصنيف' +- 'labeled example' -> 'مثال مصنّف' +- 'lambda' -> 'lambda' +- 'layer' -> 'طبقة' +- 'learning rate' -> 'معدّل التعلّم' +- 'linear' -> 'خطي' +- 'linear model' -> 'النموذج الخطي' +- 'linear models' -> 'النماذج الخطية' +- 'linear regression' -> 'الانحدار الخطي' +- 'Log Loss' -> 'الخسارة اللوغاريتمية' +- 'log-odds' -> 'لوغاريتم فرص الأفضلية' +- 'logistic regression' -> 'الانحدار اللوجستي' +- 'loss' -> 'خسارة' +- 
'loss curve' -> 'منحنى الخسارة' +- 'loss function' -> 'دالة الخسارة' +- 'machine learning' -> 'تعلُم الآلة' +- 'majority class' -> 'الفئة الأكبر' +- 'mini-batch' -> 'دفعة صغيرة' +- 'minority class' -> 'فئة الأقلية' +- 'model' -> 'نموذج' +- 'multi-class classification' -> 'التصنيف المتعدّد الفئات' +- 'negative class' -> 'فئة سالبة' +- 'negative classes' -> 'الفئات السلبية' +- 'neural network' -> 'شبكة عصبونية' +- 'neural networks' -> 'للشبكات العصبية' +- 'neuron' -> 'عصبون' +- 'node (neural network)' -> 'عقدة (شبكة عصبونية)' +- 'nonlinear' -> 'غير خطي' +- 'nonstationarity' -> 'عدم الثبات' +- 'normalization' -> 'التسوية' +- 'numerical data' -> 'البيانات الرقمية' +- 'offline' -> 'بلا إنترنت' +- 'offline inference' -> 'الاستنتاج المؤخَّر' +- 'one-hot encoding' -> 'الترميز الأحادي' +- 'one-hot vector' -> 'متجهًا ذا ترميز ساخن' +- 'one-vs.-all' -> 'واحد-مقابل-الكل' +- 'online' -> 'online' +- 'online inference' -> 'الاستنتاج الحي' +- 'output layer' -> 'الطبقة النهائية' +- 'output layers' -> 'الطبقات النهائية' +- 'overfitting' -> 'فرط التخصيص' +- 'pandas' -> 'باندا' +- 'parameter' -> 'مَعلمة' +- 'positive class' -> 'فئة موجبة' +- 'positive classes' -> 'الفئات الإيجابية' +- 'post-processing' -> 'المعالجة اللاحقة' +- 'precision' -> 'الدقة' +- 'prediction' -> 'التوقّع' +- 'proxy labels' -> 'تصنيفات تقريبية' +- 'RAG' -> 'التوليد المعزّز بالاسترجاع (RAG)' +- 'rater' -> 'مُصنِّف' +- 'recall' -> 'تذكُّر الإعلان' +- 'Rectified Linear Unit (ReLU)' -> 'وحدة خطية مصحَّحة (ReLU)' +- 'regression model' -> 'نموذج الانحدار' +- 'regularization' -> 'التسوية' +- 'regularization rate' -> 'معدّل التسوية' +- 'ReLU' -> 'ReLU' +- 'retrieval-augmented generation' -> 'التوليد المعزّز بالاسترجاع' +- 'retrieval-augmented generation (RAG)' -> 'التوليد المعزّز بالاسترجاع (RAG)' +- 'ROC (receiver operating characteristic) Curve' -> 'منحنى الأمثلة الإيجابية' +- 'ROC curve' -> 'منحنى ROC' +- 'Root Mean Squared Error (RMSE)' -> 'جذر الخطأ التربيعي المتوسّط (RMSE)' +- 'sigmoid function' -> 'الدالّة الإسية' 
+- 'softmax' -> 'softmax' +- 'sparse feature' -> 'خاصية متناثرة' +- 'sparse representation' -> 'التمثيل المتناثر' +- 'sparse vector' -> 'متّجه متناثر' +- 'squared loss' -> 'الخسارة التربيعية' +- 'static' -> 'ثابت' +- 'static inference' -> 'الاستنتاج الثابت' +- 'static model' -> 'النموذج الثابت' +- 'stationarity' -> 'الثبات' +- 'Stochastic Gradient Descent (SGD)' -> 'النزول المتدرّج العشوائي (SGD)' +- 'supervised learning' -> 'التعلم المُوجّه' +- 'supervised machine learning' -> 'تعلُّم الآلة الخاضع للإشراف' +- 'synthetic feature' -> 'خاصية مصطنعة' +- 'synthetic features' -> 'ميزات اصطناعية' +- 'test loss' -> 'فقدان الاختبار' +- 'training' -> 'التدريب' +- 'training loss' -> 'فقدان التدريب' +- 'training set' -> 'مجموعة التدريب' +- 'training-serving skew' -> 'اختلاف بين بيانات التدريب وبيانات العرض' +- 'true negative (TN)' -> 'سالب صحيح' +- 'true negatives' -> 'الحالات السالبة الصحيحة' +- 'true positive (TP)' -> 'موجب صحيح (TP)' +- 'true positive rate' -> 'معدّل الإيجابية الحقيقية' +- 'true positive rate (TPR)' -> 'معدّل الموجب الصحيح (TPR)' +- 'true positives' -> 'الحالات الموجبة الصحيحة' +- 'underfitting' -> 'فرط التعميم' +- 'unlabeled example' -> 'مثال غير مصنّف' +- 'unsupervised machine learning' -> 'تعلُّم الآلة غير الموجَّه' +- 'validation' -> 'الإثبات' +- 'validation dataset' -> 'مجموعة بيانات التحقّق من الصحة' +- 'validation loss' -> 'فقدان التحقّق من الصحة' +- 'validation set' -> 'مجموعة التحقّق' +- 'weight' -> 'الوزن' +- 'weighted sum' -> 'المجموع الموزون' +- 'Z-score normalization' -> 'التسوية باستخدام الدرجة المعيارية' diff --git a/src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/glossaries/machine_learning/de.txt b/src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/glossaries/machine_learning/de.txt new file mode 100644 index 000000000..c53a3be9e --- /dev/null +++ b/src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/glossaries/machine_learning/de.txt @@ -0,0 +1,175 @@ +# DE HINTS +## TERM 
MAPPINGS +These are preferred terminology choices for this language. Use them whenever they sound natural; adapt freely if context requires. + +- 'accuracy' -> ‚Genauigkeit' +- 'activation function' -> ‚Aktivierungsfunktion' +- 'artificial intelligence' -> ‚künstliche Intelligenz' +- 'AUC' -> ‚AUC' +- 'AUC (Area under the ROC curve)' -> ‚AUC (Area Under the ROC Curve, Bereich unter der ROC-Kurve)' +- 'backpropagation' -> ‚Rückpropagation' +- 'batch' -> ‚Batch' +- 'batch size' -> ‚Batchgröße' +- 'bias (ethics/fairness)' -> ‚Bias (Ethik/Fairness)' +- 'bias (math) or bias term' -> ‚Bias (mathematisch) oder Bias-Term' +- 'bias in ethics and fairness' -> ‚Bias in Bezug auf Ethik und Fairness' +- 'bias term' -> ‚Bias-Term' +- 'binary classification' -> ‚Binärklassifizierung' +- 'bucketing' -> ‚Bucketing' +- 'categorical' -> ‚kategorialen' +- 'categorical data' -> ‚Kategoriale Daten' +- 'class' -> ‚Klasse' +- 'class-imbalanced dataset' -> ‚Dataset mit Klassenungleichgewicht' +- 'class-imbalanced datasets' -> ‚Datasets mit ungleichmäßiger Klassenverteilung' +- 'classification' -> ‚Klassifizierungsaufgabe' +- 'classification model' -> ‚Klassifikationsmodell' +- 'classification threshold' -> ‚Klassifizierungsschwellenwert' +- 'classifier' -> ‚Klassifikator' +- 'clipping' -> ‚Clipping' +- 'confusion matrix' -> ‚Wahrheitsmatrix' +- 'continuous feature' -> ‚stetiges Feature' +- 'convergence' -> ‚Konvergenz' +- 'data set or dataset' -> ‚Dataset oder Dataset' +- 'DataFrame' -> ‚DataFrame' +- 'dataset' -> ‚Dataset' +- 'deep learning' -> ‚Deep Learning' +- 'deep model' -> ‚Deep-Modell' +- 'dense feature' -> ‚vollbesetztes Feature' +- 'depth' -> ‚Tiefe' +- 'discrete feature' -> ‚diskretes Feature' +- 'discrete features' -> ‚diskrete Features' +- 'dynamic' -> ‚dynamic' +- 'dynamic model' -> ‚dynamisches Modell' +- 'early stopping' -> ‚Vorzeitiges Beenden' +- 'embedding layer' -> ‚Einbettungsebene' +- 'embedding layers' -> ‚Einbettungsebenen' +- 'epoch' -> ‚Epoche' +- 'example' -> 
‚Beispiel' +- 'false negative (FN)' -> ‚falsch negativ (FN)' +- 'false negatives' -> ‚falsch negativen Ergebnisse' +- 'false positive (FP)' -> ‚falsch positiv (FP)' +- 'false positive rate' -> ‚Falsch-Positiv-Rate' +- 'false positive rate (FPR)' -> ‚Rate falsch positiver Ergebnisse (False Positive Rate, FPR)' +- 'false positives' -> ‚falsch positiven Ergebnisse' +- 'feature' -> ‚Feature' +- 'feature cross' -> ‚Featureverknüpfung' +- 'feature crosses' -> ‚Feature-Kombinationen' +- 'feature engineering' -> ‚Feature Engineering' +- 'feature set' -> ‚Feature-Set' +- 'feature vector' -> ‚Featurevektor' +- 'feedback loop' -> ‚Feedbackschleife' +- 'generalization' -> ‚Generalisierung' +- 'generalization curve' -> ‚Verallgemeinerungskurve' +- 'gradient descent' -> ‚Gradientenabstieg' +- 'ground truth' -> ‚Ground Truth' +- 'hidden layer' -> ‚versteckte Ebene' +- 'hidden layer(s)' -> ‚verborgenen Schichten' +- 'hyperparameter' -> ‚Hyperparameter' +- 'independently and identically distributed (i.i.d)' -> ‚unabhängig und identisch verteilt (i.i.d.)' +- 'inference' -> ‚Inferenz' +- 'input layer' -> ‚Eingabelayer' +- 'interpretability' -> ‚Interpretierbarkeit' +- 'iteration' -> ‚Iteration' +- 'L0regularization' -> ‚L0-Regularisierung' +- 'L1loss' -> ‚L1-Verlust' +- 'L1regularization' -> ‚L1-Regularisierung' +- 'L2loss' -> ‚L2-Verlust' +- 'L2regularization' -> ‚L2-Regularisierung' +- 'label' -> ‚Label' +- 'labeled example' -> ‚Beispiel mit Label' +- 'lambda' -> ‚Lambda' +- 'layer' -> ‚Layer' +- 'learning rate' -> ‚Lernrate' +- 'linear' -> ‚Linear' +- 'linear model' -> ‚Lineares Modell' +- 'linear models' -> ‚linearen Modellen' +- 'linear regression' -> ‚lineare Regression' +- 'Log Loss' -> ‚Log Loss' +- 'log-odds' -> ‚Log-Odds' +- 'logistic regression' -> ‚logistische Regression' +- 'loss' -> ‚Niederlage' +- 'loss curve' -> ‚Verlustkurve' +- 'loss function' -> ‚Verlustfunktion' +- 'machine learning' -> ‚Machine Learning' +- 'majority class' -> ‚Mehrheitsklasse' +- 'mini-batch' -> 
‚Mini-Batch' +- 'minority class' -> ‚Minderheitsklasse' +- 'model' -> ‚Modell' +- 'multi-class classification' -> ‚Klassifizierung mit mehreren Klassen' +- 'negative class' -> ‚negative Klasse' +- 'negative classes' -> ‚negativen Klassen' +- 'neural network' -> ‚neuronales Netzwerk' +- 'neural networks' -> ‚neuronale Netze' +- 'neuron' -> ‚Neuron' +- 'node (neural network)' -> ‚Knoten (neuronales Netzwerk)' +- 'nonlinear' -> ‚nicht linear' +- 'nonstationarity' -> ‚Nichtstationarität' +- 'normalization' -> ‚Normalisierung' +- 'numerical data' -> ‚Numerische Daten' +- 'offline' -> ‚offline' +- 'offline inference' -> ‚Offlineinferenz' +- 'one-hot encoding' -> ‚One-Hot-Codierung' +- 'one-hot vector' -> ‚One-Hot-Vektor' +- 'one-vs.-all' -> ‚One-vs.-All' +- 'online' -> ‚online' +- 'online inference' -> ‚Onlineinferenz' +- 'output layer' -> ‚Ausgabeschicht' +- 'output layers' -> ‚Ausgabelayer' +- 'overfitting' -> ‚Überanpassung' +- 'pandas' -> ‚pandas' +- 'parameter' -> ‚Parameter' +- 'positive class' -> ‚positive Klasse' +- 'positive classes' -> ‚positive Klassen' +- 'post-processing' -> ‚Nachbearbeitung' +- 'precision' -> ‚Precision' +- 'prediction' -> ‚Vorhersage-' +- 'proxy labels' -> ‚Proxy-Labels' +- 'RAG' -> ‚RAG' +- 'rater' -> ‚Bewerter' +- 'recall' -> ‚Rückruf' +- 'Rectified Linear Unit (ReLU)' -> ‚Rektifizierte lineare Einheit (ReLU)' +- 'regression model' -> ‚Regressionsmodell' +- 'regularization' -> ‚Regularisierung' +- 'regularization rate' -> ‚Regularisierungsrate' +- 'ReLU' -> ‚ReLU' +- 'retrieval-augmented generation' -> ‚Retrieval-Augmented Generation' +- 'retrieval-augmented generation (RAG)' -> ‚Retrieval-Augmented Generation (RAG)' +- 'ROC (receiver operating characteristic) Curve' -> ‚ROC-Kurve (Receiver Operating Characteristic)' +- 'ROC curve' -> ‚ROC-Kurve' +- 'Root Mean Squared Error (RMSE)' -> ‚Wurzel der mittleren Fehlerquadratsumme (RMSE)' +- 'sigmoid function' -> ‚Sigmoidfunktion' +- 'softmax' -> ‚Softmax-Funktion' +- 'sparse feature' -> 
‚dünnbesetztes Feature' +- 'sparse representation' -> ‚dünnbesetzte Darstellung' +- 'sparse vector' -> ‚dünnbesetzter Vektor' +- 'squared loss' -> ‚Quadratischer Verlust' +- 'static' -> ‚Statisch' +- 'static inference' -> ‚Statische Inferenz' +- 'static model' -> ‚statischen Modell' +- 'stationarity' -> ‚Stationarität' +- 'Stochastic Gradient Descent (SGD)' -> ‚Stochastic Gradient Descent (SGD)' +- 'supervised learning' -> ‚überwachtes Lernen' +- 'supervised machine learning' -> ‚überwachtes maschinelles Lernen' +- 'synthetic feature' -> ‚synthetisches Feature' +- 'synthetic features' -> ‚synthetische Features' +- 'test loss' -> ‚Testverlust' +- 'training' -> ‚Training' +- 'training loss' -> ‚Trainingsverlust' +- 'training set' -> ‚Trainings-Dataset' +- 'training-serving skew' -> ‚Abweichungen zwischen Training und Bereitstellung' +- 'true negative (TN)' -> ‚richtig negativ (RN)' +- 'true negatives' -> ‚richtig negativen Ergebnisse' +- 'true positive (TP)' -> ‚Richtig positiv (TP)' +- 'true positive rate' -> ‚Rate der richtig positiven Ergebnisse' +- 'true positive rate (TPR)' -> ‚Rate richtig positiver Ergebnisse (True Positive Rate, TPR)' +- 'true positives' -> ‚richtig positiven Ergebnisse' +- 'underfitting' -> ‚Unteranpassung' +- 'unlabeled example' -> ‚Beispiel ohne Label' +- 'unsupervised machine learning' -> ‚unüberwachtes maschinelles Lernen' +- 'validation' -> ‚Validierung' +- 'validation dataset' -> ‚Validierungs-Dataset' +- 'validation loss' -> ‚Validierungsverlust' +- 'validation set' -> ‚Validierungs-Dataset' +- 'weight' -> ‚Gewicht' +- 'weighted sum' -> ‚gewichtete Summe' +- 'Z-score normalization' -> ‚Z-Score-Normalisierung' diff --git a/src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/glossaries/machine_learning/el.txt b/src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/glossaries/machine_learning/el.txt new file mode 100644 index 000000000..22c5b4e4c --- /dev/null +++ 
b/src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/glossaries/machine_learning/el.txt @@ -0,0 +1,988 @@ +# EL HINTS +## TERM MAPPINGS +The following mappings are the CANONICAL Greek translations for AI/ML terms. +When translating TO GREEK, you MUST use these exact Greek terms for the listed English expressions whenever the same technical meaning is intended. + +- 'a priori probability' -> «πιθανότητα εκ των προτέρων» +- 'A* Search' -> «αλγόριθμοι αναζήτησης Α*» +- 'Abductive logic programming (ALP)' -> «Προγραμματισμός απαγωγικής λογικής (ALP)» +- 'Abductive reasoning' -> «Απαγωγικός συλλογισμός» +- 'abductive reasoning' -> «απαγωγική συλλογιστική» +- 'Abstract data type' -> «Αφηρημένος τύπος δεδομένων» +- 'abstract plan' -> «αφηρημένο πλάνο» +- 'Abstraction' -> «Αφαίρεση» +- 'Accelerating change' -> «Επιταχυνόμενη αλλαγή» +- 'accretive associative memory' -> «προσαυξητική μνήμη συσχέτισης» +- 'acquisitional efficiency' -> «αποδοτικότητα απόκτησης» +- 'action' -> «ενέργεια» +- 'Action language' -> «Γλώσσα δράσης» +- 'Action model learning' -> «Εκμάθηση μοντέλου δράσης» +- 'action schemas' -> «σχήματα ενεργειών» +- 'Action selection' -> «Επιλογή δράσης» +- 'Activation function' -> «Λειτουργία ενεργοποίησης» +- 'activation function' -> «συνάρτηση ενεργοποίησης» +- 'active' -> «ενεργός» +- 'active database' -> «ενεργή βάση δεδομένων» +- 'active rule' -> «ενεργός κανόνας» +- 'active troubleshooting' -> «ενεργητική διάγνωση» +- 'Adaptive algorithm' -> «Προσαρμοστικός αλγόριθμος» +- 'Adaptive neuro fuzzy inference system (ANFIS)' -> «Προσαρμοστικό σύστημα ασαφών συμπερασμάτων δίκτυου» +- 'adaptivity' -> «προσαρμοστικότητα» +- 'add list' -> «λίστα προσθηκών» +- 'admissibility criterion' -> «κριτήριο αποδοχής» +- 'admissible' -> «αποδεκτός» +- 'Admissible heuristic' -> «Παραδεκτό ευρετικό» +- 'adversary game' -> «ανταγωνιστικό παίγνιο» +- 'Affective computing' -> «Συναισθηματική υπολογιστική» +- 'agent' -> «πράκτορας» +- 'Agent architecture' -> 
«Αρχιτεκτονική πράκτορα» +- 'agglomerative algorithm' -> «αλγόριθμος συγχώνευσης» +- 'AI' -> «τεχνητή νοημοσύνη» +- 'AI accelerator' -> «Επιταχυντής AI» +- 'AI-complete' -> «AI-πλήρης» +- 'Algorithm' -> «Αλγόριθμος» +- 'algorithm' -> «αλγόριθμος» +- 'Algorithmic efficiency' -> «Αλγοριθμική απόδοση» +- 'Algorithmic probability' -> «Αλγοριθμική πιθανότητα» +- 'Alpha-Beta algorithm' -> «αλγόριθμος άλφα-βήτα» +- 'Alpha-Beta search' -> «αναζήτηση άλφα-βήτα» +- 'Ambient intelligence (AmI)' -> «Ευφυΐα περιβάλλοντος» +- 'ambiguity' -> «πολυσημαντικότητα» +- 'analogical reasoning' -> «συλλογιστική με αναλογίες» +- 'Analysis of algorithms' -> «Ανάλυση αλγορίθμων» +- 'Analytics' -> «Ανάλυση» +- 'AND tree' -> «δένδρο ΚΑΙ» +- 'AND/OR tree' -> «δένδρο ΚΑΙ / Ή» +- 'Answer set programming (ASP)' -> «Προγραμματισμός συνόλου απαντήσεων» +- 'Anytime algorithm' -> «Ανα πάσα στιγμή Αλγόριθμος» +- 'Application programming interface (API)' -> «Διεπαφή προγραμματισμού εφαρμογών» +- 'Approximate string matching' -> «Κατά προσέγγιση ταίριασμα συμβολοσειρών» +- 'Approximation error' -> «Σφάλμα προσέγγισης» +- 'arc consistency' -> «συνέπεια τόξου» +- 'Argumentation framework' -> «Πλαίσιο επιχειρημάτων» +- 'artificial agent' -> «τεχνητός πράκτορας» +- 'Artificial general intelligence (AGI)' -> «Τεχνητή γενική νοημοσύνη» +- 'Artificial immune system (AIS)' -> «Τεχνητό ανοσοποιητικό σύστημα» +- 'artificial intelligence' -> «τεχνητή νοημοσύνη» +- 'Artificial Intelligence Markup Language' -> «Γλώσσα σήμανσης τεχνητής νοημοσύνης» +- 'Artificial intelligence (AI)' -> «Τεχνητή νοημοσύνη» +- 'Artificial neural network (ANN)' -> «Τεχνητό νευρωνικό δίκτυο» +- 'artificial neuron' -> «τεχνητός νευρώνας» +- 'association rules' -> «κανόνες συσχέτισης» +- 'associative memory' -> «μνήμη συσχέτισης» +- 'Asymptotic computational complexity' -> «Ασυμπτωτική υπολογιστική πολυπλοκότητα» +- 'atomic formula' -> «ατομικός τύπος» +- 'atoms' -> «άτομα» +- 'attribute selection' -> «επιλογή χαρακτηριστικών» +- 
'attributes' -> «χαρακτηριστικά» +- 'Attributional calculus' -> «Λογισμός απόδοσης» +- 'auctions protocols' -> «πρωτόκολλα πλειστηριασμού» +- 'Augmented reality (AR)' -> «Επαυξημένη πραγματικότητα» +- 'auto-associative memories' -> «αυτοσυσχετιζόμενες μνήμες» +- 'Automata theory' -> «Θεωρία αυτομάτων» +- 'Automated machine learning (AutoML)' -> «Αυτοματοποιημένη μηχανική εκμάθηση» +- 'Automated planning and scheduling' -> «Αυτοματοποιημένος προγραμματισμός» +- 'Automated reasoning' -> «Αυτοματοποιημένη συλλογιστική» +- 'automated synopsis' -> «αυτόματη περίληψη» +- 'automatic translation' -> «αυτόματη μετάφραση» +- 'Autonomic computing (AC)' -> «Αυτόνομος Υπολογισμός» +- 'Autonomous car' -> «Αυτόνομο αυτοκίνητο» +- 'Autonomous robot' -> «Αυτόνομο ρομπότ» +- 'autonomy' -> «αυτονομία» +- 'average reward model' -> «μοντέλο μέσης ανταμοιβής» +- 'axon' -> «άξονας» +- 'back propagation' -> «ανάστροφη μετάδοση» +- 'Backpropagation' -> «Ο πίσω πολλαπλασιασμός» +- 'Backpropagation through time (BPTT)' -> «Πίσω διάδοση στο χρόνο (BPTT)» +- 'backtracking' -> «οπισθοδρόμηση» +- 'backtracking points' -> «σημεία οπισθοδρόμησης» +- 'Backward chaining' -> «Αλυσίδα προς τα πίσω» +- 'backward chaining' -> «ανάστροφη ακολουθία εκτέλεσης» +- 'backward pass' -> «ανάστροφο πέρασμα» +- 'Bag-of-words model' -> «Μοντέλο τσάντα με λέξεις» +- 'Bag-of-words model in computer vision' -> «Μοντέλο τσάντας λέξεων στην όραση υπολογιστή» +- 'basic probability assignment' -> «βασική κατανομή πιθανότητας» +- 'batch learning' -> «μάθηση δέσμης» +- 'Batch normalization' -> «Ομαλοποίηση παρτίδας» +- 'Bayesian programming' -> «Μπεϋζιανός προγραμματισμός» +- 'Beam Search' -> «ακτινωτή αναζήτηση» +- 'Bees algorithm' -> «Αλγόριθμος μελισσών» +- 'Behavior informatics (BI)' -> «Πληροφορική συμπεριφοράς» +- 'Behavior tree (BT)' -> «Δέντρο συμπεριφοράς» +- 'belief' -> «πεποίθηση» +- 'Belief-desire-intention software model (BDI)' -> «Μοντέλο λογισμικού πεποίθησης-επιθυμίας-πρόθεσης» +- 'benevolence' -> «αγαθή 
προαίρεση» +- 'Best-First Search' -> «αναζήτηση πρώτα στο καλύτερο» +- 'Bias–variance tradeoff' -> «Ανταλλαγή μεροληψίας-διακύμανσης» +- 'bidirectional associative memories' -> «μνήμη συσχέτισης διπλής κατεύθυνσης» +- 'Bidirectional Search' -> «αναζήτηση διπλής κατεύθυνσης» +- 'Big data' -> «Μεγάλα δεδομένα» +- 'Big O notation' -> «Σημείωση Big O» +- 'binary constraint' -> «δυαδικός περιορισμός» +- 'Binary tree' -> «Δυαδικό δέντρο» +- 'blackboard' -> «μαυροπίνακας» +- 'blackboard architecture' -> «αρχιτεκτονική μαυροπίνακα» +- 'Blackboard system' -> «Σύστημα μαυροπίνακα» +- 'blackboard systems' -> «συστήματα μαυροπίνακα» +- 'Blind Search' -> «τυφλή αναζήτηση» +- 'Boltzmann machine' -> «Μηχανή Boltzmann» +- 'Boolean satisfiability problem' -> «Πρόβλημα ικανοποίησης Boolean» +- 'Brain technology' -> «Τεχνολογία εγκεφάλου» +- 'Branch and Bound Search' -> «αναζήτηση με επέκταση και οριοθέτηση» +- 'Branching factor' -> «Συντελεστής διακλάδωσης» +- 'branching factor' -> «παράγοντας διακλάδωσης» +- 'Breadth First Search' -> «αναζήτηση πρώτα σε πλάτος» +- 'Brute-force search' -> «Αναζήτηση ωμής βίας» +- 'candidate elimination' -> «απαλοιφή υποψηφίων» +- 'canonical form' -> «κανονική μορφή» +- 'canonical formation rules' -> «κανόνες ορθής διαμόρφωσης» +- 'Capsule neural network (CapsNet)' -> «Νευρωνικό δίκτυο κάψουλας» +- 'card sorting' -> «ταξινόμηση καρτών» +- 'case adaptation' -> «προσαρμογή περιπτώσεων» +- 'case indexing' -> «δεικτοδότηση περιπτώσεων» +- 'case learning' -> «εκμάθηση περιπτώσεων» +- 'case library' -> «βιβλιοθήκη περιπτώσεων» +- 'case retrieval' -> «ανάκληση περιπτώσεων» +- 'case verification' -> «επαλήθευση περιπτώσεων» +- 'case-based' -> «βασισμένο σε περιπτώσεις» +- 'case-based learning' -> «μάθηση κατά περίπτωση» +- 'case-based planning' -> «σχεδιασμός βασισμένος σε παραδείγματα» +- 'case-based reasoning' -> «συλλογιστική βασισμένη σε περιπτώσεις» +- 'Case-based reasoning (CBR)' -> «Συλλογισμός βάσει περιπτώσεων» +- 'causal link' -> «αιτιολογικές συνδέσεις» 
+- 'causal model' -> «αιτιοκρατικό μοντέλο» +- 'certainty factors' -> «συντελεστές βεβαιότητας» +- 'chaining' -> «ακολουθία εκτέλεσης κανόνων» +- 'chromosome' -> «χρωμόσωμα» +- 'chronological backtracking' -> «χρονική οπισθοδρόμηση» +- 'circumscription' -> «μέθοδος περιγράμματος» +- 'class' -> «κλάση» +- 'class extension' -> «επέκταση κλάσης» +- 'classical negation' -> «κλασική άρνηση» +- 'classification' -> «ταξινόμηση» +- 'classification rules' -> «κανόνες ταξινόμησης» +- 'classification trees' -> «δένδρο ταξινόμησης» +- 'clausal form' -> «προτασιακή μορφή» +- 'closed formula' -> «κλειστός τύπος» +- 'closed set' -> «κλειστό σύνολο» +- 'closed world' -> «κλειστός κόσμος» +- 'closed world assumption' -> «υπόθεση κλειστού κόσμου» +- 'Cloud robotics' -> «Cloud ρομποτική» +- 'CLP' -> «λογικός προγραμματισμός με περιορισμούς» +- 'Cluster analysis' -> «Ανάλυση συστάδων» +- 'clustering' -> «ομαδοποίηση» +- 'clusters' -> «ομάδες» +- 'coarse grain' -> «αδρή υφή» +- 'Cobweb' -> «Ιστός αράχνης» +- 'Cognitive architecture' -> «Γνωστική αρχιτεκτονική» +- 'Cognitive computing' -> «Γνωστική Υπολογιστική» +- 'Cognitive science' -> «Γνωστική επιστήμη» +- 'cognitive science' -> «γνωσιολογική επιστήμη» +- 'combinatorial explosion' -> «συνδυαστική έκρηξη» +- 'Combinatorial optimization' -> «Συνδυαστική βελτιστοποίηση» +- 'commitment' -> «δέσμευση» +- 'Committee machine' -> «Μηχανή επιτροπής» +- 'common sense' -> «κοινή λογική» +- 'Commonsense knowledge' -> «Κοινή γνώση» +- 'Commonsense reasoning' -> «Κοινός συλλογισμός» +- 'communication protocol' -> «πρωτόκολλο επικοινωνίας» +- 'competition' -> «ανταγωνισμός» +- 'competitive neural networks' -> «νευρωνικά δίκτυα με ανταγωνισμό» +- 'compiled knowledge' -> «αυτοματοποιημένη γνώση» +- 'complementary pairs' -> «συμπληρωματικά ζεύγη» +- 'complete' -> «πλήρης» +- 'complete plan' -> «πλήρες πλάνο» +- 'completeness' -> «πληρότητα» +- 'Computational chemistry' -> «Υπολογιστική χημεία» +- 'Computational complexity theory' -> «Υπολογιστική 
θεωρία πολυπλοκότητας» +- 'Computational creativity' -> «Υπολογιστική δημιουργικότητα» +- 'Computational cybernetics' -> «Υπολογιστική κυβερνητική» +- 'Computational humor' -> «Υπολογιστικό χιούμορ» +- 'computational intelligence' -> «υπολογιστική νοημοσύνη» +- 'Computational intelligence (CI)' -> «Υπολογιστική νοημοσύνη» +- 'Computational learning theory' -> «Υπολογιστική θεωρία μάθησης» +- 'Computational linguistics' -> «Υπολογιστική γλωσσολογία» +- 'Computational mathematics' -> «Υπολογιστικά μαθηματικά» +- 'Computational neuroscience' -> «Υπολογιστική νευροεπιστήμη» +- 'Computational number theory' -> «Υπολογιστική θεωρία αριθμών» +- 'Computational problem' -> «Υπολογιστικό πρόβλημα» +- 'Computational statistics' -> «Υπολογιστική στατιστική» +- 'Computational Tree Logic' -> «Λογική Υπολογιστικού Δένδρου» +- 'Computer audition (CA)' -> «Οντισιόν υπολογιστή (CA)» +- 'Computer science' -> «Επιστήμη των υπολογιστών» +- 'Computer vision' -> «Υπολογιστική όραση» +- 'Computer-automated design (CAutoD)' -> «Αυτοματοποιημένη σχεδίαση υπολογιστή» +- 'concept' -> «έννοια» +- 'Concept drift' -> «Εννοιολογική μετατόπιση» +- 'concept learning' -> «μάθηση εννοιών» +- 'concept type' -> «τύπος έννοιας» +- 'conceptual dependency' -> «εννοιολογική εξάρτηση» +- 'conceptual dependency graph' -> «γράφος εννοιολογικής εξάρτησης» +- 'conceptual dependency relationships' -> «σχέσεις εννοιολογικής εξάρτησης» +- 'conceptual graphs' -> «εννοιολογικός γράφος» +- 'conceptual relation' -> «εννοιολογικές σχέσεις» +- 'conditional effects' -> «αποτελέσματα υπό προϋπόθεση» +- 'conditional probability' -> «πιθανότητα υπό συνθήκη» +- 'confidence' -> «εμπιστοσύνη» +- 'configuration' -> «διαμόρφωση» +- 'conflict' -> «σύγκρουση κανόνων» +- 'conflict resolution' -> «επίλυση συγκρούσεων» +- 'conflict set' -> «σύνολο σύγκρουσης» +- 'conflicting literals' -> «αντικρουόμενα λεκτικά» +- 'conjunctive model of classification' -> «συζευκτικό μοντέλο ταξινόμησης» +- 'conjunctive normal form' -> «συζευκτική 
κανονική μορφή» +- 'Connectionism' -> «Συνδεσιονισμός» +- 'connectionist approach' -> «συνδετική προσέγγιση» +- 'connectives' -> «συνδετικά» +- 'consistency check' -> «έλεγχος συνέπειας» +- 'consistency check algorithms' -> «αλγόριθμος ελέγχου συνέπειας» +- 'Consistent heuristic' -> «Συνεπής ευρετική» +- 'Constrained conditional model (CCM)' -> «Περιορισμένο υπό όρους μοντέλο» +- 'constraint' -> «περιορισμός» +- 'constraint graph' -> «γράφος περιορισμών» +- 'Constraint logic programming' -> «Προγραμματισμός λογικής περιορισμών» +- 'Constraint Logic Programming' -> «λογικός προγραμματισμός με περιορισμούς» +- 'constraint programming' -> «προγραμματισμός με περιορισμούς» +- 'constraint propagation' -> «διάδοση περιορισμών» +- 'constraint satisfaction' -> «ικανοποίηση περιορισμών» +- 'constraint satisfaction problems' -> «προβλήματα ικανοποίησης περιορισμών» +- 'constraint solving problems' -> «προβλήματα επίλυσης περιορισμών» +- 'Constructed language' -> «Κατασκευασμένη γλώσσα» +- 'content addressability' -> «δυνατότητα ανάκλησης περιεχομένου» +- 'context' -> «συμφραζόμενα» +- 'contracting net protocol' -> «συντονισμός πρακτόρων με σύναψη συμβολαίων» +- 'control' -> «έλεγχος» +- 'Control theory' -> «Θεωρία ελέγχου» +- 'convention' -> «σύμβαση» +- 'Convolutional neural network' -> «Συνελικτικό νευρωνικό δίκτυο» +- 'cooperation' -> «συνεργασία» +- 'crisp value' -> «σαφής τιμή» +- 'critical point' -> «κρίσιμο σημείο» +- 'criticality value' -> «τιμή σημαντικότητας» +- 'critics' -> «κανόνες προσαρμογής περιπτώσεων» +- 'Crossover' -> «Διασταύρωση» +- 'crossover' -> «διασταύρωση» +- 'crossover mask' -> «μάσκα διασταύρωσης» +- 'Darkforest' -> «Σκοτεινό δάσος» +- 'Dartmouth workshop' -> «Εργαστήριο Dartmouth» +- 'data abstraction' -> «γενίκευση δεδομένων» +- 'Data augmentation' -> «Αύξηση δεδομένων» +- 'data driven' -> «αναζήτηση οδηγούμενη από δεδομένα» +- 'Data fusion' -> «Συγχώνευση δεδομένων» +- 'Data integration' -> «Ενοποίηση δεδομένων» +- 'Data mining' -> «Εξόρυξη 
δεδομένων» +- 'data mining' -> «εξόρυξη σε δεδομένα» +- 'data mining incremental' -> «εξόρυξη σε δεδομένα σταδιακή» +- 'Data science' -> «Επιστημονικά δεδομένα» +- 'Data set' -> «Σύνολο δεδομένων» +- 'data space' -> «χώρος δεδομένων» +- 'data warehouse' -> «συστήματα αποθήκευσης δεδομένων» +- 'Data warehouse (DW or DWH)' -> «Αποθήκη δεδομένων» +- 'Datalog' -> «Καταγραφή δεδομένων» +- 'deafisible inference' -> «αναιρέσιμη εξαγωγή συμπερασμάτων» +- 'decidable logic' -> «καταληκτική λογική» +- 'Decision boundary' -> «Όριο απόφασης» +- 'Decision support system (DSS)' -> «Σύστημα υποστήριξης αποφάσεων» +- 'Decision theory' -> «Θεωρία απόφασης» +- 'Decision tree learning' -> «Εκμάθηση του δέντρου αποφάσεων» +- 'Declarative programming' -> «Δηλωτικός προγραμματισμός» +- 'deduction system' -> «σύστημα εξαγωγής συμπερασμάτων» +- 'Deductive classifier' -> «Απαγωγικός ταξινομητής» +- 'deductive reasoning' -> «συνεπαγωγική συλλογιστική» +- 'Deep Blue' -> «Βαθύ μπλε» +- 'deep knowledge' -> «βαθιά γνώση» +- 'Deep learning' -> «Βαθιά μάθηση» +- 'DeepMind Technologies' -> «Τεχνολογίες DeepMind» +- 'default reasoning' -> «συλλογιστική εύλογων υποθέσεων» +- 'defeasible logic' -> «αναιρέσιμη λογική» +- 'defeasible rules' -> «αναιρέσιμοι κανόνες» +- 'defeasible theory' -> «αναιρέσιμη θεωρία» +- 'defeaters' -> «αναιρετές» +- 'definite clause grammars' -> «γραμματικές οριστικών προτάσεων» +- 'definite inference' -> «οριστική απόδειξη» +- 'defuzzification' -> «αποσαφήνιση» +- 'degree of consistency' -> «βαθμός συνέπειας» +- 'degree of truth' -> «βαθμός αληθείας» +- 'delete list' -> «λίστα διαγραφών» +- 'deliberative agent' -> «πράκτορας με εσωτερική κατάσταση» +- 'Delta rule' -> «κανόνας Δέλτα» +- 'demons' -> «δαίμονας» +- 'demotion' -> «υποβιβασμός» +- 'dendrite' -> «δενδρίτης» +- 'Depth-First Search' -> «αναζήτηση πρώτα σε βάθος» +- 'Description logic (DL)' -> «Λογική περιγραφής» +- 'design' -> «σχεδίαση» +- 'design stance' -> «σχεδιαστική προσέγγιση» +- 'detach' -> «διαχωρισμός» +- 
'deterministic effects' -> «ντετερμινιστικά αποτελέσματα» +- 'Developmental robotics (DevRob)' -> «Αναπτυξιακή ρομποτική» +- 'Diagnosis' -> «Διάγνωση» +- 'diagnosis' -> «διάγνωση» +- 'Dialogue system' -> «Σύστημα διαλόγου» +- 'Dimensionality reduction' -> «Μείωση διαστάσεων» +- 'discrepancy' -> «ασυμφωνία τιμών» +- 'Discrete system' -> «Διακριτό σύστημα» +- 'discretization' -> «διακριτοποίηση» +- 'disjunctive normal form' -> «διαζευκτική κανονική μορφή» +- 'distributed artificial intelligence' -> «κατανεμημένη τεχνητή νοημοσύνη» +- 'Distributed artificial intelligence (DAI)' -> «Κατανεμημένη τεχνητή νοημοσύνη» +- 'distributed data mining' -> «κατανεμημένη εξόρυξη σε δεδομένα» +- 'distributed memory' -> «κατανεμημένη μνήμη» +- 'distributed multi-agent planning' -> «κατανεμημένος πολυπρακτορικός σχεδιασμός» +- 'divisive algorithm' -> «αλγόριθμος διαίρεσης» +- 'domain expert' -> «ειδικός του τομέα» +- 'Dynamic epistemic logic (DEL)' -> «Δυναμική επιστημική λογική» +- 'dynamic programming' -> «δυναμικός προγραμματισμός» +- 'Eager learning' -> «Πρόθυμη μάθηση» +- 'eager learning' -> «έγκαιρη μάθηση» +- 'Ebert test' -> «Τεστ Έμπερτ» +- 'Echo state network (ESN)' -> «Δίκτυο κατάστασης Echo» +- 'edge detection' -> «εντοπισμός ακμών» +- 'effectors' -> «εξαρτήματα δράσης» +- 'Embodied agent' -> «Ενσαρκωμένος πράκτορας» +- 'Embodied cognitive science' -> «Ενσωματωμένη γνωστική επιστήμη» +- 'encapsulation' -> «εγκλεισμός (αντικειμένου)» +- 'energy function' -> «συνάρτηση ενέργειας» +- 'Enforced Hill-Climbing Search' -> «αναζήτηση με εξαναγκασμένη αναρρίχηση λόφου» +- 'Ensemble averaging' -> «Μέσος όρος του συνόλου» +- 'entropy of information' -> «εντροπία πληροφορίας» +- 'episode mining algorithms' -> «εξόρυξη επεισοδίων» +- 'episodical knowledge' -> «επεισοδιακή γνώση» +- 'Epoch (machine learning)' -> «Εποχή (μηχανική μάθηση)» +- 'epochs' -> «εποχές» +- 'equivalence' -> «ισοδυναμία» +- 'equivalence rules' -> «κανόνες ισοδυναμίας» +- 'erasure' -> «διαγραφή» +- 'error driven 
learning' -> «μάθηση καθοδηγούμενη από το σφάλμα» +- 'Error-driven learning' -> «Μάθηση με γνώμονα τα σφάλματα» +- 'Ethics of artificial intelligence' -> «Ηθική της τεχνητής νοημοσύνης» +- 'Euclidian distance' -> «Ευκλείδεια απόσταση» +- 'evaluation' -> «αποτίμηση» +- 'evaluation function' -> «συνάρτηση αξιολόγησης» +- 'event-driven rule' -> «ενεργός κανόνας» +- 'evoking strength' -> «δύναμη πρόκλησης» +- 'Evolutionary algorithm (EA)' -> «Εξελικτικός αλγόριθμος» +- 'Evolutionary computation' -> «Εξελικτικός υπολογισμός» +- 'Evolving classification function (ECF)' -> «Εξελισσόμενη συνάρτηση ταξινόμησης» +- 'exhaustive search' -> «εξαντλητική αναζήτηση» +- 'existential graphs' -> «υπαρξιακοί γράφοι» +- 'existential quantifier' -> «υπαρξιακός ποσοδείκτης» +- 'Existential risk' -> «Υπαρξιακός κίνδυνος» +- 'exoneration' -> «αθώωση» +- 'Expert system' -> «Ειδικό σύστημα» +- 'expert system' -> «έμπειρο σύστημα» +- 'expert system shell' -> «κέλυφος έμπειρου συστήματος» +- 'explicit knowledge' -> «ρητή γνώση» +- 'extension principle' -> «αρχή της επέκτασης» +- 'Fast-and-frugal trees' -> «Γρήγορα και λιτά δέντρα» +- 'Feature extraction' -> «Εξαγωγή χαρακτηριστικών» +- 'Feature learning' -> «Εκμάθηση χαρακτηριστικών» +- 'Feature selection' -> «Επιλογή χαρακτηριστικών» +- 'Federated learning' -> «Ομοσπονδιακή μάθηση» +- 'feedback' -> «ανάδραση, ανατροφοδότηση» +- 'feedforward' -> «πρόσθια τροφοδότηση» +- 'filtering algorithm' -> «αλγόριθμος διήθησης τιμών» +- 'final state' -> «τελική κατάσταση» +- 'fine grain' -> «λεπτή υφή» +- 'first fail principle' -> «αρχή συντομότερης αποτυχίας» +- 'first order predicate logic' -> «κατηγορηματική λογική πρώτης τάξης» +- 'First-order logic' -> «Λογική πρώτης τάξης» +- 'fitness function' -> «συνάρτηση καταλληλότητας» +- 'Fluent' -> «Ευφραδής» +- 'Formal language' -> «Επίσημη γλώσσα» +- 'Forward chaining' -> «Αλυσίδα προς τα εμπρός» +- 'forward chaining' -> «ορθή ακολουθία εκτέλεσης» +- 'forward checking' -> «προοπτικός έλεγχος» +- 'Frame' -> 
«Πλαίσιο» +- 'frame axioms' -> «αξιώματα του πλαισίου» +- 'Frame language' -> «Γλώσσα πλαισίου» +- 'frame of discernment' -> «πλαίσιο διάκρισης» +- 'Frame problem' -> «Πρόβλημα πλαισίου» +- 'frame problem' -> «πρόβλημα πλαισίου» +- 'frames' -> «πλαίσια» +- 'Friendly artificial intelligence' -> «Φιλική τεχνητή νοημοσύνη» +- 'full look ahead' -> «πλήρης έγκαιρη εξέταση» +- 'functional dependency' -> «λειτουργική εξάρτηση» +- 'functional term' -> «συναρτησιακός όρος» +- 'Futures studies' -> «Μελλοντικές μελέτες» +- 'fuzzification' -> «μετατροπή μεγέθους σε ασαφές» +- 'fuzziness' -> «ασάφεια» +- 'fuzzy complement' -> «συμπληρωματικό ασαφούς συνόλου» +- 'fuzzy composition' -> «σύνθεση ασαφών σχέσεων» +- 'Fuzzy control system' -> «Ασαφές σύστημα ελέγχου» +- 'fuzzy linguistic description' -> «ασαφής λεκτική περιγραφή» +- 'fuzzy linguistic variable' -> «ασαφής λεκτική μεταβλητή» +- 'Fuzzy logic' -> «Ασαφής λογική» +- 'fuzzy logic' -> «ασαφής λογική» +- 'fuzzy numbers' -> «ασαφείς αριθμοί» +- 'fuzzy reasoning' -> «ασαφής συλλογιστική» +- 'fuzzy relations' -> «ασαφείς σχέσεις» +- 'Fuzzy rule' -> «Ασαφής κανόνας» +- 'fuzzy rule' -> «ασαφής κανόνας» +- 'Fuzzy set' -> «Ασαφές σύνολο» +- 'fuzzy set' -> «ασαφή σύνολα» +- 'fuzzy set theory' -> «θεωρία ασαφών συνόλων» +- 'fuzzy variable' -> «ασαφής μεταβλητή» +- 'Game theory' -> «Θεωρία παιγνίων» +- 'game tree' -> «δένδρο παιγνίου» +- 'gene' -> «γονίδιο» +- 'general problem solver' -> «γενικός επιλυτής προβλημάτων» +- 'generalization rule' -> «κανόνας γενίκευσης» +- 'generalized modus ponens' -> «γενικευμένος τρόπος του θέτειν» +- 'generalized modus tollens' -> «γενικευμένος τρόπος του αναιρείν» +- 'generate and test' -> «παραγωγή και δοκιμή» +- 'generation gap' -> «χάσμα γενεών» +- 'Generative adversarial network (GAN)' -> «Δημιουργικό ανταγωνιστικό δίκτυο» +- 'genetic algorithms' -> «γενετικοί αλγόριθμοι» +- 'Genetic algorithm (GA)' -> «Γενετικός αλγόριθμος» +- 'Genetic operator' -> «Γενετικός χειριστής» +- 'genetic programming' 
-> «γενετικός προγραμματισμός» +- 'genotype' -> «γονότυπος» +- 'Glowworm swarm optimization' -> «Βελτιστοποίηση σμήνους Glowworm» +- 'goal driven' -> «αναζήτηση οδηγούμενη από στόχους» +- 'goals of attainment' -> «στόχοι επίτευξης» +- 'graded learning' -> «βαθμολογημένη μάθηση» +- 'gradient descent' -> «επικλινής καθόδος» +- 'gradient descent optimization' -> «βελτιστοποίηση επικλινούς καθόδου» +- 'Graph (abstract data type)' -> «Γράφημα» +- 'Graph (discrete mathematics)' -> «Γράφημα (διακριτά μαθηματικά)» +- 'Graph database (GDB)' -> «Βάση δεδομένων γραφημάτων» +- 'graph expansion' -> «επέκταση γράφου» +- 'Graph theory' -> «Θεωρία γραφημάτων» +- 'Graph traversal' -> «Διασύνδεση γραφήματος» +- 'graph-based planning' -> «σχεδιασμός βασισμένος σε γράφους» +- 'grid' -> «πλέγμα» +- 'grip' -> «λαβή» +- 'ground term' -> «βασικός όρος» +- 'guided-probe approach' -> «προσέγγιση καθοδηγούμενων δοκιμών» +- 'hetero-associative memories' -> «ετεροσυσχετιζόμενες μνήμες» +- 'Heuristic' -> «Ευρετική» +- 'heuristic' -> «ευρετικός μηχανισμός» +- 'heuristic classification' -> «ευρετική κατηγοριοποίηση» +- 'heuristic function' -> «ευρετική συνάρτηση» +- 'heuristic match' -> «ευρετική ταυτοποίηση» +- 'heuristic search' -> «ευρετική αναζήτηση» +- 'heuristic value' -> «ευρετική τιμή» +- 'Hidden layer' -> «Κρυφό στρώμα» +- 'hidden layers' -> «κρυφά επίπεδα» +- 'Hidden unit' -> «Κρυφή μονάδα» +- 'hierarchical planning' -> «ιεραρχικός σχεδιασμός» +- 'Hierarchical Task Networks' -> «ιεραρχικά δίκτυα διεργασιών» +- 'hierarchy concept type' -> «ιεραρχία τύπων εννοιών» +- 'hierarchy relation type' -> «ιεραρχία τύπων σχέσεων» +- 'higher order constraint' -> «περιορισμός ανώτερης τάξης» +- 'Hill Climbing Search' -> «αναζήτηση αναρρίχησης λόφων» +- 'horizon effect' -> «φαινόμενο ορίζοντα» +- 'humanoid robots' -> «ανθρωποειδή ρομπότ» +- 'hybrid agent' -> «υβριδικός πράκτορας» +- 'Hyper-heuristic' -> «Υπερ-ευρετικό» +- 'hypotheses discrimination' -> «διάκριση υποθέσεων» +- 'hypothesis space' -> 
«χώρος υποθέσεων» +- 'hypothesize and test' -> «δημιουργία και έλεγχος υποθέσεων» +- 'IEEE Computational Intelligence Society' -> «Κοινωνία Υπολογιστικής Νοημοσύνης» +- 'if-needed demon' -> «προσκόλληση διαδικασιών» +- 'implication' -> «συνεπαγωγή» +- 'imprecise data' -> «ανακριβή δεδομένα» +- 'incomplete' -> «μη-πλήρης» +- 'incomplete data' -> «ελλιπή δεδομένα» +- 'inconsistency effects' -> «ασυνεπή αποτελέσματα» +- 'inconsistency support' -> «ασύμβατη υποστήριξη» +- 'Incremental learning' -> «Αυξητική μάθηση» +- 'incremental learning' -> «επαυξητική μάθηση» +- 'indivisible action' -> «αδιαίρετη ενέργεια» +- 'induction' -> «επαγωγή» +- 'inductive learning' -> «επαγωγική μάθηση» +- 'inductive learning hypothesis' -> «υπόθεση επαγωγικής μάθησης» +- 'inductive logic programming' -> «επαγωγικός λογικός προγραμματισμός» +- 'inductive reasoning' -> «επαγωγική συλλογιστική» +- 'inference' -> «εξαγωγή συμπερασμάτων» +- 'Inference engine' -> «Μηχανή συμπερασμάτων» +- 'inference engine' -> «μηχανή εξαγωγής συμπερασμάτων» +- 'inference mechanism' -> «μηχανισμός εξαγωγής συμπερασμάτων» +- 'inference rules' -> «κανόνες εξαγωγής συμπερασμάτων» +- 'inferential adequacy' -> «επάρκεια συνεπαγωγής» +- 'inferential efficiency' -> «αποδοτικότητα συνεπαγωγής» +- 'inferential inefficiency' -> «μη-αποδοτικότητα επαγωγής» +- 'information gain' -> «κέρδος πληροφορίας» +- 'Information integration (II)' -> «Ενοποίηση πληροφοριών» +- 'Information Processing Language (IPL)' -> «Γλώσσα επεξεργασίας πληροφοριών» +- 'information retrieval' -> «ανάκτηση πληροφοριών» +- 'information value theory' -> «θεωρία αξίας της πληροφορίας» +- 'informative patterns' -> «πρότυπα πληροφόρησης» +- 'inheritance' -> «κληρονομικότητα» +- 'initial state' -> «αρχική κατάσταση» +- 'input layer' -> «επίπεδο εισόδου» +- 'instance' -> «στιγμιότυπο» +- 'instance-based learning' -> «μάθηση κατά περίπτωση» +- 'Intelligence amplification (IA)' -> «Ενίσχυση νοημοσύνης» +- 'Intelligence explosion' -> «Έκρηξη πληροφοριών» +- 
'intelligent agent' -> «ευφυής πράκτορας» +- 'Intelligent agent (IA)' -> «Ευφυής παράγοντας» +- 'Intelligent control' -> «Έξυπνος έλεγχος» +- 'Intelligent personal assistant' -> «Έξυπνος προσωπικός βοηθός» +- 'intention' -> «πρόθεση» +- 'intentional stance' -> «προθεσιαρχική προσέγγιση» +- 'inter-transactional association rules' -> «δια-συναλλακτικοί κανόνες συσχέτισης» +- 'interaction protocol' -> «πρωτόκολλο αλληλεπίδρασης» +- 'interference' -> «παρέμβαση» +- 'interoperability' -> «διαλειτουργικότητα» +- 'interpolative associative memories' -> «μνήμη συσχέτισης παρεμβολής» +- 'Interpretation' -> «Ερμηνεία» +- 'interpretation' -> «ερμηνεία» +- 'interpretation models' -> «ερμηνευτικά μοντέλα» +- 'interpreter' -> «διερμηνέας» +- 'intra-transactional association rules' -> «ενδο-συναλλακτικοί κανόνες συσχέτισης» +- 'Intrinsic motivation' -> «Εσωτερικά κίνητρα» +- 'Issue tree' -> «Δέντρο έκδοσης» +- 'Iterative Deepening A* Search' -> «αναζήτηση Α* με επαναληπτική εκβάθυνση» +- 'Iterative Deepening Search' -> «αναζήτηση επαναληπτικής εκβάθυνσης» +- 'job-shop scheduling' -> «χρονοπρογραμματισμός καταστημάτων εργασιών» +- 'join' -> «συνένωση» +- 'Junction tree algorithm' -> «Αλγόριθμος δέντρων διασταύρωσης» +- 'K-consistency' -> «Κ-συνέπεια» +- 'K-means algorithm' -> «αλγόριθμος Κ-μέσων» +- 'k-nearest neighbors algorithm' -> «αλγόριθμος κ-πλησιέστερων γειτόνων» +- 'Kernel method' -> «Μέθοδος πυρήνα» +- 'knapsack problem' -> «πρόβλημα ταξιδιωτικού σάκου» +- 'knowledge' -> «γνώση» +- 'Knowledge acquisition' -> «Απόκτηση γνώσης» +- 'knowledge acquisition' -> «απόκτηση γνώσης» +- 'knowledge base' -> «βάση γνώσης» +- 'knowledge based system' -> «σύστημα βασισμένο στη γνώση» +- 'knowledge capture' -> «σύλληψη γνώσης» +- 'knowledge elicitation' -> «εκμαίευση γνώσης» +- 'knowledge engineer' -> «μηχανικός γνώσης» +- 'knowledge engineering' -> «τεχνολογία γνώσης» +- 'Knowledge engineering (KE)' -> «Μηχανική Γνώσης» +- 'Knowledge extraction' -> «Εξαγωγή γνώσης» +- 'knowledge 
extraction' -> «εξαγωγή γνώσης» +- 'Knowledge Interchange Format (KIF)' -> «Μορφή ανταλλαγής γνώσεων» +- 'knowledge management' -> «διαχείριση γνώσης» +- 'knowledge modeling' -> «μοντελοποίηση γνώσης» +- 'Knowledge representation and reasoning (KR² or KR&R)' -> «Αναπαράσταση και συλλογιστική γνώσης» +- 'knowledge source' -> «πηγή γνώσης» +- 'knowledge system' -> «σύστημα βασισμένο στη γνώση» +- 'Knowledge-based system (KBS)' -> «Σύστημα βασισμένο στη γνώση» +- 'laddered grids' -> «βαθμωτά πλέγματα» +- 'lambda expressions' -> «εκφράσεις-λ» +- 'lateral excitation' -> «παράπλευρη διέγερση» +- 'lateral inhibition' -> «παράπλευρη καταστολή» +- 'layer' -> «στρώματα» +- 'Lazy learning' -> «Τεμπέλικη μάθηση» +- 'lazy learning' -> «αναβλητική μάθηση» +- 'learning' -> «μάθηση» +- 'learning from examples' -> «μάθηση με παραδείγματα» +- 'learning from observation' -> «μάθηση από παρατήρηση» +- 'least commitment principle' -> «αρχή της ελάχιστης δέσμευσης» +- 'linear associator' -> «γραμμικός συσχετιστής» +- 'linear plan' -> «γραμμικό πλάνο» +- 'linear regression' -> «γραμμική παρεμβολή» +- 'linear resolution' -> «γραμμική ανάλυση» +- 'linear time logic' -> «γραμμική χρονική λογική» +- 'linearly separable problems' -> «γραμμικώς διαχωρίσιμα προβλήματα» +- 'literal' -> «λεκτικό» +- 'local minima' -> «τοπικά ελάχιστα» +- 'logic clause' -> «λογική πρόταση» +- 'logic contradiction' -> «λογική αντίφαση» +- 'Logic programming' -> «Λογικός προγραμματισμός» +- 'logic semantics' -> «λογική σημασιολογία» +- 'logic substitution' -> «λογική αντικατάσταση» +- 'logical inadequacy' -> «λογική ανεπάρκεια» +- 'logical necessity' -> «λογική αναγκαιότητα» +- 'logical sufficiency' -> «λογική επάρκεια» +- 'logistic function' -> «λογιστική συνάρτηση» +- 'logistics' -> «εφοδιαστική» +- 'Long short-term memory (LSTM)' -> «Μακροπρόθεσμη μνήμη» +- 'machine evolution' -> «μηχανική εξέλιξη» +- 'machine learning' -> «μηχανική μάθηση» +- 'Machine learning (ML)' -> «Μηχανική μάθηση» +- 'Machine listening' -> 
«Μηχανική ακρόαση» +- 'Machine perception' -> «Μηχανική αντίληψη» +- 'machine vision' -> «μηχανική όραση» +- 'Machine vision (MV)' -> «Μηχανική όραση» +- 'maintaining arc consistency' -> «διατήρηση συνέπεια τόξου» +- 'manifestation frequency' -> «συχνότητα εκδήλωσης συμπτώματος» +- 'manufacturing robots' -> «κατασκευαστικά ρομπότ» +- 'Markov chain' -> «Αλυσίδα Markov» +- 'Markov decision process (MDP)' -> «Διαδικασία απόφασης Markov» +- 'mathematical logic' -> «μαθηματική λογική» +- 'Mathematical optimization' -> «Μαθηματική βελτιστοποίηση» +- 'means-ends analysis' -> «ανάλυση μέσων και στόχων» +- 'Mechanism design' -> «Σχεδιασμός μηχανισμού» +- 'Mechatronics' -> «Μηχατρονική» +- 'mediator' -> «διαμεσολαβητής» +- 'membership function' -> «συνάρτηση συγγένειας» +- 'memory capacity' -> «χωρητικότητα μνήμης» +- 'message passing systems' -> «συστήματα ανταλλαγής μηνυμάτων» +- 'meta -control' -> «μετα- έλεγχος» +- 'meta -knowledge' -> «μετα- γνώση» +- 'meta -rule' -> «μετα- κανόνας» +- 'Metabolic network reconstruction and simulation' -> «Ανακατασκευή και προσομοίωση μεταβολικού δικτύου» +- 'metadata' -> «μεταδεδομένα» +- 'Metaheuristic' -> «Μεταευρετική» +- 'mgu' -> «γενικότερος ενοποιητής» +- 'min conflicts heuristic' -> «ευριστικός μηχανισμός ελαχίστων συγκρούσεων» +- 'minimax algorithm' -> «αλγόριθμοι αναζήτησης ελαχίστου-μεγίστου» +- 'minimax search' -> «αναζήτηση ελαχίστου-μεγίστου» +- 'missing data' -> «ελλιπή δεδομένα» +- 'mobile robots' -> «μετακινούμενα ρομπότ» +- 'mobility' -> «κινητικότητα» +- 'modal logic' -> «λογική τροπική» +- 'model' -> «μοντέλο» +- 'Model checking' -> «Έλεγχος μοντέλου» +- 'model checking' -> «έλεγχος μοντέλων» +- 'model-based diagnosis' -> «διάγνωση βασισμένη σε μοντέλα» +- 'model-based reasoning' -> «συλλογιστική βασισμένη σε μοντέλα» +- 'module' -> «ενότητα» +- 'modus ponens' -> «τρόπος του θέτειν» +- 'modus tollens' -> «τρόπος του αναίρειν» +- 'Monte Carlo tree search' -> «Αναζήτηση δέντρων στο Μόντε Κάρλο» +- 'morphological 
analysis' -> «μορφολογική ανάλυση» +- 'morphology derivational' -> «μορφολογία ετυμολογική» +- 'morphology inflectional' -> «μορφολογία κλίσεων» +- 'most general unifier' -> «γενικότερος ενοποιητής» +- 'multi-agent planning' -> «πολυπρακτορικός σχεδιασμός» +- 'multi-agent system' -> «πολυπρακτορικό σύστημα» +- 'Multi-agent system (MAS)' -> «Σύστημα πολλαπλών πρακτόρων» +- 'Multi-swarm optimization' -> «Βελτιστοποίηση πολλαπλών σμήνων» +- 'multiple inheritance' -> «πολλαπλή κληρονομικότητα» +- 'multistage classification' -> «πολυβάθμια κατηγοριοποίηση» +- 'Mutation' -> «Μετάλλαξη» +- 'mutation' -> «μετάλλαξη» +- 'mutual exclusion relations' -> «σχέσεις αμοιβαίου αποκλεισμού» +- 'Naive Bayes classifier' -> «Ταξινομητής Naive Bayes» +- 'Naive semantics' -> «Αφελής σημασιολογία» +- 'Name binding' -> «Δέσμευση ονόματος» +- 'Named graph' -> «Ονομασμένο γράφημα» +- 'Named-entity recognition (NER)' -> «Αναγνώριση επώνυμης οντότητας» +- 'namespace' -> «χώρος ονομάτων» +- 'natural language' -> «φυσική γλώσσα» +- 'Natural language generation (NLG)' -> «Δημιουργία φυσικής γλώσσας» +- 'Natural language processing (NLP)' -> «Επεξεργασία φυσικής γλώσσας» +- 'Natural language programming' -> «Προγραμματισμός φυσικής γλώσσας» +- 'negation as failure' -> «άρνηση ως αποτυχία» +- 'negative context' -> «αρνητικό πλαίσιο (συμφραζόμενων)» +- 'negative preconditions' -> «αρνητικές προϋποθέσεις» +- 'negotiation' -> «διαπραγμάτευση» +- 'Network motif' -> «Μοτίβο δικτύου» +- 'network paralysis' -> «παράλυση νευρωνικού δικτύου» +- 'Neural machine translation (NMT)' -> «Νευρωνική μηχανική μετάφραση» +- 'neural network' -> «νευρωνικό δίκτυο» +- 'Neural Turing machine (NTM)' -> «Μηχανή Neural Turing» +- 'Neuro-fuzzy' -> «Νευρο-ασαφής» +- 'Neurocybernetics' -> «Νευροκυβερνητική» +- 'Neuromorphic engineering' -> «Νευρομορφική μηχανική» +- 'neuron' -> «νευρώνας» +- 'Node' -> «Κόμβος» +- 'node consistency' -> «συνέπεια κόμβου» +- 'noise reduction' -> «μείωση θορύβου» +- 'non-determinism' -> 
«μη-αιτιοκρατία» +- 'non-monotonic modal logic' -> «μη μονότονη τροπική λογική» +- 'non-symbolic artificial intelligence' -> «μη συμβολική τεχνητή νοημοσύνη» +- 'Nondeterministic algorithm' -> «Μη προσδιοριστικός αλγόριθμος» +- 'Nouvelle AI' -> «Νέο AI» +- 'NP-completeness' -> «NP-πληρότητα» +- 'NP-hardness' -> «NP-σκληρότητα» +- 'null plan' -> «μηδενικό πλάνο» +- 'object' -> «αντικείμενο» +- 'object instances' -> «στιγμιότυπα αντικειμένου» +- 'object-oriented programming' -> «αντικειμενοστραφής προγραμματισμός» +- 'obligation' -> «υποχρέωση» +- 'Occam's razor' -> «ξυράφι του Όκαμ» +- 'occurs check' -> «έλεγχος εμφάνισης» +- 'OCR – Optical Character Recognition' -> «οπτική αναγνώριση χαρακτήρων» +- 'Offline learning' -> «Εκμάθηση εκτός σύνδεσης» +- 'offsprings' -> «απόγονοι» +- 'omniscience' -> «παντογνωσία» +- 'Online machine learning' -> «Διαδικτυακή μηχανική εκμάθηση» +- 'ontology' -> «οντολογία» +- 'Ontology learning' -> «Εκμάθηση οντολογίας» +- 'open world' -> «ανοιχτός κόσμος» +- 'Open-source software (OSS)' -> «Λογισμικό ανοιχτού κώδικα» +- 'opportunistic scheduling' -> «καιροσκοπικός χρονοπρογραμματισμός» +- 'optimal solution' -> «βέλτιστη λύση» +- 'optimization' -> «βελτιστοποίηση» +- 'order inconsistent plan' -> «πλάνο ασυνεπές ως προς τις διατάξεις» +- 'ordered game tree' -> «διατεταγμένο δένδρο» +- 'ordering constraint' -> «περιορισμοί διάταξης» +- 'output layer' -> «επίπεδα εξόδου» +- 'overfitting' -> «υπερπροσαρμογή» +- 'overloading' -> «υπερφόρτωση» +- 'parallel search' -> «παράλληλη αναζήτηση» +- 'parse tree' -> «δένδρο συντακτικής ανάλυσης» +- 'partial look ahead algorithm' -> «αλγόριθμος έγκαιρης μερικής εξέτασης» +- 'Partial order reduction' -> «Μερική μείωση παραγγελίας» +- 'Partially observable Markov decision process (POMDP)' -> «Μερικώς παρατηρήσιμη διαδικασία απόφασης Markov» +- 'Particle swarm optimization (PSO)' -> «Βελτιστοποίηση σμήνος σωματιδίων» +- 'passive troubleshooting' -> «παθητική διάγνωση» +- 'path consistency algorithm' -> 
«αλγόριθμος συνέπειας μονοπατιού» +- 'Pathfinding' -> «Διαδρομή» +- 'pattern' -> «πρότυπα» +- 'pattern matching' -> «ταυτοποίηση» +- 'pattern of activity' -> «πρότυπα δραστηριότητας» +- 'Pattern recognition' -> «Αναγνώριση μοτίβου» +- 'phenotype' -> «φαινότυπο» +- 'phonemes' -> «φθόγγοι» +- 'physical stance' -> «φυσική προσέγγιση» +- 'pixel' -> «εικονοστοιχείο» +- 'plan' -> «πλάνο» +- 'plan solution' -> «λύση πλάνου» +- 'plan space' -> «χώρος πλάνων» +- 'planner' -> «σχεδιαστής» +- 'planning contingency' -> «σχεδιασμός πολλαπλών ενδεχομένων» +- 'planning graph' -> «γράφος σχεδιασμού» +- 'planning system' -> «σύστημα σχεδιασμού» +- 'polymorphism' -> «πολυμορφισμός» +- 'portals' -> «διαδικτυακές πύλες» +- 'positive context' -> «θετικό πλαίσιο συμφραζόμενων» +- 'powerset' -> «δυναμοσύνολο» +- 'pragmatic analysis' -> «πραγματολογική ανάλυση» +- 'precondition list' -> «λίστα προϋποθέσεων» +- 'predicate' -> «κατηγόρημα» +- 'Predicate logic' -> «Λογική κατηγορήματος» +- 'predicate logic' -> «κατηγορηματική λογική» +- 'prediction' -> «πρόγνωση» +- 'Predictive analytics' -> «Προγνωστική ανάλυση» +- 'predictive models' -> «μοντέλο πρόβλεψης» +- 'prenex conjunctive normal form' -> «προσημασμένη συζευκτική κανονική μορφή» +- 'primitive action' -> «αρχέγονη ενέργεια» +- 'primitive conceptualizations' -> «αρχέγονες εννοιολογικές μορφές» +- 'primitive problem' -> «αρχέγονο πρόβλημα» +- 'Principal component analysis (PCA)' -> «Ανάλυση κύριου συστατικού» +- 'Principle of rationality' -> «Αρχή του ορθολογισμού» +- 'prior probability' -> «προϋπάρχουσα πιθανότητα» +- 'pro-activeness' -> «προνοητικότητα» +- 'Probabilistic programming (PP)' -> «Πιθανοτικός προγραμματισμός» +- 'probability planning' -> «σχεδιασμός με πιθανότητες» +- 'problem description' -> «περιγραφή προβλήματος» +- 'problem world' -> «κόσμος προβλήματος» +- 'procedural knowledge' -> «διαδικαστική γνώση» +- 'production rules' -> «κανόνες παραγωγής» +- 'Production system' -> «Σύστημα παραγωγής» +- 'production system' -> 
«σύστημα κανόνων παραγωγής» +- 'Programming language' -> «Γλώσσα προγραμματισμού» +- 'progression' -> «ορθή διάσχιση» +- 'projection' -> «προβολή» +- 'promotion' -> «προβιβασμός» +- 'proof' -> «απόδειξη» +- 'proof by contradiction' -> «εις άτοπο απαγωγή» +- 'proof layer' -> «επίπεδο αξιοπιστίας» +- 'proof procedure' -> «διαδικασία απόδειξης» +- 'Propositional calculus' -> «Προτασιακός λογισμός» +- 'propositional logic' -> «προτασιακή λογική» +- 'propositional rules' -> «προτασιακοί κανόνες» +- 'pruning' -> «κλάδεμα» +- 'pure node' -> «αμιγής κόμβος» +- 'pure tree' -> «αμιγές δένδρο» +- 'Python' -> «Πύθων» +- 'Qualification problem' -> «Πρόβλημα προσόντων» +- 'qualitative reasoning' -> «ποιοτική συλλογιστική» +- 'Quantifier' -> «Ποσοτικοποιητής» +- 'quantifier' -> «ποσοδείκτες» +- 'Quantum computing' -> «Κβαντική Υπολογιστική» +- 'Query language' -> «Γλώσσα ερωτήματος» +- 'R programming language' -> «Γλώσσα προγραμματισμού R» +- 'Radial basis function network' -> «Δίκτυο λειτουργίας ακτινικής βάσης» +- 'Random forest' -> «Τυχαίο δάσος» +- 'random learning' -> «τυχαία μάθηση» +- 'rationality' -> «λογικότητα» +- 'reactive agent' -> «αντιδραστικός πράκτορας» +- 'reactive rules' -> «αντιδραστικοί κανόνες» +- 'reactiveness' -> «αντιδραστικότητα» +- 'reasoning' -> «συλλογιστική» +- 'Reasoning system' -> «Σύστημα συλλογισμού» +- 'recurrent' -> «ανατροφοδοτούμενος» +- 'recurrent neural networks' -> «νευρωνικά δίκτυα με ανατροφοδότηση» +- 'Recurrent neural network (RNN)' -> «Επαναλαμβανόμενο νευρωνικό δίκτυο» +- 'recursion' -> «αναδρομή» +- 'reduction' -> «αναγωγή» +- 'reduction operator' -> «τελεστής αναγωγής» +- 'refutation' -> «εις άτοπο απαγωγή» +- 'refutation completeness' -> «πληρότητα ατόπου» +- 'Region connection calculus' -> «Λογισμός σύνδεσης περιοχής» +- 'regression' -> «παλινδρόμηση» +- 'reinforcement learning' -> «ενισχυτική μάθηση» +- 'Reinforcement learning (RL)' -> «Ενισχυτική μάθηση» +- 'repair algorithm' -> «αλγόριθμος επιδιόρθωσης» +- 'repair space' -> 
«χώρος επιδιορθώσεων» +- 'replanning' -> «επανασχεδιασμός» +- 'representational adequacy' -> «επάρκεια αναπαράστασης» +- 'Reservoir computing' -> «Υπολογισμός δεξαμενής» +- 'resolution principle' -> «αρχή της ανάλυσης» +- 'resolvent' -> «αναλυθέν» +- 'resource competition' -> «ανταγωνισμός πόρων» +- 'Resource Description Framework (RDF)' -> «Πλαίσιο Περιγραφής Πόρων» +- 'resource planning' -> «σχεδιασμός με πόρους» +- 'Restricted Boltzmann machine (RBM)' -> «Περιορισμένη μηχανή Boltzmann» +- 'restriction' -> «περιορισμός» +- 'reversible operator' -> «τελεστής αντιστρέψιμος» +- 'robot' -> «ρομπότ» +- 'robotic agent' -> «ρομποτικός πράκτορας» +- 'Robotics' -> «Ρομποτική» +- 'rule action' -> «ενέργεια κανόνα» +- 'rule base' -> «βάση κανόνων» +- 'rule cluster' -> «ομάδα κανόνων» +- 'rule conclusion' -> «συμπέρασμα κανόνα» +- 'rule condition' -> «συνθήκη κανόνα» +- 'rule of inference' -> «κανόνας συμπερασμού» +- 'Rule-based system' -> «Σύστημα βασισμένο σε κανόνες» +- 'Satisfiability' -> «Ικανοποίηση» +- 'scheduler' -> «χρονοπρογραμματιστής» +- 'schema theorem' -> «θεώρημα σχημάτων» +- 'scout' -> «ανιχνευτής» +- 'scripts' -> «σενάρια» +- 'Search algorithm' -> «Αλγόριθμος αναζήτησης» +- 'search algorithms' -> «αλγόριθμοι αναζήτησης» +- 'search engines' -> «μηχανές αναζήτησης» +- 'search frontier' -> «μέτωπο αναζήτησης» +- 'search space' -> «χώρος αναζήτησης» +- 'search thread' -> «νήμα αναζήτησης» +- 'search tree' -> «δένδρο αναζήτησης» +- 'Selection' -> «Επιλογή» +- 'selection fitness proportionate' -> «επιλογή αναλογικής καταλληλότητας» +- 'selection roulette wheel' -> «επιλογή ρουλέτας» +- 'selection tournament' -> «επιλογή τουρνουά» +- 'Selective Linear Definite clause resolution' -> «Επιλεκτική γραμμική ανάλυση οριστικής πρότασης» +- 'self decay' -> «εξασθένιση» +- 'Self-management' -> «Αυτοδιαχείρηση» +- 'self-organizing feature map' -> «αυτο-οργανούμενη απεικόνιση» +- 'semantic analysis' -> «σημασιολογική ανάλυση» +- 'semantic knowledge' -> «σημασιολογική γνώση» 
+- 'Semantic network' -> «Σημασιολογικό δίκτυο» +- 'semantic networks' -> «σημασιολογικά δίκτυα» +- 'Semantic query' -> «Σημασιολογική ερώτηση» +- 'Semantic reasoner' -> «Σημασιολογικός λογιστής» +- 'semantic web' -> «σημασιολογικός ιστός» +- 'Semantics' -> «Σημασιολογία» +- 'semantics' -> «σημασιολογία» +- 'sensor' -> «αισθητήρας» +- 'Sensor fusion' -> «Σύντηξη αισθητήρα» +- 'Separation logic' -> «Λογική χωρισμού» +- 'sequential covering algorithm' -> «αλγόριθμος σειριακής κάλυψης» +- 'sequential pattern mining' -> «εξόρυξη ακολουθιακών προτύπων» +- 'shallow knowledge' -> «ρηχή γνώση» +- 'shell' -> «κέλυφος έμπειρου συστήματος» +- 'sigmoid functions' -> «σιγμοειδείς συναρτήσεις» +- 'sign function' -> «συνάρτηση πρόσημου» +- 'Similarity learning' -> «Εκμάθηση ομοιότητας» +- 'simplification' -> «απλοποίηση» +- 'Simulated Annealing Search' -> «αναζήτηση προσομοιωμένης ανόπτησης» +- 'Simulated annealing (SA)' -> «Προσομοίωση ανόπτησης» +- 'Situated approach' -> «Τοποθετημένη προσέγγιση» +- 'Situation calculus' -> «Λογισμός καταστάσεων» +- 'situation calculus' -> «λογισμός καταστάσεων» +- 'skeptical logic' -> «σκεπτικιστική λογική» +- 'skolemization' -> «σκολεμοποίηση» +- 'smoothing' -> «εξομάλυνση» +- 'social ability' -> «κοινωνικότητα» +- 'softbot' -> «λογισμικός πράκτορας» +- 'Software' -> «Λογισμικό» +- 'software agent' -> «λογισμικός πράκτορας» +- 'Software engineering' -> «Μηχανική Λογισμικού» +- 'solution extraction' -> «εξαγωγή λύσης» +- 'solution refinement' -> «επιλογή λύσης» +- 'sparse data' -> «αραιά δεδομένα» +- 'Spatial-temporal reasoning' -> «Χωροχρονικός συλλογισμός» +- 'specialization rule' -> «κανόνας εξειδίκευσης» +- 'spectrogram' -> «φασματογράφημα» +- 'Speech Act Theory' -> «Θεωρία Πράξεων Λόγου» +- 'Speech recognition' -> «Αναγνώριση ομιλίας» +- 'speech recognition' -> «αναγνώριση ομιλίας» +- 'spelling correction rules' -> «αλγόριθμος διόρθωσης ορθογραφικών λαθών» +- 'Spiking neural network (SNN)' -> «Spiking νευρωνικό δίκτυο» +- 'Stanford 
Research Institute Problem Solver (STRIPS)' -> «Επίλυση προβλημάτων του Ερευνητικού Ινστιτούτου Στάνφορντ» +- 'State' -> «Κατάσταση» +- 'state' -> «κατάσταση» +- 'state space' -> «χώρος καταστάσεων» +- 'state-space planning' -> «σχεδιασμός χώρου καταστάσεων» +- 'static world' -> «στατικός κόσμος» +- 'Statistical classification' -> «Στατιστική ταξινόμηση» +- 'Statistical relational learning (SRL)' -> «Στατιστική σχεσιακή μάθηση» +- 'step function' -> «βηματική συνάρτηση» +- 'Stochastic optimization (SO)' -> «Στοχαστική βελτιστοποίηση» +- 'Stochastic semantic analysis' -> «Στοχαστική σημασιολογική ανάλυση» +- 'strict rules' -> «ισχυροί κανόνες» +- 'strong negation' -> «κλασική άρνηση» +- 'Subject-matter expert' -> «Εμπειρογνώμονας σε θέματα» +- 'subsumption architecture' -> «αρχιτεκτονική υπαγωγής» +- 'Superintelligence' -> «Υπερευφυΐα» +- 'superiority relation' -> «σχέση υπεροχής» +- 'Supervised learning' -> «Επίβλεψη μάθησης» +- 'supervised learning' -> «μάθηση με επίβλεψη» +- 'support' -> «υποστήριξη» +- 'Support Vector Machines' -> «μηχανές διανυσμάτων υποστήριξης» +- 'Support-vector machines' -> «Υποστήριξη-διανυσματικά μηχανήματα» +- 'Swarm intelligence (SI)' -> «Νοημοσύνη σμήνους» +- 'Symbolic artificial intelligence' -> «Συμβολική τεχνητή νοημοσύνη» +- 'symbolic artificial intelligence' -> «συμβολική τεχνητή νοημοσύνη» +- 'symbolic logic' -> «συμβολική λογική» +- 'synapse' -> «σύναψη» +- 'syntactic analysis' -> «συντακτική ανάλυση» +- 'Synthetic intelligence (SI)' -> «Συνθετική νοημοσύνη» +- 'system model' -> «μοντέλο συστήματος» +- 'Systems neuroscience' -> «Συστημική νευροεπιστήμη» +- 'Tabu Search' -> «αναζήτηση με απαγορευμένες καταστάσεις» +- 'tacit knowledge' -> «άρρητη γνώση» +- 'target function' -> «συνάρτηση στόχος» +- 'tautology' -> «ταυτολογία» +- 'teach-back' -> «επαναδιδασκαλία» +- 'Technological singularity' -> «Τεχνολογική ιδιομορφία» +- 'temporal association rules' -> «κανόνες συσχέτισης χρονικοί» +- 'Temporal difference learning' -> «Εκμάθηση 
χρονικής διαφοράς» +- 'temporal logic' -> «λογική χρονική» +- 'Tensor network theory' -> «Θεωρία τανυστικού δικτύου» +- 'term' -> «όρος» +- 'term assignment' -> «ανάθεση όρων» +- 'terminal state' -> «τερματική κατάσταση» +- 'text categorization' -> «κατηγοριοποίηση κειμένων» +- 'text planning' -> «σχεδιασμός κειμένου» +- 'Theoretical computer science (TCS)' -> «Θεωρητική επιστήμη των υπολογιστών» +- 'Theory of computation' -> «Θεωρία υπολογισμού» +- 'therapy space' -> «χώρος θεραπειών» +- 'Thompson sampling' -> «Δειγματοληψία Thompson» +- 'threat' -> «απειλή» +- 'threshold effect' -> «φαινόμενο κατωφλίου» +- 'threshold function' -> «συνάρτηση ενεργοποίησης» +- 'Time complexity' -> «Χρονική πολυπλοκότητα» +- 'timetable' -> «ωρολόγιο πρόγραμμα» +- 'topological sort' -> «τοπολογική διάταξη» +- 'total ordered plan' -> «πλάνο πλήρους διάταξης» +- 'Transhumanism' -> «Υπερανθρωπισμός» +- 'transition operator' -> «τελεστής μετάβασης» +- 'Transition system' -> «Σύστημα μετάβασης» +- 'Tree traversal' -> «Διάβαση δέντρου» +- 'trigger' -> «σκανδαλιστές» +- 'troubleshooting' -> «επιδιόρθωση βλαβών» +- 'True quantified Boolean formula' -> «Αληθής ποσοτικοποιημένος τύπος Boolean» +- 'trust layer' -> «επίπεδο αξιοπιστίας» +- 'truth maintenance' -> «συντήρηση αλήθειας» +- 'truth table' -> «πίνακας αληθείας» +- 'Turing machine' -> «Μηχανή Turing» +- 'Turing test' -> «Δοκιμή Turing» +- 'tutorial interview' -> «διδακτική συνέντευξη» +- 'two-person game' -> «παίγνια δύο αντιπάλων» +- 'Type system' -> «Σύστημα τύπου» +- 'unary constraint' -> «μοναδιαίος περιορισμός» +- 'unconditional probability' -> «πιθανότητα άνευ συνθηκών» +- 'underfitting' -> «υποπροσαρμογή» +- 'unification' -> «ενοποίηση» +- 'unifier' -> «ενοποιητής» +- 'unit clause' -> «μοναδιαία πρόταση» +- 'universal quantifier' -> «καθολικός ποσοδείκτης» +- 'unrestrict' -> «επέκταση» +- 'Unsupervised learning' -> «Εκμάθηση χωρίς επίβλεψη» +- 'unsupervised learning' -> «μάθηση χωρίς επίβλεψη» +- 'valence' -> «σθένος» +- 'valid 
plan' -> «έγκυρο πλάνο» +- 'validation' -> «έλεγχος αξιοπιστίας» +- 'validation data' -> «δεδομένα επικύρωσης» +- 'veracity' -> «ειλικρίνεια» +- 'verification' -> «επαλήθευση» +- 'Vision processing unit (VPU)' -> «Μονάδα επεξεργασίας όρασης» +- 'Weak AI' -> «Αδύναμη AI» +- 'web portals' -> «πύλες παγκόσμιου ιστού» +- 'web resource' -> «πόρος παγκόσμιου ιστού» +- 'web services' -> «υπηρεσίες παγκόσμιου ιστού» +- 'well formed formulae' -> «ορθά δομημένοι τύποι» +- 'working memory' -> «χώρος εργασίας» +- 'World Wide Web Consortium (W3C)' -> «Κοινοπραξία World Wide Web» diff --git a/src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/glossaries/machine_learning/es.txt b/src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/glossaries/machine_learning/es.txt new file mode 100644 index 000000000..1412f5b64 --- /dev/null +++ b/src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/glossaries/machine_learning/es.txt @@ -0,0 +1,175 @@ +# ES HINTS +## TERM MAPPINGS +These are preferred terminology choices for this language. Use them whenever they sound natural; adapt freely if context requires. 
+ +- 'accuracy' -> 'exactitud' +- 'activation function' -> 'función de activación' +- 'artificial intelligence' -> 'inteligencia artificial' +- 'AUC' -> 'AUC' +- 'AUC (Area under the ROC curve)' -> 'AUC (área bajo la curva ROC)' +- 'backpropagation' -> 'propagación inversa' +- 'batch' -> 'lote' +- 'batch size' -> 'tamaño del lote' +- 'bias (ethics/fairness)' -> 'sesgo (ética/equidad)' +- 'bias (math) or bias term' -> 'ordenada al origen (matemática) o término de sesgo' +- 'bias in ethics and fairness' -> 'sesgo en ética y equidad' +- 'bias term' -> 'término de sesgo' +- 'binary classification' -> 'Clasificación binaria' +- 'bucketing' -> 'Agrupamiento' +- 'categorical' -> 'categórico' +- 'categorical data' -> 'datos categóricos' +- 'class' -> 'clase' +- 'class-imbalanced dataset' -> 'conjunto de datos con desequilibrio de clases' +- 'class-imbalanced datasets' -> 'conjuntos de datos con desequilibrio de clases' +- 'classification' -> 'clasificación' +- 'classification model' -> 'modelo de clasificación' +- 'classification threshold' -> 'umbral de clasificación' +- 'classifier' -> 'clasificador' +- 'clipping' -> 'recorte' +- 'confusion matrix' -> 'matriz de confusión' +- 'continuous feature' -> 'atributo continuo' +- 'convergence' -> 'convergencia' +- 'data set or dataset' -> 'conjunto de datos (data set o dataset)' +- 'DataFrame' -> 'DataFrame' +- 'dataset' -> 'conjunto de datos' +- 'deep learning' -> 'aprendizaje profundo' +- 'deep model' -> 'modelo profundo' +- 'dense feature' -> 'atributo denso' +- 'depth' -> 'depth' +- 'discrete feature' -> 'atributo discreto' +- 'discrete features' -> 'atributos discretos' +- 'dynamic' -> 'dinámico' +- 'dynamic model' -> 'modelo dinámico' +- 'early stopping' -> 'Interrupción anticipada' +- 'embedding layer' -> 'Capa de embedding' +- 'embedding layers' -> 'capas de incorporación' +- 'epoch' -> 'época' +- 'example' -> 'ejemplo' +- 'false negative (FN)' -> 'falso negativo (FN)' +- 'false negatives' -> 'falsos negativos' +- 'false 
positive (FP)' -> 'Falso positivo (FP)' +- 'false positive rate' -> 'tasa de falsos positivos' +- 'false positive rate (FPR)' -> 'tasa de falsos positivos (FPR)' +- 'false positives' -> 'falsos positivos' +- 'feature' -> 'función' +- 'feature cross' -> 'combinación de atributos' +- 'feature crosses' -> 'combinaciones de atributos' +- 'feature engineering' -> 'ingeniería de atributos' +- 'feature set' -> 'conjunto de atributos' +- 'feature vector' -> 'vector de atributos' +- 'feedback loop' -> 'ciclo de retroalimentación' +- 'generalization' -> 'generalización' +- 'generalization curve' -> 'Curva de generalización' +- 'gradient descent' -> 'descenso de gradientes' +- 'ground truth' -> 'Verdad fundamental' +- 'hidden layer' -> 'Capa oculta' +- 'hidden layer(s)' -> 'capas ocultas' +- 'hyperparameter' -> 'hiperparámetro' +- 'independently and identically distributed (i.i.d)' -> 'independiente e idénticamente distribuido (i.i.d.)' +- 'inference' -> 'Inferencia' +- 'input layer' -> 'capa de entrada' +- 'interpretability' -> 'interpretabilidad' +- 'iteration' -> 'iteración' +- 'L0regularization' -> 'Regularización L0' +- 'L1loss' -> 'pérdida L1' +- 'L1regularization' -> 'regularización L1' +- 'L2loss' -> 'pérdida L2' +- 'L2regularization' -> 'regularización L2' +- 'label' -> 'etiqueta' +- 'labeled example' -> 'ejemplo etiquetado' +- 'lambda' -> 'lambda' +- 'layer' -> 'capa' +- 'learning rate' -> 'Tasa de aprendizaje' +- 'linear' -> 'linear' +- 'linear model' -> 'modelo lineal' +- 'linear models' -> 'modelos lineales' +- 'linear regression' -> 'regresión lineal' +- 'Log Loss' -> 'pérdida logística' +- 'log-odds' -> 'Logaritmo de probabilidad' +- 'logistic regression' -> 'regresión logística' +- 'loss' -> 'pérdida' +- 'loss curve' -> 'Curva de pérdida' +- 'loss function' -> 'función de pérdida' +- 'machine learning' -> 'aprendizaje automático' +- 'majority class' -> 'clase mayoritaria' +- 'mini-batch' -> 'minilote' +- 'minority class' -> 'clase minoritaria' +- 'model' -> 
'modelo' +- 'multi-class classification' -> 'clasificación de clases múltiples' +- 'negative class' -> 'clase negativa' +- 'negative classes' -> 'clases negativas' +- 'neural network' -> 'red neuronal' +- 'neural networks' -> 'redes neuronales' +- 'neuron' -> 'neurona' +- 'node (neural network)' -> 'nodo (red neuronal)' +- 'nonlinear' -> 'no lineal' +- 'nonstationarity' -> 'no estacionariedad' +- 'normalization' -> 'Normalización' +- 'numerical data' -> 'datos numéricos' +- 'offline' -> 'Sin conexión' +- 'offline inference' -> 'inferencia sin conexión' +- 'one-hot encoding' -> 'codificación one-hot' +- 'one-hot vector' -> 'vector de un solo 1' +- 'one-vs.-all' -> 'uno frente a todos' +- 'online' -> 'en línea' +- 'online inference' -> 'inferencia en línea' +- 'output layer' -> 'capa de salida' +- 'output layers' -> 'capas de salida' +- 'overfitting' -> 'sobreajuste' +- 'pandas' -> 'pandas' +- 'parameter' -> 'parámetro' +- 'positive class' -> 'clase positiva' +- 'positive classes' -> 'clases positivas' +- 'post-processing' -> 'posprocesamiento' +- 'precision' -> 'precision' +- 'prediction' -> 'predicción' +- 'proxy labels' -> 'etiquetas de proxy' +- 'RAG' -> 'RAG' +- 'rater' -> 'evaluador' +- 'recall' -> 'recall' +- 'Rectified Linear Unit (ReLU)' -> 'Unidad lineal rectificada (ReLU)' +- 'regression model' -> 'modelo de regresión' +- 'regularization' -> 'regularización' +- 'regularization rate' -> 'tasa de regularización' +- 'ReLU' -> 'ReLU' +- 'retrieval-augmented generation' -> 'generación aumentada por recuperación' +- 'retrieval-augmented generation (RAG)' -> 'Generación mejorada por recuperación (RAG)' +- 'ROC (receiver operating characteristic) Curve' -> 'Curva ROC (característica operativa del receptor)' +- 'ROC curve' -> 'curva ROC' +- 'Root Mean Squared Error (RMSE)' -> 'Raíz cuadrada del error cuadrático medio (RMSE)' +- 'sigmoid function' -> 'función sigmoidea' +- 'softmax' -> 'softmax' +- 'sparse feature' -> 'atributo disperso' +- 'sparse 
representation' -> 'representación dispersa' +- 'sparse vector' -> 'vector disperso' +- 'squared loss' -> 'Pérdida al cuadrado' +- 'static' -> 'static' +- 'static inference' -> 'Inferencia estática' +- 'static model' -> 'modelo estático' +- 'stationarity' -> 'Estacionariedad' +- 'Stochastic Gradient Descent (SGD)' -> 'Descenso de gradientes estocástico (SGD)' +- 'supervised learning' -> 'aprendizaje supervisado' +- 'supervised machine learning' -> 'aprendizaje automático supervisado' +- 'synthetic feature' -> 'atributo sintético' +- 'synthetic features' -> 'atributos sintéticos' +- 'test loss' -> 'Pérdida de prueba' +- 'training' -> 'entrenamiento' +- 'training loss' -> 'Pérdida de entrenamiento' +- 'training set' -> 'conjunto de entrenamiento' +- 'training-serving skew' -> 'Sesgo entre el entrenamiento y la entrega' +- 'true negative (TN)' -> 'verdadero negativo (VN)' +- 'true negatives' -> 'verdaderos negativos' +- 'true positive (TP)' -> 'verdadero positivo (VP)' +- 'true positive rate' -> 'tasa de verdaderos positivos' +- 'true positive rate (TPR)' -> 'tasa de verdaderos positivos (TVP)' +- 'true positives' -> 'verdaderos positivos' +- 'underfitting' -> 'Subajuste' +- 'unlabeled example' -> 'ejemplo sin etiqueta' +- 'unsupervised machine learning' -> 'aprendizaje automático no supervisado' +- 'validation' -> 'validación' +- 'validation dataset' -> 'conjunto de datos de validación' +- 'validation loss' -> 'Pérdida de validación' +- 'validation set' -> 'conjunto de validación' +- 'weight' -> 'peso' +- 'weighted sum' -> 'suma ponderada' +- 'Z-score normalization' -> 'normalización de la puntuación Z' diff --git a/src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/glossaries/machine_learning/es_419.txt b/src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/glossaries/machine_learning/es_419.txt new file mode 100644 index 000000000..1412f5b64 --- /dev/null +++ 
b/src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/glossaries/machine_learning/es_419.txt @@ -0,0 +1,175 @@ +# ES HINTS +## TERM MAPPINGS +These are preferred terminology choices for this language. Use them whenever they sound natural; adapt freely if context requires. + +- 'accuracy' -> 'exactitud' +- 'activation function' -> 'función de activación' +- 'artificial intelligence' -> 'inteligencia artificial' +- 'AUC' -> 'AUC' +- 'AUC (Area under the ROC curve)' -> 'AUC (área bajo la curva ROC)' +- 'backpropagation' -> 'propagación inversa' +- 'batch' -> 'lote' +- 'batch size' -> 'tamaño del lote' +- 'bias (ethics/fairness)' -> 'sesgo (ética/equidad)' +- 'bias (math) or bias term' -> 'ordenada al origen (matemática) o término de sesgo' +- 'bias in ethics and fairness' -> 'sesgo en ética y equidad' +- 'bias term' -> 'término de sesgo' +- 'binary classification' -> 'Clasificación binaria' +- 'bucketing' -> 'Agrupamiento' +- 'categorical' -> 'categórico' +- 'categorical data' -> 'datos categóricos' +- 'class' -> 'clase' +- 'class-imbalanced dataset' -> 'conjunto de datos con desequilibrio de clases' +- 'class-imbalanced datasets' -> 'conjuntos de datos con desequilibrio de clases' +- 'classification' -> 'clasificación' +- 'classification model' -> 'modelo de clasificación' +- 'classification threshold' -> 'umbral de clasificación' +- 'classifier' -> 'clasificador' +- 'clipping' -> 'recorte' +- 'confusion matrix' -> 'matriz de confusión' +- 'continuous feature' -> 'atributo continuo' +- 'convergence' -> 'convergencia' +- 'data set or dataset' -> 'conjunto de datos (data set o dataset)' +- 'DataFrame' -> 'DataFrame' +- 'dataset' -> 'conjunto de datos' +- 'deep learning' -> 'aprendizaje profundo' +- 'deep model' -> 'modelo profundo' +- 'dense feature' -> 'atributo denso' +- 'depth' -> 'depth' +- 'discrete feature' -> 'atributo discreto' +- 'discrete features' -> 'atributos discretos' +- 'dynamic' -> 'dinámico' +- 'dynamic model' -> 'modelo dinámico' 
+- 'early stopping' -> 'Interrupción anticipada' +- 'embedding layer' -> 'Capa de embedding' +- 'embedding layers' -> 'capas de incorporación' +- 'epoch' -> 'época' +- 'example' -> 'ejemplo' +- 'false negative (FN)' -> 'falso negativo (FN)' +- 'false negatives' -> 'falsos negativos' +- 'false positive (FP)' -> 'Falso positivo (FP)' +- 'false positive rate' -> 'tasa de falsos positivos' +- 'false positive rate (FPR)' -> 'tasa de falsos positivos (FPR)' +- 'false positives' -> 'falsos positivos' +- 'feature' -> 'función' +- 'feature cross' -> 'combinación de atributos' +- 'feature crosses' -> 'combinaciones de atributos' +- 'feature engineering' -> 'ingeniería de atributos' +- 'feature set' -> 'conjunto de atributos' +- 'feature vector' -> 'vector de atributos' +- 'feedback loop' -> 'ciclo de retroalimentación' +- 'generalization' -> 'generalización' +- 'generalization curve' -> 'Curva de generalización' +- 'gradient descent' -> 'descenso de gradientes' +- 'ground truth' -> 'Verdad fundamental' +- 'hidden layer' -> 'Capa oculta' +- 'hidden layer(s)' -> 'capas ocultas' +- 'hyperparameter' -> 'hiperparámetro' +- 'independently and identically distributed (i.i.d)' -> 'independiente e idénticamente distribuido (i.i.d.)' +- 'inference' -> 'Inferencia' +- 'input layer' -> 'capa de entrada' +- 'interpretability' -> 'interpretabilidad' +- 'iteration' -> 'iteración' +- 'L0regularization' -> 'Regularización L0' +- 'L1loss' -> 'pérdida L1' +- 'L1regularization' -> 'regularización L1' +- 'L2loss' -> 'pérdida L2' +- 'L2regularization' -> 'regularización L2' +- 'label' -> 'etiqueta' +- 'labeled example' -> 'ejemplo etiquetado' +- 'lambda' -> 'lambda' +- 'layer' -> 'capa' +- 'learning rate' -> 'Tasa de aprendizaje' +- 'linear' -> 'linear' +- 'linear model' -> 'modelo lineal' +- 'linear models' -> 'modelos lineales' +- 'linear regression' -> 'regresión lineal' +- 'Log Loss' -> 'pérdida logística' +- 'log-odds' -> 'Logaritmo de probabilidad' +- 'logistic regression' -> 'regresión 
logística' +- 'loss' -> 'pérdida' +- 'loss curve' -> 'Curva de pérdida' +- 'loss function' -> 'función de pérdida' +- 'machine learning' -> 'aprendizaje automático' +- 'majority class' -> 'clase mayoritaria' +- 'mini-batch' -> 'minilote' +- 'minority class' -> 'clase minoritaria' +- 'model' -> 'modelo' +- 'multi-class classification' -> 'clasificación de clases múltiples' +- 'negative class' -> 'clase negativa' +- 'negative classes' -> 'clases negativas' +- 'neural network' -> 'red neuronal' +- 'neural networks' -> 'redes neuronales' +- 'neuron' -> 'neurona' +- 'node (neural network)' -> 'nodo (red neuronal)' +- 'nonlinear' -> 'no lineal' +- 'nonstationarity' -> 'no estacionariedad' +- 'normalization' -> 'Normalización' +- 'numerical data' -> 'datos numéricos' +- 'offline' -> 'Sin conexión' +- 'offline inference' -> 'inferencia sin conexión' +- 'one-hot encoding' -> 'codificación one-hot' +- 'one-hot vector' -> 'vector de un solo 1' +- 'one-vs.-all' -> 'uno frente a todos' +- 'online' -> 'en línea' +- 'online inference' -> 'inferencia en línea' +- 'output layer' -> 'capa de salida' +- 'output layers' -> 'capas de salida' +- 'overfitting' -> 'sobreajuste' +- 'pandas' -> 'pandas' +- 'parameter' -> 'parámetro' +- 'positive class' -> 'clase positiva' +- 'positive classes' -> 'clases positivas' +- 'post-processing' -> 'posprocesamiento' +- 'precision' -> 'precision' +- 'prediction' -> 'predicción' +- 'proxy labels' -> 'etiquetas de proxy' +- 'RAG' -> 'RAG' +- 'rater' -> 'evaluador' +- 'recall' -> 'recall' +- 'Rectified Linear Unit (ReLU)' -> 'Unidad lineal rectificada (ReLU)' +- 'regression model' -> 'modelo de regresión' +- 'regularization' -> 'regularización' +- 'regularization rate' -> 'tasa de regularización' +- 'ReLU' -> 'ReLU' +- 'retrieval-augmented generation' -> 'generación aumentada por recuperación' +- 'retrieval-augmented generation (RAG)' -> 'Generación mejorada por recuperación (RAG)' +- 'ROC (receiver operating characteristic) Curve' -> 'Curva 
ROC (característica operativa del receptor)' +- 'ROC curve' -> 'curva ROC' +- 'Root Mean Squared Error (RMSE)' -> 'Raíz cuadrada del error cuadrático medio (RMSE)' +- 'sigmoid function' -> 'función sigmoidea' +- 'softmax' -> 'softmax' +- 'sparse feature' -> 'atributo disperso' +- 'sparse representation' -> 'representación dispersa' +- 'sparse vector' -> 'vector disperso' +- 'squared loss' -> 'Pérdida al cuadrado' +- 'static' -> 'static' +- 'static inference' -> 'Inferencia estática' +- 'static model' -> 'modelo estático' +- 'stationarity' -> 'Estacionariedad' +- 'Stochastic Gradient Descent (SGD)' -> 'Descenso de gradientes estocástico (SGD)' +- 'supervised learning' -> 'aprendizaje supervisado' +- 'supervised machine learning' -> 'aprendizaje automático supervisado' +- 'synthetic feature' -> 'atributo sintético' +- 'synthetic features' -> 'atributos sintéticos' +- 'test loss' -> 'Pérdida de prueba' +- 'training' -> 'entrenamiento' +- 'training loss' -> 'Pérdida de entrenamiento' +- 'training set' -> 'conjunto de entrenamiento' +- 'training-serving skew' -> 'Sesgo entre el entrenamiento y la entrega' +- 'true negative (TN)' -> 'verdadero negativo (VN)' +- 'true negatives' -> 'verdaderos negativos' +- 'true positive (TP)' -> 'verdadero positivo (VP)' +- 'true positive rate' -> 'tasa de verdaderos positivos' +- 'true positive rate (TPR)' -> 'tasa de verdaderos positivos (TVP)' +- 'true positives' -> 'verdaderos positivos' +- 'underfitting' -> 'Subajuste' +- 'unlabeled example' -> 'ejemplo sin etiqueta' +- 'unsupervised machine learning' -> 'aprendizaje automático no supervisado' +- 'validation' -> 'validación' +- 'validation dataset' -> 'conjunto de datos de validación' +- 'validation loss' -> 'Pérdida de validación' +- 'validation set' -> 'conjunto de validación' +- 'weight' -> 'peso' +- 'weighted sum' -> 'suma ponderada' +- 'Z-score normalization' -> 'normalización de la puntuación Z' diff --git 
a/src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/glossaries/machine_learning/fr.txt b/src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/glossaries/machine_learning/fr.txt new file mode 100644 index 000000000..3f64f3098 --- /dev/null +++ b/src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/glossaries/machine_learning/fr.txt @@ -0,0 +1,175 @@ +# FR HINTS +## TERM MAPPINGS +These are preferred terminology choices for this language. Use them whenever they sound natural; adapt freely if context requires. + +- 'accuracy' -> 'accuracy' +- 'activation function' -> 'fonction d'activation' +- 'artificial intelligence' -> 'intelligence artificielle' +- 'AUC' -> 'AUC' +- 'AUC (Area under the ROC curve)' -> 'AUC (aire sous la courbe ROC)' +- 'backpropagation' -> 'rétropropagation' +- 'batch' -> 'lot' +- 'batch size' -> 'taille du lot' +- 'bias (ethics/fairness)' -> 'biais (éthique/équité) (bias (ethics/fairness))' +- 'bias (math) or bias term' -> 'biais (mathématiques) ou terme de biais' +- 'bias in ethics and fairness' -> 'biais en matière d'éthique et d'équité' +- 'bias term' -> 'biais' +- 'binary classification' -> 'classification binaire' +- 'bucketing' -> 'le binning' +- 'categorical' -> 'catégorielle' +- 'categorical data' -> 'données catégorielles' +- 'class' -> 'classe' +- 'class-imbalanced dataset' -> 'ensemble de données avec déséquilibre des classes' +- 'class-imbalanced datasets' -> 'ensembles de données déséquilibrés en termes de classes' +- 'classification' -> 'classification' +- 'classification model' -> 'modèle de classification' +- 'classification threshold' -> 'seuil de classification' +- 'classifier' -> 'classificateur' +- 'clipping' -> 'écrêtage' +- 'confusion matrix' -> 'matrice de confusion' +- 'continuous feature' -> 'caractéristique continue' +- 'convergence' -> 'convergence' +- 'data set or dataset' -> 'ensemble de données (data set ou dataset)' +- 'DataFrame' -> 'DataFrame' +- 
'dataset' -> 'ensemble de données' +- 'deep learning' -> 'deep learning' +- 'deep model' -> 'modèle deep learning' +- 'dense feature' -> 'caractéristique dense' +- 'depth' -> 'profondeur' +- 'discrete feature' -> 'caractéristique discrète' +- 'discrete features' -> 'caractéristiques discrètes' +- 'dynamic' -> 'dynamic' +- 'dynamic model' -> 'modèle dynamique' +- 'early stopping' -> 'arrêt prématuré' +- 'embedding layer' -> 'couche d'embedding' +- 'embedding layers' -> 'couches d'embedding' +- 'epoch' -> 'epoch' +- 'example' -> 'exemple' +- 'false negative (FN)' -> 'Faux négatif (FN)' +- 'false negatives' -> 'faux négatifs' +- 'false positive (FP)' -> 'Faux positif (FP)' +- 'false positive rate' -> 'taux de faux positifs' +- 'false positive rate (FPR)' -> 'taux de faux positifs (TFP) (false positive rate (FPR))' +- 'false positives' -> 'faux positifs' +- 'feature' -> 'fonctionnalité' +- 'feature cross' -> 'croisement de caractéristiques' +- 'feature crosses' -> 'caractéristiques croisées' +- 'feature engineering' -> 'l'ingénierie des caractéristiques.' 
+- 'feature set' -> 'ensemble de fonctionnalités' +- 'feature vector' -> 'vecteur de caractéristiques' +- 'feedback loop' -> 'boucle de rétroaction' +- 'generalization' -> 'généralisation' +- 'generalization curve' -> 'courbe de généralisation' +- 'gradient descent' -> 'descente de gradient' +- 'ground truth' -> 'vérité terrain' +- 'hidden layer' -> 'couche cachée' +- 'hidden layer(s)' -> 'couches cachées' +- 'hyperparameter' -> 'hyperparamètre' +- 'independently and identically distributed (i.i.d)' -> 'variables indépendantes et identiquement distribuées (i.i.d)' +- 'inference' -> 'inférence' +- 'input layer' -> 'couche d'entrée' +- 'interpretability' -> 'interprétabilité' +- 'iteration' -> 'itération' +- 'L0regularization' -> 'Régularisation L0' +- 'L1loss' -> 'perte L1' +- 'L1regularization' -> 'régularisationL1' +- 'L2loss' -> 'perte L2' +- 'L2regularization' -> 'régularisationL2' +- 'label' -> 'étiquette' +- 'labeled example' -> 'exemple étiqueté' +- 'lambda' -> 'lambda' +- 'layer' -> 'cachée)' +- 'learning rate' -> 'taux d'apprentissage' +- 'linear' -> 'linear' +- 'linear model' -> 'modèle linéaire' +- 'linear models' -> 'modèles linéaires' +- 'linear regression' -> 'régression linéaire' +- 'Log Loss' -> 'perte logistique' +- 'log-odds' -> 'logarithme de cote' +- 'logistic regression' -> 'régression logistique' +- 'loss' -> 'perte' +- 'loss curve' -> 'courbe de perte' +- 'loss function' -> 'fonction de perte' +- 'machine learning' -> 'machine learning' +- 'majority class' -> 'classe majoritaire' +- 'mini-batch' -> 'mini-lot' +- 'minority class' -> 'classe minoritaire' +- 'model' -> 'modèle' +- 'multi-class classification' -> 'classification à classes multiples' +- 'negative class' -> 'classe négative' +- 'negative classes' -> 'classes négatives' +- 'neural network' -> 'neurones feedforward' +- 'neural networks' -> 'réseaux de neurones' +- 'neuron' -> 'neurone' +- 'node (neural network)' -> 'nœud (réseau de neurones)' +- 'nonlinear' -> 'non linéaire' +- 
'nonstationarity' -> 'non-stationnarité' +- 'normalization' -> 'normalisation' +- 'numerical data' -> 'données numériques' +- 'offline' -> 'Hors connexion' +- 'offline inference' -> 'inférence hors connexion' +- 'one-hot encoding' -> 'Encodage one-hot' +- 'one-hot vector' -> 'vecteur one-hot' +- 'one-vs.-all' -> 'un contre tous' +- 'online' -> 'online' +- 'online inference' -> 'inférence en ligne' +- 'output layer' -> 'couche de sortie' +- 'output layers' -> 'couches de sortie' +- 'overfitting' -> 'surapprentissage' +- 'pandas' -> 'pandas' +- 'parameter' -> 'paramètre' +- 'positive class' -> 'classe positive' +- 'positive classes' -> 'classes positives' +- 'post-processing' -> 'post-traitement' +- 'precision' -> 'precision' +- 'prediction' -> 'prédiction' +- 'proxy labels' -> 'étiquettes de substitution' +- 'RAG' -> 'RAG' +- 'rater' -> 'évaluateur' +- 'recall' -> 'recall (rappel)' +- 'Rectified Linear Unit (ReLU)' -> 'Unité de rectification linéaire (ReLU)' +- 'regression model' -> 'modèle de régression' +- 'regularization' -> 'régularisation' +- 'regularization rate' -> 'taux de régularisation' +- 'ReLU' -> 'ReLU' +- 'retrieval-augmented generation' -> 'génération augmentée par récupération' +- 'retrieval-augmented generation (RAG)' -> 'génération augmentée par récupération (RAG)' +- 'ROC (receiver operating characteristic) Curve' -> 'Courbe ROC (receiver operating characteristic)' +- 'ROC curve' -> 'courbe ROC' +- 'Root Mean Squared Error (RMSE)' -> 'la racine carrée de l'erreur quadratique moyenne (RMSE, Root Mean Squared Error)' +- 'sigmoid function' -> 'fonction sigmoïde' +- 'softmax' -> 'softmax' +- 'sparse feature' -> 'caractéristique creuse' +- 'sparse representation' -> 'représentation creuse' +- 'sparse vector' -> 'vecteur creux' +- 'squared loss' -> 'perte quadratique' +- 'static' -> 'static' +- 'static inference' -> 'inférence statique' +- 'static model' -> 'modèle statique' +- 'stationarity' -> 'stationnarité' +- 'Stochastic Gradient Descent (SGD)' -> 
'Descente de gradient stochastique (SGD, Stochastic Gradient Descent)' +- 'supervised learning' -> 'apprentissage supervisé' +- 'supervised machine learning' -> 'machine learning supervisé' +- 'synthetic feature' -> 'caractéristique synthétique' +- 'synthetic features' -> 'caractéristiques synthétiques' +- 'test loss' -> 'perte de test' +- 'training' -> 'entraînement' +- 'training loss' -> 'perte d'entraînement' +- 'training set' -> 'ensemble d'entraînement' +- 'training-serving skew' -> 'décalage entraînement/mise en service' +- 'true negative (TN)' -> 'vrai négatif (VN)' +- 'true negatives' -> 'vrais négatifs' +- 'true positive (TP)' -> 'vrai positif (VP)' +- 'true positive rate' -> 'taux de vrais positifs' +- 'true positive rate (TPR)' -> 'taux de vrais positifs (TVP)' +- 'true positives' -> 'vrais positifs' +- 'underfitting' -> 'sous-ajustement' +- 'unlabeled example' -> 'exemple sans étiquette' +- 'unsupervised machine learning' -> 'machine learning non supervisé' +- 'validation' -> 'validation' +- 'validation dataset' -> 'ensemble de données de validation' +- 'validation loss' -> 'perte de validation' +- 'validation set' -> 'ensemble de validation' +- 'weight' -> 'weight' +- 'weighted sum' -> 'Somme pondérée' +- 'Z-score normalization' -> 'Normalisation du score Z' diff --git a/src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/glossaries/machine_learning/ja.txt b/src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/glossaries/machine_learning/ja.txt new file mode 100644 index 000000000..fb3787a79 --- /dev/null +++ b/src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/glossaries/machine_learning/ja.txt @@ -0,0 +1,175 @@ +# JA HINTS +## TERM MAPPINGS +These are preferred terminology choices for this language. Use them whenever they sound natural; adapt freely if context requires. 
+ +- 'accuracy' -> 「accuracy」 +- 'activation function' -> 「活性化関数」 +- 'artificial intelligence' -> 「AI」 +- 'AUC' -> 「AUC」 +- 'AUC (Area under the ROC curve)' -> 「AUC(ROC 曲線の下の面積)」 +- 'backpropagation' -> 「バックプロパゲーション」 +- 'batch' -> 「Batch」 +- 'batch size' -> 「バッチサイズ」 +- 'bias (ethics/fairness)' -> 「バイアス(倫理/公平性)」 +- 'bias (math) or bias term' -> 「バイアス(数学)またはバイアス項」 +- 'bias in ethics and fairness' -> 「倫理と公平性のバイアス」 +- 'bias term' -> 「バイアス項」 +- 'binary classification' -> 「バイナリ分類」 +- 'bucketing' -> 「バケット化、」 +- 'categorical' -> 「カテゴリカル」 +- 'categorical data' -> 「カテゴリデータ」 +- 'class' -> 「クラス」 +- 'class-imbalanced dataset' -> 「クラスの不均衡なデータセット」 +- 'class-imbalanced datasets' -> 「クラス不均衡データセット」 +- 'classification' -> 「分類」 +- 'classification model' -> 「分類モデル」 +- 'classification threshold' -> 「分類しきい値」 +- 'classifier' -> 「分類器」 +- 'clipping' -> 「クリッピング」 +- 'confusion matrix' -> 「混同行列」 +- 'continuous feature' -> 「連続特徴」 +- 'convergence' -> 「収束」 +- 'data set or dataset' -> 「データセット」 +- 'DataFrame' -> 「DataFrame」 +- 'dataset' -> 「データセット」 +- 'deep learning' -> 「ディープ ラーニング」 +- 'deep model' -> 「ディープモデル」 +- 'dense feature' -> 「密な特徴」 +- 'depth' -> 「深さ」 +- 'discrete feature' -> 「離散特徴」 +- 'discrete features' -> 「離散特徴」 +- 'dynamic' -> 「動的」 +- 'dynamic model' -> 「動的モデル」 +- 'early stopping' -> 「早期停止」 +- 'embedding layer' -> 「エンベディング レイヤ」 +- 'embedding layers' -> 「エンベディング レイヤ」 +- 'epoch' -> 「エポック」 +- 'example' -> 「例」 +- 'false negative (FN)' -> 「偽陰性(FN)」 +- 'false negatives' -> 「偽陰性」 +- 'false positive (FP)' -> 「偽陽性(FP)」 +- 'false positive rate' -> 「偽陽性率」 +- 'false positive rate (FPR)' -> 「偽陽性率(FPR)」 +- 'false positives' -> 「偽陽性」 +- 'feature' -> 「機能」 +- 'feature cross' -> 「特徴クロス」 +- 'feature crosses' -> 「特徴交差」 +- 'feature engineering' -> 「2つのステップが含まれます」 +- 'feature set' -> 「機能セット」 +- 'feature vector' -> 「特徴ベクトル」 +- 'feedback loop' -> 「フィードバック ループ」 +- 'generalization' -> 「一般化」 +- 'generalization curve' -> 「汎化曲線」 +- 'gradient descent' -> 「勾配降下法」 +- 'ground truth' -> 「グラウンド トゥルース」 +- 'hidden layer' -> 
「隠れ層」 +- 'hidden layer(s)' -> 「隠れ層」 +- 'hyperparameter' -> 「ハイパーパラメータ」 +- 'independently and identically distributed (i.i.d)' -> 「独立同分布(i.i.d)」 +- 'inference' -> 「推論」 +- 'input layer' -> 「入力レイヤ」 +- 'interpretability' -> 「解釈可能性」 +- 'iteration' -> 「繰り返し」 +- 'L0regularization' -> 「L0正規化」 +- 'L1loss' -> 「L1損失」 +- 'L1regularization' -> 「L1正則化」 +- 'L2loss' -> 「L2損失」 +- 'L2regularization' -> 「L2正則化」 +- 'label' -> 「ラベル」 +- 'labeled example' -> 「ラベル付きの例」 +- 'lambda' -> 「lambda」 +- 'layer' -> 「レイヤ」 +- 'learning rate' -> 「学習率」 +- 'linear' -> 「線形」 +- 'linear model' -> 「線形モデル」 +- 'linear models' -> 「線形モデル」 +- 'linear regression' -> 「線形回帰」 +- 'Log Loss' -> 「対数損失」 +- 'log-odds' -> 「対数オッズ」 +- 'logistic regression' -> 「ロジスティック回帰」 +- 'loss' -> 「損失」 +- 'loss curve' -> 「損失曲線」 +- 'loss function' -> 「損失関数」 +- 'machine learning' -> 「機械学習」 +- 'majority class' -> 「多数派クラス」 +- 'mini-batch' -> 「ミニバッチ」 +- 'minority class' -> 「少数派クラス」 +- 'model' -> 「モデル」 +- 'multi-class classification' -> 「マルチクラス分類」 +- 'negative class' -> 「陰性クラス」 +- 'negative classes' -> 「陰性クラス」 +- 'neural network' -> 「ニューラル ネットワークの」 +- 'neural networks' -> 「ニューラル ネットワーク」 +- 'neuron' -> 「ニューロン」 +- 'node (neural network)' -> 「ノード(ニューラル ネットワーク)」 +- 'nonlinear' -> 「非線形」 +- 'nonstationarity' -> 「非定常性」 +- 'normalization' -> 「正規化」 +- 'numerical data' -> 「数値データ」 +- 'offline' -> 「オフライン」 +- 'offline inference' -> 「オフライン推論」 +- 'one-hot encoding' -> 「ワンホット エンコード」 +- 'one-hot vector' -> 「ワンホット ベクトル」 +- 'one-vs.-all' -> 「1 対すべて」 +- 'online' -> 「オンライン」 +- 'online inference' -> 「オンライン推論」 +- 'output layer' -> 「出力レイヤ」 +- 'output layers' -> 「出力レイヤ」 +- 'overfitting' -> 「過学習」 +- 'pandas' -> 「pandas」 +- 'parameter' -> 「パラメータ」 +- 'positive class' -> 「陽性クラス」 +- 'positive classes' -> 「陽性クラス」 +- 'post-processing' -> 「後処理」 +- 'precision' -> 「precision」 +- 'prediction' -> 「予測」 +- 'proxy labels' -> 「プロキシラベル」 +- 'RAG' -> 「RAG」 +- 'rater' -> 「rater」 +- 'recall' -> 「recall」 +- 'Rectified Linear Unit (ReLU)' -> 「正規化線形ユニット(ReLU)」 +- 'regression model' -> 
「回帰モデル」 +- 'regularization' -> 「正則化」 +- 'regularization rate' -> 「正則化率」 +- 'ReLU' -> 「ReLU」 +- 'retrieval-augmented generation' -> 「検索拡張生成」 +- 'retrieval-augmented generation (RAG)' -> 「検索拡張生成(RAG)」 +- 'ROC (receiver operating characteristic) Curve' -> 「ROC(受信者操作特性)曲線」 +- 'ROC curve' -> 「ROC 曲線」 +- 'Root Mean Squared Error (RMSE)' -> 「二乗平均平方根誤差(RMSE)」 +- 'sigmoid function' -> 「シグモイド関数」 +- 'softmax' -> 「Softmax」 +- 'sparse feature' -> 「スパース特徴」 +- 'sparse representation' -> 「スパース表現」 +- 'sparse vector' -> 「スパース ベクトル」 +- 'squared loss' -> 「二乗損失」 +- 'static' -> 「static」 +- 'static inference' -> 「静的推論」 +- 'static model' -> 「静的モデル」 +- 'stationarity' -> 「定常性」 +- 'Stochastic Gradient Descent (SGD)' -> 「確率的勾配降下法(SGD)」 +- 'supervised learning' -> 「教師あり学習」 +- 'supervised machine learning' -> 「教師あり機械学習」 +- 'synthetic feature' -> 「合成特徴」 +- 'synthetic features' -> 「合成特徴」 +- 'test loss' -> 「テスト損失」 +- 'training' -> 「トレーニング」 +- 'training loss' -> 「トレーニングの損失」 +- 'training set' -> 「トレーニング セット」 +- 'training-serving skew' -> 「トレーニング サービング スキュー」 +- 'true negative (TN)' -> 「真陰性(TN)」 +- 'true negatives' -> 「真陰性」 +- 'true positive (TP)' -> 「真陽性(TP)」 +- 'true positive rate' -> 「真陽性率」 +- 'true positive rate (TPR)' -> 「真陽性率(TPR)」 +- 'true positives' -> 「真陽性」 +- 'underfitting' -> 「アンダーフィット」 +- 'unlabeled example' -> 「ラベルのない例」 +- 'unsupervised machine learning' -> 「教師なし機械学習」 +- 'validation' -> 「検証」 +- 'validation dataset' -> 「検証データセット」 +- 'validation loss' -> 「検証損失」 +- 'validation set' -> 「検証セット」 +- 'weight' -> 「weight」 +- 'weighted sum' -> 「加重合計」 +- 'Z-score normalization' -> 「Z スコアの正規化」 diff --git a/src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/glossaries/machine_learning/pt_BR.txt b/src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/glossaries/machine_learning/pt_BR.txt new file mode 100644 index 000000000..16b2b9dee --- /dev/null +++ b/src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/glossaries/machine_learning/pt_BR.txt 
@@ -0,0 +1,175 @@ +# PT-BR HINTS +## TERM MAPPINGS +These are preferred terminology choices for this language. Use them whenever they sound natural; adapt freely if context requires. + +- 'accuracy' -> 'precisão' +- 'activation function' -> 'função de ativação' +- 'artificial intelligence' -> 'inteligência artificial' +- 'AUC' -> 'AUC' +- 'AUC (Area under the ROC curve)' -> 'AUC (área sob a curva ROC)' +- 'backpropagation' -> 'retropropagação' +- 'batch' -> 'lote' +- 'batch size' -> 'tamanho do lote' +- 'bias (ethics/fairness)' -> 'viés (ética/justiça)' +- 'bias (math) or bias term' -> 'viés (matemática) ou termo de viés' +- 'bias in ethics and fairness' -> 'viés em ética e justiça' +- 'bias term' -> 'termo de viés' +- 'binary classification' -> 'classificação binária' +- 'bucketing' -> 'agrupamento por classes' +- 'categorical' -> 'categórico' +- 'categorical data' -> 'dados categóricos' +- 'class' -> 'classe' +- 'class-imbalanced dataset' -> 'conjunto de dados não balanceado' +- 'class-imbalanced datasets' -> 'conjuntos de dados com classes desbalanceadas' +- 'classification' -> 'classificação' +- 'classification model' -> 'modelo de classificação' +- 'classification threshold' -> 'limiar de classificação' +- 'classifier' -> 'classificador' +- 'clipping' -> 'corte' +- 'confusion matrix' -> 'matriz de confusão' +- 'continuous feature' -> 'atributo contínuo' +- 'convergence' -> 'convergência' +- 'data set or dataset' -> 'conjunto de dados' +- 'DataFrame' -> 'DataFrame' +- 'dataset' -> 'conjunto de dados' +- 'deep learning' -> 'aprendizado profundo' +- 'deep model' -> 'modelo profundo' +- 'dense feature' -> 'atributo denso' +- 'depth' -> 'profundidade' +- 'discrete feature' -> 'atributo discreto' +- 'discrete features' -> 'recursos discretos' +- 'dynamic' -> 'dinâmico' +- 'dynamic model' -> 'modelo dinâmico' +- 'early stopping' -> 'parada antecipada' +- 'embedding layer' -> 'camada de embedding' +- 'embedding layers' -> 'camadas de embedding' +- 'epoch' -> 'época' 
+- 'example' -> 'exemplo' +- 'false negative (FN)' -> 'falso negativo (FN)' +- 'false negatives' -> 'falsos negativos' +- 'false positive (FP)' -> 'falso positivo (FP)' +- 'false positive rate' -> 'taxa de falso positivo' +- 'false positive rate (FPR)' -> 'taxa de falso positivo (FPR)' +- 'false positives' -> 'falsos positivos' +- 'feature' -> 'recurso' +- 'feature cross' -> 'cruzamento de atributos' +- 'feature crosses' -> 'cruzamentos de recursos' +- 'feature engineering' -> 'engenharia de atributos' +- 'feature set' -> 'conjunto de atributos' +- 'feature vector' -> 'vetor de atributos' +- 'feedback loop' -> 'ciclo de feedback' +- 'generalization' -> 'generalização' +- 'generalization curve' -> 'curva de generalização' +- 'gradient descent' -> 'gradiente descendente' +- 'ground truth' -> 'informações empíricas' +- 'hidden layer' -> 'camada oculta' +- 'hidden layer(s)' -> 'camadas ocultas' +- 'hyperparameter' -> 'hiperparâmetro' +- 'independently and identically distributed (i.i.d)' -> 'independente e identicamente distribuído (i.i.d)' +- 'inference' -> 'inferência' +- 'input layer' -> 'camada de entrada' +- 'interpretability' -> 'interpretabilidade' +- 'iteration' -> 'iteração' +- 'L0regularization' -> 'Regularização L0' +- 'L1loss' -> 'L1' +- 'L1regularization' -> 'regularização L1' +- 'L2loss' -> 'perda L2' +- 'L2regularization' -> 'regularizaçãoL2' +- 'label' -> 'o rótulo.' 
+- 'labeled example' -> 'exemplo rotulado' +- 'lambda' -> 'lambda' +- 'layer' -> 'layer' +- 'learning rate' -> 'taxa de aprendizado' +- 'linear' -> 'linear' +- 'linear model' -> 'modelo linear' +- 'linear models' -> 'modelos lineares' +- 'linear regression' -> 'regressão linear' +- 'Log Loss' -> 'perda logarítmica' +- 'log-odds' -> 'log-odds' +- 'logistic regression' -> 'regressão logística' +- 'loss' -> 'perda' +- 'loss curve' -> 'curva de perda' +- 'loss function' -> 'função de perda' +- 'machine learning' -> 'machine learning' +- 'majority class' -> 'classe majoritária' +- 'mini-batch' -> 'minilote' +- 'minority class' -> 'classe minoritária' +- 'model' -> 'modelo' +- 'multi-class classification' -> 'classificação multiclasse' +- 'negative class' -> 'classe negativa' +- 'negative classes' -> 'classes negativas' +- 'neural network' -> 'do feedforward' +- 'neural networks' -> 'redes neurais' +- 'neuron' -> 'neurônio' +- 'node (neural network)' -> 'nó (rede neural)' +- 'nonlinear' -> 'não linear' +- 'nonstationarity' -> 'não estacionariedade' +- 'normalization' -> 'normalização' +- 'numerical data' -> 'dados numéricos' +- 'offline' -> 'off-line' +- 'offline inference' -> 'inferência off-line' +- 'one-hot encoding' -> 'codificação one-hot' +- 'one-hot vector' -> 'vetor one-hot' +- 'one-vs.-all' -> 'um-contra-todos' +- 'online' -> 'on-line' +- 'online inference' -> 'inferência on-line' +- 'output layer' -> 'camada de saída' +- 'output layers' -> 'camadas de saída' +- 'overfitting' -> 'overfitting' +- 'pandas' -> 'pandas' +- 'parameter' -> 'parâmetro' +- 'positive class' -> 'classe positiva' +- 'positive classes' -> 'classes positivas' +- 'post-processing' -> 'pós-processamento' +- 'precision' -> 'precision' +- 'prediction' -> 'previsão' +- 'proxy labels' -> 'rotulação indireta' +- 'RAG' -> 'RAG' +- 'rater' -> 'rotulador' +- 'recall' -> 'recall' +- 'Rectified Linear Unit (ReLU)' -> 'Unidade linear retificada (ReLU)' +- 'regression model' -> 'modelo de regressão' +- 
'regularization' -> 'regularização' +- 'regularization rate' -> 'taxa de regularização' +- 'ReLU' -> 'ReLU' +- 'retrieval-augmented generation' -> 'geração aumentada de recuperação' +- 'retrieval-augmented generation (RAG)' -> 'geração aumentada de recuperação (RAG)' +- 'ROC (receiver operating characteristic) Curve' -> 'Curva ROC' +- 'ROC curve' -> 'curva ROC' +- 'Root Mean Squared Error (RMSE)' -> 'Raiz do erro quadrático médio (RMSE)' +- 'sigmoid function' -> 'função sigmoide' +- 'softmax' -> 'softmax' +- 'sparse feature' -> 'atributo esparso' +- 'sparse representation' -> 'representação esparsa' +- 'sparse vector' -> 'vetor esparso' +- 'squared loss' -> 'perda quadrática' +- 'static' -> 'static' +- 'static inference' -> 'inferência estática' +- 'static model' -> 'modelo estático' +- 'stationarity' -> 'estacionariedade' +- 'Stochastic Gradient Descent (SGD)' -> 'Gradiente descendente estocástico (GDE)' +- 'supervised learning' -> 'aprendizado supervisionado' +- 'supervised machine learning' -> 'aprendizado de máquina supervisionado' +- 'synthetic feature' -> 'atributo sintético' +- 'synthetic features' -> 'recursos sintéticos' +- 'test loss' -> 'perda de teste' +- 'training' -> 'treinamento' +- 'training loss' -> 'perda de treinamento' +- 'training set' -> 'conjunto de treinamento' +- 'training-serving skew' -> 'desvio entre treinamento e disponibilização' +- 'true negative (TN)' -> 'verdadeiro negativo (VN)' +- 'true negatives' -> 'verdadeiros negativos' +- 'true positive (TP)' -> 'verdadeiro positivo (VP)' +- 'true positive rate' -> 'taxa de verdadeiros positivos' +- 'true positive rate (TPR)' -> 'taxa de verdadeiro positivo (TVP)' +- 'true positives' -> 'verdadeiros positivos' +- 'underfitting' -> 'underfitting' +- 'unlabeled example' -> 'exemplo sem rótulo' +- 'unsupervised machine learning' -> 'aprendizado de máquina sem supervisão' +- 'validation' -> 'validação' +- 'validation dataset' -> 'conjunto de dados de validação' +- 'validation loss' -> 'perda de 
validação' +- 'validation set' -> 'conjunto de validação' +- 'weight' -> 'peso' +- 'weighted sum' -> 'soma de pesos' +- 'Z-score normalization' -> 'Normalização de pontuação Z' diff --git a/src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/glossaries/machine_learning/ru.txt b/src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/glossaries/machine_learning/ru.txt new file mode 100644 index 000000000..0c87ef85e --- /dev/null +++ b/src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/glossaries/machine_learning/ru.txt @@ -0,0 +1,213 @@ +# RU HINTS +## TERM MAPPINGS +These are preferred terminology choices for this language. Use them whenever they sound natural; adapt freely if context requires. + +- 'accuracy' -> «точность» +- 'activation function' -> «функция активации» +- 'artificial intelligence' -> «искусственный интеллект» +- 'AUC' -> «AUC» +- 'AUC (Area under the ROC curve)' -> «AUC (площадь под ROC-кривой)» +- 'backpropagation' -> «обратное распространение» +- 'batch' -> «партия» +- 'batch size' -> «размер партии» +- 'bias (ethics/fairness)' -> «предвзятость (этика/справедливость)» +- 'bias (math) or bias term' -> «предвзятость (математика) или термин предвзятости» +- 'bias in ethics and fairness' -> «предвзятостью в этике и справедливости» +- 'bias term' -> «термином «смещение»» +- 'binary classification' -> «бинарная классификация» +- 'bucketing' -> «распределение» +- 'categorical' -> «категориальном» +- 'categorical data' -> «категориальные данные» +- 'class' -> «сорт» +- 'class-imbalanced dataset' -> «набор данных с несбалансированным классом» +- 'class-imbalanced datasets' -> «несбалансированные по классам наборы данных» +- 'classification' -> «классификации» +- 'classification model' -> «модель классификации» +- 'classification threshold' -> «порог классификации» +- 'classifier' -> «классификатор» +- 'clipping' -> «вырезка» +- 'confusion matrix' -> «матрица путаницы» +- 'continuous feature' -> 
«непрерывная функция» +- 'convergence' -> «конвергенция» +- 'data set or dataset' -> «набор данных или набор данных» +- 'DataFrame' -> «DataFrame» +- 'dataset' -> «Набор данных» +- 'deep learning' -> «глубоком обучении» +- 'deep model' -> «глубокая модель» +- 'dense feature' -> «плотная особенность» +- 'depth' -> «глубина» +- 'discrete feature' -> «дискретная особенность» +- 'discrete features' -> «дискретными признаками» +- 'dynamic' -> «динамический» +- 'dynamic model' -> «динамическая модель» +- 'early stopping' -> «ранняя остановка» +- 'embedding layer' -> «слой внедрения» +- 'embedding layers' -> «встраиваемых слоев» +- 'epoch' -> «эпоха» +- 'example' -> «пример» +- 'false negative (FN)' -> «ложноотрицательный результат (ЛО)» +- 'false negatives' -> «ложноотрицательных результатов» +- 'false positive (FP)' -> «ложноположительный результат (ЛП)» +- 'false positive rate' -> «false positive rate» +- 'false positive rate (FPR)' -> «частота ложноположительных результатов (FPR)» +- 'false positives' -> «ложноположительных результатов» +- 'feature' -> «особенность» +- 'feature cross' -> «кросс-функция» +- 'feature crosses' -> «пересечение признаков» +- 'feature engineering' -> «проектирование функций» +- 'feature set' -> «набор функций» +- 'feature vector' -> «вектор признаков» +- 'feedback loop' -> «петля обратной связи» +- 'generalization' -> «обобщение» +- 'generalization curve' -> «кривая обобщения» +- 'gradient descent' -> «градиентный спуск» +- 'ground truth' -> «истина» +- 'hidden layer' -> «скрытый слой» +- 'hidden layer(s)' -> «скрытых слоях» +- 'hyperparameter' -> «гиперпараметр» +- 'independently and identically distributed (i.i.d)' -> «независимо и одинаково распределены (iid)» +- 'inference' -> «вывод» +- 'input layer' -> «входной слой» +- 'interpretability' -> «интерпретируемость» +- 'iteration' -> «итерация» +- 'L0regularization' -> «L0регуляризация» +- 'L1loss' -> «потеряL1» +- 'L1regularization' -> «регуляризации L1» +- 'L2loss' -> «Потери L2» +- 
'L2regularization' -> «регуляризацииL2» +- 'label' -> «этикетка» +- 'labeled example' -> «помеченный пример» +- 'lambda' -> «лямбда» +- 'layer' -> «слой» +- 'learning rate' -> «скорость обучения» +- 'linear' -> «линейный» +- 'linear model' -> «линейная модель» +- 'linear models' -> «линейных моделях» +- 'linear regression' -> «линейная регрессия» +- 'Log Loss' -> «Log Loss» +- 'log-odds' -> «логарифмические шансы» +- 'logistic regression' -> «логистическая регрессия» +- 'loss' -> «потеря» +- 'loss curve' -> «кривая потерь» +- 'loss function' -> «функция потерь» +- 'machine learning' -> «машинное обучение» +- 'majority class' -> «класс большинства» +- 'mini-batch' -> «мини-партия» +- 'minority class' -> «класс меньшинства» +- 'model' -> «модель» +- 'multi-class classification' -> «многоклассовой классификацией» +- 'negative class' -> «отрицательный класс» +- 'negative classes' -> «отрицательные классы» +- 'neural network' -> «нейронная сеть» +- 'neural networks' -> «нейронным сетям» +- 'neuron' -> «нейрон» +- 'node (neural network)' -> «узел (нейронная сеть)» +- 'nonlinear' -> «нелинейный» +- 'nonstationarity' -> «нестационарность» +- 'normalization' -> «нормализация» +- 'numerical data' -> «числовые данные» +- 'offline' -> «офлайн» +- 'offline inference' -> «автономный вывод» +- 'one-hot encoding' -> «горячее кодирование» +- 'one-hot vector' -> «вектор с одним целым» +- 'one-vs.-all' -> «один против всех» +- 'online' -> «онлайн» +- 'online inference' -> «онлайн-вывод» +- 'output layer' -> «выходной слой» +- 'output layers' -> «выходных слоев» +- 'overfitting' -> «переобучение» +- 'pandas' -> «панды» +- 'parameter' -> «параметр» +- 'positive class' -> «позитивный класс» +- 'positive classes' -> «положительные» +- 'post-processing' -> «постобработка» +- 'precision' -> «точность» +- 'prediction' -> «прогноз» +- 'proxy labels' -> «прокси-метки» +- 'RAG' -> «ТРЯПКА» +- 'rater' -> «оценщик» +- 'recall' -> «отзывать» +- 'Rectified Linear Unit (ReLU)' -> «Rectified Linear 
Unit (ReLU)» +- 'regression model' -> «регрессионная модель» +- 'regularization' -> «регуляризация» +- 'regularization rate' -> «regularization rate» +- 'ReLU' -> «РеЛУ» +- 'retrieval-augmented generation' -> «генерации с расширенным поиском» +- 'retrieval-augmented generation (RAG)' -> «retrieval-augmented generation (RAG)» +- 'ROC (receiver operating characteristic) Curve' -> «ROC (receiver operating characteristic) Curve» +- 'ROC curve' -> «ROC-кривой» +- 'Root Mean Squared Error (RMSE)' -> «Root Mean Squared Error (RMSE)» +- 'sigmoid function' -> «sigmoid function» +- 'softmax' -> «софтмакс» +- 'sparse feature' -> «sparse feature» +- 'sparse representation' -> «sparse representation» +- 'sparse vector' -> «sparse vector» +- 'squared loss' -> «квадрат потерь» +- 'static' -> «статический» +- 'static inference' -> «static inference» +- 'static model' -> «статической моделью» +- 'stationarity' -> «стационарность» +- 'Stochastic Gradient Descent (SGD)' -> «Стохастический градиентный спуск (SGD)» +- 'supervised learning' -> «контролируемом обучении» +- 'supervised machine learning' -> «контролируемое машинное обучение» +- 'synthetic feature' -> «synthetic feature» +- 'synthetic features' -> «синтетические признаки» +- 'test loss' -> «test loss» +- 'training' -> «обучение» +- 'training loss' -> «training loss» +- 'training set' -> «обучающий набор» +- 'training-serving skew' -> «training-serving skew» +- 'true negative (TN)' -> «true negative (TN)» +- 'true negatives' -> «истинно отрицательных результатов» +- 'true positive (TP)' -> «true positive (TP)» +- 'true positive rate' -> «истинный положительный уровень» +- 'true positive rate (TPR)' -> «true positive rate (TPR)» +- 'true positives' -> «истинно положительных результатов» +- 'underfitting' -> «недообучение» +- 'unlabeled example' -> «unlabeled example» +- 'unsupervised machine learning' -> «неконтролируемое машинное обучение» +- 'validation' -> «проверка» +- 'validation dataset' -> «проверочном наборе данных» 
+- 'validation loss' -> «validation loss» +- 'validation set' -> «набор для проверки» +- 'weight' -> «масса» +- 'weighted sum' -> «взвешенная сумма» +- 'Z-score normalization' -> «нормализацию Z-показателя» + +# STRICTNESS NOTE +TERM MAPPINGS above are flexible preferences. The following rules are STRICT and override them. + +## 2. Strict, Binding Terminology Rules (MANDATORY) +This section defines terminology and formatting that must always be used in Russian translations. +These rules override any flexible terminology and must be followed exactly. + +# MANDATORY RUSSIAN TERMINOLOGY RULES +## 2.1 Key Translations (Strict) +- 'Shared learning' -> «совместное обучение» + AVOID: «общее обучение». +- 'Multisource data' -> «данные из нескольких источников» + AVOID: «мультиисточниковые данные». +- 'Input embedding' -> «входное векторное представление (эмбеддинг)» +- 'Embedding' -> «эмбеддинг» +- 'Embedding space' -> «пространство представлений (пространство эмбеддингов)» +- 'Task-specific branches' -> «ветви, специфичные для задачи» +- 'Pipeline' -> «конвейер обработки данных» + «пайплайн» допускается только в неформальном контексте. + +## 2.2 Official Google Colab UI (Strict) +Use the official Russian UI strings: +- 'Change Runtime Type' -> «Сменить среду выполнения» +- 'Save a copy in Drive' -> «Сохранить копию на Диске» + +Filenames must remain in ENGLISH exactly as written. +Example: «Копия блокнота OriginalNotebookName.ipynb» + +## 2.3 Abbreviations and Hyphenation (Strict) +Keep all ML/AI abbreviations in English: ROC, AUC, TPR, FPR, L1, L2, UI, API, CNN, RNN, GPT. +Do NOT invent Russian abbreviations for these. 
+ +When an English abbreviation precedes a Russian noun, use a hyphen: +- ROC-кривая +- AUC-показатель +- L1-регуляризация +- UI-дизайн diff --git a/src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/management/__init__.py b/src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/management/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/management/commands/__init__.py b/src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/management/commands/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/management/commands/sync_and_translate_language.py b/src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/management/commands/sync_and_translate_language.py new file mode 100644 index 000000000..f1c4bc8a2 --- /dev/null +++ b/src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/management/commands/sync_and_translate_language.py @@ -0,0 +1,2673 @@ +""" +Django management command to sync translation keys, translate using LLM, and create PRs. 
+ +Usage: + ./manage.py cms sync_and_translate_language el + ./manage.py cms sync_and_translate_language el \\ + --provider openai --model gpt-4-turbo --glossary +""" + +import json +import logging +import os +import re +import shutil +import subprocess +import textwrap +import time +import urllib.parse +from configparser import NoSectionError +from contextlib import contextmanager, suppress +from pathlib import Path +from typing import Any, TypedDict, cast + +import git +import requests +from django.conf import settings +from django.core.management.base import BaseCommand, CommandError +from litellm import completion + +import ol_openedx_ai_static_translations.utils as utils_module +from ol_openedx_ai_static_translations.constants import ( + HTTP_CREATED, + HTTP_NOT_FOUND, + HTTP_OK, + HTTP_TOO_MANY_REQUESTS, + HTTP_UNPROCESSABLE_ENTITY, + LANGUAGE_MAPPING, + MAX_ERROR_MESSAGE_LENGTH, + MAX_LOG_ICU_STRING_LENGTH, + MAX_LOG_STRING_LENGTH, + MAX_RETRIES, + PLURAL_CATEGORIES_ARABIC, + PLURAL_CATEGORIES_FOUR, + PLURAL_CATEGORIES_THREE, + PLURAL_CATEGORIES_TWO, + PLURAL_FORMS, + PROVIDER_GEMINI, + PROVIDER_MISTRAL, +) +from ol_openedx_ai_static_translations.utils import ( + _get_base_lang, + _get_numeric_plural_keys, + _get_po_plural_count, + apply_json_translations, + apply_po_translations, + configure_litellm_for_provider, + create_branch_name, + extract_empty_keys, + get_config_value, + get_default_model_for_provider, + get_default_provider, + get_nplurals_from_po_file, + is_retryable_error, + load_glossary, + match_glossary_term, + normalize_language_code, + plural_source_has_placeholders_not_in_singular, + sanitize_for_git, + sync_all_translations, + validate_branch_name, + validate_language_code, +) + +logger = logging.getLogger(__name__) + +# Max number of rejected brace-format entries to list in PR description. 
+MAX_REJECTED_BRACE_DISPLAY = 50 + + +class _PluralInstructionParams(TypedDict): + """Parameters for _build_plural_instructions.""" + + json_plural_info: dict[str, Any] + plural_count: int + key_batch: list[dict] + icu_categories_str: str + lang_code: str + po_plural_count_override: int | None + + +# Plural-instruction prompts for LLM (used in _build_plural_instructions). +# Format with .format(json_plural_count=..., icu_categories_str=..., etc.). + +# For languages with multiple plural forms (e.g. Arabic): expand ICU to ALL categories. +_PROMPT_JSON_PLURAL_EXPAND_ICU = ( + "IMPORTANT: {json_plural_count} entry/entries are JSON " + "strings with ICU MessageFormat plural forms. " + "These may currently have only 'one' and 'other' " + "categories, but for this language ({icu_categories_str}), " + "you MUST expand them to include ALL {num_categories} " + "categories: {icu_categories_str}. " + "Translate the content and return a complete ICU " + "MessageFormat string with ALL categories. " + "Example format: {{count, plural, {icu_categories_str} " + "{{translation}} ... other {{translation}}}}. " + "CRITICAL: Do not preserve the existing 2-category " + "structure. Expand it to include all {num_categories} " + "required categories for this language." +) + +# For languages with 2 forms: preserve existing ICU structure. +_PROMPT_JSON_PLURAL_PRESERVE_ICU = ( + "IMPORTANT: {json_plural_count} entry/entries are JSON " + "strings with ICU MessageFormat plural forms. " + "These already have the ICU structure " + "(e.g., {{activityCount, plural, one {{# activity}} " + "other {{# activities}}}}). " + "Translate the content inside the plural forms while " + "preserving the exact ICU structure and variable names. " + "Return the complete ICU MessageFormat string with " + "translated content." +) + +# JSON plurals (no existing ICU), multiple categories. +_PROMPT_JSON_PLURAL_MULTI_CATEGORY = ( + "IMPORTANT: {json_plural_count} entry/entries are for " + "JSON files with plural forms. 
" + "For these, return ICU MessageFormat strings with ALL " + "plural categories: {icu_categories_str}. " + "Format: {{count, plural, {icu_categories_str} " + "{{translation}} ... other {{translation}}}}. " + "Example: {example}. " + "IMPORTANT: Include ALL {num_categories} categories in " + "your response, not just 'one' and 'other'. Each category " + "may require different word forms in this language." +) + +# JSON plurals (no existing ICU), two categories. +_PROMPT_JSON_PLURAL_TWO_CATEGORY = ( + "IMPORTANT: {json_plural_count} entry/entries are for " + "JSON files with plural forms. " + "For these, return ICU MessageFormat strings with plural " + "categories: {icu_categories_str}. " + "Format: {{count, plural, {icu_categories_str} " + "{{translation}} ... other {{translation}}}}. " + "Example: {example}." +) + +# PO plurals: language has more than 2 forms (all indices 0..N-1). +_PROMPT_PO_PLURAL_MULTI_FORM = ( + "CRITICAL - PO FILE PLURAL ENTRIES " + "({plural_count} entry/entries): " + "These are for PO files (NOT JSON files). " + "This language requires {po_plural_count} plural forms " + "(indices 0, 1, 2, ..., {po_plural_count_minus_1}). " + "For PO files, you MUST return an object with keys " + "'0', '1', '2', ..., '{po_plural_count_minus_1}', " + "covering all indices from 0 through " + "{po_plural_count_minus_1}, where each value is a " + "PLAIN TRANSLATION STRING. " + "\n" + "WRONG (DO NOT DO THIS): " + "{{'0': '{{count, plural, one {{...}} other {{...}}}}'}} " + "\n" + "CORRECT: " + "{{'0': 'translation for zero items', " + "'1': 'translation for one item', " + "'2': 'translation for two items', " + "'3': 'translation for few items', " + "'4': 'translation for many items', " + "'5': 'translation for other items'}} " + "\n" + "Each value must be a simple translated string, " + "NOT ICU MessageFormat syntax. " + "Preserve placeholders like {{count}}, %(count)s, etc. " + "in the plain strings." +) + +# PO plurals: language has 2 forms (singular/plural). 
+_PROMPT_PO_PLURAL_SINGULAR_PLURAL = ( + "CRITICAL - PO FILE PLURAL ENTRIES " + "({plural_count} entry/entries): " + "These are for PO files (NOT JSON files). " + "For PO files, return an object with 'singular' and " + "'plural' keys, each containing a PLAIN TRANSLATION STRING. " + "\n" + "WRONG (DO NOT DO THIS): " + "{{'singular': '{{count, plural, one {{...}} " + "other {{...}}}}'}} " + "\n" + "CORRECT: " + "{{'singular': 'translation for one item', " + "'plural': 'translation for multiple items'}} " + "\n" + "Each value must be a simple translated string, " + "NOT ICU MessageFormat syntax. " + "Preserve placeholders like {{count}}, %(count)s, etc. " + "in the plain strings." +) + +# PO plurals: language has only 1 form (e.g. Chinese, Japanese, Korean). +# We need BOTH singular and plural; we choose which to use for the single form. +_PROMPT_PO_PLURAL_ONE_FORM = ( + "CRITICAL - PO FILE PLURAL ENTRIES " + "({plural_count} entry/entries): " + "These are for PO files (NOT JSON files). " + "This language has only ONE plural form (e.g. Chinese, Japanese). " + "Always return an object with BOTH 'singular' and 'plural' keys. " + "For entries where the PLURAL source has a variable (e.g. %(num_selected)s) " + "that the SINGULAR source does not, we use your 'singular' for the single form " + "(to avoid runtime errors). So provide a natural singular WITHOUT that variable. " + "For other entries we use your 'plural' for the single form. " + "\n" + "CORRECT (variable only in plural): " + "{{'singular': 'translation without the variable', " + "'plural': 'translation with %(num_selected)s etc.'}} " + "\n" + "CORRECT (same variable in both): " + "{{'singular': 'translation with %(count)s', " + "'plural': 'translation with %(count)s'}} " + "\n" + "Each value must be a simple translated string. Preserve placeholders." 
+) + + +class GitRepository: + """Helper class for git operations with consistent error handling.""" + + def __init__(self, repo_path: str): + self.repo_path = Path(repo_path) + try: + self.repo = git.Repo(repo_path) + except git.exc.InvalidGitRepositoryError as e: + msg = ( + f"Invalid git repository at {repo_path}. " + f"Please remove it or specify a different path." + ) + raise CommandError(msg) from e + except git.exc.GitCommandError as e: + msg = f"Git error accessing repository: {e!s}" + raise CommandError(msg) from e + + def _handle_git_error(self, operation: str, error: Exception) -> None: + """Convert git errors to CommandError with context.""" + msg = f"Git error {operation}: {error!s}" + raise CommandError(msg) from error + + def _get_main_branch_name(self) -> str: + """ + Determine the main branch name. + Checks local branches first, then remote branches. + Fetches from remote if needed to check remote branches. + """ + # Check if 'main' exists locally + if "main" in [ref.name for ref in self.repo.heads]: + return "main" + + # If not found locally, fetch from remote and check remote branches + with suppress(git.exc.GitCommandError): + # If fetch fails, we'll try to check existing remote refs anyway + self.repo.remotes.origin.fetch() + + # Check remote branches + if "origin/main" in [ref.name for ref in self.repo.remotes.origin.refs]: + return "main" + + msg = "Main branch not found locally or on remote" + raise CommandError(msg) + + def ensure_clean(self) -> bool: + """ + Clean uncommitted changes in tracked files. + Returns True if cleaned, False if already clean. + + This ensures any leftover staged/uncommitted changes from a previous + interrupted run are removed before starting a new translation sync. 
+ """ + try: + if self.repo.is_dirty(untracked_files=False): + self.repo.head.reset(index=True, working_tree=True) + return True + else: + return False + except git.exc.GitCommandError as e: + self._handle_git_error("cleaning repository", e) + return False # Never reached, but satisfies type checker + + def switch_to_main(self) -> None: + """Switch to main branch, deleting current branch if it's not main.""" + try: + # Get current branch name (might be in detached HEAD state) + try: + current_branch = self.repo.active_branch.name + except TypeError: + # Detached HEAD state - we'll checkout main anyway + current_branch = None + + # Get the main branch name + main_branch = self._get_main_branch_name() + + # Only switch if we're not already on the main branch + if current_branch != main_branch: + # Try to checkout the branch (will work if it exists locally) + try: + self.repo.git.checkout(main_branch) + except git.exc.GitCommandError: + # Branch doesn't exist locally, checkout from remote + self.repo.git.checkout("-b", main_branch, f"origin/{main_branch}") + + # Delete the previous branch if it exists and is not the main branch + if current_branch and current_branch != main_branch: + with suppress(git.exc.GitCommandError): + self.repo.git.branch("-D", current_branch) + except (git.exc.GitCommandError, TypeError) as e: + self._handle_git_error("switching branches", e) + + def update_from_remote(self) -> None: + """Fetch and pull latest changes from origin/main.""" + try: + self.repo.remotes.origin.fetch() + main_branch = self._get_main_branch_name() + self.repo.git.pull("origin", main_branch) + except git.exc.GitCommandError as e: + self._handle_git_error("updating repository", e) + + def get_remote_url(self) -> str | None: + """Get the current remote URL.""" + try: + return self.repo.remotes.origin.url + except (git.exc.GitCommandError, AttributeError): + return None + + def configure_user( + self, + email: str = "translations@mitodl.org", + name: str = "MIT Open 
Learning Translations Bot", + ) -> None: + """Configure git user for this repository.""" + try: + with self.repo.config_writer() as config: + # Check if user section exists and get existing values + try: + existing_email = config.get_value("user", "email", default=None) + existing_name = config.get_value("user", "name", default=None) + except NoSectionError: + # Section doesn't exist, set both values + existing_email = None + existing_name = None + # Set values only if they don't exist + if not existing_email: + config.set_value("user", "email", email) + if not existing_name: + config.set_value("user", "name", name) + except git.exc.GitCommandError as e: + self._handle_git_error("configuring user", e) + + def branch_exists(self, branch_name: str) -> bool: + """Check if branch exists locally or remotely.""" + validate_branch_name(branch_name) + try: + # Check local branches + if branch_name in [ref.name for ref in self.repo.heads]: + return True + # Check remote branches + remote_branch = f"origin/{branch_name}" + try: + self.repo.remotes.origin.fetch() + except git.exc.GitCommandError: + # If fetch fails, try to check existing remote refs anyway + # Check remote refs with existing data + return remote_branch in [ + ref.name for ref in self.repo.remotes.origin.refs + ] + else: + # Fetch succeeded, check remote refs + return remote_branch in [ + ref.name for ref in self.repo.remotes.origin.refs + ] + except git.exc.GitCommandError as e: + self._handle_git_error("checking branch existence", e) + return False # Never reached, but satisfies type checker + + def create_branch(self, branch_name: str) -> None: + """Create and checkout a new branch.""" + validate_branch_name(branch_name) + try: + self.repo.git.checkout("-b", branch_name) + except git.exc.GitCommandError as e: + self._handle_git_error("creating branch", e) + + def stage_all(self) -> None: + """Stage all changes.""" + try: + self.repo.git.add(".") + except git.exc.GitCommandError as e: + 
self._handle_git_error("staging changes", e) + + def has_changes(self) -> bool: + """Check if there are uncommitted changes.""" + try: + return self.repo.is_dirty(untracked_files=True) + except git.exc.GitCommandError as e: + self._handle_git_error("checking changes", e) + return False # Never reached, but satisfies type checker + + def commit(self, message: str) -> None: + """Commit staged changes.""" + try: + self.repo.index.commit(message) + except git.exc.GitCommandError as e: + self._handle_git_error("committing changes", e) + + @contextmanager + def authenticated_push_url(self, github_token: str): + """Context manager for authenticated push with automatic cleanup.""" + origin = self.repo.remotes.origin + original_url = origin.url + + # Build authenticated URL + match = re.search(r"github\.com[/:]([^/]+)/([^/]+?)(?:\.git)?$", original_url) + if match: + owner, repo_name = match.groups() + encoded_token = urllib.parse.quote(github_token, safe="") + push_url = f"https://{encoded_token}@github.com/{owner}/{repo_name}.git" + else: + encoded_token = urllib.parse.quote(github_token, safe="") + push_url = original_url.replace("https://", f"https://{encoded_token}@") + + try: + origin.set_url(push_url) + yield + finally: + # Always restore original URL + try: + origin.set_url(original_url) + except (git.exc.GitCommandError, ValueError) as e: + # Best effort cleanup - log but don't fail + logger.warning("Failed to restore original git remote URL: %s", e) + + def push_branch(self, branch_name: str, github_token: str | None = None) -> None: + """Push branch to remote with optional authentication.""" + validate_branch_name(branch_name) + try: + if github_token: + with self.authenticated_push_url(github_token): + self.repo.git.push("-u", "origin", branch_name) + else: + self.repo.git.push("-u", "origin", branch_name) + except git.exc.GitCommandError as e: + self._handle_git_error("pushing branch", e) + + @staticmethod + def clone(repo_url: str, repo_path: str) -> 
"GitRepository": + """Clone a repository and return GitRepository instance.""" + repo_path_obj = Path(repo_path) + try: + repo_path_obj.parent.mkdir(parents=True, exist_ok=True) + git.Repo.clone_from(repo_url, str(repo_path)) + return GitRepository(repo_path) + except git.exc.GitCommandError as e: + msg = f"Git error cloning repository: {e!s}" + raise CommandError(msg) from e + except OSError as e: + msg = f"Error creating directory: {e!s}" + raise CommandError(msg) from e + + +class GitHubAPIClient: + """Helper class for GitHub API operations.""" + + def __init__(self, token: str | None = None): + """Initialize with optional token.""" + self.token = ( + token + or getattr(settings, "TRANSLATIONS_GITHUB_TOKEN", None) + or os.environ.get("TRANSLATIONS_GITHUB_TOKEN") + ) + if not self.token: + msg = "TRANSLATIONS_GITHUB_TOKEN not set in settings or environment" + raise CommandError(msg) + + def _get_headers(self) -> dict: + """Get API request headers.""" + return { + "Authorization": f"Bearer {self.token}", + "Accept": "application/vnd.github.v3+json", + "Content-Type": "application/json", + } + + @staticmethod + def parse_repo_url(repo_url: str) -> tuple[str, str]: + """Extract owner and repo from GitHub URL.""" + match = re.search(r"github\.com[/:]([^/]+)/([^/]+?)(?:\.git)?$", repo_url) + if not match: + msg = f"Could not parse owner/repo from repo URL: {repo_url}" + raise CommandError(msg) + owner, repo = match.groups() + return (owner, repo) + + def _handle_rate_limit( + self, response: requests.Response, attempt: int, max_retries: int, stdout + ) -> bool: + """Handle rate limit response. Returns True if should retry.""" + if response.status_code == HTTP_TOO_MANY_REQUESTS: + retry_after = int(response.headers.get("Retry-After", 2 * (2**attempt))) + if attempt < max_retries - 1: + stdout.write( + f" Rate limit exceeded (attempt {attempt + 1}/{max_retries}). " + f"Retrying in {retry_after} seconds..." 
+ ) + time.sleep(retry_after) + return True + else: + msg = "GitHub API rate limit exceeded. Please try again later." + raise CommandError(msg) + return False + + def _extract_error_message(self, response: requests.Response) -> str: + """Extract safe error message from response, including validation errors.""" + try: + error_data = response.json() + message = error_data.get("message", f"HTTP {response.status_code}") + + # GitHub API validation errors include detailed error info in 'errors' array + if error_data.get("errors"): + error_details = [] + for err in error_data["errors"]: + if isinstance(err, dict): + field = err.get("field", "unknown") + code = err.get("code", "unknown") + resource = err.get("resource", "unknown") + error_details.append(f"{resource}.{field}: {code}") + else: + error_details.append(str(err)) + + if error_details: + message = f"{message} ({', '.join(error_details)})" + return message + else: + return message + except (ValueError, requests.exceptions.JSONDecodeError): + return f"HTTP {response.status_code}" + + def verify_branch( + self, + owner: str, + repo: str, + branch_name: str, + stdout, # noqa: ARG002 + ) -> None: + """Verify branch exists on remote.""" + url = f"https://api.github.com/repos/{owner}/{repo}/branches/{branch_name}" + response = requests.get(url, headers=self._get_headers(), timeout=10) + + if response.status_code == HTTP_NOT_FOUND: + msg = ( + f"Branch '{branch_name}' not found on remote. " + f"Ensure the branch was pushed successfully." 
+ ) + raise CommandError(msg) + elif response.status_code != HTTP_OK: + error_msg = self._extract_error_message(response) + msg = f"Failed to verify branch: {error_msg}" + raise CommandError(msg) + # If status_code is HTTP_OK, function returns None implicitly + + def create_pull_request( # noqa: PLR0913 + self, + owner: str, + repo: str, + branch_name: str, + title: str, + body: str, + base: str = "main", + stdout=None, + ) -> str: + """Create a pull request with retry logic.""" + url = f"https://api.github.com/repos/{owner}/{repo}/pulls" + payload = {"title": title, "body": body, "head": branch_name, "base": base} + headers = self._get_headers() + + max_retries = 3 + base_retry_delay = 2 + + for attempt in range(max_retries): + retry_delay = base_retry_delay * (2**attempt) + + try: + response = requests.post(url, json=payload, headers=headers, timeout=30) + + if response.status_code == HTTP_CREATED: + return response.json()["html_url"] + + if self._handle_rate_limit( + response, attempt, max_retries, stdout or self + ): + continue + + if response.status_code == HTTP_UNPROCESSABLE_ENTITY: + error_msg = self._extract_error_message(response) + safe_error = ( + error_msg[:MAX_ERROR_MESSAGE_LENGTH] + if len(error_msg) > MAX_ERROR_MESSAGE_LENGTH + else error_msg + ) + msg = ( + f"GitHub API validation error: {safe_error}\n" + f"This usually means the branch doesn't exist on remote " + f"or there's already a PR for this branch." 
+ ) + raise CommandError(msg) + + error_msg = self._extract_error_message(response) + safe_error = ( + error_msg[:MAX_ERROR_MESSAGE_LENGTH] + if len(error_msg) > MAX_ERROR_MESSAGE_LENGTH + else error_msg + ) + msg = f"GitHub API error: {safe_error}" + raise CommandError(msg) + + except requests.exceptions.RequestException as e: + is_connection_error = isinstance( + e, + (requests.exceptions.ConnectionError, requests.exceptions.Timeout), + ) + + if is_connection_error and attempt < max_retries - 1: + if stdout: + error_msg = ( + f" Connection error " + f"(attempt {attempt + 1}/{max_retries}): {e!s}" + ) + stdout.write(error_msg) + stdout.write(f" Retrying in {retry_delay} seconds...") + time.sleep(retry_delay) + continue + else: + if is_connection_error: + msg = ( + f"Failed to connect to GitHub API after " + f"{max_retries} attempts: {e!s}\n" + f"Please check your network connection and try again later." + ) + raise CommandError(msg) from e + msg = f"GitHub API error: {e!s}" + raise CommandError(msg) from e + + msg = "Failed to create pull request after all retries" + raise CommandError(msg) + + +class PullRequestData(TypedDict): + """Data structure for pull request creation.""" + + lang_code: str + iso_code: str + sync_stats: dict + applied_count: int + translation_stats: dict[str, Any] + applied_by_app: dict[str, Any] + provider: str + model: str + rejected_brace_format_entries: list[dict[str, str]] + + +class TranslationParams(TypedDict): + """Parameters for translation operations.""" + + lang_code: str + provider: str + model: str + glossary: dict[str, Any] | None + batch_size: int + max_retries: int + + +class Command(BaseCommand): + help = ( + "Sync translation keys, translate using LLM, " + "and create PR in mitxonline-translations" + ) + + def add_arguments(self, parser): + parser.add_argument( + "lang", type=str, help="Language code (e.g., el, fr, es_ES)" + ) + parser.add_argument( + "--iso-code", + type=str, + help="ISO code for JSON files (default: same 
as lang)", + ) + parser.add_argument( + "--repo-path", + type=str, + help=( + "Path to mitxonline-translations repository. " + "Can also be set via TRANSLATIONS_REPO_PATH setting " + "or environment variable." + ), + ) + default_provider = get_default_provider() + parser.add_argument( + "--provider", + type=str, + default=default_provider, + choices=["openai", "gemini", "mistral"], + help=( + "Translation provider (openai, gemini, mistral). " + "Default is taken from TRANSLATIONS_PROVIDERS['default_provider']" + + ( + f" (currently: {default_provider})" + if default_provider + else " (not configured)" + ) + ), + ) + parser.add_argument( + "--model", + type=str, + default=None, + help=( + "Model name (e.g., gpt-4, gemini-pro, mistral-large-latest). " + "If not specified, uses the default_model for the selected provider " + "from TRANSLATIONS_PROVIDERS. " + "LiteLLM automatically detects provider from model name." + ), + ) + parser.add_argument( + "--dry-run", + action="store_true", + help="Run without committing or creating PR", + ) + parser.add_argument( + "--glossary", + action="store_true", + default=False, + help="Use glossary from plugin glossaries folder. " + "Looks for {plugin_dir}/glossaries/machine_learning/{iso_code}.txt " + "(uses --iso-code when given, else lang code).", + ) + parser.add_argument( + "--batch-size", + type=int, + default=200, + help=( + "Number of keys to translate per API request (default: 200). " + "Larger batches are faster but may hit rate limits. " + "Recommended: 200-300 for most models, " + "up to 400-500 for large models like mistral-large." + ), + ) + parser.add_argument( + "--mfe", + type=str, + nargs="+", + help=( + "Filter by specific MFE(s). " + "Use 'edx-platform' for backend translations." + ), + ) + parser.add_argument( + "--repo-url", + type=str, + help=( + "GitHub repository URL. " + "Can also be set via TRANSLATIONS_REPO_URL setting " + "or environment variable." 
+ ), + ) + + def handle(self, *args, **options): # noqa: ARG002, PLR0915 + """Handle the command execution.""" + # Normalize language codes (convert hyphens to underscores) + lang_code = normalize_language_code(options["lang"]) + iso_code = normalize_language_code(options.get("iso_code") or lang_code) + + validate_language_code(lang_code) + validate_language_code(iso_code, "ISO code") + + repo_path = get_config_value( + "repo_path", + options, + str(Path.home() / ".mitxonline-translations"), + ) + repo_url = get_config_value( + "repo_url", + options, + "https://github.com/mitodl/mitxonline-translations.git", + ) + + # Validate repository path is not empty + if not repo_path or not repo_path.strip(): + msg = ( + "Repository path is not set. Please specify --repo-path, " + "set TRANSLATIONS_REPO_PATH in Django settings, or set " + "TRANSLATIONS_REPO_PATH environment variable." + ) + raise CommandError(msg) + + self.stdout.write(self.style.SUCCESS(f"Processing language: {lang_code}")) + self.stdout.write(f" ISO code: {iso_code}") + self.stdout.write(f" Repository: {repo_path}") + + repo = self._ensure_repo(repo_path, repo_url) + + self.stdout.write("\nSyncing translation keys...") + base_dir = Path(repo_path) / "translations" + sync_stats = sync_all_translations( + base_dir, lang_code, iso_code, skip_backend=False + ) + self._log_sync_stats(sync_stats) + + # Extract and filter empty keys + self.stdout.write("\nExtracting empty keys for translation...") + empty_keys = extract_empty_keys( + base_dir, lang_code, iso_code, skip_backend=False + ) + empty_keys = self._filter_by_mfe(empty_keys, options.get("mfe")) + + if not empty_keys: + self.stdout.write(self.style.SUCCESS("\nNo empty keys to translate!")) + return + + glossary = self._load_glossary(options, iso_code) + + provider = options.get("provider") or get_default_provider() + if not provider: + msg = ( + "Provider not specified and " + "TRANSLATIONS_PROVIDERS['default_provider'] is not set" + ) + raise 
CommandError(msg) + + model = options.get("model") or get_default_model_for_provider(provider) + if not model: + msg = ( + f"Model not specified and provider '{provider}' " + "does not have default_model in TRANSLATIONS_PROVIDERS" + ) + raise CommandError(msg) + + self.stdout.write(f"\nTranslating using {provider}/{model}...") + params = TranslationParams( + lang_code=lang_code, + provider=provider, + model=model, + glossary=glossary, + batch_size=options.get("batch_size", 200), + max_retries=MAX_RETRIES, + ) + translations, translation_stats = self._translate_keys(empty_keys, params) + self.stdout.write(f" Translated {len(translations)} keys") + + self.stdout.write("\nApplying translations...") + applied_count, applied_by_app = self._apply_translations( + translations, empty_keys, self.stdout, lang_code + ) + self.stdout.write(f" Applied {applied_count} translations") + + if options.get("dry_run"): + self.stdout.write(self.style.WARNING("\nDry run - no changes committed")) + return + + branch_name = create_branch_name(lang_code) + self.stdout.write(f"\nCommitting changes to branch: {branch_name}") + + if not self._commit_changes(repo, branch_name, lang_code): + return + + self.stdout.write("\nCreating pull request...") + try: + pr_data = PullRequestData( + lang_code=lang_code, + iso_code=iso_code, + sync_stats=sync_stats, + applied_count=applied_count, + translation_stats=translation_stats, + applied_by_app=applied_by_app, + provider=provider, + model=model, + rejected_brace_format_entries=applied_by_app.get( + "rejected_brace_format_entries", [] + ), + ) + pr_url = self._create_pull_request( + repo_path, + branch_name, + pr_data, + repo_url, + ) + self.stdout.write(self.style.SUCCESS(f"\nPull request created: {pr_url}")) + except CommandError as e: + # Clean up branch if PR creation fails + self.stdout.write( + self.style.ERROR(f"\nFailed to create pull request: {e!s}") + ) + self._cleanup_failed_branch(repo, branch_name) + raise + + def _ensure_repo(self, 
repo_path: str, repo_url: str) -> GitRepository: + """Ensure repository exists and is ready. Returns GitRepository instance.""" + repo_path_obj = Path(repo_path) + is_git_repo = repo_path_obj.exists() and (repo_path_obj / ".git").exists() + + if is_git_repo: + repo = GitRepository(repo_path) + current_url = repo.get_remote_url() + + # Normalize URLs for comparison (remove .git suffix, trailing slashes) + normalized_current = (current_url or "").rstrip(".git").rstrip("/") + normalized_new = repo_url.rstrip(".git").rstrip("/") + + # If URL changed, delete and re-clone + if normalized_current != normalized_new: + self.stdout.write( + self.style.WARNING( + f" Repository URL changed from {current_url} to {repo_url}" + ) + ) + self.stdout.write(" Removing old repository and cloning new one...") + shutil.rmtree(repo_path) + self.stdout.write(f" Cloning repository to {repo_path}...") + repo = GitRepository.clone(repo_url, repo_path) + self.stdout.write( + self.style.SUCCESS(" Repository cloned successfully") + ) + return repo + + # URL matches, use existing repo + self.stdout.write(f" Repository found at {repo_path}") + if repo.ensure_clean(): + self.stdout.write( + self.style.WARNING( + " WARNING: Found uncommitted changes (cleaned up)" + ) + ) + self.stdout.write( + self.style.SUCCESS(" Cleaned up uncommitted changes") + ) + + repo.switch_to_main() + self.stdout.write(" Updating repository...") + repo.update_from_remote() + self.stdout.write(self.style.SUCCESS(" Repository up to date")) + return repo + + elif repo_path_obj.exists(): + msg = ( + f"Path {repo_path} exists but is not a git repository. " + f"Please remove it or specify a different path." 
    # NOTE(review): this chunk begins mid-method — the enclosing repo-clone helper
    # starts above this view; its tail is reproduced unchanged. Indentation of the
    # fragment is reconstructed (source whitespace was collapsed by the patch).
            )
            raise CommandError(msg)
        else:
            self.stdout.write(f" Cloning repository to {repo_path}...")
            repo = GitRepository.clone(repo_url, repo_path)
            self.stdout.write(self.style.SUCCESS(" Repository cloned successfully"))
            return repo

    def _log_sync_stats(self, sync_stats: dict) -> None:
        """Log synchronization statistics.

        Writes one line for the frontend pass (keys added / typos fixed) and one
        for the backend pass (entries added).
        # assumes sync_stats has 'frontend' {added, fixed} and 'backend' {added}
        # counters — TODO confirm against the sync step that produces it.
        """
        self.stdout.write(
            f" Frontend: {sync_stats['frontend']['added']} keys added, "
            f"{sync_stats['frontend']['fixed']} typos fixed"
        )
        self.stdout.write(f" Backend: {sync_stats['backend']['added']} entries added")

    def _filter_by_mfe(
        self, empty_keys: list[dict], mfe_filter: list[str] | None
    ) -> list[dict]:
        """Filter empty keys by MFE if specified.

        Returns empty_keys unchanged when no filter is given; otherwise keeps only
        keys whose 'app' is in mfe_filter. Returns [] (with a warning listing the
        apps actually present) when the filter matches nothing.
        """
        if not mfe_filter:
            self.stdout.write(f" Found {len(empty_keys)} empty keys")
            return empty_keys

        mfe_set = set(mfe_filter)
        original_count = len(empty_keys)
        available_apps = {key.get("app", "unknown") for key in empty_keys}
        filtered = [key for key in empty_keys if key.get("app") in mfe_set]

        if not filtered:
            mfe_list = ", ".join(mfe_filter)
            apps_list = ", ".join(sorted(available_apps))
            self.stdout.write(
                self.style.WARNING(
                    f"\nWARNING: No empty keys found for specified MFE(s): "
                    f"{mfe_list}\n"
                    f" Available apps: {apps_list}"
                )
            )
            return []

        mfe_list = ", ".join(mfe_filter)
        self.stdout.write(
            f" Filtered to {len(filtered)} keys from {len(mfe_set)} MFE(s): "
            f"{mfe_list} (was {original_count} total)"
        )
        return filtered

    def _get_icu_plural_categories(self, lang_code: str) -> list[str]:
        """Get ICU MessageFormat plural categories for a language.

        Derives the category list from the gettext Plural-Forms expression in
        PLURAL_FORMS (keyed by base language), by reading its nplurals count.
        Falls back to ["one", "other"] when the expression is missing/unparsable
        or the nplurals value has no mapping (note: nplurals=5 is unmapped here).
        """
        base_lang = _get_base_lang(lang_code)
        plural_form = PLURAL_FORMS.get(base_lang, "nplurals=2; plural=(n != 1);")

        nplurals_match = re.search(r"nplurals=(\d+)", plural_form)
        if not nplurals_match:
            return ["one", "other"]

        nplurals = int(nplurals_match.group(1))

        # Map nplurals to ICU categories
        nplurals_to_categories = {
            1: ["other"],
            2: ["one", "other"],
            3: ["one", "few", "other"],
            4: ["one", "two", "few", "other"],
            6: ["zero", "one", "two", "few", "many", "other"],
        }

        return nplurals_to_categories.get(nplurals, ["one", "other"])

    def _build_icu_example(self, categories_list: list[str]) -> str:
        """Build an ICU MessageFormat example string based on categories.

        Looks up a canned template by the number of categories (2/3/4/6); any
        other count is synthesized on the fly from categories_list.
        """
        num_categories = len(categories_list)

        templates_by_count = {
            PLURAL_CATEGORIES_ARABIC: (
                # Arabic: zero, one, two, few, many, other
                "{activityCount, plural, "
                "zero {# activities} "
                "one {# activity} "
                "two {# activities} "
                "few {# activities} "
                "many {# activities} "
                "other {# activities}}"
            ),
            PLURAL_CATEGORIES_FOUR: (
                # Languages with 4 forms: one, two, few, other
                "{activityCount, plural, "
                "one {# activity} "
                "two {# activities} "
                "few {# activities} "
                "other {# activities}}"
            ),
            PLURAL_CATEGORIES_THREE: (
                # Languages with 3 forms: one, few, other (e.g., Russian, Polish)
                "{activityCount, plural, "
                "one {# activity} "
                "few {# activities} "
                "other {# activities}}"
            ),
            PLURAL_CATEGORIES_TWO: (
                # Languages with 2 forms: one, other (most languages)
                "{activityCount, plural, one {# activity} other {# activities}}"
            ),
        }

        def fallback_template() -> str:
            # Fallback for other multi-category languages
            example_categories = " ".join(
                f"{cat} {{# {'activity' if cat == 'one' else 'activities'}}}"
                for cat in categories_list
            )
            return f"{{activityCount, plural, {example_categories}}}"

        return templates_by_count.get(num_categories) or fallback_template()

    def _load_glossary(self, options: dict, iso_code: str) -> dict[str, Any]:
        """Load glossary if enabled. Uses ISO code for file lookup.

        iso_code is already normalized (e.g. es_419). Tries {iso_code}.txt first,
        then {iso_code with underscores→hyphens}.txt (e.g. es-419.txt) if not found.
        """
        if not options.get("glossary", False):
            return {}

        # Glossary files live next to the utils module, under
        # glossaries/machine_learning/ (see package layout in this plugin).
        utils_file = Path(utils_module.__file__)
        base_dir = utils_file.parent / "glossaries" / "machine_learning"
        candidates = [
            base_dir / f"{iso_code}.txt",
            base_dir / f"{iso_code.replace('_', '-')}.txt",
        ]
        glossary_path = None
        for path in candidates:
            if path.exists():
                glossary_path = path
                break

        if glossary_path is not None:
            self.stdout.write(f"\nLoading glossary from {glossary_path}...")
            glossary = load_glossary(glossary_path, iso_code)
            self.stdout.write(f" Loaded {len(glossary)} glossary terms")
            return glossary

        # Missing glossary is non-fatal: warn and translate without one.
        self.stdout.write(
            self.style.WARNING(
                f"\nWARNING: Glossary file not found for {iso_code} "
                f"(tried {candidates[0].name}, {candidates[1].name})\n"
                f" Continuing without glossary."
            )
        )
        return {}

    def _check_glossary_for_keys(
        self,
        empty_keys: list[dict],
        glossary: dict[str, Any] | None,
    ) -> tuple[dict[str, Any], int, list[dict]]:
        """Check glossary matches for keys.

        Returns (translations, matches_count, remaining_keys).

        Translation-dict keys are "{resolved_path}:{msgctxt}:{msgid}" when a
        msgctxt is present, else "{resolved_path}:{msgid}" — the same scheme
        used by _process_batch_results and _group_translations_by_file, so the
        three must stay in sync.
        """
        translations = {}
        glossary_matches = 0
        keys_needing_llm = []

        for key_info in empty_keys:
            # Normalize file path for consistent comparison
            file_path_str = str(Path(key_info["file_path"]).resolve())
            # Include msgctxt in key if it exists to distinguish entries with same msgid
            msgctxt = key_info.get("msgctxt")
            if msgctxt:
                translation_key = f"{file_path_str}:{msgctxt}:{key_info['key']}"
            else:
                translation_key = f"{file_path_str}:{key_info['key']}"

            if glossary:
                match_result = self._check_glossary_match(key_info, glossary)
                if match_result:
                    translations[translation_key] = match_result
                    glossary_matches += 1
                    continue

            keys_needing_llm.append(key_info)

        return translations, glossary_matches, keys_needing_llm

    def _process_batch_results(
        self,
        batch: list[dict],
        batch_translations: list[Any],
        translations: dict[str, Any],
    ) -> tuple[int, int, dict[str, int]]:
        """Process batch translation results.

        Returns (successes, errors, errors_by_app).

        batch_translations is positional: index i corresponds to batch[i]. A
        missing/short list or a falsy entry counts as an error for that key's app.
        Successful entries are written into the shared translations dict
        (mutated in place).
        """
        batch_successes = 0
        batch_errors = 0
        batch_errors_by_app: dict[str, int] = {}

        for i, key_info in enumerate(batch):
            # Normalize file path for consistent comparison
            file_path_str = str(Path(key_info["file_path"]).resolve())
            # Include msgctxt in key if it exists to distinguish entries with same msgid
            msgctxt = key_info.get("msgctxt")
            if msgctxt:
                translation_key = f"{file_path_str}:{msgctxt}:{key_info['key']}"
            else:
                translation_key = f"{file_path_str}:{key_info['key']}"
            app = key_info.get("app", "unknown")
            if i < len(batch_translations) and batch_translations[i]:
                translations[translation_key] = batch_translations[i]
                batch_successes += 1
            else:
                batch_errors += 1
                batch_errors_by_app[app] = batch_errors_by_app.get(app, 0) + 1

        return batch_successes, batch_errors, batch_errors_by_app

    def _translate_with_llm(  # noqa: PLR0913
        self,
        keys_needing_llm: list[dict],
        translations: dict[str, Any],
        lang_code: str,
        provider: str,
        model: str,
        glossary: dict[str, Any] | None,
        batch_size: int,
        max_retries: int,
        po_nplurals_override: int | None = None,
    ) -> tuple[int, int, dict[str, int]]:
        """Translate keys using LLM with batch processing.

        Returns (llm_translations, llm_errors, errors_by_app).
        When po_nplurals_override is set (from translation file's Plural-Forms),
        it is used for PO plural prompt instructions instead of the constant-based rule.

        Batches are retried with exponential backoff (via _handle_batch_error);
        a batch that exhausts retries has ALL of its keys counted as errors.
        """
        llm_translations = 0
        llm_errors = 0
        errors_by_app: dict[str, int] = {}

        total_keys = len(keys_needing_llm)
        # Ceiling division: last batch may be smaller than batch_size.
        num_batches = (total_keys + batch_size - 1) // batch_size
        self.stdout.write(
            f" Translating {total_keys} keys using LLM "
            f"({num_batches} batches of up to {batch_size} keys each)..."
        )

        for batch_idx, batch in enumerate(
            [
                keys_needing_llm[i : i + batch_size]
                for i in range(0, total_keys, batch_size)
            ],
            1,
        ):
            batch_succeeded = False
            batch_apps = {key_info.get("app", "unknown") for key_info in batch}

            # Retry loop for this batch
            for attempt in range(max_retries + 1):  # +1 for initial attempt
                try:
                    batch_translations = self._call_llm_batch(
                        batch,
                        lang_code,
                        provider,
                        model,
                        glossary,
                        po_nplurals_override=po_nplurals_override,
                    )
                    batch_successes, batch_errors, batch_errors_by_app = (
                        self._process_batch_results(
                            batch,
                            batch_translations,
                            translations,
                        )
                    )

                    llm_translations += batch_successes
                    llm_errors += batch_errors
                    for app, count in batch_errors_by_app.items():
                        errors_by_app[app] = errors_by_app.get(app, 0) + count

                    completed = min(batch_idx * batch_size, total_keys)
                    progress_pct = min((completed / total_keys) * 100, 100)
                    # NOTE(review): subtracts only successes, so keys that
                    # errored still count as "remaining" — confirm intended.
                    remaining_keys = total_keys - llm_translations

                    self._log_batch_progress(
                        batch_idx,
                        num_batches,
                        batch_successes,
                        batch_errors,
                        completed,
                        total_keys,
                        progress_pct,
                        remaining_keys,
                        batch_apps,
                        batch_errors_by_app,
                        attempt,
                    )

                    batch_succeeded = True
                    break  # Success - exit retry loop

                except (
                    requests.RequestException,
                    ValueError,
                    KeyError,
                    AttributeError,
                ) as e:
                    if not self._handle_batch_error(
                        e, batch_idx, num_batches, batch_apps, attempt, max_retries
                    ):
                        break  # Non-retryable error

            # If batch failed after all retries, mark all keys as errors
            if not batch_succeeded:
                batch_errors = len(batch)
                llm_errors += batch_errors
                for key_info in batch:
                    app = key_info.get("app", "unknown")
                    errors_by_app[app] = errors_by_app.get(app, 0) + 1
                apps_str = ", ".join(sorted(batch_apps))
                self.stdout.write(
                    self.style.ERROR(
                        f" Marked {batch_errors} keys as errors, "
                        f"continuing with next batch...\n"
                        f" Affected apps: {apps_str}"
                    )
                )

        return llm_translations, llm_errors, errors_by_app
    def _log_batch_progress(  # noqa: PLR0913
        self,
        batch_idx: int,
        num_batches: int,
        batch_successes: int,
        batch_errors: int,
        completed: int,
        total_keys: int,
        progress_pct: float,
        remaining_keys: int,
        batch_apps: set[str],
        batch_errors_by_app: dict[str, int],
        attempt: int,
    ) -> None:
        """Log batch processing progress.

        Emits a richer message (affected apps + per-app error counts) when the
        batch had partial failures; attempt > 0 adds an "after N attempt(s)" note.
        """
        retry_msg = f" (after {attempt + 1} attempt(s))" if attempt > 0 else ""
        if batch_errors > 0:
            apps_str = ", ".join(sorted(batch_apps))
            errors_by_app_str = ", ".join(
                f"{app}: {count}" for app, count in sorted(batch_errors_by_app.items())
            )
            self.stdout.write(
                f" Batch {batch_idx}/{num_batches} completed "
                f"with partial success "
                f"({batch_successes} succeeded, "
                f"{batch_errors} failed){retry_msg} "
                f"({completed}/{total_keys} keys, "
                f"{progress_pct:.1f}% complete, "
                f"{remaining_keys} remaining)\n"
                f" Affected apps: {apps_str}\n"
                f" Errors by app: {errors_by_app_str}"
            )
        else:
            self.stdout.write(
                f" Batch {batch_idx}/{num_batches} completed"
                f"{retry_msg} "
                f"({completed}/{total_keys} keys, "
                f"{progress_pct:.1f}% complete, "
                f"{remaining_keys} remaining)"
            )

    def _handle_batch_error(  # noqa: PLR0913
        self,
        error: Exception,
        batch_idx: int,
        num_batches: int,
        batch_apps: set[str],
        attempt: int,
        max_retries: int,
    ) -> bool:
        """Handle batch error. Returns True if should retry, False otherwise.

        Non-retryable errors (per is_retryable_error) fail immediately; retryable
        ones sleep with exponential backoff before the caller retries.
        """
        apps_str = ", ".join(sorted(batch_apps))
        if not is_retryable_error(error):
            # Non-retryable error - fail immediately
            self.stdout.write(
                self.style.ERROR(
                    f" ERROR: Batch {batch_idx}/{num_batches} "
                    f"failed with non-retryable error: {error!s}\n"
                    f" Affected apps: {apps_str}"
                )
            )
            return False

        # Retryable error - check if we have retries left
        if attempt < max_retries:
            # Exponential backoff: 2^attempt seconds (1s, 2s, 4s, 8s...)
            wait_time = 2**attempt
            self.stdout.write(
                self.style.WARNING(
                    f" WARNING: Batch {batch_idx}/{num_batches} "
                    f"failed (attempt {attempt + 1}/"
                    f"{max_retries + 1}): {error!s}\n"
                    f" Affected apps: {apps_str}\n"
                    f" Retrying in {wait_time} second(s)..."
                )
            )
            time.sleep(wait_time)
            return True
        else:
            # Out of retries
            self.stdout.write(
                self.style.ERROR(
                    f" ERROR: Batch {batch_idx}/{num_batches} "
                    f"failed after {max_retries + 1} attempts: "
                    f"{error!s}\n"
                    f" Affected apps: {apps_str}"
                )
            )
            return False

    def _translate_keys(
        self,
        empty_keys: list[dict],
        params: TranslationParams,
    ) -> tuple[dict[str, Any], dict[str, Any]]:
        """Translate empty keys using LLM with batch processing.

        Pipeline: (1) try exact glossary matches; (2) send the remainder to the
        LLM in batches. Returns (translations, stats) where stats has
        glossary_matches / llm_translations / errors / errors_by_app.
        """
        lang_code = params["lang_code"]
        provider = params["provider"]
        model = params["model"]
        glossary = params["glossary"]
        batch_size = params["batch_size"]

        # Add lang_code to each key_info for ICU format conversion
        for key_info in empty_keys:
            key_info["lang_code"] = lang_code
        max_retries = params["max_retries"]

        # First pass: check glossary matches
        logger.info(
            "Checking glossary matches for %d empty key(s) (language: %s)",
            len(empty_keys),
            lang_code,
        )
        translations, glossary_matches, keys_needing_llm = (
            self._check_glossary_for_keys(empty_keys, glossary)
        )

        logger.info(
            "Glossary matches: %d, Keys needing LLM: %d",
            glossary_matches,
            len(keys_needing_llm),
        )

        if not keys_needing_llm:
            logger.info("All translations found in glossary, skipping LLM translation")
            return translations, {
                "glossary_matches": glossary_matches,
                "llm_translations": 0,
                "errors": 0,
                "errors_by_app": cast("dict[str, int]", {}),
            }

        # Translate remaining keys with LLM
        logger.info(
            "Starting LLM translation for %d key(s) using %s/%s (batch size: %d)",
            len(keys_needing_llm),
            provider,
            model,
            batch_size,
        )
        # Prefer nplurals from the first PO file in the translation repo
        po_nplurals_override = None
        for key_info in empty_keys:
            if key_info.get("file_type") == "po" and key_info.get("file_path"):
                n = get_nplurals_from_po_file(Path(key_info["file_path"]))
                if n is not None:
                    po_nplurals_override = n
                    break

        llm_translations, llm_errors, errors_by_app = self._translate_with_llm(
            keys_needing_llm,
            translations,
            lang_code,
            provider,
            model,
            glossary,
            batch_size,
            max_retries,
            po_nplurals_override=po_nplurals_override,
        )
        logger.info(
            "LLM translation completed: %d translated, %d errors",
            llm_translations,
            llm_errors,
        )

        summary = (
            f" Summary - LLM translations: {llm_translations}, Errors: {llm_errors}"
        )
        if glossary:
            # NOTE(review): summary[12:] strips the literal " Summary - " prefix
            # by index; brittle if the prefix text above ever changes.
            summary = (
                f" Summary - Glossary matches: {glossary_matches}, {summary[12:]}"
            )
        self.stdout.write(summary)

        return translations, {
            "glossary_matches": glossary_matches,
            "llm_translations": llm_translations,
            "errors": llm_errors,
            "errors_by_app": errors_by_app,
        }

    def _check_glossary_match(
        self, key_info: dict, glossary: dict[str, Any] | None
    ) -> Any | None:
        """
        Check if key matches glossary. Returns translation or None.

        Args:
            key_info: Dictionary containing key information with 'english',
                'is_plural', etc.
            glossary: Dictionary mapping English terms to translations, or None.

        Returns:
            Translation string/dict if match found, None otherwise.
        """
        if not glossary:
            return None

        is_plural = key_info.get("is_plural", False)
        msgid_plural = key_info.get("msgid_plural")

        # Plural entries need both singular and plural matched together.
        if is_plural and msgid_plural:
            return self._check_plural_glossary_match(key_info, glossary, msgid_plural)

        match = match_glossary_term(key_info["english"], glossary, exact_match=True)
        if not match:
            logger.debug(
                "No glossary match found for key: %s", key_info.get("key", "unknown")
            )
            return None

        translation = (
            match.get("translation", match.get("singular", ""))
            if isinstance(match, dict)
            else match
        )
        logger.debug(
            "Found glossary match for key: %s -> %s",
            key_info.get("key", "unknown"),
            str(translation)[:MAX_LOG_STRING_LENGTH] + "..."
            if len(str(translation)) > MAX_LOG_STRING_LENGTH
            else str(translation),
        )
        return translation

    def _is_icu_format(self, text: str) -> bool:
        """Check if text is already in ICU MessageFormat.

        Detection is heuristic: looks for "{var, plural," anywhere in the string.
        """
        if not isinstance(text, str):
            return False
        # Match ICU MessageFormat pattern: {variable, plural, ...}
        icu_pattern = r"\{[^,]+,\s*plural\s*,"
        return bool(re.search(icu_pattern, text))

    def _convert_to_icu_format(
        self, singular: str, plural: str, lang_code: str, count_var: str = "count"
    ) -> str:
        """Convert singular and plural translations to ICU MessageFormat string.

        The plural translation is reused for every category except "one"; only
        two distinct forms are available at this point.
        """
        categories = self._get_icu_plural_categories(lang_code)

        parts = [f"{{{count_var}, plural"]
        for category in categories:
            translation = singular if category == "one" else plural
            parts.append(f" {category} {{{translation}}}")
        parts.append("}")

        icu_string = "".join(parts)
        logger.debug(
            "Converted singular/plural to ICU format for %s: %s (categories: %s)",
            lang_code,
            (
                icu_string[:MAX_LOG_ICU_STRING_LENGTH] + "..."
                if len(icu_string) > MAX_LOG_ICU_STRING_LENGTH
                else icu_string
            ),
            categories,
        )
        return icu_string

    def _extract_translation_from_match(self, match: Any) -> str:
        """Extract translation string from glossary match.

        Accepts either a plain string or a dict; prefers singular, then plural,
        then translation, finally the dict's str() form.
        """
        if isinstance(match, str):
            return match
        return match.get(
            "singular", match.get("plural", match.get("translation", str(match)))
        )

    def _check_plural_glossary_match(
        self, key_info: dict, glossary: dict[str, Any], msgid_plural: str
    ) -> Any | None:
        """Check glossary match for plural keys. Returns translation or None.

        Requires BOTH singular and plural to match. JSON files get an ICU string;
        PO files get a {singular, plural} dict. A singular-only match is stashed
        on key_info["_glossary_singular"] for downstream use — TODO confirm the
        consumer of that side-channel.
        """
        file_type = key_info.get("file_type", "po")
        singular_match = match_glossary_term(
            key_info["english"], glossary, exact_match=True
        )
        plural_match = match_glossary_term(msgid_plural, glossary, exact_match=True)

        if singular_match and plural_match:
            singular_str = self._extract_translation_from_match(singular_match)
            plural_str = self._extract_translation_from_match(plural_match)

            if file_type == "json":
                lang_code = key_info.get("lang_code", "en")
                return self._convert_to_icu_format(singular_str, plural_str, lang_code)

            return {"singular": singular_str, "plural": plural_str}

        if singular_match:
            key_info["_glossary_singular"] = self._extract_translation_from_match(
                singular_match
            )

        return None

    def _format_glossary_for_prompt(self, glossary: dict[str, Any] | None) -> str:
        """Format glossary as a prompt section for LLM translation requests.

        Args:
            glossary: Dictionary mapping English terms to translations, or
                None/empty dict.

        Returns:
            Empty string if glossary is None or empty, otherwise returns a
            formatted string with glossary terms and instructions for consistent
            translation.
        """
        if not glossary:
            return ""

        try:
            glossary_json = json.dumps(glossary, indent=2, ensure_ascii=False)
        except (TypeError, ValueError) as e:
            # Serialization failure is non-fatal: just skip the glossary section.
            self.stdout.write(
                self.style.WARNING(
                    f" WARNING: Could not serialize glossary for prompt: {e!s}. "
                    f"Continuing without glossary in LLM prompt."
                )
            )
            return ""
        glossary_template = f"""
        IMPORTANT - Use these glossary terms when translating. If any English terms
        from the glossary appear in the texts to translate, use the corresponding
        translation from the glossary:

        {glossary_json}

        When translating sentences, ensure that glossary terms are translated
        consistently according to the glossary above, even if they appear
        within longer sentences. For example, if the glossary specifies
        "certificate" -> "Πιστοποιητικό", then translate "certificate" as
        "Πιστοποιητικό" even when it appears in longer sentences like
        "The course completion certificate is available".
        """
        return textwrap.dedent(glossary_template)

    def _build_plural_instructions(self, params: _PluralInstructionParams) -> str:
        """Build plural handling instructions for LLM prompt.

        When po_plural_count_override is set (e.g. from the translation file's
        Plural-Forms header), it is used for PO plural form count instead of
        the constant-based _get_po_plural_count(lang_code).
        """
        instructions = []
        json_plural_info = params["json_plural_info"]
        key_batch = params["key_batch"]
        icu_categories_str = params["icu_categories_str"]
        lang_code = params["lang_code"]
        po_plural_count_override = params["po_plural_count_override"]
        plural_count = params["plural_count"]

        json_plural_count = json_plural_info.get("count", 0)
        json_plural_entries = json_plural_info.get("entries", {})

        if json_plural_count > 0:
            categories_list = icu_categories_str.split(", ")
            num_categories = len(categories_list)
            # json_plural_entries is keyed by 1-based batch position as a string,
            # matching the numbering used when the prompt's text block was built.
            has_existing_icu = any(
                self._is_icu_format(key_batch[i].get("english", ""))
                for i in range(len(key_batch))
                if str(i + 1) in json_plural_entries
            )

            if has_existing_icu:
                if num_categories > PLURAL_CATEGORIES_TWO:
                    instructions.append(
                        _PROMPT_JSON_PLURAL_EXPAND_ICU.format(
                            json_plural_count=json_plural_count,
                            icu_categories_str=icu_categories_str,
                            num_categories=num_categories,
                        )
                    )
                else:
                    instructions.append(
                        _PROMPT_JSON_PLURAL_PRESERVE_ICU.format(
                            json_plural_count=json_plural_count
                        )
                    )
            else:
                example = self._build_icu_example(categories_list)
                if num_categories > PLURAL_CATEGORIES_TWO:
                    instructions.append(
                        _PROMPT_JSON_PLURAL_MULTI_CATEGORY.format(
                            json_plural_count=json_plural_count,
                            icu_categories_str=icu_categories_str,
                            num_categories=num_categories,
                            example=example,
                        )
                    )
                else:
                    instructions.append(
                        _PROMPT_JSON_PLURAL_TWO_CATEGORY.format(
                            json_plural_count=json_plural_count,
                            icu_categories_str=icu_categories_str,
                            example=example,
                        )
                    )

        if plural_count > 0:
            # Prefer nplurals from file; fall back to constant-based rule
            po_plural_count = (
                po_plural_count_override
                if po_plural_count_override is not None
                else _get_po_plural_count(lang_code)
            )
            if po_plural_count > PLURAL_CATEGORIES_TWO:
                instructions.append(
                    _PROMPT_PO_PLURAL_MULTI_FORM.format(
                        plural_count=plural_count,
                        po_plural_count=po_plural_count,
                        po_plural_count_minus_1=po_plural_count - 1,
                    )
                )
            elif po_plural_count == 1:
                instructions.append(
                    _PROMPT_PO_PLURAL_ONE_FORM.format(plural_count=plural_count)
                )
                # If any PO plural entry has a placeholder only in the plural source,
                # add a note so the LLM provides a safe singular (no such placeholder).
                if any(
                    key_info.get("is_plural")
                    and key_info.get("msgid_plural")
                    and plural_source_has_placeholders_not_in_singular(
                        key_info.get("english", ""),
                        key_info.get("msgid_plural", ""),
                    )
                    for key_info in key_batch
                ):
                    instructions.append(
                        "Some of the above PO plural entries have a variable only in "
                        "the plural source (e.g. %(num_selected)s). For those, we use "
                        "your 'singular' for the single form—so provide a natural "
                        "singular translation WITHOUT that variable."
                    )
            else:
                instructions.append(
                    _PROMPT_PO_PLURAL_SINGULAR_PLURAL.format(plural_count=plural_count)
                )

        return "\n".join(instructions)

    def _call_llm_batch(  # noqa: PLR0913
        self,
        key_batch: list[dict],
        lang_code: str,
        provider: str,
        model: str,
        glossary: dict[str, Any] | None = None,
        timeout: int = 120,
        po_nplurals_override: int | None = None,
    ) -> list[str | dict[str, str] | None]:
        """Call LLM API to translate multiple texts in a single request.
        Args:
            key_batch: List of key information dictionaries to translate
            lang_code: Target language code
            provider: Translation provider name (openai, gemini, mistral)
            model: LLM model name
            glossary: Optional glossary dictionary
            timeout: Request timeout in seconds (default: 120)
        """
        api_key = self._get_llm_api_key(provider)

        # Number the batch 1..N; these string keys tie the prompt, the plural
        # bookkeeping dicts, and the response parsing together.
        texts_dict = {}
        plural_entries: dict[str, bool] = {}
        json_plural_entries: dict[str, bool] = {}

        for i, key_info in enumerate(key_batch, 1):
            key_str = str(i)
            file_type = key_info.get("file_type", "po")
            english_text = key_info["english"]
            is_plural = key_info.get("is_plural", False)
            msgid_plural = key_info.get("msgid_plural")

            if file_type == "json" and self._is_icu_format(english_text):
                # Already ICU: send as-is, but remember it is a JSON plural.
                texts_dict[key_str] = english_text
                json_plural_entries[key_str] = True
            elif is_plural and msgid_plural:
                texts_dict[key_str] = {"singular": english_text, "plural": msgid_plural}
                (json_plural_entries if file_type == "json" else plural_entries)[
                    key_str
                ] = True
            else:
                texts_dict[key_str] = english_text

        texts_block = json.dumps(texts_dict, indent=2, ensure_ascii=False)
        plural_count = len(plural_entries)
        json_plural_count = len(json_plural_entries)

        lang_name = LANGUAGE_MAPPING.get(lang_code, lang_code)
        glossary_section = self._format_glossary_for_prompt(glossary)
        icu_categories_str = ", ".join(self._get_icu_plural_categories(lang_code))
        plural_instructions = self._build_plural_instructions(
            {
                "json_plural_info": {
                    "count": json_plural_count,
                    "entries": json_plural_entries,
                },
                "plural_count": plural_count,
                "key_batch": key_batch,
                "icu_categories_str": icu_categories_str,
                "lang_code": lang_code,
                "po_plural_count_override": po_nplurals_override,
            }
        )

        # Doubled braces ({{ }}) render as literal braces in the final prompt.
        prompt_template = (
            f"""Translate the following {len(key_batch)} text(s) to {lang_name} """
            f"""(language code: {lang_code}).
        Context: These are from an educational platform.

        CRITICAL - Placeholders and variables (NEVER translate these):
        - Copy every placeholder EXACTLY from source to translation: same spelling,
          same braces and brackets. Do NOT translate, rename, add, or remove any
          placeholder. This includes: {{variable_name}}, %(name)s, %s, {{0}}, and
          HTML-like tags such as <{{tag}}> or .
        - For strings containing {{variable_name}}: keep every {{variable_name}}
          character-for-character in the translation. A single missing }} or wrong
          name will break the build. Translate only the surrounding text.
        - Preserve HTML tags and formatting.
        {glossary_section}
        {plural_instructions}

        Return a JSON object where each key is the number (1, 2, 3, etc.).

        FORMAT BY ENTRY TYPE:

        1. Singular entries (no plural): value is a simple translation string.

        2. JSON plural entries: value is an ICU MessageFormat string.
           Example: "{{count, plural, one {{# item}} other {{# items}}}}"

        3. PO plural entries: value is an object with PLAIN TRANSLATION STRINGS.
           NEVER use ICU MessageFormat for PO entries!
           Use simple translated strings for each form.

           For languages with 2 forms:
           {{"singular": "translation for one", "plural": "translation for many"}}

           For languages with more forms (e.g., Arabic with 6):
           {{"0": "translation for zero", "1": "translation for one",
           "2": "translation for two", "3": "translation for few",
           "4": "translation for many", "5": "translation for other"}}

        CRITICAL: Each value in PO entries must be a plain string, "
        "NOT ICU syntax! Preserve placeholders ({{count}}, "
        "%(count)s, etc.) in the plain strings.

        Input texts (numbered):
        {texts_block}

        Return ONLY valid JSON in this format:
        {{
        "1": "translation of first text",
        "2": "{{count, plural, one {{singular}} "
        "other {{plural}}}}",
        "3": {{"singular": "singular translation", "
        ""plural": "plural translation"}},
        "4": {{"0": "form 0", "1": "form 1", "2": "form 2"}}
        ...
        }}"""
        )
        prompt = textwrap.dedent(prompt_template)

        try:
            completion_kwargs = configure_litellm_for_provider(
                provider=provider,
                model=model,
                api_key=api_key,
                messages=[{"role": "user", "content": prompt}],
                temperature=0.3,
                timeout=timeout,
            )

            response = completion(**completion_kwargs)
            response_text = response.choices[0].message.content.strip()

            logger.debug(
                "LLM response received for batch of %d key(s), response length: %d",
                len(key_batch),
                len(response_text),
            )

            # Prefer structured JSON parsing; fall back to order-based parsing
            # when the model returned non-JSON text.
            translations = self._parse_json_response(response_text, key_batch)
            if translations:
                logger.debug(
                    "Successfully parsed JSON response for batch of %d key(s)",
                    len(key_batch),
                )
                return translations

            logger.warning(
                "JSON parsing failed for batch, falling back to order-based parsing"
            )
            return self._parse_order_based_response(response_text, key_batch)

        except TimeoutError:
            logger.exception(
                "LLM batch API call timed out after %d seconds "
                "(model: %s, batch size: %d)",
                timeout,
                model,
                len(key_batch),
            )
            msg = (
                f"LLM batch API call timed out after {timeout} seconds.\n"
                f"Model: {model}\n"
                f"Batch size: {len(key_batch)}\n"
                f"Try reducing --batch-size or check your network connection."
            )
            raise CommandError(msg) from None
        except (requests.RequestException, ValueError, KeyError, AttributeError) as e:
            logger.exception(
                "LLM batch API call failed (model: %s, batch size: %d)",
                model,
                len(key_batch),
            )
            msg = (
                f"LLM batch API call failed: {e!s}\n"
                f"Model: {model}\n"
                f"Batch size: {len(key_batch)}\n"
                f"Make sure TRANSLATIONS_PROVIDERS is configured in settings "
                f"with the appropriate api_key, or set the environment variable "
                f"(OPENAI_API_KEY, GEMINI_API_KEY, or MISTRAL_API_KEY)"
            )
            raise CommandError(msg) from e

    def _extract_json_from_response(self, response_text: str) -> str:
        """Extract JSON text from response, handling code blocks.

        Strips a leading ```json ... ``` or bare ``` ... ``` fence; otherwise
        returns the text unchanged.
        """
        json_text = response_text
        if "```json" in response_text:
            start = response_text.find("```json") + 7
            end = response_text.find("```", start)
            if end > start:
                json_text = response_text[start:end].strip()
        elif "```" in response_text:
            start = response_text.find("```") + 3
            end = response_text.find("```", start)
            if end > start:
                json_text = response_text[start:end].strip()
        return json_text

    def _process_translation_key(
        self, key: str, value: Any, key_info: dict
    ) -> tuple[str | dict[str, str] | None, bool]:
        """Process a single translation key from LLM response.

        Returns (translation, is_missing); is_missing is True when the value was
        rejected (e.g. ICU syntax returned for a PO plural).
        """
        file_type = key_info.get("file_type", "po")
        is_plural = key_info.get("is_plural", False)

        translation = self._process_llm_response_value(
            value, key_info, file_type, is_plural=is_plural
        )
        is_missing = translation is None

        # NOTE(review): the elif below is redundant — it is exactly the negation
        # of the if; a plain else would read the same.
        if translation is None:
            self._log_rejected_translation(key, key_info)
        elif translation is not None:
            self._log_parsed_translation(key_info, translation)

        return translation, is_missing

    def _log_rejected_translation(self, key: str, key_info: dict) -> None:
        """Log warning for rejected translation."""
        self.stdout.write(
            self.style.WARNING(
                f" WARNING: Translation rejected for key {key} "
                f"(file: {key_info.get('file_path', 'unknown')}, "
                f"key: {key_info.get('key', 'unknown')[:50]}). "
                f"Likely returned ICU format for PO file."
            )
        )
        logger.warning(
            "Translation rejected for key %s (file: %s) - "
            "likely ICU format for PO file",
            key_info.get("key", "unknown"),
            key_info.get("file_path", "unknown"),
        )

    def _log_parsed_translation(
        self, key_info: dict, translation: str | dict[str, str]
    ) -> None:
        """Log debug message for parsed translation.

        Long values are truncated to MAX_LOG_STRING_LENGTH with an ellipsis.
        """
        logger.debug(
            "Parsed translation for key %s: %s",
            key_info.get("key", "unknown"),
            (
                str(translation)[:MAX_LOG_STRING_LENGTH] + "..."
                if len(str(translation)) > MAX_LOG_STRING_LENGTH
                else str(translation)
            ),
        )

    def _parse_json_response(
        self, response_text: str, key_batch: list[dict]
    ) -> list[str | dict[str, str] | None] | None:
        """Parse JSON response from LLM.

        Returns a positional list aligned with key_batch (None for missing or
        rejected entries), or None when the response is not valid JSON so the
        caller can fall back to order-based parsing.
        """
        json_text = self._extract_json_from_response(response_text)

        try:
            data = json.loads(json_text)
            translations: list[str | dict[str, str] | None] = []
            missing_keys = []
            for i in range(len(key_batch)):
                key = str(i + 1)  # response keys are 1-based strings
                key_info = key_batch[i]

                if key in data:
                    value = data[key]
                    translation, is_missing = self._process_translation_key(
                        key, value, key_info
                    )
                    if is_missing:
                        missing_keys.append(key_info.get("key", "unknown"))
                    translations.append(translation)
                else:
                    missing_keys.append(key_info.get("key", "unknown"))
                    self.stdout.write(
                        self.style.WARNING(
                            f" WARNING: LLM did not return translation for key {key} "
                            f"(file: {key_info.get('file_path', 'unknown')}, "
                            f"key: {key_info.get('key', 'unknown')})"
                        )
                    )
                    logger.warning(
                        "LLM did not return translation for key %s (file: %s)",
                        key_info.get("key", "unknown"),
                        key_info.get("file_path", "unknown"),
                    )
                    translations.append(None)

            if missing_keys:
                logger.warning(
                    "LLM response missing %d key(s): %s",
                    len(missing_keys),
                    missing_keys,
                )
        except (json.JSONDecodeError, KeyError, ValueError):
            logger.exception("Failed to parse JSON response")
            return None
        else:
            return translations

    def _parse_order_based_response(
        self, response_text: str, key_batch: list[dict]
    ) -> list[str | dict[str, str] | None]:
        """Fallback: Parse response assuming translations are in order.

        Strips list numbering/bullets from each line and pads/truncates to the
        batch length. Only plain strings come out of this path.
        """
        lines = [line.strip() for line in response_text.split("\n") if line.strip()]
        cleaned_lines = [
            line.lstrip("0123456789.-) ").strip()
            for line in lines
            if line.lstrip("0123456789.-) ").strip()
        ]
        if len(cleaned_lines) < len(key_batch):
            cleaned_lines.extend([""] * (len(key_batch) - len(cleaned_lines)))
        # Return as list[str | dict[str, str] | None] - all strings in this fallback
        return cast(
            "list[str | dict[str, str] | None]", cleaned_lines[: len(key_batch)]
        )

    def _get_llm_api_key(self, provider: str) -> str | None:
        """Get API key from TRANSLATIONS_PROVIDERS or environment variables.

        Args:
            provider: Translation provider name (openai, gemini, mistral)
        """
        # Settings take precedence over environment variables.
        try:
            if hasattr(settings, "TRANSLATIONS_PROVIDERS"):
                providers = getattr(settings, "TRANSLATIONS_PROVIDERS", {})
                if isinstance(providers, dict) and provider in providers:
                    provider_config = providers[provider]
                    if isinstance(provider_config, dict):
                        api_key = provider_config.get("api_key")
                        if api_key:
                            return api_key
        except (AttributeError, TypeError) as e:
            logger.debug("Error accessing TRANSLATIONS_PROVIDERS: %s", e)

        env_key_name = (
            "GEMINI_API_KEY"
            if provider == PROVIDER_GEMINI
            else "MISTRAL_API_KEY"
            if provider == PROVIDER_MISTRAL
            else "OPENAI_API_KEY"
        )
        return os.environ.get(env_key_name)

    def _process_string_value(
        self, value: str, file_type: str, *, is_plural: bool
    ) -> tuple[str | dict | None, bool]:
        """Process a string value from LLM response.

        Returns:
            Tuple of (result, is_dict) where is_dict indicates if result is a dict
            that should be processed further.
        """
        stripped = value.strip()
        # A brace-wrapped string may be a JSON object the model serialized.
        if stripped.startswith("{") and stripped.endswith("}"):
            return self._parse_string_dict(stripped, file_type, is_plural=is_plural)
        if self._is_icu_format(stripped):
            # ICU syntax is invalid for PO plural entries — reject it.
            if file_type == "po" and is_plural:
                return None, False
            return stripped, False
        return stripped, False

    def _parse_string_dict(
        self, value: str, file_type: str, *, is_plural: bool
    ) -> tuple[str | dict | None, bool]:
        """Parse a string that looks like a dict."""
        try:
            parsed = json.loads(value)
            if isinstance(parsed, dict):
                # Return dict to be processed further
                return parsed, True
        except (json.JSONDecodeError, ValueError):
            # Not valid JSON; fall through to ICU check and plain string handling.
            pass

        # Not a dict or parsing failed, check ICU format
        if self._is_icu_format(value) and file_type == "po" and is_plural:
            return None, False
        return value, False

    def _process_dict_numeric_keys(
        self, value: dict, file_type: str, *, is_plural: bool
    ) -> dict[str, str] | None:
        """Process dict with numeric keys (multiple plural forms).

        Only valid for PO plural entries; any ICU-formatted form rejects the
        whole dict (returns None).
        """
        numeric_keys = _get_numeric_plural_keys(value)
        if not numeric_keys or file_type != "po" or not is_plural:
            return None

        result = {}
        for key in numeric_keys:
            v_str = str(value[key]).strip()
            if self._is_icu_format(v_str):
                return None
            result[str(key)] = v_str
        return result if result else None

    def _process_dict_singular_plural(
        self, value: dict, key_info: dict, file_type: str, *, is_plural: bool
    ) -> str | dict[str, str] | None:
        """Process dict with singular/plural keys.

        JSON plural entries are converted to an ICU string; PO entries keep the
        {singular, plural} dict shape.
        """
        if "singular" not in value or "plural" not in value:
            return None

        if file_type == "json" and is_plural:
            lang_code = key_info.get("lang_code", "en")
            return self._convert_to_icu_format(
                str(value["singular"]).strip(),
                str(value["plural"]).strip(),
                lang_code,
            )
        return {
            "singular": str(value["singular"]).strip(),
            "plural": str(value["plural"]).strip(),
        }

    def _process_llm_response_value(
        self, value: Any, key_info: dict, file_type: str, *, is_plural: bool
    ) -> str | dict[str, str] | None:
        """Process a single value from LLM response, converting formats.

        Dispatches to the string / numeric-keys / singular-plural handlers above;
        anything unhandled is stringified as a last resort.
        """
        if isinstance(value, str):
            result, is_dict = self._process_string_value(
                value, file_type, is_plural=is_plural
            )
            if result is None:
                return None
            if is_dict:
                # Result is a dict, process it further
                value = result
            else:
                # Result is a string, return it
                return result

        if isinstance(value, dict):
            # Check for numeric keys (multiple plural forms)
            result = self._process_dict_numeric_keys(
                value, file_type, is_plural=is_plural
            )
            if result is not None:
                return result

            # Check for singular/plural format
            result = self._process_dict_singular_plural(
                value, key_info, file_type, is_plural=is_plural
            )
            if result is not None:
                return result

        return str(value).strip()

    def _group_translations_by_file(
        self, translations: dict[str, Any], empty_keys: list[dict]
    ) -> dict[str, dict[str, Any]]:
        """Group translations by file path.

        Rebuilds the same "{path}:{msgctxt}:{msgid}" keys used when translations
        were collected, then buckets values per resolved file path. PO entries
        with a msgctxt are stored as "msgctxt:msgid" for apply_po_translations.
        """
        translations_by_file: dict[str, dict[str, Any]] = {}

        for key_info in empty_keys:
            file_path_str = str(Path(key_info["file_path"]).resolve())
            # Include msgctxt in key if it exists to match key structure
            msgctxt = key_info.get("msgctxt")
            if msgctxt:
                translation_key = f"{file_path_str}:{msgctxt}:{key_info['key']}"
            else:
                translation_key = f"{file_path_str}:{key_info['key']}"

            if translation_key in translations:
                trans_value = translations[translation_key]
                if trans_value is None:
                    continue  # Skip missing translations
                file_type = key_info.get("file_type", "po")
                is_plural = key_info.get("is_plural", False)

                # Late conversion: a {singular, plural} dict destined for a JSON
                # file still needs to become an ICU string here.
                if file_type == "json" and isinstance(trans_value, dict):
                    if "singular" in trans_value and "plural" in trans_value:
                        trans_value = self._process_llm_response_value(
                            trans_value, key_info, file_type, is_plural=is_plural
                        )
                    else:
                        trans_value = trans_value.get("singular", str(trans_value))

                # For PO files, include msgctxt in key for apply_po_translations
                if key_info["file_type"] == "po" and msgctxt:
                    # Store with msgctxt prefix for proper matching
                    po_key = f"{msgctxt}:{key_info['key']}"
                else:
                    po_key = key_info["key"]

                translations_by_file.setdefault(file_path_str, {})[po_key] = trans_value

        return translations_by_file

    def _apply_file_translations(
        self,
        file_path: Path,
        file_translations: dict[str, Any],
        empty_keys: list[dict],
        stdout,
        lang_code: str | None = None,
    ) -> tuple[int, str, list[dict[str, str]]]:
        """Apply translations to a single file.

        Returns (count, app, rejected_brace_entries).
        """
        if not file_path.exists():
            stdout.write(self.style.WARNING(f" WARNING: File not found: {file_path}"))
            return 0, "unknown", []

        # Normalize paths for comparison
        normalized_file_path = str(file_path.resolve())
        # NOTE(review): next() without a default raises StopIteration if no
        # key_info matches this path — relies on callers only passing paths
        # derived from empty_keys; confirm that invariant holds.
        key_info = next(
            k
            for k in empty_keys
            if str(Path(k["file_path"]).resolve()) == normalized_file_path
        )
        app = key_info.get("app", "unknown")

        logger.debug(
            "Applying %d translation(s) to %s (type: %s, app: %s)",
            len(file_translations),
            file_path.name,
            key_info["file_type"],
            app,
        )
        rejected_brace_entries: list[dict[str, str]] = []
        if key_info["file_type"] == "json":
            count = apply_json_translations(file_path, file_translations)
        elif key_info["file_type"] == "po":
            count = apply_po_translations(
                file_path,
                file_translations,
                lang_code,
                rejected_brace_entries=rejected_brace_entries,
            )
        else:
            logger.warning(
                "Unknown file type '%s' for file: %s", key_info["file_type"], file_path
            )
            return 0, app, []

        if count > 0:
            logger.info(
                "Applied %d translation(s) to %s (app: %s)", count, file_path.name, app
            )

        return count, app, rejected_brace_entries

    # NOTE(review): chunk ends mid-signature — _apply_translations continues
    # beyond this view; its head is reproduced unchanged.
    def _apply_translations(
        self,
        translations: dict[str, Any],
        empty_keys: list[dict],
        stdout,
        lang_code: str | None = None,
    ) -> tuple[int,
dict[str, Any]]: + """Apply translations to files. + + Returns (applied_count, applied_by_app dict with details and + rejected_brace_format_entries). + """ + translations_by_file = self._group_translations_by_file( + translations, empty_keys + ) + + if not translations_by_file: + stdout.write(self.style.WARNING(" WARNING: No translations to apply")) + return 0, {"by_app": {}, "details": [], "rejected_brace_format_entries": []} + + applied = 0 + applied_by_app: dict[str, int] = {} + applied_details: list[dict[str, Any]] = [] + all_rejected_brace: list[dict[str, str]] = [] + + for file_path_str, file_translations in translations_by_file.items(): + full_path = Path(file_path_str) + count, app, rejected_brace_entries = self._apply_file_translations( + full_path, file_translations, empty_keys, stdout, lang_code + ) + + applied += count + all_rejected_brace.extend(rejected_brace_entries) + if count > 0: + applied_by_app[app] = applied_by_app.get(app, 0) + count + applied_details.append( + {"app": app, "file": full_path.name, "count": count} + ) + stdout.write( + f" Applied {count} translations to {app} ({full_path.name})" + ) + + if applied_by_app: + app_summary = ", ".join( + f"{app}: {count}" for app, count in applied_by_app.items() + ) + stdout.write(f" Summary by app: {app_summary}") + + return applied, { + "by_app": applied_by_app, + "details": applied_details, + "rejected_brace_format_entries": all_rejected_brace, + } + + def _cleanup_failed_branch(self, repo: GitRepository, branch_name: str) -> None: + """Clean up branch if PR creation fails.""" + try: + repo.switch_to_main() + # Only try to delete if branch exists locally + if branch_name in [ref.name for ref in repo.repo.heads]: + with suppress(git.exc.GitCommandError): + repo.repo.git.branch("-D", branch_name) + self.stdout.write( + self.style.WARNING( + f" Cleaned up failed branch: {branch_name}" + ) + ) + except (git.exc.GitCommandError, AttributeError) as e: + self.stdout.write( + self.style.WARNING(f" Could 
not clean up branch {branch_name}: {e!s}") + ) + + def _commit_changes( + self, repo: GitRepository, branch_name: str, lang_code: str + ) -> bool: + """Commit changes to git repository. Returns True if committed.""" + # Check if branch already exists + if repo.branch_exists(branch_name): + self.stdout.write( + self.style.WARNING( + f" Branch '{branch_name}' already exists. " + f"Switching to it and continuing..." + ) + ) + try: + repo.repo.git.checkout(branch_name) + except git.exc.GitCommandError: + # If local branch doesn't exist but remote does, create tracking branch + repo.repo.git.checkout("-b", branch_name, f"origin/{branch_name}") + else: + repo.configure_user() + repo.create_branch(branch_name) + repo.stage_all() + + if not repo.has_changes(): + self.stdout.write( + self.style.WARNING( + " No changes to commit. Skipping commit and PR creation." + ) + ) + repo.switch_to_main() + with suppress(git.exc.GitCommandError): + repo.repo.git.branch("-D", branch_name) + return False + + safe_lang_code = sanitize_for_git(lang_code) + commit_message = ( + f"feat: Add {safe_lang_code} translations via LLM\n\n" + f"Automated translation of empty keys for {safe_lang_code} language." 
+ ) + + repo.commit(commit_message) + + github_token = getattr( + settings, "TRANSLATIONS_GITHUB_TOKEN", None + ) or os.environ.get("TRANSLATIONS_GITHUB_TOKEN") + repo.push_branch(branch_name, github_token) + self.stdout.write(" Pushed branch to remote") + + return True + + def _create_pull_request( + self, + repo_path: str, + branch_name: str, + pr_data: PullRequestData, + repo_url: str, + ) -> str: + """Create pull request using GitHub CLI or API.""" + iso_code = pr_data["iso_code"] + provider = pr_data["provider"] + model = pr_data["model"] + provider_display = provider.replace("_", " ").title() + pr_title = ( + f"feat: Add {iso_code} translations via LLM using " + f"{provider_display} provider and model {model}" + ) + try: + # Using GitHub CLI (gh) - trusted system command + gh_path = shutil.which("gh") + if gh_path: + result = subprocess.run( # noqa: S603 + [ + gh_path, + "pr", + "create", + "--title", + pr_title, + "--body", + self._generate_pr_body(pr_data), + ], + cwd=repo_path, + capture_output=True, + text=True, + check=True, + ) + return result.stdout.strip() + except (subprocess.CalledProcessError, FileNotFoundError): + pass + # Fall back to API if gh CLI is not available or fails + return self._create_pr_via_api( + repo_path, + branch_name, + pr_data, + repo_url, + pr_title=pr_title, + ) + + def _generate_error_section( + self, errors: int, errors_by_app: dict[str, int] | None = None + ) -> str: + """Generate error warning section for PR body if there are errors. + + Args: + errors: Number of translation errors. + errors_by_app: Dictionary mapping app/MFE names to error counts. + + Returns: + Error section markdown string, or empty string if no errors. 
+ """ + if errors == 0: + return "" + + error_details = "" + if errors_by_app: + error_lines = [ + f"- **{app}**: {count} key(s) failed" + for app, count in sorted( + errors_by_app.items(), key=lambda x: x[1], reverse=True + ) + ] + error_details = ( + "\n**Errors by app/MFE:**\n\n" + "\n".join(error_lines) + "\n" + ) + + error_template = f""" + ### Translation Errors + + **{errors} translation key(s) failed to translate** due to API errors, rate + limits, or parsing issues. + {error_details} + **Impact:** + - These keys remain untranslated in the target language files + - They will need to be translated manually or re-run the command + - The translation process continued and completed successfully + for the remaining keys + + **Recommendation:** + - Review the command output logs for specific error details + - Consider re-running the command to retry failed batches + - Check API key permissions and rate limits if errors persist + + """ + return textwrap.dedent(error_template) + + def _generate_translation_summary( + self, glossary_matches: int, llm_translations: int, errors: int + ) -> str: + """Generate translation statistics summary line. + + Args: + glossary_matches: Number of glossary matches. + llm_translations: Number of LLM translations. + errors: Number of translation errors. + + Returns: + Summary string. + """ + if glossary_matches > 0: + return ( + f"Summary - Glossary matches: {glossary_matches}, " + f"LLM translations: {llm_translations}, Errors: {errors}" + ) + return f"Summary - LLM translations: {llm_translations}, Errors: {errors}" + + def _generate_rejected_brace_section( + self, rejected_brace_format_entries: list[dict[str, str]] + ) -> str: + """Generate PR section for rejected brace-format translations.""" + if not rejected_brace_format_entries: + return "" + lines = [ + "### Rejected brace-format translations", + "", + "The following entries had invalid python-brace-format translations " + "(e.g. 
missing or mismatched `{placeholders}`) and were not applied. " + "They remain untranslated for manual review:", + "", + ] + for item in rejected_brace_format_entries[:MAX_REJECTED_BRACE_DISPLAY]: + msgid = (item.get("msgid") or "").replace("|", "\\|")[:100] + file_name = item.get("file", "") + lines.append(f"- `{file_name}`: {msgid!r}") + if len(rejected_brace_format_entries) > MAX_REJECTED_BRACE_DISPLAY: + extra = len(rejected_brace_format_entries) - MAX_REJECTED_BRACE_DISPLAY + lines.append(f"- ... and {extra} more.") + return "\n".join(lines) + "\n\n" + + def _generate_pr_body(self, pr_data: PullRequestData) -> str: + """Generate PR description.""" + lang_code = pr_data["lang_code"] + iso_code = pr_data["iso_code"] + sync_stats = pr_data["sync_stats"] + applied_count = pr_data["applied_count"] + translation_stats = pr_data["translation_stats"] + applied_by_app = pr_data["applied_by_app"] + provider = pr_data["provider"] + model = pr_data["model"] + rejected_brace_format_entries = pr_data.get("rejected_brace_format_entries", []) + + glossary_matches = translation_stats.get("glossary_matches", 0) + llm_translations = translation_stats.get("llm_translations", 0) + errors = translation_stats.get("errors", 0) + errors_by_app: dict[str, int] = cast( + "dict[str, int]", translation_stats.get("errors_by_app", {}) + ) + + translation_summary = self._generate_translation_summary( + glossary_matches, llm_translations, errors + ) + error_section = self._generate_error_section(errors, errors_by_app) + rejected_brace_section = self._generate_rejected_brace_section( + rejected_brace_format_entries + ) + + applied_details = applied_by_app.get("details", []) + breakdown_lines = [ + f" Applied {detail['count']} translations to " + f"{detail['app']} ({detail['file']})" + for detail in applied_details + ] + + # Build changes section with conditional error line + changes_lines = [ + f"- **Language**: {lang_code} ({iso_code})", + f"- **Keys synced**: 
{sync_stats['frontend']['added']} frontend keys, " + f"{sync_stats['backend']['added']} backend entries", + f"- **Translations applied**: {applied_count} keys translated", + f"- **Typos fixed**: {sync_stats['frontend']['fixed']}", + ] + if errors > 0: + changes_lines.append( + f"- **Translation errors**: {errors} keys failed to translate" + ) + if rejected_brace_format_entries: + changes_lines.append( + f"- **Rejected brace-format**: {len(rejected_brace_format_entries)} " + "entries not applied (invalid placeholders; see section below)" + ) + + # Build statistics section with conditional error line + statistics_lines = [ + translation_summary, + f" Translated {applied_count} keys", + ] + if errors > 0: + statistics_lines.append(f" Failed: {errors} keys") + + # Build next steps section with conditional error line + next_steps_lines = [ + "- Review translations for accuracy", + ] + if errors > 0: + next_steps_lines.append( + "- Address failed translations (see error section above)" + ) + if rejected_brace_format_entries: + next_steps_lines.append( + "- Manually fix or translate entries with rejected brace-format " + "(see rejected brace-format section above)" + ) + next_steps_lines.extend( + [ + "- Test in staging environment", + "- Merge when ready", + ] + ) + + provider_display = provider.replace("_", " ").title() + pr_template = ( + f"""## Summary + + This PR adds {iso_code} translations via LLM automation using { + provider_display + } provider and model {model}. 
+ {error_section} + {rejected_brace_section} + ### Changes + + {chr(10).join(changes_lines)} + + ### Translation Statistics + + {chr(10).join(statistics_lines)} + + ### Applied Translations + + { + chr(10).join(breakdown_lines) + if breakdown_lines + else " No translations applied" + } + + ### Files Modified + + - Frontend apps: {sync_stats["frontend"]["created"]} created, """ + f"""{sync_stats["frontend"]["synced"]} synced + - Backend: PO files updated + + ### Next Steps + + {chr(10).join(next_steps_lines)} + + --- + *This PR was automatically generated by the sync_and_translate_language """ + f"""management command.* + """ + ) + return textwrap.dedent(pr_template) + + def _create_pr_via_api( + self, + repo_path: str, + branch_name: str, + pr_data: PullRequestData, + repo_url: str, + pr_title: str, + ) -> str: + """Create PR using GitHub API.""" + client = GitHubAPIClient() + owner, repo = GitHubAPIClient.parse_repo_url(repo_url) + + git_repo = GitRepository(repo_path) + main_branch = git_repo._get_main_branch_name() # noqa: SLF001 + + return client.create_pull_request( + owner=owner, + repo=repo, + branch_name=branch_name, + title=pr_title, + body=self._generate_pr_body(pr_data), + base=main_branch, + stdout=self.stdout, + ) diff --git a/src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/settings/__init__.py b/src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/settings/__init__.py new file mode 100644 index 000000000..3527c42e7 --- /dev/null +++ b/src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/settings/__init__.py @@ -0,0 +1,40 @@ +"""Common settings for AI Static Translations plugin.""" + + +def apply_common_settings(settings): + """ + Apply custom settings for the AI Static Translations plugin. + + These settings are shared with ol_openedx_course_translations. + If that plugin is also installed, its settings take precedence + since both plugins configure the same keys. 
+ """ + if not hasattr(settings, "TRANSLATIONS_PROVIDERS"): + settings.TRANSLATIONS_PROVIDERS = { + "default_provider": "mistral", + "deepl": { + "api_key": "", + }, + "openai": { + "api_key": "", + "default_model": "gpt-5.2", + }, + "gemini": { + "api_key": "", + "default_model": "gemini-3-pro-preview", + }, + "mistral": { + "api_key": "", + "default_model": "mistral-large-latest", + }, + } + if not hasattr(settings, "TRANSLATIONS_GITHUB_TOKEN"): + settings.TRANSLATIONS_GITHUB_TOKEN = "" + if not hasattr(settings, "TRANSLATIONS_REPO_URL"): + settings.TRANSLATIONS_REPO_URL = ( + "https://github.com/mitodl/mitxonline-translations.git" + ) + if not hasattr(settings, "TRANSLATIONS_REPO_PATH"): + settings.TRANSLATIONS_REPO_PATH = "" + if not hasattr(settings, "LITE_LLM_REQUEST_TIMEOUT"): + settings.LITE_LLM_REQUEST_TIMEOUT = 300 # seconds diff --git a/src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/settings/cms.py b/src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/settings/cms.py new file mode 100644 index 000000000..23a77f66e --- /dev/null +++ b/src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/settings/cms.py @@ -0,0 +1,10 @@ +"""Settings to provide to edX""" + +from ol_openedx_ai_static_translations.settings import apply_common_settings + + +def plugin_settings(settings): + """ + Populate cms settings + """ + apply_common_settings(settings) diff --git a/src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/utils.py b/src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/utils.py new file mode 100644 index 000000000..27e89e1b0 --- /dev/null +++ b/src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/utils.py @@ -0,0 +1,1869 @@ +"""Utility functions for AI static translation management commands.""" + +import json +import logging +import os +import re +from collections import OrderedDict +from collections.abc import Iterator +from datetime import 
UTC, datetime
from pathlib import Path
from typing import Any

import polib  # type: ignore[import-untyped]
from django.conf import settings
from django.core.management.base import CommandError

from ol_openedx_ai_static_translations.constants import (
    BACKEND_PO_FILES,
    DEFAULT_JSON_INDENT,
    DEFAULT_PLURAL_FORM,
    EXPECTED_GLOSSARY_PARTS,
    LANGUAGE_MAPPING,
    LEARNER_FACING_APPS,
    MAX_LOG_STRING_LENGTH,
    PLURAL_FORMS,
    PO_HEADER_BUGS_EMAIL,
    PO_HEADER_CONTENT_TRANSFER_ENCODING,
    PO_HEADER_CONTENT_TYPE,
    PO_HEADER_MIME_VERSION,
    PO_HEADER_POT_CREATION_DATE,
    PO_HEADER_PROJECT_VERSION,
    PO_HEADER_TRANSIFEX_TEAM_BASE_URL,
    PROVIDER_GEMINI,
    PROVIDER_MISTRAL,
    TRANSLATABLE_PLUGINS,
    TRANSLATION_FILE_NAMES,
    TYPO_PATTERNS,
)

logger = logging.getLogger(__name__)

# ============================================================================
# Validation Utilities
# ============================================================================

# Language code suffix length constants
REGION_CODE_LENGTH = 2  # 2-letter region codes (e.g., ES, BR)
SCRIPT_TAG_LENGTH = 4  # 4-letter script tags (e.g., Hans, Hant)


def normalize_language_code(code: str) -> str:
    """Normalize language code to use underscores (Django/gettext format).

    Converts BCP 47 format (hyphens) to gettext format (underscores) and
    normalizes case: language part lowercase, suffix properly cased.

    Examples:
    - 'es-419' -> 'es_419'
    - 'ES-419' -> 'es_419'
    - 'es-ES' -> 'es_ES'
    - 'ES_ES' -> 'es_ES'
    - 'zh-Hans' -> 'zh_Hans'
    - 'ZH-HANS' -> 'zh_Hans'
    - 'es_419' -> 'es_419' (unchanged)
    - 'es' -> 'es' (unchanged)
    """
    # Replace hyphens with underscores and split
    # (maxsplit=1: only the first separator divides language from suffix).
    parts = code.replace("-", "_").split("_", 1)
    lang_part = parts[0].lower()  # Language: always lowercase

    if len(parts) == 1:
        return lang_part

    # Normalize suffix: uppercase 2-char regions, title case 4-char scripts
    suffix = parts[1]
    if len(suffix) == REGION_CODE_LENGTH:
        suffix = suffix.upper()  # Region codes: ES, BR, etc.
    elif len(suffix) == SCRIPT_TAG_LENGTH and suffix[0].isalpha():
        suffix = suffix.title()  # Script tags: Hans, Hant, etc.
    # Numeric regions (419) and others stay as-is

    return f"{lang_part}_{suffix}"


def validate_language_code(code: str, field_name: str = "language code") -> None:
    """Validate language code format.

    Accepts normalized codes (already normalized by normalize_language_code):
    - xx (2 lowercase letters): e.g., 'el', 'es', 'ar'
    - xx_XX (with 2-letter region): e.g., 'es_ES'
    - xx_NNN (with UN M.49 numeric region): e.g., 'es_419'
    - xx_Xxxx (with script subtag): e.g., 'zh_Hans'

    Raises:
        CommandError: if ``code`` does not match the accepted formats.
    """
    # Pattern: xx, xx_XX, xx_419, xx_Hans
    # NOTE(review): only two-letter (ISO 639-1) primary subtags pass; a
    # three-letter code such as 'fil' is rejected — confirm this is intended.
    pattern = r"^[a-z]{2}(_([A-Z]{2}|[0-9]{3}|[A-Z][a-z]{3}))?$"
    if not re.match(pattern, code):
        msg = (
            f"Invalid {field_name} format: {code}. 
"
            f"Expected format: 'xx', 'xx_XX', 'xx_419', 'xx_Hans' "
            f"(e.g., 'el', 'es_ES', 'es_419', 'zh_Hans')"
        )
        raise CommandError(msg)


def validate_branch_name(branch_name: str) -> None:
    """Validate branch name format to prevent injection.

    Raises:
        CommandError: if the name contains anything outside
        lowercase letters, digits, '/', '_' and '-'.
    """
    if not re.match(r"^[a-z0-9/_-]+$", branch_name):
        msg = f"Invalid branch name format: {branch_name}"
        raise CommandError(msg)


# ============================================================================
# Git Utilities
# ============================================================================


def sanitize_for_git(text: str) -> str:
    """Sanitize text for use in git operations.

    Keeps word characters, whitespace and hyphens; drops everything else.
    """
    return re.sub(r"[^\w\s-]", "", text)


def create_branch_name(lang_code: str) -> str:
    """Create a safe branch name from language code.

    The UTC timestamp suffix keeps repeated runs from colliding on the
    same branch name.
    """
    safe_lang = re.sub(r"[^a-z0-9_-]", "", lang_code.lower())
    timestamp = datetime.now(tz=UTC).strftime("%Y%m%d-%H%M%S")
    return f"feature/add-{safe_lang}-translations-{timestamp}"


# ============================================================================
# Configuration Helpers
# ============================================================================


def get_config_value(key: str, options: dict, default: Any = None) -> Any:
    """Get configuration value from options, settings, or environment.

    Precedence: command-line option, then Django setting, then environment
    variable (both named ``TRANSLATIONS_<KEY>``), then ``default``.
    Falsy values (empty string, 0, False) at any level are treated as
    "not provided" and the search continues.
    """
    # Check command-line options first (Django converts --repo-path to repo_path)
    # NOTE(review): the dash-key lookup only matters if a caller passes a
    # dict with dash-separated keys; argparse-produced dicts use underscores.
    option_value = options.get(key) or options.get(key.replace("_", "-"))
    if option_value:
        return option_value

    # Check settings with TRANSLATIONS_ prefix
    setting_key = f"TRANSLATIONS_{key.upper().replace('-', '_')}"
    if hasattr(settings, setting_key):
        setting_value = getattr(settings, setting_key)
        # Only use setting if it's not empty
        if setting_value:
            return setting_value

    # Check environment variable with TRANSLATIONS_ prefix
    # (same name as the Django setting).
    env_key = setting_key
    env_value = os.environ.get(env_key)
    if env_value:
        return env_value

    # Return default if nothing found
    return default


def get_default_provider() -> str | None:
    """Get default provider from TRANSLATIONS_PROVIDERS.

    Returns None when the setting is absent or not a dict.
    """
    providers = getattr(settings, "TRANSLATIONS_PROVIDERS", {})
    if not isinstance(providers, dict):
        return None
    return providers.get("default_provider")


def get_default_model_for_provider(provider: str) -> str | None:
    """Get default model for a provider from TRANSLATIONS_PROVIDERS.

    Returns None when the setting, or the provider's entry, is missing
    or malformed (not a dict).
    """
    providers = getattr(settings, "TRANSLATIONS_PROVIDERS", {})
    if not isinstance(providers, dict):
        return None
    provider_config = providers.get(provider, {})
    if not isinstance(provider_config, dict):
        return None
    return provider_config.get("default_model")


def configure_litellm_for_provider(
    provider: str, model: str, api_key: str | None, **base_kwargs
) -> dict[str, Any]:
    """Configure LiteLLM completion kwargs for a specific provider.

    Args:
        provider: Provider name (see PROVIDER_* constants).
        model: Model identifier, with or without a provider prefix.
        api_key: Optional key forwarded to LiteLLM.
        **base_kwargs: Extra completion kwargs copied into the result.

    Returns:
        A dict suitable for passing to litellm.completion().
    """
    completion_kwargs = dict(base_kwargs)  # copy: never mutate caller kwargs
    completion_kwargs["model"] = model

    if api_key:
        completion_kwargs["api_key"] = api_key
    if provider == PROVIDER_GEMINI:
        # If no prefix, add gemini/ to force Gemini API usage (not Vertex AI)
        # If vertex_ai/ or gemini/ prefix already exists, respect it
        if not model.startswith(("gemini/", "vertex_ai/")):
            completion_kwargs["model"] = f"gemini/{model}"
        # Gemini 3 models require temperature = 1.0 to avoid issues:
        # - Infinite loops in response generation
        # - Degraded reasoning performance
        # - Failure on complex tasks
        # See: https://docs.litellm.ai/docs/providers/gemini
        if "gemini-3" in model.lower():
            completion_kwargs["temperature"] = 1.0
    elif provider == PROVIDER_MISTRAL and not model.startswith("mistral/"):
        completion_kwargs["model"] = f"mistral/{model}"

    return completion_kwargs


# ============================================================================
# Error Handling Utilities
# ============================================================================


def is_retryable_error(error: Exception) -> bool:
    """
    Check if an error is retryable (network issues, rate limits, timeouts).

    Classification is by substring match on the stringified exception, so it
    is heuristic: non-retryable markers (auth failures, 4xx client errors)
    win over retryable ones, and unrecognized errors default to retryable.

    Args:
        error: The exception to check

    Returns:
        True if the error is retryable, False otherwise

    Examples:
        >>> is_retryable_error(ConnectionError("Connection timeout"))
        True
        >>> is_retryable_error(ValueError("Invalid API key"))
        False
    """
    error_str = str(error).lower()

    # Retryable errors
    retryable_patterns = [
        "timeout",
        "connection",
        "rate limit",
        "429",
        "503",
        "502",
        "500",
        "temporarily unavailable",
        "service unavailable",
        "too many requests",
    ]

    # Non-retryable errors (don't retry these)
    non_retryable_patterns = [
        "invalid api key",
        "authentication",
        "401",
        "403",
        "not found",
        "404",
        "bad request",
        "400",
        "commanderror",  # Our custom errors that are usually non-retryable
    ]

    # Check for non-retryable first
    for pattern in non_retryable_patterns:
        if pattern in error_str:
            return False

    # Check for retryable patterns
    for pattern in retryable_patterns:
        if pattern in error_str:
            return True

    # Default: retry unknown errors (could be transient)
    return True


# ============================================================================
# Translation File I/O
# ============================================================================


def load_json_file(file_path: Path) -> dict:
    """Load a JSON translation file.

    Returns an empty dict for a missing file.

    Raises:
        ValueError: when the file exists but is not valid JSON
        (chained from the underlying JSONDecodeError).
    """
    if not file_path.exists():
        return {}
    try:
        with file_path.open(encoding="utf-8") as f:
            return json.load(f)
    except json.JSONDecodeError as e:
        msg = f"Error parsing JSON file {file_path}: {e}"
        raise ValueError(msg) from e


def save_json_file(file_path: Path, data: dict, indent: int = DEFAULT_JSON_INDENT):
    """Save a JSON translation file with proper formatting.

    Creates parent directories as needed; writes UTF-8 with a trailing
    newline and without ASCII-escaping non-ASCII translations.
    """
    file_path.parent.mkdir(parents=True, exist_ok=True)
    with file_path.open("w", encoding="utf-8") as f:
        json.dump(data, f, ensure_ascii=False, indent=indent)
        f.write("\n")


def 
find_typo_mappings(data: dict) -> list[tuple[str, str]]:
    """Find typo keys and their correct counterparts.

    Scans ``data`` for keys containing a known typo substring (from
    TYPO_PATTERNS) and pairs each with the corrected key name — but only
    when the corrected key also exists in ``data``.
    """
    mappings = []

    for typo, correct in TYPO_PATTERNS:
        typo_keys = [k for k in data if typo in k]
        for typo_key in typo_keys:
            correct_key = typo_key.replace(typo, correct)
            if correct_key in data:
                mappings.append((typo_key, correct_key))

    return mappings


def sync_or_create_json_file(en_file: Path, target_file: Path) -> dict:
    """
    Sync or create a JSON translation file.

    Creates ``target_file`` with every English key mapped to "" when it
    does not yet exist; otherwise adds missing keys, migrates values from
    typo keys to their corrected names, and removes the typo keys.

    Returns dict with stats:
    {'action': 'created'|'synced'|'skipped', 'added': int,
    'fixed': int, 'removed': int}
    """
    try:
        en_data = load_json_file(en_file)
    except ValueError:
        # Unreadable English source: nothing to sync against.
        return {
            "action": "skipped",
            "added": 0,
            "fixed": 0,
            "removed": 0,
            "error": "English file not readable",
        }

    if not en_data:
        return {
            "action": "skipped",
            "added": 0,
            "fixed": 0,
            "removed": 0,
            "error": "English file is empty",
        }

    target_data = load_json_file(target_file) if target_file.exists() else {}
    file_exists = target_file.exists()

    stats = {
        "action": "created" if not file_exists else "synced",
        "added": 0,
        "fixed": 0,
        "removed": 0,
    }

    if file_exists:
        # OrderedDict preserves the existing key order while we edit.
        ordered_data = OrderedDict(target_data)

        typo_mappings = find_typo_mappings(ordered_data)
        for typo_key, correct_key in typo_mappings:
            typo_value = ordered_data.get(typo_key, "")
            correct_value = ordered_data.get(correct_key, "")

            # Only migrate when the corrected key has no translation yet.
            if not correct_value and typo_value:
                ordered_data[correct_key] = typo_value
                # Type assertion: stats["fixed"] is always int
                stats["fixed"] = int(stats["fixed"]) + 1

            if typo_key in ordered_data:
                del ordered_data[typo_key]
                # Type assertion: stats["removed"] is always int
                stats["removed"] = int(stats["removed"]) + 1

        # Add any English keys the target is missing, untranslated.
        for key in en_data:
            if key not in ordered_data:
                ordered_data[key] = ""
                # Type assertion: stats["added"] is always int
                stats["added"] = int(stats["added"]) + 1

        target_data = dict(ordered_data)
    else:
        target_data = dict.fromkeys(en_data, "")
        stats["added"] = len(en_data)

    save_json_file(target_file, target_data)

    return stats


# ============================================================================
# PO File Utilities
# ============================================================================


def _get_base_lang(lang_code: str) -> str:
    """Extract base language code from locale code (e.g., 'es_ES' -> 'es')."""
    return lang_code.split("_", maxsplit=1)[0] if "_" in lang_code else lang_code


def _get_plural_form(lang_code: str) -> str:
    """Get plural form string for a language code.

    Falls back to DEFAULT_PLURAL_FORM for languages not in PLURAL_FORMS.
    """
    base_lang = _get_base_lang(lang_code)
    return PLURAL_FORMS.get(base_lang, DEFAULT_PLURAL_FORM)


def _get_po_plural_count(lang_code: str) -> int:
    """Get number of plural forms for a language (for PO files).

    Parses ``nplurals=N`` out of the Plural-Forms string; defaults to 2
    (the common gettext case) when it cannot be found.
    """
    plural_form = _get_plural_form(lang_code)
    nplurals_match = re.search(r"nplurals=(\d+)", plural_form)
    if not nplurals_match:
        return 2
    return int(nplurals_match.group(1))


def get_nplurals_from_po_file(file_path: Path) -> int | None:
    """Read nplurals from a PO file's Plural-Forms header.

    Returns None if missing or unreadable.
    """
    if not file_path.exists():
        return None
    try:
        po = polib.pofile(str(file_path))
        plural_forms_str = po.metadata.get("Plural-Forms", "")
        if not plural_forms_str:
            return None
        nplurals_match = re.search(r"nplurals=(\d+)", plural_forms_str)
        if not nplurals_match:
            return None
        return int(nplurals_match.group(1))
    except (OSError, polib.POFileError, ValueError):
        # Treat any parse/IO failure as "header unavailable".
        return None


def _get_numeric_plural_keys(translation: dict) -> list:
    """Return keys that are int or digit-string (plural form indices)."""
    return [
        key for key in translation if isinstance(key, (int, str)) and str(key).isdigit()
    ]


# Python-format placeholders as in Django: %(name)s, %(count)d, etc.
_PYTHON_FORMAT_PLACEHOLDER_RE = re.compile(r"%\(\s*(\w+)\s*\)[sdcouxXeEfFgGin%]")

# Python brace-format placeholders: {variable_name} (Django/python-brace-format)
# NOTE(review): this does not match format specs ({name:>10}) or {{escapes}},
# so such placeholders are not captured and a translation using them will
# fail the set comparison below — confirm that is acceptable.
_PYTHON_BRACE_FORMAT_PLACEHOLDER_RE = re.compile(r"\{(\w+)\}")


def _get_brace_format_placeholders(text: str) -> set[str]:
    """Return set of placeholder names in a Python brace-format string."""
    if not text:
        return set()
    return set(_PYTHON_BRACE_FORMAT_PLACEHOLDER_RE.findall(text))


def _is_valid_brace_format_translation(source: str, translation: str) -> bool:
    """
    Return True if translation is a valid Python brace-format string matching source.

    Ensures: same set of {name} placeholders as source, and no unterminated
    format directives (so msgfmt won't fail). An empty translation is always
    considered valid (nothing to break at format time).
    """
    if not translation:
        return True
    source_placeholders = _get_brace_format_placeholders(source)
    trans_placeholders = _get_brace_format_placeholders(translation)
    if source_placeholders != trans_placeholders:
        return False
    try:
        # Placeholders like {0}, {1} are positional; {name} is keyword.
        # If all placeholders are digit strings, use positional args.
        # A trial .format() with dummy values catches malformed directives.
        if source_placeholders and all(p.isdigit() for p in source_placeholders):
            max_index = max(int(p) for p in source_placeholders)
            positional = [""] * (max_index + 1)
            translation.format(*positional)
        else:
            dummy = dict.fromkeys(source_placeholders, "")
            translation.format(**dummy)
    except (ValueError, KeyError, IndexError):
        return False
    else:
        return True


def _get_python_format_placeholders(text: str) -> set[str]:
    """Return set of placeholder names in a python-format string."""
    if not text:
        return set()
    return set(_PYTHON_FORMAT_PLACEHOLDER_RE.findall(text))


def plural_source_has_placeholders_not_in_singular(
    msgid: str, msgid_plural: str
) -> bool:
    """
    Return True if msgid_plural has python-format placeholders that msgid does not.
+ + For nplurals=1 we must then use the singular translation for the single form + to avoid KeyError at runtime when n=1. + """ + if not msgid_plural: + return False + singular_placeholders = _get_python_format_placeholders(msgid) + plural_placeholders = _get_python_format_placeholders(msgid_plural) + return bool(plural_placeholders - singular_placeholders) + + +def _plural_has_placeholders_not_in_singular(entry: polib.POEntry) -> bool: + """Return True if plural has python-format placeholders that singular lacks. + + Used to choose singular vs plural source when nplurals=1. + """ + if not entry.msgid_plural or "python-format" not in (entry.flags or []): + return False + return plural_source_has_placeholders_not_in_singular( + entry.msgid or "", entry.msgid_plural or "" + ) + + +def _entry_has_asymmetric_placeholders(entry: polib.POEntry) -> bool: + """ + Return True if this is a python-format plural with asymmetric placeholders. + + Singular/plural have different placeholders (one has a variable the other + doesn't). For such entries we keep both msgstr[0] and msgstr[1] even when + the locale has nplurals=1, so msgfmt and runtime work correctly. 
+ """ + if not entry.msgid_plural or "python-format" not in (entry.flags or []): + return False + singular_placeholders = _get_python_format_placeholders(entry.msgid or "") + plural_placeholders = _get_python_format_placeholders(entry.msgid_plural or "") + return singular_placeholders != plural_placeholders + + +def create_po_file_header(lang_code: str, iso_code: str | None = None) -> str: + """Create PO file header for a language.""" + if iso_code is None: + iso_code = lang_code + + base_lang = _get_base_lang(lang_code) + plural = _get_plural_form(lang_code) + lang_name = LANGUAGE_MAPPING.get(lang_code, lang_code) + + return f"""msgid "" +msgstr "" +"Project-Id-Version: {PO_HEADER_PROJECT_VERSION}\\n" +"Report-Msgid-Bugs-To: {PO_HEADER_BUGS_EMAIL}\\n" +"POT-Creation-Date: {PO_HEADER_POT_CREATION_DATE}\\n" +"PO-Revision-Date: 2025-01-01 00:00+0000\\n" +"Last-Translator: \\n" +"Language-Team: {lang_name} ({PO_HEADER_TRANSIFEX_TEAM_BASE_URL}/{base_lang}/)\\n" +"MIME-Version: {PO_HEADER_MIME_VERSION}\\n" +"Content-Type: {PO_HEADER_CONTENT_TYPE}\\n" +"Content-Transfer-Encoding: {PO_HEADER_CONTENT_TRANSFER_ENCODING}\\n" +"Language: {iso_code}\\n" +"Plural-Forms: {plural}\\n" + +""" + + +def parse_po_file(po_file: Path) -> dict[str, str]: + """ + Parse a PO file and extract msgid -> msgstr mappings. + For plural forms, uses msgid as the key + (msgid_plural entries are handled separately). + Uses polib if available, falls back to manual parsing. + """ + if not po_file.exists(): + return {} + + po = polib.pofile(str(po_file)) + entries = {} + for entry in po: + if entry.msgid: # Skip empty header msgid + # For plural entries, use msgid as key + entries[entry.msgid] = entry.msgstr or "" + return entries + + +def parse_po_file_with_metadata(po_file: Path) -> dict[str, dict]: + """ + Parse a PO file and extract msgid -> metadata mappings. 
    Returns dict with structure:
    {msgid: {'msgstr': str, 'msgid_plural': str, 'msgstr_plural': dict,
             'locations': List[str], 'flags': List[str], 'is_plural': bool}}
    Requires polib; returns {} when the file does not exist.
    """
    if not po_file.exists():
        return {}

    po = polib.pofile(str(po_file))
    entries = {}
    for entry in po:
        if entry.msgid:  # Skip empty header msgid
            # occurrences are (file, line) pairs; render as "file:line".
            locations = [
                f"{occ[0]}:{occ[1]}" if len(occ) > 1 else occ[0]
                for occ in entry.occurrences
            ]

            entry_data = {
                "msgstr": entry.msgstr or "",
                "locations": locations,
                "flags": entry.flags,  # List of flags like ['python-format']
                # NOTE(review): polib's POEntry.msgid_plural defaults to "",
                # not None — verify this 'is not None' test; elsewhere the
                # truthiness check `if entry.msgid_plural:` is used instead.
                "is_plural": entry.msgid_plural is not None,
            }
            if entry.msgid_plural:
                entry_data["msgid_plural"] = entry.msgid_plural
                # Convert msgstr_plural dict to simple dict
                entry_data["msgstr_plural"] = {
                    form_index: entry.msgstr_plural.get(form_index, "")
                    for form_index in range(len(entry.msgstr_plural))
                }
            entries[entry.msgid] = entry_data
    return entries


def _create_po_entry_from_en(
    entry: polib.POEntry,
    lang_code: str | None = None,
    nplurals_from_file: int | None = None,
) -> polib.POEntry:
    """Create a new PO entry from an English entry with empty translation.

    Preserves all metadata from the English entry including:
    - msgid, msgid_plural, msgctxt
    - occurrences (location comments)
    - flags (format flags like python-format)

    Args:
        entry: English PO entry to copy from
        lang_code: Target language code to determine number of plural forms.
            If None, uses the number of forms from the English entry.
        nplurals_from_file: When set, use this as the number of plural forms
            (takes priority over lang_code). Use the translation file's
            Plural-Forms header when syncing into an existing PO file.
    """
    new_entry = polib.POEntry(
        msgid=entry.msgid,
        msgid_plural=entry.msgid_plural,
        occurrences=entry.occurrences,
        flags=entry.flags,
    )
    # Preserve msgctxt (message context) if it exists
    if hasattr(entry, "msgctxt") and entry.msgctxt:
        new_entry.msgctxt = entry.msgctxt

    if entry.msgid_plural:
        # Prefer nplurals from file, then lang rule, then English entry
        if nplurals_from_file is not None:
            num_forms = nplurals_from_file
        elif lang_code:
            num_forms = _get_po_plural_count(lang_code)
        else:
            num_forms = max(2, len(entry.msgstr_plural) if entry.msgstr_plural else 2)
        # Special case: when singular/plural have different placeholders (e.g. variable
        # only in plural), keep both [0] and [1] even if locale has nplurals=1
        if _entry_has_asymmetric_placeholders(entry):
            num_forms = max(num_forms, 2)
        new_entry.msgstr_plural = dict.fromkeys(range(num_forms), "")
    else:
        new_entry.msgstr = ""
    return new_entry


def _sync_existing_po_file(
    en_po: polib.POFile,
    target_po: polib.POFile,
    target_file: Path,
    lang_code: str | None = None,
) -> int:
    """Sync existing PO file by adding missing entries. Returns count added.

    Uses the target PO file's Plural-Forms header for the number of plural
    forms when adding new entries; falls back to lang_code-based rule if
    the header is missing.
    """
    # Prefer nplurals from the existing translation file
    nplurals_from_file: int | None = None
    plural_forms_str = target_po.metadata.get("Plural-Forms", "")
    if plural_forms_str:
        nplurals_match = re.search(r"nplurals=(\d+)", plural_forms_str)
        if nplurals_match:
            nplurals_from_file = int(nplurals_match.group(1))

    # Create a set of existing entries using (msgctxt, msgid, msgid_plural) tuple
    # msgctxt is included because same msgid can have different contexts
    existing_entries = set()
    for entry in target_po:
        if entry.msgid:
            msgctxt = getattr(entry, "msgctxt", None) or None
            key = (
                msgctxt,
                entry.msgid,
                entry.msgid_plural if entry.msgid_plural else None,
            )
            existing_entries.add(key)

    # Add missing entries from English file
    added_count = 0
    for entry in en_po:
        if not entry.msgid:  # Skip header
            continue

        msgctxt = getattr(entry, "msgctxt", None) or None
        entry_key = (
            msgctxt,
            entry.msgid,
            entry.msgid_plural if entry.msgid_plural else None,
        )
        if entry_key not in existing_entries:
            new_entry = _create_po_entry_from_en(
                entry, lang_code, nplurals_from_file=nplurals_from_file
            )
            target_po.append(new_entry)
            added_count += 1

    # CRITICAL: Normalize ALL entries to fix newline mismatches
    normalized_count = _normalize_all_entries_in_po_file(target_po)

    # Only write when something actually changed.
    if added_count > 0 or normalized_count > 0:
        target_file.parent.mkdir(parents=True, exist_ok=True)
        target_po.save(str(target_file))

    return added_count


def _create_new_po_file(
    en_po: polib.POFile, target_file: Path, lang_code: str, iso_code: str | None
) -> int:
    """Create a new PO file with all entries from English. Returns count added."""
    target_po = polib.POFile()

    # Set metadata - preserve important fields from English file
    target_po.metadata = en_po.metadata.copy()
    target_po.metadata["Language"] = iso_code or lang_code

    # Set Plural-Forms for the target language (e.g. 2 forms for French)
    target_po.metadata["Plural-Forms"] = _get_plural_form(lang_code)

    # Copy all entries with empty translations
    added_count = 0
    for entry in en_po:
        if not entry.msgid:  # Skip header
            continue

        new_entry = _create_po_entry_from_en(entry, lang_code)
        target_po.append(new_entry)
        added_count += 1

    target_file.parent.mkdir(parents=True, exist_ok=True)
    target_po.save(str(target_file))
    return added_count


def sync_or_create_po_file(
    en_file: Path, target_file: Path, lang_code: str, iso_code: str | None = None
) -> dict:
    """
    Sync or create a PO file, preserving location comments and format flags.
    Returns dict with stats: {'action': 'created'|'synced'|'skipped', 'added': int}
    (a 'skipped' result also carries an 'error' message).
    """
    if not en_file.exists():
        return {"action": "skipped", "added": 0, "error": "English file does not exist"}

    file_exists = target_file.exists()
    stats = {"action": "created" if not file_exists else "synced", "added": 0}

    # Use polib for robust PO file handling
    en_po = polib.pofile(str(en_file))

    if not en_po:
        return {"action": "skipped", "added": 0, "error": "English file has no entries"}

    if file_exists:
        # File exists: sync entries
        target_po = polib.pofile(str(target_file))
        stats["added"] = _sync_existing_po_file(
            en_po, target_po, target_file, lang_code
        )
    else:
        # File doesn't exist: create new with all entries from English
        stats["added"] = _create_new_po_file(en_po, target_file, lang_code, iso_code)

    return stats


# ============================================================================
# Translation Key Extraction
# ============================================================================


def _extract_empty_keys_from_frontend(base_dir: Path, iso_code: str) -> list[dict]:
    """Extract empty translation keys from frontend JSON files."""
    logger.debug("Extracting empty keys from frontend apps for language: %s", iso_code)
    empty_keys = []

    for app in LEARNER_FACING_APPS:
        # Target translation file: src/<i18n>/<messages>/<iso_code>.json
        target_file = (
            base_dir
            / app
            / "src"
            / TRANSLATION_FILE_NAMES["i18n_dir"]
            / TRANSLATION_FILE_NAMES["messages_dir"]
            / f"{iso_code}.json"
        )
        # Prefer the Transifex input file as the English source; fall back to
        # the English messages file when it is absent.
        en_file = (
            base_dir
            / app
            / "src"
            / TRANSLATION_FILE_NAMES["i18n_dir"]
            / TRANSLATION_FILE_NAMES["transifex_input"]
        )
        if not en_file.exists():
            en_file = (
                base_dir
                / app
                / "src"
                / TRANSLATION_FILE_NAMES["i18n_dir"]
                / TRANSLATION_FILE_NAMES["messages_dir"]
                / TRANSLATION_FILE_NAMES["english"]
            )

        if not target_file.exists() or not en_file.exists():
            logger.debug(
                "Skipping %s: target file or English file missing (target: %s, en: %s)",
                app,
                target_file.exists(),
                en_file.exists(),
            )
            continue

        try:
            target_data = load_json_file(target_file)
            en_data = load_json_file(en_file)
            logger.debug(
                "Processing %s: found %d keys in English file", app, len(en_data)
            )

            for key in en_data:
                target_value = target_data.get(key, "")
                # A key counts as empty when missing, falsy, or whitespace-only.
                if not target_value or (
                    isinstance(target_value, str) and not target_value.strip()
                ):
                    english_value = en_data[key]
                    # Skip non-string values (numbers, booleans, objects, arrays)
                    # These shouldn't be translated as they would break JSON structure
                    if not isinstance(english_value, str):
                        logger.debug(
                            "Skipping non-string value for key '%s' in %s: %s "
                            "(type: %s). Only string values are translatable.",
                            key,
                            app,
                            english_value,
                            type(english_value).__name__,
                        )
                        continue
                    # Check if English value is already in ICU MessageFormat
                    is_icu_plural = (
                        isinstance(english_value, str) and ", plural," in english_value
                    )

                    empty_keys.append(
                        {
                            "app": app,
                            "key": key,
                            "english": english_value,
                            "translation": "",
                            "file_type": "json",
                            "file_path": str(target_file.resolve()),
                            "is_plural": is_icu_plural,
                        }
                    )
            # NOTE(review): len(empty_keys) here is the CUMULATIVE total across
            # apps, not this app's count — the message is slightly misleading.
            logger.debug("Extracted %d empty key(s) from %s", len(empty_keys), app)
        except (OSError, ValueError, json.JSONDecodeError) as e:
            logger.warning(
                "Skipping %s due to error loading translation files: %s", app, e
            )
            continue

    logger.info(
        "Extracted %d total empty key(s) from frontend apps for language: %s",
        len(empty_keys),
        iso_code,
    )
    return empty_keys


def _is_po_entry_empty(
    entry: polib.POEntry, target_entry: polib.POEntry | None
) -> bool:
    """Check if a PO entry is empty or missing.

    A missing target entry is empty; a plural entry is empty when ANY of its
    existing plural forms is blank.
    NOTE(review): a plural target entry with an empty msgstr_plural dict is
    reported as NOT empty (any() over nothing) — confirm that is intended.
    """
    if target_entry is None:
        return True

    if entry.msgid_plural:
        # Plural entry - check if plural forms are empty
        return any(
            not target_entry.msgstr_plural.get(form_index, "").strip()
            for form_index in range(len(target_entry.msgstr_plural))
        )

    # Singular entry - check if empty
    return not target_entry.msgstr or not target_entry.msgstr.strip()


def _extract_empty_keys_from_po_file(
    target_file: Path, en_file: Path, po_file_name: str, app_name: str
) -> list[dict]:
    """Extract empty keys from one PO file. Returns list of key dicts."""
    empty_keys = []
    try:
        target_po = polib.pofile(str(target_file))
        en_po = polib.pofile(str(en_file))
        # Index target entries by (msgctxt, msgid) so contexts disambiguate.
        target_entries_dict = {}
        for entry in target_po:
            if entry.msgid:
                msgctxt = getattr(entry, "msgctxt", None) or None
                key = (msgctxt, entry.msgid)
                target_entries_dict[key] = entry
        for entry in en_po:
            if not entry.msgid:
                continue
            msgctxt = getattr(entry, "msgctxt", None) or None
            entry_key = (msgctxt, entry.msgid)
            target_entry = target_entries_dict.get(entry_key)
            if _is_po_entry_empty(entry, target_entry):
                empty_keys.append(
                    {
                        "app": app_name,
                        "key": entry.msgid,
                        "english": entry.msgid,
                        "translation": "",
                        "file_type": "po",
                        "file_path": str(target_file.resolve()),
                        "po_file": po_file_name,
                        # NOTE(review): polib uses "" (not None) for absent
                        # msgid_plural — verify this 'is not None' test.
                        "is_plural": entry.msgid_plural is not None,
                        "msgid_plural": (
                            entry.msgid_plural if entry.msgid_plural else None
                        ),
                        "msgctxt": msgctxt,
                        "flags": list(entry.flags) if entry.flags else [],
                    }
                )
    except (OSError, polib.POFileError, ValueError) as e:
        logger.warning("Skipping %s due to error loading PO file: %s", target_file, e)
    return empty_keys


def _plugin_locale_base(base_dir: Path, repo_dir: str, module_name: str) -> Path:
    """Return conf/locale path for a backend plugin under translations/."""
    return (
        base_dir
        / repo_dir
        / module_name
        / TRANSLATION_FILE_NAMES["conf_dir"]
        / TRANSLATION_FILE_NAMES["locale_dir"]
    )


def _iter_backend_plugin_po_files(
    base_dir: Path, backend_locale: str
) -> Iterator[tuple[str, Path, Path, str]]:
    """
    Yield (module_name, en_file, target_file, po_file_name) for each backend
    plugin PO file where the English source exists.
    The target file is NOT required to exist; callers filter as needed.
    """
    lc_messages = TRANSLATION_FILE_NAMES["lc_messages"]
    for repo_dir, module_name in TRANSLATABLE_PLUGINS:
        plugin_base = _plugin_locale_base(base_dir, repo_dir, module_name)
        en_locale_dir = plugin_base / "en" / lc_messages
        target_locale_dir = plugin_base / backend_locale / lc_messages
        for po_file_name in BACKEND_PO_FILES:
            en_file = en_locale_dir / po_file_name
            if not en_file.exists():
                continue
            target_file = target_locale_dir / po_file_name
            yield (module_name, en_file, target_file, po_file_name)


def _extract_empty_keys_from_backend(base_dir: Path, backend_locale: str) -> list[dict]:
    """Extract empty keys from backend PO files (edx-platform + backend plugins)."""
    empty_keys = []
    lc_messages = TRANSLATION_FILE_NAMES["lc_messages"]
    locale_dir = (
        base_dir
        / TRANSLATION_FILE_NAMES["edx_platform"]
        / TRANSLATION_FILE_NAMES["conf_dir"]
        / TRANSLATION_FILE_NAMES["locale_dir"]
    )
    # edx-platform core PO files first.
    for po_file_name in BACKEND_PO_FILES:
        target_file = locale_dir / backend_locale / lc_messages / po_file_name
        en_file = locale_dir / "en" / lc_messages / po_file_name
        if not target_file.exists() or not en_file.exists():
            continue
        empty_keys.extend(
            _extract_empty_keys_from_po_file(
                target_file, en_file, po_file_name, "edx-platform"
            )
        )
    # Then each translatable backend plugin.
    for (
        module_name,
        en_file,
        target_file,
        po_file_name,
    ) in _iter_backend_plugin_po_files(base_dir, backend_locale):
        if not target_file.exists():
            continue
        empty_keys.extend(
            _extract_empty_keys_from_po_file(
                target_file, en_file, po_file_name, module_name
            )
        )
    return empty_keys


def extract_empty_keys(
    base_dir: Path,
    lang_code: str,
    iso_code: str | None = None,
    *,
    skip_backend: bool = False,
) -> list[dict]:
    """
    Extract all empty translation keys for a language.
    Returns list of dicts with:
    {'app': str, 'key': str, 'english': str, 'file_type': 'json'|'po'}
    (plus file-type-specific fields such as 'file_path' and 'is_plural').
    """
    if iso_code is None:
        iso_code = lang_code

    empty_keys = _extract_empty_keys_from_frontend(base_dir, iso_code)

    if not skip_backend:
        # Backend locale dirs use the ISO code when it differs from lang_code.
        backend_locale = iso_code if iso_code and iso_code != lang_code else lang_code
        empty_keys.extend(_extract_empty_keys_from_backend(base_dir, backend_locale))

    return empty_keys


# ============================================================================
# Translation Application
# ============================================================================


def apply_json_translations(file_path: Path, translations: dict[str, str]) -> int:
    """
    Apply translations to a JSON file.
    Only fills keys whose current value is empty/whitespace; existing
    translations are never overwritten. Returns number of translations applied.
    """
    data = load_json_file(file_path)
    applied = 0
    skipped = 0

    for key, translation in translations.items():
        if key in data:
            # Check if the value is empty (empty string, whitespace only, or None)
            current_value = data[key]
            if not current_value or (
                isinstance(current_value, str) and not current_value.strip()
            ):
                data[key] = translation
                applied += 1
                logger.debug(
                    "Applied translation for key '%s' in %s", key, file_path.name
                )
            else:
                skipped += 1
                logger.debug(
                    "Skipped key '%s' in %s (already has value: %s)",
                    key,
                    file_path.name,
                    current_value[:50]
                    if isinstance(current_value, str)
                    else current_value,
                )
        else:
            skipped += 1
            logger.debug(
                "Skipped key '%s' in %s (key not found in target file)",
                key,
                file_path.name,
            )

    # Only rewrite the file when at least one value changed.
    if applied > 0:
        save_json_file(file_path, data)
        logger.info(
            "Applied %d translation(s) to %s (%d skipped)",
            applied,
            file_path.name,
            skipped,
        )
    elif skipped > 0:
        logger.debug(
            "No translations applied to %s (%d keys skipped - already have values)",
            file_path.name,
            skipped,
        )

    return applied


def load_glossary(glossary_path: Path, _lang_code: str = "") -> dict[str, Any]:
    """
    Load glossary for a language from a text file.
    Parses text format with term mappings like: - 'english term' -> 'translation'
    Returns dict mapping English -> Translation (string or dict for plural forms).

    Args:
        glossary_path: Path to the glossary text file.
        _lang_code: Language code (currently unused, kept for API compatibility).

    Returns:
        Dictionary mapping English terms to translations. Translations can be:
        - Strings for singular terms
        - Dicts with 'singular' and 'plural' keys for plural forms

    Text file format:
        # Comments and headers
        ## TERM MAPPINGS
        - 'english term' -> 'translation'
        - 'another term' -> 'another translation'

    Example:
        - 'accuracy' -> 'الدقة'
        - 'activation function' -> 'دالّة التفعيل'
    """
    if not glossary_path.exists():
        return {}

    glossary = {}

    try:
        with glossary_path.open(encoding="utf-8") as f:
            for raw_line in f:
                line = raw_line.strip()

                # Skip empty lines, comments, and headers
                if not line or line.startswith("#"):
                    continue

                # Parse lines like: - 'english term' -> 'translation'
                if line.startswith("- ") and "->" in line:
                    # Extract the mapping
                    # Format: - 'english term' -> 'translation'
                    mapping_line = line[2:].strip()  # Remove leading '- '
                    parts = mapping_line.split("->", 1)

                    if len(parts) == EXPECTED_GLOSSARY_PARTS:
                        english_term = parts[0].strip().strip("'\"")
                        translation = parts[1].strip().strip("'\"")

                        if english_term and translation:
                            glossary[english_term] = translation
    except (OSError, UnicodeDecodeError):
        # Log specific file-related errors but return empty dict to allow continuation
        # In a library function, we can't use stdout, so we just return empty dict
        # The caller can handle logging if needed
        return {}
    except (ValueError, AttributeError, IndexError):
        # Catch parsing errors and other unexpected errors
        return {}
    else:
        return glossary


def match_glossary_term(
    text: str, glossary: dict[str, Any] | None, *, exact_match: bool = True
) -> Any | None:
    """
    Match text against glossary terms.
    Returns translation (string or dict with 'singular'/'plural') if match found,
    None otherwise.
    Supports both simple format ("term": "translation") and plural format
    ("term": {"singular": "...", "plural": "..."}).

    Args:
        text: The text to match against glossary terms.
        glossary: Dictionary mapping English terms to translations, or None.
        exact_match: If True, only exact matches are returned.
            If False, case-insensitive and partial matches are allowed.

    Returns:
        Translation string/dict if match found, None otherwise.
    """
    if not glossary:
        return None

    if text in glossary:
        # Return as-is: string for singular, dict for plural
        return glossary[text]

    if not exact_match:
        text_lower = text.lower().strip()
        # Pass 1: case-insensitive exact match.
        for term, translation in glossary.items():
            if term.lower().strip() == text_lower:
                return translation

        # Pass 2: loose substring match in either direction (first hit wins;
        # dict iteration order decides ties).
        for term, translation in glossary.items():
            if term.lower() in text_lower or text_lower in term.lower():
                return translation

    return None


# ============================================================================
# Translation Newline Normalization
# ============================================================================


def _normalize_translation_newlines(msgid: str, translation: str) -> str:
    """
    Normalize translation to match msgid's newline structure EXACTLY.

    CRITICAL: msgfmt checks the ACTUAL last character of the string, NOT after
    stripping whitespace. So we must match whether the string ends with '\n' or not.

    Args:
        msgid: The original msgid string
        translation: The translation string to normalize

    Returns:
        Normalized translation string with matching newline structure
    """
    if not translation:
        return translation

    # Handle leading newlines
    msgid_no_leading_spaces = msgid.lstrip(" \t")
    msgid_starts_newline = (
        msgid_no_leading_spaces.startswith("\n") if msgid_no_leading_spaces else False
    )

    normalized = translation

    if msgid_starts_newline:
        if not normalized.startswith("\n"):
            normalized = "\n" + normalized
    else:
        normalized = normalized.lstrip("\n")

    # CRITICAL: Check the ACTUAL last character
    msgid_ends_with_newline = msgid.endswith("\n") if msgid else False
    normalized_ends_with_newline = normalized.endswith("\n") if normalized else False

    if msgid_ends_with_newline and not normalized_ends_with_newline:
        # msgid ends with \n, but translation doesn't - add trailing newline
        normalized = normalized.rstrip(" \t") + "\n"
    elif not msgid_ends_with_newline and normalized_ends_with_newline:
        # msgid doesn't end with \n, but translation does - remove trailing newline
        normalized = normalized.rstrip(" \t\n")

    return normalized


def _normalize_all_entries_in_po_file(po: polib.POFile) -> int:
    """
    Normalize newlines for ALL entries in a PO file.
    Ensures entries with existing translations have correct newline structure.

    Returns:
        Number of entries that were normalized (changed)
    """
    normalized_count = 0

    for entry in po:
        if not entry.msgid:  # Skip header
            continue

        if entry.msgid_plural:
            if _normalize_plural_entry(entry):
                normalized_count += 1
        elif entry.msgstr and _normalize_singular_entry(entry):
            normalized_count += 1

    return normalized_count


def _normalize_plural_entry(entry: polib.POEntry) -> bool:
    """Normalize plural entry newlines. Returns True if changed."""
    if not entry.msgstr_plural:
        return False

    changed = False
    for form_index, msgstr_plural_val in entry.msgstr_plural.items():
        if msgstr_plural_val:
            # msgstr[0] matches msgid, msgstr[1+] matches msgid_plural
            reference = entry.msgid if form_index == 0 else entry.msgid_plural
            normalized = _normalize_translation_newlines(reference, msgstr_plural_val)
            if normalized != msgstr_plural_val:
                entry.msgstr_plural[form_index] = normalized
                changed = True
    return changed


def _normalize_singular_entry(entry: polib.POEntry) -> bool:
    """Normalize singular entry newlines. Returns True if changed."""
    normalized = _normalize_translation_newlines(entry.msgid, entry.msgstr)
    if normalized != entry.msgstr:
        entry.msgstr = normalized
        return True
    return False


def _entry_has_brace_format(entry: polib.POEntry) -> bool:
    """Return True if entry is python-brace-format (uses {variable} placeholders)."""
    return bool(entry.flags and "python-brace-format" in entry.flags)


# ============================================================================
# PO Translation Application Helpers
# ============================================================================


def _apply_numeric_plural_forms(
    entry: polib.POEntry,
    translation: dict[str, str],
    numeric_keys: list,
) -> tuple[bool, bool]:
    """Apply plural forms when translation has numeric keys (e.g. 0, 1, 2).

    Only fills forms that are currently empty and within range; brace-format
    entries are validated first and rejected on placeholder mismatch.

    Returns (applied, rejected_brace_format).
    """
    plural_applied = False
    rejected_brace = False
    for key in numeric_keys:
        form_index = int(key) if isinstance(key, str) else key
        # Skip out-of-range indices and forms that already have content.
        if (
            form_index >= len(entry.msgstr_plural)
            or entry.msgstr_plural.get(form_index, "").strip()
        ):
            continue
        reference = entry.msgid if form_index == 0 else entry.msgid_plural
        normalized = _normalize_translation_newlines(
            reference or entry.msgid, str(translation[key]).strip()
        )
        if _entry_has_brace_format(entry) and not _is_valid_brace_format_translation(
            reference or entry.msgid or "", normalized
        ):
            logger.warning(
                "Rejected plural form %s for brace-format entry "
                "(invalid placeholders): msgid=%r",
                form_index,
                (entry.msgid or "")[:60],
            )
            rejected_brace = True
        else:
            entry.msgstr_plural[form_index] = normalized
            plural_applied = True
    return plural_applied, rejected_brace


def _apply_singular_plural_one_form(
    entry: polib.POEntry, translation: dict[str, str]
) -> tuple[bool, bool]:
    """Apply singular/plural dict when locale has nplurals=1.

    Chooses the singular translation when the plural source has placeholders
    the singular lacks (avoids runtime KeyError at n=1), otherwise the plural.
    Returns (applied, rejected_brace_format).
    """
    if entry.msgstr_plural.get(0, "").strip():
        # Form already translated; nothing to do.
        return False, False
    if _plural_has_placeholders_not_in_singular(entry):
        normalized = _normalize_translation_newlines(
            entry.msgid, translation["singular"]
        )
        ref = entry.msgid
    else:
        normalized = _normalize_translation_newlines(
            entry.msgid_plural or entry.msgid, translation["plural"]
        )
        ref = entry.msgid_plural or entry.msgid
    if _entry_has_brace_format(entry) and not _is_valid_brace_format_translation(
        ref or "", normalized
    ):
        logger.warning(
            "Rejected plural form 0 for brace-format entry: msgid=%r",
            (entry.msgid or "")[:60],
        )
        return False, True
    entry.msgstr_plural[0] = normalized
    return True, False


def _apply_singular_plural_multi_form(
    entry: polib.POEntry, translation: dict[str, str], num_forms: int
) -> tuple[bool, bool]:
    """Apply singular/plural dict when locale has two or more plural forms.

    Fills form 0 from 'singular' and forms 1..num_forms-1 from 'plural';
    appends any %-placeholders present in msgid_plural but missing from the
    plural translation, and falls back to the singular text when 'plural' is
    empty (symmetric-placeholder case only).
    Returns (applied, rejected_brace_format).
    """
    plural_applied = False
    rejected_brace = False
    if not entry.msgstr_plural.get(0, "").strip():
        normalized_singular = _normalize_translation_newlines(
            entry.msgid, translation["singular"]
        )
        if _entry_has_brace_format(entry) and not _is_valid_brace_format_translation(
            entry.msgid or "", normalized_singular
        ):
            logger.warning(
                "Rejected singular form for brace-format entry: msgid=%r",
                (entry.msgid or "")[:60],
            )
            rejected_brace = True
        else:
            entry.msgstr_plural[0] = normalized_singular
            plural_applied = True

    plural_val = (translation.get("plural") or "").strip()
    if _plural_has_placeholders_not_in_singular(entry):
        # Re-append required %-placeholders the LLM dropped from the plural.
        required = _get_python_format_placeholders(entry.msgid_plural or "")
        existing = _get_python_format_placeholders(plural_val)
        missing = required - existing
        if missing:
            suffix = " ".join(f"%({k})s" for k in sorted(missing))
            plural_val = f"{plural_val} {suffix}" if plural_val else suffix
    if (
        not plural_val
        and not _plural_has_placeholders_not_in_singular(entry)
        and (translation.get("singular") or "").strip()
    ):
        plural_val = (translation.get("singular") or "").strip()

    for form_index in range(1, num_forms):
        if not entry.msgstr_plural.get(form_index, "").strip() and plural_val:
            normalized_plural = _normalize_translation_newlines(
                entry.msgid_plural or entry.msgid, plural_val
            )
            ref_plural = entry.msgid_plural or entry.msgid or ""
            if _entry_has_brace_format(
                entry
            ) and not _is_valid_brace_format_translation(ref_plural, normalized_plural):
                logger.warning(
                    "Rejected plural form %s for brace-format entry: msgid_plural=%r",
                    form_index,
                    (ref_plural or "")[:60],
                )
                rejected_brace = True
            else:
                entry.msgstr_plural[form_index] = normalized_plural
                plural_applied = True
    return plural_applied, rejected_brace


def _apply_plural_dict_translation(
    entry: polib.POEntry, translation: dict[str, str]
) -> tuple[bool, bool]:
    """Apply plural translation from dict.
    Dispatches on the dict's shape: numeric keys ('0', '1', ...) map directly
    to plural form indices; a {'singular': ..., 'plural': ...} dict is routed
    by the locale's form count. Returns (applied, rejected_brace_format)."""
    numeric_keys = _get_numeric_plural_keys(translation)
    if numeric_keys:
        return _apply_numeric_plural_forms(entry, translation, numeric_keys)
    if "singular" in translation and "plural" in translation:
        num_forms = len(entry.msgstr_plural)
        if num_forms == 1:
            return _apply_singular_plural_one_form(entry, translation)
        return _apply_singular_plural_multi_form(entry, translation, num_forms)
    return False, False


def _apply_plural_string_translation(
    entry: polib.POEntry, translation: str
) -> tuple[bool, bool]:
    """Apply plural translation from string.

    The single string is copied into every empty plural form.
    Returns (applied, rejected_brace_format).
    """
    plural_applied = False
    rejected_brace = False
    # Normalize translation to match msgid structure
    normalized_translation = _normalize_translation_newlines(entry.msgid, translation)
    for form_index in range(len(entry.msgstr_plural)):
        if not entry.msgstr_plural.get(form_index, "").strip():
            reference = (
                entry.msgid if form_index == 0 else (entry.msgid_plural or entry.msgid)
            )
            if _entry_has_brace_format(
                entry
            ) and not _is_valid_brace_format_translation(
                reference or "", normalized_translation
            ):
                logger.warning(
                    "Rejected plural string for brace-format entry (form %s): msgid=%r",
                    form_index,
                    (entry.msgid or "")[:60],
                )
                rejected_brace = True
            else:
                entry.msgstr_plural[form_index] = normalized_translation
                plural_applied = True
    return plural_applied, rejected_brace


def _apply_translation_to_plural_entry(
    entry: polib.POEntry, translation: Any
) -> tuple[bool, bool]:
    """Apply translation to a plural PO entry.

    Accepts a dict, a JSON-encoded dict string (decoded first), or a plain
    string. Returns (applied, rejected_brace_format).
    """
    # Check if translation is a string representation of a dict
    if (
        isinstance(translation, str)
        and translation.strip().startswith("{")
        and translation.strip().endswith("}")
    ):
        try:
            translation = json.loads(translation.strip())
        except (json.JSONDecodeError, ValueError):
            # Not valid JSON after all — treat it as a literal string.
            if translation:
                applied, rejected = _apply_plural_string_translation(entry, translation)
                return applied, rejected
            return False, False

    if isinstance(translation, dict):
        numeric_keys = _get_numeric_plural_keys(translation)
        if numeric_keys or "singular" in translation:
            return _apply_plural_dict_translation(entry, translation)
    if isinstance(translation, str) and translation:
        return _apply_plural_string_translation(entry, translation)
    return False, False


def _apply_translation_to_singular_entry(
    entry: polib.POEntry, translation: Any
) -> tuple[bool, bool]:
    """Apply translation to a singular PO entry.

    Tolerates an LLM mistakenly returning a {'singular': ...} dict for a
    singular entry by unwrapping it. Returns (applied, rejected_brace_format).
    """
    if isinstance(translation, dict) and "singular" in translation:
        logger.info(
            "LLM returned dict for singular entry; msgid=%r msgctxt=%r",
            entry.msgid,
            getattr(entry, "msgctxt", None),
        )
        translation_str = str(translation["singular"]).strip()
        if translation_str:
            normalized_translation = _normalize_translation_newlines(
                entry.msgid, translation_str
            )
            if _entry_has_brace_format(
                entry
            ) and not _is_valid_brace_format_translation(
                entry.msgid or "", normalized_translation
            ):
                logger.warning(
                    "Rejected translation for brace-format msgid (invalid placeholders "
                    "or unterminated directive): msgid=%r",
                    (entry.msgid or "")[:80],
                )
                return False, True
            entry.msgstr = normalized_translation
            return True, False
    if isinstance(translation, str) and translation:
        normalized_translation = _normalize_translation_newlines(
            entry.msgid, translation
        )
        if _entry_has_brace_format(entry) and not _is_valid_brace_format_translation(
            entry.msgid or "", normalized_translation
        ):
            logger.warning(
                "Rejected translation for brace-format msgid (invalid placeholders "
                "or unterminated directive): msgid=%r",
                (entry.msgid or "")[:80],
            )
            return False, True
        entry.msgstr = normalized_translation
        return True, False
    return False, False


def _apply_translation_to_entry(
    entry: polib.POEntry, translation: Any
) -> tuple[bool, bool]:
    """
    Apply translation to a PO entry. Returns (applied, rejected_brace_format).

    Singular entries that already have a non-blank msgstr are left untouched.

    Args:
        entry: The PO entry to apply translation to.
        translation: Translation value (string or dict with 'singular'/'plural'
            or numeric keys '0', '1', '2', etc. for multiple forms).

    Returns:
        (True if translation was applied, True if rejected due to invalid brace format).
    """
    if entry.msgid_plural:
        return _apply_translation_to_plural_entry(entry, translation)
    if not entry.msgstr or not entry.msgstr.strip():
        return _apply_translation_to_singular_entry(entry, translation)
    return False, False


def _expand_plural_forms_if_needed(entry: polib.POEntry, po: polib.POFile) -> bool:
    """Expand plural forms if entry has fewer forms than required by language.

    Required form count comes from the PO file's Plural-Forms header (apply
    may set it from constants when lang_code is provided, to avoid empty msgstr).
    Missing forms are added as empty strings. Returns True if forms were added.
    """
    if not entry.msgid_plural:
        return False

    plural_forms_str = po.metadata.get("Plural-Forms", "")
    if not plural_forms_str:
        return False

    nplurals_match = re.search(r"nplurals=(\d+)", plural_forms_str)
    if not nplurals_match:
        return False

    required_forms = int(nplurals_match.group(1))
    # Special case: when singular/plural have different placeholders, keep both
    # msgstr[0] and msgstr[1] even if locale has nplurals=1
    if _entry_has_asymmetric_placeholders(entry):
        required_forms = max(required_forms, 2)
    current_forms = len(entry.msgstr_plural) if entry.msgstr_plural else 0

    if current_forms < required_forms:
        if not entry.msgstr_plural:
            entry.msgstr_plural = {}
        for form_index in range(current_forms, required_forms):
            entry.msgstr_plural[form_index] = ""
        return True

    return False


def _get_translation_for_po_entry(
    entry: polib.POEntry, translations: dict[str, Any]
) -> Any | None:
    """Look up translation for a PO entry (msgctxt:msgid or msgid).

    Context-qualified key 'msgctxt:msgid' wins; plain msgid is the fallback.
    """
    entry_msgctxt = getattr(entry, "msgctxt", None) or None
    if entry_msgctxt:
        key_with_context = f"{entry_msgctxt}:{entry.msgid}"
        value = translations.get(key_with_context)
        if value is not None:
            return value
    return translations.get(entry.msgid)


# (Definition below continues past the end of this chunk.)
def _log_po_entry_result(
    entry: polib.POEntry, file_path: Path, *, applied: bool
) -> None:
    """Log whether a translation was applied or skipped for an entry."""
    msgid_display = (
        entry.msgid[:MAX_LOG_STRING_LENGTH] + "..."
+ if len(entry.msgid) > MAX_LOG_STRING_LENGTH + else entry.msgid + ) + if applied: + logger.debug( + "Applied translation for msgid '%s' in %s", + msgid_display, + file_path.name, + ) + else: + logger.debug( + "Skipped msgid '%s' in %s (already has translation)", + msgid_display, + file_path.name, + ) + + +def _save_po_if_updated( + po: polib.POFile, + file_path: Path, + counts: tuple[int, int, int], + *, + header_updated: bool = False, +) -> None: + """Save PO file and log result if any changes were made. + + counts: (applied, skipped, normalized_count). + """ + applied, skipped, normalized_count = counts + if applied > 0 or normalized_count > 0 or header_updated: + po.save(str(file_path)) + if applied > 0 or normalized_count > 0: + logger.info( + "Applied %d translation(s) to %s (%d skipped)", + applied, + file_path.name, + skipped, + ) + elif header_updated: + logger.debug("Updated Plural-Forms in %s", file_path.name) + elif skipped > 0: + logger.debug( + "No translations applied to %s (%d entries skipped - " + "already have translations)", + file_path.name, + skipped, + ) + + +def apply_po_translations( + file_path: Path, + translations: dict[str, Any], + lang_code: str | None = None, + rejected_brace_entries: list[dict[str, str]] | None = None, +) -> int: + """ + Apply translations to a PO file. Returns number of translations applied. + Handles both singular and plural forms. + For plural forms, translations dict can contain: + - Dict with 'singular' and 'plural' keys: {"singular": "...", "plural": "..."} + - Dict with numeric keys '0', '1', '2', etc. for multiple forms + - String: applies same translation to all plural forms + + The translations dict is keyed by msgid. If entries have msgctxt, we try + to match by msgid first, and if there are multiple matches, we prefer + entries without msgctxt or with matching msgctxt. + + Plural-Forms: We give priority to the nplurals/Plural-Forms already in the + translation file (e.g. from the cloned repo or Transifex). 
We only set + Plural-Forms from our constants when the file has no Plural-Forms header. + + If rejected_brace_entries is provided, entries whose translation was + rejected due to invalid python-brace-format are appended as + {"msgid": ..., "file": ...} for PR description logging. + """ + po = polib.pofile(str(file_path)) + applied = 0 + skipped = 0 + + header_updated = False + if lang_code: + existing_plural = po.metadata.get("Plural-Forms", "").strip() + if not existing_plural: + # File has no Plural-Forms: fall back to our constant + po.metadata["Plural-Forms"] = _get_plural_form(lang_code) + header_updated = True + # If file already has Plural-Forms, we do not overwrite it + + for entry in po: + if entry.msgid_plural: + _expand_plural_forms_if_needed(entry, po) + if not entry.msgid: + continue + + translation = _get_translation_for_po_entry(entry, translations) + if translation is None: + skipped += 1 + continue + + was_applied, rejected_brace = _apply_translation_to_entry(entry, translation) + if rejected_brace and rejected_brace_entries is not None: + rejected_brace_entries.append( + {"msgid": (entry.msgid or "")[:200], "file": file_path.name} + ) + if was_applied: + applied += 1 + else: + skipped += 1 + _log_po_entry_result(entry, file_path, applied=was_applied) + + normalized_count = _normalize_all_entries_in_po_file(po) + _save_po_if_updated( + po, + file_path, + (applied, skipped, normalized_count), + header_updated=header_updated, + ) + return applied + + +# ============================================================================ +# Full Translation Sync +# ============================================================================ + + +def _sync_frontend_translations(base_dir: Path, iso_code: str) -> dict[str, int]: + """Sync frontend translation files. 
Returns stats.""" + frontend_stats = {"added": 0, "fixed": 0, "removed": 0, "created": 0, "synced": 0} + + for app in LEARNER_FACING_APPS: + app_dir = base_dir / app / "src" / TRANSLATION_FILE_NAMES["i18n_dir"] + messages_dir = app_dir / TRANSLATION_FILE_NAMES["messages_dir"] + + en_file = app_dir / TRANSLATION_FILE_NAMES["transifex_input"] + if not en_file.exists(): + en_file = messages_dir / TRANSLATION_FILE_NAMES["english"] + + target_file = messages_dir / f"{iso_code}.json" + + if not en_file.exists(): + continue + + try: + stats = sync_or_create_json_file(en_file, target_file) + if stats["action"] == "created": + frontend_stats["created"] += 1 + elif stats["action"] == "synced": + frontend_stats["synced"] += 1 + + frontend_stats["added"] += stats.get("added", 0) + frontend_stats["fixed"] += stats.get("fixed", 0) + frontend_stats["removed"] += stats.get("removed", 0) + except (OSError, ValueError, json.JSONDecodeError) as e: + logger.warning( + "Skipping %s due to error syncing translation file: %s", app, e + ) + continue + + return frontend_stats + + +def _sync_backend_translations( + base_dir: Path, lang_code: str, iso_code: str +) -> dict[str, int]: + """Sync backend translation files. 
Returns stats.""" + backend_stats = {"added": 0} + backend_locale = iso_code if iso_code and iso_code != lang_code else lang_code + locale_dir = ( + base_dir + / TRANSLATION_FILE_NAMES["edx_platform"] + / TRANSLATION_FILE_NAMES["conf_dir"] + / TRANSLATION_FILE_NAMES["locale_dir"] + / backend_locale + / TRANSLATION_FILE_NAMES["lc_messages"] + ) + + for po_file_name in BACKEND_PO_FILES: + en_file = ( + base_dir + / TRANSLATION_FILE_NAMES["edx_platform"] + / TRANSLATION_FILE_NAMES["conf_dir"] + / TRANSLATION_FILE_NAMES["locale_dir"] + / "en" + / TRANSLATION_FILE_NAMES["lc_messages"] + / po_file_name + ) + target_file = locale_dir / po_file_name + + if not en_file.exists(): + continue + + try: + stats = sync_or_create_po_file( + en_file, target_file, backend_locale, iso_code + ) + backend_stats["added"] += stats.get("added", 0) + except (OSError, polib.POFileError, ValueError): + continue + + # Backend plugin apps: sync translations///conf/locale/... + for ( + _module_name, + en_file, + target_file, + _po_file_name, + ) in _iter_backend_plugin_po_files(base_dir, backend_locale): + try: + stats = sync_or_create_po_file( + en_file, target_file, backend_locale, iso_code + ) + backend_stats["added"] += stats.get("added", 0) + except (OSError, polib.POFileError, ValueError): + continue + + return backend_stats + + +def sync_all_translations( + base_dir: Path, + lang_code: str, + iso_code: str | None = None, + *, + skip_backend: bool = False, +) -> dict: + """ + Sync all translation files for a language. + Returns summary stats. 
+ """ + if iso_code is None: + iso_code = lang_code + + frontend_stats = _sync_frontend_translations(base_dir, iso_code) + backend_stats = ( + _sync_backend_translations(base_dir, lang_code, iso_code) + if not skip_backend + else {"added": 0} + ) + + return { + "frontend": frontend_stats, + "backend": backend_stats, + } diff --git a/src/ol_openedx_ai_static_translations/pyproject.toml b/src/ol_openedx_ai_static_translations/pyproject.toml new file mode 100644 index 000000000..d2de24e71 --- /dev/null +++ b/src/ol_openedx_ai_static_translations/pyproject.toml @@ -0,0 +1,39 @@ +[project] +name = "ol-openedx-ai-static-translations" +version = "0.1.0" +description = "An Open edX plugin for AI-powered static translation management" +authors = [ + {name = "MIT Office of Digital Learning"} +] +license = "BSD-3-Clause" +readme = "README.rst" +requires-python = ">=3.11" +keywords = ["Python", "edx"] +dependencies = [ + "Django>=4.0", + "GitPython>=3.1.40", + "litellm>=1.80.0", + "polib>=1.2.0", + "requests>=2.31.0", +] + +[project.entry-points."cms.djangoapp"] +ol_openedx_ai_static_translations = "ol_openedx_ai_static_translations.apps:OLOpenedXAIStaticTranslationsConfig" + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[tool.hatch.build.targets.wheel] +packages = ["ol_openedx_ai_static_translations"] +include = [ + "ol_openedx_ai_static_translations/**/*.py", + "ol_openedx_ai_static_translations/**/*.txt", +] + +[tool.hatch.build.targets.sdist] +include = [ + "ol_openedx_ai_static_translations/**/*", + "README.rst", + "pyproject.toml", +] diff --git a/src/ol_openedx_ai_static_translations/setup.cfg b/src/ol_openedx_ai_static_translations/setup.cfg new file mode 100644 index 000000000..77a520e78 --- /dev/null +++ b/src/ol_openedx_ai_static_translations/setup.cfg @@ -0,0 +1,41 @@ +[isort] +include_trailing_comma = True +indent = ' ' +line_length = 120 +multi_line_output = 3 +skip= + migrations + +[wheel] +universal = 1 + +[tool:pytest] 
+pep8maxlinelength = 119 +DJANGO_SETTINGS_MODULE = lms.envs.test +addopts = --nomigrations --reuse-db --durations=20 +# Enable default handling for all warnings, including those that are ignored by default; +# but hide rate-limit warnings (because we deliberately don't throttle test user logins) +# and field_data deprecation warnings (because fixing them requires a major low-priority refactoring) +filterwarnings = + default + ignore::xblock.exceptions.FieldDataDeprecationWarning + ignore::pytest.PytestConfigWarning + ignore:No request passed to the backend, unable to rate-limit:UserWarning + ignore:Flags not at the start of the expression:DeprecationWarning + ignore:Using or importing the ABCs from 'collections' instead of from 'collections.abc':DeprecationWarning + ignore:invalid escape sequence:DeprecationWarning + ignore:`formatargspec` is deprecated since Python 3.5:DeprecationWarning + ignore:the imp module is deprecated in favour of importlib:DeprecationWarning + ignore:"is" with a literal:SyntaxWarning + ignore:defusedxml.lxml is no longer supported:DeprecationWarning + ignore: `np.int` is a deprecated alias for the builtin `int`.:DeprecationWarning + ignore: `np.float` is a deprecated alias for the builtin `float`.:DeprecationWarning + ignore: `np.complex` is a deprecated alias for the builtin `complex`.:DeprecationWarning + ignore: 'etree' is deprecated. 
Use 'xml.etree.ElementTree' instead.:DeprecationWarning + ignore: defusedxml.cElementTree is deprecated, import from defusedxml.ElementTree instead.:DeprecationWarning + + +junit_family = xunit2 +norecursedirs = .* *.egg build conf dist node_modules test_root cms/envs lms/envs +python_classes = +python_files = tests.py test_*.py tests_*.py *_tests.py __init__.py From fed1f3bdfb66144b8628e4c01c07daeaaaa8dc47 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 24 Mar 2026 09:20:38 +0000 Subject: [PATCH 03/14] Apply PR #760 glossary path changes to ol_openedx_ai_static_translations Co-authored-by: asadali145 <52656433+asadali145@users.noreply.github.com> Agent-Logs-Url: https://github.com/mitodl/open-edx-plugins/sessions/eaab10e1-8468-4675-9118-cd9b1fdac54e --- .../README.rst | 2 +- .../commands/sync_and_translate_language.py | 23 ++++++++++--------- 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/src/ol_openedx_ai_static_translations/README.rst b/src/ol_openedx_ai_static_translations/README.rst index 86a9fd232..d57bd17cc 100644 --- a/src/ol_openedx_ai_static_translations/README.rst +++ b/src/ol_openedx_ai_static_translations/README.rst @@ -43,7 +43,7 @@ Usage ./manage.py cms sync_and_translate_language el # With specific provider and model - ./manage.py cms sync_and_translate_language el --provider openai --model gpt-5.2 --glossary + ./manage.py cms sync_and_translate_language el --provider openai --model gpt-5.2 --glossary /path/to/glossary License ******* diff --git a/src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/management/commands/sync_and_translate_language.py b/src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/management/commands/sync_and_translate_language.py index f1c4bc8a2..3f45fa91b 100644 --- a/src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/management/commands/sync_and_translate_language.py +++ 
b/src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/management/commands/sync_and_translate_language.py @@ -4,7 +4,7 @@ Usage: ./manage.py cms sync_and_translate_language el ./manage.py cms sync_and_translate_language el \\ - --provider openai --model gpt-4-turbo --glossary + --provider openai --model gpt-4-turbo --glossary /path/to/glossary """ import json @@ -27,7 +27,6 @@ from django.core.management.base import BaseCommand, CommandError from litellm import completion -import ol_openedx_ai_static_translations.utils as utils_module from ol_openedx_ai_static_translations.constants import ( HTTP_CREATED, HTTP_NOT_FOUND, @@ -733,11 +732,13 @@ def add_arguments(self, parser): ) parser.add_argument( "--glossary", - action="store_true", - default=False, - help="Use glossary from plugin glossaries folder. " - "Looks for {plugin_dir}/glossaries/machine_learning/{iso_code}.txt " - "(uses --iso-code when given, else lang code).", + dest="glossary", + required=False, + default=None, + help=( + "Path to glossary directory. Should contain language-specific " + "files (e.g. {iso_code}.txt)." + ), ) parser.add_argument( "--batch-size", @@ -1066,16 +1067,16 @@ def fallback_template() -> str: return templates_by_count.get(num_categories) or fallback_template() def _load_glossary(self, options: dict, iso_code: str) -> dict[str, Any]: - """Load glossary if enabled. Uses ISO code for file lookup. + """Load glossary from directory. Uses ISO code for file lookup. iso_code is already normalized (e.g. es_419). Tries {iso_code}.txt first, then {iso_code with underscores→hyphens}.txt (e.g. es-419.txt) if not found. 
""" - if not options.get("glossary", False): + glossary_dir = options.get("glossary") + if not glossary_dir: return {} - utils_file = Path(utils_module.__file__) - base_dir = utils_file.parent / "glossaries" / "machine_learning" + base_dir = Path(glossary_dir) candidates = [ base_dir / f"{iso_code}.txt", base_dir / f"{iso_code.replace('_', '-')}.txt", From 877eccbc3c2a7dbb37e6884166ffbc69630eb375 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 24 Mar 2026 09:39:55 +0000 Subject: [PATCH 04/14] cleanup: delete glossary dirs and remove moved code from ol_openedx_course_translations Co-authored-by: asadali145 <52656433+asadali145@users.noreply.github.com> Agent-Logs-Url: https://github.com/mitodl/open-edx-plugins/sessions/0ad538c8-6c6c-4b49-852c-62936b165920 --- .../glossaries/machine_learning/ar.txt | 175 -- .../glossaries/machine_learning/de.txt | 175 -- .../glossaries/machine_learning/el.txt | 988 ------ .../glossaries/machine_learning/es.txt | 175 -- .../glossaries/machine_learning/es_419.txt | 175 -- .../glossaries/machine_learning/fr.txt | 175 -- .../glossaries/machine_learning/ja.txt | 175 -- .../glossaries/machine_learning/pt_BR.txt | 175 -- .../glossaries/machine_learning/ru.txt | 213 -- src/ol_openedx_course_translations/README.rst | 58 - .../glossaries/machine_learning/ar.txt | 175 -- .../glossaries/machine_learning/de.txt | 175 -- .../glossaries/machine_learning/el.txt | 988 ------ .../glossaries/machine_learning/es.txt | 175 -- .../glossaries/machine_learning/es_419.txt | 175 -- .../glossaries/machine_learning/fr.txt | 175 -- .../glossaries/machine_learning/ja.txt | 175 -- .../glossaries/machine_learning/pt_BR.txt | 175 -- .../glossaries/machine_learning/ru.txt | 213 -- .../commands/sync_and_translate_language.py | 2676 ----------------- .../settings/common.py | 9 - .../utils/command_utils.py | 244 -- .../utils/constants.py | 230 +- .../utils/translation_sync.py | 1603 ---------- 
.../pyproject.toml | 5 +- 25 files changed, 2 insertions(+), 9675 deletions(-) delete mode 100644 src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/glossaries/machine_learning/ar.txt delete mode 100644 src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/glossaries/machine_learning/de.txt delete mode 100644 src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/glossaries/machine_learning/el.txt delete mode 100644 src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/glossaries/machine_learning/es.txt delete mode 100644 src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/glossaries/machine_learning/es_419.txt delete mode 100644 src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/glossaries/machine_learning/fr.txt delete mode 100644 src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/glossaries/machine_learning/ja.txt delete mode 100644 src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/glossaries/machine_learning/pt_BR.txt delete mode 100644 src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/glossaries/machine_learning/ru.txt delete mode 100644 src/ol_openedx_course_translations/ol_openedx_course_translations/glossaries/machine_learning/ar.txt delete mode 100644 src/ol_openedx_course_translations/ol_openedx_course_translations/glossaries/machine_learning/de.txt delete mode 100644 src/ol_openedx_course_translations/ol_openedx_course_translations/glossaries/machine_learning/el.txt delete mode 100644 src/ol_openedx_course_translations/ol_openedx_course_translations/glossaries/machine_learning/es.txt delete mode 100644 src/ol_openedx_course_translations/ol_openedx_course_translations/glossaries/machine_learning/es_419.txt delete mode 100644 src/ol_openedx_course_translations/ol_openedx_course_translations/glossaries/machine_learning/fr.txt delete mode 100644 
src/ol_openedx_course_translations/ol_openedx_course_translations/glossaries/machine_learning/ja.txt delete mode 100644 src/ol_openedx_course_translations/ol_openedx_course_translations/glossaries/machine_learning/pt_BR.txt delete mode 100644 src/ol_openedx_course_translations/ol_openedx_course_translations/glossaries/machine_learning/ru.txt delete mode 100644 src/ol_openedx_course_translations/ol_openedx_course_translations/management/commands/sync_and_translate_language.py delete mode 100644 src/ol_openedx_course_translations/ol_openedx_course_translations/utils/command_utils.py delete mode 100644 src/ol_openedx_course_translations/ol_openedx_course_translations/utils/translation_sync.py diff --git a/src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/glossaries/machine_learning/ar.txt b/src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/glossaries/machine_learning/ar.txt deleted file mode 100644 index 246ddba39..000000000 --- a/src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/glossaries/machine_learning/ar.txt +++ /dev/null @@ -1,175 +0,0 @@ -# AR HINTS -## TERM MAPPINGS -These are preferred terminology choices for this language. Use them whenever they sound natural; adapt freely if context requires. 
- -- 'accuracy' -> 'الدقة' -- 'activation function' -> 'دالّة التفعيل' -- 'artificial intelligence' -> 'الذكاء الاصطناعي' -- 'AUC' -> 'AUC' -- 'AUC (Area under the ROC curve)' -> 'المساحة تحت منحنى ROC' -- 'backpropagation' -> 'الانتشار العكسي' -- 'batch' -> 'دفعة' -- 'batch size' -> 'حجم الدفعة' -- 'bias (ethics/fairness)' -> 'التحيّز (الأخلاقيات/الإنصاف)' -- 'bias (math) or bias term' -> 'الانحياز (في الرياضيات) أو مصطلح الانحياز' -- 'bias in ethics and fairness' -> 'التحيز في الأخلاق والعدالة' -- 'bias term' -> 'مصطلح التحيز' -- 'binary classification' -> 'التصنيف الثنائي' -- 'bucketing' -> 'تصنيف البيانات' -- 'categorical' -> 'فئوية' -- 'categorical data' -> 'البيانات الفئوية' -- 'class' -> 'صنف' -- 'class-imbalanced dataset' -> 'مجموعة بيانات غير متوازنة الفئات' -- 'class-imbalanced datasets' -> 'مجموعات بيانات غير متوازنة الفئات' -- 'classification' -> 'التصنيف' -- 'classification model' -> 'نموذج التصنيف' -- 'classification threshold' -> 'عتبة التصنيف' -- 'classifier' -> 'مصنِّف' -- 'clipping' -> 'القص' -- 'confusion matrix' -> 'مصفوفة نجاح التوقعات' -- 'continuous feature' -> 'خاصية مستمرة' -- 'convergence' -> 'التقارب' -- 'data set or dataset' -> 'مجموعة البيانات' -- 'DataFrame' -> 'DataFrame' -- 'dataset' -> 'مجموعة بيانات' -- 'deep learning' -> 'التعلم العميق' -- 'deep model' -> 'نموذج عميق' -- 'dense feature' -> 'خاصية كثيفة' -- 'depth' -> 'العمق' -- 'discrete feature' -> 'خاصية محدّدة القيم' -- 'discrete features' -> 'الميزات المنفصلة' -- 'dynamic' -> 'ديناميكي' -- 'dynamic model' -> 'نموذج ديناميكي' -- 'early stopping' -> 'الإيقاف المبكر' -- 'embedding layer' -> 'طبقة التضمين' -- 'embedding layers' -> 'طبقات تضمين' -- 'epoch' -> 'حقبة' -- 'example' -> 'على سبيل المثال' -- 'false negative (FN)' -> 'سالب خاطئ (FN)' -- 'false negatives' -> 'الحالات السالبة الخاطئة' -- 'false positive (FP)' -> 'موجب خاطئ (FP)' -- 'false positive rate' -> 'معدّل الموجب الخاطئ' -- 'false positive rate (FPR)' -> 'معدّل الموجب الخاطئ' -- 'false positives' -> 'الحالات الموجبة 
الخاطئة' -- 'feature' -> 'ميزة' -- 'feature cross' -> 'مضروب مجموعات الخصائص' -- 'feature crosses' -> 'تقاطع الميزات' -- 'feature engineering' -> 'هندسة الميزات' -- 'feature set' -> 'مجموعة الميزات' -- 'feature vector' -> 'متّجه الميزات' -- 'feedback loop' -> 'حلقة الملاحظات' -- 'generalization' -> 'التعميم' -- 'generalization curve' -> 'منحنى التعميم' -- 'gradient descent' -> 'النزول المتدرّج' -- 'ground truth' -> 'معلومات فعلية' -- 'hidden layer' -> 'الطبقة المخفية' -- 'hidden layer(s)' -> 'الطبقات المخفية' -- 'hyperparameter' -> 'المعلَمة الفائقة' -- 'independently and identically distributed (i.i.d)' -> 'موزّعة بشكل مستقل ومتشابه' -- 'inference' -> 'الاستنتاج' -- 'input layer' -> 'طبقة الإدخال' -- 'interpretability' -> 'القابلية للتفسير' -- 'iteration' -> 'التكرار' -- 'L0regularization' -> 'التسوية من النوع L0' -- 'L1loss' -> 'L1' -- 'L1regularization' -> 'التسوية من النوع L1' -- 'L2loss' -> 'فقدانL2' -- 'L2regularization' -> 'التسوية من النوع L2' -- 'label' -> 'التصنيف' -- 'labeled example' -> 'مثال مصنّف' -- 'lambda' -> 'lambda' -- 'layer' -> 'طبقة' -- 'learning rate' -> 'معدّل التعلّم' -- 'linear' -> 'خطي' -- 'linear model' -> 'النموذج الخطي' -- 'linear models' -> 'النماذج الخطية' -- 'linear regression' -> 'الانحدار الخطي' -- 'Log Loss' -> 'الخسارة اللوغاريتمية' -- 'log-odds' -> 'لوغاريتم فرص الأفضلية' -- 'logistic regression' -> 'الانحدار اللوجستي' -- 'loss' -> 'خسارة' -- 'loss curve' -> 'منحنى الخسارة' -- 'loss function' -> 'دالة الخسارة' -- 'machine learning' -> 'تعلُم الآلة' -- 'majority class' -> 'الفئة الأكبر' -- 'mini-batch' -> 'دفعة صغيرة' -- 'minority class' -> 'فئة الأقلية' -- 'model' -> 'نموذج' -- 'multi-class classification' -> 'التصنيف المتعدّد الفئات' -- 'negative class' -> 'فئة سالبة' -- 'negative classes' -> 'الفئات السلبية' -- 'neural network' -> 'شبكة عصبونية' -- 'neural networks' -> 'للشبكات العصبية' -- 'neuron' -> 'عصبون' -- 'node (neural network)' -> 'عقدة (شبكة عصبونية)' -- 'nonlinear' -> 'غير خطي' -- 'nonstationarity' -> 'عدم الثبات' 
-- 'normalization' -> 'التسوية' -- 'numerical data' -> 'البيانات الرقمية' -- 'offline' -> 'بلا إنترنت' -- 'offline inference' -> 'الاستنتاج المؤخَّر' -- 'one-hot encoding' -> 'الترميز الأحادي' -- 'one-hot vector' -> 'متجهًا ذا ترميز ساخن' -- 'one-vs.-all' -> 'واحد-مقابل-الكل' -- 'online' -> 'online' -- 'online inference' -> 'الاستنتاج الحي' -- 'output layer' -> 'الطبقة النهائية' -- 'output layers' -> 'الطبقات النهائية' -- 'overfitting' -> 'فرط التخصيص' -- 'pandas' -> 'باندا' -- 'parameter' -> 'مَعلمة' -- 'positive class' -> 'فئة موجبة' -- 'positive classes' -> 'الفئات الإيجابية' -- 'post-processing' -> 'المعالجة اللاحقة' -- 'precision' -> 'الدقة' -- 'prediction' -> 'التوقّع' -- 'proxy labels' -> 'تصنيفات تقريبية' -- 'RAG' -> 'التوليد المعزّز بالاسترجاع (RAG)' -- 'rater' -> 'مُصنِّف' -- 'recall' -> 'تذكُّر الإعلان' -- 'Rectified Linear Unit (ReLU)' -> 'وحدة خطية مصحَّحة (ReLU)' -- 'regression model' -> 'نموذج الانحدار' -- 'regularization' -> 'التسوية' -- 'regularization rate' -> 'معدّل التسوية' -- 'ReLU' -> 'ReLU' -- 'retrieval-augmented generation' -> 'التوليد المعزّز بالاسترجاع' -- 'retrieval-augmented generation (RAG)' -> 'التوليد المعزّز بالاسترجاع (RAG)' -- 'ROC (receiver operating characteristic) Curve' -> 'منحنى الأمثلة الإيجابية' -- 'ROC curve' -> 'منحنى ROC' -- 'Root Mean Squared Error (RMSE)' -> 'جذر الخطأ التربيعي المتوسّط (RMSE)' -- 'sigmoid function' -> 'الدالّة الإسية' -- 'softmax' -> 'softmax' -- 'sparse feature' -> 'خاصية متناثرة' -- 'sparse representation' -> 'التمثيل المتناثر' -- 'sparse vector' -> 'متّجه متناثر' -- 'squared loss' -> 'الخسارة التربيعية' -- 'static' -> 'ثابت' -- 'static inference' -> 'الاستنتاج الثابت' -- 'static model' -> 'النموذج الثابت' -- 'stationarity' -> 'الثبات' -- 'Stochastic Gradient Descent (SGD)' -> 'النزول المتدرّج العشوائي (SGD)' -- 'supervised learning' -> 'التعلم المُوجّه' -- 'supervised machine learning' -> 'تعلُّم الآلة الخاضع للإشراف' -- 'synthetic feature' -> 'خاصية مصطنعة' -- 'synthetic features' -> 'ميزات 
اصطناعية' -- 'test loss' -> 'فقدان الاختبار' -- 'training' -> 'التدريب' -- 'training loss' -> 'فقدان التدريب' -- 'training set' -> 'مجموعة التدريب' -- 'training-serving skew' -> 'اختلاف بين بيانات التدريب وبيانات العرض' -- 'true negative (TN)' -> 'سالب صحيح' -- 'true negatives' -> 'الحالات السالبة الصحيحة' -- 'true positive (TP)' -> 'موجب صحيح (TP)' -- 'true positive rate' -> 'معدّل الإيجابية الحقيقية' -- 'true positive rate (TPR)' -> 'معدّل الموجب الصحيح (TPR)' -- 'true positives' -> 'الحالات الموجبة الصحيحة' -- 'underfitting' -> 'فرط التعميم' -- 'unlabeled example' -> 'مثال غير مصنّف' -- 'unsupervised machine learning' -> 'تعلُّم الآلة غير الموجَّه' -- 'validation' -> 'الإثبات' -- 'validation dataset' -> 'مجموعة بيانات التحقّق من الصحة' -- 'validation loss' -> 'فقدان التحقّق من الصحة' -- 'validation set' -> 'مجموعة التحقّق' -- 'weight' -> 'الوزن' -- 'weighted sum' -> 'المجموع الموزون' -- 'Z-score normalization' -> 'التسوية باستخدام الدرجة المعيارية' diff --git a/src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/glossaries/machine_learning/de.txt b/src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/glossaries/machine_learning/de.txt deleted file mode 100644 index c53a3be9e..000000000 --- a/src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/glossaries/machine_learning/de.txt +++ /dev/null @@ -1,175 +0,0 @@ -# DE HINTS -## TERM MAPPINGS -These are preferred terminology choices for this language. Use them whenever they sound natural; adapt freely if context requires. 
- -- 'accuracy' -> ‚Genauigkeit' -- 'activation function' -> ‚Aktivierungsfunktion' -- 'artificial intelligence' -> ‚künstliche Intelligenz' -- 'AUC' -> ‚AUC' -- 'AUC (Area under the ROC curve)' -> ‚AUC (Area Under the ROC Curve, Bereich unter der ROC-Kurve)' -- 'backpropagation' -> ‚Rückpropagation' -- 'batch' -> ‚Batch' -- 'batch size' -> ‚Batchgröße' -- 'bias (ethics/fairness)' -> ‚Bias (Ethik/Fairness)' -- 'bias (math) or bias term' -> ‚Bias (mathematisch) oder Bias-Term' -- 'bias in ethics and fairness' -> ‚Bias in Bezug auf Ethik und Fairness' -- 'bias term' -> ‚Bias-Term' -- 'binary classification' -> ‚Binärklassifizierung' -- 'bucketing' -> ‚Bucketing' -- 'categorical' -> ‚kategorialen' -- 'categorical data' -> ‚Kategoriale Daten' -- 'class' -> ‚Klasse' -- 'class-imbalanced dataset' -> ‚Dataset mit Klassenungleichgewicht' -- 'class-imbalanced datasets' -> ‚Datasets mit ungleichmäßiger Klassenverteilung' -- 'classification' -> ‚Klassifizierungsaufgabe' -- 'classification model' -> ‚Klassifikationsmodell' -- 'classification threshold' -> ‚Klassifizierungsschwellenwert' -- 'classifier' -> ‚Klassifikator' -- 'clipping' -> ‚Clipping' -- 'confusion matrix' -> ‚Wahrheitsmatrix' -- 'continuous feature' -> ‚stetiges Feature' -- 'convergence' -> ‚Konvergenz' -- 'data set or dataset' -> ‚Dataset oder Dataset' -- 'DataFrame' -> ‚DataFrame' -- 'dataset' -> ‚Dataset' -- 'deep learning' -> ‚Deep Learning' -- 'deep model' -> ‚Deep-Modell' -- 'dense feature' -> ‚vollbesetztes Feature' -- 'depth' -> ‚Tiefe' -- 'discrete feature' -> ‚diskretes Feature' -- 'discrete features' -> ‚diskrete Features' -- 'dynamic' -> ‚dynamic' -- 'dynamic model' -> ‚dynamisches Modell' -- 'early stopping' -> ‚Vorzeitiges Beenden' -- 'embedding layer' -> ‚Einbettungsebene' -- 'embedding layers' -> ‚Einbettungsebenen' -- 'epoch' -> ‚Epoche' -- 'example' -> ‚Beispiel' -- 'false negative (FN)' -> ‚falsch negativ (FN)' -- 'false negatives' -> ‚falsch negativen Ergebnisse' -- 'false positive (FP)' -> 
‚falsch positiv (FP)' -- 'false positive rate' -> ‚Falsch-Positiv-Rate' -- 'false positive rate (FPR)' -> ‚Rate falsch positiver Ergebnisse (False Positive Rate, FPR)' -- 'false positives' -> ‚falsch positiven Ergebnisse' -- 'feature' -> ‚Feature' -- 'feature cross' -> ‚Featureverknüpfung' -- 'feature crosses' -> ‚Feature-Kombinationen' -- 'feature engineering' -> ‚Feature Engineering' -- 'feature set' -> ‚Feature-Set' -- 'feature vector' -> ‚Featurevektor' -- 'feedback loop' -> ‚Feedbackschleife' -- 'generalization' -> ‚Generalisierung' -- 'generalization curve' -> ‚Verallgemeinerungskurve' -- 'gradient descent' -> ‚Gradientenabstieg' -- 'ground truth' -> ‚Ground Truth' -- 'hidden layer' -> ‚versteckte Ebene' -- 'hidden layer(s)' -> ‚verborgenen Schichten' -- 'hyperparameter' -> ‚Hyperparameter' -- 'independently and identically distributed (i.i.d)' -> ‚unabhängig und identisch verteilt (i.i.d.)' -- 'inference' -> ‚Inferenz' -- 'input layer' -> ‚Eingabelayer' -- 'interpretability' -> ‚Interpretierbarkeit' -- 'iteration' -> ‚Iteration' -- 'L0regularization' -> ‚L0-Regularisierung' -- 'L1loss' -> ‚L1-Verlust' -- 'L1regularization' -> ‚L1-Regularisierung' -- 'L2loss' -> ‚L2-Verlust' -- 'L2regularization' -> ‚L2-Regularisierung' -- 'label' -> ‚Label' -- 'labeled example' -> ‚Beispiel mit Label' -- 'lambda' -> ‚Lambda' -- 'layer' -> ‚Layer' -- 'learning rate' -> ‚Lernrate' -- 'linear' -> ‚Linear' -- 'linear model' -> ‚Lineares Modell' -- 'linear models' -> ‚linearen Modellen' -- 'linear regression' -> ‚lineare Regression' -- 'Log Loss' -> ‚Log Loss' -- 'log-odds' -> ‚Log-Odds' -- 'logistic regression' -> ‚logistische Regression' -- 'loss' -> ‚Niederlage' -- 'loss curve' -> ‚Verlustkurve' -- 'loss function' -> ‚Verlustfunktion' -- 'machine learning' -> ‚Machine Learning' -- 'majority class' -> ‚Mehrheitsklasse' -- 'mini-batch' -> ‚Mini-Batch' -- 'minority class' -> ‚Minderheitsklasse' -- 'model' -> ‚Modell' -- 'multi-class classification' -> ‚Klassifizierung mit 
mehreren Klassen' -- 'negative class' -> ‚negative Klasse' -- 'negative classes' -> ‚negativen Klassen' -- 'neural network' -> ‚neuronales Netzwerk' -- 'neural networks' -> ‚neuronale Netze' -- 'neuron' -> ‚Neuron' -- 'node (neural network)' -> ‚Knoten (neuronales Netzwerk)' -- 'nonlinear' -> ‚nicht linear' -- 'nonstationarity' -> ‚Nichtstationarität' -- 'normalization' -> ‚Normalisierung' -- 'numerical data' -> ‚Numerische Daten' -- 'offline' -> ‚offline' -- 'offline inference' -> ‚Offlineinferenz' -- 'one-hot encoding' -> ‚One-Hot-Codierung' -- 'one-hot vector' -> ‚One-Hot-Vektor' -- 'one-vs.-all' -> ‚One-vs.-All' -- 'online' -> ‚online' -- 'online inference' -> ‚Onlineinferenz' -- 'output layer' -> ‚Ausgabeschicht' -- 'output layers' -> ‚Ausgabelayer' -- 'overfitting' -> ‚Überanpassung' -- 'pandas' -> ‚pandas' -- 'parameter' -> ‚Parameter' -- 'positive class' -> ‚positive Klasse' -- 'positive classes' -> ‚positive Klassen' -- 'post-processing' -> ‚Nachbearbeitung' -- 'precision' -> ‚Precision' -- 'prediction' -> ‚Vorhersage' -- 'proxy labels' -> ‚Proxy-Labels' -- 'RAG' -> ‚RAG' -- 'rater' -> ‚Bewerter' -- 'recall' -> ‚Recall' -- 'Rectified Linear Unit (ReLU)' -> ‚Rektifizierte lineare Einheit (ReLU)' -- 'regression model' -> ‚Regressionsmodell' -- 'regularization' -> ‚Regularisierung' -- 'regularization rate' -> ‚Regularisierungsrate' -- 'ReLU' -> ‚ReLU' -- 'retrieval-augmented generation' -> ‚Retrieval-Augmented Generation' -- 'retrieval-augmented generation (RAG)' -> ‚Retrieval-Augmented Generation (RAG)' -- 'ROC (receiver operating characteristic) Curve' -> ‚ROC-Kurve (Receiver Operating Characteristic)' -- 'ROC curve' -> ‚ROC-Kurve' -- 'Root Mean Squared Error (RMSE)' -> ‚Wurzel der mittleren Fehlerquadratsumme (RMSE)' -- 'sigmoid function' -> ‚Sigmoidfunktion' -- 'softmax' -> ‚Softmax-Funktion' -- 'sparse feature' -> ‚dünnbesetztes Feature' -- 'sparse representation' -> ‚dünnbesetzte Darstellung' -- 'sparse vector' -> ‚dünnbesetzter Vektor' -- 'squared
loss' -> ‚Quadratischer Verlust' -- 'static' -> ‚Statisch' -- 'static inference' -> ‚Statische Inferenz' -- 'static model' -> ‚statischen Modell' -- 'stationarity' -> ‚Stationarität' -- 'Stochastic Gradient Descent (SGD)' -> ‚Stochastic Gradient Descent (SGD)' -- 'supervised learning' -> ‚überwachtes Lernen' -- 'supervised machine learning' -> ‚überwachtes maschinelles Lernen' -- 'synthetic feature' -> ‚synthetisches Feature' -- 'synthetic features' -> ‚synthetische Features' -- 'test loss' -> ‚Testverlust' -- 'training' -> ‚Training' -- 'training loss' -> ‚Trainingsverlust' -- 'training set' -> ‚Trainings-Dataset' -- 'training-serving skew' -> ‚Abweichungen zwischen Training und Bereitstellung' -- 'true negative (TN)' -> ‚richtig negativ (RN)' -- 'true negatives' -> ‚richtig negativen Ergebnisse' -- 'true positive (TP)' -> ‚Richtig positiv (TP)' -- 'true positive rate' -> ‚Rate der richtig positiven Ergebnisse' -- 'true positive rate (TPR)' -> ‚Rate richtig positiver Ergebnisse (True Positive Rate, TPR)' -- 'true positives' -> ‚richtig positiven Ergebnisse' -- 'underfitting' -> ‚Unteranpassung' -- 'unlabeled example' -> ‚Beispiel ohne Label' -- 'unsupervised machine learning' -> ‚unüberwachtes maschinelles Lernen' -- 'validation' -> ‚Validierung' -- 'validation dataset' -> ‚Validierungs-Dataset' -- 'validation loss' -> ‚Validierungsverlust' -- 'validation set' -> ‚Validierungs-Dataset' -- 'weight' -> ‚Gewicht' -- 'weighted sum' -> ‚gewichtete Summe' -- 'Z-score normalization' -> ‚Z-Score-Normalisierung' diff --git a/src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/glossaries/machine_learning/el.txt b/src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/glossaries/machine_learning/el.txt deleted file mode 100644 index 22c5b4e4c..000000000 --- a/src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/glossaries/machine_learning/el.txt +++ /dev/null @@ -1,988 +0,0 @@ -# EL HINTS -## TERM MAPPINGS -The 
following mappings are the CANONICAL Greek translations for AI/ML terms. -When translating TO GREEK, you MUST use these exact Greek terms for the listed English expressions whenever the same technical meaning is intended. - -- 'a priori probability' -> «πιθανότητα εκ των προτέρων» -- 'A* Search' -> «αλγόριθμοι αναζήτησης Α*» -- 'Abductive logic programming (ALP)' -> «Προγραμματισμός απαγωγικής λογικής (ALP)» -- 'Abductive reasoning' -> «Απαγωγικός συλλογισμός» -- 'abductive reasoning' -> «απαγωγική συλλογιστική» -- 'Abstract data type' -> «Αφηρημένος τύπος δεδομένων» -- 'abstract plan' -> «αφηρημένο πλάνο» -- 'Abstraction' -> «Αφαίρεση» -- 'Accelerating change' -> «Επιταχυνόμενη αλλαγή» -- 'accretive associative memory' -> «προσαυξητική μνήμη συσχέτισης» -- 'acquisitional efficiency' -> «αποδοτικότητα απόκτησης» -- 'action' -> «ενέργεια» -- 'Action language' -> «Γλώσσα δράσης» -- 'Action model learning' -> «Εκμάθηση μοντέλου δράσης» -- 'action schemas' -> «σχήματα ενεργειών» -- 'Action selection' -> «Επιλογή δράσης» -- 'Activation function' -> «Λειτουργία ενεργοποίησης» -- 'activation function' -> «συνάρτηση ενεργοποίησης» -- 'active' -> «ενεργός» -- 'active database' -> «ενεργή βάση δεδομένων» -- 'active rule' -> «ενεργός κανόνας» -- 'active troubleshooting' -> «ενεργητική διάγνωση» -- 'Adaptive algorithm' -> «Προσαρμοστικός αλγόριθμος» -- 'Adaptive neuro fuzzy inference system (ANFIS)' -> «Προσαρμοστικό σύστημα ασαφών συμπερασμάτων δίκτυου» -- 'adaptivity' -> «προσαρμοστικότητα» -- 'add list' -> «λίστα προσθηκών» -- 'admissibility criterion' -> «κριτήριο αποδοχής» -- 'admissible' -> «αποδεκτός» -- 'Admissible heuristic' -> «Παραδεκτό ευρετικό» -- 'adversary game' -> «ανταγωνιστικό παίγνιο» -- 'Affective computing' -> «Συναισθηματική υπολογιστική» -- 'agent' -> «πράκτορας» -- 'Agent architecture' -> «Αρχιτεκτονική πράκτορα» -- 'agglomerative algorithm' -> «αλγόριθμος συγχώνευσης» -- 'AI' -> «τεχνητή νοημοσύνη» -- 'AI accelerator' -> «Επιταχυντής AI» -- 
'AI-complete' -> «AI-πλήρης» -- 'Algorithm' -> «Αλγόριθμος» -- 'algorithm' -> «αλγόριθμος» -- 'Algorithmic efficiency' -> «Αλγοριθμική απόδοση» -- 'Algorithmic probability' -> «Αλγοριθμική πιθανότητα» -- 'Alpha-Beta algorithm' -> «αλγόριθμος άλφα-βήτα» -- 'Alpha-Beta search' -> «αναζήτηση άλφα-βήτα» -- 'Ambient intelligence (AmI)' -> «Ευφυΐα περιβάλλοντος» -- 'ambiguity' -> «πολυσημαντικότητα» -- 'analogical reasoning' -> «συλλογιστική με αναλογίες» -- 'Analysis of algorithms' -> «Ανάλυση αλγορίθμων» -- 'Analytics' -> «Ανάλυση» -- 'AND tree' -> «δένδρο ΚΑΙ» -- 'AND/OR tree' -> «δένδρο ΚΑΙ / Ή» -- 'Answer set programming (ASP)' -> «Προγραμματισμός συνόλου απαντήσεων» -- 'Anytime algorithm' -> «Ανα πάσα στιγμή Αλγόριθμος» -- 'Application programming interface (API)' -> «Διεπαφή προγραμματισμού εφαρμογών» -- 'Approximate string matching' -> «Κατά προσέγγιση ταίριασμα συμβολοσειρών» -- 'Approximation error' -> «Σφάλμα προσέγγισης» -- 'arc consistency' -> «συνέπεια τόξου» -- 'Argumentation framework' -> «Πλαίσιο επιχειρημάτων» -- 'artificial agent' -> «τεχνητός πράκτορας» -- 'Artificial general intelligence (AGI)' -> «Τεχνητή γενική νοημοσύνη» -- 'Artificial immune system (AIS)' -> «Τεχνητό ανοσοποιητικό σύστημα» -- 'artificial intelligence' -> «τεχνητή νοημοσύνη» -- 'Artificial Intelligence Markup Language' -> «Γλώσσα σήμανσης τεχνητής νοημοσύνης» -- 'Artificial intelligence (AI)' -> «Τεχνητή νοημοσύνη» -- 'Artificial neural network (ANN)' -> «Τεχνητό νευρωνικό δίκτυο» -- 'artificial neuron' -> «τεχνητός νευρώνας» -- 'association rules' -> «κανόνες συσχέτισης» -- 'associative memory' -> «μνήμη συσχέτισης» -- 'Asymptotic computational complexity' -> «Ασυμπτωτική υπολογιστική πολυπλοκότητα» -- 'atomic formula' -> «ατομικός τύπος» -- 'atoms' -> «άτομα» -- 'attribute selection' -> «επιλογή χαρακτηριστικών» -- 'attributes' -> «χαρακτηριστικά» -- 'Attributional calculus' -> «Λογισμός απόδοσης» -- 'auctions protocols' -> «πρωτόκολλα πλειστηριασμού» -- 'Augmented reality (AR)' 
-> «Επαυξημένη πραγματικότητα» -- 'auto-associative memories' -> «αυτοσυσχετιζόμενες μνήμες» -- 'Automata theory' -> «Θεωρία αυτομάτων» -- 'Automated machine learning (AutoML)' -> «Αυτοματοποιημένη μηχανική εκμάθηση» -- 'Automated planning and scheduling' -> «Αυτοματοποιημένος προγραμματισμός» -- 'Automated reasoning' -> «Αυτοματοποιημένη συλλογιστική» -- 'automated synopsis' -> «αυτόματη περίληψη» -- 'automatic translation' -> «αυτόματη μετάφραση» -- 'Autonomic computing (AC)' -> «Αυτόνομος Υπολογισμός» -- 'Autonomous car' -> «Αυτόνομο αυτοκίνητο» -- 'Autonomous robot' -> «Αυτόνομο ρομπότ» -- 'autonomy' -> «αυτονομία» -- 'average reward model' -> «μοντέλο μέσης ανταμοιβής» -- 'axon' -> «άξονας» -- 'back propagation' -> «ανάστροφη μετάδοση» -- 'Backpropagation' -> «Ο πίσω πολλαπλασιασμός» -- 'Backpropagation through time (BPTT)' -> «Πίσω διάδοση στο χρόνο (BPTT)» -- 'backtracking' -> «οπισθοδρόμηση» -- 'backtracking points' -> «σημεία οπισθοδρόμησης» -- 'Backward chaining' -> «Αλυσίδα προς τα πίσω» -- 'backward chaining' -> «ανάστροφη ακολουθία εκτέλεσης» -- 'backward pass' -> «ανάστροφο πέρασμα» -- 'Bag-of-words model' -> «Μοντέλο τσάντα με λέξεις» -- 'Bag-of-words model in computer vision' -> «Μοντέλο τσάντας λέξεων στην όραση υπολογιστή» -- 'basic probability assignment' -> «βασική κατανομή πιθανότητας» -- 'batch learning' -> «μάθηση δέσμης» -- 'Batch normalization' -> «Ομαλοποίηση παρτίδας» -- 'Bayesian programming' -> «Μπεϋζιανός προγραμματισμός» -- 'Beam Search' -> «ακτινωτή αναζήτηση» -- 'Bees algorithm' -> «Αλγόριθμος μελισσών» -- 'Behavior informatics (BI)' -> «Πληροφορική συμπεριφοράς» -- 'Behavior tree (BT)' -> «Δέντρο συμπεριφοράς» -- 'belief' -> «πεποίθηση» -- 'Belief-desire-intention software model (BDI)' -> «Μοντέλο λογισμικού πεποίθησης-επιθυμίας-πρόθεσης» -- 'benevolence' -> «αγαθή προαίρεση» -- 'Best-First Search' -> «αναζήτηση πρώτα στο καλύτερο» -- 'Bias–variance tradeoff' -> «Ανταλλαγή μεροληψίας-διακύμανσης» -- 'bidirectional associative 
memories' -> «μνήμη συσχέτισης διπλής κατεύθυνσης» -- 'Bidirectional Search' -> «αναζήτηση διπλής κατεύθυνσης» -- 'Big data' -> «Μεγάλα δεδομένα» -- 'Big O notation' -> «Σημείωση Big O» -- 'binary constraint' -> «δυαδικός περιορισμός» -- 'Binary tree' -> «Δυαδικό δέντρο» -- 'blackboard' -> «μαυροπίνακας» -- 'blackboard architecture' -> «αρχιτεκτονική μαυροπίνακα» -- 'Blackboard system' -> «Σύστημα μαυροπίνακα» -- 'blackboard systems' -> «συστήματα μαυροπίνακα» -- 'Blind Search' -> «τυφλή αναζήτηση» -- 'Boltzmann machine' -> «Μηχανή Boltzmann» -- 'Boolean satisfiability problem' -> «Πρόβλημα ικανοποίησης Boolean» -- 'Brain technology' -> «Τεχνολογία εγκεφάλου» -- 'Branch and Bound Search' -> «αναζήτηση με επέκταση και οριοθέτηση» -- 'Branching factor' -> «Συντελεστής διακλάδωσης» -- 'branching factor' -> «παράγοντας διακλάδωσης» -- 'Breadth First Search' -> «αναζήτηση πρώτα σε πλάτος» -- 'Brute-force search' -> «Αναζήτηση ωμής βίας» -- 'candidate elimination' -> «απαλοιφή υποψηφίων» -- 'canonical form' -> «κανονική μορφή» -- 'canonical formation rules' -> «κανόνες ορθής διαμόρφωσης» -- 'Capsule neural network (CapsNet)' -> «Νευρωνικό δίκτυο κάψουλας» -- 'card sorting' -> «ταξινόμηση καρτών» -- 'case adaptation' -> «προσαρμογή περιπτώσεων» -- 'case indexing' -> «δεικτοδότηση περιπτώσεων» -- 'case learning' -> «εκμάθηση περιπτώσεων» -- 'case library' -> «βιβλιοθήκη περιπτώσεων» -- 'case retrieval' -> «ανάκληση περιπτώσεων» -- 'case verification' -> «επαλήθευση περιπτώσεων» -- 'case-based' -> «βασισμένο σε περιπτώσεις» -- 'case-based learning' -> «μάθηση κατά περίπτωση» -- 'case-based planning' -> «σχεδιασμός βασισμένος σε παραδείγματα» -- 'case-based reasoning' -> «συλλογιστική βασισμένη σε περιπτώσεις» -- 'Case-based reasoning (CBR)' -> «Συλλογισμός βάσει περιπτώσεων» -- 'causal link' -> «αιτιολογικές συνδέσεις» -- 'causal model' -> «αιτιοκρατικό μοντέλο» -- 'certainty factors' -> «συντελεστές βεβαιότητας» -- 'chaining' -> «ακολουθία εκτέλεσης κανόνων» -- 'chromosome' ->
«χρωμόσωμα» -- 'chronological backtracking' -> «χρονική οπισθοδρόμηση» -- 'circumscription' -> «μέθοδος περιγράμματος» -- 'class' -> «κλάση» -- 'class extension' -> «επέκταση κλάσης» -- 'classical negation' -> «κλασική άρνηση» -- 'classification' -> «ταξινόμηση» -- 'classification rules' -> «κανόνες ταξινόμησης» -- 'classification trees' -> «δένδρο ταξινόμησης» -- 'clausal form' -> «προτασιακή μορφή» -- 'closed formula' -> «κλειστός τύπος» -- 'closed set' -> «κλειστό σύνολο» -- 'closed world' -> «κλειστός κόσμος» -- 'closed world assumption' -> «υπόθεση κλειστού κόσμου» -- 'Cloud robotics' -> «Cloud ρομποτική» -- 'CLP' -> «λογικός προγραμματισμός με περιορισμούς» -- 'Cluster analysis' -> «Ανάλυση συστάδων» -- 'clustering' -> «ομαδοποίηση» -- 'clusters' -> «ομάδες» -- 'coarse grain' -> «αδρή υφή» -- 'Cobweb' -> «Ιστός αράχνης» -- 'Cognitive architecture' -> «Γνωστική αρχιτεκτονική» -- 'Cognitive computing' -> «Γνωστική Υπολογιστική» -- 'Cognitive science' -> «Γνωστική επιστήμη» -- 'cognitive science' -> «γνωσιολογική επιστήμη» -- 'combinatorial explosion' -> «συνδυαστική έκρηξη» -- 'Combinatorial optimization' -> «Συνδυαστική βελτιστοποίηση» -- 'commitment' -> «δέσμευση» -- 'Committee machine' -> «Μηχανή επιτροπής» -- 'common sense' -> «κοινή λογική» -- 'Commonsense knowledge' -> «Κοινή γνώση» -- 'Commonsense reasoning' -> «Κοινός συλλογισμός» -- 'communication protocol' -> «πρωτόκολλο επικοινωνίας» -- 'competition' -> «ανταγωνισμός» -- 'competitive neural networks' -> «νευρωνικά δίκτυα με ανταγωνισμό» -- 'compiled knowledge' -> «αυτοματοποιημένη γνώση» -- 'complementary pairs' -> «συμπληρωματικά ζεύγη» -- 'complete' -> «πλήρης» -- 'complete plan' -> «πλήρες πλάνο» -- 'completeness' -> «πληρότητα» -- 'Computational chemistry' -> «Υπολογιστική χημεία» -- 'Computational complexity theory' -> «Υπολογιστική θεωρία πολυπλοκότητας» -- 'Computational creativity' -> «Υπολογιστική δημιουργικότητα» -- 'Computational cybernetics' -> «Υπολογιστική κυβερνητική» -- 'Computational
humor' -> «Υπολογιστικό χιούμορ» -- 'computational intelligence' -> «υπολογιστική νοημοσύνη» -- 'Computational intelligence (CI)' -> «Υπολογιστική νοημοσύνη» -- 'Computational learning theory' -> «Υπολογιστική θεωρία μάθησης» -- 'Computational linguistics' -> «Υπολογιστική γλωσσολογία» -- 'Computational mathematics' -> «Υπολογιστικά μαθηματικά» -- 'Computational neuroscience' -> «Υπολογιστική νευροεπιστήμη» -- 'Computational number theory' -> «Υπολογιστική θεωρία αριθμών» -- 'Computational problem' -> «Υπολογιστικό πρόβλημα» -- 'Computational statistics' -> «Υπολογιστική στατιστική» -- 'Computational Tree Logic' -> «Λογική Υπολογιστικού Δένδρου» -- 'Computer audition (CA)' -> «Οντισιόν υπολογιστή (CA)» -- 'Computer science' -> «Επιστήμη των υπολογιστών» -- 'Computer vision' -> «Υπολογιστική όραση» -- 'Computer-automated design (CAutoD)' -> «Αυτοματοποιημένη σχεδίαση υπολογιστή» -- 'concept' -> «έννοια» -- 'Concept drift' -> «Εννοιολογική μετατόπιση» -- 'concept learning' -> «μάθηση εννοιών» -- 'concept type' -> «τύπος έννοιας» -- 'conceptual dependency' -> «εννοιολογική εξάρτηση» -- 'conceptual dependency graph' -> «γράφος εννοιολογικής εξάρτησης» -- 'conceptual dependency relationships' -> «σχέσεις εννοιολογικής εξάρτησης» -- 'conceptual graphs' -> «εννοιολογικός γράφος» -- 'conceptual relation' -> «εννοιολογικές σχέσεις» -- 'conditional effects' -> «αποτελέσματα υπό προϋπόθεση» -- 'conditional probability' -> «πιθανότητα υπό συνθήκη» -- 'confidence' -> «εμπιστοσύνη» -- 'configuration' -> «διαμόρφωση» -- 'conflict' -> «σύγκρουση κανόνων» -- 'conflict resolution' -> «επίλυση συγκρούσεων» -- 'conflict set' -> «σύνολο σύγκρουσης» -- 'conflicting literals' -> «αντικρουόμενα λεκτικά» -- 'conjunctive model of classification' -> «συζευκτικό μοντέλο ταξινόμησης» -- 'conjunctive normal form' -> «συζευκτική κανονική μορφή» -- 'Connectionism' -> «Συνδεσιονισμός» -- 'connectionist approach' -> «συνδετική προσέγγιση» -- 'connectives' -> «συνδετικά» -- 'consistency check' -> 
«έλεγχος συνέπειας» -- 'consistency check algorithms' -> «αλγόριθμος ελέγχου συνέπειας» -- 'Consistent heuristic' -> «Συνεπής ευρετική» -- 'Constrained conditional model (CCM)' -> «Περιορισμένο υπό όρους μοντέλο» -- 'constraint' -> «περιορισμός» -- 'constraint graph' -> «γράφος περιορισμών» -- 'Constraint logic programming' -> «Προγραμματισμός λογικής περιορισμών» -- 'Constraint Logic Programming' -> «λογικός προγραμματισμός με περιορισμούς» -- 'constraint programming' -> «προγραμματισμός με περιορισμούς» -- 'constraint propagation' -> «διάδοση περιορισμών» -- 'constraint satisfaction' -> «ικανοποίηση περιορισμών» -- 'constraint satisfaction problems' -> «προβλήματα ικανοποίησης περιορισμών» -- 'constraint solving problems' -> «προβλήματα επίλυσης περιορισμών» -- 'Constructed language' -> «Κατασκευασμένη γλώσσα» -- 'content addressability' -> «δυνατότητα ανάκλησης περιεχομένου» -- 'context' -> «συμφραζόμενα» -- 'contracting net protocol' -> «συντονισμός πρακτόρων με σύναψη συμβολαίων» -- 'control' -> «έλεγχος» -- 'Control theory' -> «Θεωρία ελέγχου» -- 'convention' -> «σύμβαση» -- 'Convolutional neural network' -> «Συνελικτικό νευρωνικό δίκτυο» -- 'cooperation' -> «συνεργασία» -- 'crisp value' -> «σαφής τιμή» -- 'critical point' -> «κρίσιμο σημείο» -- 'criticality value' -> «τιμή σημαντικότητας» -- 'critics' -> «κανόνες προσαρμογής περιπτώσεων» -- 'Crossover' -> «Διασταύρωση» -- 'crossover' -> «διασταύρωση» -- 'crossover mask' -> «μάσκα διασταύρωσης» -- 'Darkforest' -> «Σκοτεινό δάσος» -- 'Dartmouth workshop' -> «Εργαστήριο Dartmouth» -- 'data abstraction' -> «γενίκευση δεδομένων» -- 'Data augmentation' -> «Αύξηση δεδομένων» -- 'data driven' -> «αναζήτηση οδηγούμενη από δεδομένα» -- 'Data fusion' -> «Συγχώνευση δεδομένων» -- 'Data integration' -> «Ενοποίηση δεδομένων» -- 'Data mining' -> «Εξόρυξη δεδομένων» -- 'data mining' -> «εξόρυξη σε δεδομένα» -- 'data mining incremental' -> «εξόρυξη σε δεδομένα σταδιακή» -- 'Data science' -> «Επιστημονικά δεδομένα» -- 'Data 
set' -> «Σύνολο δεδομένων» -- 'data space' -> «χώρος δεδομένων» -- 'data warehouse' -> «συστήματα αποθήκευσης δεδομένων» -- 'Data warehouse (DW or DWH)' -> «Αποθήκη δεδομένων» -- 'Datalog' -> «Καταγραφή δεδομένων» -- 'defeasible inference' -> «αναιρέσιμη εξαγωγή συμπερασμάτων» -- 'decidable logic' -> «καταληκτική λογική» -- 'Decision boundary' -> «Όριο απόφασης» -- 'Decision support system (DSS)' -> «Σύστημα υποστήριξης αποφάσεων» -- 'Decision theory' -> «Θεωρία απόφασης» -- 'Decision tree learning' -> «Εκμάθηση του δέντρου αποφάσεων» -- 'Declarative programming' -> «Δηλωτικός προγραμματισμός» -- 'deduction system' -> «σύστημα εξαγωγής συμπερασμάτων» -- 'Deductive classifier' -> «Απαγωγικός ταξινομητής» -- 'deductive reasoning' -> «συνεπαγωγική συλλογιστική» -- 'Deep Blue' -> «Βαθύ μπλε» -- 'deep knowledge' -> «βαθιά γνώση» -- 'Deep learning' -> «Βαθιά μάθηση» -- 'DeepMind Technologies' -> «Τεχνολογίες DeepMind» -- 'default reasoning' -> «συλλογιστική εύλογων υποθέσεων» -- 'defeasible logic' -> «αναιρέσιμη λογική» -- 'defeasible rules' -> «αναιρέσιμοι κανόνες» -- 'defeasible theory' -> «αναιρέσιμη θεωρία» -- 'defeaters' -> «αναιρετές» -- 'definite clause grammars' -> «γραμματικές οριστικών προτάσεων» -- 'definite inference' -> «οριστική απόδειξη» -- 'defuzzification' -> «αποσαφήνιση» -- 'degree of consistency' -> «βαθμός συνέπειας» -- 'degree of truth' -> «βαθμός αληθείας» -- 'delete list' -> «λίστα διαγραφών» -- 'deliberative agent' -> «πράκτορας με εσωτερική κατάσταση» -- 'Delta rule' -> «κανόνας Δέλτα» -- 'demons' -> «δαίμονας» -- 'demotion' -> «υποβιβασμός» -- 'dendrite' -> «δενδρίτης» -- 'Depth-First Search' -> «αναζήτηση πρώτα σε βάθος» -- 'Description logic (DL)' -> «Λογική περιγραφής» -- 'design' -> «σχεδίαση» -- 'design stance' -> «σχεδιαστική προσέγγιση» -- 'detach' -> «διαχωρισμός» -- 'deterministic effects' -> «ντετερμινιστικά αποτελέσματα» -- 'Developmental robotics (DevRob)' -> «Αναπτυξιακή ρομποτική» -- 'Diagnosis' -> «Διάγνωση» -- 'diagnosis' ->
«διάγνωση» -- 'Dialogue system' -> «Σύστημα διαλόγου» -- 'Dimensionality reduction' -> «Μείωση διαστάσεων» -- 'discrepancy' -> «ασυμφωνία τιμών» -- 'Discrete system' -> «Διακριτό σύστημα» -- 'discretization' -> «διακριτοποίηση» -- 'disjunctive normal form' -> «διαζευκτική κανονική μορφή» -- 'distributed artificial intelligence' -> «κατανεμημένη τεχνητή νοημοσύνη» -- 'Distributed artificial intelligence (DAI)' -> «Κατανεμημένη τεχνητή νοημοσύνη» -- 'distributed data mining' -> «κατανεμημένη εξόρυξη σε δεδομένα» -- 'distributed memory' -> «κατανεμημένη μνήμη» -- 'distributed multi-agent planning' -> «κατανεμημένος πολυπρακτορικός σχεδιασμός» -- 'divisive algorithm' -> «αλγόριθμος διαίρεσης» -- 'domain expert' -> «ειδικός του τομέα» -- 'Dynamic epistemic logic (DEL)' -> «Δυναμική επιστημική λογική» -- 'dynamic programming' -> «δυναμικός προγραμματισμός» -- 'Eager learning' -> «Πρόθυμη μάθηση» -- 'eager learning' -> «έγκαιρη μάθηση» -- 'Ebert test' -> «Τεστ Έμπερτ» -- 'Echo state network (ESN)' -> «Δίκτυο κατάστασης Echo» -- 'edge detection' -> «εντοπισμός ακμών» -- 'effectors' -> «εξαρτήματα δράσης» -- 'Embodied agent' -> «Ενσαρκωμένος πράκτορας» -- 'Embodied cognitive science' -> «Ενσωματωμένη γνωστική επιστήμη» -- 'encapsulation' -> «εγκλεισμός (αντικειμένου)» -- 'energy function' -> «συνάρτηση ενέργειας» -- 'Enforced Hill-Climbing Search' -> «αναζήτηση με εξαναγκασμένη αναρρίχηση λόφου» -- 'Ensemble averaging' -> «Μέσος όρος του συνόλου» -- 'entropy of information' -> «εντροπία πληροφορίας» -- 'episode mining algorithms' -> «εξόρυξη επεισοδίων» -- 'episodical knowledge' -> «επεισοδιακή γνώση» -- 'Epoch (machine learning)' -> «Εποχή (μηχανική μάθηση)» -- 'epochs' -> «εποχές» -- 'equivalence' -> «ισοδυναμία» -- 'equivalence rules' -> «κανόνες ισοδυναμίας» -- 'erasure' -> «διαγραφή» -- 'error driven learning' -> «μάθηση καθοδηγούμενη από το σφάλμα» -- 'Error-driven learning' -> «Μάθηση με γνώμονα τα σφάλματα» -- 'Ethics of artificial intelligence' -> «Ηθική της 
τεχνητής νοημοσύνης» -- 'Euclidean distance' -> «Ευκλείδεια απόσταση» -- 'evaluation' -> «αποτίμηση» -- 'evaluation function' -> «συνάρτηση αξιολόγησης» -- 'event-driven rule' -> «ενεργός κανόνας» -- 'evoking strength' -> «δύναμη πρόκλησης» -- 'Evolutionary algorithm (EA)' -> «Εξελικτικός αλγόριθμος» -- 'Evolutionary computation' -> «Εξελικτικός υπολογισμός» -- 'Evolving classification function (ECF)' -> «Εξελισσόμενη συνάρτηση ταξινόμησης» -- 'exhaustive search' -> «εξαντλητική αναζήτηση» -- 'existential graphs' -> «υπαρξιακοί γράφοι» -- 'existential quantifier' -> «υπαρξιακός ποσοδείκτης» -- 'Existential risk' -> «Υπαρξιακός κίνδυνος» -- 'exoneration' -> «αθώωση» -- 'Expert system' -> «Ειδικό σύστημα» -- 'expert system' -> «έμπειρο σύστημα» -- 'expert system shell' -> «κέλυφος έμπειρου συστήματος» -- 'explicit knowledge' -> «ρητή γνώση» -- 'extension principle' -> «αρχή της επέκτασης» -- 'Fast-and-frugal trees' -> «Γρήγορα και λιτά δέντρα» -- 'Feature extraction' -> «Εξαγωγή χαρακτηριστικών» -- 'Feature learning' -> «Εκμάθηση χαρακτηριστικών» -- 'Feature selection' -> «Επιλογή χαρακτηριστικών» -- 'Federated learning' -> «Ομοσπονδιακή μάθηση» -- 'feedback' -> «ανάδραση, ανατροφοδότηση» -- 'feedforward' -> «πρόσθια τροφοδότηση» -- 'filtering algorithm' -> «αλγόριθμος διήθησης τιμών» -- 'final state' -> «τελική κατάσταση» -- 'fine grain' -> «λεπτή υφή» -- 'first fail principle' -> «αρχή συντομότερης αποτυχίας» -- 'first order predicate logic' -> «κατηγορηματική λογική πρώτης τάξης» -- 'First-order logic' -> «Λογική πρώτης τάξης» -- 'fitness function' -> «συνάρτηση καταλληλότητας» -- 'Fluent' -> «Ευφραδής» -- 'Formal language' -> «Επίσημη γλώσσα» -- 'Forward chaining' -> «Αλυσίδα προς τα εμπρός» -- 'forward chaining' -> «ορθή ακολουθία εκτέλεσης» -- 'forward checking' -> «προοπτικός έλεγχος» -- 'Frame' -> «Πλαίσιο» -- 'frame axioms' -> «αξιώματα του πλαισίου» -- 'Frame language' -> «Γλώσσα πλαισίου» -- 'frame of discernment' -> «πλαίσιο διάκρισης» -- 'Frame problem'
-> «Πρόβλημα πλαισίου» -- 'frame problem' -> «πρόβλημα πλαισίου» -- 'frames' -> «πλαίσια» -- 'Friendly artificial intelligence' -> «Φιλική τεχνητή νοημοσύνη» -- 'full look ahead' -> «πλήρης έγκαιρη εξέταση» -- 'functional dependency' -> «λειτουργική εξάρτηση» -- 'functional term' -> «συναρτησιακός όρος» -- 'Futures studies' -> «Μελλοντικές μελέτες» -- 'fuzzification' -> «μετατροπή μεγέθους σε ασαφές» -- 'fuzziness' -> «ασάφεια» -- 'fuzzy complement' -> «συμπληρωματικό ασαφούς συνόλου» -- 'fuzzy composition' -> «σύνθεση ασαφών σχέσεων» -- 'Fuzzy control system' -> «Ασαφές σύστημα ελέγχου» -- 'fuzzy linguistic description' -> «ασαφής λεκτική περιγραφή» -- 'fuzzy linguistic variable' -> «ασαφής λεκτική μεταβλητή» -- 'Fuzzy logic' -> «Ασαφής λογική» -- 'fuzzy logic' -> «ασαφής λογική» -- 'fuzzy numbers' -> «ασαφείς αριθμοί» -- 'fuzzy reasoning' -> «ασαφής συλλογιστική» -- 'fuzzy relations' -> «ασαφείς σχέσεις» -- 'Fuzzy rule' -> «Ασαφής κανόνας» -- 'fuzzy rule' -> «ασαφής κανόνας» -- 'Fuzzy set' -> «Ασαφές σύνολο» -- 'fuzzy set' -> «ασαφή σύνολα» -- 'fuzzy set theory' -> «θεωρία ασαφών συνόλων» -- 'fuzzy variable' -> «ασαφής μεταβλητή» -- 'Game theory' -> «Θεωρία παιγνίων» -- 'game tree' -> «δένδρο παιγνίου» -- 'gene' -> «γονίδιο» -- 'general problem solver' -> «γενικός επιλυτής προβλημάτων» -- 'generalization rule' -> «κανόνας γενίκευσης» -- 'generalized modus ponens' -> «γενικευμένος τρόπος του θέτειν» -- 'generalized modus tollens' -> «γενικευμένος τρόπος του αναιρείν» -- 'generate and test' -> «παραγωγή και δοκιμή» -- 'generation gap' -> «χάσμα γενεών» -- 'Generative adversarial network (GAN)' -> «Δημιουργικό ανταγωνιστικό δίκτυο» -- 'genetic algorithms' -> «γενετικοί αλγόριθμοι» -- 'Genetic algorithm (GA)' -> «Γενετικός αλγόριθμος» -- 'Genetic operator' -> «Γενετικός χειριστής» -- 'genetic programming' -> «γενετικός προγραμματισμός» -- 'genotype' -> «γονότυπος» -- 'Glowworm swarm optimization' -> «Βελτιστοποίηση σμήνους Glowworm» -- 'goal driven' -> «αναζήτηση
οδηγούμενη από στόχους» -- 'goals of attainment' -> «στόχοι επίτευξης» -- 'graded learning' -> «βαθμολογημένη μάθηση» -- 'gradient descent' -> «επικλινής καθόδος» -- 'gradient descent optimization' -> «βελτιστοποίηση επικλινούς καθόδου» -- 'Graph (abstract data type)' -> «Γράφημα» -- 'Graph (discrete mathematics)' -> «Γράφημα (διακριτά μαθηματικά)» -- 'Graph database (GDB)' -> «Βάση δεδομένων γραφημάτων» -- 'graph expansion' -> «επέκταση γράφου» -- 'Graph theory' -> «Θεωρία γραφημάτων» -- 'Graph traversal' -> «Διασύνδεση γραφήματος» -- 'graph-based planning' -> «σχεδιασμός βασισμένος σε γράφους» -- 'grid' -> «πλέγμα» -- 'grip' -> «λαβή» -- 'ground term' -> «βασικός όρος» -- 'guided-probe approach' -> «προσέγγιση καθοδηγούμενων δοκιμών» -- 'hetero-associative memories' -> «ετεροσυσχετιζόμενες μνήμες» -- 'Heuristic' -> «Ευρετική» -- 'heuristic' -> «ευρετικός μηχανισμός» -- 'heuristic classification' -> «ευρετική κατηγοριοποίηση» -- 'heuristic function' -> «ευρετική συνάρτηση» -- 'heuristic match' -> «ευρετική ταυτοποίηση» -- 'heuristic search' -> «ευρετική αναζήτηση» -- 'heuristic value' -> «ευρετική τιμή» -- 'Hidden layer' -> «Κρυφό στρώμα» -- 'hidden layers' -> «κρυφά επίπεδα» -- 'Hidden unit' -> «Κρυφή μονάδα» -- 'hierarchical planning' -> «ιεραρχικός σχεδιασμός» -- 'Hierarchical Task Networks' -> «ιεραρχικά δίκτυα διεργασιών» -- 'hierarchy concept type' -> «ιεραρχία τύπων εννοιών» -- 'hierarchy relation type' -> «ιεραρχία τύπων σχέσεων» -- 'higher order constraint' -> «περιορισμός ανώτερης τάξης» -- 'Hill Climbing Search' -> «αναζήτηση αναρρίχησης λόφων» -- 'horizon effect' -> «φαινόμενο ορίζοντα» -- 'humanoid robots' -> «ανθρωποειδή ρομπότ» -- 'hybrid agent' -> «υβριδικός πράκτορας» -- 'Hyper-heuristic' -> «Υπερ-ευρετικό» -- 'hypotheses discrimination' -> «διάκριση υποθέσεων» -- 'hypothesis space' -> «χώρος υποθέσεων» -- 'hypothesize and test' -> «δημιουργία και έλεγχος υποθέσεων» -- 'IEEE Computational Intelligence Society' -> «Κοινωνία Υπολογιστικής 
Νοημοσύνης» -- 'if-needed demon' -> «προσκόλληση διαδικασιών» -- 'implication' -> «συνεπαγωγή» -- 'imprecise data' -> «ανακριβή δεδομένα» -- 'incomplete' -> «μη-πλήρης» -- 'incomplete data' -> «ελλιπή δεδομένα» -- 'inconsistency effects' -> «ασυνεπή αποτελέσματα» -- 'inconsistency support' -> «ασύμβατη υποστήριξη» -- 'Incremental learning' -> «Αυξητική μάθηση» -- 'incremental learning' -> «επαυξητική μάθηση» -- 'indivisible action' -> «αδιαίρετη ενέργεια» -- 'induction' -> «επαγωγή» -- 'inductive learning' -> «επαγωγική μάθηση» -- 'inductive learning hypothesis' -> «υπόθεση επαγωγικής μάθησης» -- 'inductive logic programming' -> «επαγωγικός λογικός προγραμματισμός» -- 'inductive reasoning' -> «επαγωγική συλλογιστική» -- 'inference' -> «εξαγωγή συμπερασμάτων» -- 'Inference engine' -> «Μηχανή συμπερασμάτων» -- 'inference engine' -> «μηχανή εξαγωγής συμπερασμάτων» -- 'inference mechanism' -> «μηχανισμός εξαγωγής συμπερασμάτων» -- 'inference rules' -> «κανόνες εξαγωγής συμπερασμάτων» -- 'inferential adequacy' -> «επάρκεια συνεπαγωγής» -- 'inferential efficiency' -> «αποδοτικότητα συνεπαγωγής» -- 'inferential inefficiency' -> «μη-αποδοτικότητα επαγωγής» -- 'information gain' -> «κέρδος πληροφορίας» -- 'Information integration (II)' -> «Ενοποίηση πληροφοριών» -- 'Information Processing Language (IPL)' -> «Γλώσσα επεξεργασίας πληροφοριών» -- 'information retrieval' -> «ανάκτηση πληροφοριών» -- 'information value theory' -> «θεωρία αξίας της πληροφορίας» -- 'informative patterns' -> «πρότυπα πληροφόρησης» -- 'inheritance' -> «κληρονομικότητα» -- 'initial state' -> «αρχική κατάσταση» -- 'input layer' -> «επίπεδο εισόδου» -- 'instance' -> «στιγμιότυπο» -- 'instance-based learning' -> «μάθηση κατά περίπτωση» -- 'Intelligence amplification (IA)' -> «Ενίσχυση νοημοσύνης» -- 'Intelligence explosion' -> «Έκρηξη πληροφοριών» -- 'intelligent agent' -> «ευφυής πράκτορας» -- 'Intelligent agent (IA)' -> «Ευφυής παράγοντας» -- 'Intelligent control' -> «Έξυπνος έλεγχος» -- 'Intelligent 
personal assistant' -> «Έξυπνος προσωπικός βοηθός» -- 'intention' -> «πρόθεση» -- 'intentional stance' -> «προθεσιαρχική προσέγγιση» -- 'inter-transactional association rules' -> «δια-συναλλακτικοί κανόνες συσχέτισης» -- 'interaction protocol' -> «πρωτόκολλο αλληλεπίδρασης» -- 'interference' -> «παρέμβαση» -- 'interoperability' -> «διαλειτουργικότητα» -- 'interpolative associative memories' -> «μνήμη συσχέτισης παρεμβολής» -- 'Interpretation' -> «Ερμηνεία» -- 'interpretation' -> «ερμηνεία» -- 'interpretation models' -> «ερμηνευτικά μοντέλα» -- 'interpreter' -> «διερμηνέας» -- 'intra-transactional association rules' -> «ενδο-συναλλακτικοί κανόνες συσχέτισης» -- 'Intrinsic motivation' -> «Εσωτερικά κίνητρα» -- 'Issue tree' -> «Δέντρο έκδοσης» -- 'Iterative Deepening A* Search' -> «αναζήτηση Α* με επαναληπτική εκβάθυνση» -- 'Iterative Deepening Search' -> «αναζήτηση επαναληπτικής εκβάθυνσης» -- 'job-shop scheduling' -> «χρονοπρογραμματισμός καταστημάτων εργασιών» -- 'join' -> «συνένωση» -- 'Junction tree algorithm' -> «Αλγόριθμος δέντρων διασταύρωσης» -- 'K-consistency' -> «Κ-συνέπεια» -- 'K-means algorithm' -> «αλγόριθμος Κ-μέσων» -- 'k-nearest neighbors algorithm' -> «αλγόριθμος κ-πλησιέστερων γειτόνων» -- 'Kernel method' -> «Μέθοδος πυρήνα» -- 'knapsack problem' -> «πρόβλημα ταξιδιωτικού σάκου» -- 'knowledge' -> «γνώση» -- 'Knowledge acquisition' -> «Απόκτηση γνώσης» -- 'knowledge acquisition' -> «απόκτηση γνώσης» -- 'knowledge base' -> «βάση γνώσης» -- 'knowledge based system' -> «σύστημα βασισμένο στη γνώση» -- 'knowledge capture' -> «σύλληψη γνώσης» -- 'knowledge elicitation' -> «εκμαίευση γνώσης» -- 'knowledge engineer' -> «μηχανικός γνώσης» -- 'knowledge engineering' -> «τεχνολογία γνώσης» -- 'Knowledge engineering (KE)' -> «Μηχανική Γνώσης» -- 'Knowledge extraction' -> «Εξαγωγή γνώσης» -- 'knowledge extraction' -> «εξαγωγή γνώσης» -- 'Knowledge Interchange Format (KIF)' -> «Μορφή ανταλλαγής γνώσεων» -- 'knowledge management' -> «διαχείριση γνώσης» -- 
'knowledge modeling' -> «μοντελοποίηση γνώσης» -- 'Knowledge representation and reasoning (KR² or KR&R)' -> «Αναπαράσταση και συλλογιστική γνώσης» -- 'knowledge source' -> «πηγή γνώσης» -- 'knowledge system' -> «σύστημα βασισμένο στη γνώση» -- 'Knowledge-based system (KBS)' -> «Σύστημα βασισμένο στη γνώση» -- 'laddered grids' -> «βαθμωτά πλέγματα» -- 'lambda expressions' -> «εκφράσεις-λ» -- 'lateral excitation' -> «παράπλευρη διέγερση» -- 'lateral inhibition' -> «παράπλευρη καταστολή» -- 'layer' -> «στρώματα» -- 'Lazy learning' -> «Τεμπέλικη μάθηση» -- 'lazy learning' -> «αναβλητική μάθηση» -- 'learning' -> «μάθηση» -- 'learning from examples' -> «μάθηση με παραδείγματα» -- 'learning from observation' -> «μάθηση από παρατήρηση» -- 'least commitment principle' -> «αρχή της ελάχιστης δέσμευσης» -- 'linear associator' -> «γραμμικός συσχετιστής» -- 'linear plan' -> «γραμμικό πλάνο» -- 'linear regression' -> «γραμμική παρεμβολή» -- 'linear resolution' -> «γραμμική ανάλυση» -- 'linear time logic' -> «γραμμική χρονική λογική» -- 'linearly separable problems' -> «γραμμικώς διαχωρίσιμα προβλήματα» -- 'literal' -> «λεκτικό» -- 'local minima' -> «τοπικά ελάχιστα» -- 'logic clause' -> «λογική πρόταση» -- 'logic contradiction' -> «λογική αντίφαση» -- 'Logic programming' -> «Λογικός προγραμματισμός» -- 'logic semantics' -> «λογική σημασιολογία» -- 'logic substitution' -> «λογική αντικατάσταση» -- 'logical inadequacy' -> «λογική ανεπάρκεια» -- 'logical necessity' -> «λογική αναγκαιότητα» -- 'logical sufficiency' -> «λογική επάρκεια» -- 'logistic function' -> «λογιστική συνάρτηση» -- 'logistics' -> «εφοδιαστική» -- 'Long short-term memory (LSTM)' -> «Μακροπρόθεσμη μνήμη» -- 'machine evolution' -> «μηχανική εξέλιξη» -- 'machine learning' -> «μηχανική μάθηση» -- 'Machine learning (ML)' -> «Μηχανική μάθηση» -- 'Machine listening' -> «Μηχανική ακρόαση» -- 'Machine perception' -> «Μηχανική αντίληψη» -- 'machine vision' -> «μηχανική όραση» -- 'Machine vision (MV)' -> «Μηχανική όραση» -- 
'maintaining arc consistency' -> «διατήρηση συνέπειας τόξου»
unifier' -> «γενικότερος ενοποιητής» -- 'multi-agent planning' -> «πολυπρακτορικός σχεδιασμός» -- 'multi-agent system' -> «πολυπρακτορικό σύστημα» -- 'Multi-agent system (MAS)' -> «Σύστημα πολλαπλών πρακτόρων» -- 'Multi-swarm optimization' -> «Βελτιστοποίηση πολλαπλών σμήνων» -- 'multiple inheritance' -> «πολλαπλή κληρονομικότητα» -- 'multistage classification' -> «πολυβάθμια κατηγοριοποίηση» -- 'Mutation' -> «Μετάλλαξη» -- 'mutation' -> «μετάλλαξη» -- 'mutual exclusion relations' -> «σχέσεις αμοιβαίου αποκλεισμού» -- 'Naive Bayes classifier' -> «Ταξινομητής Naive Bayes» -- 'Naive semantics' -> «Αφελής σημασιολογία» -- 'Name binding' -> «Δέσμευση ονόματος» -- 'Named graph' -> «Ονομασμένο γράφημα» -- 'Named-entity recognition (NER)' -> «Αναγνώριση επώνυμης οντότητας» -- 'namespace' -> «χώρος ονομάτων» -- 'natural language' -> «φυσική γλώσσα» -- 'Natural language generation (NLG)' -> «Δημιουργία φυσικής γλώσσας» -- 'Natural language processing (NLP)' -> «Επεξεργασία φυσικής γλώσσας» -- 'Natural language programming' -> «Προγραμματισμός φυσικής γλώσσας» -- 'negation as failure' -> «άρνηση ως αποτυχία» -- 'negative context' -> «αρνητικό πλαίσιο (συμφραζόμενων)» -- 'negative preconditions' -> «αρνητικές προϋποθέσεις» -- 'negotiation' -> «διαπραγμάτευση» -- 'Network motif' -> «Μοτίβο δικτύου» -- 'network paralysis' -> «παράλυση νευρωνικού δικτύου» -- 'Neural machine translation (NMT)' -> «Νευρωνική μηχανική μετάφραση» -- 'neural network' -> «νευρωνικό δίκτυο» -- 'Neural Turing machine (NTM)' -> «Μηχανή Neural Turing» -- 'Neuro-fuzzy' -> «Νευρο-ασαφής» -- 'Neurocybernetics' -> «Νευροκυβερνητική» -- 'Neuromorphic engineering' -> «Νευρομορφική μηχανική» -- 'neuron' -> «νευρώνας» -- 'Node' -> «Κόμβος» -- 'node consistency' -> «συνέπεια κόμβου» -- 'noise reduction' -> «μείωση θορύβου» -- 'non-determinism' -> «μη-αιτιοκρατία» -- 'non-monotonic modal logic' -> «μη μονότονη τροπική λογική» -- 'non-symbolic artificial intelligence' -> «μη συμβολική τεχνητή νοημοσύνη» -- 
'Nondeterministic algorithm' -> «Μη προσδιοριστικός αλγόριθμος» -- 'Nouvelle AI' -> «Νέο AI» -- 'NP-completeness' -> «NP-πληρότητα» -- 'NP-hardness' -> «NP-σκληρότητα» -- 'null plan' -> «μηδενικό πλάνο» -- 'object' -> «αντικείμενο» -- 'object instances' -> «στιγμιότυπα αντικειμένου» -- 'object-oriented programming' -> «αντικειμενοστραφής προγραμματισμός» -- 'obligation' -> «υποχρέωση» -- 'Occam's razor' -> «ξυράφι του Όκαμ» -- 'occurs check' -> «έλεγχος εμφάνισης» -- 'OCR – Optical Character Recognition' -> «οπτική αναγνώριση χαρακτήρων» -- 'Offline learning' -> «Εκμάθηση εκτός σύνδεσης» -- 'offsprings' -> «απόγονοι» -- 'omniscience' -> «παντογνωσία» -- 'Online machine learning' -> «Διαδικτυακή μηχανική εκμάθηση» -- 'ontology' -> «οντολογία» -- 'Ontology learning' -> «Εκμάθηση οντολογίας» -- 'open world' -> «ανοιχτός κόσμος» -- 'Open-source software (OSS)' -> «Λογισμικό ανοιχτού κώδικα» -- 'opportunistic scheduling' -> «καιροσκοπικός χρονοπρογραμματισμός» -- 'optimal solution' -> «βέλτιστη λύση» -- 'optimization' -> «βελτιστοποίηση» -- 'order inconsistent plan' -> «πλάνο ασυνεπές ως προς τις διατάξεις» -- 'ordered game tree' -> «διατεταγμένο δένδρο» -- 'ordering constraint' -> «περιορισμοί διάταξης» -- 'output layer' -> «επίπεδα εξόδου» -- 'overfitting' -> «υπερπροσαρμογή» -- 'overloading' -> «υπερφόρτωση» -- 'parallel search' -> «παράλληλη αναζήτηση» -- 'parse tree' -> «δένδρο συντακτικής ανάλυσης» -- 'partial look ahead algorithm' -> «αλγόριθμος έγκαιρης μερικής εξέτασης» -- 'Partial order reduction' -> «Μερική μείωση παραγγελίας» -- 'Partially observable Markov decision process (POMDP)' -> «Μερικώς παρατηρήσιμη διαδικασία απόφασης Markov» -- 'Particle swarm optimization (PSO)' -> «Βελτιστοποίηση σμήνος σωματιδίων» -- 'passive troubleshooting' -> «παθητική διάγνωση» -- 'path consistency algorithm' -> «αλγόριθμος συνέπειας μονοπατιού» -- 'Pathfinding' -> «Διαδρομή» -- 'pattern' -> «πρότυπα» -- 'pattern matching' -> «ταυτοποίηση» -- 'pattern of activity' -> 
«πρότυπα δραστηριότητας» -- 'Pattern recognition' -> «Αναγνώριση μοτίβου» -- 'phenotype' -> «φαινότυπο» -- 'phonemes' -> «φθόγγοι» -- 'physical stance' -> «φυσική προσέγγιση» -- 'pixel' -> «εικονοστοιχείο» -- 'plan' -> «πλάνο» -- 'plan solution' -> «λύση πλάνου» -- 'plan space' -> «χώρος πλάνων» -- 'planner' -> «σχεδιαστής» -- 'planning contingency' -> «σχεδιασμός πολλαπλών ενδεχομένων» -- 'planning graph' -> «γράφος σχεδιασμού» -- 'planning system' -> «σύστημα σχεδιασμού» -- 'polymorphism' -> «πολυμορφισμός» -- 'portals' -> «διαδικτυακές πύλες» -- 'positive context' -> «θετικό πλαίσιο συμφραζόμενων» -- 'powerset' -> «δυναμοσύνολο» -- 'pragmatic analysis' -> «πραγματολογική ανάλυση» -- 'precondition list' -> «λίστα προϋποθέσεων» -- 'predicate' -> «κατηγόρημα» -- 'Predicate logic' -> «Λογική κατηγορήματος» -- 'predicate logic' -> «κατηγορηματική λογική» -- 'prediction' -> «πρόγνωση» -- 'Predictive analytics' -> «Προγνωστική ανάλυση» -- 'predictive models' -> «μοντέλο πρόβλεψης» -- 'prenex conjunctive normal form' -> «προσημασμένη συζευκτική κανονική μορφή» -- 'primitive action' -> «αρχέγονη ενέργεια» -- 'primitive conceptualizations' -> «αρχέγονες εννοιολογικές μορφές» -- 'primitive problem' -> «αρχέγονο πρόβλημα» -- 'Principal component analysis (PCA)' -> «Ανάλυση κύριου συστατικού» -- 'Principle of rationality' -> «Αρχή του ορθολογισμού» -- 'prior probability' -> «προϋπάρχουσα πιθανότητα» -- 'pro-activeness' -> «προνοητικότητα» -- 'Probabilistic programming (PP)' -> «Πιθανοτικός προγραμματισμός» -- 'probability planning' -> «σχεδιασμός με πιθανότητες» -- 'problem description' -> «περιγραφή προβλήματος» -- 'problem world' -> «κόσμος προβλήματος» -- 'procedural knowledge' -> «διαδικαστική γνώση» -- 'production rules' -> «κανόνες παραγωγής» -- 'Production system' -> «Σύστημα παραγωγής» -- 'production system' -> «σύστημα κανόνων παραγωγής» -- 'Programming language' -> «Γλώσσα προγραμματισμού» -- 'progression' -> «ορθή διάσχιση» -- 'projection' -> «προβολή» -- 
'promotion' -> «προβιβασμός» -- 'proof' -> «απόδειξη» -- 'proof by contradiction' -> «εις άτοπο απαγωγή» -- 'proof layer' -> «επίπεδο αξιοπιστίας» -- 'proof procedure' -> «διαδικασία απόδειξης» -- 'Propositional calculus' -> «Προτασιακός λογισμός» -- 'propositional logic' -> «προτασιακή λογική» -- 'propositional rules' -> «προτασιακοί κανόνες» -- 'pruning' -> «κλάδεμα» -- 'pure node' -> «αμιγής κόμβος» -- 'pure tree' -> «αμιγές δένδρο» -- 'Python' -> «Πύθων» -- 'Qualification problem' -> «Πρόβλημα προσόντων» -- 'qualitative reasoning' -> «ποιοτική συλλογιστική» -- 'Quantifier' -> «Ποσοτικοποιητής» -- 'quantifier' -> «ποσοδείκτες» -- 'Quantum computing' -> «Κβαντική Υπολογιστική» -- 'Query language' -> «Γλώσσα ερωτήματος» -- 'R programming language' -> «Γλώσσα προγραμματισμού R» -- 'Radial basis function network' -> «Δίκτυο λειτουργίας ακτινικής βάσης» -- 'Random forest' -> «Τυχαίο δάσος» -- 'random learning' -> «τυχαία μάθηση» -- 'rationality' -> «λογικότητα» -- 'reactive agent' -> «αντιδραστικός πράκτορας» -- 'reactive rules' -> «αντιδραστικοί κανόνες» -- 'reactiveness' -> «αντιδραστικότητα» -- 'reasoning' -> «συλλογιστική» -- 'Reasoning system' -> «Σύστημα συλλογισμού» -- 'recurrent' -> «ανατροφοδοτούμενος» -- 'recurrent neural networks' -> «νευρωνικά δίκτυα με ανατροφοδότηση» -- 'Recurrent neural network (RNN)' -> «Επαναλαμβανόμενο νευρωνικό δίκτυο» -- 'recursion' -> «αναδρομή» -- 'reduction' -> «αναγωγή» -- 'reduction operator' -> «τελεστής αναγωγής» -- 'refutation' -> «εις άτοπο απαγωγή» -- 'refutation completeness' -> «πληρότητα ατόπου» -- 'Region connection calculus' -> «Λογισμός σύνδεσης περιοχής» -- 'regression' -> «παλινδρόμηση» -- 'reinforcement learning' -> «ενισχυτική μάθηση» -- 'Reinforcement learning (RL)' -> «Ενισχυτική μάθηση» -- 'repair algorithm' -> «αλγόριθμος επιδιόρθωσης» -- 'repair space' -> «χώρος επιδιορθώσεων» -- 'replanning' -> «επανασχεδιασμός» -- 'representational adequacy' -> «επάρκεια αναπαράστασης» -- 'Reservoir computing' -> 
«Υπολογισμός δεξαμενής» -- 'resolution principle' -> «αρχή της ανάλυσης» -- 'resolvent' -> «αναλυθέν» -- 'resource competition' -> «ανταγωνισμός πόρων» -- 'Resource Description Framework (RDF)' -> «Πλαίσιο Περιγραφής Πόρων» -- 'resource planning' -> «σχεδιασμός με πόρους» -- 'Restricted Boltzmann machine (RBM)' -> «Περιορισμένη μηχανή Boltzmann» -- 'restriction' -> «περιορισμός» -- 'reversible operator' -> «τελεστής αντιστρέψιμος» -- 'robot' -> «ρομπότ» -- 'robotic agent' -> «ρομποτικός πράκτορας» -- 'Robotics' -> «Ρομποτική» -- 'rule action' -> «ενέργεια κανόνα» -- 'rule base' -> «βάση κανόνων» -- 'rule cluster' -> «ομάδα κανόνων» -- 'rule conclusion' -> «συμπέρασμα κανόνα» -- 'rule condition' -> «συνθήκη κανόνα» -- 'rule of inference' -> «κανόνας συμπερασμού» -- 'Rule-based system' -> «Σύστημα βασισμένο σε κανόνες» -- 'Satisfiability' -> «Ικανοποίηση» -- 'scheduler' -> «χρονοπρογραμματιστής» -- 'schema theorem' -> «θεώρημα σχημάτων» -- 'scout' -> «ανιχνευτής» -- 'scripts' -> «σενάρια» -- 'Search algorithm' -> «Αλγόριθμος αναζήτησης» -- 'search algorithms' -> «αλγόριθμοι αναζήτησης» -- 'search engines' -> «μηχανές αναζήτησης» -- 'search frontier' -> «μέτωπο αναζήτησης» -- 'search space' -> «χώρος αναζήτησης» -- 'search thread' -> «νήμα αναζήτησης» -- 'search tree' -> «δένδρο αναζήτησης» -- 'Selection' -> «Επιλογή» -- 'selection fitness proportionate' -> «επιλογή αναλογικής καταλληλότητας» -- 'selection roulette wheel' -> «επιλογή ρουλέτας» -- 'selection tournament' -> «επιλογή τουρνουά» -- 'Selective Linear Definite clause resolution' -> «Επιλεκτική γραμμική ανάλυση οριστικής πρότασης» -- 'self decay' -> «εξασθένιση» -- 'Self-management' -> «Αυτοδιαχείρηση» -- 'self-organizing feature map' -> «αυτο-οργανούμενη απεικόνιση» -- 'semantic analysis' -> «σημασιολογική ανάλυση» -- 'semantic knowledge' -> «σημασιολογική γνώση» -- 'Semantic network' -> «Σημασιολογικό δίκτυο» -- 'semantic networks' -> «σημασιολογικά δίκτυα» -- 'Semantic query' -> «Σημασιολογική ερώτηση» -- 
'Semantic reasoner' -> «Σημασιολογικός λογιστής» -- 'semantic web' -> «σημασιολογικός ιστός» -- 'Semantics' -> «Σημασιολογία» -- 'semantics' -> «σημασιολογία» -- 'sensor' -> «αισθητήρας» -- 'Sensor fusion' -> «Σύντηξη αισθητήρα» -- 'Separation logic' -> «Λογική χωρισμού» -- 'sequential covering algorithm' -> «αλγόριθμος σειριακής κάλυψης» -- 'sequential pattern minimg' -> «εξόρυξη ακολουθιακών προτύπων» -- 'shallow knowledge' -> «ρηχή γνώση» -- 'shell' -> «κέλυφος έμπειρου συστήματος» -- 'sigmoid functions' -> «σιγμοειδείς συναρτήσεις» -- 'sign function' -> «συνάρτηση πρόσημου» -- 'Similarity learning' -> «Εκμάθηση ομοιότητας» -- 'simplification' -> «απλοποίηση» -- 'Simulated Annealing Search' -> «αναζήτηση προσομοιωμένης ανόπτησης» -- 'Simulated annealing (SA)' -> «Προσομοίωση ανόπτησης» -- 'Situated approach' -> «Τοποθετημένη προσέγγιση» -- 'Situation calculus' -> «Λογισμός καταστάσεων» -- 'situation calculus' -> «λογισμός καταστάσεων» -- 'skeptical logic' -> «σκεπτικιστική λογική» -- 'skolemization' -> «σκολεμοποίηση» -- 'smoothing' -> «εξομάλυνση» -- 'social ability' -> «κοινωνικότητα» -- 'softbot' -> «λογισμικός πράκτορας» -- 'Software' -> «Λογισμικό» -- 'software agent' -> «λογισμικός πράκτορας» -- 'Software engineering' -> «Μηχανική Λογισμικού» -- 'solution extraction' -> «εξαγωγή λύσης» -- 'solution refinement' -> «επιλογή λύσης» -- 'sparse data' -> «αραιά δεδομένα» -- 'Spatial-temporal reasoning' -> «Χωροχρονικός συλλογισμός» -- 'specialization rule' -> «κανόνας εξειδίκευσης» -- 'spectrogram' -> «φασματογράφημα» -- 'Speech Act Theory' -> «Θεωρία Πράξεων Λόγου» -- 'Speech recognition' -> «Αναγνώρισης ομιλίας» -- 'speech recognition' -> «αναγνώριση ομιλίας» -- 'spelling correction rules' -> «αλγόριθμος διόρθωσης ορθογραφικών λαθών» -- 'Spiking neural network (SNN)' -> «Spiking νευρωνικό δίκτυο» -- 'Stanford Research Institute Problem Solver (STRIPS)' -> «Επίλυση προβλημάτων του Ερευνητικού Ινστιτούτου Στάνφορντ» -- 'State' -> «Κατάσταση» -- 'state' -> 
«κατάσταση» -- 'state space' -> «χώρος καταστάσεων» -- 'state-space planning' -> «σχεδιασμός χώρου καταστάσεων» -- 'static world' -> «στατικός κόσμος» -- 'Statistical classification' -> «Στατιστική ταξινόμηση» -- 'Statistical relational learning (SRL)' -> «Στατιστική σχεσιακή μάθηση» -- 'step function' -> «βηματική συνάρτηση» -- 'Stochastic optimization (SO)' -> «Στοχαστική βελτιστοποίηση» -- 'Stochastic semantic analysis' -> «Στοχαστική σημασιολογική ανάλυση» -- 'strict rules' -> «ισχυροί κανόνες» -- 'strong negation' -> «κλασική άρνηση» -- 'Subject-matter expert' -> «Εμπειρογνώμονας σε θέματα» -- 'subsumption architecture' -> «αρχιτεκτονική υπαγωγής» -- 'Superintelligence' -> «Υπερευφυΐα» -- 'superiority relation' -> «σχέση υπεροχής» -- 'Supervised learning' -> «Επίβλεψη μάθησης» -- 'supervised learning' -> «μάθηση με επίβλεψη» -- 'support' -> «υποστήριξη» -- 'Support Vector Machines' -> «μηχανές διανυσμάτων υποστήριξης» -- 'Support-vector machines' -> «Υποστήριξη-διανυσματικά μηχανήματα» -- 'Swarm intelligence (SI)' -> «Νοημοσύνη σμήνους» -- 'Symbolic artificial intelligence' -> «Συμβολική τεχνητή νοημοσύνη» -- 'symbolic artificial intelligence' -> «συμβολική τεχνητή νοημοσύνη» -- 'symbolic logic' -> «συμβολική λογική» -- 'synapse' -> «σύναψη» -- 'syntactic analysis' -> «συντακτική ανάλυση» -- 'Synthetic intelligence (SI)' -> «Συνθετική νοημοσύνη» -- 'system model' -> «μοντέλο συστήματος» -- 'Systems neuroscience' -> «Συστημική νευροεπιστήμη» -- 'Tabu Search' -> «αναζήτηση με απαγορευμένες καταστάσεις» -- 'tacit knowledge' -> «άρρητη γνώση» -- 'target function' -> «συνάρτηση στόχος» -- 'tautology' -> «ταυτολογία» -- 'teach-back' -> «επαναδιδασκαλία» -- 'Technological singularity' -> «Τεχνολογική ιδιομορφία» -- 'temporal association rules' -> «κανόνες συσχέτισης χρονικοί» -- 'Temporal difference learning' -> «Εκμάθηση χρονικής διαφοράς» -- 'temporal logic' -> «λογική χρονική» -- 'Tensor network theory' -> «Θεωρία τανυστικού δικτύου» -- 'term' -> «όρος» -- 'term 
assignment' -> «ανάθεση όρων» -- 'terminal state' -> «τερματική κατάσταση» -- 'text categorization' -> «κατηγοριοποίηση κειμένων» -- 'text planning' -> «σχεδιασμός κειμένου» -- 'Theoretical computer science (TCS)' -> «Θεωρητική επιστήμη των υπολογιστών» -- 'Theory of computation' -> «Θεωρία υπολογισμού» -- 'therapy space' -> «χώρος θεραπειών» -- 'Thompson sampling' -> «Δειγματοληψία Thompson» -- 'threat' -> «απειλή» -- 'threshold effect' -> «φαινόμενο κατωφλίου» -- 'threshold function' -> «συνάρτηση ενεργοποίησης» -- 'Time complexity' -> «Χρονική πολυπλοκότητα» -- 'timetable' -> «ωρολόγιο πρόγραμμα» -- 'topological sort' -> «τοπολογική διάταξη» -- 'total ordered plan' -> «πλάνο πλήρους διάταξης» -- 'Transhumanism' -> «Υπερανθρωπισμός» -- 'transition operator' -> «τελεστής μετάβασης» -- 'Transition system' -> «Σύστημα μετάβασης» -- 'Tree traversal' -> «Διάβαση δέντρου» -- 'trigger' -> «σκανδαλιστές» -- 'troubleshooting' -> «επιδιόρθωση βλαβών» -- 'True quantified Boolean formula' -> «Αληθής ποσοτικοποιημένος τύπος Boolean» -- 'trust layer' -> «επίπεδο αξιοπιστίας» -- 'truth maintenance' -> «συντήρηση αλήθειας» -- 'truth table' -> «πίνακας αληθείας» -- 'Turing machine' -> «Μηχανή Turing» -- 'Turing test' -> «Δοκιμή Turing» -- 'tutorial interview' -> «διδακτική συνέντευξη» -- 'two-person game' -> «παίγνια δύο αντιπάλων» -- 'Type system' -> «Σύστημα τύπου» -- 'unary constraint' -> «μοναδιαίος περιορισμός» -- 'unconditional probability' -> «πιθανότητα άνευ συνθηκών» -- 'underfitting' -> «υποπροσαρμογή» -- 'unification' -> «ενοποίηση» -- 'unifier' -> «ενοποιητής» -- 'unit clause' -> «μοναδιαία πρόταση» -- 'universal quantifier' -> «καθολικός ποσοδείκτης» -- 'unrestrict' -> «επέκταση» -- 'Unsupervised learning' -> «Εκμάθηση χωρίς επίβλεψη» -- 'unsupervised learning' -> «μάθηση χωρίς επίβλεψη» -- 'valence' -> «σθένος» -- 'valid plan' -> «έγκυρο πλάνο» -- 'validation' -> «έλεγχος αξιοπιστίας» -- 'validation data' -> «δεδομένα επικύρωσης» -- 'veracity' -> «ειλικρίνεια» -- 
'verification' -> «επαλήθευση» -- 'Vision processing unit (VPU)' -> «Μονάδα επεξεργασίας όρασης» -- 'Weak AI' -> «Αδύναμη AI» -- 'web portals' -> «πύλες παγκόσμιου ιστού» -- 'web resource' -> «πόρος παγκόσμιου ιστού» -- 'web services' -> «υπηρεσίες παγκόσμιου ιστού» -- 'well formed formulae' -> «ορθά δομημένοι τύποι» -- 'working memory' -> «χώρος εργασίας» -- 'World Wide Web Consortium (W3C)' -> «Κοινοπραξία World Wide Web» diff --git a/src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/glossaries/machine_learning/es.txt b/src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/glossaries/machine_learning/es.txt deleted file mode 100644 index 1412f5b64..000000000 --- a/src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/glossaries/machine_learning/es.txt +++ /dev/null @@ -1,175 +0,0 @@ -# ES HINTS -## TERM MAPPINGS -These are preferred terminology choices for this language. Use them whenever they sound natural; adapt freely if context requires. 
- -- 'accuracy' -> 'exactitud' -- 'activation function' -> 'función de activación' -- 'artificial intelligence' -> 'inteligencia artificial' -- 'AUC' -> 'AUC' -- 'AUC (Area under the ROC curve)' -> 'AUC (área bajo la curva ROC)' -- 'backpropagation' -> 'propagación inversa' -- 'batch' -> 'lote' -- 'batch size' -> 'tamaño del lote' -- 'bias (ethics/fairness)' -> 'sesgo (ética/equidad)' -- 'bias (math) or bias term' -> 'ordenada al origen (matemática) o término de sesgo' -- 'bias in ethics and fairness' -> 'sesgo en ética y equidad' -- 'bias term' -> 'término de sesgo' -- 'binary classification' -> 'Clasificación binaria' -- 'bucketing' -> 'Agrupamiento' -- 'categorical' -> 'categórico' -- 'categorical data' -> 'datos categóricos' -- 'class' -> 'clase' -- 'class-imbalanced dataset' -> 'conjunto de datos con desequilibrio de clases' -- 'class-imbalanced datasets' -> 'conjuntos de datos con desequilibrio de clases' -- 'classification' -> 'clasificación' -- 'classification model' -> 'modelo de clasificación' -- 'classification threshold' -> 'umbral de clasificación' -- 'classifier' -> 'clasificador' -- 'clipping' -> 'recorte' -- 'confusion matrix' -> 'matriz de confusión' -- 'continuous feature' -> 'atributo continuo' -- 'convergence' -> 'convergencia' -- 'data set or dataset' -> 'conjunto de datos (data set o dataset)' -- 'DataFrame' -> 'DataFrame' -- 'dataset' -> 'conjunto de datos' -- 'deep learning' -> 'aprendizaje profundo' -- 'deep model' -> 'modelo profundo' -- 'dense feature' -> 'atributo denso' -- 'depth' -> 'depth' -- 'discrete feature' -> 'atributo discreto' -- 'discrete features' -> 'atributos discretos' -- 'dynamic' -> 'dinámico' -- 'dynamic model' -> 'modelo dinámico' -- 'early stopping' -> 'Interrupción anticipada' -- 'embedding layer' -> 'Capa de embedding' -- 'embedding layers' -> 'capas de incorporación' -- 'epoch' -> 'época' -- 'example' -> 'ejemplo' -- 'false negative (FN)' -> 'falso negativo (FN)' -- 'false negatives' -> 'falsos negativos' -- 'false 
positive (FP)' -> 'Falso positivo (FP)' -- 'false positive rate' -> 'tasa de falsos positivos' -- 'false positive rate (FPR)' -> 'tasa de falsos positivos (FPR)' -- 'false positives' -> 'falsos positivos' -- 'feature' -> 'función' -- 'feature cross' -> 'combinación de atributos' -- 'feature crosses' -> 'combinaciones de atributos' -- 'feature engineering' -> 'ingeniería de atributos.' -- 'feature set' -> 'conjunto de atributos' -- 'feature vector' -> 'vector de atributos' -- 'feedback loop' -> 'ciclo de retroalimentación' -- 'generalization' -> 'generalización' -- 'generalization curve' -> 'Curva de generalización' -- 'gradient descent' -> 'descenso de gradientes' -- 'ground truth' -> 'Verdad fundamental' -- 'hidden layer' -> 'Capa oculta' -- 'hidden layer(s)' -> 'capas ocultas' -- 'hyperparameter' -> 'hiperparámetro' -- 'independently and identically distributed (i.i.d)' -> 'independiente e idénticamente distribuido (i.i.d.)' -- 'inference' -> 'Inferencia' -- 'input layer' -> 'capa de entrada' -- 'interpretability' -> 'interpretabilidad' -- 'iteration' -> 'iteración' -- 'L0regularization' -> 'Regularización L0' -- 'L1loss' -> 'pérdida L1' -- 'L1regularization' -> 'regularización L1' -- 'L2loss' -> 'pérdida L2' -- 'L2regularization' -> 'regularización L2' -- 'label' -> 'etiqueta' -- 'labeled example' -> 'ejemplo etiquetado' -- 'lambda' -> 'lambda' -- 'layer' -> 'oculta' -- 'learning rate' -> 'Tasa de aprendizaje' -- 'linear' -> 'linear' -- 'linear model' -> 'modelo lineal' -- 'linear models' -> 'modelos lineales' -- 'linear regression' -> 'regresión lineal' -- 'Log Loss' -> 'pérdida logística' -- 'log-odds' -> 'Logaritmo de probabilidad' -- 'logistic regression' -> 'regresión logística' -- 'loss' -> 'pérdida' -- 'loss curve' -> 'Curva de pérdida' -- 'loss function' -> 'función de pérdida' -- 'machine learning' -> 'aprendizaje automático' -- 'majority class' -> 'clase mayoritaria' -- 'mini-batch' -> 'minilote' -- 'minority class' -> 'clase minoritaria' -- 'model' -> 
'modelo' -- 'multi-class classification' -> 'clasificación de clases múltiples' -- 'negative class' -> 'clase negativa' -- 'negative classes' -> 'clases negativas' -- 'neural network' -> 'neuronal prealimentada' -- 'neural networks' -> 'redes neuronales' -- 'neuron' -> 'neurona' -- 'node (neural network)' -> 'nodo (red neuronal)' -- 'nonlinear' -> 'no lineal' -- 'nonstationarity' -> 'no estacionariedad' -- 'normalization' -> 'Normalización' -- 'numerical data' -> 'datos numéricos' -- 'offline' -> 'Sin conexión' -- 'offline inference' -> 'inferencia sin conexión' -- 'one-hot encoding' -> 'codificación one-hot' -- 'one-hot vector' -> 'vector de un solo 1' -- 'one-vs.-all' -> 'uno frente a todos' -- 'online' -> 'en línea' -- 'online inference' -> 'inferencia en línea' -- 'output layer' -> 'capa de salida' -- 'output layers' -> 'capas de salida' -- 'overfitting' -> 'sobreajuste' -- 'pandas' -> 'pandas' -- 'parameter' -> 'parámetro' -- 'positive class' -> 'clase positiva' -- 'positive classes' -> 'clases positivas' -- 'post-processing' -> 'posprocesamiento' -- 'precision' -> 'precision' -- 'prediction' -> 'predicción' -- 'proxy labels' -> 'etiquetas de proxy' -- 'RAG' -> 'RAG' -- 'rater' -> 'evaluador' -- 'recall' -> 'recall' -- 'Rectified Linear Unit (ReLU)' -> 'Unidad lineal rectificada (ReLU)' -- 'regression model' -> 'modelo de regresión' -- 'regularization' -> 'regularización' -- 'regularization rate' -> 'tasa de regularización' -- 'ReLU' -> 'ReLU' -- 'retrieval-augmented generation' -> 'generación aumentada por recuperación' -- 'retrieval-augmented generation (RAG)' -> 'Generación mejorada por recuperación (RAG)' -- 'ROC (receiver operating characteristic) Curve' -> 'Curva ROC (característica operativa del receptor)' -- 'ROC curve' -> 'curva ROC' -- 'Root Mean Squared Error (RMSE)' -> 'Raíz cuadrada del error cuadrático medio (RMSE)' -- 'sigmoid function' -> 'función sigmoidea' -- 'softmax' -> 'softmax' -- 'sparse feature' -> 'atributo disperso' -- 'sparse 
representation' -> 'representación dispersa' -- 'sparse vector' -> 'vector disperso' -- 'squared loss' -> 'Pérdida al cuadrado' -- 'static' -> 'static' -- 'static inference' -> 'Inferencia estática' -- 'static model' -> 'modelo estático' -- 'stationarity' -> 'Estacionariedad' -- 'Stochastic Gradient Descent (SGD)' -> 'Descenso de gradientes estocástico (SGD)' -- 'supervised learning' -> 'aprendizaje supervisado' -- 'supervised machine learning' -> 'aprendizaje automático supervisado' -- 'synthetic feature' -> 'atributo sintético' -- 'synthetic features' -> 'atributos sintéticos' -- 'test loss' -> 'Pérdida de prueba' -- 'training' -> 'entrenamiento' -- 'training loss' -> 'Pérdida de entrenamiento' -- 'training set' -> 'conjunto de entrenamiento' -- 'training-serving skew' -> 'Sesgo entre el entrenamiento y la entrega' -- 'true negative (TN)' -> 'verdadero negativo (VN)' -- 'true negatives' -> 'verdaderos negativos' -- 'true positive (TP)' -> 'verdadero positivo (VP)' -- 'true positive rate' -> 'tasa de verdaderos positivos' -- 'true positive rate (TPR)' -> 'tasa de verdaderos positivos (TVP)' -- 'true positives' -> 'verdaderos positivos' -- 'underfitting' -> 'Subajuste' -- 'unlabeled example' -> 'ejemplo sin etiqueta' -- 'unsupervised machine learning' -> 'aprendizaje automático no supervisado' -- 'validation' -> 'validación' -- 'validation dataset' -> 'conjunto de datos de validación' -- 'validation loss' -> 'Pérdida de validación' -- 'validation set' -> 'conjunto de validación' -- 'weight' -> 'peso' -- 'weighted sum' -> 'suma ponderada' -- 'Z-score normalization' -> 'normalización de la puntuación Z' diff --git a/src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/glossaries/machine_learning/es_419.txt b/src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/glossaries/machine_learning/es_419.txt deleted file mode 100644 index 1412f5b64..000000000 --- 
a/src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/glossaries/machine_learning/es_419.txt +++ /dev/null @@ -1,175 +0,0 @@ -# ES HINTS -## TERM MAPPINGS -These are preferred terminology choices for this language. Use them whenever they sound natural; adapt freely if context requires. - -- 'accuracy' -> 'exactitud' -- 'activation function' -> 'función de activación' -- 'artificial intelligence' -> 'inteligencia artificial' -- 'AUC' -> 'AUC' -- 'AUC (Area under the ROC curve)' -> 'AUC (área bajo la curva ROC)' -- 'backpropagation' -> 'propagación inversa' -- 'batch' -> 'lote' -- 'batch size' -> 'tamaño del lote' -- 'bias (ethics/fairness)' -> 'sesgo (ética/equidad)' -- 'bias (math) or bias term' -> 'ordenada al origen (matemática) o término de sesgo' -- 'bias in ethics and fairness' -> 'sesgo en ética y equidad' -- 'bias term' -> 'término de sesgo' -- 'binary classification' -> 'Clasificación binaria' -- 'bucketing' -> 'Agrupamiento' -- 'categorical' -> 'categórico' -- 'categorical data' -> 'datos categóricos' -- 'class' -> 'clase' -- 'class-imbalanced dataset' -> 'conjunto de datos con desequilibrio de clases' -- 'class-imbalanced datasets' -> 'conjuntos de datos con desequilibrio de clases' -- 'classification' -> 'clasificación' -- 'classification model' -> 'modelo de clasificación' -- 'classification threshold' -> 'umbral de clasificación' -- 'classifier' -> 'clasificador' -- 'clipping' -> 'recorte' -- 'confusion matrix' -> 'matriz de confusión' -- 'continuous feature' -> 'atributo continuo' -- 'convergence' -> 'convergencia' -- 'data set or dataset' -> 'conjunto de datos (data set o dataset)' -- 'DataFrame' -> 'DataFrame' -- 'dataset' -> 'conjunto de datos' -- 'deep learning' -> 'aprendizaje profundo' -- 'deep model' -> 'modelo profundo' -- 'dense feature' -> 'atributo denso' -- 'depth' -> 'depth' -- 'discrete feature' -> 'atributo discreto' -- 'discrete features' -> 'atributos discretos' -- 'dynamic' -> 'dinámico' -- 'dynamic model' -> 
'modelo dinámico' -- 'early stopping' -> 'Interrupción anticipada' -- 'embedding layer' -> 'Capa de embedding' -- 'embedding layers' -> 'capas de incorporación' -- 'epoch' -> 'época' -- 'example' -> 'ejemplo' -- 'false negative (FN)' -> 'falso negativo (FN)' -- 'false negatives' -> 'falsos negativos' -- 'false positive (FP)' -> 'Falso positivo (FP)' -- 'false positive rate' -> 'tasa de falsos positivos' -- 'false positive rate (FPR)' -> 'tasa de falsos positivos (FPR)' -- 'false positives' -> 'falsos positivos' -- 'feature' -> 'función' -- 'feature cross' -> 'combinación de atributos' -- 'feature crosses' -> 'combinaciones de atributos' -- 'feature engineering' -> 'ingeniería de atributos.' -- 'feature set' -> 'conjunto de atributos' -- 'feature vector' -> 'vector de atributos' -- 'feedback loop' -> 'ciclo de retroalimentación' -- 'generalization' -> 'generalización' -- 'generalization curve' -> 'Curva de generalización' -- 'gradient descent' -> 'descenso de gradientes' -- 'ground truth' -> 'Verdad fundamental' -- 'hidden layer' -> 'Capa oculta' -- 'hidden layer(s)' -> 'capas ocultas' -- 'hyperparameter' -> 'hiperparámetro' -- 'independently and identically distributed (i.i.d)' -> 'independiente e idénticamente distribuido (i.i.d.)' -- 'inference' -> 'Inferencia' -- 'input layer' -> 'capa de entrada' -- 'interpretability' -> 'interpretabilidad' -- 'iteration' -> 'iteración' -- 'L0regularization' -> 'Regularización L0' -- 'L1loss' -> 'pérdida L1' -- 'L1regularization' -> 'regularización L1' -- 'L2loss' -> 'pérdida L2' -- 'L2regularization' -> 'regularización L2' -- 'label' -> 'etiqueta' -- 'labeled example' -> 'ejemplo etiquetado' -- 'lambda' -> 'lambda' -- 'layer' -> 'oculta' -- 'learning rate' -> 'Tasa de aprendizaje' -- 'linear' -> 'linear' -- 'linear model' -> 'modelo lineal' -- 'linear models' -> 'modelos lineales' -- 'linear regression' -> 'regresión lineal' -- 'Log Loss' -> 'pérdida logística' -- 'log-odds' -> 'Logaritmo de probabilidad' -- 'logistic 
regression' -> 'regresión logística' -- 'loss' -> 'pérdida' -- 'loss curve' -> 'Curva de pérdida' -- 'loss function' -> 'función de pérdida' -- 'machine learning' -> 'aprendizaje automático' -- 'majority class' -> 'clase mayoritaria' -- 'mini-batch' -> 'minilote' -- 'minority class' -> 'clase minoritaria' -- 'model' -> 'modelo' -- 'multi-class classification' -> 'clasificación de clases múltiples' -- 'negative class' -> 'clase negativa' -- 'negative classes' -> 'clases negativas' -- 'neural network' -> 'neuronal prealimentada' -- 'neural networks' -> 'redes neuronales' -- 'neuron' -> 'neurona' -- 'node (neural network)' -> 'nodo (red neuronal)' -- 'nonlinear' -> 'no lineal' -- 'nonstationarity' -> 'no estacionariedad' -- 'normalization' -> 'Normalización' -- 'numerical data' -> 'datos numéricos' -- 'offline' -> 'Sin conexión' -- 'offline inference' -> 'inferencia sin conexión' -- 'one-hot encoding' -> 'codificación one-hot' -- 'one-hot vector' -> 'vector de un solo 1' -- 'one-vs.-all' -> 'uno frente a todos' -- 'online' -> 'en línea' -- 'online inference' -> 'inferencia en línea' -- 'output layer' -> 'capa de salida' -- 'output layers' -> 'capas de salida' -- 'overfitting' -> 'sobreajuste' -- 'pandas' -> 'pandas' -- 'parameter' -> 'parámetro' -- 'positive class' -> 'clase positiva' -- 'positive classes' -> 'clases positivas' -- 'post-processing' -> 'posprocesamiento' -- 'precision' -> 'precision' -- 'prediction' -> 'predicción' -- 'proxy labels' -> 'etiquetas de proxy' -- 'RAG' -> 'RAG' -- 'rater' -> 'evaluador' -- 'recall' -> 'recall' -- 'Rectified Linear Unit (ReLU)' -> 'Unidad lineal rectificada (ReLU)' -- 'regression model' -> 'modelo de regresión' -- 'regularization' -> 'regularización' -- 'regularization rate' -> 'tasa de regularización' -- 'ReLU' -> 'ReLU' -- 'retrieval-augmented generation' -> 'generación aumentada por recuperación' -- 'retrieval-augmented generation (RAG)' -> 'Generación mejorada por recuperación (RAG)' -- 'ROC (receiver operating 
characteristic) Curve' -> 'Curva ROC (característica operativa del receptor)' -- 'ROC curve' -> 'curva ROC' -- 'Root Mean Squared Error (RMSE)' -> 'Raíz cuadrada del error cuadrático medio (RMSE)' -- 'sigmoid function' -> 'función sigmoidea' -- 'softmax' -> 'softmax' -- 'sparse feature' -> 'atributo disperso' -- 'sparse representation' -> 'representación dispersa' -- 'sparse vector' -> 'vector disperso' -- 'squared loss' -> 'Pérdida al cuadrado' -- 'static' -> 'static' -- 'static inference' -> 'Inferencia estática' -- 'static model' -> 'modelo estático' -- 'stationarity' -> 'Estacionariedad' -- 'Stochastic Gradient Descent (SGD)' -> 'Descenso de gradientes estocástico (SGD)' -- 'supervised learning' -> 'aprendizaje supervisado' -- 'supervised machine learning' -> 'aprendizaje automático supervisado' -- 'synthetic feature' -> 'atributo sintético' -- 'synthetic features' -> 'atributos sintéticos' -- 'test loss' -> 'Pérdida de prueba' -- 'training' -> 'entrenamiento' -- 'training loss' -> 'Pérdida de entrenamiento' -- 'training set' -> 'conjunto de entrenamiento' -- 'training-serving skew' -> 'Sesgo entre el entrenamiento y la entrega' -- 'true negative (TN)' -> 'verdadero negativo (VN)' -- 'true negatives' -> 'verdaderos negativos' -- 'true positive (TP)' -> 'verdadero positivo (VP)' -- 'true positive rate' -> 'tasa de verdaderos positivos' -- 'true positive rate (TPR)' -> 'tasa de verdaderos positivos (TVP)' -- 'true positives' -> 'verdaderos positivos' -- 'underfitting' -> 'Subajuste' -- 'unlabeled example' -> 'ejemplo sin etiqueta' -- 'unsupervised machine learning' -> 'aprendizaje automático no supervisado' -- 'validation' -> 'validación' -- 'validation dataset' -> 'conjunto de datos de validación' -- 'validation loss' -> 'Pérdida de validación' -- 'validation set' -> 'conjunto de validación' -- 'weight' -> 'peso' -- 'weighted sum' -> 'suma ponderada' -- 'Z-score normalization' -> 'normalización de la puntuación Z' diff --git 
a/src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/glossaries/machine_learning/fr.txt b/src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/glossaries/machine_learning/fr.txt deleted file mode 100644 index 3f64f3098..000000000 --- a/src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/glossaries/machine_learning/fr.txt +++ /dev/null @@ -1,175 +0,0 @@ -# FR HINTS -## TERM MAPPINGS -These are preferred terminology choices for this language. Use them whenever they sound natural; adapt freely if context requires. - -- 'accuracy' -> 'accuracy' -- 'activation function' -> 'fonction d'activation' -- 'artificial intelligence' -> 'intelligence artificielle' -- 'AUC' -> 'AUC' -- 'AUC (Area under the ROC curve)' -> 'AUC (aire sous la courbe ROC)' -- 'backpropagation' -> 'rétropropagation' -- 'batch' -> 'lot' -- 'batch size' -> 'taille du lot' -- 'bias (ethics/fairness)' -> 'biais (éthique/équité) (bias (ethics/fairness))' -- 'bias (math) or bias term' -> 'biais (mathématiques) ou terme de biais' -- 'bias in ethics and fairness' -> 'biais en matière d'éthique et d'équité' -- 'bias term' -> 'biais' -- 'binary classification' -> 'classification binaire' -- 'bucketing' -> 'le binning' -- 'categorical' -> 'catégorielle' -- 'categorical data' -> 'données catégorielles' -- 'class' -> 'classe' -- 'class-imbalanced dataset' -> 'ensemble de données avec déséquilibre des classes' -- 'class-imbalanced datasets' -> 'ensembles de données déséquilibrés en termes de classes' -- 'classification' -> 'classification' -- 'classification model' -> 'modèle de classification' -- 'classification threshold' -> 'seuil de classification' -- 'classifier' -> 'classificateur' -- 'clipping' -> 'écrêtage' -- 'confusion matrix' -> 'matrice de confusion' -- 'continuous feature' -> 'caractéristique continue' -- 'convergence' -> 'convergence' -- 'data set or dataset' -> 'ensemble de données (data set ou dataset)' -- 'DataFrame' -> 'DataFrame' -- 
'dataset' -> 'ensemble de données' -- 'deep learning' -> 'deep learning' -- 'deep model' -> 'modèle deep learning' -- 'dense feature' -> 'caractéristique dense' -- 'depth' -> 'profondeur' -- 'discrete feature' -> 'caractéristique discrète' -- 'discrete features' -> 'caractéristiques discrètes' -- 'dynamic' -> 'dynamic' -- 'dynamic model' -> 'modèle dynamique' -- 'early stopping' -> 'arrêt prématuré' -- 'embedding layer' -> 'couche d'embedding' -- 'embedding layers' -> 'couches d'embedding' -- 'epoch' -> 'epoch' -- 'example' -> 'exemple' -- 'false negative (FN)' -> 'Faux négatif (FN)' -- 'false negatives' -> 'faux négatifs' -- 'false positive (FP)' -> 'Faux positif (FP)' -- 'false positive rate' -> 'taux de faux positifs' -- 'false positive rate (FPR)' -> 'taux de faux positifs (TFP) (false positive rate (FPR))' -- 'false positives' -> 'faux positifs' -- 'feature' -> 'fonctionnalité' -- 'feature cross' -> 'croisement de caractéristiques' -- 'feature crosses' -> 'caractéristiques croisées' -- 'feature engineering' -> 'l'ingénierie des caractéristiques.' 
-- 'feature set' -> 'ensemble de fonctionnalités' -- 'feature vector' -> 'vecteur de caractéristiques' -- 'feedback loop' -> 'boucle de rétroaction' -- 'generalization' -> 'généralisation' -- 'generalization curve' -> 'courbe de généralisation' -- 'gradient descent' -> 'descente de gradient' -- 'ground truth' -> 'vérité terrain' -- 'hidden layer' -> 'couche cachée' -- 'hidden layer(s)' -> 'couches cachées' -- 'hyperparameter' -> 'hyperparamètre' -- 'independently and identically distributed (i.i.d)' -> 'variables indépendantes et identiquement distribuées (i.i.d)' -- 'inference' -> 'inférence' -- 'input layer' -> 'couche d'entrée' -- 'interpretability' -> 'interprétabilité' -- 'iteration' -> 'itération' -- 'L0regularization' -> 'Régularisation L0' -- 'L1loss' -> 'perte L1' -- 'L1regularization' -> 'régularisationL1' -- 'L2loss' -> 'perte L2' -- 'L2regularization' -> 'régularisationL2' -- 'label' -> 'étiquette' -- 'labeled example' -> 'exemple étiqueté' -- 'lambda' -> 'lambda' -- 'layer' -> 'couche' -- 'learning rate' -> 'taux d'apprentissage' -- 'linear' -> 'linear' -- 'linear model' -> 'modèle linéaire' -- 'linear models' -> 'modèles linéaires' -- 'linear regression' -> 'régression linéaire' -- 'Log Loss' -> 'perte logistique' -- 'log-odds' -> 'logarithme de cote' -- 'logistic regression' -> 'régression logistique' -- 'loss' -> 'perte' -- 'loss curve' -> 'courbe de perte' -- 'loss function' -> 'fonction de perte' -- 'machine learning' -> 'machine learning' -- 'majority class' -> 'classe majoritaire' -- 'mini-batch' -> 'mini-lot' -- 'minority class' -> 'classe minoritaire' -- 'model' -> 'modèle' -- 'multi-class classification' -> 'classification à classes multiples' -- 'negative class' -> 'classe négative' -- 'negative classes' -> 'classes négatives' -- 'neural network' -> 'réseau de neurones' -- 'neural networks' -> 'réseaux de neurones' -- 'neuron' -> 'neurone' -- 'node (neural network)' -> 'nœud (réseau de neurones)' -- 'nonlinear' -> 'non linéaire' --
'nonstationarity' -> 'non-stationnarité' -- 'normalization' -> 'normalisation' -- 'numerical data' -> 'données numériques' -- 'offline' -> 'Hors connexion' -- 'offline inference' -> 'inférence hors connexion' -- 'one-hot encoding' -> 'Encodage one-hot' -- 'one-hot vector' -> 'vecteur one-hot' -- 'one-vs.-all' -> 'un contre tous' -- 'online' -> 'online' -- 'online inference' -> 'inférence en ligne' -- 'output layer' -> 'couche de sortie' -- 'output layers' -> 'couches de sortie' -- 'overfitting' -> 'surapprentissage' -- 'pandas' -> 'pandas' -- 'parameter' -> 'paramètre' -- 'positive class' -> 'classe positive' -- 'positive classes' -> 'classes positives' -- 'post-processing' -> 'post-traitement' -- 'precision' -> 'precision' -- 'prediction' -> 'prédiction' -- 'proxy labels' -> 'étiquettes de substitution' -- 'RAG' -> 'RAG' -- 'rater' -> 'évaluateur' -- 'recall' -> 'recall (rappel)' -- 'Rectified Linear Unit (ReLU)' -> 'Unité de rectification linéaire (ReLU)' -- 'regression model' -> 'modèle de régression' -- 'regularization' -> 'régularisation' -- 'regularization rate' -> 'taux de régularisation' -- 'ReLU' -> 'ReLU' -- 'retrieval-augmented generation' -> 'génération augmentée par récupération' -- 'retrieval-augmented generation (RAG)' -> 'génération augmentée par récupération (RAG)' -- 'ROC (receiver operating characteristic) Curve' -> 'Courbe ROC (receiver operating characteristic)' -- 'ROC curve' -> 'courbe ROC' -- 'Root Mean Squared Error (RMSE)' -> 'la racine carrée de l'erreur quadratique moyenne (RMSE, Root Mean Squared Error)' -- 'sigmoid function' -> 'fonction sigmoïde' -- 'softmax' -> 'softmax' -- 'sparse feature' -> 'caractéristique creuse' -- 'sparse representation' -> 'représentation creuse' -- 'sparse vector' -> 'vecteur creux' -- 'squared loss' -> 'perte quadratique' -- 'static' -> 'static' -- 'static inference' -> 'inférence statique' -- 'static model' -> 'modèle statique' -- 'stationarity' -> 'stationnarité' -- 'Stochastic Gradient Descent (SGD)' -> 
'Descente de gradient stochastique (SGD, Stochastic Gradient Descent)' -- 'supervised learning' -> 'apprentissage supervisé' -- 'supervised machine learning' -> 'machine learning supervisé' -- 'synthetic feature' -> 'caractéristique synthétique' -- 'synthetic features' -> 'caractéristiques synthétiques' -- 'test loss' -> 'perte de test' -- 'training' -> 'entraînement' -- 'training loss' -> 'perte d'entraînement' -- 'training set' -> 'ensemble d'entraînement' -- 'training-serving skew' -> 'décalage entraînement/mise en service' -- 'true negative (TN)' -> 'vrai négatif (VN)' -- 'true negatives' -> 'vrais négatifs' -- 'true positive (TP)' -> 'vrai positif (VP)' -- 'true positive rate' -> 'taux de vrais positifs' -- 'true positive rate (TPR)' -> 'taux de vrais positifs (TVP)' -- 'true positives' -> 'vrais positifs' -- 'underfitting' -> 'sous-ajustement' -- 'unlabeled example' -> 'exemple sans étiquette' -- 'unsupervised machine learning' -> 'machine learning non supervisé' -- 'validation' -> 'validation' -- 'validation dataset' -> 'ensemble de données de validation' -- 'validation loss' -> 'perte de validation' -- 'validation set' -> 'ensemble de validation' -- 'weight' -> 'weight' -- 'weighted sum' -> 'Somme pondérée' -- 'Z-score normalization' -> 'Normalisation du score Z' diff --git a/src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/glossaries/machine_learning/ja.txt b/src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/glossaries/machine_learning/ja.txt deleted file mode 100644 index fb3787a79..000000000 --- a/src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/glossaries/machine_learning/ja.txt +++ /dev/null @@ -1,175 +0,0 @@ -# JA HINTS -## TERM MAPPINGS -These are preferred terminology choices for this language. Use them whenever they sound natural; adapt freely if context requires. 
- -- 'accuracy' -> 「accuracy」 -- 'activation function' -> 「活性化関数」 -- 'artificial intelligence' -> 「AI」 -- 'AUC' -> 「AUC」 -- 'AUC (Area under the ROC curve)' -> 「AUC(ROC 曲線の下の面積)」 -- 'backpropagation' -> 「バックプロパゲーション」 -- 'batch' -> 「Batch」 -- 'batch size' -> 「バッチサイズ」 -- 'bias (ethics/fairness)' -> 「バイアス(倫理/公平性)」 -- 'bias (math) or bias term' -> 「バイアス(数学)またはバイアス項」 -- 'bias in ethics and fairness' -> 「倫理と公平性のバイアス」 -- 'bias term' -> 「バイアス項」 -- 'binary classification' -> 「バイナリ分類」 -- 'bucketing' -> 「バケット化」 -- 'categorical' -> 「カテゴリカル」 -- 'categorical data' -> 「カテゴリデータ」 -- 'class' -> 「クラス」 -- 'class-imbalanced dataset' -> 「クラスの不均衡なデータセット」 -- 'class-imbalanced datasets' -> 「クラス不均衡データセット」 -- 'classification' -> 「分類」 -- 'classification model' -> 「分類モデル」 -- 'classification threshold' -> 「分類しきい値」 -- 'classifier' -> 「分類器」 -- 'clipping' -> 「クリッピング」 -- 'confusion matrix' -> 「混同行列」 -- 'continuous feature' -> 「連続特徴」 -- 'convergence' -> 「収束」 -- 'data set or dataset' -> 「データセット」 -- 'DataFrame' -> 「DataFrame」 -- 'dataset' -> 「データセット」 -- 'deep learning' -> 「ディープ ラーニング」 -- 'deep model' -> 「ディープモデル」 -- 'dense feature' -> 「密な特徴」 -- 'depth' -> 「深さ」 -- 'discrete feature' -> 「離散特徴」 -- 'discrete features' -> 「離散特徴」 -- 'dynamic' -> 「動的」 -- 'dynamic model' -> 「動的モデル」 -- 'early stopping' -> 「早期停止」 -- 'embedding layer' -> 「エンベディング レイヤ」 -- 'embedding layers' -> 「エンベディング レイヤ」 -- 'epoch' -> 「エポック」 -- 'example' -> 「例」 -- 'false negative (FN)' -> 「偽陰性(FN)」 -- 'false negatives' -> 「偽陰性」 -- 'false positive (FP)' -> 「偽陽性(FP)」 -- 'false positive rate' -> 「偽陽性率」 -- 'false positive rate (FPR)' -> 「偽陽性率(FPR)」 -- 'false positives' -> 「偽陽性」 -- 'feature' -> 「機能」 -- 'feature cross' -> 「特徴クロス」 -- 'feature crosses' -> 「特徴交差」 -- 'feature engineering' -> 「特徴量エンジニアリング」 -- 'feature set' -> 「機能セット」 -- 'feature vector' -> 「特徴ベクトル」 -- 'feedback loop' -> 「フィードバック ループ」 -- 'generalization' -> 「一般化」 -- 'generalization curve' -> 「汎化曲線」 -- 'gradient descent' -> 「勾配降下法」 -- 'ground truth' -> 「グラウンド トゥルース」 -- 'hidden layer' ->
「隠れ層」 -- 'hidden layer(s)' -> 「隠れ層」 -- 'hyperparameter' -> 「ハイパーパラメータ」 -- 'independently and identically distributed (i.i.d)' -> 「独立同分布(i.i.d)」 -- 'inference' -> 「推論」 -- 'input layer' -> 「入力レイヤ」 -- 'interpretability' -> 「解釈可能性」 -- 'iteration' -> 「繰り返し」 -- 'L0regularization' -> 「L0正規化」 -- 'L1loss' -> 「L1損失」 -- 'L1regularization' -> 「L1正則化」 -- 'L2loss' -> 「L2損失」 -- 'L2regularization' -> 「L2正則化」 -- 'label' -> 「ラベル」 -- 'labeled example' -> 「ラベル付きの例」 -- 'lambda' -> 「lambda」 -- 'layer' -> 「レイヤ」 -- 'learning rate' -> 「学習率」 -- 'linear' -> 「線形」 -- 'linear model' -> 「線形モデル」 -- 'linear models' -> 「線形モデル」 -- 'linear regression' -> 「線形回帰」 -- 'Log Loss' -> 「対数損失」 -- 'log-odds' -> 「対数オッズ」 -- 'logistic regression' -> 「ロジスティック回帰」 -- 'loss' -> 「損失」 -- 'loss curve' -> 「損失曲線」 -- 'loss function' -> 「損失関数」 -- 'machine learning' -> 「機械学習」 -- 'majority class' -> 「多数派クラス」 -- 'mini-batch' -> 「ミニバッチ」 -- 'minority class' -> 「少数派クラス」 -- 'model' -> 「モデル」 -- 'multi-class classification' -> 「マルチクラス分類」 -- 'negative class' -> 「陰性クラス」 -- 'negative classes' -> 「陰性クラス」 -- 'neural network' -> 「ニューラル ネットワークの」 -- 'neural networks' -> 「ニューラル ネットワーク」 -- 'neuron' -> 「ニューロン」 -- 'node (neural network)' -> 「ノード(ニューラル ネットワーク)」 -- 'nonlinear' -> 「非線形」 -- 'nonstationarity' -> 「非定常性」 -- 'normalization' -> 「正規化」 -- 'numerical data' -> 「数値データ」 -- 'offline' -> 「オフライン」 -- 'offline inference' -> 「オフライン推論」 -- 'one-hot encoding' -> 「ワンホット エンコード」 -- 'one-hot vector' -> 「ワンホット ベクトル」 -- 'one-vs.-all' -> 「1 対すべて」 -- 'online' -> 「オンライン」 -- 'online inference' -> 「オンライン推論」 -- 'output layer' -> 「出力レイヤ」 -- 'output layers' -> 「出力レイヤ」 -- 'overfitting' -> 「過学習」 -- 'pandas' -> 「pandas」 -- 'parameter' -> 「パラメータ」 -- 'positive class' -> 「陽性クラス」 -- 'positive classes' -> 「陽性クラス」 -- 'post-processing' -> 「後処理」 -- 'precision' -> 「precision」 -- 'prediction' -> 「予測」 -- 'proxy labels' -> 「プロキシラベル」 -- 'RAG' -> 「RAG」 -- 'rater' -> 「rater」 -- 'recall' -> 「recall」 -- 'Rectified Linear Unit (ReLU)' -> 「正規化線形ユニット(ReLU)」 -- 'regression model' -> 
「回帰モデル」 -- 'regularization' -> 「正則化」 -- 'regularization rate' -> 「正則化率」 -- 'ReLU' -> 「ReLU」 -- 'retrieval-augmented generation' -> 「検索拡張生成」 -- 'retrieval-augmented generation (RAG)' -> 「検索拡張生成(RAG)」 -- 'ROC (receiver operating characteristic) Curve' -> 「ROC(受信者操作特性)曲線」 -- 'ROC curve' -> 「ROC 曲線」 -- 'Root Mean Squared Error (RMSE)' -> 「二乗平均平方根誤差(RMSE)」 -- 'sigmoid function' -> 「シグモイド関数」 -- 'softmax' -> 「Softmax」 -- 'sparse feature' -> 「スパース特徴」 -- 'sparse representation' -> 「スパース表現」 -- 'sparse vector' -> 「スパース ベクトル」 -- 'squared loss' -> 「二乗損失」 -- 'static' -> 「static」 -- 'static inference' -> 「静的推論」 -- 'static model' -> 「静的モデル」 -- 'stationarity' -> 「定常性」 -- 'Stochastic Gradient Descent (SGD)' -> 「確率的勾配降下法(SGD)」 -- 'supervised learning' -> 「教師あり学習」 -- 'supervised machine learning' -> 「教師あり機械学習」 -- 'synthetic feature' -> 「合成特徴」 -- 'synthetic features' -> 「合成特徴」 -- 'test loss' -> 「テスト損失」 -- 'training' -> 「トレーニング」 -- 'training loss' -> 「トレーニングの損失」 -- 'training set' -> 「トレーニング セット」 -- 'training-serving skew' -> 「トレーニング サービング スキュー」 -- 'true negative (TN)' -> 「真陰性(TN)」 -- 'true negatives' -> 「真陰性」 -- 'true positive (TP)' -> 「真陽性(TP)」 -- 'true positive rate' -> 「真陽性率」 -- 'true positive rate (TPR)' -> 「真陽性率(TPR)」 -- 'true positives' -> 「真陽性」 -- 'underfitting' -> 「アンダーフィット」 -- 'unlabeled example' -> 「ラベルのない例」 -- 'unsupervised machine learning' -> 「教師なし機械学習」 -- 'validation' -> 「検証」 -- 'validation dataset' -> 「検証データセット」 -- 'validation loss' -> 「検証損失」 -- 'validation set' -> 「検証セット」 -- 'weight' -> 「weight」 -- 'weighted sum' -> 「加重合計」 -- 'Z-score normalization' -> 「Z スコアの正規化」 diff --git a/src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/glossaries/machine_learning/pt_BR.txt b/src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/glossaries/machine_learning/pt_BR.txt deleted file mode 100644 index 16b2b9dee..000000000 --- a/src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/glossaries/machine_learning/pt_BR.txt +++ 
/dev/null @@ -1,175 +0,0 @@ -# PT-BR HINTS -## TERM MAPPINGS -These are preferred terminology choices for this language. Use them whenever they sound natural; adapt freely if context requires. - -- 'accuracy' -> 'precisão' -- 'activation function' -> 'função de ativação' -- 'artificial intelligence' -> 'inteligência artificial' -- 'AUC' -> 'AUC' -- 'AUC (Area under the ROC curve)' -> 'AUC (área sob a curva ROC)' -- 'backpropagation' -> 'retropropagação' -- 'batch' -> 'lote' -- 'batch size' -> 'tamanho do lote' -- 'bias (ethics/fairness)' -> 'viés (ética/justiça)' -- 'bias (math) or bias term' -> 'viés (matemática) ou termo de viés' -- 'bias in ethics and fairness' -> 'viés em ética e justiça' -- 'bias term' -> 'termo de viés' -- 'binary classification' -> 'classificação binária' -- 'bucketing' -> 'agrupamento por classes' -- 'categorical' -> 'categórico' -- 'categorical data' -> 'dados categóricos' -- 'class' -> 'classe' -- 'class-imbalanced dataset' -> 'conjunto de dados não balanceado' -- 'class-imbalanced datasets' -> 'conjuntos de dados com classes desbalanceadas' -- 'classification' -> 'classificação' -- 'classification model' -> 'modelo de classificação' -- 'classification threshold' -> 'limiar de classificação' -- 'classifier' -> 'classificador' -- 'clipping' -> 'corte' -- 'confusion matrix' -> 'matriz de confusão' -- 'continuous feature' -> 'atributo contínuo' -- 'convergence' -> 'convergência' -- 'data set or dataset' -> 'conjunto de dados' -- 'DataFrame' -> 'DataFrame' -- 'dataset' -> 'conjunto de dados' -- 'deep learning' -> 'aprendizado profundo' -- 'deep model' -> 'modelo profundo' -- 'dense feature' -> 'atributo denso' -- 'depth' -> 'profundidade' -- 'discrete feature' -> 'atributo discreto' -- 'discrete features' -> 'recursos discretos' -- 'dynamic' -> 'dinâmico' -- 'dynamic model' -> 'modelo dinâmico' -- 'early stopping' -> 'parada antecipada' -- 'embedding layer' -> 'camada de embedding' -- 'embedding layers' -> 'camadas de embedding' -- 'epoch' -> 
'época' -- 'example' -> 'exemplo' -- 'false negative (FN)' -> 'falso negativo (FN)' -- 'false negatives' -> 'falsos negativos' -- 'false positive (FP)' -> 'falso positivo (FP)' -- 'false positive rate' -> 'taxa de falso positivo' -- 'false positive rate (FPR)' -> 'taxa de falso positivo (FPR)' -- 'false positives' -> 'falsos positivos' -- 'feature' -> 'recurso' -- 'feature cross' -> 'cruzamento de atributos' -- 'feature crosses' -> 'cruzamentos de recursos' -- 'feature engineering' -> 'engenharia de atributos' -- 'feature set' -> 'conjunto de atributos' -- 'feature vector' -> 'vetor de atributos' -- 'feedback loop' -> 'ciclo de feedback' -- 'generalization' -> 'generalização' -- 'generalization curve' -> 'curva de generalização' -- 'gradient descent' -> 'gradiente descendente' -- 'ground truth' -> 'informações empíricas' -- 'hidden layer' -> 'camada oculta' -- 'hidden layer(s)' -> 'camadas ocultas' -- 'hyperparameter' -> 'hiperparâmetro' -- 'independently and identically distributed (i.i.d)' -> 'independente e identicamente distribuído (i.i.d)' -- 'inference' -> 'inferência' -- 'input layer' -> 'camada de entrada' -- 'interpretability' -> 'interpretabilidade' -- 'iteration' -> 'iteração' -- 'L0regularization' -> 'Regularização L0' -- 'L1loss' -> 'L1' -- 'L1regularization' -> 'regularização L1' -- 'L2loss' -> 'perda L2' -- 'L2regularization' -> 'regularizaçãoL2' -- 'label' -> 'o rótulo.' 
-- 'labeled example' -> 'exemplo rotulado' -- 'lambda' -> 'lambda' -- 'layer' -> 'layer' -- 'learning rate' -> 'taxa de aprendizado' -- 'linear' -> 'linear' -- 'linear model' -> 'modelo linear' -- 'linear models' -> 'modelos lineares' -- 'linear regression' -> 'regressão linear' -- 'Log Loss' -> 'perda logarítmica' -- 'log-odds' -> 'log-odds' -- 'logistic regression' -> 'regressão logística' -- 'loss' -> 'perda' -- 'loss curve' -> 'curva de perda' -- 'loss function' -> 'função de perda' -- 'machine learning' -> 'machine learning' -- 'majority class' -> 'classe majoritária' -- 'mini-batch' -> 'minilote' -- 'minority class' -> 'classe minoritária' -- 'model' -> 'modelo' -- 'multi-class classification' -> 'classificação multiclasse' -- 'negative class' -> 'classe negativa' -- 'negative classes' -> 'classes negativas' -- 'neural network' -> 'do feedforward' -- 'neural networks' -> 'redes neurais' -- 'neuron' -> 'neurônio' -- 'node (neural network)' -> 'nó (rede neural)' -- 'nonlinear' -> 'não linear' -- 'nonstationarity' -> 'não estacionariedade' -- 'normalization' -> 'normalização' -- 'numerical data' -> 'dados numéricos' -- 'offline' -> 'off-line' -- 'offline inference' -> 'inferência off-line' -- 'one-hot encoding' -> 'codificação one-hot' -- 'one-hot vector' -> 'vetor one-hot' -- 'one-vs.-all' -> 'um-contra-todos' -- 'online' -> 'on-line' -- 'online inference' -> 'inferência on-line' -- 'output layer' -> 'camada de saída' -- 'output layers' -> 'camadas de saída' -- 'overfitting' -> 'overfitting' -- 'pandas' -> 'pandas' -- 'parameter' -> 'parâmetro' -- 'positive class' -> 'classe positiva' -- 'positive classes' -> 'classes positivas' -- 'post-processing' -> 'pós-processamento' -- 'precision' -> 'precision' -- 'prediction' -> 'previsão' -- 'proxy labels' -> 'rotulação indireta' -- 'RAG' -> 'RAG' -- 'rater' -> 'rotulador' -- 'recall' -> 'recall' -- 'Rectified Linear Unit (ReLU)' -> 'Unidade linear retificada (ReLU)' -- 'regression model' -> 'modelo de regressão' -- 
'regularization' -> 'regularização' -- 'regularization rate' -> 'taxa de regularização' -- 'ReLU' -> 'ReLU' -- 'retrieval-augmented generation' -> 'geração aumentada de recuperação' -- 'retrieval-augmented generation (RAG)' -> 'geração aumentada de recuperação (RAG)' -- 'ROC (receiver operating characteristic) Curve' -> 'Curva ROC' -- 'ROC curve' -> 'curva ROC' -- 'Root Mean Squared Error (RMSE)' -> 'Raiz do erro quadrático médio (RMSE)' -- 'sigmoid function' -> 'função sigmoide' -- 'softmax' -> 'softmax' -- 'sparse feature' -> 'atributo esparso' -- 'sparse representation' -> 'representação esparsa' -- 'sparse vector' -> 'vetor esparso' -- 'squared loss' -> 'perda quadrática' -- 'static' -> 'static' -- 'static inference' -> 'inferência estática' -- 'static model' -> 'modelo estático' -- 'stationarity' -> 'estacionariedade' -- 'Stochastic Gradient Descent (SGD)' -> 'Gradiente descendente estocástico (GDE)' -- 'supervised learning' -> 'aprendizado supervisionado' -- 'supervised machine learning' -> 'aprendizado de máquina supervisionado' -- 'synthetic feature' -> 'atributo sintético' -- 'synthetic features' -> 'recursos sintéticos' -- 'test loss' -> 'perda de teste' -- 'training' -> 'treinamento' -- 'training loss' -> 'perda de treinamento' -- 'training set' -> 'conjunto de treinamento' -- 'training-serving skew' -> 'desvio entre treinamento e disponibilização' -- 'true negative (TN)' -> 'verdadeiro negativo (VN)' -- 'true negatives' -> 'verdadeiros negativos' -- 'true positive (TP)' -> 'verdadeiro positivo (VP)' -- 'true positive rate' -> 'taxa de verdadeiros positivos' -- 'true positive rate (TPR)' -> 'taxa de verdadeiro positivo (TVP)' -- 'true positives' -> 'verdadeiros positivos' -- 'underfitting' -> 'underfitting' -- 'unlabeled example' -> 'exemplo sem rótulo' -- 'unsupervised machine learning' -> 'aprendizado de máquina sem supervisão' -- 'validation' -> 'validação' -- 'validation dataset' -> 'conjunto de dados de validação' -- 'validation loss' -> 'perda de 
validação' -- 'validation set' -> 'conjunto de validação' -- 'weight' -> 'peso' -- 'weighted sum' -> 'soma de pesos' -- 'Z-score normalization' -> 'Normalização de pontuação Z' diff --git a/src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/glossaries/machine_learning/ru.txt b/src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/glossaries/machine_learning/ru.txt deleted file mode 100644 index 0c87ef85e..000000000 --- a/src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/glossaries/machine_learning/ru.txt +++ /dev/null @@ -1,213 +0,0 @@ -# RU HINTS -## TERM MAPPINGS -These are preferred terminology choices for this language. Use them whenever they sound natural; adapt freely if context requires. - -- 'accuracy' -> «точность» -- 'activation function' -> «функция активации» -- 'artificial intelligence' -> «искусственный интеллект» -- 'AUC' -> «AUC» -- 'AUC (Area under the ROC curve)' -> «AUC (площадь под ROC-кривой)» -- 'backpropagation' -> «обратное распространение» -- 'batch' -> «партия» -- 'batch size' -> «размер партии» -- 'bias (ethics/fairness)' -> «предвзятость (этика/справедливость)» -- 'bias (math) or bias term' -> «предвзятость (математика) или термин предвзятости» -- 'bias in ethics and fairness' -> «предвзятостью в этике и справедливости» -- 'bias term' -> «термином «смещение»» -- 'binary classification' -> «бинарная классификация» -- 'bucketing' -> «распределение» -- 'categorical' -> «категориальном» -- 'categorical data' -> «категориальные данные» -- 'class' -> «сорт» -- 'class-imbalanced dataset' -> «набор данных с несбалансированным классом» -- 'class-imbalanced datasets' -> «несбалансированные по классам наборы данных» -- 'classification' -> «классификации» -- 'classification model' -> «модель классификации» -- 'classification threshold' -> «порог классификации» -- 'classifier' -> «классификатор» -- 'clipping' -> «вырезка» -- 'confusion matrix' -> «матрица путаницы» -- 'continuous feature' 
-> «непрерывная функция» -- 'convergence' -> «конвергенция» -- 'data set or dataset' -> «набор данных или набор данных» -- 'DataFrame' -> «DataFrame» -- 'dataset' -> «Набор данных» -- 'deep learning' -> «глубоком обучении» -- 'deep model' -> «глубокая модель» -- 'dense feature' -> «плотная особенность» -- 'depth' -> «глубина» -- 'discrete feature' -> «дискретная особенность» -- 'discrete features' -> «дискретными признаками» -- 'dynamic' -> «динамический» -- 'dynamic model' -> «динамическая модель» -- 'early stopping' -> «ранняя остановка» -- 'embedding layer' -> «слой внедрения» -- 'embedding layers' -> «встраиваемых слоев» -- 'epoch' -> «эпоха» -- 'example' -> «пример» -- 'false negative (FN)' -> «ложноотрицательный результат (ЛО)» -- 'false negatives' -> «ложноотрицательных результатов» -- 'false positive (FP)' -> «ложноположительный результат (ЛП)» -- 'false positive rate' -> «false positive rate» -- 'false positive rate (FPR)' -> «частота ложноположительных результатов (FPR)» -- 'false positives' -> «ложноположительных результатов» -- 'feature' -> «особенность» -- 'feature cross' -> «кросс-функция» -- 'feature crosses' -> «пересечение признаков» -- 'feature engineering' -> «проектирование функций» -- 'feature set' -> «набор функций» -- 'feature vector' -> «вектор признаков» -- 'feedback loop' -> «петля обратной связи» -- 'generalization' -> «обобщение» -- 'generalization curve' -> «кривая обобщения» -- 'gradient descent' -> «градиентный спуск» -- 'ground truth' -> «истина» -- 'hidden layer' -> «скрытый слой» -- 'hidden layer(s)' -> «скрытых слоях» -- 'hyperparameter' -> «гиперпараметр» -- 'independently and identically distributed (i.i.d)' -> «независимо и одинаково распределены (iid)» -- 'inference' -> «вывод» -- 'input layer' -> «входной слой» -- 'interpretability' -> «интерпретируемость» -- 'iteration' -> «итерация» -- 'L0regularization' -> «L0регуляризация» -- 'L1loss' -> «потеряL1» -- 'L1regularization' -> «регуляризации L1» -- 'L2loss' -> «Потери L2» -- 
'L2regularization' -> «регуляризацииL2» -- 'label' -> «этикетка» -- 'labeled example' -> «помеченный пример» -- 'lambda' -> «лямбда» -- 'layer' -> «слой» -- 'learning rate' -> «скорость обучения» -- 'linear' -> «линейный» -- 'linear model' -> «линейная модель» -- 'linear models' -> «линейных моделях» -- 'linear regression' -> «линейная регрессия» -- 'Log Loss' -> «Log Loss» -- 'log-odds' -> «логарифмические шансы» -- 'logistic regression' -> «логистическая регрессия» -- 'loss' -> «потеря» -- 'loss curve' -> «кривая потерь» -- 'loss function' -> «функция потерь» -- 'machine learning' -> «машинное обучение» -- 'majority class' -> «класс большинства» -- 'mini-batch' -> «мини-партия» -- 'minority class' -> «класс меньшинства» -- 'model' -> «модель» -- 'multi-class classification' -> «многоклассовой классификацией» -- 'negative class' -> «отрицательный класс» -- 'negative classes' -> «отрицательные классы» -- 'neural network' -> «нейронная сеть» -- 'neural networks' -> «нейронным сетям» -- 'neuron' -> «нейрон» -- 'node (neural network)' -> «узел (нейронная сеть)» -- 'nonlinear' -> «нелинейный» -- 'nonstationarity' -> «нестационарность» -- 'normalization' -> «нормализация» -- 'numerical data' -> «числовые данные» -- 'offline' -> «офлайн» -- 'offline inference' -> «автономный вывод» -- 'one-hot encoding' -> «горячее кодирование» -- 'one-hot vector' -> «вектор с одним целым» -- 'one-vs.-all' -> «один против всех» -- 'online' -> «онлайн» -- 'online inference' -> «онлайн-вывод» -- 'output layer' -> «выходной слой» -- 'output layers' -> «выходных слоев» -- 'overfitting' -> «переобучение» -- 'pandas' -> «pandas» -- 'parameter' -> «параметр» -- 'positive class' -> «позитивный класс» -- 'positive classes' -> «положительные» -- 'post-processing' -> «постобработка» -- 'precision' -> «точность» -- 'prediction' -> «прогноз» -- 'proxy labels' -> «прокси-метки» -- 'RAG' -> «RAG» -- 'rater' -> «оценщик» -- 'recall' -> «recall» -- 'Rectified Linear Unit (ReLU)' -> «Rectified Linear
Unit (ReLU)» -- 'regression model' -> «регрессионная модель» -- 'regularization' -> «регуляризация» -- 'regularization rate' -> «regularization rate» -- 'ReLU' -> «ReLU» -- 'retrieval-augmented generation' -> «генерации с расширенным поиском» -- 'retrieval-augmented generation (RAG)' -> «retrieval-augmented generation (RAG)» -- 'ROC (receiver operating characteristic) Curve' -> «ROC (receiver operating characteristic) Curve» -- 'ROC curve' -> «ROC-кривой» -- 'Root Mean Squared Error (RMSE)' -> «Root Mean Squared Error (RMSE)» -- 'sigmoid function' -> «sigmoid function» -- 'softmax' -> «софтмакс» -- 'sparse feature' -> «sparse feature» -- 'sparse representation' -> «sparse representation» -- 'sparse vector' -> «sparse vector» -- 'squared loss' -> «квадрат потерь» -- 'static' -> «статический» -- 'static inference' -> «static inference» -- 'static model' -> «статической моделью» -- 'stationarity' -> «стационарность» -- 'Stochastic Gradient Descent (SGD)' -> «Стохастический градиентный спуск (SGD)» -- 'supervised learning' -> «контролируемом обучении» -- 'supervised machine learning' -> «контролируемое машинное обучение» -- 'synthetic feature' -> «synthetic feature» -- 'synthetic features' -> «синтетические признаки» -- 'test loss' -> «test loss» -- 'training' -> «обучение» -- 'training loss' -> «training loss» -- 'training set' -> «обучающий набор» -- 'training-serving skew' -> «training-serving skew» -- 'true negative (TN)' -> «true negative (TN)» -- 'true negatives' -> «истинно отрицательных результатов» -- 'true positive (TP)' -> «true positive (TP)» -- 'true positive rate' -> «истинный положительный уровень» -- 'true positive rate (TPR)' -> «true positive rate (TPR)» -- 'true positives' -> «истинно положительных результатов» -- 'underfitting' -> «недообучение» -- 'unlabeled example' -> «unlabeled example» -- 'unsupervised machine learning' -> «неконтролируемое машинное обучение» -- 'validation' -> «проверка» -- 'validation dataset' -> «проверочном наборе данных»
-- 'validation loss' -> «validation loss» -- 'validation set' -> «набор для проверки» -- 'weight' -> «масса» -- 'weighted sum' -> «взвешенная сумма» -- 'Z-score normalization' -> «нормализацию Z-показателя» - -# STRICTNESS NOTE -TERM MAPPINGS above are flexible preferences. The following rules are STRICT and override them. - -## 2. Strict, Binding Terminology Rules (MANDATORY) -This section defines terminology and formatting that must always be used in Russian translations. -These rules override any flexible terminology and must be followed exactly. - -# MANDATORY RUSSIAN TERMINOLOGY RULES -## 2.1 Key Translations (Strict) -- 'Shared learning' -> «совместное обучение» - AVOID: «общее обучение». -- 'Multisource data' -> «данные из нескольких источников» - AVOID: «мультиисточниковые данные». -- 'Input embedding' -> «входное векторное представление (эмбеддинг)» -- 'Embedding' -> «эмбеддинг» -- 'Embedding space' -> «пространство представлений (пространство эмбеддингов)» -- 'Task-specific branches' -> «ветви, специфичные для задачи» -- 'Pipeline' -> «конвейер обработки данных» - «пайплайн» допускается только в неформальном контексте. - -## 2.2 Official Google Colab UI (Strict) -Use the official Russian UI strings: -- 'Change Runtime Type' -> «Сменить среду выполнения» -- 'Save a copy in Drive' -> «Сохранить копию на Диске» - -Filenames must remain in ENGLISH exactly as written. -Example: «Копия блокнота OriginalNotebookName.ipynb» - -## 2.3 Abbreviations and Hyphenation (Strict) -Keep all ML/AI abbreviations in English: ROC, AUC, TPR, FPR, L1, L2, UI, API, CNN, RNN, GPT. -Do NOT invent Russian abbreviations for these. 
- -When an English abbreviation precedes a Russian noun, use a hyphen: -- ROC-кривая -- AUC-показатель -- L1-регуляризация -- UI-дизайн diff --git a/src/ol_openedx_course_translations/README.rst b/src/ol_openedx_course_translations/README.rst index 594c51b50..443513fde 100644 --- a/src/ol_openedx_course_translations/README.rst +++ b/src/ol_openedx_course_translations/README.rst @@ -48,9 +48,6 @@ Configuration "default_model": "mistral-large-latest", }, } - TRANSLATIONS_GITHUB_TOKEN: - TRANSLATIONS_REPO_PATH: "" - TRANSLATIONS_REPO_URL: "https://github.com/mitodl/mitxonline-translations.git" LITE_LLM_REQUEST_TIMEOUT: 300 # Timeout for LLM API requests in seconds - For Tutor installations, these values can also be managed through a `custom Tutor plugin `_. @@ -302,61 +299,6 @@ If subtitle translation fails after all attempts: - An error message will indicate which subtitle file caused the failure - No partial or corrupted translation files will be left behind -Generating static content translations -====================================== - -This command synchronizes translation keys from edx-platform and MFE's, translates empty keys using LLM, and automatically creates a pull request in the translations repository. - -**What it does:** - -1. Syncs translation keys from edx-platform and MFE's to the translations repository -2. Extracts empty translation keys that need translation -3. Translates empty keys using the specified LLM provider and model -4. Applies translations to JSON and PO files -5. Commits changes to a new branch -6. Creates a pull request with translation statistics - -**Usage:** - -1. Go to the CMS shell -2. Run the management command: - - .. 
code-block:: bash - - ./manage.py cms sync_and_translate_language [OPTIONS] - -**Required arguments:** - -- ``LANGUAGE_CODE``: Language code (e.g., ``el``, ``fr``, ``es_ES``) - -**Optional arguments:** - -- ``--iso-code``: ISO code for JSON files (default: same as language code) -- ``--provider``: Translation provider (``openai``, ``gemini``, ``mistral``). Default is taken from ``TRANSLATIONS_PROVIDERS['default_provider']`` setting -- ``--model``: LLM model name. If not specified, uses the ``default_model`` for the selected provider from ``TRANSLATIONS_PROVIDERS``. Examples: ``gpt-5.2``, ``gemini-3-pro-preview``, ``mistral-large-latest`` -- ``--repo-path``: Path to mitxonline-translations repository (can also be set via ``TRANSLATIONS_REPO_PATH`` setting or environment variable) -- ``--repo-url``: GitHub repository URL (default: ``https://github.com/mitodl/mitxonline-translations.git``, can also be set via ``TRANSLATIONS_REPO_URL`` setting or environment variable) -- ``--glossary``: Path to glossary directory (optional). Should contain language-specific files (e.g. ``{iso_code}.txt``). -- ``--batch-size``: Number of keys to translate per API request (default: 200, recommended: 200-300 for most models) -- ``--mfe``: Filter by specific MFE(s). Use ``edx-platform`` for backend translations -- ``--dry-run``: Run without committing or creating PR - -**Examples:** - - .. 
code-block:: bash - - # Use default provider (from TRANSLATIONS_PROVIDERS['default_provider']) with its default model - ./manage.py cms sync_and_translate_language el - - # Use OpenAI provider with its default model (gpt-5.2) - ./manage.py cms sync_and_translate_language el --provider openai - - # Use OpenAI provider with a specific model - ./manage.py cms sync_and_translate_language el --provider openai --model gpt-5.2 - - # Use Mistral provider with a specific model and glossary - ./manage.py cms sync_and_translate_language el --provider mistral --model mistral-large-latest --glossary /path/to/glossary --batch-size 250 - License ******* diff --git a/src/ol_openedx_course_translations/ol_openedx_course_translations/glossaries/machine_learning/ar.txt b/src/ol_openedx_course_translations/ol_openedx_course_translations/glossaries/machine_learning/ar.txt deleted file mode 100644 index 246ddba39..000000000 --- a/src/ol_openedx_course_translations/ol_openedx_course_translations/glossaries/machine_learning/ar.txt +++ /dev/null @@ -1,175 +0,0 @@ -# AR HINTS -## TERM MAPPINGS -These are preferred terminology choices for this language. Use them whenever they sound natural; adapt freely if context requires. 
- -- 'accuracy' -> 'الدقة' -- 'activation function' -> 'دالّة التفعيل' -- 'artificial intelligence' -> 'الذكاء الاصطناعي' -- 'AUC' -> 'AUC' -- 'AUC (Area under the ROC curve)' -> 'المساحة تحت منحنى ROC' -- 'backpropagation' -> 'الانتشار العكسي' -- 'batch' -> 'دفعة' -- 'batch size' -> 'حجم الدفعة' -- 'bias (ethics/fairness)' -> 'التحيّز (الأخلاقيات/الإنصاف)' -- 'bias (math) or bias term' -> 'الانحياز (في الرياضيات) أو مصطلح الانحياز' -- 'bias in ethics and fairness' -> 'التحيز في الأخلاق والعدالة' -- 'bias term' -> 'مصطلح التحيز' -- 'binary classification' -> 'التصنيف الثنائي' -- 'bucketing' -> 'تصنيف البيانات' -- 'categorical' -> 'فئوية' -- 'categorical data' -> 'البيانات الفئوية' -- 'class' -> 'صنف' -- 'class-imbalanced dataset' -> 'مجموعة بيانات غير متوازنة الفئات' -- 'class-imbalanced datasets' -> 'مجموعات بيانات غير متوازنة الفئات' -- 'classification' -> 'التصنيف' -- 'classification model' -> 'نموذج التصنيف' -- 'classification threshold' -> 'عتبة التصنيف' -- 'classifier' -> 'مصنِّف' -- 'clipping' -> 'القص' -- 'confusion matrix' -> 'مصفوفة نجاح التوقعات' -- 'continuous feature' -> 'خاصية مستمرة' -- 'convergence' -> 'التقارب' -- 'data set or dataset' -> 'مجموعة البيانات' -- 'DataFrame' -> 'DataFrame' -- 'dataset' -> 'مجموعة بيانات' -- 'deep learning' -> 'التعلم العميق' -- 'deep model' -> 'نموذج عميق' -- 'dense feature' -> 'خاصية كثيفة' -- 'depth' -> 'العمق' -- 'discrete feature' -> 'خاصية محدّدة القيم' -- 'discrete features' -> 'الميزات المنفصلة' -- 'dynamic' -> 'ديناميكي' -- 'dynamic model' -> 'نموذج ديناميكي' -- 'early stopping' -> 'الإيقاف المبكر' -- 'embedding layer' -> 'طبقة التضمين' -- 'embedding layers' -> 'طبقات تضمين' -- 'epoch' -> 'حقبة' -- 'example' -> 'على سبيل المثال' -- 'false negative (FN)' -> 'سالب خاطئ (FN)' -- 'false negatives' -> 'الحالات السالبة الخاطئة' -- 'false positive (FP)' -> 'موجب خاطئ (FP)' -- 'false positive rate' -> 'معدّل الموجب الخاطئ' -- 'false positive rate (FPR)' -> 'معدّل الموجب الخاطئ' -- 'false positives' -> 'الحالات الموجبة 
الخاطئة' -- 'feature' -> 'ميزة' -- 'feature cross' -> 'مضروب مجموعات الخصائص' -- 'feature crosses' -> 'تقاطع الميزات' -- 'feature engineering' -> 'هندسة الميزات' -- 'feature set' -> 'مجموعة الميزات' -- 'feature vector' -> 'متّجه الميزات' -- 'feedback loop' -> 'حلقة الملاحظات' -- 'generalization' -> 'التعميم' -- 'generalization curve' -> 'منحنى التعميم' -- 'gradient descent' -> 'النزول المتدرّج' -- 'ground truth' -> 'معلومات فعلية' -- 'hidden layer' -> 'الطبقة المخفية' -- 'hidden layer(s)' -> 'الطبقات المخفية' -- 'hyperparameter' -> 'المعلَمة الفائقة' -- 'independently and identically distributed (i.i.d)' -> 'موزّعة بشكل مستقل ومتشابه' -- 'inference' -> 'الاستنتاج' -- 'input layer' -> 'طبقة الإدخال' -- 'interpretability' -> 'القابلية للتفسير' -- 'iteration' -> 'التكرار' -- 'L0regularization' -> 'التسوية من النوع L0' -- 'L1loss' -> 'L1' -- 'L1regularization' -> 'التسوية من النوع L1' -- 'L2loss' -> 'فقدانL2' -- 'L2regularization' -> 'التسوية من النوع L2' -- 'label' -> 'التصنيف' -- 'labeled example' -> 'مثال مصنّف' -- 'lambda' -> 'lambda' -- 'layer' -> 'طبقة' -- 'learning rate' -> 'معدّل التعلّم' -- 'linear' -> 'خطي' -- 'linear model' -> 'النموذج الخطي' -- 'linear models' -> 'النماذج الخطية' -- 'linear regression' -> 'الانحدار الخطي' -- 'Log Loss' -> 'الخسارة اللوغاريتمية' -- 'log-odds' -> 'لوغاريتم فرص الأفضلية' -- 'logistic regression' -> 'الانحدار اللوجستي' -- 'loss' -> 'خسارة' -- 'loss curve' -> 'منحنى الخسارة' -- 'loss function' -> 'دالة الخسارة' -- 'machine learning' -> 'تعلُم الآلة' -- 'majority class' -> 'الفئة الأكبر' -- 'mini-batch' -> 'دفعة صغيرة' -- 'minority class' -> 'فئة الأقلية' -- 'model' -> 'نموذج' -- 'multi-class classification' -> 'التصنيف المتعدّد الفئات' -- 'negative class' -> 'فئة سالبة' -- 'negative classes' -> 'الفئات السلبية' -- 'neural network' -> 'شبكة عصبونية' -- 'neural networks' -> 'للشبكات العصبية' -- 'neuron' -> 'عصبون' -- 'node (neural network)' -> 'عقدة (شبكة عصبونية)' -- 'nonlinear' -> 'غير خطي' -- 'nonstationarity' -> 'عدم الثبات' 
-- 'normalization' -> 'التسوية' -- 'numerical data' -> 'البيانات الرقمية' -- 'offline' -> 'بلا إنترنت' -- 'offline inference' -> 'الاستنتاج المؤخَّر' -- 'one-hot encoding' -> 'الترميز الأحادي' -- 'one-hot vector' -> 'متجهًا ذا ترميز ساخن' -- 'one-vs.-all' -> 'واحد-مقابل-الكل' -- 'online' -> 'online' -- 'online inference' -> 'الاستنتاج الحي' -- 'output layer' -> 'الطبقة النهائية' -- 'output layers' -> 'الطبقات النهائية' -- 'overfitting' -> 'فرط التخصيص' -- 'pandas' -> 'باندا' -- 'parameter' -> 'مَعلمة' -- 'positive class' -> 'فئة موجبة' -- 'positive classes' -> 'الفئات الإيجابية' -- 'post-processing' -> 'المعالجة اللاحقة' -- 'precision' -> 'الدقة' -- 'prediction' -> 'التوقّع' -- 'proxy labels' -> 'تصنيفات تقريبية' -- 'RAG' -> 'التوليد المعزّز بالاسترجاع (RAG)' -- 'rater' -> 'مُصنِّف' -- 'recall' -> 'تذكُّر الإعلان' -- 'Rectified Linear Unit (ReLU)' -> 'وحدة خطية مصحَّحة (ReLU)' -- 'regression model' -> 'نموذج الانحدار' -- 'regularization' -> 'التسوية' -- 'regularization rate' -> 'معدّل التسوية' -- 'ReLU' -> 'ReLU' -- 'retrieval-augmented generation' -> 'التوليد المعزّز بالاسترجاع' -- 'retrieval-augmented generation (RAG)' -> 'التوليد المعزّز بالاسترجاع (RAG)' -- 'ROC (receiver operating characteristic) Curve' -> 'منحنى الأمثلة الإيجابية' -- 'ROC curve' -> 'منحنى ROC' -- 'Root Mean Squared Error (RMSE)' -> 'جذر الخطأ التربيعي المتوسّط (RMSE)' -- 'sigmoid function' -> 'الدالّة الإسية' -- 'softmax' -> 'softmax' -- 'sparse feature' -> 'خاصية متناثرة' -- 'sparse representation' -> 'التمثيل المتناثر' -- 'sparse vector' -> 'متّجه متناثر' -- 'squared loss' -> 'الخسارة التربيعية' -- 'static' -> 'ثابت' -- 'static inference' -> 'الاستنتاج الثابت' -- 'static model' -> 'النموذج الثابت' -- 'stationarity' -> 'الثبات' -- 'Stochastic Gradient Descent (SGD)' -> 'النزول المتدرّج العشوائي (SGD)' -- 'supervised learning' -> 'التعلم المُوجّه' -- 'supervised machine learning' -> 'تعلُّم الآلة الخاضع للإشراف' -- 'synthetic feature' -> 'خاصية مصطنعة' -- 'synthetic features' -> 'ميزات 
اصطناعية' -- 'test loss' -> 'فقدان الاختبار' -- 'training' -> 'التدريب' -- 'training loss' -> 'فقدان التدريب' -- 'training set' -> 'مجموعة التدريب' -- 'training-serving skew' -> 'اختلاف بين بيانات التدريب وبيانات العرض' -- 'true negative (TN)' -> 'سالب صحيح' -- 'true negatives' -> 'الحالات السالبة الصحيحة' -- 'true positive (TP)' -> 'موجب صحيح (TP)' -- 'true positive rate' -> 'معدّل الإيجابية الحقيقية' -- 'true positive rate (TPR)' -> 'معدّل الموجب الصحيح (TPR)' -- 'true positives' -> 'الحالات الموجبة الصحيحة' -- 'underfitting' -> 'فرط التعميم' -- 'unlabeled example' -> 'مثال غير مصنّف' -- 'unsupervised machine learning' -> 'تعلُّم الآلة غير الموجَّه' -- 'validation' -> 'الإثبات' -- 'validation dataset' -> 'مجموعة بيانات التحقّق من الصحة' -- 'validation loss' -> 'فقدان التحقّق من الصحة' -- 'validation set' -> 'مجموعة التحقّق' -- 'weight' -> 'الوزن' -- 'weighted sum' -> 'المجموع الموزون' -- 'Z-score normalization' -> 'التسوية باستخدام الدرجة المعيارية' diff --git a/src/ol_openedx_course_translations/ol_openedx_course_translations/glossaries/machine_learning/de.txt b/src/ol_openedx_course_translations/ol_openedx_course_translations/glossaries/machine_learning/de.txt deleted file mode 100644 index c53a3be9e..000000000 --- a/src/ol_openedx_course_translations/ol_openedx_course_translations/glossaries/machine_learning/de.txt +++ /dev/null @@ -1,175 +0,0 @@ -# DE HINTS -## TERM MAPPINGS -These are preferred terminology choices for this language. Use them whenever they sound natural; adapt freely if context requires. 
- -- 'accuracy' -> ‚Genauigkeit' -- 'activation function' -> ‚Aktivierungsfunktion' -- 'artificial intelligence' -> ‚künstliche Intelligenz' -- 'AUC' -> ‚AUC' -- 'AUC (Area under the ROC curve)' -> ‚AUC (Area Under the ROC Curve, Bereich unter der ROC-Kurve)' -- 'backpropagation' -> ‚Rückpropagation' -- 'batch' -> ‚Batch' -- 'batch size' -> ‚Batchgröße' -- 'bias (ethics/fairness)' -> ‚Bias (Ethik/Fairness)' -- 'bias (math) or bias term' -> ‚Bias (mathematisch) oder Bias-Term' -- 'bias in ethics and fairness' -> ‚Bias in Bezug auf Ethik und Fairness' -- 'bias term' -> ‚Bias-Term' -- 'binary classification' -> ‚Binärklassifizierung' -- 'bucketing' -> ‚Bucketing' -- 'categorical' -> ‚kategorialen' -- 'categorical data' -> ‚Kategoriale Daten' -- 'class' -> ‚Klasse' -- 'class-imbalanced dataset' -> ‚Dataset mit Klassenungleichgewicht' -- 'class-imbalanced datasets' -> ‚Datasets mit ungleichmäßiger Klassenverteilung' -- 'classification' -> ‚Klassifizierungsaufgabe' -- 'classification model' -> ‚Klassifikationsmodell' -- 'classification threshold' -> ‚Klassifizierungsschwellenwert' -- 'classifier' -> ‚Klassifikator' -- 'clipping' -> ‚Clipping' -- 'confusion matrix' -> ‚Wahrheitsmatrix' -- 'continuous feature' -> ‚stetiges Feature' -- 'convergence' -> ‚Konvergenz' -- 'data set or dataset' -> ‚Dataset oder Dataset' -- 'DataFrame' -> ‚DataFrame' -- 'dataset' -> ‚Dataset' -- 'deep learning' -> ‚Deep Learning' -- 'deep model' -> ‚Deep-Modell' -- 'dense feature' -> ‚vollbesetztes Feature' -- 'depth' -> ‚Tiefe' -- 'discrete feature' -> ‚diskretes Feature' -- 'discrete features' -> ‚diskrete Features' -- 'dynamic' -> ‚dynamic' -- 'dynamic model' -> ‚dynamisches Modell' -- 'early stopping' -> ‚Vorzeitiges Beenden' -- 'embedding layer' -> ‚Einbettungsebene' -- 'embedding layers' -> ‚Einbettungsebenen' -- 'epoch' -> ‚Epoche' -- 'example' -> ‚Beispiel' -- 'false negative (FN)' -> ‚falsch negativ (FN)' -- 'false negatives' -> ‚falsch negativen Ergebnisse' -- 'false positive (FP)' -> 
‚falsch positiv (FP)' -- 'false positive rate' -> ‚Falsch-Positiv-Rate' -- 'false positive rate (FPR)' -> ‚Rate falsch positiver Ergebnisse (False Positive Rate, FPR)' -- 'false positives' -> ‚falsch positiven Ergebnisse' -- 'feature' -> ‚Feature' -- 'feature cross' -> ‚Featureverknüpfung' -- 'feature crosses' -> ‚Feature-Kombinationen' -- 'feature engineering' -> ‚Feature Engineering' -- 'feature set' -> ‚Feature-Set' -- 'feature vector' -> ‚Featurevektor' -- 'feedback loop' -> ‚Feedbackschleife' -- 'generalization' -> ‚Generalisierung' -- 'generalization curve' -> ‚Verallgemeinerungskurve' -- 'gradient descent' -> ‚Gradientenabstieg' -- 'ground truth' -> ‚Ground Truth' -- 'hidden layer' -> ‚versteckte Ebene' -- 'hidden layer(s)' -> ‚verborgenen Schichten' -- 'hyperparameter' -> ‚Hyperparameter' -- 'independently and identically distributed (i.i.d)' -> ‚unabhängig und identisch verteilt (i.i.d.)' -- 'inference' -> ‚Inferenz' -- 'input layer' -> ‚Eingabelayer' -- 'interpretability' -> ‚Interpretierbarkeit' -- 'iteration' -> ‚Iteration' -- 'L0regularization' -> ‚L0-Regularisierung' -- 'L1loss' -> ‚L1-Verlust' -- 'L1regularization' -> ‚L1-Regularisierung' -- 'L2loss' -> ‚L2-Verlust' -- 'L2regularization' -> ‚L2-Regularisierung' -- 'label' -> ‚Label' -- 'labeled example' -> ‚Beispiel mit Label' -- 'lambda' -> ‚Lambda' -- 'layer' -> ‚Layer' -- 'learning rate' -> ‚Lernrate' -- 'linear' -> ‚Linear' -- 'linear model' -> ‚Lineares Modell' -- 'linear models' -> ‚linearen Modellen' -- 'linear regression' -> ‚lineare Regression' -- 'Log Loss' -> ‚Log Loss' -- 'log-odds' -> ‚Log-Odds' -- 'logistic regression' -> ‚logistische Regression' -- 'loss' -> ‚Niederlage' -- 'loss curve' -> ‚Verlustkurve' -- 'loss function' -> ‚Verlustfunktion' -- 'machine learning' -> ‚Machine Learning' -- 'majority class' -> ‚Mehrheitsklasse' -- 'mini-batch' -> ‚Mini-Batch' -- 'minority class' -> ‚Minderheitsklasse' -- 'model' -> ‚Modell' -- 'multi-class classification' -> ‚Klassifizierung mit 
mehreren Klassen' -- 'negative class' -> ‚negative Klasse' -- 'negative classes' -> ‚negativen Klassen' -- 'neural network' -> ‚neuronales Netzwerk' -- 'neural networks' -> ‚neuronale Netze' -- 'neuron' -> ‚Neuron' -- 'node (neural network)' -> ‚Knoten (neuronales Netzwerk)' -- 'nonlinear' -> ‚nicht linear' -- 'nonstationarity' -> ‚Nichtstationarität' -- 'normalization' -> ‚Normalisierung' -- 'numerical data' -> ‚Numerische Daten' -- 'offline' -> ‚offline' -- 'offline inference' -> ‚Offlineinferenz' -- 'one-hot encoding' -> ‚One-Hot-Codierung' -- 'one-hot vector' -> ‚One-Hot-Vektor' -- 'one-vs.-all' -> ‚One-vs.-All' -- 'online' -> ‚online' -- 'online inference' -> ‚Onlineinferenz' -- 'output layer' -> ‚Ausgabeschicht' -- 'output layers' -> ‚Ausgabelayer' -- 'overfitting' -> ‚Überanpassung' -- 'pandas' -> ‚pandas' -- 'parameter' -> ‚Parameter' -- 'positive class' -> ‚positive Klasse' -- 'positive classes' -> ‚positive Klassen' -- 'post-processing' -> ‚Nachbearbeitung' -- 'precision' -> ‚Precision' -- 'prediction' -> ‚Vorhersage-' -- 'proxy labels' -> ‚Proxy-Labels' -- 'RAG' -> ‚RAG' -- 'rater' -> ‚Bewerter' -- 'recall' -> ‚Rückruf' -- 'Rectified Linear Unit (ReLU)' -> ‚Rektifizierte lineare Einheit (ReLU)' -- 'regression model' -> ‚Regressionsmodell' -- 'regularization' -> ‚Regularisierung' -- 'regularization rate' -> ‚Regularisierungsrate' -- 'ReLU' -> ‚ReLU' -- 'retrieval-augmented generation' -> ‚Retrieval-Augmented Generation' -- 'retrieval-augmented generation (RAG)' -> ‚Retrieval-Augmented Generation (RAG)' -- 'ROC (receiver operating characteristic) Curve' -> ‚ROC-Kurve (Receiver Operating Characteristic)' -- 'ROC curve' -> ‚ROC-Kurve' -- 'Root Mean Squared Error (RMSE)' -> ‚Wurzel der mittleren Fehlerquadratsumme (RMSE)' -- 'sigmoid function' -> ‚Sigmoidfunktion' -- 'softmax' -> ‚Softmax-Funktion' -- 'sparse feature' -> ‚dünnbesetztes Feature' -- 'sparse representation' -> ‚dünnbesetzte Darstellung' -- 'sparse vector' -> ‚dünnbesetzter Vektor' -- 'squared 
loss' -> ‚Quadratischer Verlust' -- 'static' -> ‚Statisch' -- 'static inference' -> ‚Statische Inferenz' -- 'static model' -> ‚statischen Modell' -- 'stationarity' -> ‚Stationarität' -- 'Stochastic Gradient Descent (SGD)' -> ‚Stochastic Gradient Descent (SGD)' -- 'supervised learning' -> ‚überwachtes Lernen' -- 'supervised machine learning' -> ‚überwachtes maschinelles Lernen' -- 'synthetic feature' -> ‚synthetisches Feature' -- 'synthetic features' -> ‚synthetische Features' -- 'test loss' -> ‚Testverlust' -- 'training' -> ‚Training' -- 'training loss' -> ‚Trainingsverlust' -- 'training set' -> ‚Trainings-Dataset' -- 'training-serving skew' -> ‚Abweichungen zwischen Training und Bereitstellung' -- 'true negative (TN)' -> ‚richtig negativ (RN)' -- 'true negatives' -> ‚richtig negativen Ergebnisse' -- 'true positive (TP)' -> ‚Richtig positiv (TP)' -- 'true positive rate' -> ‚Rate der richtig positiven Ergebnisse' -- 'true positive rate (TPR)' -> ‚Rate richtig positiver Ergebnisse (True Positive Rate, TPR)' -- 'true positives' -> ‚richtig positiven Ergebnisse' -- 'underfitting' -> ‚Unteranpassung' -- 'unlabeled example' -> ‚Beispiel ohne Label' -- 'unsupervised machine learning' -> ‚unüberwachtes maschinelles Lernen' -- 'validation' -> ‚Validierung' -- 'validation dataset' -> ‚Validierungs-Dataset' -- 'validation loss' -> ‚Validierungsverlust' -- 'validation set' -> ‚Validierungs-Dataset' -- 'weight' -> ‚Gewicht' -- 'weighted sum' -> ‚gewichtete Summe' -- 'Z-score normalization' -> ‚Z-Score-Normalisierung' diff --git a/src/ol_openedx_course_translations/ol_openedx_course_translations/glossaries/machine_learning/el.txt b/src/ol_openedx_course_translations/ol_openedx_course_translations/glossaries/machine_learning/el.txt deleted file mode 100644 index 22c5b4e4c..000000000 --- a/src/ol_openedx_course_translations/ol_openedx_course_translations/glossaries/machine_learning/el.txt +++ /dev/null @@ -1,988 +0,0 @@ -# EL HINTS -## TERM MAPPINGS -The following mappings are the 
CANONICAL Greek translations for AI/ML terms. -When translating TO GREEK, you MUST use these exact Greek terms for the listed English expressions whenever the same technical meaning is intended. - -- 'a priori probability' -> «πιθανότητα εκ των προτέρων» -- 'A* Search' -> «αλγόριθμοι αναζήτησης Α*» -- 'Abductive logic programming (ALP)' -> «Προγραμματισμός απαγωγικής λογικής (ALP)» -- 'Abductive reasoning' -> «Απαγωγικός συλλογισμός» -- 'abductive reasoning' -> «απαγωγική συλλογιστική» -- 'Abstract data type' -> «Αφηρημένος τύπος δεδομένων» -- 'abstract plan' -> «αφηρημένο πλάνο» -- 'Abstraction' -> «Αφαίρεση» -- 'Accelerating change' -> «Επιταχυνόμενη αλλαγή» -- 'accretive associative memory' -> «προσαυξητική μνήμη συσχέτισης» -- 'acquisitional efficiency' -> «αποδοτικότητα απόκτησης» -- 'action' -> «ενέργεια» -- 'Action language' -> «Γλώσσα δράσης» -- 'Action model learning' -> «Εκμάθηση μοντέλου δράσης» -- 'action schemas' -> «σχήματα ενεργειών» -- 'Action selection' -> «Επιλογή δράσης» -- 'Activation function' -> «Λειτουργία ενεργοποίησης» -- 'activation function' -> «συνάρτηση ενεργοποίησης» -- 'active' -> «ενεργός» -- 'active database' -> «ενεργή βάση δεδομένων» -- 'active rule' -> «ενεργός κανόνας» -- 'active troubleshooting' -> «ενεργητική διάγνωση» -- 'Adaptive algorithm' -> «Προσαρμοστικός αλγόριθμος» -- 'Adaptive neuro fuzzy inference system (ANFIS)' -> «Προσαρμοστικό σύστημα ασαφών συμπερασμάτων δίκτυου» -- 'adaptivity' -> «προσαρμοστικότητα» -- 'add list' -> «λίστα προσθηκών» -- 'admissibility criterion' -> «κριτήριο αποδοχής» -- 'admissible' -> «αποδεκτός» -- 'Admissible heuristic' -> «Παραδεκτό ευρετικό» -- 'adversary game' -> «ανταγωνιστικό παίγνιο» -- 'Affective computing' -> «Συναισθηματική υπολογιστική» -- 'agent' -> «πράκτορας» -- 'Agent architecture' -> «Αρχιτεκτονική πράκτορα» -- 'agglomerative algorithm' -> «αλγόριθμος συγχώνευσης» -- 'AI' -> «τεχνητή νοημοσύνη» -- 'AI accelerator' -> «Επιταχυντής AI» -- 'AI-complete' -> «AI-πλήρης» -- 
'Algorithm' -> «Αλγόριθμος» -- 'algorithm' -> «αλγόριθμος» -- 'Algorithmic efficiency' -> «Αλγοριθμική απόδοση» -- 'Algorithmic probability' -> «Αλγοριθμική πιθανότητα» -- 'Alpha-Beta algorithm' -> «αλγόριθμος άλφα-βήτα» -- 'Alpha-Beta search' -> «αναζήτηση άλφα-βήτα» -- 'Ambient intelligence (AmI)' -> «Ευφυΐα περιβάλλοντος» -- 'ambiguity' -> «πολυσημαντικότητα» -- 'analogical reasoning' -> «συλλογιστική με αναλογίες» -- 'Analysis of algorithms' -> «Ανάλυση αλγορίθμων» -- 'Analytics' -> «Ανάλυση» -- 'AND tree' -> «δένδρο ΚΑΙ» -- 'AND/OR tree' -> «δένδρο ΚΑΙ / Ή» -- 'Answer set programming (ASP)' -> «Προγραμματισμός συνόλου απαντήσεων» -- 'Anytime algorithm' -> «Ανα πάσα στιγμή Αλγόριθμος» -- 'Application programming interface (API)' -> «Διεπαφή προγραμματισμού εφαρμογών» -- 'Approximate string matching' -> «Κατά προσέγγιση ταίριασμα συμβολοσειρών» -- 'Approximation error' -> «Σφάλμα προσέγγισης» -- 'arc consistency' -> «συνέπεια τόξου» -- 'Argumentation framework' -> «Πλαίσιο επιχειρημάτων» -- 'artificial agent' -> «τεχνητός πράκτορας» -- 'Artificial general intelligence (AGI)' -> «Τεχνητή γενική νοημοσύνη» -- 'Artificial immune system (AIS)' -> «Τεχνητό ανοσοποιητικό σύστημα» -- 'artificial intelligence' -> «τεχνητή νοημοσύνη» -- 'Artificial Intelligence Markup Language' -> «Γλώσσα σήμανσης τεχνητής νοημοσύνης» -- 'Artificial intelligence (AI)' -> «Τεχνητή νοημοσύνη» -- 'Artificial neural network (ANN)' -> «Τεχνητό νευρωνικό δίκτυο» -- 'artificial neuron' -> «τεχνητός νευρώνας» -- 'association rules' -> «κανόνες συσχέτισης» -- 'associative memory' -> «μνήμη συσχέτισης» -- 'Asymptotic computational complexity' -> «Ασυμπτωτική υπολογιστική πολυπλοκότητα» -- 'atomic formula' -> «ατομικός τύπος» -- 'atoms' -> «άτομα» -- 'attribute selection' -> «επιλογή χαρακτηριστικών» -- 'attributes' -> «χαρακτηριστικά» -- 'Attributional calculus' -> «Λογισμός απόδοσης» -- 'auctions protocols' -> «πρωτόκολλα πλειστηριασμού» -- 'Augmented reality (AR)' -> «Επαυξημένη πραγματικότητα» 
-- 'auto-associative memories' -> «αυτοσυσχετιζόμενες μνήμες» -- 'Automata theory' -> «Θεωρία αυτομάτων» -- 'Automated machine learning (AutoML)' -> «Αυτοματοποιημένη μηχανική εκμάθηση» -- 'Automated planning and scheduling' -> «Αυτοματοποιημένος προγραμματισμός» -- 'Automated reasoning' -> «Αυτοματοποιημένη συλλογιστική» -- 'automated synopsis' -> «αυτόματη περίληψη» -- 'automatic translation' -> «αυτόματη μετάφραση» -- 'Autonomic computing (AC)' -> «Αυτόνομος Υπολογισμός» -- 'Autonomous car' -> «Αυτόνομο αυτοκίνητο» -- 'Autonomous robot' -> «Αυτόνομο ρομπότ» -- 'autonomy' -> «αυτονομία» -- 'average reward model' -> «μοντέλο μέσης ανταμοιβής» -- 'axon' -> «άξονας» -- 'back propagation' -> «ανάστροφη μετάδοση» -- 'Backpropagation' -> «Ο πίσω πολλαπλασιασμός» -- 'Backpropagation through time (BPTT)' -> «Πίσω διάδοση στο χρόνο (BPTT)» -- 'backtracking' -> «οπισθοδρόμηση» -- 'backtracking points' -> «σημεία οπισθοδρόμησης» -- 'Backward chaining' -> «Αλυσίδα προς τα πίσω» -- 'backward chaining' -> «ανάστροφη ακολουθία εκτέλεσης» -- 'backward pass' -> «ανάστροφο πέρασμα» -- 'Bag-of-words model' -> «Μοντέλο τσάντα με λέξεις» -- 'Bag-of-words model in computer vision' -> «Μοντέλο τσάντας λέξεων στην όραση υπολογιστή» -- 'basic probability assignment' -> «βασική κατανομή πιθανότητας» -- 'batch learning' -> «μάθηση δέσμης» -- 'Batch normalization' -> «Ομαλοποίηση παρτίδας» -- 'Bayesian programming' -> «Μπεϋζιανός προγραμματισμός» -- 'Beam Search' -> «ακτινωτή αναζήτηση» -- 'Bees algorithm' -> «Αλγόριθμος μελισσών» -- 'Behavior informatics (BI)' -> «Πληροφορική συμπεριφοράς» -- 'Behavior tree (BT)' -> «Δέντρο συμπεριφοράς» -- 'belief' -> «πεποίθηση» -- 'Belief-desire-intention software model (BDI)' -> «Μοντέλο λογισμικού πεποίθησης-επιθυμίας-πρόθεσης» -- 'benevolence' -> «αγαθή προαίρεση» -- 'Best-First Search' -> «αναζήτηση πρώτα στο καλύτερο» -- 'Bias–variance tradeoff' -> «Ανταλλαγή μεροληψίας-διακύμανσης» -- 'bidirectional associative memories' -> «μνήμη συσχέτισης 
διπλής κατεύθυνσης» -- 'Bidirectional Search' -> «αναζήτηση διπλής κατεύθυνσης» -- 'Big data' -> «Μεγάλα δεδομένα» -- 'Big O notation' -> «Σημείωση Big O» -- 'binary constraint' -> «δυαδικός περιορισμός» -- 'Binary tree' -> «Δυαδικό δέντρο» -- 'blackboard' -> «μαυροπίνακας» -- 'blackboard architecture' -> «αρχιτεκτονική μαυροπίνακα» -- 'Blackboard system' -> «Σύστημα μαυροπίνακα» -- 'blackboard systems' -> «συστήματα μαυροπίνακα» -- 'Blind Search' -> «τυφλή αναζήτηση» -- 'Boltzmann machine' -> «Μηχανή Boltzmann» -- 'Boolean satisfiability problem' -> «Πρόβλημα ικανοποίησης Boolean» -- 'Brain technology' -> «Τεχνολογία εγκεφάλου» -- 'Branch and Bound Search' -> «αναζήτηση με επέκταση και οριοθέτηση» -- 'Branching factor' -> «Συντελεστής διακλάδωσης» -- 'branching factor' -> «παράγοντας διακλάδωσης» -- 'Breadth First Search' -> «αναζήτηση πρώτα σε» -- 'Brute-force search' -> «Αναζήτηση ωμής βίας» -- 'candidate elimination' -> «απαλοιφή υποψηφίων» -- 'canonical form' -> «κανονική μορφή» -- 'canonical formation rules' -> «κανόνες ορθής διαμόρφωσης» -- 'Capsule neural network (CapsNet)' -> «Νευρωνικό δίκτυο κάψουλας» -- 'card sorting' -> «ταξινόμηση καρτών» -- 'case adaptation' -> «προσαρμογή περιπτώσεων» -- 'case indexing' -> «δεικτοδότηση περιπτώσεων» -- 'case learning' -> «εκμάθηση περιπτώσεων» -- 'case library' -> «βιβλιοθήκη περιπτώσεων» -- 'case retrieval' -> «ανάκληση περιπτώσεων» -- 'case verification' -> «επαλήθευση περιπτώσεων» -- 'case-based' -> «βασισμένο σε περιπτώσεις» -- 'case-based learning' -> «μάθηση κατά περίπτωση» -- 'case-based planning' -> «σχεδιασμός βασισμένος σε παραδείγματα» -- 'case-based reasoning' -> «συλλογιστική βασισμένη σε περιπτώσεις» -- 'Case-based reasoning (CBR)' -> «Συλλογισμός βάσει περιπτώσεων» -- 'causal link' -> «αιτιολογικές συνδέσεις» -- 'causal model' -> «αιτιοκρατικό μοντέλο» -- 'certainty factors' -> «συντελεστές βεβαιότητας» -- 'chaining' -> «ακολουθία εκτέλεσης κανόνων» -- 'chromosome' -> «χρωμόσωμα» -- 'chronological 
backtracking' -> «χρονική οπισθοδρόμηση» -- 'circumscription' -> «μέθοδος περιγράμματος» -- 'class' -> «κλάση» -- 'class extension' -> «επέκταση κλάσης» -- 'classical negation' -> «κλασική άρνηση» -- 'classification' -> «ταξινόμηση» -- 'classification rules' -> «κανόνες ταξινόμησης» -- 'classification trees' -> «δένδρο ταξινόμησης» -- 'clausal form' -> «προτασιακή μορφή» -- 'closed formula' -> «κλειστός τύπος» -- 'closed set' -> «κλειστό σύνολο» -- 'closed world' -> «κλειστός κόσμος» -- 'closed world assumption' -> «υπόθεση κλειστού κόσμού» -- 'Cloud robotics' -> «Cloud ρομποτική» -- 'CLP' -> «λογικός προγραμματισμός με περιορισμούς» -- 'Cluster analysis' -> «Ανάλυση συστάδων» -- 'clustering' -> «ομαδοποίηση» -- 'clusters' -> «ομάδες» -- 'coarse grain' -> «αδρή υφή» -- 'Cobweb' -> «Ιστός αράχνης» -- 'Cognitive architecture' -> «Γνωστική αρχιτεκτονική» -- 'Cognitive computing' -> «Γνωστική Υπολογιστική» -- 'Cognitive science' -> «Γνωστική επιστήμη» -- 'cognitive science' -> «γνωσιολογική επιστήμη» -- 'combinatorial explosion' -> «συνδυαστική έκρηξη» -- 'Combinatorial optimization' -> «Συνδυαστική βελτιστοποίηση» -- 'commitment' -> «δέσμευση» -- 'Committee machine' -> «Μηχανή επιτροπής» -- 'common sense' -> «κοινή λογική» -- 'Commonsense knowledge' -> «Κοινή γνώση» -- 'Commonsense reasoning' -> «Κοινός συλλογισμός» -- 'communication protocol' -> «πρωτόκολλο επικοινωνίας» -- 'competition' -> «ανταγωνισμός» -- 'competitive neural networks' -> «νευρωνικά δίκτυα με ανταγωνισμό» -- 'compiled knowledge' -> «αυτοματοποιημένη γνώση» -- 'complementary pairs' -> «συμπληρωματικά ζεύγη» -- 'complete' -> «πλήρης» -- 'complete plan' -> «πλήρες πλάνο» -- 'completeness' -> «πληρότητα» -- 'Computational chemistry' -> «Υπολογιστική χημεία» -- 'Computational complexity theory' -> «Υπολογιστική θεωρία πολυπλοκότητας» -- 'Computational creativity' -> «Υπολογιστική δημιουργικότητα» -- 'Computational cybernetics' -> «Υπολογιστική κυβερνητική» -- 'Computational humor' -> «Υπολογιστικό 
χιούμορ» -- 'computational intelligence' -> «υπολογιστική νοημοσύνη» -- 'Computational intelligence (CI)' -> «Υπολογιστική νοημοσύνη» -- 'Computational learning theory' -> «Υπολογιστική θεωρία μάθησης» -- 'Computational linguistics' -> «Υπολογιστική γλωσσολογία» -- 'Computational mathematics' -> «Υπολογιστικά μαθηματικά» -- 'Computational neuroscience' -> «Υπολογιστική νευροεπιστήμη» -- 'Computational number theory' -> «Υπολογιστική θεωρία αριθμών» -- 'Computational problem' -> «Υπολογιστικό πρόβλημα» -- 'Computational statistics' -> «Υπολογιστική στατιστική» -- 'Computational Tree Logic' -> «Λογική Υπολογιστικού Δένδρου» -- 'Computer audition (CA)' -> «Οντισιόν υπολογιστή (CA)» -- 'Computer science' -> «Επιστήμη των υπολογιστών» -- 'Computer vision' -> «Υπολογιστική όραση» -- 'Computer-automated design (CAutoD)' -> «Αυτοματοποιημένη σχεδίαση υπολογιστή» -- 'concept' -> «έννοια» -- 'Concept drift' -> «Εννοιολογική μετατόπιση» -- 'concept learning' -> «μάθηση εννοιών» -- 'concept type' -> «τύπος έννοιας» -- 'conceptual dependency' -> «εννοιολογική εξάρτηση» -- 'conceptual dependency graph' -> «γράφος εννοιολογικής εξάρτησης» -- 'conceptual dependency relationships' -> «σχέσεις εννοιολογικής εξάρτησης» -- 'conceptual graphs' -> «εννοιολογικός γράφος» -- 'conceptual relation' -> «εννοιολογικές σχέσεις» -- 'conditional effects' -> «αποτελέσματα υπό προϋπόθεση» -- 'conditional probability' -> «πιθανότητα υπό συνθήκη» -- 'confidence' -> «εμπιστοσύνη» -- 'configuration' -> «διαμόρφωση» -- 'conflict' -> «σύγκρουση κανόνων» -- 'conflict resolution' -> «επίλυση συγκρούσεων» -- 'conflict set' -> «σύνολο σύγκρουσης» -- 'conflicting literals' -> «αντικρουόμενα λεκτικά» -- 'conjunctive model of classification' -> «συζευκτικό μοντέλο ταξινόμησης» -- 'conjunctive normal form' -> «συζευκτική κανονική μορφή» -- 'Connectionism' -> «Συνδεσιονισμός» -- 'connectionist approach' -> «συνδετική προσέγγιση» -- 'connectives' -> «συνδετικά» -- 'consistency check' -> «έλεγχος συνέπειας» -- 
'consistency check algorithms' -> «αλγόριθμος ελέγχου συνέπειας» -- 'Consistent heuristic' -> «Συνεπής ευρετική» -- 'Constrained conditional model (CCM)' -> «Περιορισμένο υπό όρους μοντέλο» -- 'constraint' -> «περιορισμός» -- 'constraint graph' -> «γράφος περιορισμών» -- 'Constraint logic programming' -> «Προγραμματισμός λογικής περιορισμών» -- 'Constraint Logic Programming' -> «λογικός προγραμματισμός με περιορισμούς» -- 'constraint programming' -> «προγραμματισμός με περιορισμούς» -- 'constraint propagation' -> «διάδοση περιορισμών» -- 'constraint satisfaction' -> «ικανοποίηση περιορισμών» -- 'constraint satisfaction problems' -> «προβλήματα ικανοποίησης περιορισμών» -- 'constraint solving problems' -> «προβλήματα επίλυσης περιορισμών» -- 'Constructed language' -> «Κατασκευασμένη γλώσσα» -- 'content addressability' -> «δυνατότητα ανάκλησης περιεχομένου» -- 'context' -> «συμφραζόμενα» -- 'contracting net protocol' -> «συντονισμός πρακτόρων με σύναψη συμβολαίων» -- 'control' -> «έλεγχος» -- 'Control theory' -> «Θεωρία ελέγχου» -- 'convention' -> «σύμβαση» -- 'Convolutional neural network' -> «Συνελικτικό νευρωνικό δίκτυο» -- 'cooperation' -> «συνεργασία» -- 'crisp value' -> «σαφής τιμή» -- 'critical point' -> «κρίσιμο σημείο» -- 'criticality value' -> «τιμή σημαντικότητας» -- 'critics' -> «κανόνες προσαρμογής περιπτώσεων» -- 'Crossover' -> «Διασταύρωση» -- 'crossover' -> «διασταύρωση» -- 'crossover mask' -> «μάσκα διασταύρωσης» -- 'Darkforest' -> «Σκοτεινό δάσος» -- 'Dartmouth workshop' -> «Εργαστήριο Dartmouth» -- 'data abstraction' -> «γενίκευση δεδομένων» -- 'Data augmentation' -> «Αύξηση δεδομένων» -- 'data driven' -> «αναζήτηση οδηγούμενη από δεδομένα» -- 'Data fusion' -> «Συγχώνευση δεδομένων» -- 'Data integration' -> «Ενοποίηση δεδομένων» -- 'Data mining' -> «Εξόρυξη δεδομένων» -- 'data mining' -> «εξόρυξη σε δεδομένα» -- 'data mining incremental' -> «εξόρυξη σε δεδομένα σταδιακή» -- 'Data science' -> «Επιστημονικά δεδομένα» -- 'Data set' -> «Σύνολο 
δεδομένων» -- 'data space' -> «χώρος δεδομένων» -- 'data warehouse' -> «συστήματα αποθήκευσης δεδομένων» -- 'Data warehouse (DW or DWH)' -> «Αποθήκη δεδομένων» -- 'Datalog' -> «Καταγραφή δεδομένων» -- 'defeasible inference' -> «αναιρέσιμη εξαγωγή συμπερασμάτων» -- 'decidable logic' -> «καταληκτική λογική» -- 'Decision boundary' -> «Όριο απόφασης» -- 'Decision support system (DSS)' -> «Σύστημα υποστήριξης αποφάσεων» -- 'Decision theory' -> «Θεωρία απόφασης» -- 'Decision tree learning' -> «Εκμάθηση του δέντρου αποφάσεων» -- 'Declarative programming' -> «Δηλωτικός προγραμματισμός» -- 'deduction system' -> «σύστημα εξαγωγής συμπερασμάτων» -- 'Deductive classifier' -> «Απαγωγικός ταξινομητής» -- 'deductive reasoning' -> «συνεπαγωγική συλλογιστική» -- 'Deep Blue' -> «Βαθύ μπλε» -- 'deep knowledge' -> «βαθιά γνώση» -- 'Deep learning' -> «Βαθιά μάθηση» -- 'DeepMind Technologies' -> «Τεχνολογίες DeepMind» -- 'default reasoning' -> «συλλογιστική εύλογων υποθέσεων» -- 'defeasible logic' -> «αναιρέσιμη λογική» -- 'defeasible rules' -> «αναιρέσιμοι κανόνες» -- 'defeasible theory' -> «αναιρέσιμη θεωρία» -- 'defeaters' -> «αναιρετές» -- 'definite clause grammars' -> «γραμματικές οριστικών προτάσεων» -- 'definite inference' -> «οριστική απόδειξη» -- 'defuzzification' -> «αποσαφήνιση» -- 'degree of consistency' -> «βαθμός συνέπειας» -- 'degree of truth' -> «βαθμός αληθείας» -- 'delete list' -> «λίστα διαγραφών» -- 'deliberative agent' -> «πράκτορας με εσωτερική κατάσταση» -- 'Delta rule' -> «κανόνας Δέλτα» -- 'demons' -> «δαίμονας» -- 'demotion' -> «υποβιβασμός» -- 'dendrite' -> «δενδρίτης» -- 'Depth-First Search' -> «αναζήτηση πρώτα σε βάθος» -- 'Description logic (DL)' -> «Λογική περιγραφής» -- 'design' -> «σχεδίαση» -- 'design stance' -> «σχεδιαστική προσέγγιση» -- 'detach' -> «διαχωρισμός» -- 'deterministic effects' -> «ντετερμινιστικά αποτελέσματα» -- 'Developmental robotics (DevRob)' -> «Αναπτυξιακή ρομποτική» -- 'Diagnosis' -> «Διάγνωση» -- 'diagnosis' -> «διάγνωση» -- 
'Dialogue system' -> «Σύστημα διαλόγου» -- 'Dimensionality reduction' -> «Μείωση διαστάσεων» -- 'discrepancy' -> «ασυμφωνία τιμών» -- 'Discrete system' -> «Διακριτό σύστημα» -- 'discretization' -> «διακριτοποίηση» -- 'disjunctive normal form' -> «διαζευκτική κανονική μορφή» -- 'distributed artificial intelligence' -> «κατανεμημένη τεχνητή νοημοσύνη» -- 'Distributed artificial intelligence (DAI)' -> «Κατανεμημένη τεχνητή νοημοσύνη» -- 'distributed data mining' -> «κατανεμημένη εξόρυξη σε δεδομένα» -- 'distributed memory' -> «κατανεμημένη μνήμη» -- 'distributed multi-agent planning' -> «κατανεμημένος πολυπρακτορικός σχεδιασμός» -- 'divisive algorithm' -> «αλγόριθμος διαίρεσης» -- 'domain expert' -> «ειδικός του τομέα» -- 'Dynamic epistemic logic (DEL)' -> «Δυναμική επιστημική λογική» -- 'dynamic programming' -> «δυναμικός προγραμματισμός» -- 'Eager learning' -> «Πρόθυμη μάθηση» -- 'eager learning' -> «έγκαιρη μάθηση» -- 'Ebert test' -> «Τεστ Έμπερτ» -- 'Echo state network (ESN)' -> «Δίκτυο κατάστασης Echo» -- 'edge detection' -> «εντοπισμός ακμών» -- 'effectors' -> «εξαρτήματα δράσης» -- 'Embodied agent' -> «Ενσαρκωμένος πράκτορας» -- 'Embodied cognitive science' -> «Ενσωματωμένη γνωστική επιστήμη» -- 'encapsulation' -> «εγκλεισμός (αντικειμένου)» -- 'energy function' -> «συνάρτηση ενέργειας» -- 'Enforced Hill-Climbing Search' -> «αναζήτηση με εξαναγκασμένη αναρρίχηση λόφου» -- 'Ensemble averaging' -> «Μέσος όρος του συνόλου» -- 'entropy of information' -> «εντροπία πληροφορίας» -- 'episode mining algorithms' -> «εξόρυξη επεισοδίων» -- 'episodical knowledge' -> «επεισοδιακή γνώση» -- 'Epoch (machine learning)' -> «Εποχή (μηχανική μάθηση)» -- 'epochs' -> «εποχές» -- 'equivalence' -> «ισοδυναμία» -- 'equivalence rules' -> «κανόνες ισοδυναμίας» -- 'erasure' -> «διαγραφή» -- 'error driven learning' -> «μάθηση καθοδηγούμενη από το σφάλμα» -- 'Error-driven learning' -> «Μάθηση με γνώμονα τα σφάλματα» -- 'Ethics of artificial intelligence' -> «Ηθική της τεχνητής νοημοσύνης» 
-- 'Euclidian distance' -> «Ευκλείδεια απόσταση» -- 'evaluation' -> «αποτίμηση» -- 'evaluation function' -> «συνάρτηση αξιολόγησης» -- 'event-driven rule' -> «ενεργός κανόνας» -- 'evoking strength' -> «δύναμη πρόκλησης» -- 'Evolutionary algorithm (EA)' -> «Εξελικτικός αλγόριθμος» -- 'Evolutionary computation' -> «Εξελικτικός υπολογισμός» -- 'Evolving classification function (ECF)' -> «Εξελισσόμενη συνάρτηση ταξινόμησης» -- 'exhaustive search' -> «εξαντλητική αναζήτηση» -- 'existential graphs' -> «υπαρξιακοί γράφοι» -- 'existential quantifier' -> «υπαρξιακός ποσοδείκτης» -- 'Existential risk' -> «Υπαρξιακός κίνδυνος» -- 'exoneration' -> «αθώωση» -- 'Expert system' -> «Ειδικό σύστημα» -- 'expert system' -> «έμπειρο σύστημα» -- 'expert system shell' -> «κέλυφος έμπειρου συστήματος» -- 'explicit knowledge' -> «ρητή γνώση» -- 'extension principle' -> «αρχή της επέκτασης» -- 'Fast-and-frugal trees' -> «Γρήγορα και λιτά δέντρα» -- 'Feature extraction' -> «Εξαγωγή χαρακτηριστικών» -- 'Feature learning' -> «Εκμάθηση χαρακτηριστικών» -- 'Feature selection' -> «Επιλογή χαρακτηριστικών» -- 'Federated learning' -> «Ομοσπονδιακή μάθηση» -- 'feedback' -> «ανάδραση, ανατροφοδότηση» -- 'feedforward' -> «πρόσθια τροφοδότηση» -- 'filtering algorithm' -> «αλγόριθμος διήθησης τιμών» -- 'final state' -> «τελική κατάσταση» -- 'fine grain' -> «λεπτή υφή» -- 'first fail principle' -> «αρχή συντομότερης αποτυχίας» -- 'first order predicate logic' -> «κατηγορηματική λογική πρώτης τάξης» -- 'First-order logic' -> «Λογική πρώτης τάξης» -- 'fitness function' -> «συνάρτηση καταλληλότητας» -- 'Fluent' -> «Ευφραδής» -- 'Formal language' -> «Επίσημη γλώσσα» -- 'Forward chaining' -> «Αλυσίδα προς τα εμπρός» -- 'forward chaining' -> «ορθή ακολουθία εκτέλεσης» -- 'forward checking' -> «προοπτικός έλεγχος» -- 'Frame' -> «Πλαίσιο» -- 'frame axioms' -> «αξιώματα του πλαισίου» -- 'Frame language' -> «Γλώσσα πλαισίου» -- 'frame of discernment' -> «πλαίσιο διάκρισης» -- 'Frame problem' -> «Πρόβλημα 
πλαισίου» -- 'frame problem' -> «πρόβλημα πλαισίου» -- 'frames' -> «πλαίσια» -- 'Friendly artificial intelligence' -> «Φιλική τεχνητή νοημοσύνη» -- 'full look ahead' -> «πλήρης έγκαιρη εξέταση» -- 'functional dependency' -> «λειτουργική εξάρτηση» -- 'functional term' -> «συναρτησιακός όρος» -- 'Futures studies' -> «Μελλοντικές μελέτες» -- 'fuzzification' -> «μετατροπή μεγέθους σε ασαφές» -- 'fuzziness' -> «ασάφεια» -- 'fuzzy complement' -> «συμπληρωματικό ασαφούς συνόλου» -- 'fuzzy composition' -> «σύνθεση ασαφών σχέσεων» -- 'Fuzzy control system' -> «Ασαφές σύστημα ελέγχου» -- 'fuzzy linguistic description' -> «ασαφής λεκτική περιγραφή» -- 'fuzzy linguistic variable' -> «ασαφής λεκτική μεταβλητή» -- 'Fuzzy logic' -> «Ασαφής λογική» -- 'fuzzy logic' -> «ασαφής λογική» -- 'fuzzy numbers' -> «ασαφείς αριθμοί» -- 'fuzzy reasoning' -> «ασαφής συλλογιστική» -- 'fuzzy relations' -> «ασαφείς σχέσεις» -- 'Fuzzy rule' -> «Ασαφής κανόνας» -- 'fuzzy rule' -> «ασαφής κανόνας» -- 'Fuzzy set' -> «Ασαφές σύνολο» -- 'fuzzy set' -> «ασαφή σύνολα» -- 'fuzzy set theory' -> «θεωρία ασαφών συνόλων» -- 'fuzzy variable' -> «ασαφής μεταβλητή» -- 'Game theory' -> «Θεωρία παιγνίων» -- 'game tree' -> «δένδρο παιγνίου» -- 'gene' -> «γονίδιο» -- 'general problem solver' -> «γενικός επιλυτής προβλημάτων» -- 'generalization rule' -> «κανόνας γενίκευσης» -- 'generalized modus ponens' -> «γενικευμένος τρόπος του θέτειν» -- 'generalized modus tollens' -> «γενικευμένος τρόπος του αναιρείν» -- 'generate and test' -> «παραγωγή και δοκιμή» -- 'generation gap' -> «χάσμα γενεών» -- 'Generative adversarial network (GAN)' -> «Δημιουργικό ανταγωνιστικό δίκτυο» -- 'genetic algorithms' -> «γενετικοί αλγόριθμοι» -- 'Genetic algorithm (GA)' -> «Γενετικός αλγόριθμος» -- 'Genetic operator' -> «Γενετικός χειριστής» -- 'genetic programming' -> «γενετικός προγραμματισμός» -- 'genotype' -> «γονότυπος» -- 'Glowworm swarm optimization' -> «Βελτιστοποίηση σμήνους Glowworm» -- 'goal driven' -> «αναζήτηση οδηγούμενη από 
στόχους» -- 'goals of attainment' -> «στόχοι επίτευξης» -- 'graded learning' -> «βαθμολογημένη μάθηση» -- 'gradient descent' -> «επικλινής καθόδος» -- 'gradient descent optimization' -> «βελτιστοποίηση επικλινούς καθόδου» -- 'Graph (abstract data type)' -> «Γράφημα» -- 'Graph (discrete mathematics)' -> «Γράφημα (διακριτά μαθηματικά)» -- 'Graph database (GDB)' -> «Βάση δεδομένων γραφημάτων» -- 'graph expansion' -> «επέκταση γράφου» -- 'Graph theory' -> «Θεωρία γραφημάτων» -- 'Graph traversal' -> «Διασύνδεση γραφήματος» -- 'graph-based planning' -> «σχεδιασμός βασισμένος σε γράφους» -- 'grid' -> «πλέγμα» -- 'grip' -> «λαβή» -- 'ground term' -> «βασικός όρος» -- 'guided-probe approach' -> «προσέγγιση καθοδηγούμενων δοκιμών» -- 'hetero-associative memories' -> «ετεροσυσχετιζόμενες μνήμες» -- 'Heuristic' -> «Ευρετική» -- 'heuristic' -> «ευρετικός μηχανισμός» -- 'heuristic classification' -> «ευρετική κατηγοριοποίηση» -- 'heuristic function' -> «ευρετική συνάρτηση» -- 'heuristic match' -> «ευρετική ταυτοποίηση» -- 'heuristic search' -> «ευρετική αναζήτηση» -- 'heuristic value' -> «ευρετική τιμή» -- 'Hidden layer' -> «Κρυφό στρώμα» -- 'hidden layers' -> «κρυφά επίπεδα» -- 'Hidden unit' -> «Κρυφή μονάδα» -- 'hierarchical planning' -> «ιεραρχικός σχεδιασμός» -- 'Hierarchical Task Networks' -> «ιεραρχικά δίκτυα διεργασιών» -- 'hierarchy concept type' -> «ιεραρχία τύπων εννοιών» -- 'hierarchy relation type' -> «ιεραρχία τύπων σχέσεων» -- 'higher order constraint' -> «περιορισμός ανώτερης τάξης» -- 'Hill Climbing Search' -> «αναζήτηση αναρρίχησης λόφων» -- 'horizon effect' -> «φαινόμενο ορίζοντα» -- 'humanoid robots' -> «ανθρωποειδή ρομπότ» -- 'hybrid agent' -> «υβριδικός πράκτορας» -- 'Hyper-heuristic' -> «Υπερ-ευρετικό» -- 'hypotheses discrimination' -> «διάκριση υποθέσεων» -- 'hypothesis space' -> «χώρος υποθέσεων» -- 'hypothesize and test' -> «δημιουργία και έλεγχος υποθέσεων» -- 'IEEE Computational Intelligence Society' -> «Κοινωνία Υπολογιστικής Νοημοσύνης» -- 'if-needed 
demon' -> «προσκόλληση διαδικασιών» -- 'implication' -> «συνεπαγωγή» -- 'imprecise data' -> «ανακριβή δεδομένα» -- 'incomplete' -> «μη-πλήρης» -- 'incomplete data' -> «ελλιπή δεδομένα» -- 'inconsistency effects' -> «ασυνεπή αποτελέσματα» -- 'inconsistency support' -> «ασύμβατη υποστήριξη» -- 'Incremental learning' -> «Αυξητική μάθηση» -- 'incremental learning' -> «επαυξητική μάθηση» -- 'indivisible action' -> «αδιαίρετη ενέργεια» -- 'induction' -> «επαγωγή» -- 'inductive learning' -> «επαγωγική μάθηση» -- 'inductive learning hypothesis' -> «υπόθεση επαγωγικής μάθησης» -- 'inductive logic programming' -> «επαγωγικός λογικός προγραμματισμός» -- 'inductive reasoning' -> «επαγωγική συλλογιστική» -- 'inference' -> «εξαγωγή συμπερασμάτων» -- 'Inference engine' -> «Μηχανή συμπερασμάτων» -- 'inference engine' -> «μηχανή εξαγωγής συμπερασμάτων» -- 'inference mechanism' -> «μηχανισμός εξαγωγής συμπερασμάτων» -- 'inference rules' -> «κανόνες εξαγωγής συμπερασμάτων» -- 'inferential adequacy' -> «επάρκεια συνεπαγωγής» -- 'inferential efficiency' -> «αποδοτικότητα συνεπαγωγής» -- 'inferential inefficiency' -> «μη-αποδοτικότητα επαγωγής» -- 'information gain' -> «κέρδος πληροφορίας» -- 'Information integration (II)' -> «Ενοποίηση πληροφοριών» -- 'Information Processing Language (IPL)' -> «Γλώσσα επεξεργασίας πληροφοριών» -- 'information retrieval' -> «ανάκτηση πληροφοριών» -- 'information value theory' -> «θεωρία αξίας της πληροφορίας» -- 'informative patterns' -> «πρότυπα πληροφόρησης» -- 'inheritance' -> «κληρονομικότητα» -- 'initial state' -> «αρχική κατάσταση» -- 'input layer' -> «επίπεδο εισόδου» -- 'instance' -> «στιγμιότυπο» -- 'instance-based learning' -> «μάθηση κατά περίπτωση» -- 'Intelligence amplification (IA)' -> «Ενίσχυση νοημοσύνης» -- 'Intelligence explosion' -> «Έκρηξη πληροφοριών» -- 'intelligent agent' -> «ευφυής πράκτορας» -- 'Intelligent agent (IA)' -> «Ευφυής παράγοντας» -- 'Intelligent control' -> «Έξυπνος έλεγχος» -- 'Intelligent personal assistant' -> 
«Έξυπνος προσωπικός βοηθός» -- 'intention' -> «πρόθεση» -- 'intentional stance' -> «προθεσιαρχική προσέγγιση» -- 'inter-transactional association rules' -> «δια-συναλλακτικοί κανόνες συσχέτισης» -- 'interaction protocol' -> «πρωτόκολλο αλληλεπίδρασης» -- 'interference' -> «παρέμβαση» -- 'interoperability' -> «διαλειτουργικότητα» -- 'interpolative associative memories' -> «μνήμη συσχέτισης παρεμβολής» -- 'Interpretation' -> «Ερμηνεία» -- 'interpretation' -> «ερμηνεία» -- 'interpretation models' -> «ερμηνευτικά μοντέλα» -- 'interpreter' -> «διερμηνέας» -- 'intra-transactional association rules' -> «ενδο-συναλλακτικοί κανόνες συσχέτισης» -- 'Intrinsic motivation' -> «Εσωτερικά κίνητρα» -- 'Issue tree' -> «Δέντρο έκδοσης» -- 'Iterative Deepening A* Search' -> «αναζήτηση Α* με επαναληπτική εκβάθυνση» -- 'Iterative Deepening Search' -> «αναζήτηση επαναληπτικής εκβάθυνσης» -- 'job-shop scheduling' -> «χρονοπρογραμματισμός καταστημάτων εργασιών» -- 'join' -> «συνένωση» -- 'Junction tree algorithm' -> «Αλγόριθμος δέντρων διασταύρωσης» -- 'K-consistency' -> «Κ-συνέπεια» -- 'K-means algorithm' -> «αλγόριθμος Κ-μέσων» -- 'k-nearest neighbors algorithm' -> «αλγόριθμος κ-πλησιέστερων γειτόνων» -- 'Kernel method' -> «Μέθοδος πυρήνα» -- 'knapsack problem' -> «πρόβλημα ταξιδιωτικού σάκου» -- 'knowledge' -> «γνώση» -- 'Knowledge acquisition' -> «Απόκτηση γνώσης» -- 'knowledge acquisition' -> «απόκτηση γνώσης» -- 'knowledge base' -> «βάση γνώσης» -- 'knowledge based system' -> «σύστημα βασισμένο στη γνώση» -- 'knowledge capture' -> «σύλληψη γνώσης» -- 'knowledge elicitation' -> «εκμαίευση γνώσης» -- 'knowledge engineer' -> «μηχανικός γνώσης» -- 'knowledge engineering' -> «τεχνολογία γνώσης» -- 'Knowledge engineering (KE)' -> «Μηχανική Γνώσης» -- 'Knowledge extraction' -> «Εξαγωγή γνώσης» -- 'knowledge extraction' -> «εξαγωγή γνώσης» -- 'Knowledge Interchange Format (KIF)' -> «Μορφή ανταλλαγής γνώσεων» -- 'knowledge management' -> «διαχείριση γνώσης» -- 'knowledge modeling' -> 
«μοντελοποίηση γνώσης» -- 'Knowledge representation and reasoning (KR² or KR&R)' -> «Αναπαράσταση και συλλογιστική γνώσης» -- 'knowledge source' -> «πηγή γνώσης» -- 'knowledge system' -> «σύστημα βασισμένο στη γνώση» -- 'Knowledge-based system (KBS)' -> «Σύστημα βασισμένο στη γνώση» -- 'laddered grids' -> «βαθμωτά πλέγματα» -- 'lambda expressions' -> «εκφράσεις-λ» -- 'lateral excitation' -> «παράπλευρη διέγερση» -- 'lateral inhibition' -> «παράπλευρη καταστολή» -- 'layer' -> «στρώματα» -- 'Lazy learning' -> «Τεμπέλικη μάθηση» -- 'lazy learning' -> «αναβλητική μάθηση» -- 'learning' -> «μάθηση» -- 'learning from examples' -> «μάθηση με παραδείγματα» -- 'learning from observation' -> «μάθηση από παρατήρηση» -- 'least commitment principle' -> «αρχή της ελάχιστης δέσμευσης» -- 'linear associator' -> «γραμμικός συσχετιστής» -- 'linear plan' -> «γραμμικό πλάνο» -- 'linear regression' -> «γραμμική παρεμβολή» -- 'linear resolution' -> «γραμμική ανάλυση» -- 'linear time logic' -> «γραμμική χρονική λογική» -- 'linearly separable problems' -> «γραμμικώς διαχωρίσιμα προβλήματα» -- 'literal' -> «λεκτικό» -- 'local minima' -> «τοπικά ελάχιστα» -- 'logic clause' -> «λογική πρόταση» -- 'logic contradiction' -> «λογική αντίφαση» -- 'Logic programming' -> «Λογικός προγραμματισμός» -- 'logic semantics' -> «λογική σημασιολογία» -- 'logic substitution' -> «λογική αντικατάσταση» -- 'logical inadequacy' -> «λογική ανεπάρκεια» -- 'logical necessity' -> «λογική αναγκαιότητα» -- 'logical sufficiency' -> «λογική επάρκεια» -- 'logistic function' -> «λογιστική συνάρτηση» -- 'logistics' -> «εφοδιαστική» -- 'Long short-term memory (LSTM)' -> «Μακροπρόθεσμη μνήμη» -- 'machine evolution' -> «μηχανική εξέλιξη» -- 'machine learning' -> «μηχανική μάθηση» -- 'Machine learning (ML)' -> «Μηχανική μάθηση» -- 'Machine listening' -> «Μηχανική ακρόαση» -- 'Machine perception' -> «Μηχανική αντίληψη» -- 'machine vision' -> «μηχανική όραση» -- 'Machine vision (MV)' -> «Μηχανική όραση» -- 'maintaining arc 
consistency' -> «διατήρηση συνέπειας τόξου» -- 'manifestation frequency' -> «συχνότητα εκδήλωσης συμπτώματος» -- 'manufacturing robots' -> «κατασκευαστικά ρομπότ» -- 'Markov chain' -> «Αλυσίδα Markov» -- 'Markov decision process (MDP)' -> «Διαδικασία απόφασης Markov» -- 'mathematical logic' -> «μαθηματική λογική» -- 'Mathematical optimization' -> «Μαθηματική βελτιστοποίηση» -- 'means-ends analysis' -> «ανάλυση μέσων και στόχων» -- 'Mechanism design' -> «Σχεδιασμός μηχανισμού» -- 'Mechatronics' -> «Μηχατρονική» -- 'mediator' -> «διαμεσολαβητής» -- 'membership function' -> «συνάρτηση συγγένειας» -- 'memory capacity' -> «χωρητικότητα μνήμης» -- 'message passing systems' -> «συστήματα ανταλλαγής μηνυμάτων» -- 'meta -control' -> «μετα- έλεγχος» -- 'meta -knowledge' -> «μετα- γνώση» -- 'meta -rule' -> «μετα- κανόνας» -- 'Metabolic network reconstruction and simulation' -> «Ανακατασκευή και προσομοίωση μεταβολικού δικτύου» -- 'metadata' -> «μεταδεδομένα» -- 'Metaheuristic' -> «Μεταευρετική» -- 'mgu' -> «γενικότερος ενοποιητής» -- 'min conflicts heuristic' -> «ευριστικός μηχανισμός ελαχίστων συγκρούσεων» -- 'minimax algorithm' -> «αλγόριθμοι αναζήτησης ελαχίστου-μεγίστου» -- 'minimax search' -> «αναζήτηση ελαχίστου-μεγίστου» -- 'missing data' -> «ελλιπή δεδομένα» -- 'mobile robots' -> «μετακινούμενα ρομπότ» -- 'mobility' -> «κινητικότητα» -- 'modal logic' -> «λογική τροπική» -- 'model' -> «μοντέλο» -- 'Model checking' -> «Έλεγχος μοντέλου» -- 'model checking' -> «έλεγχος μοντέλων» -- 'model-based diagnosis' -> «διάγνωση βασισμένη σε μοντέλα» -- 'model-based reasoning' -> «συλλογιστική βασισμένη σε μοντέλα» -- 'module' -> «ενότητα» -- 'modus ponens' -> «τρόπος του θέτειν» -- 'modus tollens' -> «τρόπος του αναιρείν» -- 'Monte Carlo tree search' -> «Αναζήτηση δέντρων στο Μόντε Κάρλο» -- 'morphological analysis' -> «μορφολογική ανάλυση» -- 'morphology derivational' -> «μορφολογία ετυμολογική» -- 'morphology inflectional' -> «μορφολογία κλίσεων» -- 'most general unifier' -> 
«γενικότερος ενοποιητής» -- 'multi-agent planning' -> «πολυπρακτορικός σχεδιασμός» -- 'multi-agent system' -> «πολυπρακτορικό σύστημα» -- 'Multi-agent system (MAS)' -> «Σύστημα πολλαπλών πρακτόρων» -- 'Multi-swarm optimization' -> «Βελτιστοποίηση πολλαπλών σμήνων» -- 'multiple inheritance' -> «πολλαπλή κληρονομικότητα» -- 'multistage classification' -> «πολυβάθμια κατηγοριοποίηση» -- 'Mutation' -> «Μετάλλαξη» -- 'mutation' -> «μετάλλαξη» -- 'mutual exclusion relations' -> «σχέσεις αμοιβαίου αποκλεισμού» -- 'Naive Bayes classifier' -> «Ταξινομητής Naive Bayes» -- 'Naive semantics' -> «Αφελής σημασιολογία» -- 'Name binding' -> «Δέσμευση ονόματος» -- 'Named graph' -> «Ονομασμένο γράφημα» -- 'Named-entity recognition (NER)' -> «Αναγνώριση επώνυμης οντότητας» -- 'namespace' -> «χώρος ονομάτων» -- 'natural language' -> «φυσική γλώσσα» -- 'Natural language generation (NLG)' -> «Δημιουργία φυσικής γλώσσας» -- 'Natural language processing (NLP)' -> «Επεξεργασία φυσικής γλώσσας» -- 'Natural language programming' -> «Προγραμματισμός φυσικής γλώσσας» -- 'negation as failure' -> «άρνηση ως αποτυχία» -- 'negative context' -> «αρνητικό πλαίσιο (συμφραζόμενων)» -- 'negative preconditions' -> «αρνητικές προϋποθέσεις» -- 'negotiation' -> «διαπραγμάτευση» -- 'Network motif' -> «Μοτίβο δικτύου» -- 'network paralysis' -> «παράλυση νευρωνικού δικτύου» -- 'Neural machine translation (NMT)' -> «Νευρωνική μηχανική μετάφραση» -- 'neural network' -> «νευρωνικό δίκτυο» -- 'Neural Turing machine (NTM)' -> «Μηχανή Neural Turing» -- 'Neuro-fuzzy' -> «Νευρο-ασαφής» -- 'Neurocybernetics' -> «Νευροκυβερνητική» -- 'Neuromorphic engineering' -> «Νευρομορφική μηχανική» -- 'neuron' -> «νευρώνας» -- 'Node' -> «Κόμβος» -- 'node consistency' -> «συνέπεια κόμβου» -- 'noise reduction' -> «μείωση θορύβου» -- 'non-determinism' -> «μη-αιτιοκρατία» -- 'non-monotonic modal logic' -> «μη μονότονη τροπική λογική» -- 'non-symbolic artificial intelligence' -> «μη συμβολική τεχνητή νοημοσύνη» -- 'Nondeterministic 
algorithm' -> «Μη προσδιοριστικός αλγόριθμος» -- 'Nouvelle AI' -> «Νέο AI» -- 'NP-completeness' -> «NP-πληρότητα» -- 'NP-hardness' -> «NP-σκληρότητα» -- 'null plan' -> «μηδενικό πλάνο» -- 'object' -> «αντικείμενο» -- 'object instances' -> «στιγμιότυπα αντικειμένου» -- 'object-oriented programming' -> «αντικειμενοστραφής προγραμματισμός» -- 'obligation' -> «υποχρέωση» -- 'Occam's razor' -> «ξυράφι του Όκαμ» -- 'occurs check' -> «έλεγχος εμφάνισης» -- 'OCR – Optical Character Recognition' -> «οπτική αναγνώριση χαρακτήρων» -- 'Offline learning' -> «Εκμάθηση εκτός σύνδεσης» -- 'offsprings' -> «απόγονοι» -- 'omniscience' -> «παντογνωσία» -- 'Online machine learning' -> «Διαδικτυακή μηχανική εκμάθηση» -- 'ontology' -> «οντολογία» -- 'Ontology learning' -> «Εκμάθηση οντολογίας» -- 'open world' -> «ανοιχτός κόσμος» -- 'Open-source software (OSS)' -> «Λογισμικό ανοιχτού κώδικα» -- 'opportunistic scheduling' -> «καιροσκοπικός χρονοπρογραμματισμός» -- 'optimal solution' -> «βέλτιστη λύση» -- 'optimization' -> «βελτιστοποίηση» -- 'order inconsistent plan' -> «πλάνο ασυνεπές ως προς τις διατάξεις» -- 'ordered game tree' -> «διατεταγμένο δένδρο» -- 'ordering constraint' -> «περιορισμοί διάταξης» -- 'output layer' -> «επίπεδα εξόδου» -- 'overfitting' -> «υπερπροσαρμογή» -- 'overloading' -> «υπερφόρτωση» -- 'parallel search' -> «παράλληλη αναζήτηση» -- 'parse tree' -> «δένδρο συντακτικής ανάλυσης» -- 'partial look ahead algorithm' -> «αλγόριθμος έγκαιρης μερικής εξέτασης» -- 'Partial order reduction' -> «Μερική μείωση παραγγελίας» -- 'Partially observable Markov decision process (POMDP)' -> «Μερικώς παρατηρήσιμη διαδικασία απόφασης Markov» -- 'Particle swarm optimization (PSO)' -> «Βελτιστοποίηση σμήνους σωματιδίων» -- 'passive troubleshooting' -> «παθητική διάγνωση» -- 'path consistency algorithm' -> «αλγόριθμος συνέπειας μονοπατιού» -- 'Pathfinding' -> «Διαδρομή» -- 'pattern' -> «πρότυπα» -- 'pattern matching' -> «ταυτοποίηση» -- 'pattern of activity' -> «πρότυπα δραστηριότητας» 
-- 'Pattern recognition' -> «Αναγνώριση μοτίβου» -- 'phenotype' -> «φαινότυπο» -- 'phonemes' -> «φθόγγοι» -- 'physical stance' -> «φυσική προσέγγιση» -- 'pixel' -> «εικονοστοιχείο» -- 'plan' -> «πλάνο» -- 'plan solution' -> «λύση πλάνου» -- 'plan space' -> «χώρος πλάνων» -- 'planner' -> «σχεδιαστής» -- 'planning contingency' -> «σχεδιασμός πολλαπλών ενδεχομένων» -- 'planning graph' -> «γράφος σχεδιασμού» -- 'planning system' -> «σύστημα σχεδιασμού» -- 'polymorphism' -> «πολυμορφισμός» -- 'portals' -> «διαδικτυακές πύλες» -- 'positive context' -> «θετικό πλαίσιο συμφραζόμενων» -- 'powerset' -> «δυναμοσύνολο» -- 'pragmatic analysis' -> «πραγματολογική ανάλυση» -- 'precondition list' -> «λίστα προϋποθέσεων» -- 'predicate' -> «κατηγόρημα» -- 'Predicate logic' -> «Λογική κατηγορήματος» -- 'predicate logic' -> «κατηγορηματική λογική» -- 'prediction' -> «πρόγνωση» -- 'Predictive analytics' -> «Προγνωστική ανάλυση» -- 'predictive models' -> «μοντέλο πρόβλεψης» -- 'prenex conjunctive normal form' -> «προσημασμένη συζευκτική κανονική μορφή» -- 'primitive action' -> «αρχέγονη ενέργεια» -- 'primitive conceptualizations' -> «αρχέγονες εννοιολογικές μορφές» -- 'primitive problem' -> «αρχέγονο πρόβλημα» -- 'Principal component analysis (PCA)' -> «Ανάλυση κύριου συστατικού» -- 'Principle of rationality' -> «Αρχή του ορθολογισμού» -- 'prior probability' -> «προϋπάρχουσα πιθανότητα» -- 'pro-activeness' -> «προνοητικότητα» -- 'Probabilistic programming (PP)' -> «Πιθανοτικός προγραμματισμός» -- 'probability planning' -> «σχεδιασμός με πιθανότητες» -- 'problem description' -> «περιγραφή προβλήματος» -- 'problem world' -> «κόσμος προβλήματος» -- 'procedural knowledge' -> «διαδικαστική γνώση» -- 'production rules' -> «κανόνες παραγωγής» -- 'Production system' -> «Σύστημα παραγωγής» -- 'production system' -> «σύστημα κανόνων παραγωγής» -- 'Programming language' -> «Γλώσσα προγραμματισμού» -- 'progression' -> «ορθή διάσχιση» -- 'projection' -> «προβολή» -- 'promotion' -> «προβιβασμός» -- 
'proof' -> «απόδειξη» -- 'proof by contradiction' -> «εις άτοπο απαγωγή» -- 'proof layer' -> «επίπεδο αξιοπιστίας» -- 'proof procedure' -> «διαδικασία απόδειξης» -- 'Propositional calculus' -> «Προτασιακός λογισμός» -- 'propositional logic' -> «προτασιακή λογική» -- 'propositional rules' -> «προτασιακοί κανόνες» -- 'pruning' -> «κλάδεμα» -- 'pure node' -> «αμιγής κόμβος» -- 'pure tree' -> «αμιγές δένδρο» -- 'Python' -> «Πύθων» -- 'Qualification problem' -> «Πρόβλημα προσόντων» -- 'qualitative reasoning' -> «ποιοτική συλλογιστική» -- 'Quantifier' -> «Ποσοτικοποιητής» -- 'quantifier' -> «ποσοδείκτες» -- 'Quantum computing' -> «Κβαντική Υπολογιστική» -- 'Query language' -> «Γλώσσα ερωτήματος» -- 'R programming language' -> «Γλώσσα προγραμματισμού R» -- 'Radial basis function network' -> «Δίκτυο λειτουργίας ακτινικής βάσης» -- 'Random forest' -> «Τυχαίο δάσος» -- 'random learning' -> «τυχαία μάθηση» -- 'rationality' -> «λογικότητα» -- 'reactive agent' -> «αντιδραστικός πράκτορας» -- 'reactive rules' -> «αντιδραστικοί κανόνες» -- 'reactiveness' -> «αντιδραστικότητα» -- 'reasoning' -> «συλλογιστική» -- 'Reasoning system' -> «Σύστημα συλλογισμού» -- 'recurrent' -> «ανατροφοδοτούμενος» -- 'recurrent neural networks' -> «νευρωνικά δίκτυα με ανατροφοδότηση» -- 'Recurrent neural network (RNN)' -> «Επαναλαμβανόμενο νευρωνικό δίκτυο» -- 'recursion' -> «αναδρομή» -- 'reduction' -> «αναγωγή» -- 'reduction operator' -> «τελεστής αναγωγής» -- 'refutation' -> «εις άτοπο απαγωγή» -- 'refutation completeness' -> «πληρότητα ατόπου» -- 'Region connection calculus' -> «Λογισμός σύνδεσης περιοχής» -- 'regression' -> «παλινδρόμηση» -- 'reinforcement learning' -> «ενισχυτική μάθηση» -- 'Reinforcement learning (RL)' -> «Ενισχυτική μάθηση» -- 'repair algorithm' -> «αλγόριθμος επιδιόρθωσης» -- 'repair space' -> «χώρος επιδιορθώσεων» -- 'replanning' -> «επανασχεδιασμός» -- 'representational adequacy' -> «επάρκεια αναπαράστασης» -- 'Reservoir computing' -> «Υπολογισμός δεξαμενής» -- 'resolution 
principle' -> «αρχή της ανάλυσης» -- 'resolvent' -> «αναλυθέν» -- 'resource competition' -> «ανταγωνισμός πόρων» -- 'Resource Description Framework (RDF)' -> «Πλαίσιο Περιγραφής Πόρων» -- 'resource planning' -> «σχεδιασμός με πόρους» -- 'Restricted Boltzmann machine (RBM)' -> «Περιορισμένη μηχανή Boltzmann» -- 'restriction' -> «περιορισμός» -- 'reversible operator' -> «τελεστής αντιστρέψιμος» -- 'robot' -> «ρομπότ» -- 'robotic agent' -> «ρομποτικός πράκτορας» -- 'Robotics' -> «Ρομποτική» -- 'rule action' -> «ενέργεια κανόνα» -- 'rule base' -> «βάση κανόνων» -- 'rule cluster' -> «ομάδα κανόνων» -- 'rule conclusion' -> «συμπέρασμα κανόνα» -- 'rule condition' -> «συνθήκη κανόνα» -- 'rule of inference' -> «κανόνας συμπερασμού» -- 'Rule-based system' -> «Σύστημα βασισμένο σε κανόνες» -- 'Satisfiability' -> «Ικανοποίηση» -- 'scheduler' -> «χρονοπρογραμματιστής» -- 'schema theorem' -> «θεώρημα σχημάτων» -- 'scout' -> «ανιχνευτής» -- 'scripts' -> «σενάρια» -- 'Search algorithm' -> «Αλγόριθμος αναζήτησης» -- 'search algorithms' -> «αλγόριθμοι αναζήτησης» -- 'search engines' -> «μηχανές αναζήτησης» -- 'search frontier' -> «μέτωπο αναζήτησης» -- 'search space' -> «χώρος αναζήτησης» -- 'search thread' -> «νήμα αναζήτησης» -- 'search tree' -> «δένδρο αναζήτησης» -- 'Selection' -> «Επιλογή» -- 'selection fitness proportionate' -> «επιλογή αναλογικής καταλληλότητας» -- 'selection roulette wheel' -> «επιλογή ρουλέτας» -- 'selection tournament' -> «επιλογή τουρνουά» -- 'Selective Linear Definite clause resolution' -> «Επιλεκτική γραμμική ανάλυση οριστικής πρότασης» -- 'self decay' -> «εξασθένιση» -- 'Self-management' -> «Αυτοδιαχείριση» -- 'self-organizing feature map' -> «αυτο-οργανούμενη απεικόνιση» -- 'semantic analysis' -> «σημασιολογική ανάλυση» -- 'semantic knowledge' -> «σημασιολογική γνώση» -- 'Semantic network' -> «Σημασιολογικό δίκτυο» -- 'semantic networks' -> «σημασιολογικά δίκτυα» -- 'Semantic query' -> «Σημασιολογική ερώτηση» -- 'Semantic reasoner' -> «Σημασιολογικός 
λογιστής» -- 'semantic web' -> «σημασιολογικός ιστός» -- 'Semantics' -> «Σημασιολογία» -- 'semantics' -> «σημασιολογία» -- 'sensor' -> «αισθητήρας» -- 'Sensor fusion' -> «Σύντηξη αισθητήρα» -- 'Separation logic' -> «Λογική χωρισμού» -- 'sequential covering algorithm' -> «αλγόριθμος σειριακής κάλυψης» -- 'sequential pattern mining' -> «εξόρυξη ακολουθιακών προτύπων» -- 'shallow knowledge' -> «ρηχή γνώση» -- 'shell' -> «κέλυφος έμπειρου συστήματος» -- 'sigmoid functions' -> «σιγμοειδείς συναρτήσεις» -- 'sign function' -> «συνάρτηση πρόσημου» -- 'Similarity learning' -> «Εκμάθηση ομοιότητας» -- 'simplification' -> «απλοποίηση» -- 'Simulated Annealing Search' -> «αναζήτηση προσομοιωμένης ανόπτησης» -- 'Simulated annealing (SA)' -> «Προσομοίωση ανόπτησης» -- 'Situated approach' -> «Τοποθετημένη προσέγγιση» -- 'Situation calculus' -> «Λογισμός καταστάσεων» -- 'situation calculus' -> «λογισμός καταστάσεων» -- 'skeptical logic' -> «σκεπτικιστική λογική» -- 'skolemization' -> «σκολεμοποίηση» -- 'smoothing' -> «εξομάλυνση» -- 'social ability' -> «κοινωνικότητα» -- 'softbot' -> «λογισμικός πράκτορας» -- 'Software' -> «Λογισμικό» -- 'software agent' -> «λογισμικός πράκτορας» -- 'Software engineering' -> «Μηχανική Λογισμικού» -- 'solution extraction' -> «εξαγωγή λύσης» -- 'solution refinement' -> «επιλογή λύσης» -- 'sparse data' -> «αραιά δεδομένα» -- 'Spatial-temporal reasoning' -> «Χωροχρονικός συλλογισμός» -- 'specialization rule' -> «κανόνας εξειδίκευσης» -- 'spectrogram' -> «φασματογράφημα» -- 'Speech Act Theory' -> «Θεωρία Πράξεων Λόγου» -- 'Speech recognition' -> «Αναγνώριση ομιλίας» -- 'speech recognition' -> «αναγνώριση ομιλίας» -- 'spelling correction rules' -> «αλγόριθμος διόρθωσης ορθογραφικών λαθών» -- 'Spiking neural network (SNN)' -> «Spiking νευρωνικό δίκτυο» -- 'Stanford Research Institute Problem Solver (STRIPS)' -> «Επίλυση προβλημάτων του Ερευνητικού Ινστιτούτου Στάνφορντ» -- 'State' -> «Κατάσταση» -- 'state' -> «κατάσταση» -- 'state space' -> «χώρος 
καταστάσεων» -- 'state-space planning' -> «σχεδιασμός χώρου καταστάσεων» -- 'static world' -> «στατικός κόσμος» -- 'Statistical classification' -> «Στατιστική ταξινόμηση» -- 'Statistical relational learning (SRL)' -> «Στατιστική σχεσιακή μάθηση» -- 'step function' -> «βηματική συνάρτηση» -- 'Stochastic optimization (SO)' -> «Στοχαστική βελτιστοποίηση» -- 'Stochastic semantic analysis' -> «Στοχαστική σημασιολογική ανάλυση» -- 'strict rules' -> «ισχυροί κανόνες» -- 'strong negation' -> «κλασική άρνηση» -- 'Subject-matter expert' -> «Εμπειρογνώμονας σε θέματα» -- 'subsumption architecture' -> «αρχιτεκτονική υπαγωγής» -- 'Superintelligence' -> «Υπερευφυΐα» -- 'superiority relation' -> «σχέση υπεροχής» -- 'Supervised learning' -> «Επίβλεψη μάθησης» -- 'supervised learning' -> «μάθηση με επίβλεψη» -- 'support' -> «υποστήριξη» -- 'Support Vector Machines' -> «μηχανές διανυσμάτων υποστήριξης» -- 'Support-vector machines' -> «Υποστήριξη-διανυσματικά μηχανήματα» -- 'Swarm intelligence (SI)' -> «Νοημοσύνη σμήνους» -- 'Symbolic artificial intelligence' -> «Συμβολική τεχνητή νοημοσύνη» -- 'symbolic artificial intelligence' -> «συμβολική τεχνητή νοημοσύνη» -- 'symbolic logic' -> «συμβολική λογική» -- 'synapse' -> «σύναψη» -- 'syntactic analysis' -> «συντακτική ανάλυση» -- 'Synthetic intelligence (SI)' -> «Συνθετική νοημοσύνη» -- 'system model' -> «μοντέλο συστήματος» -- 'Systems neuroscience' -> «Συστημική νευροεπιστήμη» -- 'Tabu Search' -> «αναζήτηση με απαγορευμένες καταστάσεις» -- 'tacit knowledge' -> «άρρητη γνώση» -- 'target function' -> «συνάρτηση στόχος» -- 'tautology' -> «ταυτολογία» -- 'teach-back' -> «επαναδιδασκαλία» -- 'Technological singularity' -> «Τεχνολογική ιδιομορφία» -- 'temporal association rules' -> «κανόνες συσχέτισης χρονικοί» -- 'Temporal difference learning' -> «Εκμάθηση χρονικής διαφοράς» -- 'temporal logic' -> «λογική χρονική» -- 'Tensor network theory' -> «Θεωρία τανυστικού δικτύου» -- 'term' -> «όρος» -- 'term assignment' -> «ανάθεση όρων» -- 
'terminal state' -> «τερματική κατάσταση» -- 'text categorization' -> «κατηγοριοποίηση κειμένων» -- 'text planning' -> «σχεδιασμός κειμένου» -- 'Theoretical computer science (TCS)' -> «Θεωρητική επιστήμη των υπολογιστών» -- 'Theory of computation' -> «Θεωρία υπολογισμού» -- 'therapy space' -> «χώρος θεραπειών» -- 'Thompson sampling' -> «Δειγματοληψία Thompson» -- 'threat' -> «απειλή» -- 'threshold effect' -> «φαινόμενο κατωφλίου» -- 'threshold function' -> «συνάρτηση ενεργοποίησης» -- 'Time complexity' -> «Χρονική πολυπλοκότητα» -- 'timetable' -> «ωρολόγιο πρόγραμμα» -- 'topological sort' -> «τοπολογική διάταξη» -- 'total ordered plan' -> «πλάνο πλήρους διάταξης» -- 'Transhumanism' -> «Υπερανθρωπισμός» -- 'transition operator' -> «τελεστής μετάβασης» -- 'Transition system' -> «Σύστημα μετάβασης» -- 'Tree traversal' -> «Διάβαση δέντρου» -- 'trigger' -> «σκανδαλιστές» -- 'troubleshooting' -> «επιδιόρθωση βλαβών» -- 'True quantified Boolean formula' -> «Αληθής ποσοτικοποιημένος τύπος Boolean» -- 'trust layer' -> «επίπεδο αξιοπιστίας» -- 'truth maintenance' -> «συντήρηση αλήθειας» -- 'truth table' -> «πίνακας αληθείας» -- 'Turing machine' -> «Μηχανή Turing» -- 'Turing test' -> «Δοκιμή Turing» -- 'tutorial interview' -> «διδακτική συνέντευξη» -- 'two-person game' -> «παίγνια δύο αντιπάλων» -- 'Type system' -> «Σύστημα τύπου» -- 'unary constraint' -> «μοναδιαίος περιορισμός» -- 'unconditional probability' -> «πιθανότητα άνευ συνθηκών» -- 'underfitting' -> «υποπροσαρμογή» -- 'unification' -> «ενοποίηση» -- 'unifier' -> «ενοποιητής» -- 'unit clause' -> «μοναδιαία πρόταση» -- 'universal quantifier' -> «καθολικός ποσοδείκτης» -- 'unrestrict' -> «επέκταση» -- 'Unsupervised learning' -> «Εκμάθηση χωρίς επίβλεψη» -- 'unsupervised learning' -> «μάθηση χωρίς επίβλεψη» -- 'valence' -> «σθένος» -- 'valid plan' -> «έγκυρο πλάνο» -- 'validation' -> «έλεγχος αξιοπιστίας» -- 'validation data' -> «δεδομένα επικύρωσης» -- 'veracity' -> «ειλικρίνεια» -- 'verification' -> «επαλήθευση» -- 
'Vision processing unit (VPU)' -> «Μονάδα επεξεργασίας όρασης» -- 'Weak AI' -> «Αδύναμη AI» -- 'web portals' -> «πύλες παγκόσμιου ιστού» -- 'web resource' -> «πόρος παγκόσμιου ιστού» -- 'web services' -> «υπηρεσίες παγκόσμιου ιστού» -- 'well formed formulae' -> «ορθά δομημένοι τύποι» -- 'working memory' -> «χώρος εργασίας» -- 'World Wide Web Consortium (W3C)' -> «Κοινοπραξία World Wide Web» diff --git a/src/ol_openedx_course_translations/ol_openedx_course_translations/glossaries/machine_learning/es.txt b/src/ol_openedx_course_translations/ol_openedx_course_translations/glossaries/machine_learning/es.txt deleted file mode 100644 index 1412f5b64..000000000 --- a/src/ol_openedx_course_translations/ol_openedx_course_translations/glossaries/machine_learning/es.txt +++ /dev/null @@ -1,175 +0,0 @@ -# ES HINTS -## TERM MAPPINGS -These are preferred terminology choices for this language. Use them whenever they sound natural; adapt freely if context requires. - -- 'accuracy' -> 'exactitud' -- 'activation function' -> 'función de activación' -- 'artificial intelligence' -> 'inteligencia artificial' -- 'AUC' -> 'AUC' -- 'AUC (Area under the ROC curve)' -> 'AUC (área bajo la curva ROC)' -- 'backpropagation' -> 'propagación inversa' -- 'batch' -> 'lote' -- 'batch size' -> 'tamaño del lote' -- 'bias (ethics/fairness)' -> 'sesgo (ética/equidad)' -- 'bias (math) or bias term' -> 'ordenada al origen (matemática) o término de sesgo' -- 'bias in ethics and fairness' -> 'sesgo en ética y equidad' -- 'bias term' -> 'término de sesgo' -- 'binary classification' -> 'Clasificación binaria' -- 'bucketing' -> 'Agrupamiento' -- 'categorical' -> 'categórico' -- 'categorical data' -> 'datos categóricos' -- 'class' -> 'clase' -- 'class-imbalanced dataset' -> 'conjunto de datos con desequilibrio de clases' -- 'class-imbalanced datasets' -> 'conjuntos de datos con desequilibrio de clases' -- 'classification' -> 'clasificación' -- 'classification model' -> 'modelo de clasificación' -- 
'classification threshold' -> 'umbral de clasificación' -- 'classifier' -> 'clasificador' -- 'clipping' -> 'recorte' -- 'confusion matrix' -> 'matriz de confusión' -- 'continuous feature' -> 'atributo continuo' -- 'convergence' -> 'convergencia' -- 'data set or dataset' -> 'conjunto de datos (data set o dataset)' -- 'DataFrame' -> 'DataFrame' -- 'dataset' -> 'conjunto de datos' -- 'deep learning' -> 'aprendizaje profundo' -- 'deep model' -> 'modelo profundo' -- 'dense feature' -> 'atributo denso' -- 'depth' -> 'depth' -- 'discrete feature' -> 'atributo discreto' -- 'discrete features' -> 'atributos discretos' -- 'dynamic' -> 'dinámico' -- 'dynamic model' -> 'modelo dinámico' -- 'early stopping' -> 'Interrupción anticipada' -- 'embedding layer' -> 'Capa de embedding' -- 'embedding layers' -> 'capas de incorporación' -- 'epoch' -> 'época' -- 'example' -> 'ejemplo' -- 'false negative (FN)' -> 'falso negativo (FN)' -- 'false negatives' -> 'falsos negativos' -- 'false positive (FP)' -> 'Falso positivo (FP)' -- 'false positive rate' -> 'tasa de falsos positivos' -- 'false positive rate (FPR)' -> 'tasa de falsos positivos (FPR)' -- 'false positives' -> 'falsos positivos' -- 'feature' -> 'función' -- 'feature cross' -> 'combinación de atributos' -- 'feature crosses' -> 'combinaciones de atributos' -- 'feature engineering' -> 'ingeniería de atributos.' 
-- 'feature set' -> 'conjunto de atributos' -- 'feature vector' -> 'vector de atributos' -- 'feedback loop' -> 'ciclo de retroalimentación' -- 'generalization' -> 'generalización' -- 'generalization curve' -> 'Curva de generalización' -- 'gradient descent' -> 'descenso de gradientes' -- 'ground truth' -> 'Verdad fundamental' -- 'hidden layer' -> 'Capa oculta' -- 'hidden layer(s)' -> 'capas ocultas' -- 'hyperparameter' -> 'hiperparámetro' -- 'independently and identically distributed (i.i.d)' -> 'independiente e idénticamente distribuido (i.i.d.)' -- 'inference' -> 'Inferencia' -- 'input layer' -> 'capa de entrada' -- 'interpretability' -> 'interpretabilidad' -- 'iteration' -> 'iteración' -- 'L0regularization' -> 'Regularización L0' -- 'L1loss' -> 'pérdida L1' -- 'L1regularization' -> 'regularización L1' -- 'L2loss' -> 'pérdida L2' -- 'L2regularization' -> 'regularización L2' -- 'label' -> 'etiqueta' -- 'labeled example' -> 'ejemplo etiquetado' -- 'lambda' -> 'lambda' -- 'layer' -> 'oculta' -- 'learning rate' -> 'Tasa de aprendizaje' -- 'linear' -> 'linear' -- 'linear model' -> 'modelo lineal' -- 'linear models' -> 'modelos lineales' -- 'linear regression' -> 'regresión lineal' -- 'Log Loss' -> 'pérdida logística' -- 'log-odds' -> 'Logaritmo de probabilidad' -- 'logistic regression' -> 'regresión logística' -- 'loss' -> 'pérdida' -- 'loss curve' -> 'Curva de pérdida' -- 'loss function' -> 'función de pérdida' -- 'machine learning' -> 'aprendizaje automático' -- 'majority class' -> 'clase mayoritaria' -- 'mini-batch' -> 'minilote' -- 'minority class' -> 'clase minoritaria' -- 'model' -> 'modelo' -- 'multi-class classification' -> 'clasificación de clases múltiples' -- 'negative class' -> 'clase negativa' -- 'negative classes' -> 'clases negativas' -- 'neural network' -> 'neuronal prealimentada' -- 'neural networks' -> 'redes neuronales' -- 'neuron' -> 'neurona' -- 'node (neural network)' -> 'nodo (red neuronal)' -- 'nonlinear' -> 'no lineal' -- 'nonstationarity' -> 
'no estacionariedad' -- 'normalization' -> 'Normalización' -- 'numerical data' -> 'datos numéricos' -- 'offline' -> 'Sin conexión' -- 'offline inference' -> 'inferencia sin conexión' -- 'one-hot encoding' -> 'codificación one-hot' -- 'one-hot vector' -> 'vector de un solo 1' -- 'one-vs.-all' -> 'uno frente a todos' -- 'online' -> 'en línea' -- 'online inference' -> 'inferencia en línea' -- 'output layer' -> 'capa de salida' -- 'output layers' -> 'capas de salida' -- 'overfitting' -> 'sobreajuste' -- 'pandas' -> 'pandas' -- 'parameter' -> 'parámetro' -- 'positive class' -> 'clase positiva' -- 'positive classes' -> 'clases positivas' -- 'post-processing' -> 'posprocesamiento' -- 'precision' -> 'precision' -- 'prediction' -> 'predicción' -- 'proxy labels' -> 'etiquetas de proxy' -- 'RAG' -> 'RAG' -- 'rater' -> 'evaluador' -- 'recall' -> 'recall' -- 'Rectified Linear Unit (ReLU)' -> 'Unidad lineal rectificada (ReLU)' -- 'regression model' -> 'modelo de regresión' -- 'regularization' -> 'regularización' -- 'regularization rate' -> 'tasa de regularización' -- 'ReLU' -> 'ReLU' -- 'retrieval-augmented generation' -> 'generación aumentada por recuperación' -- 'retrieval-augmented generation (RAG)' -> 'Generación mejorada por recuperación (RAG)' -- 'ROC (receiver operating characteristic) Curve' -> 'Curva ROC (característica operativa del receptor)' -- 'ROC curve' -> 'curva ROC' -- 'Root Mean Squared Error (RMSE)' -> 'Raíz cuadrada del error cuadrático medio (RMSE)' -- 'sigmoid function' -> 'función sigmoidea' -- 'softmax' -> 'softmax' -- 'sparse feature' -> 'atributo disperso' -- 'sparse representation' -> 'representación dispersa' -- 'sparse vector' -> 'vector disperso' -- 'squared loss' -> 'Pérdida al cuadrado' -- 'static' -> 'static' -- 'static inference' -> 'Inferencia estática' -- 'static model' -> 'modelo estático' -- 'stationarity' -> 'Estacionariedad' -- 'Stochastic Gradient Descent (SGD)' -> 'Descenso de gradientes estocástico (SGD)' -- 'supervised learning' -> 
'aprendizaje supervisado' -- 'supervised machine learning' -> 'aprendizaje automático supervisado' -- 'synthetic feature' -> 'atributo sintético' -- 'synthetic features' -> 'atributos sintéticos' -- 'test loss' -> 'Pérdida de prueba' -- 'training' -> 'entrenamiento' -- 'training loss' -> 'Pérdida de entrenamiento' -- 'training set' -> 'conjunto de entrenamiento' -- 'training-serving skew' -> 'Sesgo entre el entrenamiento y la entrega' -- 'true negative (TN)' -> 'verdadero negativo (VN)' -- 'true negatives' -> 'verdaderos negativos' -- 'true positive (TP)' -> 'verdadero positivo (VP)' -- 'true positive rate' -> 'tasa de verdaderos positivos' -- 'true positive rate (TPR)' -> 'tasa de verdaderos positivos (TVP)' -- 'true positives' -> 'verdaderos positivos' -- 'underfitting' -> 'Subajuste' -- 'unlabeled example' -> 'ejemplo sin etiqueta' -- 'unsupervised machine learning' -> 'aprendizaje automático no supervisado' -- 'validation' -> 'validación' -- 'validation dataset' -> 'conjunto de datos de validación' -- 'validation loss' -> 'Pérdida de validación' -- 'validation set' -> 'conjunto de validación' -- 'weight' -> 'peso' -- 'weighted sum' -> 'suma ponderada' -- 'Z-score normalization' -> 'normalización de la puntuación Z' diff --git a/src/ol_openedx_course_translations/ol_openedx_course_translations/glossaries/machine_learning/es_419.txt b/src/ol_openedx_course_translations/ol_openedx_course_translations/glossaries/machine_learning/es_419.txt deleted file mode 100644 index 1412f5b64..000000000 --- a/src/ol_openedx_course_translations/ol_openedx_course_translations/glossaries/machine_learning/es_419.txt +++ /dev/null @@ -1,175 +0,0 @@ -# ES HINTS -## TERM MAPPINGS -These are preferred terminology choices for this language. Use them whenever they sound natural; adapt freely if context requires. 
- -- 'accuracy' -> 'exactitud' -- 'activation function' -> 'función de activación' -- 'artificial intelligence' -> 'inteligencia artificial' -- 'AUC' -> 'AUC' -- 'AUC (Area under the ROC curve)' -> 'AUC (área bajo la curva ROC)' -- 'backpropagation' -> 'propagación inversa' -- 'batch' -> 'lote' -- 'batch size' -> 'tamaño del lote' -- 'bias (ethics/fairness)' -> 'sesgo (ética/equidad)' -- 'bias (math) or bias term' -> 'ordenada al origen (matemática) o término de sesgo' -- 'bias in ethics and fairness' -> 'sesgo en ética y equidad' -- 'bias term' -> 'término de sesgo' -- 'binary classification' -> 'Clasificación binaria' -- 'bucketing' -> 'Agrupamiento' -- 'categorical' -> 'categórico' -- 'categorical data' -> 'datos categóricos' -- 'class' -> 'clase' -- 'class-imbalanced dataset' -> 'conjunto de datos con desequilibrio de clases' -- 'class-imbalanced datasets' -> 'conjuntos de datos con desequilibrio de clases' -- 'classification' -> 'clasificación' -- 'classification model' -> 'modelo de clasificación' -- 'classification threshold' -> 'umbral de clasificación' -- 'classifier' -> 'clasificador' -- 'clipping' -> 'recorte' -- 'confusion matrix' -> 'matriz de confusión' -- 'continuous feature' -> 'atributo continuo' -- 'convergence' -> 'convergencia' -- 'data set or dataset' -> 'conjunto de datos (data set o dataset)' -- 'DataFrame' -> 'DataFrame' -- 'dataset' -> 'conjunto de datos' -- 'deep learning' -> 'aprendizaje profundo' -- 'deep model' -> 'modelo profundo' -- 'dense feature' -> 'atributo denso' -- 'depth' -> 'depth' -- 'discrete feature' -> 'atributo discreto' -- 'discrete features' -> 'atributos discretos' -- 'dynamic' -> 'dinámico' -- 'dynamic model' -> 'modelo dinámico' -- 'early stopping' -> 'Interrupción anticipada' -- 'embedding layer' -> 'Capa de embedding' -- 'embedding layers' -> 'capas de incorporación' -- 'epoch' -> 'época' -- 'example' -> 'ejemplo' -- 'false negative (FN)' -> 'falso negativo (FN)' -- 'false negatives' -> 'falsos negativos' -- 'false 
positive (FP)' -> 'Falso positivo (FP)' -- 'false positive rate' -> 'tasa de falsos positivos' -- 'false positive rate (FPR)' -> 'tasa de falsos positivos (FPR)' -- 'false positives' -> 'falsos positivos' -- 'feature' -> 'función' -- 'feature cross' -> 'combinación de atributos' -- 'feature crosses' -> 'combinaciones de atributos' -- 'feature engineering' -> 'ingeniería de atributos.' -- 'feature set' -> 'conjunto de atributos' -- 'feature vector' -> 'vector de atributos' -- 'feedback loop' -> 'ciclo de retroalimentación' -- 'generalization' -> 'generalización' -- 'generalization curve' -> 'Curva de generalización' -- 'gradient descent' -> 'descenso de gradientes' -- 'ground truth' -> 'Verdad fundamental' -- 'hidden layer' -> 'Capa oculta' -- 'hidden layer(s)' -> 'capas ocultas' -- 'hyperparameter' -> 'hiperparámetro' -- 'independently and identically distributed (i.i.d)' -> 'independiente e idénticamente distribuido (i.i.d.)' -- 'inference' -> 'Inferencia' -- 'input layer' -> 'capa de entrada' -- 'interpretability' -> 'interpretabilidad' -- 'iteration' -> 'iteración' -- 'L0regularization' -> 'Regularización L0' -- 'L1loss' -> 'pérdida L1' -- 'L1regularization' -> 'regularización L1' -- 'L2loss' -> 'pérdida L2' -- 'L2regularization' -> 'regularización L2' -- 'label' -> 'etiqueta' -- 'labeled example' -> 'ejemplo etiquetado' -- 'lambda' -> 'lambda' -- 'layer' -> 'oculta' -- 'learning rate' -> 'Tasa de aprendizaje' -- 'linear' -> 'linear' -- 'linear model' -> 'modelo lineal' -- 'linear models' -> 'modelos lineales' -- 'linear regression' -> 'regresión lineal' -- 'Log Loss' -> 'pérdida logística' -- 'log-odds' -> 'Logaritmo de probabilidad' -- 'logistic regression' -> 'regresión logística' -- 'loss' -> 'pérdida' -- 'loss curve' -> 'Curva de pérdida' -- 'loss function' -> 'función de pérdida' -- 'machine learning' -> 'aprendizaje automático' -- 'majority class' -> 'clase mayoritaria' -- 'mini-batch' -> 'minilote' -- 'minority class' -> 'clase minoritaria' -- 'model' -> 
'modelo' -- 'multi-class classification' -> 'clasificación de clases múltiples' -- 'negative class' -> 'clase negativa' -- 'negative classes' -> 'clases negativas' -- 'neural network' -> 'neuronal prealimentada' -- 'neural networks' -> 'redes neuronales' -- 'neuron' -> 'neurona' -- 'node (neural network)' -> 'nodo (red neuronal)' -- 'nonlinear' -> 'no lineal' -- 'nonstationarity' -> 'no estacionariedad' -- 'normalization' -> 'Normalización' -- 'numerical data' -> 'datos numéricos' -- 'offline' -> 'Sin conexión' -- 'offline inference' -> 'inferencia sin conexión' -- 'one-hot encoding' -> 'codificación one-hot' -- 'one-hot vector' -> 'vector de un solo 1' -- 'one-vs.-all' -> 'uno frente a todos' -- 'online' -> 'en línea' -- 'online inference' -> 'inferencia en línea' -- 'output layer' -> 'capa de salida' -- 'output layers' -> 'capas de salida' -- 'overfitting' -> 'sobreajuste' -- 'pandas' -> 'pandas' -- 'parameter' -> 'parámetro' -- 'positive class' -> 'clase positiva' -- 'positive classes' -> 'clases positivas' -- 'post-processing' -> 'posprocesamiento' -- 'precision' -> 'precision' -- 'prediction' -> 'predicción' -- 'proxy labels' -> 'etiquetas de proxy' -- 'RAG' -> 'RAG' -- 'rater' -> 'evaluador' -- 'recall' -> 'recall' -- 'Rectified Linear Unit (ReLU)' -> 'Unidad lineal rectificada (ReLU)' -- 'regression model' -> 'modelo de regresión' -- 'regularization' -> 'regularización' -- 'regularization rate' -> 'tasa de regularización' -- 'ReLU' -> 'ReLU' -- 'retrieval-augmented generation' -> 'generación aumentada por recuperación' -- 'retrieval-augmented generation (RAG)' -> 'Generación mejorada por recuperación (RAG)' -- 'ROC (receiver operating characteristic) Curve' -> 'Curva ROC (característica operativa del receptor)' -- 'ROC curve' -> 'curva ROC' -- 'Root Mean Squared Error (RMSE)' -> 'Raíz cuadrada del error cuadrático medio (RMSE)' -- 'sigmoid function' -> 'función sigmoidea' -- 'softmax' -> 'softmax' -- 'sparse feature' -> 'atributo disperso' -- 'sparse 
representation' -> 'representación dispersa' -- 'sparse vector' -> 'vector disperso' -- 'squared loss' -> 'Pérdida al cuadrado' -- 'static' -> 'static' -- 'static inference' -> 'Inferencia estática' -- 'static model' -> 'modelo estático' -- 'stationarity' -> 'Estacionariedad' -- 'Stochastic Gradient Descent (SGD)' -> 'Descenso de gradientes estocástico (SGD)' -- 'supervised learning' -> 'aprendizaje supervisado' -- 'supervised machine learning' -> 'aprendizaje automático supervisado' -- 'synthetic feature' -> 'atributo sintético' -- 'synthetic features' -> 'atributos sintéticos' -- 'test loss' -> 'Pérdida de prueba' -- 'training' -> 'entrenamiento' -- 'training loss' -> 'Pérdida de entrenamiento' -- 'training set' -> 'conjunto de entrenamiento' -- 'training-serving skew' -> 'Sesgo entre el entrenamiento y la entrega' -- 'true negative (TN)' -> 'verdadero negativo (VN)' -- 'true negatives' -> 'verdaderos negativos' -- 'true positive (TP)' -> 'verdadero positivo (VP)' -- 'true positive rate' -> 'tasa de verdaderos positivos' -- 'true positive rate (TPR)' -> 'tasa de verdaderos positivos (TVP)' -- 'true positives' -> 'verdaderos positivos' -- 'underfitting' -> 'Subajuste' -- 'unlabeled example' -> 'ejemplo sin etiqueta' -- 'unsupervised machine learning' -> 'aprendizaje automático no supervisado' -- 'validation' -> 'validación' -- 'validation dataset' -> 'conjunto de datos de validación' -- 'validation loss' -> 'Pérdida de validación' -- 'validation set' -> 'conjunto de validación' -- 'weight' -> 'peso' -- 'weighted sum' -> 'suma ponderada' -- 'Z-score normalization' -> 'normalización de la puntuación Z' diff --git a/src/ol_openedx_course_translations/ol_openedx_course_translations/glossaries/machine_learning/fr.txt b/src/ol_openedx_course_translations/ol_openedx_course_translations/glossaries/machine_learning/fr.txt deleted file mode 100644 index 3f64f3098..000000000 --- 
a/src/ol_openedx_course_translations/ol_openedx_course_translations/glossaries/machine_learning/fr.txt +++ /dev/null @@ -1,175 +0,0 @@ -# FR HINTS -## TERM MAPPINGS -These are preferred terminology choices for this language. Use them whenever they sound natural; adapt freely if context requires. - -- 'accuracy' -> 'accuracy' -- 'activation function' -> 'fonction d'activation' -- 'artificial intelligence' -> 'intelligence artificielle' -- 'AUC' -> 'AUC' -- 'AUC (Area under the ROC curve)' -> 'AUC (aire sous la courbe ROC)' -- 'backpropagation' -> 'rétropropagation' -- 'batch' -> 'lot' -- 'batch size' -> 'taille du lot' -- 'bias (ethics/fairness)' -> 'biais (éthique/équité) (bias (ethics/fairness))' -- 'bias (math) or bias term' -> 'biais (mathématiques) ou terme de biais' -- 'bias in ethics and fairness' -> 'biais en matière d'éthique et d'équité' -- 'bias term' -> 'biais' -- 'binary classification' -> 'classification binaire' -- 'bucketing' -> 'le binning' -- 'categorical' -> 'catégorielle' -- 'categorical data' -> 'données catégorielles' -- 'class' -> 'classe' -- 'class-imbalanced dataset' -> 'ensemble de données avec déséquilibre des classes' -- 'class-imbalanced datasets' -> 'ensembles de données déséquilibrés en termes de classes' -- 'classification' -> 'classification' -- 'classification model' -> 'modèle de classification' -- 'classification threshold' -> 'seuil de classification' -- 'classifier' -> 'classificateur' -- 'clipping' -> 'écrêtage' -- 'confusion matrix' -> 'matrice de confusion' -- 'continuous feature' -> 'caractéristique continue' -- 'convergence' -> 'convergence' -- 'data set or dataset' -> 'ensemble de données (data set ou dataset)' -- 'DataFrame' -> 'DataFrame' -- 'dataset' -> 'ensemble de données' -- 'deep learning' -> 'deep learning' -- 'deep model' -> 'modèle deep learning' -- 'dense feature' -> 'caractéristique dense' -- 'depth' -> 'profondeur' -- 'discrete feature' -> 'caractéristique discrète' -- 'discrete features' -> 'caractéristiques 
discrètes' -- 'dynamic' -> 'dynamic' -- 'dynamic model' -> 'modèle dynamique' -- 'early stopping' -> 'arrêt prématuré' -- 'embedding layer' -> 'couche d'embedding' -- 'embedding layers' -> 'couches d'embedding' -- 'epoch' -> 'epoch' -- 'example' -> 'exemple' -- 'false negative (FN)' -> 'Faux négatif (FN)' -- 'false negatives' -> 'faux négatifs' -- 'false positive (FP)' -> 'Faux positif (FP)' -- 'false positive rate' -> 'taux de faux positifs' -- 'false positive rate (FPR)' -> 'taux de faux positifs (TFP) (false positive rate (FPR))' -- 'false positives' -> 'faux positifs' -- 'feature' -> 'fonctionnalité' -- 'feature cross' -> 'croisement de caractéristiques' -- 'feature crosses' -> 'caractéristiques croisées' -- 'feature engineering' -> 'l'ingénierie des caractéristiques.' -- 'feature set' -> 'ensemble de fonctionnalités' -- 'feature vector' -> 'vecteur de caractéristiques' -- 'feedback loop' -> 'boucle de rétroaction' -- 'generalization' -> 'généralisation' -- 'generalization curve' -> 'courbe de généralisation' -- 'gradient descent' -> 'descente de gradient' -- 'ground truth' -> 'vérité terrain' -- 'hidden layer' -> 'couche cachée' -- 'hidden layer(s)' -> 'couches cachées' -- 'hyperparameter' -> 'hyperparamètre' -- 'independently and identically distributed (i.i.d)' -> 'variables indépendantes et identiquement distribuées (i.i.d)' -- 'inference' -> 'inférence' -- 'input layer' -> 'couche d'entrée' -- 'interpretability' -> 'interprétabilité' -- 'iteration' -> 'itération' -- 'L0regularization' -> 'Régularisation L0' -- 'L1loss' -> 'perte L1' -- 'L1regularization' -> 'régularisationL1' -- 'L2loss' -> 'perte L2' -- 'L2regularization' -> 'régularisationL2' -- 'label' -> 'étiquette' -- 'labeled example' -> 'exemple étiqueté' -- 'lambda' -> 'lambda' -- 'layer' -> 'cachée)' -- 'learning rate' -> 'taux d'apprentissage' -- 'linear' -> 'linear' -- 'linear model' -> 'modèle linéaire' -- 'linear models' -> 'modèles linéaires' -- 'linear regression' -> 'régression linéaire' -- 
'Log Loss' -> 'perte logistique' -- 'log-odds' -> 'logarithme de cote' -- 'logistic regression' -> 'régression logistique' -- 'loss' -> 'perte' -- 'loss curve' -> 'courbe de perte' -- 'loss function' -> 'fonction de perte' -- 'machine learning' -> 'machine learning' -- 'majority class' -> 'classe majoritaire' -- 'mini-batch' -> 'mini-lot' -- 'minority class' -> 'classe minoritaire' -- 'model' -> 'modèle' -- 'multi-class classification' -> 'classification à classes multiples' -- 'negative class' -> 'classe négative' -- 'negative classes' -> 'classes négatives' -- 'neural network' -> 'neurones feedforward' -- 'neural networks' -> 'réseaux de neurones' -- 'neuron' -> 'neurone' -- 'node (neural network)' -> 'nœud (réseau de neurones)' -- 'nonlinear' -> 'non linéaire' -- 'nonstationarity' -> 'non-stationnarité' -- 'normalization' -> 'normalisation' -- 'numerical data' -> 'données numériques' -- 'offline' -> 'Hors connexion' -- 'offline inference' -> 'inférence hors connexion' -- 'one-hot encoding' -> 'Encodage one-hot' -- 'one-hot vector' -> 'vecteur one-hot' -- 'one-vs.-all' -> 'un contre tous' -- 'online' -> 'online' -- 'online inference' -> 'inférence en ligne' -- 'output layer' -> 'couche de sortie' -- 'output layers' -> 'couches de sortie' -- 'overfitting' -> 'surapprentissage' -- 'pandas' -> 'pandas' -- 'parameter' -> 'paramètre' -- 'positive class' -> 'classe positive' -- 'positive classes' -> 'classes positives' -- 'post-processing' -> 'post-traitement' -- 'precision' -> 'precision' -- 'prediction' -> 'prédiction' -- 'proxy labels' -> 'étiquettes de substitution' -- 'RAG' -> 'RAG' -- 'rater' -> 'évaluateur' -- 'recall' -> 'recall (rappel)' -- 'Rectified Linear Unit (ReLU)' -> 'Unité de rectification linéaire (ReLU)' -- 'regression model' -> 'modèle de régression' -- 'regularization' -> 'régularisation' -- 'regularization rate' -> 'taux de régularisation' -- 'ReLU' -> 'ReLU' -- 'retrieval-augmented generation' -> 'génération augmentée par récupération' -- 
'retrieval-augmented generation (RAG)' -> 'génération augmentée par récupération (RAG)' -- 'ROC (receiver operating characteristic) Curve' -> 'Courbe ROC (receiver operating characteristic)' -- 'ROC curve' -> 'courbe ROC' -- 'Root Mean Squared Error (RMSE)' -> 'la racine carrée de l'erreur quadratique moyenne (RMSE, Root Mean Squared Error)' -- 'sigmoid function' -> 'fonction sigmoïde' -- 'softmax' -> 'softmax' -- 'sparse feature' -> 'caractéristique creuse' -- 'sparse representation' -> 'représentation creuse' -- 'sparse vector' -> 'vecteur creux' -- 'squared loss' -> 'perte quadratique' -- 'static' -> 'static' -- 'static inference' -> 'inférence statique' -- 'static model' -> 'modèle statique' -- 'stationarity' -> 'stationnarité' -- 'Stochastic Gradient Descent (SGD)' -> 'Descente de gradient stochastique (SGD, Stochastic Gradient Descent)' -- 'supervised learning' -> 'apprentissage supervisé' -- 'supervised machine learning' -> 'machine learning supervisé' -- 'synthetic feature' -> 'caractéristique synthétique' -- 'synthetic features' -> 'caractéristiques synthétiques' -- 'test loss' -> 'perte de test' -- 'training' -> 'entraînement' -- 'training loss' -> 'perte d'entraînement' -- 'training set' -> 'ensemble d'entraînement' -- 'training-serving skew' -> 'décalage entraînement/mise en service' -- 'true negative (TN)' -> 'vrai négatif (VN)' -- 'true negatives' -> 'vrais négatifs' -- 'true positive (TP)' -> 'vrai positif (VP)' -- 'true positive rate' -> 'taux de vrais positifs' -- 'true positive rate (TPR)' -> 'taux de vrais positifs (TVP)' -- 'true positives' -> 'vrais positifs' -- 'underfitting' -> 'sous-ajustement' -- 'unlabeled example' -> 'exemple sans étiquette' -- 'unsupervised machine learning' -> 'machine learning non supervisé' -- 'validation' -> 'validation' -- 'validation dataset' -> 'ensemble de données de validation' -- 'validation loss' -> 'perte de validation' -- 'validation set' -> 'ensemble de validation' -- 'weight' -> 'weight' -- 'weighted sum' 
-> 'Somme pondérée' -- 'Z-score normalization' -> 'Normalisation du score Z' diff --git a/src/ol_openedx_course_translations/ol_openedx_course_translations/glossaries/machine_learning/ja.txt b/src/ol_openedx_course_translations/ol_openedx_course_translations/glossaries/machine_learning/ja.txt deleted file mode 100644 index fb3787a79..000000000 --- a/src/ol_openedx_course_translations/ol_openedx_course_translations/glossaries/machine_learning/ja.txt +++ /dev/null @@ -1,175 +0,0 @@ -# JA HINTS -## TERM MAPPINGS -These are preferred terminology choices for this language. Use them whenever they sound natural; adapt freely if context requires. - -- 'accuracy' -> 「accuracy」 -- 'activation function' -> 「活性化関数」 -- 'artificial intelligence' -> 「AI」 -- 'AUC' -> 「AUC」 -- 'AUC (Area under the ROC curve)' -> 「AUC(ROC 曲線の下の面積)」 -- 'backpropagation' -> 「バックプロパゲーション」 -- 'batch' -> 「Batch」 -- 'batch size' -> 「バッチサイズ」 -- 'bias (ethics/fairness)' -> 「バイアス(倫理/公平性)」 -- 'bias (math) or bias term' -> 「バイアス(数学)またはバイアス項」 -- 'bias in ethics and fairness' -> 「倫理と公平性のバイアス」 -- 'bias term' -> 「バイアス項」 -- 'binary classification' -> 「バイナリ分類」 -- 'bucketing' -> 「バケット化、」 -- 'categorical' -> 「カテゴリカル」 -- 'categorical data' -> 「カテゴリデータ」 -- 'class' -> 「クラス」 -- 'class-imbalanced dataset' -> 「クラスの不均衡なデータセット」 -- 'class-imbalanced datasets' -> 「クラス不均衡データセット」 -- 'classification' -> 「分類」 -- 'classification model' -> 「分類モデル」 -- 'classification threshold' -> 「分類しきい値」 -- 'classifier' -> 「分類器」 -- 'clipping' -> 「クリッピング」 -- 'confusion matrix' -> 「混同行列」 -- 'continuous feature' -> 「連続特徴」 -- 'convergence' -> 「収束」 -- 'data set or dataset' -> 「データセット」 -- 'DataFrame' -> 「DataFrame」 -- 'dataset' -> 「データセット」 -- 'deep learning' -> 「ディープ ラーニング」 -- 'deep model' -> 「ディープモデル」 -- 'dense feature' -> 「密な特徴」 -- 'depth' -> 「深さ」 -- 'discrete feature' -> 「離散特徴」 -- 'discrete features' -> 「離散特徴」 -- 'dynamic' -> 「動的」 -- 'dynamic model' -> 「動的モデル」 -- 'early stopping' -> 「早期停止」 -- 'embedding layer' -> 「エンベディング レイヤ」 -- 'embedding layers' -> 
「エンベディング レイヤ」 -- 'epoch' -> 「エポック」 -- 'example' -> 「例」 -- 'false negative (FN)' -> 「偽陰性(FN)」 -- 'false negatives' -> 「偽陰性」 -- 'false positive (FP)' -> 「偽陽性(FP)」 -- 'false positive rate' -> 「偽陽性率」 -- 'false positive rate (FPR)' -> 「偽陽性率(FPR)」 -- 'false positives' -> 「偽陽性」 -- 'feature' -> 「機能」 -- 'feature cross' -> 「特徴クロス」 -- 'feature crosses' -> 「特徴交差」 -- 'feature engineering' -> 「2つのステップが含まれます」 -- 'feature set' -> 「機能セット」 -- 'feature vector' -> 「特徴ベクトル」 -- 'feedback loop' -> 「フィードバック ループ」 -- 'generalization' -> 「一般化」 -- 'generalization curve' -> 「汎化曲線」 -- 'gradient descent' -> 「勾配降下法」 -- 'ground truth' -> 「グラウンド トゥルース」 -- 'hidden layer' -> 「隠れ層」 -- 'hidden layer(s)' -> 「隠れ層」 -- 'hyperparameter' -> 「ハイパーパラメータ」 -- 'independently and identically distributed (i.i.d)' -> 「独立同分布(i.i.d)」 -- 'inference' -> 「推論」 -- 'input layer' -> 「入力レイヤ」 -- 'interpretability' -> 「解釈可能性」 -- 'iteration' -> 「繰り返し」 -- 'L0regularization' -> 「L0正規化」 -- 'L1loss' -> 「L1損失」 -- 'L1regularization' -> 「L1正則化」 -- 'L2loss' -> 「L2損失」 -- 'L2regularization' -> 「L2正則化」 -- 'label' -> 「ラベル」 -- 'labeled example' -> 「ラベル付きの例」 -- 'lambda' -> 「lambda」 -- 'layer' -> 「レイヤ」 -- 'learning rate' -> 「学習率」 -- 'linear' -> 「線形」 -- 'linear model' -> 「線形モデル」 -- 'linear models' -> 「線形モデル」 -- 'linear regression' -> 「線形回帰」 -- 'Log Loss' -> 「対数損失」 -- 'log-odds' -> 「対数オッズ」 -- 'logistic regression' -> 「ロジスティック回帰」 -- 'loss' -> 「損失」 -- 'loss curve' -> 「損失曲線」 -- 'loss function' -> 「損失関数」 -- 'machine learning' -> 「機械学習」 -- 'majority class' -> 「多数派クラス」 -- 'mini-batch' -> 「ミニバッチ」 -- 'minority class' -> 「少数派クラス」 -- 'model' -> 「モデル」 -- 'multi-class classification' -> 「マルチクラス分類」 -- 'negative class' -> 「陰性クラス」 -- 'negative classes' -> 「陰性クラス」 -- 'neural network' -> 「ニューラル ネットワークの」 -- 'neural networks' -> 「ニューラル ネットワーク」 -- 'neuron' -> 「ニューロン」 -- 'node (neural network)' -> 「ノード(ニューラル ネットワーク)」 -- 'nonlinear' -> 「非線形」 -- 'nonstationarity' -> 「非定常性」 -- 'normalization' -> 「正規化」 -- 'numerical data' -> 「数値データ」 -- 'offline' -> 「オフライン」 -- 'offline 
inference' -> 「オフライン推論」 -- 'one-hot encoding' -> 「ワンホット エンコード」 -- 'one-hot vector' -> 「ワンホット ベクトル」 -- 'one-vs.-all' -> 「1 対すべて」 -- 'online' -> 「オンライン」 -- 'online inference' -> 「オンライン推論」 -- 'output layer' -> 「出力レイヤ」 -- 'output layers' -> 「出力レイヤ」 -- 'overfitting' -> 「過学習」 -- 'pandas' -> 「pandas」 -- 'parameter' -> 「パラメータ」 -- 'positive class' -> 「陽性クラス」 -- 'positive classes' -> 「陽性クラス」 -- 'post-processing' -> 「後処理」 -- 'precision' -> 「precision」 -- 'prediction' -> 「予測」 -- 'proxy labels' -> 「プロキシラベル」 -- 'RAG' -> 「RAG」 -- 'rater' -> 「rater」 -- 'recall' -> 「recall」 -- 'Rectified Linear Unit (ReLU)' -> 「正規化線形ユニット(ReLU)」 -- 'regression model' -> 「回帰モデル」 -- 'regularization' -> 「正則化」 -- 'regularization rate' -> 「正則化率」 -- 'ReLU' -> 「ReLU」 -- 'retrieval-augmented generation' -> 「検索拡張生成」 -- 'retrieval-augmented generation (RAG)' -> 「検索拡張生成(RAG)」 -- 'ROC (receiver operating characteristic) Curve' -> 「ROC(受信者操作特性)曲線」 -- 'ROC curve' -> 「ROC 曲線」 -- 'Root Mean Squared Error (RMSE)' -> 「二乗平均平方根誤差(RMSE)」 -- 'sigmoid function' -> 「シグモイド関数」 -- 'softmax' -> 「Softmax」 -- 'sparse feature' -> 「スパース特徴」 -- 'sparse representation' -> 「スパース表現」 -- 'sparse vector' -> 「スパース ベクトル」 -- 'squared loss' -> 「二乗損失」 -- 'static' -> 「static」 -- 'static inference' -> 「静的推論」 -- 'static model' -> 「静的モデル」 -- 'stationarity' -> 「定常性」 -- 'Stochastic Gradient Descent (SGD)' -> 「確率的勾配降下法(SGD)」 -- 'supervised learning' -> 「教師あり学習」 -- 'supervised machine learning' -> 「教師あり機械学習」 -- 'synthetic feature' -> 「合成特徴」 -- 'synthetic features' -> 「合成特徴」 -- 'test loss' -> 「テスト損失」 -- 'training' -> 「トレーニング」 -- 'training loss' -> 「トレーニングの損失」 -- 'training set' -> 「トレーニング セット」 -- 'training-serving skew' -> 「トレーニング サービング スキュー」 -- 'true negative (TN)' -> 「真陰性(TN)」 -- 'true negatives' -> 「真陰性」 -- 'true positive (TP)' -> 「真陽性(TP)」 -- 'true positive rate' -> 「真陽性率」 -- 'true positive rate (TPR)' -> 「真陽性率(TPR)」 -- 'true positives' -> 「真陽性」 -- 'underfitting' -> 「アンダーフィット」 -- 'unlabeled example' -> 「ラベルのない例」 -- 'unsupervised machine learning' -> 
「教師なし機械学習」 -- 'validation' -> 「検証」 -- 'validation dataset' -> 「検証データセット」 -- 'validation loss' -> 「検証損失」 -- 'validation set' -> 「検証セット」 -- 'weight' -> 「weight」 -- 'weighted sum' -> 「加重合計」 -- 'Z-score normalization' -> 「Z スコアの正規化」 diff --git a/src/ol_openedx_course_translations/ol_openedx_course_translations/glossaries/machine_learning/pt_BR.txt b/src/ol_openedx_course_translations/ol_openedx_course_translations/glossaries/machine_learning/pt_BR.txt deleted file mode 100644 index 16b2b9dee..000000000 --- a/src/ol_openedx_course_translations/ol_openedx_course_translations/glossaries/machine_learning/pt_BR.txt +++ /dev/null @@ -1,175 +0,0 @@ -# PT-BR HINTS -## TERM MAPPINGS -These are preferred terminology choices for this language. Use them whenever they sound natural; adapt freely if context requires. - -- 'accuracy' -> 'precisão' -- 'activation function' -> 'função de ativação' -- 'artificial intelligence' -> 'inteligência artificial' -- 'AUC' -> 'AUC' -- 'AUC (Area under the ROC curve)' -> 'AUC (área sob a curva ROC)' -- 'backpropagation' -> 'retropropagação' -- 'batch' -> 'lote' -- 'batch size' -> 'tamanho do lote' -- 'bias (ethics/fairness)' -> 'viés (ética/justiça)' -- 'bias (math) or bias term' -> 'viés (matemática) ou termo de viés' -- 'bias in ethics and fairness' -> 'viés em ética e justiça' -- 'bias term' -> 'termo de viés' -- 'binary classification' -> 'classificação binária' -- 'bucketing' -> 'agrupamento por classes' -- 'categorical' -> 'categórico' -- 'categorical data' -> 'dados categóricos' -- 'class' -> 'classe' -- 'class-imbalanced dataset' -> 'conjunto de dados não balanceado' -- 'class-imbalanced datasets' -> 'conjuntos de dados com classes desbalanceadas' -- 'classification' -> 'classificação' -- 'classification model' -> 'modelo de classificação' -- 'classification threshold' -> 'limiar de classificação' -- 'classifier' -> 'classificador' -- 'clipping' -> 'corte' -- 'confusion matrix' -> 'matriz de confusão' -- 'continuous feature' -> 'atributo 
contínuo' -- 'convergence' -> 'convergência' -- 'data set or dataset' -> 'conjunto de dados' -- 'DataFrame' -> 'DataFrame' -- 'dataset' -> 'conjunto de dados' -- 'deep learning' -> 'aprendizado profundo' -- 'deep model' -> 'modelo profundo' -- 'dense feature' -> 'atributo denso' -- 'depth' -> 'profundidade' -- 'discrete feature' -> 'atributo discreto' -- 'discrete features' -> 'recursos discretos' -- 'dynamic' -> 'dinâmico' -- 'dynamic model' -> 'modelo dinâmico' -- 'early stopping' -> 'parada antecipada' -- 'embedding layer' -> 'camada de embedding' -- 'embedding layers' -> 'camadas de embedding' -- 'epoch' -> 'época' -- 'example' -> 'exemplo' -- 'false negative (FN)' -> 'falso negativo (FN)' -- 'false negatives' -> 'falsos negativos' -- 'false positive (FP)' -> 'falso positivo (FP)' -- 'false positive rate' -> 'taxa de falso positivo' -- 'false positive rate (FPR)' -> 'taxa de falso positivo (FPR)' -- 'false positives' -> 'falsos positivos' -- 'feature' -> 'recurso' -- 'feature cross' -> 'cruzamento de atributos' -- 'feature crosses' -> 'cruzamentos de recursos' -- 'feature engineering' -> 'engenharia de atributos' -- 'feature set' -> 'conjunto de atributos' -- 'feature vector' -> 'vetor de atributos' -- 'feedback loop' -> 'ciclo de feedback' -- 'generalization' -> 'generalização' -- 'generalization curve' -> 'curva de generalização' -- 'gradient descent' -> 'gradiente descendente' -- 'ground truth' -> 'informações empíricas' -- 'hidden layer' -> 'camada oculta' -- 'hidden layer(s)' -> 'camadas ocultas' -- 'hyperparameter' -> 'hiperparâmetro' -- 'independently and identically distributed (i.i.d)' -> 'independente e identicamente distribuído (i.i.d)' -- 'inference' -> 'inferência' -- 'input layer' -> 'camada de entrada' -- 'interpretability' -> 'interpretabilidade' -- 'iteration' -> 'iteração' -- 'L0regularization' -> 'Regularização L0' -- 'L1loss' -> 'L1' -- 'L1regularization' -> 'regularização L1' -- 'L2loss' -> 'perda L2' -- 'L2regularization' -> 
'regularização L2' -- 'label' -> 'rótulo' -- 'labeled example' -> 'exemplo rotulado' -- 'lambda' -> 'lambda' -- 'layer' -> 'camada' -- 'learning rate' -> 'taxa de aprendizado' -- 'linear' -> 'linear' -- 'linear model' -> 'modelo linear' -- 'linear models' -> 'modelos lineares' -- 'linear regression' -> 'regressão linear' -- 'Log Loss' -> 'perda logarítmica' -- 'log-odds' -> 'log-odds' -- 'logistic regression' -> 'regressão logística' -- 'loss' -> 'perda' -- 'loss curve' -> 'curva de perda' -- 'loss function' -> 'função de perda' -- 'machine learning' -> 'machine learning' -- 'majority class' -> 'classe majoritária' -- 'mini-batch' -> 'minilote' -- 'minority class' -> 'classe minoritária' -- 'model' -> 'modelo' -- 'multi-class classification' -> 'classificação multiclasse' -- 'negative class' -> 'classe negativa' -- 'negative classes' -> 'classes negativas' -- 'neural network' -> 'rede neural' -- 'neural networks' -> 'redes neurais' -- 'neuron' -> 'neurônio' -- 'node (neural network)' -> 'nó (rede neural)' -- 'nonlinear' -> 'não linear' -- 'nonstationarity' -> 'não estacionariedade' -- 'normalization' -> 'normalização' -- 'numerical data' -> 'dados numéricos' -- 'offline' -> 'off-line' -- 'offline inference' -> 'inferência off-line' -- 'one-hot encoding' -> 'codificação one-hot' -- 'one-hot vector' -> 'vetor one-hot' -- 'one-vs.-all' -> 'um-contra-todos' -- 'online' -> 'on-line' -- 'online inference' -> 'inferência on-line' -- 'output layer' -> 'camada de saída' -- 'output layers' -> 'camadas de saída' -- 'overfitting' -> 'overfitting' -- 'pandas' -> 'pandas' -- 'parameter' -> 'parâmetro' -- 'positive class' -> 'classe positiva' -- 'positive classes' -> 'classes positivas' -- 'post-processing' -> 'pós-processamento' -- 'precision' -> 'precision' -- 'prediction' -> 'previsão' -- 'proxy labels' -> 'rotulação indireta' -- 'RAG' -> 'RAG' -- 'rater' -> 'rotulador' -- 'recall' -> 'recall' -- 'Rectified Linear Unit (ReLU)' -> 'Unidade linear retificada (ReLU)' --
'regression model' -> 'modelo de regressão' -- 'regularization' -> 'regularização' -- 'regularization rate' -> 'taxa de regularização' -- 'ReLU' -> 'ReLU' -- 'retrieval-augmented generation' -> 'geração aumentada de recuperação' -- 'retrieval-augmented generation (RAG)' -> 'geração aumentada de recuperação (RAG)' -- 'ROC (receiver operating characteristic) Curve' -> 'Curva ROC' -- 'ROC curve' -> 'curva ROC' -- 'Root Mean Squared Error (RMSE)' -> 'Raiz do erro quadrático médio (RMSE)' -- 'sigmoid function' -> 'função sigmoide' -- 'softmax' -> 'softmax' -- 'sparse feature' -> 'atributo esparso' -- 'sparse representation' -> 'representação esparsa' -- 'sparse vector' -> 'vetor esparso' -- 'squared loss' -> 'perda quadrática' -- 'static' -> 'static' -- 'static inference' -> 'inferência estática' -- 'static model' -> 'modelo estático' -- 'stationarity' -> 'estacionariedade' -- 'Stochastic Gradient Descent (SGD)' -> 'Gradiente descendente estocástico (GDE)' -- 'supervised learning' -> 'aprendizado supervisionado' -- 'supervised machine learning' -> 'aprendizado de máquina supervisionado' -- 'synthetic feature' -> 'atributo sintético' -- 'synthetic features' -> 'recursos sintéticos' -- 'test loss' -> 'perda de teste' -- 'training' -> 'treinamento' -- 'training loss' -> 'perda de treinamento' -- 'training set' -> 'conjunto de treinamento' -- 'training-serving skew' -> 'desvio entre treinamento e disponibilização' -- 'true negative (TN)' -> 'verdadeiro negativo (VN)' -- 'true negatives' -> 'verdadeiros negativos' -- 'true positive (TP)' -> 'verdadeiro positivo (VP)' -- 'true positive rate' -> 'taxa de verdadeiros positivos' -- 'true positive rate (TPR)' -> 'taxa de verdadeiro positivo (TVP)' -- 'true positives' -> 'verdadeiros positivos' -- 'underfitting' -> 'underfitting' -- 'unlabeled example' -> 'exemplo sem rótulo' -- 'unsupervised machine learning' -> 'aprendizado de máquina sem supervisão' -- 'validation' -> 'validação' -- 'validation dataset' -> 'conjunto de dados de 
validação' -- 'validation loss' -> 'perda de validação' -- 'validation set' -> 'conjunto de validação' -- 'weight' -> 'peso' -- 'weighted sum' -> 'soma de pesos' -- 'Z-score normalization' -> 'Normalização de pontuação Z' diff --git a/src/ol_openedx_course_translations/ol_openedx_course_translations/glossaries/machine_learning/ru.txt b/src/ol_openedx_course_translations/ol_openedx_course_translations/glossaries/machine_learning/ru.txt deleted file mode 100644 index 0c87ef85e..000000000 --- a/src/ol_openedx_course_translations/ol_openedx_course_translations/glossaries/machine_learning/ru.txt +++ /dev/null @@ -1,213 +0,0 @@ -# RU HINTS -## TERM MAPPINGS -These are preferred terminology choices for this language. Use them whenever they sound natural; adapt freely if context requires. - -- 'accuracy' -> «точность» -- 'activation function' -> «функция активации» -- 'artificial intelligence' -> «искусственный интеллект» -- 'AUC' -> «AUC» -- 'AUC (Area under the ROC curve)' -> «AUC (площадь под ROC-кривой)» -- 'backpropagation' -> «обратное распространение» -- 'batch' -> «партия» -- 'batch size' -> «размер партии» -- 'bias (ethics/fairness)' -> «предвзятость (этика/справедливость)» -- 'bias (math) or bias term' -> «предвзятость (математика) или термин предвзятости» -- 'bias in ethics and fairness' -> «предвзятостью в этике и справедливости» -- 'bias term' -> «термином «смещение»» -- 'binary classification' -> «бинарная классификация» -- 'bucketing' -> «распределение» -- 'categorical' -> «категориальном» -- 'categorical data' -> «категориальные данные» -- 'class' -> «сорт» -- 'class-imbalanced dataset' -> «набор данных с несбалансированным классом» -- 'class-imbalanced datasets' -> «несбалансированные по классам наборы данных» -- 'classification' -> «классификации» -- 'classification model' -> «модель классификации» -- 'classification threshold' -> «порог классификации» -- 'classifier' -> «классификатор» -- 'clipping' -> «вырезка» -- 'confusion matrix' -> «матрица 
путаницы» -- 'continuous feature' -> «непрерывная функция» -- 'convergence' -> «конвергенция» -- 'data set or dataset' -> «набор данных или набор данных» -- 'DataFrame' -> «DataFrame» -- 'dataset' -> «Набор данных» -- 'deep learning' -> «глубоком обучении» -- 'deep model' -> «глубокая модель» -- 'dense feature' -> «плотная особенность» -- 'depth' -> «глубина» -- 'discrete feature' -> «дискретная особенность» -- 'discrete features' -> «дискретными признаками» -- 'dynamic' -> «динамический» -- 'dynamic model' -> «динамическая модель» -- 'early stopping' -> «ранняя остановка» -- 'embedding layer' -> «слой внедрения» -- 'embedding layers' -> «встраиваемых слоев» -- 'epoch' -> «эпоха» -- 'example' -> «пример» -- 'false negative (FN)' -> «ложноотрицательный результат (ЛО)» -- 'false negatives' -> «ложноотрицательных результатов» -- 'false positive (FP)' -> «ложноположительный результат (ЛП)» -- 'false positive rate' -> «false positive rate» -- 'false positive rate (FPR)' -> «частота ложноположительных результатов (FPR)» -- 'false positives' -> «ложноположительных результатов» -- 'feature' -> «особенность» -- 'feature cross' -> «кросс-функция» -- 'feature crosses' -> «пересечение признаков» -- 'feature engineering' -> «проектирование функций» -- 'feature set' -> «набор функций» -- 'feature vector' -> «вектор признаков» -- 'feedback loop' -> «петля обратной связи» -- 'generalization' -> «обобщение» -- 'generalization curve' -> «кривая обобщения» -- 'gradient descent' -> «градиентный спуск» -- 'ground truth' -> «истина» -- 'hidden layer' -> «скрытый слой» -- 'hidden layer(s)' -> «скрытых слоях» -- 'hyperparameter' -> «гиперпараметр» -- 'independently and identically distributed (i.i.d)' -> «независимо и одинаково распределены (iid)» -- 'inference' -> «вывод» -- 'input layer' -> «входной слой» -- 'interpretability' -> «интерпретируемость» -- 'iteration' -> «итерация» -- 'L0regularization' -> «L0регуляризация» -- 'L1loss' -> «потеряL1» -- 'L1regularization' -> «регуляризации 
L1» -- 'L2loss' -> «Потери L2» -- 'L2regularization' -> «регуляризации L2» -- 'label' -> «этикетка» -- 'labeled example' -> «помеченный пример» -- 'lambda' -> «лямбда» -- 'layer' -> «слой» -- 'learning rate' -> «скорость обучения» -- 'linear' -> «линейный» -- 'linear model' -> «линейная модель» -- 'linear models' -> «линейных моделях» -- 'linear regression' -> «линейная регрессия» -- 'Log Loss' -> «Log Loss» -- 'log-odds' -> «логарифмические шансы» -- 'logistic regression' -> «логистическая регрессия» -- 'loss' -> «потеря» -- 'loss curve' -> «кривая потерь» -- 'loss function' -> «функция потерь» -- 'machine learning' -> «машинное обучение» -- 'majority class' -> «класс большинства» -- 'mini-batch' -> «мини-партия» -- 'minority class' -> «класс меньшинства» -- 'model' -> «модель» -- 'multi-class classification' -> «многоклассовой классификацией» -- 'negative class' -> «отрицательный класс» -- 'negative classes' -> «отрицательные классы» -- 'neural network' -> «нейронная сеть» -- 'neural networks' -> «нейронным сетям» -- 'neuron' -> «нейрон» -- 'node (neural network)' -> «узел (нейронная сеть)» -- 'nonlinear' -> «нелинейный» -- 'nonstationarity' -> «нестационарность» -- 'normalization' -> «нормализация» -- 'numerical data' -> «числовые данные» -- 'offline' -> «офлайн» -- 'offline inference' -> «автономный вывод» -- 'one-hot encoding' -> «горячее кодирование» -- 'one-hot vector' -> «вектор с одним целым» -- 'one-vs.-all' -> «один против всех» -- 'online' -> «онлайн» -- 'online inference' -> «онлайн-вывод» -- 'output layer' -> «выходной слой» -- 'output layers' -> «выходных слоев» -- 'overfitting' -> «переобучение» -- 'pandas' -> «pandas» -- 'parameter' -> «параметр» -- 'positive class' -> «позитивный класс» -- 'positive classes' -> «положительные» -- 'post-processing' -> «постобработка» -- 'precision' -> «точность» -- 'prediction' -> «прогноз» -- 'proxy labels' -> «прокси-метки» -- 'RAG' -> «RAG» -- 'rater' -> «оценщик» -- 'recall' -> «recall» -- 'Rectified Linear
Unit (ReLU)' -> «Rectified Linear Unit (ReLU)» -- 'regression model' -> «регрессионная модель» -- 'regularization' -> «регуляризация» -- 'regularization rate' -> «regularization rate» -- 'ReLU' -> «РеЛУ» -- 'retrieval-augmented generation' -> «генерации с расширенным поиском» -- 'retrieval-augmented generation (RAG)' -> «retrieval-augmented generation (RAG)» -- 'ROC (receiver operating characteristic) Curve' -> «ROC (receiver operating characteristic) Curve» -- 'ROC curve' -> «ROC-кривой» -- 'Root Mean Squared Error (RMSE)' -> «Root Mean Squared Error (RMSE)» -- 'sigmoid function' -> «sigmoid function» -- 'softmax' -> «софтмакс» -- 'sparse feature' -> «sparse feature» -- 'sparse representation' -> «sparse representation» -- 'sparse vector' -> «sparse vector» -- 'squared loss' -> «квадрат потерь» -- 'static' -> «статический» -- 'static inference' -> «static inference» -- 'static model' -> «статической моделью» -- 'stationarity' -> «стационарность» -- 'Stochastic Gradient Descent (SGD)' -> «Стохастический градиентный спуск (SGD)» -- 'supervised learning' -> «контролируемом обучении» -- 'supervised machine learning' -> «контролируемое машинное обучение» -- 'synthetic feature' -> «synthetic feature» -- 'synthetic features' -> «синтетические признаки» -- 'test loss' -> «test loss» -- 'training' -> «обучение» -- 'training loss' -> «training loss» -- 'training set' -> «обучающий набор» -- 'training-serving skew' -> «training-serving skew» -- 'true negative (TN)' -> «true negative (TN)» -- 'true negatives' -> «истинно отрицательных результатов» -- 'true positive (TP)' -> «true positive (TP)» -- 'true positive rate' -> «истинный положительный уровень» -- 'true positive rate (TPR)' -> «true positive rate (TPR)» -- 'true positives' -> «истинно положительных результатов» -- 'underfitting' -> «недообучение» -- 'unlabeled example' -> «unlabeled example» -- 'unsupervised machine learning' -> «неконтролируемое машинное обучение» -- 'validation' -> «проверка» -- 'validation 
dataset' -> «проверочном наборе данных» -- 'validation loss' -> «validation loss» -- 'validation set' -> «набор для проверки» -- 'weight' -> «масса» -- 'weighted sum' -> «взвешенная сумма» -- 'Z-score normalization' -> «нормализацию Z-показателя» - -# STRICTNESS NOTE -TERM MAPPINGS above are flexible preferences. The following rules are STRICT and override them. - -## 2. Strict, Binding Terminology Rules (MANDATORY) -This section defines terminology and formatting that must always be used in Russian translations. -These rules override any flexible terminology and must be followed exactly. - -# MANDATORY RUSSIAN TERMINOLOGY RULES -## 2.1 Key Translations (Strict) -- 'Shared learning' -> «совместное обучение» - AVOID: «общее обучение». -- 'Multisource data' -> «данные из нескольких источников» - AVOID: «мультиисточниковые данные». -- 'Input embedding' -> «входное векторное представление (эмбеддинг)» -- 'Embedding' -> «эмбеддинг» -- 'Embedding space' -> «пространство представлений (пространство эмбеддингов)» -- 'Task-specific branches' -> «ветви, специфичные для задачи» -- 'Pipeline' -> «конвейер обработки данных» - «пайплайн» допускается только в неформальном контексте. - -## 2.2 Official Google Colab UI (Strict) -Use the official Russian UI strings: -- 'Change Runtime Type' -> «Сменить среду выполнения» -- 'Save a copy in Drive' -> «Сохранить копию на Диске» - -Filenames must remain in ENGLISH exactly as written. -Example: «Копия блокнота OriginalNotebookName.ipynb» - -## 2.3 Abbreviations and Hyphenation (Strict) -Keep all ML/AI abbreviations in English: ROC, AUC, TPR, FPR, L1, L2, UI, API, CNN, RNN, GPT. -Do NOT invent Russian abbreviations for these. 
- -When an English abbreviation precedes a Russian noun, use a hyphen: -- ROC-кривая -- AUC-показатель -- L1-регуляризация -- UI-дизайн diff --git a/src/ol_openedx_course_translations/ol_openedx_course_translations/management/commands/sync_and_translate_language.py b/src/ol_openedx_course_translations/ol_openedx_course_translations/management/commands/sync_and_translate_language.py deleted file mode 100644 index a69e25437..000000000 --- a/src/ol_openedx_course_translations/ol_openedx_course_translations/management/commands/sync_and_translate_language.py +++ /dev/null @@ -1,2676 +0,0 @@ -""" -Django management command to sync translation keys, translate using LLM, and create PRs. - -Usage: - ./manage.py cms sync_and_translate_language el - ./manage.py cms sync_and_translate_language el \\ - --provider openai --model gpt-4-turbo --glossary /path/to/glossary -""" - -import json -import logging -import os -import re -import shutil -import subprocess -import textwrap -import time -import urllib.parse -from configparser import NoSectionError -from contextlib import contextmanager, suppress -from pathlib import Path -from typing import Any, TypedDict, cast - -import git -import requests -from django.conf import settings -from django.core.management.base import BaseCommand, CommandError -from litellm import completion - -from ol_openedx_course_translations.utils.command_utils import ( - configure_litellm_for_provider, - create_branch_name, - get_config_value, - get_default_model_for_provider, - get_default_provider, - is_retryable_error, - normalize_language_code, - sanitize_for_git, - validate_branch_name, - validate_language_code, -) -from ol_openedx_course_translations.utils.constants import ( - HTTP_CREATED, - HTTP_NOT_FOUND, - HTTP_OK, - HTTP_TOO_MANY_REQUESTS, - HTTP_UNPROCESSABLE_ENTITY, - LANGUAGE_MAPPING, - MAX_ERROR_MESSAGE_LENGTH, - MAX_LOG_ICU_STRING_LENGTH, - MAX_LOG_STRING_LENGTH, - MAX_RETRIES, - PLURAL_CATEGORIES_ARABIC, - PLURAL_CATEGORIES_FOUR, - 
PLURAL_CATEGORIES_THREE, - PLURAL_CATEGORIES_TWO, - PLURAL_FORMS, - PROVIDER_GEMINI, - PROVIDER_MISTRAL, -) -from ol_openedx_course_translations.utils.translation_sync import ( - _get_base_lang, - _get_numeric_plural_keys, - _get_po_plural_count, - apply_json_translations, - apply_po_translations, - extract_empty_keys, - get_nplurals_from_po_file, - load_glossary, - match_glossary_term, - plural_source_has_placeholders_not_in_singular, - sync_all_translations, -) - -logger = logging.getLogger(__name__) - -# Max number of rejected brace-format entries to list in PR description. -MAX_REJECTED_BRACE_DISPLAY = 50 - - -class _PluralInstructionParams(TypedDict): - """Parameters for _build_plural_instructions.""" - - json_plural_info: dict[str, Any] - plural_count: int - key_batch: list[dict] - icu_categories_str: str - lang_code: str - po_plural_count_override: int | None - - -# Plural-instruction prompts for LLM (used in _build_plural_instructions). -# Format with .format(json_plural_count=..., icu_categories_str=..., etc.). - -# For languages with multiple plural forms (e.g. Arabic): expand ICU to ALL categories. -_PROMPT_JSON_PLURAL_EXPAND_ICU = ( - "IMPORTANT: {json_plural_count} entry/entries are JSON " - "strings with ICU MessageFormat plural forms. " - "These may currently have only 'one' and 'other' " - "categories, but for this language ({icu_categories_str}), " - "you MUST expand them to include ALL {num_categories} " - "categories: {icu_categories_str}. " - "Translate the content and return a complete ICU " - "MessageFormat string with ALL categories. " - "Example format: {{count, plural, {icu_categories_str} " - "{{translation}} ... other {{translation}}}}. " - "CRITICAL: Do not preserve the existing 2-category " - "structure. Expand it to include all {num_categories} " - "required categories for this language." -) - -# For languages with 2 forms: preserve existing ICU structure. 
-_PROMPT_JSON_PLURAL_PRESERVE_ICU = ( - "IMPORTANT: {json_plural_count} entry/entries are JSON " - "strings with ICU MessageFormat plural forms. " - "These already have the ICU structure " - "(e.g., {{activityCount, plural, one {{# activity}} " - "other {{# activities}}}}). " - "Translate the content inside the plural forms while " - "preserving the exact ICU structure and variable names. " - "Return the complete ICU MessageFormat string with " - "translated content." -) - -# JSON plurals (no existing ICU), multiple categories. -_PROMPT_JSON_PLURAL_MULTI_CATEGORY = ( - "IMPORTANT: {json_plural_count} entry/entries are for " - "JSON files with plural forms. " - "For these, return ICU MessageFormat strings with ALL " - "plural categories: {icu_categories_str}. " - "Format: {{count, plural, {icu_categories_str} " - "{{translation}} ... other {{translation}}}}. " - "Example: {example}. " - "IMPORTANT: Include ALL {num_categories} categories in " - "your response, not just 'one' and 'other'. Each category " - "may require different word forms in this language." -) - -# JSON plurals (no existing ICU), two categories. -_PROMPT_JSON_PLURAL_TWO_CATEGORY = ( - "IMPORTANT: {json_plural_count} entry/entries are for " - "JSON files with plural forms. " - "For these, return ICU MessageFormat strings with plural " - "categories: {icu_categories_str}. " - "Format: {{count, plural, {icu_categories_str} " - "{{translation}} ... other {{translation}}}}. " - "Example: {example}." -) - -# PO plurals: language has more than 2 forms (all indices 0..N-1). -_PROMPT_PO_PLURAL_MULTI_FORM = ( - "CRITICAL - PO FILE PLURAL ENTRIES " - "({plural_count} entry/entries): " - "These are for PO files (NOT JSON files). " - "This language requires {po_plural_count} plural forms " - "(indices 0, 1, 2, ..., {po_plural_count_minus_1}). 
" - "For PO files, you MUST return an object with keys " - "'0', '1', '2', ..., '{po_plural_count_minus_1}', " - "covering all indices from 0 through " - "{po_plural_count_minus_1}, where each value is a " - "PLAIN TRANSLATION STRING. " - "\n" - "WRONG (DO NOT DO THIS): " - "{{'0': '{{count, plural, one {{...}} other {{...}}}}'}} " - "\n" - "CORRECT: " - "{{'0': 'translation for zero items', " - "'1': 'translation for one item', " - "'2': 'translation for two items', " - "'3': 'translation for few items', " - "'4': 'translation for many items', " - "'5': 'translation for other items'}} " - "\n" - "Each value must be a simple translated string, " - "NOT ICU MessageFormat syntax. " - "Preserve placeholders like {{count}}, %(count)s, etc. " - "in the plain strings." -) - -# PO plurals: language has 2 forms (singular/plural). -_PROMPT_PO_PLURAL_SINGULAR_PLURAL = ( - "CRITICAL - PO FILE PLURAL ENTRIES " - "({plural_count} entry/entries): " - "These are for PO files (NOT JSON files). " - "For PO files, return an object with 'singular' and " - "'plural' keys, each containing a PLAIN TRANSLATION STRING. " - "\n" - "WRONG (DO NOT DO THIS): " - "{{'singular': '{{count, plural, one {{...}} " - "other {{...}}}}'}} " - "\n" - "CORRECT: " - "{{'singular': 'translation for one item', " - "'plural': 'translation for multiple items'}} " - "\n" - "Each value must be a simple translated string, " - "NOT ICU MessageFormat syntax. " - "Preserve placeholders like {{count}}, %(count)s, etc. " - "in the plain strings." -) - -# PO plurals: language has only 1 form (e.g. Chinese, Japanese, Korean). -# We need BOTH singular and plural; we choose which to use for the single form. -_PROMPT_PO_PLURAL_ONE_FORM = ( - "CRITICAL - PO FILE PLURAL ENTRIES " - "({plural_count} entry/entries): " - "These are for PO files (NOT JSON files). " - "This language has only ONE plural form (e.g. Chinese, Japanese). " - "Always return an object with BOTH 'singular' and 'plural' keys. 
" - "For entries where the PLURAL source has a variable (e.g. %(num_selected)s) " - "that the SINGULAR source does not, we use your 'singular' for the single form " - "(to avoid runtime errors). So provide a natural singular WITHOUT that variable. " - "For other entries we use your 'plural' for the single form. " - "\n" - "CORRECT (variable only in plural): " - "{{'singular': 'translation without the variable', " - "'plural': 'translation with %(num_selected)s etc.'}} " - "\n" - "CORRECT (same variable in both): " - "{{'singular': 'translation with %(count)s', " - "'plural': 'translation with %(count)s'}} " - "\n" - "Each value must be a simple translated string. Preserve placeholders." -) - - -class GitRepository: - """Helper class for git operations with consistent error handling.""" - - def __init__(self, repo_path: str): - self.repo_path = Path(repo_path) - try: - self.repo = git.Repo(repo_path) - except git.exc.InvalidGitRepositoryError as e: - msg = ( - f"Invalid git repository at {repo_path}. " - f"Please remove it or specify a different path." - ) - raise CommandError(msg) from e - except git.exc.GitCommandError as e: - msg = f"Git error accessing repository: {e!s}" - raise CommandError(msg) from e - - def _handle_git_error(self, operation: str, error: Exception) -> None: - """Convert git errors to CommandError with context.""" - msg = f"Git error {operation}: {error!s}" - raise CommandError(msg) from error - - def _get_main_branch_name(self) -> str: - """ - Determine the main branch name. - Checks local branches first, then remote branches. - Fetches from remote if needed to check remote branches. 
- """ - # Check if 'main' exists locally - if "main" in [ref.name for ref in self.repo.heads]: - return "main" - - # If not found locally, fetch from remote and check remote branches - with suppress(git.exc.GitCommandError): - # If fetch fails, we'll try to check existing remote refs anyway - self.repo.remotes.origin.fetch() - - # Check remote branches - if "origin/main" in [ref.name for ref in self.repo.remotes.origin.refs]: - return "main" - - msg = "Main branch not found locally or on remote" - raise CommandError(msg) - - def ensure_clean(self) -> bool: - """ - Clean uncommitted changes in tracked files. - Returns True if cleaned, False if already clean. - - This ensures any leftover staged/uncommitted changes from a previous - interrupted run are removed before starting a new translation sync. - """ - try: - if self.repo.is_dirty(untracked_files=False): - self.repo.head.reset(index=True, working_tree=True) - return True - else: - return False - except git.exc.GitCommandError as e: - self._handle_git_error("cleaning repository", e) - return False # Never reached, but satisfies type checker - - def switch_to_main(self) -> None: - """Switch to main branch, deleting current branch if it's not main.""" - try: - # Get current branch name (might be in detached HEAD state) - try: - current_branch = self.repo.active_branch.name - except TypeError: - # Detached HEAD state - we'll checkout main anyway - current_branch = None - - # Get the main branch name - main_branch = self._get_main_branch_name() - - # Only switch if we're not already on the main branch - if current_branch != main_branch: - # Try to checkout the branch (will work if it exists locally) - try: - self.repo.git.checkout(main_branch) - except git.exc.GitCommandError: - # Branch doesn't exist locally, checkout from remote - self.repo.git.checkout("-b", main_branch, f"origin/{main_branch}") - - # Delete the previous branch if it exists and is not the main branch - if current_branch and current_branch != 
main_branch: - with suppress(git.exc.GitCommandError): - self.repo.git.branch("-D", current_branch) - except (git.exc.GitCommandError, TypeError) as e: - self._handle_git_error("switching branches", e) - - def update_from_remote(self) -> None: - """Fetch and pull latest changes from origin/main.""" - try: - self.repo.remotes.origin.fetch() - main_branch = self._get_main_branch_name() - self.repo.git.pull("origin", main_branch) - except git.exc.GitCommandError as e: - self._handle_git_error("updating repository", e) - - def get_remote_url(self) -> str | None: - """Get the current remote URL.""" - try: - return self.repo.remotes.origin.url - except (git.exc.GitCommandError, AttributeError): - return None - - def configure_user( - self, - email: str = "translations@mitodl.org", - name: str = "MIT Open Learning Translations Bot", - ) -> None: - """Configure git user for this repository.""" - try: - with self.repo.config_writer() as config: - # Check if user section exists and get existing values - try: - existing_email = config.get_value("user", "email", default=None) - existing_name = config.get_value("user", "name", default=None) - except NoSectionError: - # Section doesn't exist, set both values - existing_email = None - existing_name = None - # Set values only if they don't exist - if not existing_email: - config.set_value("user", "email", email) - if not existing_name: - config.set_value("user", "name", name) - except git.exc.GitCommandError as e: - self._handle_git_error("configuring user", e) - - def branch_exists(self, branch_name: str) -> bool: - """Check if branch exists locally or remotely.""" - validate_branch_name(branch_name) - try: - # Check local branches - if branch_name in [ref.name for ref in self.repo.heads]: - return True - # Check remote branches - remote_branch = f"origin/{branch_name}" - try: - self.repo.remotes.origin.fetch() - except git.exc.GitCommandError: - # If fetch fails, try to check existing remote refs anyway - # Check remote refs 
with existing data - return remote_branch in [ - ref.name for ref in self.repo.remotes.origin.refs - ] - else: - # Fetch succeeded, check remote refs - return remote_branch in [ - ref.name for ref in self.repo.remotes.origin.refs - ] - except git.exc.GitCommandError as e: - self._handle_git_error("checking branch existence", e) - return False # Never reached, but satisfies type checker - - def create_branch(self, branch_name: str) -> None: - """Create and checkout a new branch.""" - validate_branch_name(branch_name) - try: - self.repo.git.checkout("-b", branch_name) - except git.exc.GitCommandError as e: - self._handle_git_error("creating branch", e) - - def stage_all(self) -> None: - """Stage all changes.""" - try: - self.repo.git.add(".") - except git.exc.GitCommandError as e: - self._handle_git_error("staging changes", e) - - def has_changes(self) -> bool: - """Check if there are uncommitted changes.""" - try: - return self.repo.is_dirty(untracked_files=True) - except git.exc.GitCommandError as e: - self._handle_git_error("checking changes", e) - return False # Never reached, but satisfies type checker - - def commit(self, message: str) -> None: - """Commit staged changes.""" - try: - self.repo.index.commit(message) - except git.exc.GitCommandError as e: - self._handle_git_error("committing changes", e) - - @contextmanager - def authenticated_push_url(self, github_token: str): - """Context manager for authenticated push with automatic cleanup.""" - origin = self.repo.remotes.origin - original_url = origin.url - - # Build authenticated URL - match = re.search(r"github\.com[/:]([^/]+)/([^/]+?)(?:\.git)?$", original_url) - if match: - owner, repo_name = match.groups() - encoded_token = urllib.parse.quote(github_token, safe="") - push_url = f"https://{encoded_token}@github.com/{owner}/{repo_name}.git" - else: - encoded_token = urllib.parse.quote(github_token, safe="") - push_url = original_url.replace("https://", f"https://{encoded_token}@") - - try: - 
origin.set_url(push_url)
            yield
        finally:
            # Always restore original URL
            try:
                origin.set_url(original_url)
            except (git.exc.GitCommandError, ValueError) as e:
                # Best effort cleanup - log but don't fail
                logger.warning("Failed to restore original git remote URL: %s", e)

    def push_branch(self, branch_name: str, github_token: str | None = None) -> None:
        """Push branch to remote with optional authentication.

        When a token is given, the push happens inside authenticated_push_url()
        so the token is stripped from the remote URL afterwards.
        """
        validate_branch_name(branch_name)
        try:
            if github_token:
                with self.authenticated_push_url(github_token):
                    self.repo.git.push("-u", "origin", branch_name)
            else:
                self.repo.git.push("-u", "origin", branch_name)
        except git.exc.GitCommandError as e:
            self._handle_git_error("pushing branch", e)

    @staticmethod
    def clone(repo_url: str, repo_path: str) -> "GitRepository":
        """Clone a repository and return GitRepository instance.

        Creates parent directories as needed; raises CommandError on git or
        filesystem failure.
        """
        repo_path_obj = Path(repo_path)
        try:
            repo_path_obj.parent.mkdir(parents=True, exist_ok=True)
            git.Repo.clone_from(repo_url, str(repo_path))
            return GitRepository(repo_path)
        except git.exc.GitCommandError as e:
            msg = f"Git error cloning repository: {e!s}"
            raise CommandError(msg) from e
        except OSError as e:
            msg = f"Error creating directory: {e!s}"
            raise CommandError(msg) from e


class GitHubAPIClient:
    """Helper class for GitHub API operations (branch checks, PR creation)."""

    def __init__(self, token: str | None = None):
        """Initialize with optional token.

        Resolution order: explicit argument, Django settings, then the
        TRANSLATIONS_GITHUB_TOKEN environment variable.
        """
        self.token = (
            token
            or getattr(settings, "TRANSLATIONS_GITHUB_TOKEN", None)
            or os.environ.get("TRANSLATIONS_GITHUB_TOKEN")
        )
        if not self.token:
            msg = "TRANSLATIONS_GITHUB_TOKEN not set in settings or environment"
            raise CommandError(msg)

    def _get_headers(self) -> dict:
        """Get API request headers (bearer auth + GitHub v3 media type)."""
        return {
            "Authorization": f"Bearer {self.token}",
            "Accept": "application/vnd.github.v3+json",
            "Content-Type": "application/json",
        }

    @staticmethod
    def parse_repo_url(repo_url: str) -> tuple[str, str]:
        """Extract owner and repo from GitHub URL."""
        # Accepts both HTTPS ("github.com/owner/repo") and SSH
        # ("github.com:owner/repo") forms, with or without a .git suffix.
        match = re.search(r"github\.com[/:]([^/]+)/([^/]+?)(?:\.git)?$", repo_url)
        if not match:
            msg = f"Could not parse owner/repo from repo URL: {repo_url}"
            raise CommandError(msg)
        owner, repo = match.groups()
        return (owner, repo)

    def _handle_rate_limit(
        self, response: requests.Response, attempt: int, max_retries: int, stdout
    ) -> bool:
        """Handle rate limit response. Returns True if should retry.

        Honors the Retry-After header when present, otherwise uses
        exponential backoff (2 * 2**attempt seconds). Raises CommandError
        once retries are exhausted.
        """
        if response.status_code == HTTP_TOO_MANY_REQUESTS:
            retry_after = int(response.headers.get("Retry-After", 2 * (2**attempt)))
            if attempt < max_retries - 1:
                stdout.write(
                    f" Rate limit exceeded (attempt {attempt + 1}/{max_retries}). "
                    f"Retrying in {retry_after} seconds..."
                )
                time.sleep(retry_after)
                return True
            else:
                msg = "GitHub API rate limit exceeded. Please try again later."
                raise CommandError(msg)
        return False

    def _extract_error_message(self, response: requests.Response) -> str:
        """Extract safe error message from response, including validation errors."""
        try:
            error_data = response.json()
            message = error_data.get("message", f"HTTP {response.status_code}")

            # GitHub API validation errors include detailed error info in 'errors' array
            if error_data.get("errors"):
                error_details = []
                for err in error_data["errors"]:
                    if isinstance(err, dict):
                        field = err.get("field", "unknown")
                        code = err.get("code", "unknown")
                        resource = err.get("resource", "unknown")
                        error_details.append(f"{resource}.{field}: {code}")
                    else:
                        error_details.append(str(err))

                if error_details:
                    message = f"{message} ({', '.join(error_details)})"
                return message
            else:
                return message
        # Non-JSON body (or malformed JSON) falls back to the bare status code.
        except (ValueError, requests.exceptions.JSONDecodeError):
            return f"HTTP {response.status_code}"

    def verify_branch(
        self,
        owner: str,
        repo: str,
        branch_name: str,
        stdout,  # noqa: ARG002
    ) -> None:
        """Verify branch exists on remote; raise CommandError otherwise."""
        url =
f"https://api.github.com/repos/{owner}/{repo}/branches/{branch_name}"
        response = requests.get(url, headers=self._get_headers(), timeout=10)

        if response.status_code == HTTP_NOT_FOUND:
            msg = (
                f"Branch '{branch_name}' not found on remote. "
                f"Ensure the branch was pushed successfully."
            )
            raise CommandError(msg)
        elif response.status_code != HTTP_OK:
            error_msg = self._extract_error_message(response)
            msg = f"Failed to verify branch: {error_msg}"
            raise CommandError(msg)
        # If status_code is HTTP_OK, function returns None implicitly

    def create_pull_request(  # noqa: PLR0913
        self,
        owner: str,
        repo: str,
        branch_name: str,
        title: str,
        body: str,
        base: str = "main",
        stdout=None,
    ) -> str:
        """Create a pull request with retry logic.

        Retries up to 3 times on rate limiting and connection/timeout errors
        (exponential backoff); other API errors fail fast with a truncated,
        safe error message. Returns the new PR's html_url.
        """
        url = f"https://api.github.com/repos/{owner}/{repo}/pulls"
        payload = {"title": title, "body": body, "head": branch_name, "base": base}
        headers = self._get_headers()

        max_retries = 3
        base_retry_delay = 2

        for attempt in range(max_retries):
            # Backoff doubles per attempt: 2s, 4s, 8s.
            retry_delay = base_retry_delay * (2**attempt)

            try:
                response = requests.post(url, json=payload, headers=headers, timeout=30)

                if response.status_code == HTTP_CREATED:
                    return response.json()["html_url"]

                if self._handle_rate_limit(
                    response, attempt, max_retries, stdout or self
                ):
                    continue

                if response.status_code == HTTP_UNPROCESSABLE_ENTITY:
                    error_msg = self._extract_error_message(response)
                    # Truncate to keep log/console output bounded.
                    safe_error = (
                        error_msg[:MAX_ERROR_MESSAGE_LENGTH]
                        if len(error_msg) > MAX_ERROR_MESSAGE_LENGTH
                        else error_msg
                    )
                    msg = (
                        f"GitHub API validation error: {safe_error}\n"
                        f"This usually means the branch doesn't exist on remote "
                        f"or there's already a PR for this branch."
                    )
                    raise CommandError(msg)

                error_msg = self._extract_error_message(response)
                safe_error = (
                    error_msg[:MAX_ERROR_MESSAGE_LENGTH]
                    if len(error_msg) > MAX_ERROR_MESSAGE_LENGTH
                    else error_msg
                )
                msg = f"GitHub API error: {safe_error}"
                raise CommandError(msg)

            except requests.exceptions.RequestException as e:
                # Only connection-level failures are considered transient.
                is_connection_error = isinstance(
                    e,
                    (requests.exceptions.ConnectionError, requests.exceptions.Timeout),
                )

                if is_connection_error and attempt < max_retries - 1:
                    if stdout:
                        error_msg = (
                            f" Connection error "
                            f"(attempt {attempt + 1}/{max_retries}): {e!s}"
                        )
                        stdout.write(error_msg)
                        stdout.write(f" Retrying in {retry_delay} seconds...")
                    time.sleep(retry_delay)
                    continue
                else:
                    if is_connection_error:
                        msg = (
                            f"Failed to connect to GitHub API after "
                            f"{max_retries} attempts: {e!s}\n"
                            f"Please check your network connection and try again later."
                        )
                        raise CommandError(msg) from e
                    msg = f"GitHub API error: {e!s}"
                    raise CommandError(msg) from e

        msg = "Failed to create pull request after all retries"
        raise CommandError(msg)


class PullRequestData(TypedDict):
    """Data structure for pull request creation."""

    lang_code: str
    iso_code: str
    sync_stats: dict
    applied_count: int
    translation_stats: dict[str, Any]
    applied_by_app: dict[str, Any]
    provider: str
    model: str
    rejected_brace_format_entries: list[dict[str, str]]


class TranslationParams(TypedDict):
    """Parameters for translation operations."""

    lang_code: str
    provider: str
    model: str
    glossary: dict[str, Any] | None
    batch_size: int
    max_retries: int


class Command(BaseCommand):
    # Django management command: syncs translation keys, machine-translates
    # missing entries via an LLM, and opens a PR in mitxonline-translations.
    help = (
        "Sync translation keys, translate using LLM, "
        "and create PR in mitxonline-translations"
    )

    def add_arguments(self, parser):
        """Register CLI arguments for the command."""
        parser.add_argument(
            "lang", type=str, help="Language code (e.g., el, fr, es_ES)"
        )
        parser.add_argument(
            "--iso-code",
            type=str,
            help="ISO code for JSON files (default: same
 as lang)",
        )
        parser.add_argument(
            "--repo-path",
            type=str,
            help=(
                "Path to mitxonline-translations repository. "
                "Can also be set via TRANSLATIONS_REPO_PATH setting "
                "or environment variable."
            ),
        )
        # Resolve the configured default once so it can be shown in --help.
        default_provider = get_default_provider()
        parser.add_argument(
            "--provider",
            type=str,
            default=default_provider,
            choices=["openai", "gemini", "mistral"],
            help=(
                "Translation provider (openai, gemini, mistral). "
                "Default is taken from TRANSLATIONS_PROVIDERS['default_provider']"
                + (
                    f" (currently: {default_provider})"
                    if default_provider
                    else " (not configured)"
                )
            ),
        )
        parser.add_argument(
            "--model",
            type=str,
            default=None,
            help=(
                "Model name (e.g., gpt-4, gemini-pro, mistral-large-latest). "
                "If not specified, uses the default_model for the selected provider "
                "from TRANSLATIONS_PROVIDERS. "
                "LiteLLM automatically detects provider from model name."
            ),
        )
        parser.add_argument(
            "--dry-run",
            action="store_true",
            help="Run without committing or creating PR",
        )
        parser.add_argument(
            "--glossary",
            dest="glossary",
            required=False,
            default=None,
            help=(
                "Path to glossary directory. Should contain language-specific "
                "files (e.g. {iso_code}.txt)."
            ),
        )
        parser.add_argument(
            "--batch-size",
            type=int,
            default=200,
            help=(
                "Number of keys to translate per API request (default: 200). "
                "Larger batches are faster but may hit rate limits. "
                "Recommended: 200-300 for most models, "
                "up to 400-500 for large models like mistral-large."
            ),
        )
        parser.add_argument(
            "--mfe",
            type=str,
            nargs="+",
            help=(
                "Filter by specific MFE(s). "
                "Use 'edx-platform' for backend translations."
            ),
        )
        parser.add_argument(
            "--repo-url",
            type=str,
            help=(
                "GitHub repository URL. "
                "Can also be set via TRANSLATIONS_REPO_URL setting "
                "or environment variable."
            ),
        )

    def handle(self, *args, **options):  # noqa: ARG002, PLR0915
        """Handle the command execution.

        Pipeline: validate inputs -> ensure repo -> sync keys -> extract and
        filter empty keys -> translate (glossary + LLM) -> apply -> commit ->
        open PR. --dry-run stops before committing.
        """
        # Normalize language codes (convert hyphens to underscores)
        lang_code = normalize_language_code(options["lang"])
        iso_code = normalize_language_code(options.get("iso_code") or lang_code)

        validate_language_code(lang_code)
        validate_language_code(iso_code, "ISO code")

        # get_config_value resolves CLI option -> setting -> env var -> default.
        repo_path = get_config_value(
            "repo_path",
            options,
            str(Path.home() / ".mitxonline-translations"),
        )
        repo_url = get_config_value(
            "repo_url",
            options,
            "https://github.com/mitodl/mitxonline-translations.git",
        )

        # Validate repository path is not empty
        if not repo_path or not repo_path.strip():
            msg = (
                "Repository path is not set. Please specify --repo-path, "
                "set TRANSLATIONS_REPO_PATH in Django settings, or set "
                "TRANSLATIONS_REPO_PATH environment variable."
            )
            raise CommandError(msg)

        self.stdout.write(self.style.SUCCESS(f"Processing language: {lang_code}"))
        self.stdout.write(f" ISO code: {iso_code}")
        self.stdout.write(f" Repository: {repo_path}")

        repo = self._ensure_repo(repo_path, repo_url)

        self.stdout.write("\nSyncing translation keys...")
        base_dir = Path(repo_path) / "translations"
        sync_stats = sync_all_translations(
            base_dir, lang_code, iso_code, skip_backend=False
        )
        self._log_sync_stats(sync_stats)

        # Extract and filter empty keys
        self.stdout.write("\nExtracting empty keys for translation...")
        empty_keys = extract_empty_keys(
            base_dir, lang_code, iso_code, skip_backend=False
        )
        empty_keys = self._filter_by_mfe(empty_keys, options.get("mfe"))

        if not empty_keys:
            self.stdout.write(self.style.SUCCESS("\nNo empty keys to translate!"))
            return

        glossary = self._load_glossary(options, iso_code)

        provider = options.get("provider") or get_default_provider()
        if not provider:
            msg = (
                "Provider not specified and "
                "TRANSLATIONS_PROVIDERS['default_provider'] is not set"
            )
            raise CommandError(msg)

        model = options.get("model") or get_default_model_for_provider(provider)
        if not model:
            msg = (
                f"Model not specified and provider '{provider}' "
                "does not have default_model in TRANSLATIONS_PROVIDERS"
            )
            raise CommandError(msg)

        self.stdout.write(f"\nTranslating using {provider}/{model}...")
        params = TranslationParams(
            lang_code=lang_code,
            provider=provider,
            model=model,
            glossary=glossary,
            batch_size=options.get("batch_size", 200),
            max_retries=MAX_RETRIES,
        )
        translations, translation_stats = self._translate_keys(empty_keys, params)
        self.stdout.write(f" Translated {len(translations)} keys")

        self.stdout.write("\nApplying translations...")
        applied_count, applied_by_app = self._apply_translations(
            translations, empty_keys, self.stdout, lang_code
        )
        self.stdout.write(f" Applied {applied_count} translations")

        if options.get("dry_run"):
            self.stdout.write(self.style.WARNING("\nDry run - no changes committed"))
            return

        branch_name = create_branch_name(lang_code)
        self.stdout.write(f"\nCommitting changes to branch: {branch_name}")

        if not self._commit_changes(repo, branch_name, lang_code):
            return

        self.stdout.write("\nCreating pull request...")
        try:
            pr_data = PullRequestData(
                lang_code=lang_code,
                iso_code=iso_code,
                sync_stats=sync_stats,
                applied_count=applied_count,
                translation_stats=translation_stats,
                applied_by_app=applied_by_app,
                provider=provider,
                model=model,
                rejected_brace_format_entries=applied_by_app.get(
                    "rejected_brace_format_entries", []
                ),
            )
            pr_url = self._create_pull_request(
                repo_path,
                branch_name,
                pr_data,
                repo_url,
            )
            self.stdout.write(self.style.SUCCESS(f"\nPull request created: {pr_url}"))
        except CommandError as e:
            # Clean up branch if PR creation fails
            self.stdout.write(
                self.style.ERROR(f"\nFailed to create pull request: {e!s}")
            )
            self._cleanup_failed_branch(repo, branch_name)
            raise

    def _ensure_repo(self,
repo_path: str, repo_url: str) -> GitRepository: - """Ensure repository exists and is ready. Returns GitRepository instance.""" - repo_path_obj = Path(repo_path) - is_git_repo = repo_path_obj.exists() and (repo_path_obj / ".git").exists() - - if is_git_repo: - repo = GitRepository(repo_path) - current_url = repo.get_remote_url() - - # Normalize URLs for comparison (remove .git suffix, trailing slashes) - normalized_current = (current_url or "").rstrip(".git").rstrip("/") - normalized_new = repo_url.rstrip(".git").rstrip("/") - - # If URL changed, delete and re-clone - if normalized_current != normalized_new: - self.stdout.write( - self.style.WARNING( - f" Repository URL changed from {current_url} to {repo_url}" - ) - ) - self.stdout.write(" Removing old repository and cloning new one...") - shutil.rmtree(repo_path) - self.stdout.write(f" Cloning repository to {repo_path}...") - repo = GitRepository.clone(repo_url, repo_path) - self.stdout.write( - self.style.SUCCESS(" Repository cloned successfully") - ) - return repo - - # URL matches, use existing repo - self.stdout.write(f" Repository found at {repo_path}") - if repo.ensure_clean(): - self.stdout.write( - self.style.WARNING( - " WARNING: Found uncommitted changes (cleaned up)" - ) - ) - self.stdout.write( - self.style.SUCCESS(" Cleaned up uncommitted changes") - ) - - repo.switch_to_main() - self.stdout.write(" Updating repository...") - repo.update_from_remote() - self.stdout.write(self.style.SUCCESS(" Repository up to date")) - return repo - - elif repo_path_obj.exists(): - msg = ( - f"Path {repo_path} exists but is not a git repository. " - f"Please remove it or specify a different path." 
)
            raise CommandError(msg)
        else:
            self.stdout.write(f" Cloning repository to {repo_path}...")
            repo = GitRepository.clone(repo_url, repo_path)
            self.stdout.write(self.style.SUCCESS(" Repository cloned successfully"))
            return repo

    def _log_sync_stats(self, sync_stats: dict) -> None:
        """Log synchronization statistics for frontend and backend syncs."""
        self.stdout.write(
            f" Frontend: {sync_stats['frontend']['added']} keys added, "
            f"{sync_stats['frontend']['fixed']} typos fixed"
        )
        self.stdout.write(f" Backend: {sync_stats['backend']['added']} entries added")

    def _filter_by_mfe(
        self, empty_keys: list[dict], mfe_filter: list[str] | None
    ) -> list[dict]:
        """Filter empty keys by MFE if specified.

        With no filter, all keys pass through. If the filter matches nothing,
        a warning listing the available apps is printed and [] is returned.
        """
        if not mfe_filter:
            self.stdout.write(f" Found {len(empty_keys)} empty keys")
            return empty_keys

        mfe_set = set(mfe_filter)
        original_count = len(empty_keys)
        available_apps = {key.get("app", "unknown") for key in empty_keys}
        filtered = [key for key in empty_keys if key.get("app") in mfe_set]

        if not filtered:
            mfe_list = ", ".join(mfe_filter)
            apps_list = ", ".join(sorted(available_apps))
            self.stdout.write(
                self.style.WARNING(
                    f"\nWARNING: No empty keys found for specified MFE(s): "
                    f"{mfe_list}\n"
                    f" Available apps: {apps_list}"
                )
            )
            return []

        mfe_list = ", ".join(mfe_filter)
        self.stdout.write(
            f" Filtered to {len(filtered)} keys from {len(mfe_set)} MFE(s): "
            f"{mfe_list} (was {original_count} total)"
        )
        return filtered

    def _get_icu_plural_categories(self, lang_code: str) -> list[str]:
        """Get ICU MessageFormat plural categories for a language.

        Derives the category list from the gettext-style Plural-Forms entry
        in PLURAL_FORMS; unknown languages default to English-like
        ["one", "other"].
        """
        base_lang = _get_base_lang(lang_code)
        plural_form = PLURAL_FORMS.get(base_lang, "nplurals=2; plural=(n != 1);")

        nplurals_match = re.search(r"nplurals=(\d+)", plural_form)
        if not nplurals_match:
            return ["one", "other"]

        nplurals = int(nplurals_match.group(1))

        # Map nplurals to ICU categories
        nplurals_to_categories = {
            1: ["other"],
            2: ["one", "other"],
            3: ["one", "few", "other"],
            4: ["one", "two", "few", "other"],
            6: ["zero", "one", "two", "few", "many", "other"],
        }

        return nplurals_to_categories.get(nplurals, ["one", "other"])

    def _build_icu_example(self, categories_list: list[str]) -> str:
        """Build an ICU MessageFormat example string based on categories.

        Uses canned templates for the common category counts; any other
        count is synthesized generically from the category list.
        """
        num_categories = len(categories_list)

        templates_by_count = {
            PLURAL_CATEGORIES_ARABIC: (
                # Arabic: zero, one, two, few, many, other
                "{activityCount, plural, "
                "zero {# activities} "
                "one {# activity} "
                "two {# activities} "
                "few {# activities} "
                "many {# activities} "
                "other {# activities}}"
            ),
            PLURAL_CATEGORIES_FOUR: (
                # Languages with 4 forms: one, two, few, other
                "{activityCount, plural, "
                "one {# activity} "
                "two {# activities} "
                "few {# activities} "
                "other {# activities}}"
            ),
            PLURAL_CATEGORIES_THREE: (
                # Languages with 3 forms: one, few, other (e.g., Russian, Polish)
                "{activityCount, plural, "
                "one {# activity} "
                "few {# activities} "
                "other {# activities}}"
            ),
            PLURAL_CATEGORIES_TWO: (
                # Languages with 2 forms: one, other (most languages)
                "{activityCount, plural, one {# activity} other {# activities}}"
            ),
        }

        def fallback_template() -> str:
            # Fallback for other multi-category languages
            example_categories = " ".join(
                f"{cat} {{# {'activity' if cat == 'one' else 'activities'}}}"
                for cat in categories_list
            )
            return f"{{activityCount, plural, {example_categories}}}"

        return templates_by_count.get(num_categories) or fallback_template()

    def _load_glossary(self, options: dict, iso_code: str) -> dict[str, Any]:
        """Load glossary from directory. Uses ISO code for file lookup.

        iso_code is already normalized (e.g. es_419). Tries {iso_code}.txt first,
        then {iso_code with underscores→hyphens}.txt (e.g. es-419.txt) if not found.
        """
        glossary_dir = options.get("glossary")
        if not glossary_dir:
            return {}

        base_dir = Path(glossary_dir)
        candidates = [
            base_dir / f"{iso_code}.txt",
            base_dir / f"{iso_code.replace('_', '-')}.txt",
        ]
        glossary_path = None
        for path in candidates:
            if path.exists():
                glossary_path = path
                break

        if glossary_path is not None:
            self.stdout.write(f"\nLoading glossary from {glossary_path}...")
            glossary = load_glossary(glossary_path, iso_code)
            self.stdout.write(f" Loaded {len(glossary)} glossary terms")
            return glossary

        # A missing glossary is non-fatal: warn and continue without one.
        self.stdout.write(
            self.style.WARNING(
                f"\nWARNING: Glossary file not found for {iso_code} "
                f"(tried {candidates[0].name}, {candidates[1].name})\n"
                f" Continuing without glossary."
            )
        )
        return {}

    def _check_glossary_for_keys(
        self,
        empty_keys: list[dict],
        glossary: dict[str, Any] | None,
    ) -> tuple[dict[str, Any], int, list[dict]]:
        """Check glossary matches for keys.

        Returns (translations, matches_count, remaining_keys).
        Translation dict keys are "<resolved file path>[:<msgctxt>]:<key>" so
        entries sharing a msgid but differing in context stay distinct.
        """
        translations = {}
        glossary_matches = 0
        keys_needing_llm = []

        for key_info in empty_keys:
            # Normalize file path for consistent comparison
            file_path_str = str(Path(key_info["file_path"]).resolve())
            # Include msgctxt in key if it exists to distinguish entries with same msgid
            msgctxt = key_info.get("msgctxt")
            if msgctxt:
                translation_key = f"{file_path_str}:{msgctxt}:{key_info['key']}"
            else:
                translation_key = f"{file_path_str}:{key_info['key']}"

            if glossary:
                match_result = self._check_glossary_match(key_info, glossary)
                if match_result:
                    translations[translation_key] = match_result
                    glossary_matches += 1
                    continue

            keys_needing_llm.append(key_info)

        return translations, glossary_matches, keys_needing_llm

    def _process_batch_results(
        self,
        batch: list[dict],
        batch_translations: list[Any],
        translations: dict[str, Any],
    ) -> tuple[int, int, dict[str, int]]:
        """Process batch translation results.

        Returns (successes, errors, errors_by_app).
        Results are matched to batch entries positionally; a missing or
        falsy entry in batch_translations counts as an error for that key.
        """
        batch_successes = 0
        batch_errors = 0
        batch_errors_by_app: dict[str, int] = {}

        for i, key_info in enumerate(batch):
            # Normalize file path for consistent comparison
            file_path_str = str(Path(key_info["file_path"]).resolve())
            # Include msgctxt in key if it exists to distinguish entries with same msgid
            msgctxt = key_info.get("msgctxt")
            if msgctxt:
                translation_key = f"{file_path_str}:{msgctxt}:{key_info['key']}"
            else:
                translation_key = f"{file_path_str}:{key_info['key']}"
            app = key_info.get("app", "unknown")
            if i < len(batch_translations) and batch_translations[i]:
                translations[translation_key] = batch_translations[i]
                batch_successes += 1
            else:
                batch_errors += 1
                batch_errors_by_app[app] = batch_errors_by_app.get(app, 0) + 1

        return batch_successes, batch_errors, batch_errors_by_app

    def _translate_with_llm(  # noqa: PLR0913
        self,
        keys_needing_llm: list[dict],
        translations: dict[str, Any],
        lang_code: str,
        provider: str,
        model: str,
        glossary: dict[str, Any] | None,
        batch_size: int,
        max_retries: int,
        po_nplurals_override: int | None = None,
    ) -> tuple[int, int, dict[str, int]]:
        """Translate keys using LLM with batch processing.

        Returns (llm_translations, llm_errors, errors_by_app).
        When po_nplurals_override is set (from translation file's Plural-Forms),
        it is used for PO plural prompt instructions instead of the constant-based rule.
        """
        llm_translations = 0
        llm_errors = 0
        errors_by_app: dict[str, int] = {}

        total_keys = len(keys_needing_llm)
        # Ceiling division: last batch may be smaller than batch_size.
        num_batches = (total_keys + batch_size - 1) // batch_size
        self.stdout.write(
            f" Translating {total_keys} keys using LLM "
            f"({num_batches} batches of up to {batch_size} keys each)..."
        )

        for batch_idx, batch in enumerate(
            [
                keys_needing_llm[i : i + batch_size]
                for i in range(0, total_keys, batch_size)
            ],
            1,
        ):
            batch_succeeded = False
            batch_apps = {key_info.get("app", "unknown") for key_info in batch}

            # Retry loop for this batch
            for attempt in range(max_retries + 1):  # +1 for initial attempt
                try:
                    batch_translations = self._call_llm_batch(
                        batch,
                        lang_code,
                        provider,
                        model,
                        glossary,
                        po_nplurals_override=po_nplurals_override,
                    )
                    batch_successes, batch_errors, batch_errors_by_app = (
                        self._process_batch_results(
                            batch,
                            batch_translations,
                            translations,
                        )
                    )

                    llm_translations += batch_successes
                    llm_errors += batch_errors
                    for app, count in batch_errors_by_app.items():
                        errors_by_app[app] = errors_by_app.get(app, 0) + count

                    completed = min(batch_idx * batch_size, total_keys)
                    progress_pct = min((completed / total_keys) * 100, 100)
                    remaining_keys = total_keys - llm_translations

                    self._log_batch_progress(
                        batch_idx,
                        num_batches,
                        batch_successes,
                        batch_errors,
                        completed,
                        total_keys,
                        progress_pct,
                        remaining_keys,
                        batch_apps,
                        batch_errors_by_app,
                        attempt,
                    )

                    batch_succeeded = True
                    break  # Success - exit retry loop

                except (
                    requests.RequestException,
                    ValueError,
                    KeyError,
                    AttributeError,
                ) as e:
                    # _handle_batch_error sleeps with backoff and returns True
                    # to retry; False means non-retryable or out of retries.
                    if not self._handle_batch_error(
                        e, batch_idx, num_batches, batch_apps, attempt, max_retries
                    ):
                        break  # Non-retryable error

            # If batch failed after all retries, mark all keys as errors
            if not batch_succeeded:
                batch_errors = len(batch)
                llm_errors += batch_errors
                for key_info in batch:
                    app = key_info.get("app", "unknown")
                    errors_by_app[app] = errors_by_app.get(app, 0) + 1
                apps_str = ", ".join(sorted(batch_apps))
                self.stdout.write(
                    self.style.ERROR(
                        f" Marked {batch_errors} keys as errors, "
                        f"continuing with next batch...\n"
                        f" Affected apps: {apps_str}"
                    )
                )

        return llm_translations, llm_errors, errors_by_app
    def _log_batch_progress(  # noqa: PLR0913
        self,
        batch_idx: int,
        num_batches: int,
        batch_successes: int,
        batch_errors: int,
        completed: int,
        total_keys: int,
        progress_pct: float,
        remaining_keys: int,
        batch_apps: set[str],
        batch_errors_by_app: dict[str, int],
        attempt: int,
    ) -> None:
        """Log batch processing progress.

        Emits a detailed partial-success line (with per-app error counts)
        when the batch had errors, otherwise a compact progress line.
        """
        retry_msg = f" (after {attempt + 1} attempt(s))" if attempt > 0 else ""
        if batch_errors > 0:
            apps_str = ", ".join(sorted(batch_apps))
            errors_by_app_str = ", ".join(
                f"{app}: {count}" for app, count in sorted(batch_errors_by_app.items())
            )
            self.stdout.write(
                f" Batch {batch_idx}/{num_batches} completed "
                f"with partial success "
                f"({batch_successes} succeeded, "
                f"{batch_errors} failed){retry_msg} "
                f"({completed}/{total_keys} keys, "
                f"{progress_pct:.1f}% complete, "
                f"{remaining_keys} remaining)\n"
                f" Affected apps: {apps_str}\n"
                f" Errors by app: {errors_by_app_str}"
            )
        else:
            self.stdout.write(
                f" Batch {batch_idx}/{num_batches} completed"
                f"{retry_msg} "
                f"({completed}/{total_keys} keys, "
                f"{progress_pct:.1f}% complete, "
                f"{remaining_keys} remaining)"
            )

    def _handle_batch_error(  # noqa: PLR0913
        self,
        error: Exception,
        batch_idx: int,
        num_batches: int,
        batch_apps: set[str],
        attempt: int,
        max_retries: int,
    ) -> bool:
        """Handle batch error. Returns True if should retry, False otherwise.

        Sleeps with exponential backoff before signalling a retry; callers
        treat a False return as "give up on this batch".
        """
        apps_str = ", ".join(sorted(batch_apps))
        if not is_retryable_error(error):
            # Non-retryable error - fail immediately
            self.stdout.write(
                self.style.ERROR(
                    f" ERROR: Batch {batch_idx}/{num_batches} "
                    f"failed with non-retryable error: {error!s}\n"
                    f" Affected apps: {apps_str}"
                )
            )
            return False

        # Retryable error - check if we have retries left
        if attempt < max_retries:
            # Exponential backoff: 2^attempt seconds (1s, 2s, 4s, 8s...)
            wait_time = 2**attempt
            self.stdout.write(
                self.style.WARNING(
                    f" WARNING: Batch {batch_idx}/{num_batches} "
                    f"failed (attempt {attempt + 1}/"
                    f"{max_retries + 1}): {error!s}\n"
                    f" Affected apps: {apps_str}\n"
                    f" Retrying in {wait_time} second(s)..."
                )
            )
            time.sleep(wait_time)
            return True
        else:
            # Out of retries
            self.stdout.write(
                self.style.ERROR(
                    f" ERROR: Batch {batch_idx}/{num_batches} "
                    f"failed after {max_retries + 1} attempts: "
                    f"{error!s}\n"
                    f" Affected apps: {apps_str}"
                )
            )
            return False

    def _translate_keys(
        self,
        empty_keys: list[dict],
        params: TranslationParams,
    ) -> tuple[dict[str, Any], dict[str, Any]]:
        """Translate empty keys using LLM with batch processing.

        Glossary exact matches are applied first; only the remainder is sent
        to the LLM. Returns (translations, stats dict).
        """
        lang_code = params["lang_code"]
        provider = params["provider"]
        model = params["model"]
        glossary = params["glossary"]
        batch_size = params["batch_size"]

        # Add lang_code to each key_info for ICU format conversion
        for key_info in empty_keys:
            key_info["lang_code"] = lang_code
        max_retries = params["max_retries"]

        # First pass: check glossary matches
        logger.info(
            "Checking glossary matches for %d empty key(s) (language: %s)",
            len(empty_keys),
            lang_code,
        )
        translations, glossary_matches, keys_needing_llm = (
            self._check_glossary_for_keys(empty_keys, glossary)
        )

        logger.info(
            "Glossary matches: %d, Keys needing LLM: %d",
            glossary_matches,
            len(keys_needing_llm),
        )

        if not keys_needing_llm:
            logger.info("All translations found in glossary, skipping LLM translation")
            return translations, {
                "glossary_matches": glossary_matches,
                "llm_translations": 0,
                "errors": 0,
                "errors_by_app": cast("dict[str, int]", {}),
            }

        # Translate remaining keys with LLM
        logger.info(
            "Starting LLM translation for %d key(s) using %s/%s (batch size: %d)",
            len(keys_needing_llm),
            provider,
            model,
            batch_size,
        )
        # Prefer nplurals from the first PO file in the translation repo
        po_nplurals_override =
 None
        for key_info in empty_keys:
            if key_info.get("file_type") == "po" and key_info.get("file_path"):
                n = get_nplurals_from_po_file(Path(key_info["file_path"]))
                if n is not None:
                    po_nplurals_override = n
                    break

        llm_translations, llm_errors, errors_by_app = self._translate_with_llm(
            keys_needing_llm,
            translations,
            lang_code,
            provider,
            model,
            glossary,
            batch_size,
            max_retries,
            po_nplurals_override=po_nplurals_override,
        )
        logger.info(
            "LLM translation completed: %d translated, %d errors",
            llm_translations,
            llm_errors,
        )

        summary = (
            f" Summary - LLM translations: {llm_translations}, Errors: {llm_errors}"
        )
        if glossary:
            # NOTE(review): summary[12:] drops the leading " Summary - " prefix
            # by a fixed offset — fragile if the prefix text changes; verify
            # offset against the literal above.
            summary = (
                f" Summary - Glossary matches: {glossary_matches}, {summary[12:]}"
            )
        self.stdout.write(summary)

        return translations, {
            "glossary_matches": glossary_matches,
            "llm_translations": llm_translations,
            "errors": llm_errors,
            "errors_by_app": errors_by_app,
        }

    def _check_glossary_match(
        self, key_info: dict, glossary: dict[str, Any] | None
    ) -> Any | None:
        """
        Check if key matches glossary. Returns translation or None.

        Args:
            key_info: Dictionary containing key information with 'english',
                'is_plural', etc.
            glossary: Dictionary mapping English terms to translations, or None.

        Returns:
            Translation string/dict if match found, None otherwise.
        """
        if not glossary:
            return None

        is_plural = key_info.get("is_plural", False)
        msgid_plural = key_info.get("msgid_plural")

        # Plural entries need both singular and plural glossary lookups.
        if is_plural and msgid_plural:
            return self._check_plural_glossary_match(key_info, glossary, msgid_plural)

        match = match_glossary_term(key_info["english"], glossary, exact_match=True)
        if not match:
            logger.debug(
                "No glossary match found for key: %s", key_info.get("key", "unknown")
            )
            return None

        # Glossary values may be plain strings or dicts with
        # 'translation'/'singular' entries.
        translation = (
            match.get("translation", match.get("singular", ""))
            if isinstance(match, dict)
            else match
        )
        logger.debug(
            "Found glossary match for key: %s -> %s",
            key_info.get("key", "unknown"),
            str(translation)[:MAX_LOG_STRING_LENGTH] + "..."
            if len(str(translation)) > MAX_LOG_STRING_LENGTH
            else str(translation),
        )
        return translation

    def _is_icu_format(self, text: str) -> bool:
        """Check if text is already in ICU MessageFormat."""
        if not isinstance(text, str):
            return False
        # Match ICU MessageFormat pattern: {variable, plural, ...}
        icu_pattern = r"\{[^,]+,\s*plural\s*,"
        return bool(re.search(icu_pattern, text))

    def _convert_to_icu_format(
        self, singular: str, plural: str, lang_code: str, count_var: str = "count"
    ) -> str:
        """Convert singular and plural translations to ICU MessageFormat string.

        The plural string is reused for every category other than "one" — a
        simplification for languages with more than two plural forms.
        """
        categories = self._get_icu_plural_categories(lang_code)

        parts = [f"{{{count_var}, plural"]
        for category in categories:
            translation = singular if category == "one" else plural
            parts.append(f" {category} {{{translation}}}")
        parts.append("}")

        icu_string = "".join(parts)
        logger.debug(
            "Converted singular/plural to ICU format for %s: %s (categories: %s)",
            lang_code,
            (
                icu_string[:MAX_LOG_ICU_STRING_LENGTH] + "..."
                if len(icu_string) > MAX_LOG_ICU_STRING_LENGTH
                else icu_string
            ),
            categories,
        )
        return icu_string

    def _extract_translation_from_match(self, match: Any) -> str:
        """Extract translation string from glossary match.

        Accepts either a plain string or a dict; dicts are probed for
        'singular', then 'plural', then 'translation', else stringified.
        """
        if isinstance(match, str):
            return match
        return match.get(
            "singular", match.get("plural", match.get("translation", str(match)))
        )

    def _check_plural_glossary_match(
        self, key_info: dict, glossary: dict[str, Any], msgid_plural: str
    ) -> Any | None:
        """Check glossary match for plural keys. Returns translation or None.

        Requires BOTH singular and plural glossary hits; JSON targets get an
        ICU string, PO targets a {'singular', 'plural'} dict. A lone singular
        hit is stashed on key_info for later use by the LLM path.
        """
        file_type = key_info.get("file_type", "po")
        singular_match = match_glossary_term(
            key_info["english"], glossary, exact_match=True
        )
        plural_match = match_glossary_term(msgid_plural, glossary, exact_match=True)

        if singular_match and plural_match:
            singular_str = self._extract_translation_from_match(singular_match)
            plural_str = self._extract_translation_from_match(plural_match)

            if file_type == "json":
                lang_code = key_info.get("lang_code", "en")
                return self._convert_to_icu_format(singular_str, plural_str, lang_code)

            return {"singular": singular_str, "plural": plural_str}

        if singular_match:
            key_info["_glossary_singular"] = self._extract_translation_from_match(
                singular_match
            )

        return None

    def _format_glossary_for_prompt(self, glossary: dict[str, Any] | None) -> str:
        """Format glossary as a prompt section for LLM translation requests.

        Args:
            glossary: Dictionary mapping English terms to translations, or
                None/empty dict.

        Returns:
            Empty string if glossary is None or empty, otherwise returns a
            formatted string with glossary terms and instructions for consistent
            translation.
        """
        if not glossary:
            return ""

        try:
            glossary_json = json.dumps(glossary, indent=2, ensure_ascii=False)
        except (TypeError, ValueError) as e:
            # Serialization failure is non-fatal: warn and omit the glossary.
            self.stdout.write(
                self.style.WARNING(
                    f" WARNING: Could not serialize glossary for prompt: {e!s}.
" - f"Continuing without glossary in LLM prompt." - ) - ) - return "" - glossary_template = f""" - IMPORTANT - Use these glossary terms when translating. If any English terms - from the glossary appear in the texts to translate, use the corresponding - translation from the glossary: - - {glossary_json} - - When translating sentences, ensure that glossary terms are translated - consistently according to the glossary above, even if they appear - within longer sentences. For example, if the glossary specifies - "certificate" -> "Πιστοποιητικό", then translate "certificate" as - "Πιστοποιητικό" even when it appears in longer sentences like - "The course completion certificate is available". - """ - return textwrap.dedent(glossary_template) - - def _build_plural_instructions(self, params: _PluralInstructionParams) -> str: - """Build plural handling instructions for LLM prompt. - - When po_plural_count_override is set (e.g. from the translation file's - Plural-Forms header), it is used for PO plural form count instead of - the constant-based _get_po_plural_count(lang_code). 
- """ - instructions = [] - json_plural_info = params["json_plural_info"] - key_batch = params["key_batch"] - icu_categories_str = params["icu_categories_str"] - lang_code = params["lang_code"] - po_plural_count_override = params["po_plural_count_override"] - plural_count = params["plural_count"] - - json_plural_count = json_plural_info.get("count", 0) - json_plural_entries = json_plural_info.get("entries", {}) - - if json_plural_count > 0: - categories_list = icu_categories_str.split(", ") - num_categories = len(categories_list) - has_existing_icu = any( - self._is_icu_format(key_batch[i].get("english", "")) - for i in range(len(key_batch)) - if str(i + 1) in json_plural_entries - ) - - if has_existing_icu: - if num_categories > PLURAL_CATEGORIES_TWO: - instructions.append( - _PROMPT_JSON_PLURAL_EXPAND_ICU.format( - json_plural_count=json_plural_count, - icu_categories_str=icu_categories_str, - num_categories=num_categories, - ) - ) - else: - instructions.append( - _PROMPT_JSON_PLURAL_PRESERVE_ICU.format( - json_plural_count=json_plural_count - ) - ) - else: - example = self._build_icu_example(categories_list) - if num_categories > PLURAL_CATEGORIES_TWO: - instructions.append( - _PROMPT_JSON_PLURAL_MULTI_CATEGORY.format( - json_plural_count=json_plural_count, - icu_categories_str=icu_categories_str, - num_categories=num_categories, - example=example, - ) - ) - else: - instructions.append( - _PROMPT_JSON_PLURAL_TWO_CATEGORY.format( - json_plural_count=json_plural_count, - icu_categories_str=icu_categories_str, - example=example, - ) - ) - - if plural_count > 0: - # Prefer nplurals from file; fall back to constant-based rule - po_plural_count = ( - po_plural_count_override - if po_plural_count_override is not None - else _get_po_plural_count(lang_code) - ) - if po_plural_count > PLURAL_CATEGORIES_TWO: - instructions.append( - _PROMPT_PO_PLURAL_MULTI_FORM.format( - plural_count=plural_count, - po_plural_count=po_plural_count, - po_plural_count_minus_1=po_plural_count 
- 1, - ) - ) - elif po_plural_count == 1: - instructions.append( - _PROMPT_PO_PLURAL_ONE_FORM.format(plural_count=plural_count) - ) - # If any PO plural entry has a placeholder only in the plural source, - # add a note so the LLM provides a safe singular (no such placeholder). - if any( - key_info.get("is_plural") - and key_info.get("msgid_plural") - and plural_source_has_placeholders_not_in_singular( - key_info.get("english", ""), - key_info.get("msgid_plural", ""), - ) - for key_info in key_batch - ): - instructions.append( - "Some of the above PO plural entries have a variable only in " - "the plural source (e.g. %(num_selected)s). For those, we use " - "your 'singular' for the single form—so provide a natural " - "singular translation WITHOUT that variable." - ) - else: - instructions.append( - _PROMPT_PO_PLURAL_SINGULAR_PLURAL.format(plural_count=plural_count) - ) - - return "\n".join(instructions) - - def _call_llm_batch( # noqa: PLR0913 - self, - key_batch: list[dict], - lang_code: str, - provider: str, - model: str, - glossary: dict[str, Any] | None = None, - timeout: int = 120, - po_nplurals_override: int | None = None, - ) -> list[str | dict[str, str] | None]: - """Call LLM API to translate multiple texts in a single request. 
- - Args: - key_batch: List of key information dictionaries to translate - lang_code: Target language code - provider: Translation provider name (openai, gemini, mistral) - model: LLM model name - glossary: Optional glossary dictionary - timeout: Request timeout in seconds (default: 120) - """ - api_key = self._get_llm_api_key(provider) - - texts_dict = {} - plural_entries: dict[str, bool] = {} - json_plural_entries: dict[str, bool] = {} - - for i, key_info in enumerate(key_batch, 1): - key_str = str(i) - file_type = key_info.get("file_type", "po") - english_text = key_info["english"] - is_plural = key_info.get("is_plural", False) - msgid_plural = key_info.get("msgid_plural") - - if file_type == "json" and self._is_icu_format(english_text): - texts_dict[key_str] = english_text - json_plural_entries[key_str] = True - elif is_plural and msgid_plural: - texts_dict[key_str] = {"singular": english_text, "plural": msgid_plural} - (json_plural_entries if file_type == "json" else plural_entries)[ - key_str - ] = True - else: - texts_dict[key_str] = english_text - - texts_block = json.dumps(texts_dict, indent=2, ensure_ascii=False) - plural_count = len(plural_entries) - json_plural_count = len(json_plural_entries) - - lang_name = LANGUAGE_MAPPING.get(lang_code, lang_code) - glossary_section = self._format_glossary_for_prompt(glossary) - icu_categories_str = ", ".join(self._get_icu_plural_categories(lang_code)) - plural_instructions = self._build_plural_instructions( - { - "json_plural_info": { - "count": json_plural_count, - "entries": json_plural_entries, - }, - "plural_count": plural_count, - "key_batch": key_batch, - "icu_categories_str": icu_categories_str, - "lang_code": lang_code, - "po_plural_count_override": po_nplurals_override, - } - ) - - prompt_template = ( - f"""Translate the following {len(key_batch)} text(s) to {lang_name} """ - f"""(language code: {lang_code}). - Context: These are from an educational platform. 
- - CRITICAL - Placeholders and variables (NEVER translate these): - - Copy every placeholder EXACTLY from source to translation: same spelling, - same braces and brackets. Do NOT translate, rename, add, or remove any - placeholder. This includes: {{variable_name}}, %(name)s, %s, {{0}}, and - HTML-like tags such as <{{tag}}> or . - - For strings containing {{variable_name}}: keep every {{variable_name}} - character-for-character in the translation. A single missing }} or wrong - name will break the build. Translate only the surrounding text. - - Preserve HTML tags and formatting. - {glossary_section} - {plural_instructions} - - Return a JSON object where each key is the number (1, 2, 3, etc.). - - FORMAT BY ENTRY TYPE: - - 1. Singular entries (no plural): value is a simple translation string. - - 2. JSON plural entries: value is an ICU MessageFormat string. - Example: "{{count, plural, one {{# item}} other {{# items}}}}" - - 3. PO plural entries: value is an object with PLAIN TRANSLATION STRINGS. - NEVER use ICU MessageFormat for PO entries! - Use simple translated strings for each form. - - For languages with 2 forms: - {{"singular": "translation for one", "plural": "translation for many"}} - - For languages with more forms (e.g., Arabic with 6): - {{"0": "translation for zero", "1": "translation for one", - "2": "translation for two", "3": "translation for few", - "4": "translation for many", "5": "translation for other"}} - - CRITICAL: Each value in PO entries must be a plain string, " - "NOT ICU syntax! Preserve placeholders ({{count}}, " - "%(count)s, etc.) in the plain strings. - - Input texts (numbered): - {texts_block} - - Return ONLY valid JSON in this format: - {{ - "1": "translation of first text", - "2": "{{count, plural, one {{singular}} " - "other {{plural}}}}", - "3": {{"singular": "singular translation", " - ""plural": "plural translation"}}, - "4": {{"0": "form 0", "1": "form 1", "2": "form 2"}} - ... 
- }}""" - ) - prompt = textwrap.dedent(prompt_template) - - try: - completion_kwargs = configure_litellm_for_provider( - provider=provider, - model=model, - api_key=api_key, - messages=[{"role": "user", "content": prompt}], - temperature=0.3, - timeout=timeout, - ) - - response = completion(**completion_kwargs) - response_text = response.choices[0].message.content.strip() - - logger.debug( - "LLM response received for batch of %d key(s), response length: %d", - len(key_batch), - len(response_text), - ) - - translations = self._parse_json_response(response_text, key_batch) - if translations: - logger.debug( - "Successfully parsed JSON response for batch of %d key(s)", - len(key_batch), - ) - return translations - - logger.warning( - "JSON parsing failed for batch, falling back to order-based parsing" - ) - return self._parse_order_based_response(response_text, key_batch) - - except TimeoutError: - logger.exception( - "LLM batch API call timed out after %d seconds " - "(model: %s, batch size: %d)", - timeout, - model, - len(key_batch), - ) - msg = ( - f"LLM batch API call timed out after {timeout} seconds.\n" - f"Model: {model}\n" - f"Batch size: {len(key_batch)}\n" - f"Try reducing --batch-size or check your network connection." 
- ) - raise CommandError(msg) from None - except (requests.RequestException, ValueError, KeyError, AttributeError) as e: - logger.exception( - "LLM batch API call failed (model: %s, batch size: %d)", - model, - len(key_batch), - ) - msg = ( - f"LLM batch API call failed: {e!s}\n" - f"Model: {model}\n" - f"Batch size: {len(key_batch)}\n" - f"Make sure TRANSLATIONS_PROVIDERS is configured in settings " - f"with the appropriate api_key, or set the environment variable " - f"(OPENAI_API_KEY, GEMINI_API_KEY, or MISTRAL_API_KEY)" - ) - raise CommandError(msg) from e - - def _extract_json_from_response(self, response_text: str) -> str: - """Extract JSON text from response, handling code blocks.""" - json_text = response_text - if "```json" in response_text: - start = response_text.find("```json") + 7 - end = response_text.find("```", start) - if end > start: - json_text = response_text[start:end].strip() - elif "```" in response_text: - start = response_text.find("```") + 3 - end = response_text.find("```", start) - if end > start: - json_text = response_text[start:end].strip() - return json_text - - def _process_translation_key( - self, key: str, value: Any, key_info: dict - ) -> tuple[str | dict[str, str] | None, bool]: - """Process a single translation key from LLM response.""" - file_type = key_info.get("file_type", "po") - is_plural = key_info.get("is_plural", False) - - translation = self._process_llm_response_value( - value, key_info, file_type, is_plural=is_plural - ) - is_missing = translation is None - - if translation is None: - self._log_rejected_translation(key, key_info) - elif translation is not None: - self._log_parsed_translation(key_info, translation) - - return translation, is_missing - - def _log_rejected_translation(self, key: str, key_info: dict) -> None: - """Log warning for rejected translation.""" - self.stdout.write( - self.style.WARNING( - f" WARNING: Translation rejected for key {key} " - f"(file: {key_info.get('file_path', 'unknown')}, " - 
f"key: {key_info.get('key', 'unknown')[:50]}). " - f"Likely returned ICU format for PO file." - ) - ) - logger.warning( - "Translation rejected for key %s (file: %s) - " - "likely ICU format for PO file", - key_info.get("key", "unknown"), - key_info.get("file_path", "unknown"), - ) - - def _log_parsed_translation( - self, key_info: dict, translation: str | dict[str, str] - ) -> None: - """Log debug message for parsed translation.""" - logger.debug( - "Parsed translation for key %s: %s", - key_info.get("key", "unknown"), - ( - str(translation)[:MAX_LOG_STRING_LENGTH] + "..." - if len(str(translation)) > MAX_LOG_STRING_LENGTH - else str(translation) - ), - ) - - def _parse_json_response( - self, response_text: str, key_batch: list[dict] - ) -> list[str | dict[str, str] | None] | None: - """Parse JSON response from LLM.""" - json_text = self._extract_json_from_response(response_text) - - try: - data = json.loads(json_text) - translations: list[str | dict[str, str] | None] = [] - missing_keys = [] - for i in range(len(key_batch)): - key = str(i + 1) - key_info = key_batch[i] - - if key in data: - value = data[key] - translation, is_missing = self._process_translation_key( - key, value, key_info - ) - if is_missing: - missing_keys.append(key_info.get("key", "unknown")) - translations.append(translation) - else: - missing_keys.append(key_info.get("key", "unknown")) - self.stdout.write( - self.style.WARNING( - f" WARNING: LLM did not return translation for key {key} " - f"(file: {key_info.get('file_path', 'unknown')}, " - f"key: {key_info.get('key', 'unknown')})" - ) - ) - logger.warning( - "LLM did not return translation for key %s (file: %s)", - key_info.get("key", "unknown"), - key_info.get("file_path", "unknown"), - ) - translations.append(None) - - if missing_keys: - logger.warning( - "LLM response missing %d key(s): %s", - len(missing_keys), - missing_keys, - ) - except (json.JSONDecodeError, KeyError, ValueError): - logger.exception("Failed to parse JSON response") 
- return None - else: - return translations - - def _parse_order_based_response( - self, response_text: str, key_batch: list[dict] - ) -> list[str | dict[str, str] | None]: - """Fallback: Parse response assuming translations are in order.""" - lines = [line.strip() for line in response_text.split("\n") if line.strip()] - cleaned_lines = [ - line.lstrip("0123456789.-) ").strip() - for line in lines - if line.lstrip("0123456789.-) ").strip() - ] - if len(cleaned_lines) < len(key_batch): - cleaned_lines.extend([""] * (len(key_batch) - len(cleaned_lines))) - # Return as list[str | dict[str, str] | None] - all strings in this fallback - return cast( - "list[str | dict[str, str] | None]", cleaned_lines[: len(key_batch)] - ) - - def _get_llm_api_key(self, provider: str) -> str | None: - """Get API key from TRANSLATIONS_PROVIDERS or environment variables. - - Args: - provider: Translation provider name (openai, gemini, mistral) - """ - try: - if hasattr(settings, "TRANSLATIONS_PROVIDERS"): - providers = getattr(settings, "TRANSLATIONS_PROVIDERS", {}) - if isinstance(providers, dict) and provider in providers: - provider_config = providers[provider] - if isinstance(provider_config, dict): - api_key = provider_config.get("api_key") - if api_key: - return api_key - except (AttributeError, TypeError) as e: - logger.debug("Error accessing TRANSLATIONS_PROVIDERS: %s", e) - - env_key_name = ( - "GEMINI_API_KEY" - if provider == PROVIDER_GEMINI - else "MISTRAL_API_KEY" - if provider == PROVIDER_MISTRAL - else "OPENAI_API_KEY" - ) - return os.environ.get(env_key_name) - - def _process_string_value( - self, value: str, file_type: str, *, is_plural: bool - ) -> tuple[str | dict | None, bool]: - """Process a string value from LLM response. - - Returns: - Tuple of (result, is_dict) where is_dict indicates if result is a dict - that should be processed further. 
- """ - stripped = value.strip() - if stripped.startswith("{") and stripped.endswith("}"): - return self._parse_string_dict(stripped, file_type, is_plural=is_plural) - if self._is_icu_format(stripped): - if file_type == "po" and is_plural: - return None, False - return stripped, False - return stripped, False - - def _parse_string_dict( - self, value: str, file_type: str, *, is_plural: bool - ) -> tuple[str | dict | None, bool]: - """Parse a string that looks like a dict.""" - try: - parsed = json.loads(value) - if isinstance(parsed, dict): - # Return dict to be processed further - return parsed, True - except (json.JSONDecodeError, ValueError): - # Not valid JSON; fall through to ICU check and plain string handling. - pass - - # Not a dict or parsing failed, check ICU format - if self._is_icu_format(value) and file_type == "po" and is_plural: - return None, False - return value, False - - def _process_dict_numeric_keys( - self, value: dict, file_type: str, *, is_plural: bool - ) -> dict[str, str] | None: - """Process dict with numeric keys (multiple plural forms).""" - numeric_keys = _get_numeric_plural_keys(value) - if not numeric_keys or file_type != "po" or not is_plural: - return None - - result = {} - for key in numeric_keys: - v_str = str(value[key]).strip() - if self._is_icu_format(v_str): - return None - result[str(key)] = v_str - return result if result else None - - def _process_dict_singular_plural( - self, value: dict, key_info: dict, file_type: str, *, is_plural: bool - ) -> str | dict[str, str] | None: - """Process dict with singular/plural keys.""" - if "singular" not in value or "plural" not in value: - return None - - if file_type == "json" and is_plural: - lang_code = key_info.get("lang_code", "en") - return self._convert_to_icu_format( - str(value["singular"]).strip(), - str(value["plural"]).strip(), - lang_code, - ) - return { - "singular": str(value["singular"]).strip(), - "plural": str(value["plural"]).strip(), - } - - def 
_process_llm_response_value( - self, value: Any, key_info: dict, file_type: str, *, is_plural: bool - ) -> str | dict[str, str] | None: - """Process a single value from LLM response, converting formats.""" - if isinstance(value, str): - result, is_dict = self._process_string_value( - value, file_type, is_plural=is_plural - ) - if result is None: - return None - if is_dict: - # Result is a dict, process it further - value = result - else: - # Result is a string, return it - return result - - if isinstance(value, dict): - # Check for numeric keys (multiple plural forms) - result = self._process_dict_numeric_keys( - value, file_type, is_plural=is_plural - ) - if result is not None: - return result - - # Check for singular/plural format - result = self._process_dict_singular_plural( - value, key_info, file_type, is_plural=is_plural - ) - if result is not None: - return result - - return str(value).strip() - - def _group_translations_by_file( - self, translations: dict[str, Any], empty_keys: list[dict] - ) -> dict[str, dict[str, Any]]: - """Group translations by file path.""" - translations_by_file: dict[str, dict[str, Any]] = {} - - for key_info in empty_keys: - file_path_str = str(Path(key_info["file_path"]).resolve()) - # Include msgctxt in key if it exists to match key structure - msgctxt = key_info.get("msgctxt") - if msgctxt: - translation_key = f"{file_path_str}:{msgctxt}:{key_info['key']}" - else: - translation_key = f"{file_path_str}:{key_info['key']}" - - if translation_key in translations: - trans_value = translations[translation_key] - if trans_value is None: - continue # Skip missing translations - file_type = key_info.get("file_type", "po") - is_plural = key_info.get("is_plural", False) - - if file_type == "json" and isinstance(trans_value, dict): - if "singular" in trans_value and "plural" in trans_value: - trans_value = self._process_llm_response_value( - trans_value, key_info, file_type, is_plural=is_plural - ) - else: - trans_value = 
trans_value.get("singular", str(trans_value)) - - # For PO files, include msgctxt in key for apply_po_translations - if key_info["file_type"] == "po" and msgctxt: - # Store with msgctxt prefix for proper matching - po_key = f"{msgctxt}:{key_info['key']}" - else: - po_key = key_info["key"] - - translations_by_file.setdefault(file_path_str, {})[po_key] = trans_value - - return translations_by_file - - def _apply_file_translations( - self, - file_path: Path, - file_translations: dict[str, Any], - empty_keys: list[dict], - stdout, - lang_code: str | None = None, - ) -> tuple[int, str, list[dict[str, str]]]: - """Apply translations to a single file. - - Returns (count, app, rejected_brace_entries). - """ - if not file_path.exists(): - stdout.write(self.style.WARNING(f" WARNING: File not found: {file_path}")) - return 0, "unknown", [] - - # Normalize paths for comparison - normalized_file_path = str(file_path.resolve()) - key_info = next( - k - for k in empty_keys - if str(Path(k["file_path"]).resolve()) == normalized_file_path - ) - app = key_info.get("app", "unknown") - - logger.debug( - "Applying %d translation(s) to %s (type: %s, app: %s)", - len(file_translations), - file_path.name, - key_info["file_type"], - app, - ) - rejected_brace_entries: list[dict[str, str]] = [] - if key_info["file_type"] == "json": - count = apply_json_translations(file_path, file_translations) - elif key_info["file_type"] == "po": - count = apply_po_translations( - file_path, - file_translations, - lang_code, - rejected_brace_entries=rejected_brace_entries, - ) - else: - logger.warning( - "Unknown file type '%s' for file: %s", key_info["file_type"], file_path - ) - return 0, app, [] - - if count > 0: - logger.info( - "Applied %d translation(s) to %s (app: %s)", count, file_path.name, app - ) - - return count, app, rejected_brace_entries - - def _apply_translations( - self, - translations: dict[str, Any], - empty_keys: list[dict], - stdout, - lang_code: str | None = None, - ) -> tuple[int, 
dict[str, Any]]: - """Apply translations to files. - - Returns (applied_count, applied_by_app dict with details and - rejected_brace_format_entries). - """ - translations_by_file = self._group_translations_by_file( - translations, empty_keys - ) - - if not translations_by_file: - stdout.write(self.style.WARNING(" WARNING: No translations to apply")) - return 0, {"by_app": {}, "details": [], "rejected_brace_format_entries": []} - - applied = 0 - applied_by_app: dict[str, int] = {} - applied_details: list[dict[str, Any]] = [] - all_rejected_brace: list[dict[str, str]] = [] - - for file_path_str, file_translations in translations_by_file.items(): - full_path = Path(file_path_str) - count, app, rejected_brace_entries = self._apply_file_translations( - full_path, file_translations, empty_keys, stdout, lang_code - ) - - applied += count - all_rejected_brace.extend(rejected_brace_entries) - if count > 0: - applied_by_app[app] = applied_by_app.get(app, 0) + count - applied_details.append( - {"app": app, "file": full_path.name, "count": count} - ) - stdout.write( - f" Applied {count} translations to {app} ({full_path.name})" - ) - - if applied_by_app: - app_summary = ", ".join( - f"{app}: {count}" for app, count in applied_by_app.items() - ) - stdout.write(f" Summary by app: {app_summary}") - - return applied, { - "by_app": applied_by_app, - "details": applied_details, - "rejected_brace_format_entries": all_rejected_brace, - } - - def _cleanup_failed_branch(self, repo: GitRepository, branch_name: str) -> None: - """Clean up branch if PR creation fails.""" - try: - repo.switch_to_main() - # Only try to delete if branch exists locally - if branch_name in [ref.name for ref in repo.repo.heads]: - with suppress(git.exc.GitCommandError): - repo.repo.git.branch("-D", branch_name) - self.stdout.write( - self.style.WARNING( - f" Cleaned up failed branch: {branch_name}" - ) - ) - except (git.exc.GitCommandError, AttributeError) as e: - self.stdout.write( - self.style.WARNING(f" Could 
not clean up branch {branch_name}: {e!s}") - ) - - def _commit_changes( - self, repo: GitRepository, branch_name: str, lang_code: str - ) -> bool: - """Commit changes to git repository. Returns True if committed.""" - # Check if branch already exists - if repo.branch_exists(branch_name): - self.stdout.write( - self.style.WARNING( - f" Branch '{branch_name}' already exists. " - f"Switching to it and continuing..." - ) - ) - try: - repo.repo.git.checkout(branch_name) - except git.exc.GitCommandError: - # If local branch doesn't exist but remote does, create tracking branch - repo.repo.git.checkout("-b", branch_name, f"origin/{branch_name}") - else: - repo.configure_user() - repo.create_branch(branch_name) - repo.stage_all() - - if not repo.has_changes(): - self.stdout.write( - self.style.WARNING( - " No changes to commit. Skipping commit and PR creation." - ) - ) - repo.switch_to_main() - with suppress(git.exc.GitCommandError): - repo.repo.git.branch("-D", branch_name) - return False - - safe_lang_code = sanitize_for_git(lang_code) - commit_message = ( - f"feat: Add {safe_lang_code} translations via LLM\n\n" - f"Automated translation of empty keys for {safe_lang_code} language." 
- ) - - repo.commit(commit_message) - - github_token = getattr( - settings, "TRANSLATIONS_GITHUB_TOKEN", None - ) or os.environ.get("TRANSLATIONS_GITHUB_TOKEN") - repo.push_branch(branch_name, github_token) - self.stdout.write(" Pushed branch to remote") - - return True - - def _create_pull_request( - self, - repo_path: str, - branch_name: str, - pr_data: PullRequestData, - repo_url: str, - ) -> str: - """Create pull request using GitHub CLI or API.""" - iso_code = pr_data["iso_code"] - provider = pr_data["provider"] - model = pr_data["model"] - provider_display = provider.replace("_", " ").title() - pr_title = ( - f"feat: Add {iso_code} translations via LLM using " - f"{provider_display} provider and model {model}" - ) - try: - # Using GitHub CLI (gh) - trusted system command - gh_path = shutil.which("gh") - if gh_path: - result = subprocess.run( # noqa: S603 - [ - gh_path, - "pr", - "create", - "--title", - pr_title, - "--body", - self._generate_pr_body(pr_data), - ], - cwd=repo_path, - capture_output=True, - text=True, - check=True, - ) - return result.stdout.strip() - except (subprocess.CalledProcessError, FileNotFoundError): - pass - # Fall back to API if gh CLI is not available or fails - return self._create_pr_via_api( - repo_path, - branch_name, - pr_data, - repo_url, - pr_title=pr_title, - ) - - def _generate_error_section( - self, errors: int, errors_by_app: dict[str, int] | None = None - ) -> str: - """Generate error warning section for PR body if there are errors. - - Args: - errors: Number of translation errors. - errors_by_app: Dictionary mapping app/MFE names to error counts. - - Returns: - Error section markdown string, or empty string if no errors. 
- """ - if errors == 0: - return "" - - error_details = "" - if errors_by_app: - error_lines = [ - f"- **{app}**: {count} key(s) failed" - for app, count in sorted( - errors_by_app.items(), key=lambda x: x[1], reverse=True - ) - ] - error_details = ( - "\n**Errors by app/MFE:**\n\n" + "\n".join(error_lines) + "\n" - ) - - error_template = f""" - ### Translation Errors - - **{errors} translation key(s) failed to translate** due to API errors, rate - limits, or parsing issues. - {error_details} - **Impact:** - - These keys remain untranslated in the target language files - - They will need to be translated manually or re-run the command - - The translation process continued and completed successfully - for the remaining keys - - **Recommendation:** - - Review the command output logs for specific error details - - Consider re-running the command to retry failed batches - - Check API key permissions and rate limits if errors persist - - """ - return textwrap.dedent(error_template) - - def _generate_translation_summary( - self, glossary_matches: int, llm_translations: int, errors: int - ) -> str: - """Generate translation statistics summary line. - - Args: - glossary_matches: Number of glossary matches. - llm_translations: Number of LLM translations. - errors: Number of translation errors. - - Returns: - Summary string. - """ - if glossary_matches > 0: - return ( - f"Summary - Glossary matches: {glossary_matches}, " - f"LLM translations: {llm_translations}, Errors: {errors}" - ) - return f"Summary - LLM translations: {llm_translations}, Errors: {errors}" - - def _generate_rejected_brace_section( - self, rejected_brace_format_entries: list[dict[str, str]] - ) -> str: - """Generate PR section for rejected brace-format translations.""" - if not rejected_brace_format_entries: - return "" - lines = [ - "### Rejected brace-format translations", - "", - "The following entries had invalid python-brace-format translations " - "(e.g. 
missing or mismatched `{placeholders}`) and were not applied. " - "They remain untranslated for manual review:", - "", - ] - for item in rejected_brace_format_entries[:MAX_REJECTED_BRACE_DISPLAY]: - msgid = (item.get("msgid") or "").replace("|", "\\|")[:100] - file_name = item.get("file", "") - lines.append(f"- `{file_name}`: {msgid!r}") - if len(rejected_brace_format_entries) > MAX_REJECTED_BRACE_DISPLAY: - extra = len(rejected_brace_format_entries) - MAX_REJECTED_BRACE_DISPLAY - lines.append(f"- ... and {extra} more.") - return "\n".join(lines) + "\n\n" - - def _generate_pr_body(self, pr_data: PullRequestData) -> str: - """Generate PR description.""" - lang_code = pr_data["lang_code"] - iso_code = pr_data["iso_code"] - sync_stats = pr_data["sync_stats"] - applied_count = pr_data["applied_count"] - translation_stats = pr_data["translation_stats"] - applied_by_app = pr_data["applied_by_app"] - provider = pr_data["provider"] - model = pr_data["model"] - rejected_brace_format_entries = pr_data.get("rejected_brace_format_entries", []) - - glossary_matches = translation_stats.get("glossary_matches", 0) - llm_translations = translation_stats.get("llm_translations", 0) - errors = translation_stats.get("errors", 0) - errors_by_app: dict[str, int] = cast( - "dict[str, int]", translation_stats.get("errors_by_app", {}) - ) - - translation_summary = self._generate_translation_summary( - glossary_matches, llm_translations, errors - ) - error_section = self._generate_error_section(errors, errors_by_app) - rejected_brace_section = self._generate_rejected_brace_section( - rejected_brace_format_entries - ) - - applied_details = applied_by_app.get("details", []) - breakdown_lines = [ - f" Applied {detail['count']} translations to " - f"{detail['app']} ({detail['file']})" - for detail in applied_details - ] - - # Build changes section with conditional error line - changes_lines = [ - f"- **Language**: {lang_code} ({iso_code})", - f"- **Keys synced**: 
{sync_stats['frontend']['added']} frontend keys, " - f"{sync_stats['backend']['added']} backend entries", - f"- **Translations applied**: {applied_count} keys translated", - f"- **Typos fixed**: {sync_stats['frontend']['fixed']}", - ] - if errors > 0: - changes_lines.append( - f"- **Translation errors**: {errors} keys failed to translate" - ) - if rejected_brace_format_entries: - changes_lines.append( - f"- **Rejected brace-format**: {len(rejected_brace_format_entries)} " - "entries not applied (invalid placeholders; see section below)" - ) - - # Build statistics section with conditional error line - statistics_lines = [ - translation_summary, - f" Translated {applied_count} keys", - ] - if errors > 0: - statistics_lines.append(f" Failed: {errors} keys") - - # Build next steps section with conditional error line - next_steps_lines = [ - "- Review translations for accuracy", - ] - if errors > 0: - next_steps_lines.append( - "- Address failed translations (see error section above)" - ) - if rejected_brace_format_entries: - next_steps_lines.append( - "- Manually fix or translate entries with rejected brace-format " - "(see rejected brace-format section above)" - ) - next_steps_lines.extend( - [ - "- Test in staging environment", - "- Merge when ready", - ] - ) - - provider_display = provider.replace("_", " ").title() - pr_template = ( - f"""## Summary - - This PR adds {iso_code} translations via LLM automation using { - provider_display - } provider and model {model}. 
- {error_section} - {rejected_brace_section} - ### Changes - - {chr(10).join(changes_lines)} - - ### Translation Statistics - - {chr(10).join(statistics_lines)} - - ### Applied Translations - - { - chr(10).join(breakdown_lines) - if breakdown_lines - else " No translations applied" - } - - ### Files Modified - - - Frontend apps: {sync_stats["frontend"]["created"]} created, """ - f"""{sync_stats["frontend"]["synced"]} synced - - Backend: PO files updated - - ### Next Steps - - {chr(10).join(next_steps_lines)} - - --- - *This PR was automatically generated by the sync_and_translate_language """ - f"""management command.* - """ - ) - return textwrap.dedent(pr_template) - - def _create_pr_via_api( - self, - repo_path: str, - branch_name: str, - pr_data: PullRequestData, - repo_url: str, - pr_title: str, - ) -> str: - """Create PR using GitHub API.""" - client = GitHubAPIClient() - owner, repo = GitHubAPIClient.parse_repo_url(repo_url) - - git_repo = GitRepository(repo_path) - main_branch = git_repo._get_main_branch_name() # noqa: SLF001 - - return client.create_pull_request( - owner=owner, - repo=repo, - branch_name=branch_name, - title=pr_title, - body=self._generate_pr_body(pr_data), - base=main_branch, - stdout=self.stdout, - ) diff --git a/src/ol_openedx_course_translations/ol_openedx_course_translations/settings/common.py b/src/ol_openedx_course_translations/ol_openedx_course_translations/settings/common.py index e6e1b0174..fb9eb3acd 100644 --- a/src/ol_openedx_course_translations/ol_openedx_course_translations/settings/common.py +++ b/src/ol_openedx_course_translations/ol_openedx_course_translations/settings/common.py @@ -55,15 +55,6 @@ def apply_common_settings(settings): "default_model": "mistral-large-latest", }, } - settings.TRANSLATIONS_GITHUB_TOKEN = "" - # Translation repository settings (used by sync_and_translate_language command) - # Git URL of the translations repository (e.g. mitxonline-translations). 
- settings.TRANSLATIONS_REPO_URL = ( - "https://github.com/mitodl/mitxonline-translations.git" - ) - # Local path to a clone of the translations repo; leave empty to clone - # at the default path from TRANSLATIONS_REPO_URL. - settings.TRANSLATIONS_REPO_PATH = "" settings.LITE_LLM_REQUEST_TIMEOUT = 300 # seconds # HTML/XML translation safety/perf knobs (LLM providers only) diff --git a/src/ol_openedx_course_translations/ol_openedx_course_translations/utils/command_utils.py b/src/ol_openedx_course_translations/ol_openedx_course_translations/utils/command_utils.py deleted file mode 100644 index 50a92f431..000000000 --- a/src/ol_openedx_course_translations/ol_openedx_course_translations/utils/command_utils.py +++ /dev/null @@ -1,244 +0,0 @@ -""" -Utility functions for management commands. - -This module provides reusable utilities for Django management commands, -including validation, error handling, git operations, and configuration helpers. -""" - -import os -import re -from datetime import UTC, datetime -from typing import Any - -from django.conf import settings -from django.core.management.base import CommandError - -from ol_openedx_course_translations.utils.constants import ( - PROVIDER_GEMINI, - PROVIDER_MISTRAL, -) - -# ============================================================================ -# Validation Utilities -# ============================================================================ - -# Language code suffix length constants -REGION_CODE_LENGTH = 2 # 2-letter region codes (e.g., ES, BR) -SCRIPT_TAG_LENGTH = 4 # 4-letter script tags (e.g., Hans, Hant) - - -def normalize_language_code(code: str) -> str: - """Normalize language code to use underscores (Django/gettext format). - - Converts BCP 47 format (hyphens) to gettext format (underscores) and - normalizes case: language part lowercase, suffix properly cased. 
- Examples: - - 'es-419' -> 'es_419' - - 'ES-419' -> 'es_419' - - 'es-ES' -> 'es_ES' - - 'ES_ES' -> 'es_ES' - - 'zh-Hans' -> 'zh_Hans' - - 'ZH-HANS' -> 'zh_Hans' - - 'es_419' -> 'es_419' (unchanged) - - 'es' -> 'es' (unchanged) - """ - # Replace hyphens with underscores and split - parts = code.replace("-", "_").split("_", 1) - lang_part = parts[0].lower() # Language: always lowercase - - if len(parts) == 1: - return lang_part - - # Normalize suffix: uppercase 2-char regions, title case 4-char scripts - suffix = parts[1] - if len(suffix) == REGION_CODE_LENGTH: - suffix = suffix.upper() # Region codes: ES, BR, etc. - elif len(suffix) == SCRIPT_TAG_LENGTH and suffix[0].isalpha(): - suffix = suffix.title() # Script tags: Hans, Hant, etc. - # Numeric regions (419) and others stay as-is - - return f"{lang_part}_{suffix}" - - -def validate_language_code(code: str, field_name: str = "language code") -> None: - """Validate language code format. - - Accepts normalized codes (already normalized by normalize_language_code): - - xx (2 lowercase letters): e.g., 'el', 'es', 'ar' - - xx_XX (with 2-letter region): e.g., 'es_ES' - - xx_NNN (with UN M.49 numeric region): e.g., 'es_419' - - xx_Xxxx (with script subtag): e.g., 'zh_Hans' - """ - # Pattern: xx, xx_XX, xx_419, xx_Hans - pattern = r"^[a-z]{2}(_([A-Z]{2}|[0-9]{3}|[A-Z][a-z]{3}))?$" - if not re.match(pattern, code): - msg = ( - f"Invalid {field_name} format: {code}. 
" - f"Expected format: 'xx', 'xx_XX', 'xx_419', 'xx_Hans' " - f"(e.g., 'el', 'es_ES', 'es_419', 'zh_Hans')" - ) - raise CommandError(msg) - - -def validate_branch_name(branch_name: str) -> None: - """Validate branch name format to prevent injection.""" - if not re.match(r"^[a-z0-9/_-]+$", branch_name): - msg = f"Invalid branch name format: {branch_name}" - raise CommandError(msg) - - -# ============================================================================ -# Git Utilities -# ============================================================================ - - -def sanitize_for_git(text: str) -> str: - """Sanitize text for use in git operations.""" - return re.sub(r"[^\w\s-]", "", text) - - -def create_branch_name(lang_code: str) -> str: - """Create a safe branch name from language code.""" - safe_lang = re.sub(r"[^a-z0-9_-]", "", lang_code.lower()) - timestamp = datetime.now(tz=UTC).strftime("%Y%m%d-%H%M%S") - return f"feature/add-{safe_lang}-translations-{timestamp}" - - -# ============================================================================ -# Configuration Helpers -# ============================================================================ - - -def get_config_value(key: str, options: dict, default: Any = None) -> Any: - """Get configuration value from options, settings, or environment.""" - # Check command-line options first (Django converts --repo-path to repo_path) - option_value = options.get(key) or options.get(key.replace("_", "-")) - if option_value: - return option_value - - # Check settings with TRANSLATIONS_ prefix - setting_key = f"TRANSLATIONS_{key.upper().replace('-', '_')}" - if hasattr(settings, setting_key): - setting_value = getattr(settings, setting_key) - # Only use setting if it's not empty - if setting_value: - return setting_value - - # Check environment variable with TRANSLATIONS_ prefix - env_key = setting_key - env_value = os.environ.get(env_key) - if env_value: - return env_value - - # Return default if nothing found - 
return default - - -def get_default_provider() -> str | None: - """Get default provider from TRANSLATIONS_PROVIDERS.""" - providers = getattr(settings, "TRANSLATIONS_PROVIDERS", {}) - if not isinstance(providers, dict): - return None - return providers.get("default_provider") - - -def get_default_model_for_provider(provider: str) -> str | None: - """Get default model for a provider from TRANSLATIONS_PROVIDERS.""" - providers = getattr(settings, "TRANSLATIONS_PROVIDERS", {}) - if not isinstance(providers, dict): - return None - provider_config = providers.get(provider, {}) - if not isinstance(provider_config, dict): - return None - return provider_config.get("default_model") - - -def configure_litellm_for_provider( - provider: str, model: str, api_key: str | None, **base_kwargs -) -> dict[str, Any]: - """Configure LiteLLM completion kwargs for a specific provider.""" - completion_kwargs = dict(base_kwargs) - completion_kwargs["model"] = model - - if api_key: - completion_kwargs["api_key"] = api_key - if provider == PROVIDER_GEMINI: - # If no prefix, add gemini/ to force Gemini API usage (not Vertex AI) - # If vertex_ai/ or gemini/ prefix already exists, respect it - if not model.startswith(("gemini/", "vertex_ai/")): - completion_kwargs["model"] = f"gemini/{model}" - # Gemini 3 models require temperature = 1.0 to avoid issues: - # - Infinite loops in response generation - # - Degraded reasoning performance - # - Failure on complex tasks - # See: https://docs.litellm.ai/docs/providers/gemini - if "gemini-3" in model.lower(): - completion_kwargs["temperature"] = 1.0 - elif provider == PROVIDER_MISTRAL and not model.startswith("mistral/"): - completion_kwargs["model"] = f"mistral/{model}" - - return completion_kwargs - - -# ============================================================================ -# Error Handling Utilities -# ============================================================================ - - -def is_retryable_error(error: Exception) -> bool: - """ - 
Check if an error is retryable (network issues, rate limits, timeouts). - - Args: - error: The exception to check - - Returns: - True if the error is retryable, False otherwise - - Examples: - >>> is_retryable_error(ConnectionError("Connection timeout")) - True - >>> is_retryable_error(ValueError("Invalid API key")) - False - """ - error_str = str(error).lower() - - # Retryable errors - retryable_patterns = [ - "timeout", - "connection", - "rate limit", - "429", - "503", - "502", - "500", - "temporarily unavailable", - "service unavailable", - "too many requests", - ] - - # Non-retryable errors (don't retry these) - non_retryable_patterns = [ - "invalid api key", - "authentication", - "401", - "403", - "not found", - "404", - "bad request", - "400", - "commanderror", # Our custom errors that are usually non-retryable - ] - - # Check for non-retryable first - for pattern in non_retryable_patterns: - if pattern in error_str: - return False - - # Check for retryable patterns - for pattern in retryable_patterns: - if pattern in error_str: - return True - - # Default: retry unknown errors (could be transient) - return True diff --git a/src/ol_openedx_course_translations/ol_openedx_course_translations/utils/constants.py b/src/ol_openedx_course_translations/ol_openedx_course_translations/utils/constants.py index cd1118c49..c1dd803b6 100644 --- a/src/ol_openedx_course_translations/ol_openedx_course_translations/utils/constants.py +++ b/src/ol_openedx_course_translations/ol_openedx_course_translations/utils/constants.py @@ -1,4 +1,4 @@ -"""Constants for translation synchronization.""" +"""Constants for course translation utilities.""" # LLM Provider names PROVIDER_DEEPL = "deepl" @@ -6,234 +6,6 @@ PROVIDER_MISTRAL = "mistral" PROVIDER_OPENAI = "openai" -# Learner-facing frontend applications that require translation -LEARNER_FACING_APPS = [ - "frontend-app-learning", - "frontend-app-learner-dashboard", - "frontend-app-learner-record", - "frontend-app-account", - 
"frontend-app-profile", - "frontend-app-authn", - "frontend-app-catalog", - "frontend-app-discussions", - "frontend-component-header", - "frontend-component-footer", - "frontend-app-ora", - "frontend-platform", -] - -# Plural forms configuration for different languages -# Based on GNU gettext plural forms specification -# See: https://www.gnu.org/software/gettext/manual/html_node/Plural-forms.html -PLURAL_FORMS = { - # Languages with no plural forms (nplurals=1) - "ja": "nplurals=1; plural=0;", # Japanese - "ko": "nplurals=1; plural=0;", # Korean - "zh": "nplurals=1; plural=0;", # Chinese (all variants) - "th": "nplurals=1; plural=0;", # Thai - "vi": "nplurals=1; plural=0;", # Vietnamese - "id": "nplurals=1; plural=0;", # Indonesian - "ms": "nplurals=1; plural=0;", # Malay - "km": "nplurals=1; plural=0;", # Khmer - "bo": "nplurals=1; plural=0;", # Tibetan - # Languages with 2 plural forms: plural=(n != 1) - "en": "nplurals=2; plural=(n != 1);", # English - "es": "nplurals=2; plural=(n != 1);", # Spanish (all variants) - "de": "nplurals=2; plural=(n != 1);", # German - "el": "nplurals=2; plural=(n != 1);", # Greek - "it": "nplurals=2; plural=(n != 1);", # Italian - "pt": "nplurals=2; plural=(n != 1);", # Portuguese (all variants) - "nl": "nplurals=2; plural=(n != 1);", # Dutch - "sv": "nplurals=2; plural=(n != 1);", # Swedish - "da": "nplurals=2; plural=(n != 1);", # Danish - "no": "nplurals=2; plural=(n != 1);", # Norwegian - "nb": "nplurals=2; plural=(n != 1);", # Norwegian Bokmål - "nn": "nplurals=2; plural=(n != 1);", # Norwegian Nynorsk - "fi": "nplurals=2; plural=(n != 1);", # Finnish - "is": "nplurals=2; plural=(n != 1);", # Icelandic - "et": "nplurals=2; plural=(n != 1);", # Estonian - "lv": "nplurals=2; plural=(n != 1);", # Latvian - "he": "nplurals=2; plural=(n != 1);", # Hebrew - "hi": "nplurals=2; plural=(n != 1);", # Hindi - "bn": "nplurals=2; plural=(n != 1);", # Bengali - "gu": "nplurals=2; plural=(n != 1);", # Gujarati - "kn": "nplurals=2; plural=(n 
!= 1);", # Kannada - "ml": "nplurals=2; plural=(n != 1);", # Malayalam - "ta": "nplurals=2; plural=(n != 1);", # Tamil - "te": "nplurals=2; plural=(n != 1);", # Telugu - "or": "nplurals=2; plural=(n != 1);", # Oriya - "si": "nplurals=2; plural=(n != 1);", # Sinhala - "ne": "nplurals=2; plural=(n != 1);", # Nepali - "mr": "nplurals=2; plural=(n != 1);", # Marathi - "ur": "nplurals=2; plural=(n != 1);", # Urdu - "az": "nplurals=2; plural=(n != 1);", # Azerbaijani - "uz": "nplurals=2; plural=(n != 1);", # Uzbek - "kk": "nplurals=2; plural=(n != 1);", # Kazakh - "mn": "nplurals=2; plural=(n != 1);", # Mongolian - "sq": "nplurals=2; plural=(n != 1);", # Albanian - "eu": "nplurals=2; plural=(n != 1);", # Basque - "ca": "nplurals=2; plural=(n != 1);", # Catalan - "gl": "nplurals=2; plural=(n != 1);", # Galician - "tr": "nplurals=2; plural=(n != 1);", # Turkish - "af": "nplurals=2; plural=(n != 1);", # Afrikaans - "fil": "nplurals=2; plural=(n != 1);", # Filipino - # Languages with 2 plural forms: plural=(n > 1) - "fr": "nplurals=2; plural=(n > 1);", # French - "br": "nplurals=2; plural=(n > 1);", # Breton - # Languages with 3 plural forms - "pl": ( - "nplurals=3; plural=(n==1 ? 0 : n%10>=2 && n%10<=4 && " - "(n%100<10 || n%100>=20) ? 1 : 2);" - ), # Polish - "ru": ( - "nplurals=3; plural=(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && " - "(n%100<10 || n%100>=20) ? 1 : 2);" - ), # Russian - "uk": ( - "nplurals=3; plural=(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && " - "(n%100<10 || n%100>=20) ? 1 : 2);" - ), # Ukrainian - "be": ( - "nplurals=3; plural=(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && " - "(n%100<10 || n%100>=20) ? 1 : 2);" - ), # Belarusian - "sr": ( - "nplurals=3; plural=(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && " - "(n%100<10 || n%100>=20) ? 1 : 2);" - ), # Serbian - "hr": ( - "nplurals=3; plural=(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && " - "(n%100<10 || n%100>=20) ? 
1 : 2);" - ), # Croatian - "bs": ( - "nplurals=3; plural=(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && " - "(n%100<10 || n%100>=20) ? 1 : 2);" - ), # Bosnian - "cs": "nplurals=3; plural=(n==1 ? 0 : (n>=2 && n<=4) ? 1 : 2);", # Czech - "sk": "nplurals=3; plural=(n==1 ? 0 : (n>=2 && n<=4) ? 1 : 2);", # Slovak - "lt": ( - "nplurals=3; plural=(n%10==1 && n%100!=11 ? 0 : n%10>=2 && " - "(n%100<10 || n%100>=20) ? 1 : 2);" - ), # Lithuanian - "hy": "nplurals=3; plural=(n==1 ? 0 : n>=2 && n<=4 ? 1 : 2);", # Armenian - "ro": ( - "nplurals=3; plural=(n==1 ? 0 : (n==0 || (n%100 > 0 && n%100 < 20)) ? 1 : 2);" - ), # Romanian - # Languages with 4 plural forms - "cy": ( - "nplurals=4; plural=(n==1 ? 0 : n==2 ? 1 : (n==8 || n==11) ? 2 : 3);" - ), # Welsh - "ga": "nplurals=4; plural=(n==1 ? 0 : n==2 ? 1 : (n>2 && n<7) ? 2 : 3);", # Irish - "gd": ( - "nplurals=4; plural=(n==1 || n==11) ? 0 : (n==2 || n==12) ? 1 : " - "(n>2 && n<20) ? 2 : 3);" - ), # Scottish Gaelic - "mt": ( - "nplurals=4; plural=(n==1 ? 0 : n==0 || (n%100>=2 && n%100<=10) ? 1 : " - "(n%100>=11 && n%100<=19) ? 2 : 3);" - ), # Maltese - # Languages with 6 plural forms - "ar": ( - "nplurals=6; plural=(n==0 ? 0 : n==1 ? 1 : n==2 ? 2 : n%100>=3 && " - "n%100<=10 ? 3 : n%100>=11 && n%100<=99 ? 4 : 5);" - ), # Arabic - # Other languages - "fa": "nplurals=2; plural=(n==0 || n==1 ? 0 : 1);", # Persian/Farsi - "hu": "nplurals=2; plural=(n != 1);", # Hungarian - "bg": "nplurals=2; plural=(n != 1);", # Bulgarian - "am": "nplurals=2; plural=(n > 1);", # Amharic -} - -# Default plural form fallback (English-style) -# Used when a language code is not found in PLURAL_FORMS -DEFAULT_PLURAL_FORM = "nplurals=2; plural=(n != 1);" - -# Typo patterns to fix in translation files -TYPO_PATTERNS = [ - ("Serch", "Search"), -] - -# Backend PO file names -BACKEND_PO_FILES = ["django.po", "djangojs.po"] - -# Backend plugin apps: (repo_dir, module_name) under translations/. 
-# Used by sync_and_translate_language to sync/translate at -# translations///conf/locale//LC_MESSAGES/django.po. -# When pulled in edx-platform (make pull_translations), these go to -# conf/plugins-locale/plugins//. -TRANSLATABLE_PLUGINS = [ - ("open-edx-plugins", "ol_openedx_chat"), -] - -# PO file header metadata -PO_HEADER_PROJECT_VERSION = "0.1a" -PO_HEADER_BUGS_EMAIL = "openedx-translation@googlegroups.com" -PO_HEADER_POT_CREATION_DATE = "2023-06-13 08:00+0000" -PO_HEADER_MIME_VERSION = "1.0" -PO_HEADER_CONTENT_TYPE = "text/plain; charset=UTF-8" -PO_HEADER_CONTENT_TRANSFER_ENCODING = "8bit" -PO_HEADER_TRANSIFEX_TEAM_BASE_URL = "https://app.transifex.com/open-edx/teams/6205" - -# File and directory names -TRANSLATION_FILE_NAMES = { - "transifex_input": "transifex_input.json", - "english": "en.json", - "messages_dir": "messages", - "i18n_dir": "i18n", - "locale_dir": "locale", - "lc_messages": "LC_MESSAGES", - "conf_dir": "conf", - "edx_platform": "edx-platform", -} - -# JSON file formatting -DEFAULT_JSON_INDENT = 2 - -# Language code to human-readable name mapping -# Used in PO file headers for Language-Team field -LANGUAGE_MAPPING = { - "ar": "Arabic", - "de": "German", - "el": "Greek", - "es": "Spanish", - "fr": "French", - "hi": "Hindi", - "id": "Indonesian", - "ja": "Japanese", - "kr": "Korean", - "pt": "Portuguese", - "ru": "Russian", - "sq": "Albanian", - "tr": "Turkish", - "zh": "Chinese", -} - -# Maximum number of retries for failed translation batches -MAX_RETRIES = 3 - -# Glossary parsing constants -EXPECTED_GLOSSARY_PARTS = 2 # English term and translation separated by "->" - -# HTTP Status Codes -HTTP_OK = 200 -HTTP_CREATED = 201 -HTTP_NOT_FOUND = 404 -HTTP_TOO_MANY_REQUESTS = 429 -HTTP_UNPROCESSABLE_ENTITY = 422 - -# Error message length limit -MAX_ERROR_MESSAGE_LENGTH = 200 - -# Maximum length for strings in log messages (truncate with "...") -MAX_LOG_STRING_LENGTH = 50 -MAX_LOG_ICU_STRING_LENGTH = 100 - -# Plural category counts (GNU gettext 
nplurals) -PLURAL_CATEGORIES_ARABIC = 6 # zero, one, two, few, many, other -PLURAL_CATEGORIES_FOUR = 4 # one, two, few, other -PLURAL_CATEGORIES_THREE = 3 # one, few, other -PLURAL_CATEGORIES_TWO = 2 # one, other (most languages) - ENGLISH_LANGUAGE_CODE = "en" # HTML/XML attribute translation policy diff --git a/src/ol_openedx_course_translations/ol_openedx_course_translations/utils/translation_sync.py b/src/ol_openedx_course_translations/ol_openedx_course_translations/utils/translation_sync.py deleted file mode 100644 index 2b6933013..000000000 --- a/src/ol_openedx_course_translations/ol_openedx_course_translations/utils/translation_sync.py +++ /dev/null @@ -1,1603 +0,0 @@ -"""Translation synchronization module for syncing and managing translation files.""" - -import json -import logging -import re -from collections import OrderedDict -from collections.abc import Iterator -from pathlib import Path -from typing import Any - -import polib # type: ignore[import-untyped] - -from ol_openedx_course_translations.utils.constants import ( - BACKEND_PO_FILES, - DEFAULT_JSON_INDENT, - DEFAULT_PLURAL_FORM, - EXPECTED_GLOSSARY_PARTS, - LANGUAGE_MAPPING, - LEARNER_FACING_APPS, - MAX_LOG_STRING_LENGTH, - PLURAL_FORMS, - PO_HEADER_BUGS_EMAIL, - PO_HEADER_CONTENT_TRANSFER_ENCODING, - PO_HEADER_CONTENT_TYPE, - PO_HEADER_MIME_VERSION, - PO_HEADER_POT_CREATION_DATE, - PO_HEADER_PROJECT_VERSION, - PO_HEADER_TRANSIFEX_TEAM_BASE_URL, - TRANSLATABLE_PLUGINS, - TRANSLATION_FILE_NAMES, - TYPO_PATTERNS, -) - -logger = logging.getLogger(__name__) - - -def load_json_file(file_path: Path) -> dict: - """Load a JSON translation file.""" - if not file_path.exists(): - return {} - try: - with file_path.open(encoding="utf-8") as f: - return json.load(f) - except json.JSONDecodeError as e: - msg = f"Error parsing JSON file {file_path}: {e}" - raise ValueError(msg) from e - - -def save_json_file(file_path: Path, data: dict, indent: int = DEFAULT_JSON_INDENT): - """Save a JSON translation file with 
proper formatting.""" - file_path.parent.mkdir(parents=True, exist_ok=True) - with file_path.open("w", encoding="utf-8") as f: - json.dump(data, f, ensure_ascii=False, indent=indent) - f.write("\n") - - -def find_typo_mappings(data: dict) -> list[tuple[str, str]]: - """Find typo keys and their correct counterparts.""" - mappings = [] - - for typo, correct in TYPO_PATTERNS: - typo_keys = [k for k in data if typo in k] - for typo_key in typo_keys: - correct_key = typo_key.replace(typo, correct) - if correct_key in data: - mappings.append((typo_key, correct_key)) - - return mappings - - -def sync_or_create_json_file(en_file: Path, target_file: Path) -> dict: - """ - Sync or create a JSON translation file. - Returns dict with stats: - {'action': 'created'|'synced'|'skipped', 'added': int, - 'fixed': int, 'removed': int} - """ - try: - en_data = load_json_file(en_file) - except ValueError: - return { - "action": "skipped", - "added": 0, - "fixed": 0, - "removed": 0, - "error": "English file not readable", - } - - if not en_data: - return { - "action": "skipped", - "added": 0, - "fixed": 0, - "removed": 0, - "error": "English file is empty", - } - - target_data = load_json_file(target_file) if target_file.exists() else {} - file_exists = target_file.exists() - - stats = { - "action": "created" if not file_exists else "synced", - "added": 0, - "fixed": 0, - "removed": 0, - } - - if file_exists: - ordered_data = OrderedDict(target_data) - - typo_mappings = find_typo_mappings(ordered_data) - for typo_key, correct_key in typo_mappings: - typo_value = ordered_data.get(typo_key, "") - correct_value = ordered_data.get(correct_key, "") - - if not correct_value and typo_value: - ordered_data[correct_key] = typo_value - # Type assertion: stats["fixed"] is always int - stats["fixed"] = int(stats["fixed"]) + 1 - - if typo_key in ordered_data: - del ordered_data[typo_key] - # Type assertion: stats["removed"] is always int - stats["removed"] = int(stats["removed"]) + 1 - - for key in 
en_data: - if key not in ordered_data: - ordered_data[key] = "" - # Type assertion: stats["added"] is always int - stats["added"] = int(stats["added"]) + 1 - - target_data = dict(ordered_data) - else: - target_data = dict.fromkeys(en_data, "") - stats["added"] = len(en_data) - - save_json_file(target_file, target_data) - - return stats - - -def _get_base_lang(lang_code: str) -> str: - """Extract base language code from locale code (e.g., 'es_ES' -> 'es').""" - return lang_code.split("_", maxsplit=1)[0] if "_" in lang_code else lang_code - - -def _get_plural_form(lang_code: str) -> str: - """Get plural form string for a language code.""" - base_lang = _get_base_lang(lang_code) - return PLURAL_FORMS.get(base_lang, DEFAULT_PLURAL_FORM) - - -def _get_po_plural_count(lang_code: str) -> int: - """Get number of plural forms for a language (for PO files).""" - plural_form = _get_plural_form(lang_code) - nplurals_match = re.search(r"nplurals=(\d+)", plural_form) - if not nplurals_match: - return 2 - return int(nplurals_match.group(1)) - - -def get_nplurals_from_po_file(file_path: Path) -> int | None: - """Read nplurals from a PO file's Plural-Forms header. - - Returns None if missing or unreadable. - """ - if not file_path.exists(): - return None - try: - po = polib.pofile(str(file_path)) - plural_forms_str = po.metadata.get("Plural-Forms", "") - if not plural_forms_str: - return None - nplurals_match = re.search(r"nplurals=(\d+)", plural_forms_str) - if not nplurals_match: - return None - return int(nplurals_match.group(1)) - except (OSError, polib.POFileError, ValueError): - return None - - -def _get_numeric_plural_keys(translation: dict) -> list: - """Return keys that are int or digit-string (plural form indices).""" - return [ - key for key in translation if isinstance(key, (int, str)) and str(key).isdigit() - ] - - -# Python-format placeholders as in Django: %(name)s, %(count)d, etc. 
-_PYTHON_FORMAT_PLACEHOLDER_RE = re.compile(r"%\(\s*(\w+)\s*\)[sdcouxXeEfFgGin%]") - -# Python brace-format placeholders: {variable_name} (Django/python-brace-format) -_PYTHON_BRACE_FORMAT_PLACEHOLDER_RE = re.compile(r"\{(\w+)\}") - - -def _get_brace_format_placeholders(text: str) -> set[str]: - """Return set of placeholder names in a Python brace-format string.""" - if not text: - return set() - return set(_PYTHON_BRACE_FORMAT_PLACEHOLDER_RE.findall(text)) - - -def _is_valid_brace_format_translation(source: str, translation: str) -> bool: - """ - Return True if translation is a valid Python brace-format string matching source. - - Ensures: same set of {name} placeholders as source, and no unterminated - format directives (so msgfmt won't fail). - """ - if not translation: - return True - source_placeholders = _get_brace_format_placeholders(source) - trans_placeholders = _get_brace_format_placeholders(translation) - if source_placeholders != trans_placeholders: - return False - try: - # Placeholders like {0}, {1} are positional; {name} is keyword. - # If all placeholders are digit strings, use positional args. - if source_placeholders and all(p.isdigit() for p in source_placeholders): - max_index = max(int(p) for p in source_placeholders) - positional = [""] * (max_index + 1) - translation.format(*positional) - else: - dummy = dict.fromkeys(source_placeholders, "") - translation.format(**dummy) - except (ValueError, KeyError, IndexError): - return False - else: - return True - - -def _get_python_format_placeholders(text: str) -> set[str]: - """Return set of placeholder names in a python-format string.""" - if not text: - return set() - return set(_PYTHON_FORMAT_PLACEHOLDER_RE.findall(text)) - - -def plural_source_has_placeholders_not_in_singular( - msgid: str, msgid_plural: str -) -> bool: - """ - Return True if msgid_plural has python-format placeholders that msgid does not. 
- - For nplurals=1 we must then use the singular translation for the single form - to avoid KeyError at runtime when n=1. - """ - if not msgid_plural: - return False - singular_placeholders = _get_python_format_placeholders(msgid) - plural_placeholders = _get_python_format_placeholders(msgid_plural) - return bool(plural_placeholders - singular_placeholders) - - -def _plural_has_placeholders_not_in_singular(entry: polib.POEntry) -> bool: - """Return True if plural has python-format placeholders that singular lacks. - - Used to choose singular vs plural source when nplurals=1. - """ - if not entry.msgid_plural or "python-format" not in (entry.flags or []): - return False - return plural_source_has_placeholders_not_in_singular( - entry.msgid or "", entry.msgid_plural or "" - ) - - -def _entry_has_asymmetric_placeholders(entry: polib.POEntry) -> bool: - """ - Return True if this is a python-format plural with asymmetric placeholders. - - Singular/plural have different placeholders (one has a variable the other - doesn't). For such entries we keep both msgstr[0] and msgstr[1] even when - the locale has nplurals=1, so msgfmt and runtime work correctly. 
- """ - if not entry.msgid_plural or "python-format" not in (entry.flags or []): - return False - singular_placeholders = _get_python_format_placeholders(entry.msgid or "") - plural_placeholders = _get_python_format_placeholders(entry.msgid_plural or "") - return singular_placeholders != plural_placeholders - - -def create_po_file_header(lang_code: str, iso_code: str | None = None) -> str: - """Create PO file header for a language.""" - if iso_code is None: - iso_code = lang_code - - base_lang = _get_base_lang(lang_code) - plural = _get_plural_form(lang_code) - lang_name = LANGUAGE_MAPPING.get(lang_code, lang_code) - - return f"""msgid "" -msgstr "" -"Project-Id-Version: {PO_HEADER_PROJECT_VERSION}\\n" -"Report-Msgid-Bugs-To: {PO_HEADER_BUGS_EMAIL}\\n" -"POT-Creation-Date: {PO_HEADER_POT_CREATION_DATE}\\n" -"PO-Revision-Date: 2025-01-01 00:00+0000\\n" -"Last-Translator: \\n" -"Language-Team: {lang_name} ({PO_HEADER_TRANSIFEX_TEAM_BASE_URL}/{base_lang}/)\\n" -"MIME-Version: {PO_HEADER_MIME_VERSION}\\n" -"Content-Type: {PO_HEADER_CONTENT_TYPE}\\n" -"Content-Transfer-Encoding: {PO_HEADER_CONTENT_TRANSFER_ENCODING}\\n" -"Language: {iso_code}\\n" -"Plural-Forms: {plural}\\n" - -""" - - -def parse_po_file(po_file: Path) -> dict[str, str]: - """ - Parse a PO file and extract msgid -> msgstr mappings. - For plural forms, uses msgid as the key - (msgid_plural entries are handled separately). - Uses polib if available, falls back to manual parsing. - """ - if not po_file.exists(): - return {} - - po = polib.pofile(str(po_file)) - entries = {} - for entry in po: - if entry.msgid: # Skip empty header msgid - # For plural entries, use msgid as key - entries[entry.msgid] = entry.msgstr or "" - return entries - - -def parse_po_file_with_metadata(po_file: Path) -> dict[str, dict]: - """ - Parse a PO file and extract msgid -> metadata mappings. 
- Returns dict with structure: - {msgid: {'msgstr': str, 'msgid_plural': str, 'msgstr_plural': dict, - 'locations': List[str], 'flags': List[str], 'is_plural': bool}} - Uses polib if available, falls back to manual parsing. - """ - if not po_file.exists(): - return {} - - po = polib.pofile(str(po_file)) - entries = {} - for entry in po: - if entry.msgid: # Skip empty header msgid - locations = [ - f"{occ[0]}:{occ[1]}" if len(occ) > 1 else occ[0] - for occ in entry.occurrences - ] - - entry_data = { - "msgstr": entry.msgstr or "", - "locations": locations, - "flags": entry.flags, # List of flags like ['python-format'] - "is_plural": entry.msgid_plural is not None, - } - if entry.msgid_plural: - entry_data["msgid_plural"] = entry.msgid_plural - # Convert msgstr_plural dict to simple dict - entry_data["msgstr_plural"] = { - form_index: entry.msgstr_plural.get(form_index, "") - for form_index in range(len(entry.msgstr_plural)) - } - entries[entry.msgid] = entry_data - return entries - - -def _create_po_entry_from_en( - entry: polib.POEntry, - lang_code: str | None = None, - nplurals_from_file: int | None = None, -) -> polib.POEntry: - """Create a new PO entry from an English entry with empty translation. - - Preserves all metadata from the English entry including: - - msgid, msgid_plural, msgctxt - - occurrences (location comments) - - flags (format flags like python-format) - - Args: - entry: English PO entry to copy from - lang_code: Target language code to determine number of plural forms. - If None, uses the number of forms from the English entry. - nplurals_from_file: When set, use this as the number of plural forms - (takes priority over lang_code). Use the translation file's - Plural-Forms header when syncing into an existing PO file. 
- """ - new_entry = polib.POEntry( - msgid=entry.msgid, - msgid_plural=entry.msgid_plural, - occurrences=entry.occurrences, - flags=entry.flags, - ) - # Preserve msgctxt (message context) if it exists - if hasattr(entry, "msgctxt") and entry.msgctxt: - new_entry.msgctxt = entry.msgctxt - - if entry.msgid_plural: - # Prefer nplurals from file, then lang rule, then English entry - if nplurals_from_file is not None: - num_forms = nplurals_from_file - elif lang_code: - num_forms = _get_po_plural_count(lang_code) - else: - num_forms = max(2, len(entry.msgstr_plural) if entry.msgstr_plural else 2) - # Special case: when singular/plural have different placeholders (e.g. variable - # only in plural), keep both [0] and [1] even if locale has nplurals=1 - if _entry_has_asymmetric_placeholders(entry): - num_forms = max(num_forms, 2) - new_entry.msgstr_plural = dict.fromkeys(range(num_forms), "") - else: - new_entry.msgstr = "" - return new_entry - - -def _sync_existing_po_file( - en_po: polib.POFile, - target_po: polib.POFile, - target_file: Path, - lang_code: str | None = None, -) -> int: - """Sync existing PO file by adding missing entries. Returns count added. - - Uses the target PO file's Plural-Forms header for the number of plural - forms when adding new entries; falls back to lang_code-based rule if - the header is missing. 
- """ - # Prefer nplurals from the existing translation file - nplurals_from_file: int | None = None - plural_forms_str = target_po.metadata.get("Plural-Forms", "") - if plural_forms_str: - nplurals_match = re.search(r"nplurals=(\d+)", plural_forms_str) - if nplurals_match: - nplurals_from_file = int(nplurals_match.group(1)) - - # Create a set of existing entries using (msgctxt, msgid, msgid_plural) tuple - # msgctxt is included because same msgid can have different contexts - existing_entries = set() - for entry in target_po: - if entry.msgid: - msgctxt = getattr(entry, "msgctxt", None) or None - key = ( - msgctxt, - entry.msgid, - entry.msgid_plural if entry.msgid_plural else None, - ) - existing_entries.add(key) - - # Add missing entries from English file - added_count = 0 - for entry in en_po: - if not entry.msgid: # Skip header - continue - - msgctxt = getattr(entry, "msgctxt", None) or None - entry_key = ( - msgctxt, - entry.msgid, - entry.msgid_plural if entry.msgid_plural else None, - ) - if entry_key not in existing_entries: - new_entry = _create_po_entry_from_en( - entry, lang_code, nplurals_from_file=nplurals_from_file - ) - target_po.append(new_entry) - added_count += 1 - - # CRITICAL: Normalize ALL entries to fix newline mismatches - normalized_count = _normalize_all_entries_in_po_file(target_po) - - if added_count > 0 or normalized_count > 0: - target_file.parent.mkdir(parents=True, exist_ok=True) - target_po.save(str(target_file)) - - return added_count - - -def _create_new_po_file( - en_po: polib.POFile, target_file: Path, lang_code: str, iso_code: str | None -) -> int: - """Create a new PO file with all entries from English. Returns count added.""" - target_po = polib.POFile() - - # Set metadata - preserve important fields from English file - target_po.metadata = en_po.metadata.copy() - target_po.metadata["Language"] = iso_code or lang_code - - # Set Plural-Forms for the target language (e.g. 
2 forms for French) - target_po.metadata["Plural-Forms"] = _get_plural_form(lang_code) - - # Copy all entries with empty translations - added_count = 0 - for entry in en_po: - if not entry.msgid: # Skip header - continue - - new_entry = _create_po_entry_from_en(entry, lang_code) - target_po.append(new_entry) - added_count += 1 - - target_file.parent.mkdir(parents=True, exist_ok=True) - target_po.save(str(target_file)) - return added_count - - -def sync_or_create_po_file( - en_file: Path, target_file: Path, lang_code: str, iso_code: str | None = None -) -> dict: - """ - Sync or create a PO file, preserving location comments and format flags. - Returns dict with stats: {'action': 'created'|'synced'|'skipped', 'added': int} - Uses polib if available for robust PO file handling. - """ - if not en_file.exists(): - return {"action": "skipped", "added": 0, "error": "English file does not exist"} - - file_exists = target_file.exists() - stats = {"action": "created" if not file_exists else "synced", "added": 0} - - # Use polib for robust PO file handling - en_po = polib.pofile(str(en_file)) - - if not en_po: - return {"action": "skipped", "added": 0, "error": "English file has no entries"} - - if file_exists: - # File exists: sync entries - target_po = polib.pofile(str(target_file)) - stats["added"] = _sync_existing_po_file( - en_po, target_po, target_file, lang_code - ) - else: - # File doesn't exist: create new with all entries from English - stats["added"] = _create_new_po_file(en_po, target_file, lang_code, iso_code) - - return stats - - -def _extract_empty_keys_from_frontend(base_dir: Path, iso_code: str) -> list[dict]: - """Extract empty translation keys from frontend JSON files.""" - logger.debug("Extracting empty keys from frontend apps for language: %s", iso_code) - empty_keys = [] - - for app in LEARNER_FACING_APPS: - target_file = ( - base_dir - / app - / "src" - / TRANSLATION_FILE_NAMES["i18n_dir"] - / TRANSLATION_FILE_NAMES["messages_dir"] - / 
f"{iso_code}.json" - ) - en_file = ( - base_dir - / app - / "src" - / TRANSLATION_FILE_NAMES["i18n_dir"] - / TRANSLATION_FILE_NAMES["transifex_input"] - ) - if not en_file.exists(): - en_file = ( - base_dir - / app - / "src" - / TRANSLATION_FILE_NAMES["i18n_dir"] - / TRANSLATION_FILE_NAMES["messages_dir"] - / TRANSLATION_FILE_NAMES["english"] - ) - - if not target_file.exists() or not en_file.exists(): - logger.debug( - "Skipping %s: target file or English file missing (target: %s, en: %s)", - app, - target_file.exists(), - en_file.exists(), - ) - continue - - try: - target_data = load_json_file(target_file) - en_data = load_json_file(en_file) - logger.debug( - "Processing %s: found %d keys in English file", app, len(en_data) - ) - - for key in en_data: - target_value = target_data.get(key, "") - if not target_value or ( - isinstance(target_value, str) and not target_value.strip() - ): - english_value = en_data[key] - # Skip non-string values (numbers, booleans, objects, arrays) - # These shouldn't be translated as they would break JSON structure - if not isinstance(english_value, str): - logger.debug( - "Skipping non-string value for key '%s' in %s: %s " - "(type: %s). 
Only string values are translatable.", - key, - app, - english_value, - type(english_value).__name__, - ) - continue - # Check if English value is already in ICU MessageFormat - is_icu_plural = ( - isinstance(english_value, str) and ", plural," in english_value - ) - - empty_keys.append( - { - "app": app, - "key": key, - "english": english_value, - "translation": "", - "file_type": "json", - "file_path": str(target_file.resolve()), - "is_plural": is_icu_plural, - } - ) - logger.debug("Extracted %d empty key(s) from %s", len(empty_keys), app) - except (OSError, ValueError, json.JSONDecodeError) as e: - logger.warning( - "Skipping %s due to error loading translation files: %s", app, e - ) - continue - - logger.info( - "Extracted %d total empty key(s) from frontend apps for language: %s", - len(empty_keys), - iso_code, - ) - return empty_keys - - -def _is_po_entry_empty( - entry: polib.POEntry, target_entry: polib.POEntry | None -) -> bool: - """Check if a PO entry is empty or missing.""" - if target_entry is None: - return True - - if entry.msgid_plural: - # Plural entry - check if plural forms are empty - return any( - not target_entry.msgstr_plural.get(form_index, "").strip() - for form_index in range(len(target_entry.msgstr_plural)) - ) - - # Singular entry - check if empty - return not target_entry.msgstr or not target_entry.msgstr.strip() - - -def _extract_empty_keys_from_po_file( - target_file: Path, en_file: Path, po_file_name: str, app_name: str -) -> list[dict]: - """Extract empty keys from one PO file. 
Returns list of key dicts.""" - empty_keys = [] - try: - target_po = polib.pofile(str(target_file)) - en_po = polib.pofile(str(en_file)) - target_entries_dict = {} - for entry in target_po: - if entry.msgid: - msgctxt = getattr(entry, "msgctxt", None) or None - key = (msgctxt, entry.msgid) - target_entries_dict[key] = entry - for entry in en_po: - if not entry.msgid: - continue - msgctxt = getattr(entry, "msgctxt", None) or None - entry_key = (msgctxt, entry.msgid) - target_entry = target_entries_dict.get(entry_key) - if _is_po_entry_empty(entry, target_entry): - empty_keys.append( - { - "app": app_name, - "key": entry.msgid, - "english": entry.msgid, - "translation": "", - "file_type": "po", - "file_path": str(target_file.resolve()), - "po_file": po_file_name, - "is_plural": entry.msgid_plural is not None, - "msgid_plural": ( - entry.msgid_plural if entry.msgid_plural else None - ), - "msgctxt": msgctxt, - "flags": list(entry.flags) if entry.flags else [], - } - ) - except (OSError, polib.POFileError, ValueError) as e: - logger.warning("Skipping %s due to error loading PO file: %s", target_file, e) - return empty_keys - - -def _plugin_locale_base(base_dir: Path, repo_dir: str, module_name: str) -> Path: - """Return conf/locale path for a backend plugin under translations/.""" - return ( - base_dir - / repo_dir - / module_name - / TRANSLATION_FILE_NAMES["conf_dir"] - / TRANSLATION_FILE_NAMES["locale_dir"] - ) - - -def _iter_backend_plugin_po_files( - base_dir: Path, backend_locale: str -) -> Iterator[tuple[str, Path, Path, str]]: - """ - Yield (module_name, en_file, target_file, po_file_name) for each backend - plugin PO file where the English source exists. 
- """ - lc_messages = TRANSLATION_FILE_NAMES["lc_messages"] - for repo_dir, module_name in TRANSLATABLE_PLUGINS: - plugin_base = _plugin_locale_base(base_dir, repo_dir, module_name) - en_locale_dir = plugin_base / "en" / lc_messages - target_locale_dir = plugin_base / backend_locale / lc_messages - for po_file_name in BACKEND_PO_FILES: - en_file = en_locale_dir / po_file_name - if not en_file.exists(): - continue - target_file = target_locale_dir / po_file_name - yield (module_name, en_file, target_file, po_file_name) - - -def _extract_empty_keys_from_backend(base_dir: Path, backend_locale: str) -> list[dict]: - """Extract empty keys from backend PO files (edx-platform + backend plugins).""" - empty_keys = [] - lc_messages = TRANSLATION_FILE_NAMES["lc_messages"] - locale_dir = ( - base_dir - / TRANSLATION_FILE_NAMES["edx_platform"] - / TRANSLATION_FILE_NAMES["conf_dir"] - / TRANSLATION_FILE_NAMES["locale_dir"] - ) - for po_file_name in BACKEND_PO_FILES: - target_file = locale_dir / backend_locale / lc_messages / po_file_name - en_file = locale_dir / "en" / lc_messages / po_file_name - if not target_file.exists() or not en_file.exists(): - continue - empty_keys.extend( - _extract_empty_keys_from_po_file( - target_file, en_file, po_file_name, "edx-platform" - ) - ) - for ( - module_name, - en_file, - target_file, - po_file_name, - ) in _iter_backend_plugin_po_files(base_dir, backend_locale): - if not target_file.exists(): - continue - empty_keys.extend( - _extract_empty_keys_from_po_file( - target_file, en_file, po_file_name, module_name - ) - ) - return empty_keys - - -def extract_empty_keys( - base_dir: Path, - lang_code: str, - iso_code: str | None = None, - *, - skip_backend: bool = False, -) -> list[dict]: - """ - Extract all empty translation keys for a language. 
- Returns list of dicts with: - {'app': str, 'key': str, 'english': str, 'file_type': 'json'|'po'} - """ - if iso_code is None: - iso_code = lang_code - - empty_keys = _extract_empty_keys_from_frontend(base_dir, iso_code) - - if not skip_backend: - backend_locale = iso_code if iso_code and iso_code != lang_code else lang_code - empty_keys.extend(_extract_empty_keys_from_backend(base_dir, backend_locale)) - - return empty_keys - - -def apply_json_translations(file_path: Path, translations: dict[str, str]) -> int: - """ - Apply translations to a JSON file. - Returns number of translations applied. - """ - data = load_json_file(file_path) - applied = 0 - skipped = 0 - - for key, translation in translations.items(): - if key in data: - # Check if the value is empty (empty string, whitespace only, or None) - current_value = data[key] - if not current_value or ( - isinstance(current_value, str) and not current_value.strip() - ): - data[key] = translation - applied += 1 - logger.debug( - "Applied translation for key '%s' in %s", key, file_path.name - ) - else: - skipped += 1 - logger.debug( - "Skipped key '%s' in %s (already has value: %s)", - key, - file_path.name, - current_value[:50] - if isinstance(current_value, str) - else current_value, - ) - else: - skipped += 1 - logger.debug( - "Skipped key '%s' in %s (key not found in target file)", - key, - file_path.name, - ) - - if applied > 0: - save_json_file(file_path, data) - logger.info( - "Applied %d translation(s) to %s (%d skipped)", - applied, - file_path.name, - skipped, - ) - elif skipped > 0: - logger.debug( - "No translations applied to %s (%d keys skipped - already have values)", - file_path.name, - skipped, - ) - - return applied - - -def load_glossary(glossary_path: Path, _lang_code: str = "") -> dict[str, Any]: - """ - Load glossary for a language from a text file. 
- Parses text format with term mappings like: - 'english term' -> 'translation' - Returns dict mapping English -> Translation (string or dict for plural forms). - - Args: - glossary_path: Path to the glossary text file. - _lang_code: Language code (currently unused, kept for API compatibility). - - Returns: - Dictionary mapping English terms to translations. Translations can be: - - Strings for singular terms - - Dicts with 'singular' and 'plural' keys for plural forms - - Text file format: - # Comments and headers - ## TERM MAPPINGS - - 'english term' -> 'translation' - - 'another term' -> 'another translation' - - Example: - - 'accuracy' -> 'الدقة' - - 'activation function' -> 'دالّة التفعيل' - """ - if not glossary_path.exists(): - return {} - - glossary = {} - - try: - with glossary_path.open(encoding="utf-8") as f: - for raw_line in f: - line = raw_line.strip() - - # Skip empty lines, comments, and headers - if not line or line.startswith("#"): - continue - - # Parse lines like: - 'english term' -> 'translation' - if line.startswith("- ") and "->" in line: - # Extract the mapping - # Format: - 'english term' -> 'translation' - mapping_line = line[2:].strip() # Remove leading '- ' - parts = mapping_line.split("->", 1) - - if len(parts) == EXPECTED_GLOSSARY_PARTS: - english_term = parts[0].strip().strip("'\"") - translation = parts[1].strip().strip("'\"") - - if english_term and translation: - glossary[english_term] = translation - except (OSError, UnicodeDecodeError): - # Log specific file-related errors but return empty dict to allow continuation - # In a library function, we can't use stdout, so we just return empty dict - # The caller can handle logging if needed - return {} - except (ValueError, AttributeError, IndexError): - # Catch parsing errors and other unexpected errors - return {} - else: - return glossary - - -def match_glossary_term( - text: str, glossary: dict[str, Any] | None, *, exact_match: bool = True -) -> Any | None: - """ - Match text 
against glossary terms. - Returns translation (string or dict with 'singular'/'plural') if match found, - None otherwise. - Supports both simple format ("term": "translation") and plural format - ("term": {"singular": "...", "plural": "..."}). - - Args: - text: The text to match against glossary terms. - glossary: Dictionary mapping English terms to translations, or None. - exact_match: If True, only exact matches are returned. - If False, case-insensitive and partial matches are allowed. - - Returns: - Translation string/dict if match found, None otherwise. - """ - if not glossary: - return None - - if text in glossary: - # Return as-is: string for singular, dict for plural - return glossary[text] - - if not exact_match: - text_lower = text.lower().strip() - for term, translation in glossary.items(): - if term.lower().strip() == text_lower: - return translation - - for term, translation in glossary.items(): - if term.lower() in text_lower or text_lower in term.lower(): - return translation - - return None - - -def _normalize_translation_newlines(msgid: str, translation: str) -> str: - """ - Normalize translation to match msgid's newline structure EXACTLY. - - CRITICAL: msgfmt checks the ACTUAL last character of the string, NOT after - stripping whitespace. So we must match whether the string ends with '\n' or not. 
- - Args: - msgid: The original msgid string - translation: The translation string to normalize - - Returns: - Normalized translation string with matching newline structure - """ - if not translation: - return translation - - # Handle leading newlines - msgid_no_leading_spaces = msgid.lstrip(" \t") - msgid_starts_newline = ( - msgid_no_leading_spaces.startswith("\n") if msgid_no_leading_spaces else False - ) - - normalized = translation - - if msgid_starts_newline: - if not normalized.startswith("\n"): - normalized = "\n" + normalized - else: - normalized = normalized.lstrip("\n") - - # CRITICAL: Check the ACTUAL last character - msgid_ends_with_newline = msgid.endswith("\n") if msgid else False - normalized_ends_with_newline = normalized.endswith("\n") if normalized else False - - if msgid_ends_with_newline and not normalized_ends_with_newline: - # msgid ends with \n, but translation doesn't - add trailing newline - normalized = normalized.rstrip(" \t") + "\n" - elif not msgid_ends_with_newline and normalized_ends_with_newline: - # msgid doesn't end with \n, but translation does - remove trailing newline - normalized = normalized.rstrip(" \t\n") - - return normalized - - -def _normalize_all_entries_in_po_file(po: polib.POFile) -> int: - """ - Normalize newlines for ALL entries in a PO file. - Ensures entries with existing translations have correct newline structure. - - Returns: - Number of entries that were normalized (changed) - """ - normalized_count = 0 - - for entry in po: - if not entry.msgid: # Skip header - continue - - if entry.msgid_plural: - if _normalize_plural_entry(entry): - normalized_count += 1 - elif entry.msgstr and _normalize_singular_entry(entry): - normalized_count += 1 - - return normalized_count - - -def _normalize_plural_entry(entry: polib.POEntry) -> bool: - """Normalize plural entry newlines. 
Returns True if changed.""" - if not entry.msgstr_plural: - return False - - changed = False - for form_index, msgstr_plural_val in entry.msgstr_plural.items(): - if msgstr_plural_val: - # msgstr[0] matches msgid, msgstr[1+] matches msgid_plural - reference = entry.msgid if form_index == 0 else entry.msgid_plural - normalized = _normalize_translation_newlines(reference, msgstr_plural_val) - if normalized != msgstr_plural_val: - entry.msgstr_plural[form_index] = normalized - changed = True - return changed - - -def _normalize_singular_entry(entry: polib.POEntry) -> bool: - """Normalize singular entry newlines. Returns True if changed.""" - normalized = _normalize_translation_newlines(entry.msgid, entry.msgstr) - if normalized != entry.msgstr: - entry.msgstr = normalized - return True - return False - - -def _entry_has_brace_format(entry: polib.POEntry) -> bool: - """Return True if entry is python-brace-format (uses {variable} placeholders).""" - return bool(entry.flags and "python-brace-format" in entry.flags) - - -def _apply_numeric_plural_forms( - entry: polib.POEntry, - translation: dict[str, str], - numeric_keys: list, -) -> tuple[bool, bool]: - """Apply plural forms when translation has numeric keys (e.g. 0, 1, 2). - - Returns (applied, rejected_brace_format). 
- """ - plural_applied = False - rejected_brace = False - for key in numeric_keys: - form_index = int(key) if isinstance(key, str) else key - if ( - form_index >= len(entry.msgstr_plural) - or entry.msgstr_plural.get(form_index, "").strip() - ): - continue - reference = entry.msgid if form_index == 0 else entry.msgid_plural - normalized = _normalize_translation_newlines( - reference or entry.msgid, str(translation[key]).strip() - ) - if _entry_has_brace_format(entry) and not _is_valid_brace_format_translation( - reference or entry.msgid or "", normalized - ): - logger.warning( - "Rejected plural form %s for brace-format entry " - "(invalid placeholders): msgid=%r", - form_index, - (entry.msgid or "")[:60], - ) - rejected_brace = True - else: - entry.msgstr_plural[form_index] = normalized - plural_applied = True - return plural_applied, rejected_brace - - -def _apply_singular_plural_one_form( - entry: polib.POEntry, translation: dict[str, str] -) -> tuple[bool, bool]: - """Apply singular/plural dict when locale has nplurals=1.""" - if entry.msgstr_plural.get(0, "").strip(): - return False, False - if _plural_has_placeholders_not_in_singular(entry): - normalized = _normalize_translation_newlines( - entry.msgid, translation["singular"] - ) - ref = entry.msgid - else: - normalized = _normalize_translation_newlines( - entry.msgid_plural or entry.msgid, translation["plural"] - ) - ref = entry.msgid_plural or entry.msgid - if _entry_has_brace_format(entry) and not _is_valid_brace_format_translation( - ref or "", normalized - ): - logger.warning( - "Rejected plural form 0 for brace-format entry: msgid=%r", - (entry.msgid or "")[:60], - ) - return False, True - entry.msgstr_plural[0] = normalized - return True, False - - -def _apply_singular_plural_multi_form( - entry: polib.POEntry, translation: dict[str, str], num_forms: int -) -> tuple[bool, bool]: - """Apply singular/plural dict when locale has two or more plural forms.""" - plural_applied = False - rejected_brace = 
False - if not entry.msgstr_plural.get(0, "").strip(): - normalized_singular = _normalize_translation_newlines( - entry.msgid, translation["singular"] - ) - if _entry_has_brace_format(entry) and not _is_valid_brace_format_translation( - entry.msgid or "", normalized_singular - ): - logger.warning( - "Rejected singular form for brace-format entry: msgid=%r", - (entry.msgid or "")[:60], - ) - rejected_brace = True - else: - entry.msgstr_plural[0] = normalized_singular - plural_applied = True - - plural_val = (translation.get("plural") or "").strip() - if _plural_has_placeholders_not_in_singular(entry): - required = _get_python_format_placeholders(entry.msgid_plural or "") - existing = _get_python_format_placeholders(plural_val) - missing = required - existing - if missing: - suffix = " ".join(f"%({k})s" for k in sorted(missing)) - plural_val = f"{plural_val} {suffix}" if plural_val else suffix - if ( - not plural_val - and not _plural_has_placeholders_not_in_singular(entry) - and (translation.get("singular") or "").strip() - ): - plural_val = (translation.get("singular") or "").strip() - - for form_index in range(1, num_forms): - if not entry.msgstr_plural.get(form_index, "").strip() and plural_val: - normalized_plural = _normalize_translation_newlines( - entry.msgid_plural or entry.msgid, plural_val - ) - ref_plural = entry.msgid_plural or entry.msgid or "" - if _entry_has_brace_format( - entry - ) and not _is_valid_brace_format_translation(ref_plural, normalized_plural): - logger.warning( - "Rejected plural form %s for brace-format entry: msgid_plural=%r", - form_index, - (ref_plural or "")[:60], - ) - rejected_brace = True - else: - entry.msgstr_plural[form_index] = normalized_plural - plural_applied = True - return plural_applied, rejected_brace - - -def _apply_plural_dict_translation( - entry: polib.POEntry, translation: dict[str, str] -) -> tuple[bool, bool]: - """Apply plural translation from dict. 
Returns (applied, rejected_brace_format).""" - numeric_keys = _get_numeric_plural_keys(translation) - if numeric_keys: - return _apply_numeric_plural_forms(entry, translation, numeric_keys) - if "singular" in translation and "plural" in translation: - num_forms = len(entry.msgstr_plural) - if num_forms == 1: - return _apply_singular_plural_one_form(entry, translation) - return _apply_singular_plural_multi_form(entry, translation, num_forms) - return False, False - - -def _apply_plural_string_translation( - entry: polib.POEntry, translation: str -) -> tuple[bool, bool]: - """Apply plural translation from string. - - Returns (applied, rejected_brace_format). - """ - plural_applied = False - rejected_brace = False - # Normalize translation to match msgid structure - normalized_translation = _normalize_translation_newlines(entry.msgid, translation) - for form_index in range(len(entry.msgstr_plural)): - if not entry.msgstr_plural.get(form_index, "").strip(): - reference = ( - entry.msgid if form_index == 0 else (entry.msgid_plural or entry.msgid) - ) - if _entry_has_brace_format( - entry - ) and not _is_valid_brace_format_translation( - reference or "", normalized_translation - ): - logger.warning( - "Rejected plural string for brace-format entry (form %s): msgid=%r", - form_index, - (entry.msgid or "")[:60], - ) - rejected_brace = True - else: - entry.msgstr_plural[form_index] = normalized_translation - plural_applied = True - return plural_applied, rejected_brace - - -def _apply_translation_to_plural_entry( - entry: polib.POEntry, translation: Any -) -> tuple[bool, bool]: - """Apply translation to a plural PO entry. - - Returns (applied, rejected_brace_format). 
- """ - # Check if translation is a string representation of a dict - if ( - isinstance(translation, str) - and translation.strip().startswith("{") - and translation.strip().endswith("}") - ): - try: - translation = json.loads(translation.strip()) - except (json.JSONDecodeError, ValueError): - if translation: - applied, rejected = _apply_plural_string_translation(entry, translation) - return applied, rejected - return False, False - - if isinstance(translation, dict): - numeric_keys = _get_numeric_plural_keys(translation) - if numeric_keys or "singular" in translation: - return _apply_plural_dict_translation(entry, translation) - if isinstance(translation, str) and translation: - return _apply_plural_string_translation(entry, translation) - return False, False - - -def _apply_translation_to_singular_entry( - entry: polib.POEntry, translation: Any -) -> tuple[bool, bool]: - """Apply translation to a singular PO entry. - - Returns (applied, rejected_brace_format). - """ - if isinstance(translation, dict) and "singular" in translation: - logger.info( - "LLM returned dict for singular entry; msgid=%r msgctxt=%r", - entry.msgid, - getattr(entry, "msgctxt", None), - ) - translation_str = str(translation["singular"]).strip() - if translation_str: - normalized_translation = _normalize_translation_newlines( - entry.msgid, translation_str - ) - if _entry_has_brace_format( - entry - ) and not _is_valid_brace_format_translation( - entry.msgid or "", normalized_translation - ): - logger.warning( - "Rejected translation for brace-format msgid (invalid placeholders " - "or unterminated directive): msgid=%r", - (entry.msgid or "")[:80], - ) - return False, True - entry.msgstr = normalized_translation - return True, False - if isinstance(translation, str) and translation: - normalized_translation = _normalize_translation_newlines( - entry.msgid, translation - ) - if _entry_has_brace_format(entry) and not _is_valid_brace_format_translation( - entry.msgid or "", 
normalized_translation - ): - logger.warning( - "Rejected translation for brace-format msgid (invalid placeholders " - "or unterminated directive): msgid=%r", - (entry.msgid or "")[:80], - ) - return False, True - entry.msgstr = normalized_translation - return True, False - return False, False - - -def _apply_translation_to_entry( - entry: polib.POEntry, translation: Any -) -> tuple[bool, bool]: - """ - Apply translation to a PO entry. Returns (applied, rejected_brace_format). - - Args: - entry: The PO entry to apply translation to. - translation: Translation value (string or dict with 'singular'/'plural' - or numeric keys '0', '1', '2', etc. for multiple forms). - - Returns: - (True if translation was applied, True if rejected due to invalid brace format). - """ - if entry.msgid_plural: - return _apply_translation_to_plural_entry(entry, translation) - if not entry.msgstr or not entry.msgstr.strip(): - return _apply_translation_to_singular_entry(entry, translation) - return False, False - - -def _expand_plural_forms_if_needed(entry: polib.POEntry, po: polib.POFile) -> bool: - """Expand plural forms if entry has fewer forms than required by language. - - Required form count comes from the PO file's Plural-Forms header (apply - may set it from constants when lang_code is provided, to avoid empty msgstr). 
- """ - if not entry.msgid_plural: - return False - - plural_forms_str = po.metadata.get("Plural-Forms", "") - if not plural_forms_str: - return False - - nplurals_match = re.search(r"nplurals=(\d+)", plural_forms_str) - if not nplurals_match: - return False - - required_forms = int(nplurals_match.group(1)) - # Special case: when singular/plural have different placeholders, keep both - # msgstr[0] and msgstr[1] even if locale has nplurals=1 - if _entry_has_asymmetric_placeholders(entry): - required_forms = max(required_forms, 2) - current_forms = len(entry.msgstr_plural) if entry.msgstr_plural else 0 - - if current_forms < required_forms: - if not entry.msgstr_plural: - entry.msgstr_plural = {} - for form_index in range(current_forms, required_forms): - entry.msgstr_plural[form_index] = "" - return True - - return False - - -def _get_translation_for_po_entry( - entry: polib.POEntry, translations: dict[str, Any] -) -> Any | None: - """Look up translation for a PO entry (msgctxt:msgid or msgid).""" - entry_msgctxt = getattr(entry, "msgctxt", None) or None - if entry_msgctxt: - key_with_context = f"{entry_msgctxt}:{entry.msgid}" - value = translations.get(key_with_context) - if value is not None: - return value - return translations.get(entry.msgid) - - -def _log_po_entry_result( - entry: polib.POEntry, file_path: Path, *, applied: bool -) -> None: - """Log whether a translation was applied or skipped for an entry.""" - msgid_display = ( - entry.msgid[:MAX_LOG_STRING_LENGTH] + "..." 
- if len(entry.msgid) > MAX_LOG_STRING_LENGTH - else entry.msgid - ) - if applied: - logger.debug( - "Applied translation for msgid '%s' in %s", - msgid_display, - file_path.name, - ) - else: - logger.debug( - "Skipped msgid '%s' in %s (already has translation)", - msgid_display, - file_path.name, - ) - - -def _save_po_if_updated( - po: polib.POFile, - file_path: Path, - counts: tuple[int, int, int], - *, - header_updated: bool = False, -) -> None: - """Save PO file and log result if any changes were made. - - counts: (applied, skipped, normalized_count). - """ - applied, skipped, normalized_count = counts - if applied > 0 or normalized_count > 0 or header_updated: - po.save(str(file_path)) - if applied > 0 or normalized_count > 0: - logger.info( - "Applied %d translation(s) to %s (%d skipped)", - applied, - file_path.name, - skipped, - ) - elif header_updated: - logger.debug("Updated Plural-Forms in %s", file_path.name) - elif skipped > 0: - logger.debug( - "No translations applied to %s (%d entries skipped - " - "already have translations)", - file_path.name, - skipped, - ) - - -def apply_po_translations( - file_path: Path, - translations: dict[str, Any], - lang_code: str | None = None, - rejected_brace_entries: list[dict[str, str]] | None = None, -) -> int: - """ - Apply translations to a PO file. Returns number of translations applied. - Handles both singular and plural forms. - For plural forms, translations dict can contain: - - Dict with 'singular' and 'plural' keys: {"singular": "...", "plural": "..."} - - Dict with numeric keys '0', '1', '2', etc. for multiple forms - - String: applies same translation to all plural forms - - The translations dict is keyed by msgid. If entries have msgctxt, we try - to match by msgid first, and if there are multiple matches, we prefer - entries without msgctxt or with matching msgctxt. - - Plural-Forms: We give priority to the nplurals/Plural-Forms already in the - translation file (e.g. from the cloned repo or Transifex). 
We only set - Plural-Forms from our constants when the file has no Plural-Forms header. - - If rejected_brace_entries is provided, entries whose translation was - rejected due to invalid python-brace-format are appended as - {"msgid": ..., "file": ...} for PR description logging. - """ - po = polib.pofile(str(file_path)) - applied = 0 - skipped = 0 - - header_updated = False - if lang_code: - existing_plural = po.metadata.get("Plural-Forms", "").strip() - if not existing_plural: - # File has no Plural-Forms: fall back to our constant - po.metadata["Plural-Forms"] = _get_plural_form(lang_code) - header_updated = True - # If file already has Plural-Forms, we do not overwrite it - - for entry in po: - if entry.msgid_plural: - _expand_plural_forms_if_needed(entry, po) - if not entry.msgid: - continue - - translation = _get_translation_for_po_entry(entry, translations) - if translation is None: - skipped += 1 - continue - - was_applied, rejected_brace = _apply_translation_to_entry(entry, translation) - if rejected_brace and rejected_brace_entries is not None: - rejected_brace_entries.append( - {"msgid": (entry.msgid or "")[:200], "file": file_path.name} - ) - if was_applied: - applied += 1 - else: - skipped += 1 - _log_po_entry_result(entry, file_path, applied=was_applied) - - normalized_count = _normalize_all_entries_in_po_file(po) - _save_po_if_updated( - po, - file_path, - (applied, skipped, normalized_count), - header_updated=header_updated, - ) - return applied - - -def _sync_frontend_translations(base_dir: Path, iso_code: str) -> dict[str, int]: - """Sync frontend translation files. 
Returns stats.""" - frontend_stats = {"added": 0, "fixed": 0, "removed": 0, "created": 0, "synced": 0} - - for app in LEARNER_FACING_APPS: - app_dir = base_dir / app / "src" / TRANSLATION_FILE_NAMES["i18n_dir"] - messages_dir = app_dir / TRANSLATION_FILE_NAMES["messages_dir"] - - en_file = app_dir / TRANSLATION_FILE_NAMES["transifex_input"] - if not en_file.exists(): - en_file = messages_dir / TRANSLATION_FILE_NAMES["english"] - - target_file = messages_dir / f"{iso_code}.json" - - if not en_file.exists(): - continue - - try: - stats = sync_or_create_json_file(en_file, target_file) - if stats["action"] == "created": - frontend_stats["created"] += 1 - elif stats["action"] == "synced": - frontend_stats["synced"] += 1 - - frontend_stats["added"] += stats.get("added", 0) - frontend_stats["fixed"] += stats.get("fixed", 0) - frontend_stats["removed"] += stats.get("removed", 0) - except (OSError, ValueError, json.JSONDecodeError) as e: - logger.warning( - "Skipping %s due to error syncing translation file: %s", app, e - ) - continue - - return frontend_stats - - -def _sync_backend_translations( - base_dir: Path, lang_code: str, iso_code: str -) -> dict[str, int]: - """Sync backend translation files. 
Returns stats.""" - backend_stats = {"added": 0} - backend_locale = iso_code if iso_code and iso_code != lang_code else lang_code - locale_dir = ( - base_dir - / TRANSLATION_FILE_NAMES["edx_platform"] - / TRANSLATION_FILE_NAMES["conf_dir"] - / TRANSLATION_FILE_NAMES["locale_dir"] - / backend_locale - / TRANSLATION_FILE_NAMES["lc_messages"] - ) - - for po_file_name in BACKEND_PO_FILES: - en_file = ( - base_dir - / TRANSLATION_FILE_NAMES["edx_platform"] - / TRANSLATION_FILE_NAMES["conf_dir"] - / TRANSLATION_FILE_NAMES["locale_dir"] - / "en" - / TRANSLATION_FILE_NAMES["lc_messages"] - / po_file_name - ) - target_file = locale_dir / po_file_name - - if not en_file.exists(): - continue - - try: - stats = sync_or_create_po_file( - en_file, target_file, backend_locale, iso_code - ) - backend_stats["added"] += stats.get("added", 0) - except (OSError, polib.POFileError, ValueError): - continue - - # Backend plugin apps: sync translations///conf/locale/... - for ( - _module_name, - en_file, - target_file, - _po_file_name, - ) in _iter_backend_plugin_po_files(base_dir, backend_locale): - try: - stats = sync_or_create_po_file( - en_file, target_file, backend_locale, iso_code - ) - backend_stats["added"] += stats.get("added", 0) - except (OSError, polib.POFileError, ValueError): - continue - - return backend_stats - - -def sync_all_translations( - base_dir: Path, - lang_code: str, - iso_code: str | None = None, - *, - skip_backend: bool = False, -) -> dict: - """ - Sync all translation files for a language. - Returns summary stats. 
- """ - if iso_code is None: - iso_code = lang_code - - frontend_stats = _sync_frontend_translations(base_dir, iso_code) - backend_stats = ( - _sync_backend_translations(base_dir, lang_code, iso_code) - if not skip_backend - else {"added": 0} - ) - - return { - "frontend": frontend_stats, - "backend": backend_stats, - } diff --git a/src/ol_openedx_course_translations/pyproject.toml b/src/ol_openedx_course_translations/pyproject.toml index d62c64855..1a753fe85 100644 --- a/src/ol_openedx_course_translations/pyproject.toml +++ b/src/ol_openedx_course_translations/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "ol-openedx-course-translations" -version = "0.5.3" +version = "0.6.0" description = "An Open edX plugin to translate courses" authors = [ {name = "MIT Office of Digital Learning"} @@ -14,9 +14,6 @@ dependencies = [ "djangorestframework>=3.14.0", "deepl>=1.25.0", "litellm==1.82.5", - "GitPython>=3.1.40", - "requests>=2.31.0", - "polib>=1.2.0", "srt>=3.5.3", "edx-opaque-keys", ] From 94b32bc1b0e0ed9a33e599d0e0184b1eb9b47fb3 Mon Sep 17 00:00:00 2001 From: Asad Ali Date: Tue, 14 Apr 2026 14:42:23 +0500 Subject: [PATCH 05/14] more refactoring --- .../ol_openedx_ai_static_translations/apps.py | 3 +++ .../ol_openedx_ai_static_translations/settings/lms.py | 10 ++++++++++ src/ol_openedx_ai_static_translations/pyproject.toml | 4 +++- 3 files changed, 16 insertions(+), 1 deletion(-) create mode 100644 src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/settings/lms.py diff --git a/src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/apps.py b/src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/apps.py index e9757453b..7ad592bb0 100644 --- a/src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/apps.py +++ b/src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/apps.py @@ -20,5 +20,8 @@ class OLOpenedXAIStaticTranslationsConfig(AppConfig): ProjectType.CMS: { SettingsType.COMMON: 
{PluginSettings.RELATIVE_PATH: "settings.cms"}, }, + ProjectType.LMS: { + SettingsType.COMMON: {PluginSettings.RELATIVE_PATH: "settings.lms"}, + }, }, } diff --git a/src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/settings/lms.py b/src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/settings/lms.py new file mode 100644 index 000000000..d1e2a6054 --- /dev/null +++ b/src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/settings/lms.py @@ -0,0 +1,10 @@ +"""Settings to provide to edX""" + +from ol_openedx_ai_static_translations.settings import apply_common_settings + + +def plugin_settings(settings): + """ + Populate lms settings + """ + apply_common_settings(settings) diff --git a/src/ol_openedx_ai_static_translations/pyproject.toml b/src/ol_openedx_ai_static_translations/pyproject.toml index d2de24e71..c3fafcb14 100644 --- a/src/ol_openedx_ai_static_translations/pyproject.toml +++ b/src/ol_openedx_ai_static_translations/pyproject.toml @@ -20,6 +20,9 @@ dependencies = [ [project.entry-points."cms.djangoapp"] ol_openedx_ai_static_translations = "ol_openedx_ai_static_translations.apps:OLOpenedXAIStaticTranslationsConfig" +[project.entry-points."lms.djangoapp"] +ol_openedx_course_translations = "ol_openedx_course_translations.apps:OLOpenedXCourseTranslationsConfig" + [build-system] requires = ["hatchling"] build-backend = "hatchling.build" @@ -28,7 +31,6 @@ build-backend = "hatchling.build" packages = ["ol_openedx_ai_static_translations"] include = [ "ol_openedx_ai_static_translations/**/*.py", - "ol_openedx_ai_static_translations/**/*.txt", ] [tool.hatch.build.targets.sdist] From 020fd7999cc00565220fa2b4a8ccc923dfe9aadb Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 17 Apr 2026 12:48:28 +0000 Subject: [PATCH 06/14] refactor: address review feedback on ol_openedx_ai_static_translations Agent-Logs-Url: 
https://github.com/mitodl/open-edx-plugins/sessions/f770b098-565c-42d4-b11b-06957474d61f Co-authored-by: arslanashraf7 <34372316+arslanashraf7@users.noreply.github.com> --- .../README.rst | 6 +-- .../ol_openedx_ai_static_translations/apps.py | 3 -- .../settings/__init__.py | 41 +------------- .../settings/cms.py | 2 +- .../settings/common.py | 40 ++++++++++++++ .../settings/lms.py | 10 ---- .../pyproject.toml | 3 -- uv.lock | 54 ++++++++++++++++--- 8 files changed, 92 insertions(+), 67 deletions(-) create mode 100644 src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/settings/common.py delete mode 100644 src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/settings/lms.py diff --git a/src/ol_openedx_ai_static_translations/README.rst b/src/ol_openedx_ai_static_translations/README.rst index d57bd17cc..ce4b95863 100644 --- a/src/ol_openedx_ai_static_translations/README.rst +++ b/src/ol_openedx_ai_static_translations/README.rst @@ -30,9 +30,9 @@ This plugin shares settings with ``ol_openedx_course_translations``. 
Ensure the "gemini": {"api_key": "", "default_model": "gemini-3-pro-preview"}, "mistral": {"api_key": "", "default_model": "mistral-large-latest"}, } - TRANSLATIONS_GITHUB_TOKEN: - TRANSLATIONS_REPO_PATH: "" - TRANSLATIONS_REPO_URL: "https://github.com/mitodl/mitxonline-translations.git" + TRANSLATIONS_GITHUB_TOKEN: # Personal access token with repo write permissions for creating PRs + TRANSLATIONS_REPO_PATH: "" # Local filesystem path where the translations repo will be cloned/checked out + TRANSLATIONS_REPO_URL: "https://github.com/mitodl/mitxonline-translations.git" # URL of the remote translations repository Usage ===== diff --git a/src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/apps.py b/src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/apps.py index 7ad592bb0..e9757453b 100644 --- a/src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/apps.py +++ b/src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/apps.py @@ -20,8 +20,5 @@ class OLOpenedXAIStaticTranslationsConfig(AppConfig): ProjectType.CMS: { SettingsType.COMMON: {PluginSettings.RELATIVE_PATH: "settings.cms"}, }, - ProjectType.LMS: { - SettingsType.COMMON: {PluginSettings.RELATIVE_PATH: "settings.lms"}, - }, }, } diff --git a/src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/settings/__init__.py b/src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/settings/__init__.py index 3527c42e7..7ae9a6665 100644 --- a/src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/settings/__init__.py +++ b/src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/settings/__init__.py @@ -1,40 +1 @@ -"""Common settings for AI Static Translations plugin.""" - - -def apply_common_settings(settings): - """ - Apply custom settings for the AI Static Translations plugin. - - These settings are shared with ol_openedx_course_translations. 
- If that plugin is also installed, its settings take precedence - since both plugins configure the same keys. - """ - if not hasattr(settings, "TRANSLATIONS_PROVIDERS"): - settings.TRANSLATIONS_PROVIDERS = { - "default_provider": "mistral", - "deepl": { - "api_key": "", - }, - "openai": { - "api_key": "", - "default_model": "gpt-5.2", - }, - "gemini": { - "api_key": "", - "default_model": "gemini-3-pro-preview", - }, - "mistral": { - "api_key": "", - "default_model": "mistral-large-latest", - }, - } - if not hasattr(settings, "TRANSLATIONS_GITHUB_TOKEN"): - settings.TRANSLATIONS_GITHUB_TOKEN = "" - if not hasattr(settings, "TRANSLATIONS_REPO_URL"): - settings.TRANSLATIONS_REPO_URL = ( - "https://github.com/mitodl/mitxonline-translations.git" - ) - if not hasattr(settings, "TRANSLATIONS_REPO_PATH"): - settings.TRANSLATIONS_REPO_PATH = "" - if not hasattr(settings, "LITE_LLM_REQUEST_TIMEOUT"): - settings.LITE_LLM_REQUEST_TIMEOUT = 300 # seconds +"""Settings package for ol_openedx_ai_static_translations.""" diff --git a/src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/settings/cms.py b/src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/settings/cms.py index 23a77f66e..13bba5601 100644 --- a/src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/settings/cms.py +++ b/src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/settings/cms.py @@ -1,6 +1,6 @@ """Settings to provide to edX""" -from ol_openedx_ai_static_translations.settings import apply_common_settings +from ol_openedx_ai_static_translations.settings.common import apply_common_settings def plugin_settings(settings): diff --git a/src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/settings/common.py b/src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/settings/common.py new file mode 100644 index 000000000..3527c42e7 --- /dev/null +++ 
b/src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/settings/common.py @@ -0,0 +1,40 @@ +"""Common settings for AI Static Translations plugin.""" + + +def apply_common_settings(settings): + """ + Apply custom settings for the AI Static Translations plugin. + + These settings are shared with ol_openedx_course_translations. + If that plugin is also installed, its settings take precedence + since both plugins configure the same keys. + """ + if not hasattr(settings, "TRANSLATIONS_PROVIDERS"): + settings.TRANSLATIONS_PROVIDERS = { + "default_provider": "mistral", + "deepl": { + "api_key": "", + }, + "openai": { + "api_key": "", + "default_model": "gpt-5.2", + }, + "gemini": { + "api_key": "", + "default_model": "gemini-3-pro-preview", + }, + "mistral": { + "api_key": "", + "default_model": "mistral-large-latest", + }, + } + if not hasattr(settings, "TRANSLATIONS_GITHUB_TOKEN"): + settings.TRANSLATIONS_GITHUB_TOKEN = "" + if not hasattr(settings, "TRANSLATIONS_REPO_URL"): + settings.TRANSLATIONS_REPO_URL = ( + "https://github.com/mitodl/mitxonline-translations.git" + ) + if not hasattr(settings, "TRANSLATIONS_REPO_PATH"): + settings.TRANSLATIONS_REPO_PATH = "" + if not hasattr(settings, "LITE_LLM_REQUEST_TIMEOUT"): + settings.LITE_LLM_REQUEST_TIMEOUT = 300 # seconds diff --git a/src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/settings/lms.py b/src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/settings/lms.py deleted file mode 100644 index d1e2a6054..000000000 --- a/src/ol_openedx_ai_static_translations/ol_openedx_ai_static_translations/settings/lms.py +++ /dev/null @@ -1,10 +0,0 @@ -"""Settings to provide to edX""" - -from ol_openedx_ai_static_translations.settings import apply_common_settings - - -def plugin_settings(settings): - """ - Populate lms settings - """ - apply_common_settings(settings) diff --git a/src/ol_openedx_ai_static_translations/pyproject.toml 
b/src/ol_openedx_ai_static_translations/pyproject.toml index c3fafcb14..f502bcb05 100644 --- a/src/ol_openedx_ai_static_translations/pyproject.toml +++ b/src/ol_openedx_ai_static_translations/pyproject.toml @@ -20,9 +20,6 @@ dependencies = [ [project.entry-points."cms.djangoapp"] ol_openedx_ai_static_translations = "ol_openedx_ai_static_translations.apps:OLOpenedXAIStaticTranslationsConfig" -[project.entry-points."lms.djangoapp"] -ol_openedx_course_translations = "ol_openedx_course_translations.apps:OLOpenedXCourseTranslationsConfig" - [build-system] requires = ["hatchling"] build-backend = "hatchling.build" diff --git a/uv.lock b/uv.lock index 93e25d16d..954e5673f 100644 --- a/uv.lock +++ b/uv.lock @@ -11,12 +11,14 @@ resolution-markers = [ members = [ "edx-sysadmin", "edx-username-changer", + "ol-openedx-ai-static-translations", "ol-openedx-auto-select-language", "ol-openedx-canvas-integration", "ol-openedx-chat", "ol-openedx-chat-xblock", "ol-openedx-checkout-external", "ol-openedx-course-export", + "ol-openedx-course-outline-api", "ol-openedx-course-structure-api", "ol-openedx-course-sync", "ol-openedx-course-translations", @@ -2029,6 +2031,28 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/be/9c/92789c596b8df838baa98fa71844d84283302f7604ed565dafe5a6b5041a/oauthlib-3.3.1-py3-none-any.whl", hash = "sha256:88119c938d2b8fb88561af5f6ee0eec8cc8d552b7bb1f712743136eb7523b7a1", size = 160065, upload-time = "2025-06-19T22:48:06.508Z" }, ] +[[package]] +name = "ol-openedx-ai-static-translations" +version = "0.1.0" +source = { editable = "src/ol_openedx_ai_static_translations" } +dependencies = [ + { name = "django", version = "5.2.12", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.12'" }, + { name = "django", version = "6.0.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.12'" }, + { name = "gitpython" }, + { name = "litellm" }, + { name = "polib" }, + { name = "requests" }, 
+] + +[package.metadata] +requires-dist = [ + { name = "django", specifier = ">=4.0" }, + { name = "gitpython", specifier = ">=3.1.40" }, + { name = "litellm", specifier = ">=1.80.0" }, + { name = "polib", specifier = ">=1.2.0" }, + { name = "requests", specifier = ">=2.31.0" }, +] + [[package]] name = "ol-openedx-auto-select-language" version = "0.1.0" @@ -2161,6 +2185,28 @@ requires-dist = [ { name = "edx-opaque-keys" }, ] +[[package]] +name = "ol-openedx-course-outline-api" +version = "0.1.0" +source = { editable = "src/ol_openedx_course_outline_api" } +dependencies = [ + { name = "django", version = "5.2.12", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.12'" }, + { name = "django", version = "6.0.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.12'" }, + { name = "djangorestframework" }, + { name = "edx-django-utils" }, + { name = "edx-drf-extensions" }, + { name = "edx-opaque-keys" }, +] + +[package.metadata] +requires-dist = [ + { name = "django", specifier = ">=4.0" }, + { name = "djangorestframework", specifier = ">=3.14.0" }, + { name = "edx-django-utils", specifier = ">4.0.0" }, + { name = "edx-drf-extensions", specifier = ">=10.0.0" }, + { name = "edx-opaque-keys" }, +] + [[package]] name = "ol-openedx-course-structure-api" version = "0.2.0" @@ -2207,7 +2253,7 @@ requires-dist = [ [[package]] name = "ol-openedx-course-translations" -version = "0.5.2" +version = "0.6.0" source = { editable = "src/ol_openedx_course_translations" } dependencies = [ { name = "deepl" }, @@ -2215,10 +2261,7 @@ dependencies = [ { name = "django", version = "6.0.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.12'" }, { name = "djangorestframework" }, { name = "edx-opaque-keys" }, - { name = "gitpython" }, { name = "litellm" }, - { name = "polib" }, - { name = "requests" }, { name = "srt" }, ] @@ -2228,10 +2271,7 @@ requires-dist = [ { name = "django", 
specifier = ">=4.0" }, { name = "djangorestframework", specifier = ">=3.14.0" }, { name = "edx-opaque-keys" }, - { name = "gitpython", specifier = ">=3.1.40" }, { name = "litellm", specifier = "==1.82.5" }, - { name = "polib", specifier = ">=1.2.0" }, - { name = "requests", specifier = ">=2.31.0" }, { name = "srt", specifier = ">=3.5.3" }, ] From 7226134bb65c0da1cf1a8b28ddd18071e5b08723 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 14 Apr 2026 23:17:35 +0500 Subject: [PATCH 07/14] [pre-commit.ci] pre-commit autoupdate (#783) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/astral-sh/ruff-pre-commit: v0.15.8 → v0.15.10](https://github.com/astral-sh/ruff-pre-commit/compare/v0.15.8...v0.15.10) - [github.com/pre-commit/mirrors-mypy: v1.19.1 → v1.20.1](https://github.com/pre-commit/mirrors-mypy/compare/v1.19.1...v1.20.1) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: arslanashraf7 <34372316+arslanashraf7@users.noreply.github.com> --- .pre-commit-config.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 298be80b8..2b70e570e 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -39,13 +39,13 @@ repos: - --exclude-files '(uv.lock)' - repo: https://github.com/astral-sh/ruff-pre-commit # Ruff version. 
- rev: 'v0.15.8' + rev: 'v0.15.10' hooks: - id: ruff-format - id: ruff args: [--extend-ignore=D1, --fix] - repo: https://github.com/pre-commit/mirrors-mypy - rev: v1.19.1 + rev: v1.20.1 hooks: - id: mypy additional_dependencies: From f71e486966859b810c4de0838002e0a0bd05fee7 Mon Sep 17 00:00:00 2001 From: Muhammad Anas <88967643+Anas12091101@users.noreply.github.com> Date: Fri, 17 Apr 2026 17:37:21 +0500 Subject: [PATCH 08/14] feat: add clear expired tokens task in ol-social-auth (#778) * feat: add task * fix: issues * chore: bump version * docs: update readme * fix: issues * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fix: rename to CELERYBEAT_SCHEDULE --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: arslanashraf7 <34372316+arslanashraf7@users.noreply.github.com> --- src/ol_social_auth/README.rst | 12 +++++++++ src/ol_social_auth/ol_social_auth/apps.py | 23 ++++++++++++++++ .../ol_social_auth/settings/__init__.py | 0 .../ol_social_auth/settings/common.py | 17 ++++++++++++ .../ol_social_auth/settings/production.py | 5 ++++ src/ol_social_auth/ol_social_auth/tasks.py | 25 ++++++++++++++++++ src/ol_social_auth/pyproject.toml | 6 ++++- src/ol_social_auth/setup.cfg | 26 +++++++++++++++++++ src/ol_social_auth/tests/tasks_test.py | 24 +++++++++++++++++ 9 files changed, 137 insertions(+), 1 deletion(-) create mode 100644 src/ol_social_auth/ol_social_auth/apps.py create mode 100644 src/ol_social_auth/ol_social_auth/settings/__init__.py create mode 100644 src/ol_social_auth/ol_social_auth/settings/common.py create mode 100644 src/ol_social_auth/ol_social_auth/settings/production.py create mode 100644 src/ol_social_auth/ol_social_auth/tasks.py create mode 100644 src/ol_social_auth/setup.cfg create mode 100644 src/ol_social_auth/tests/tasks_test.py diff --git a/src/ol_social_auth/README.rst b/src/ol_social_auth/README.rst index 961013962..4a5fdf30b 
100644 --- a/src/ol_social_auth/README.rst +++ b/src/ol_social_auth/README.rst @@ -32,3 +32,15 @@ Make sure to properly configure the plugin following the links in the above "Con * Install the plugin in the lms following the installation steps above. * Verify that you are not logged in on edx-platform. * Create a new user in your MIT application and verify that a corresponding user is successfully created on the edX platform. + +Expired Token Cleanup +--------------------- +This plugin includes a scheduled Celery task (``ol_clear_expired_tokens``) that automatically removes expired OAuth2 access tokens, refresh tokens, and grant tokens from the database. + +**Behavior:** + +* Runs every **Monday at 9:00 AM** (server time) via Celery Beat by default. The schedule can be customized by overriding the ``ol_clear_expired_tokens`` entry in ``CELERYBEAT_SCHEDULE``. +* Uses django-oauth-toolkit's ``clear_expired()`` to delete tokens that have exceeded the configured expiration threshold. +* Sets ``REFRESH_TOKEN_EXPIRE_SECONDS`` to **30 days** (overriding the edx-platform default of 90 days). Tokens revoked or expired longer than 30 days ago will be cleaned up. + +**Note:** If running this plugin for the first time on a database with a large backlog of expired tokens (millions of rows), consider running the ``edx_clear_expired_tokens`` management command manually first to reduce the initial volume before relying on the scheduled task. 
diff --git a/src/ol_social_auth/ol_social_auth/apps.py b/src/ol_social_auth/ol_social_auth/apps.py new file mode 100644 index 000000000..56c7854f4 --- /dev/null +++ b/src/ol_social_auth/ol_social_auth/apps.py @@ -0,0 +1,23 @@ +"""ol_social_auth Django application initialization.""" + +from django.apps import AppConfig +from edx_django_utils.plugins import PluginSettings +from openedx.core.djangoapps.plugins.constants import ProjectType, SettingsType + + +class OLSocialAuthConfig(AppConfig): + name = "ol_social_auth" + verbose_name = "OL Social Auth" + + plugin_app = { + PluginSettings.CONFIG: { + ProjectType.LMS: { + SettingsType.COMMON: { + PluginSettings.RELATIVE_PATH: "settings.common", + }, + SettingsType.PRODUCTION: { + PluginSettings.RELATIVE_PATH: "settings.production", + }, + }, + }, + } diff --git a/src/ol_social_auth/ol_social_auth/settings/__init__.py b/src/ol_social_auth/ol_social_auth/settings/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/src/ol_social_auth/ol_social_auth/settings/common.py b/src/ol_social_auth/ol_social_auth/settings/common.py new file mode 100644 index 000000000..dbe37ec70 --- /dev/null +++ b/src/ol_social_auth/ol_social_auth/settings/common.py @@ -0,0 +1,17 @@ +"""Common settings for the ol-social-auth plugin.""" + +from celery.schedules import crontab + + +def plugin_settings(settings): + """Settings for the ol-social-auth plugin.""" # noqa: D401 + settings.OAUTH2_PROVIDER["REFRESH_TOKEN_EXPIRE_SECONDS"] = ( + 30 * 24 * 60 * 60 # 30 days + ) + # Add ol_clear_expired_tokens to the Celery beat schedule. 
+ if not hasattr(settings, "CELERYBEAT_SCHEDULE"): + settings.CELERYBEAT_SCHEDULE = {} + settings.CELERYBEAT_SCHEDULE["ol_clear_expired_tokens"] = { + "task": "ol_social_auth.tasks.ol_clear_expired_tokens", + "schedule": crontab(hour=9, minute=0, day_of_week="monday"), + } diff --git a/src/ol_social_auth/ol_social_auth/settings/production.py b/src/ol_social_auth/ol_social_auth/settings/production.py new file mode 100644 index 000000000..4a3b9d3b9 --- /dev/null +++ b/src/ol_social_auth/ol_social_auth/settings/production.py @@ -0,0 +1,5 @@ +"""Production settings for the ol-social-auth plugin.""" + + +def plugin_settings(settings): + """Production overrides for ol-social-auth plugin.""" diff --git a/src/ol_social_auth/ol_social_auth/tasks.py b/src/ol_social_auth/ol_social_auth/tasks.py new file mode 100644 index 000000000..64a03ee7f --- /dev/null +++ b/src/ol_social_auth/ol_social_auth/tasks.py @@ -0,0 +1,25 @@ +"""Celery tasks for ol-social-auth plugin.""" + +import logging + +from celery import shared_task +from oauth2_provider.models import clear_expired + +log = logging.getLogger(__name__) +oauth2_logger = logging.getLogger("oauth2_provider") + + +@shared_task(acks_late=True) +def ol_clear_expired_tokens(): + """Clear expired OAuth2 access, refresh, and ID tokens.""" + log.info("Starting ol_clear_expired_tokens...") + # Suppress debug-level logs from oauth2_provider during cleanup. + # Its batch_delete debug logs lack the 'userid' field expected by + # Open edX's custom log formatter, causing noisy ValueError tracebacks. 
+ original_level = oauth2_logger.level + oauth2_logger.setLevel(logging.INFO) + try: + clear_expired() + finally: + oauth2_logger.setLevel(original_level) + log.info("Finished ol_clear_expired_tokens.") diff --git a/src/ol_social_auth/pyproject.toml b/src/ol_social_auth/pyproject.toml index c819964a1..a56cd3e3b 100644 --- a/src/ol_social_auth/pyproject.toml +++ b/src/ol_social_auth/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "ol-social-auth" -version = "0.2.0" +version = "0.2.1" description = "An Open edX plugin implementing MIT social auth backend" authors = [ {name = "MIT Office of Digital Learning"} @@ -11,9 +11,13 @@ requires-python = ">=3.11" keywords = ["Python", "edx"] dependencies = [ "Django>=4.0", + "django-oauth-toolkit", "social-auth-core>=4.5.4", ] +[project.entry-points."lms.djangoapp"] +ol_social_auth = "ol_social_auth.apps:OLSocialAuthConfig" + [build-system] requires = ["hatchling"] build-backend = "hatchling.build" diff --git a/src/ol_social_auth/setup.cfg b/src/ol_social_auth/setup.cfg new file mode 100644 index 000000000..78c55b21c --- /dev/null +++ b/src/ol_social_auth/setup.cfg @@ -0,0 +1,26 @@ +[tool:pytest] +pep8maxlinelength = 119 +DJANGO_SETTINGS_MODULE = lms.envs.test +addopts = --nomigrations --reuse-db --durations=20 +filterwarnings = + default + ignore::xblock.exceptions.FieldDataDeprecationWarning + ignore::pytest.PytestConfigWarning + ignore:No request passed to the backend, unable to rate-limit:UserWarning + ignore:Flags not at the start of the expression:DeprecationWarning + ignore:Using or importing the ABCs from 'collections' instead of from 'collections.abc':DeprecationWarning + ignore:invalid escape sequence:DeprecationWarning + ignore:`formatargspec` is deprecated since Python 3.5:DeprecationWarning + ignore:the imp module is deprecated in favour of importlib:DeprecationWarning + ignore:"is" with a literal:SyntaxWarning + ignore:defusedxml.lxml is no longer supported:DeprecationWarning + ignore: `np.int` is a deprecated 
alias for the builtin `int`.:DeprecationWarning + ignore: `np.float` is a deprecated alias for the builtin `float`.:DeprecationWarning + ignore: `np.complex` is a deprecated alias for the builtin `complex`.:DeprecationWarning + ignore: 'etree' is deprecated. Use 'xml.etree.ElementTree' instead.:DeprecationWarning + ignore: defusedxml.cElementTree is deprecated, import from defusedxml.ElementTree instead.:DeprecationWarning + +junit_family = xunit2 +norecursedirs = .* *.egg build conf dist node_modules test_root cms/envs lms/envs +python_classes = +python_files = tests.py test_*.py tests_*.py *_tests.py __init__.py diff --git a/src/ol_social_auth/tests/tasks_test.py b/src/ol_social_auth/tests/tasks_test.py new file mode 100644 index 000000000..9f37bbec0 --- /dev/null +++ b/src/ol_social_auth/tests/tasks_test.py @@ -0,0 +1,24 @@ +"""Tests for ol_social_auth tasks.""" + +from ol_social_auth import tasks + + +def test_ol_clear_expired_tokens(mocker): + """Test that ol_clear_expired_tokens calls the clear_expired function.""" + patched_clear_expired = mocker.patch("ol_social_auth.tasks.clear_expired") + + tasks.ol_clear_expired_tokens.delay() + patched_clear_expired.assert_called_once_with() + + +def test_ol_clear_expired_tokens_logging(mocker): + """Test that ol_clear_expired_tokens logs start and finish messages.""" + mocker.patch("ol_social_auth.tasks.clear_expired") + patched_log_info = mocker.patch("ol_social_auth.tasks.log.info") + + tasks.ol_clear_expired_tokens() + + expected_log_call_count = 2 + assert patched_log_info.call_count == expected_log_call_count # noqa: S101 + patched_log_info.assert_any_call("Starting ol_clear_expired_tokens...") + patched_log_info.assert_any_call("Finished ol_clear_expired_tokens.") From 6f187b37e22c648646f89005fe01700077ded3e7 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 24 Mar 2026 09:09:41 +0000 Subject: [PATCH 09/14] Initial plan Co-authored-by: arslanashraf7 
<34372316+arslanashraf7@users.noreply.github.com> From a8239026a373577c8eea3d61b7290e4d28d803de Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 21 Apr 2026 08:20:04 +0000 Subject: [PATCH 10/14] docs: move sync_and_translate_language docs to ol_openedx_ai_static_translations README Agent-Logs-Url: https://github.com/mitodl/open-edx-plugins/sessions/cf432dde-79f7-4c53-9dd0-e8aabb112dd2 Co-authored-by: arslanashraf7 <34372316+arslanashraf7@users.noreply.github.com> --- .../README.rst | 58 ++++++++++++++++--- 1 file changed, 51 insertions(+), 7 deletions(-) diff --git a/src/ol_openedx_ai_static_translations/README.rst b/src/ol_openedx_ai_static_translations/README.rst index ce4b95863..fcfbb56fa 100644 --- a/src/ol_openedx_ai_static_translations/README.rst +++ b/src/ol_openedx_ai_static_translations/README.rst @@ -34,16 +34,60 @@ This plugin shares settings with ``ol_openedx_course_translations``. Ensure the TRANSLATIONS_REPO_PATH: "" # Local filesystem path where the translations repo will be cloned/checked out TRANSLATIONS_REPO_URL: "https://github.com/mitodl/mitxonline-translations.git" # URL of the remote translations repository -Usage -===== +Generating static content translations +====================================== + +This command synchronizes translation keys from edx-platform and MFE's, translates empty keys using LLM, and automatically creates a pull request in the translations repository. + +**What it does:** + +1. Syncs translation keys from edx-platform and MFE's to the translations repository +2. Extracts empty translation keys that need translation +3. Translates empty keys using the specified LLM provider and model +4. Applies translations to JSON and PO files +5. Commits changes to a new branch +6. Creates a pull request with translation statistics + +**Usage:** + +1. Go to the CMS shell +2. Run the management command: + + .. 
code-block:: bash + + ./manage.py cms sync_and_translate_language [OPTIONS] + +**Required arguments:** + +- ``LANGUAGE_CODE``: Language code (e.g., ``el``, ``fr``, ``es_ES``) + +**Optional arguments:** + +- ``--iso-code``: ISO code for JSON files (default: same as language code) +- ``--provider``: Translation provider (``openai``, ``gemini``, ``mistral``). Default is taken from ``TRANSLATIONS_PROVIDERS['default_provider']`` setting +- ``--model``: LLM model name. If not specified, uses the ``default_model`` for the selected provider from ``TRANSLATIONS_PROVIDERS``. Examples: ``gpt-5.2``, ``gemini-3-pro-preview``, ``mistral-large-latest`` +- ``--repo-path``: Path to mitxonline-translations repository (can also be set via ``TRANSLATIONS_REPO_PATH`` setting or environment variable) +- ``--repo-url``: GitHub repository URL (default: ``https://github.com/mitodl/mitxonline-translations.git``, can also be set via ``TRANSLATIONS_REPO_URL`` setting or environment variable) +- ``--glossary``: Path to glossary directory (optional). Should contain language-specific files (e.g. ``{iso_code}.txt``). +- ``--batch-size``: Number of keys to translate per API request (default: 200, recommended: 200-300 for most models) +- ``--mfe``: Filter by specific MFE(s). Use ``edx-platform`` for backend translations +- ``--dry-run``: Run without committing or creating PR + +**Examples:** + + .. code-block:: bash + + # Use default provider (from TRANSLATIONS_PROVIDERS['default_provider']) with its default model + ./manage.py cms sync_and_translate_language el -.. 
code-block:: bash + # Use OpenAI provider with its default model (gpt-5.2) + ./manage.py cms sync_and_translate_language el --provider openai - # Sync and translate a language - ./manage.py cms sync_and_translate_language el + # Use OpenAI provider with a specific model + ./manage.py cms sync_and_translate_language el --provider openai --model gpt-5.2 - # With specific provider and model - ./manage.py cms sync_and_translate_language el --provider openai --model gpt-5.2 --glossary /path/to/glossary + # Use Mistral provider with a specific model and glossary + ./manage.py cms sync_and_translate_language el --provider mistral --model mistral-large-latest --glossary /path/to/glossary --batch-size 250 License ******* From 3359403fb6921fc31e6c8ba31f579b24098b9e77 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 24 Mar 2026 09:09:41 +0000 Subject: [PATCH 11/14] Initial plan Co-authored-by: arslanashraf7 <34372316+arslanashraf7@users.noreply.github.com> From b05ac9dfabae445d75760e647765db0c82bbd170 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 24 Mar 2026 09:09:41 +0000 Subject: [PATCH 12/14] Initial plan Co-authored-by: arslanashraf7 <34372316+arslanashraf7@users.noreply.github.com> From 9a128d589c685feb9f85d567794b294b492820f3 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 21 Apr 2026 09:40:29 +0000 Subject: [PATCH 13/14] revert: remove spurious main commits from branch history Reverts the changes introduced by two commits that were accidentally included in this branch during an earlier rebase on a shallow clone: - f71e486 (feat: add clear expired tokens task in ol-social-auth #778) - 7226134 ([pre-commit.ci] pre-commit autoupdate #783) These commits are already in main; including them here caused the PR to show unrelated ol_social_auth and pre-commit changes. 
Co-authored-by: arslanashraf7 <34372316+arslanashraf7@users.noreply.github.com> --- .pre-commit-config.yaml | 4 +-- src/ol_social_auth/README.rst | 12 --------- src/ol_social_auth/ol_social_auth/apps.py | 23 ---------------- .../ol_social_auth/settings/__init__.py | 0 .../ol_social_auth/settings/common.py | 17 ------------ .../ol_social_auth/settings/production.py | 5 ---- src/ol_social_auth/ol_social_auth/tasks.py | 25 ------------------ src/ol_social_auth/pyproject.toml | 6 +---- src/ol_social_auth/setup.cfg | 26 ------------------- src/ol_social_auth/tests/tasks_test.py | 24 ----------------- 10 files changed, 3 insertions(+), 139 deletions(-) delete mode 100644 src/ol_social_auth/ol_social_auth/apps.py delete mode 100644 src/ol_social_auth/ol_social_auth/settings/__init__.py delete mode 100644 src/ol_social_auth/ol_social_auth/settings/common.py delete mode 100644 src/ol_social_auth/ol_social_auth/settings/production.py delete mode 100644 src/ol_social_auth/ol_social_auth/tasks.py delete mode 100644 src/ol_social_auth/setup.cfg delete mode 100644 src/ol_social_auth/tests/tasks_test.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 2b70e570e..298be80b8 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -39,13 +39,13 @@ repos: - --exclude-files '(uv.lock)' - repo: https://github.com/astral-sh/ruff-pre-commit # Ruff version. - rev: 'v0.15.10' + rev: 'v0.15.8' hooks: - id: ruff-format - id: ruff args: [--extend-ignore=D1, --fix] - repo: https://github.com/pre-commit/mirrors-mypy - rev: v1.20.1 + rev: v1.19.1 hooks: - id: mypy additional_dependencies: diff --git a/src/ol_social_auth/README.rst b/src/ol_social_auth/README.rst index 4a5fdf30b..961013962 100644 --- a/src/ol_social_auth/README.rst +++ b/src/ol_social_auth/README.rst @@ -32,15 +32,3 @@ Make sure to properly configure the plugin following the links in the above "Con * Install the plugin in the lms following the installation steps above. 
* Verify that you are not logged in on edx-platform. * Create a new user in your MIT application and verify that a corresponding user is successfully created on the edX platform. - -Expired Token Cleanup ---------------------- -This plugin includes a scheduled Celery task (``ol_clear_expired_tokens``) that automatically removes expired OAuth2 access tokens, refresh tokens, and grant tokens from the database. - -**Behavior:** - -* Runs every **Monday at 9:00 AM** (server time) via Celery Beat by default. The schedule can be customized by overriding the ``ol_clear_expired_tokens`` entry in ``CELERYBEAT_SCHEDULE``. -* Uses django-oauth-toolkit's ``clear_expired()`` to delete tokens that have exceeded the configured expiration threshold. -* Sets ``REFRESH_TOKEN_EXPIRE_SECONDS`` to **30 days** (overriding the edx-platform default of 90 days). Tokens revoked or expired longer than 30 days ago will be cleaned up. - -**Note:** If running this plugin for the first time on a database with a large backlog of expired tokens (millions of rows), consider running the ``edx_clear_expired_tokens`` management command manually first to reduce the initial volume before relying on the scheduled task. 
diff --git a/src/ol_social_auth/ol_social_auth/apps.py b/src/ol_social_auth/ol_social_auth/apps.py deleted file mode 100644 index 56c7854f4..000000000 --- a/src/ol_social_auth/ol_social_auth/apps.py +++ /dev/null @@ -1,23 +0,0 @@ -"""ol_social_auth Django application initialization.""" - -from django.apps import AppConfig -from edx_django_utils.plugins import PluginSettings -from openedx.core.djangoapps.plugins.constants import ProjectType, SettingsType - - -class OLSocialAuthConfig(AppConfig): - name = "ol_social_auth" - verbose_name = "OL Social Auth" - - plugin_app = { - PluginSettings.CONFIG: { - ProjectType.LMS: { - SettingsType.COMMON: { - PluginSettings.RELATIVE_PATH: "settings.common", - }, - SettingsType.PRODUCTION: { - PluginSettings.RELATIVE_PATH: "settings.production", - }, - }, - }, - } diff --git a/src/ol_social_auth/ol_social_auth/settings/__init__.py b/src/ol_social_auth/ol_social_auth/settings/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/src/ol_social_auth/ol_social_auth/settings/common.py b/src/ol_social_auth/ol_social_auth/settings/common.py deleted file mode 100644 index dbe37ec70..000000000 --- a/src/ol_social_auth/ol_social_auth/settings/common.py +++ /dev/null @@ -1,17 +0,0 @@ -"""Common settings for the ol-social-auth plugin.""" - -from celery.schedules import crontab - - -def plugin_settings(settings): - """Settings for the ol-social-auth plugin.""" # noqa: D401 - settings.OAUTH2_PROVIDER["REFRESH_TOKEN_EXPIRE_SECONDS"] = ( - 30 * 24 * 60 * 60 # 30 days - ) - # Add ol_clear_expired_tokens to the Celery beat schedule. 
- if not hasattr(settings, "CELERYBEAT_SCHEDULE"): - settings.CELERYBEAT_SCHEDULE = {} - settings.CELERYBEAT_SCHEDULE["ol_clear_expired_tokens"] = { - "task": "ol_social_auth.tasks.ol_clear_expired_tokens", - "schedule": crontab(hour=9, minute=0, day_of_week="monday"), - } diff --git a/src/ol_social_auth/ol_social_auth/settings/production.py b/src/ol_social_auth/ol_social_auth/settings/production.py deleted file mode 100644 index 4a3b9d3b9..000000000 --- a/src/ol_social_auth/ol_social_auth/settings/production.py +++ /dev/null @@ -1,5 +0,0 @@ -"""Production settings for the ol-social-auth plugin.""" - - -def plugin_settings(settings): - """Production overrides for ol-social-auth plugin.""" diff --git a/src/ol_social_auth/ol_social_auth/tasks.py b/src/ol_social_auth/ol_social_auth/tasks.py deleted file mode 100644 index 64a03ee7f..000000000 --- a/src/ol_social_auth/ol_social_auth/tasks.py +++ /dev/null @@ -1,25 +0,0 @@ -"""Celery tasks for ol-social-auth plugin.""" - -import logging - -from celery import shared_task -from oauth2_provider.models import clear_expired - -log = logging.getLogger(__name__) -oauth2_logger = logging.getLogger("oauth2_provider") - - -@shared_task(acks_late=True) -def ol_clear_expired_tokens(): - """Clear expired OAuth2 access, refresh, and ID tokens.""" - log.info("Starting ol_clear_expired_tokens...") - # Suppress debug-level logs from oauth2_provider during cleanup. - # Its batch_delete debug logs lack the 'userid' field expected by - # Open edX's custom log formatter, causing noisy ValueError tracebacks. 
- original_level = oauth2_logger.level - oauth2_logger.setLevel(logging.INFO) - try: - clear_expired() - finally: - oauth2_logger.setLevel(original_level) - log.info("Finished ol_clear_expired_tokens.") diff --git a/src/ol_social_auth/pyproject.toml b/src/ol_social_auth/pyproject.toml index a56cd3e3b..c819964a1 100644 --- a/src/ol_social_auth/pyproject.toml +++ b/src/ol_social_auth/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "ol-social-auth" -version = "0.2.1" +version = "0.2.0" description = "An Open edX plugin implementing MIT social auth backend" authors = [ {name = "MIT Office of Digital Learning"} @@ -11,13 +11,9 @@ requires-python = ">=3.11" keywords = ["Python", "edx"] dependencies = [ "Django>=4.0", - "django-oauth-toolkit", "social-auth-core>=4.5.4", ] -[project.entry-points."lms.djangoapp"] -ol_social_auth = "ol_social_auth.apps:OLSocialAuthConfig" - [build-system] requires = ["hatchling"] build-backend = "hatchling.build" diff --git a/src/ol_social_auth/setup.cfg b/src/ol_social_auth/setup.cfg deleted file mode 100644 index 78c55b21c..000000000 --- a/src/ol_social_auth/setup.cfg +++ /dev/null @@ -1,26 +0,0 @@ -[tool:pytest] -pep8maxlinelength = 119 -DJANGO_SETTINGS_MODULE = lms.envs.test -addopts = --nomigrations --reuse-db --durations=20 -filterwarnings = - default - ignore::xblock.exceptions.FieldDataDeprecationWarning - ignore::pytest.PytestConfigWarning - ignore:No request passed to the backend, unable to rate-limit:UserWarning - ignore:Flags not at the start of the expression:DeprecationWarning - ignore:Using or importing the ABCs from 'collections' instead of from 'collections.abc':DeprecationWarning - ignore:invalid escape sequence:DeprecationWarning - ignore:`formatargspec` is deprecated since Python 3.5:DeprecationWarning - ignore:the imp module is deprecated in favour of importlib:DeprecationWarning - ignore:"is" with a literal:SyntaxWarning - ignore:defusedxml.lxml is no longer supported:DeprecationWarning - ignore: `np.int` is a 
deprecated alias for the builtin `int`.:DeprecationWarning - ignore: `np.float` is a deprecated alias for the builtin `float`.:DeprecationWarning - ignore: `np.complex` is a deprecated alias for the builtin `complex`.:DeprecationWarning - ignore: 'etree' is deprecated. Use 'xml.etree.ElementTree' instead.:DeprecationWarning - ignore: defusedxml.cElementTree is deprecated, import from defusedxml.ElementTree instead.:DeprecationWarning - -junit_family = xunit2 -norecursedirs = .* *.egg build conf dist node_modules test_root cms/envs lms/envs -python_classes = -python_files = tests.py test_*.py tests_*.py *_tests.py __init__.py diff --git a/src/ol_social_auth/tests/tasks_test.py b/src/ol_social_auth/tests/tasks_test.py deleted file mode 100644 index 9f37bbec0..000000000 --- a/src/ol_social_auth/tests/tasks_test.py +++ /dev/null @@ -1,24 +0,0 @@ -"""Tests for ol_social_auth tasks.""" - -from ol_social_auth import tasks - - -def test_ol_clear_expired_tokens(mocker): - """Test that ol_clear_expired_tokens calls the clear_expired function.""" - patched_clear_expired = mocker.patch("ol_social_auth.tasks.clear_expired") - - tasks.ol_clear_expired_tokens.delay() - patched_clear_expired.assert_called_once_with() - - -def test_ol_clear_expired_tokens_logging(mocker): - """Test that ol_clear_expired_tokens logs start and finish messages.""" - mocker.patch("ol_social_auth.tasks.clear_expired") - patched_log_info = mocker.patch("ol_social_auth.tasks.log.info") - - tasks.ol_clear_expired_tokens() - - expected_log_call_count = 2 - assert patched_log_info.call_count == expected_log_call_count # noqa: S101 - patched_log_info.assert_any_call("Starting ol_clear_expired_tokens...") - patched_log_info.assert_any_call("Finished ol_clear_expired_tokens.") From 8c540ddb4f39c122264a25aa04fdfd315e77fe77 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 21 Apr 2026 09:58:25 +0000 Subject: [PATCH 14/14] fix: remove spurious 
ol-openedx-course-outline-api entries from uv.lock Agent-Logs-Url: https://github.com/mitodl/open-edx-plugins/sessions/1159269e-c5c3-4a1f-a502-df05ad32ee07 Co-authored-by: arslanashraf7 <34372316+arslanashraf7@users.noreply.github.com> --- uv.lock | 23 ----------------------- 1 file changed, 23 deletions(-) diff --git a/uv.lock b/uv.lock index 954e5673f..e80e19a81 100644 --- a/uv.lock +++ b/uv.lock @@ -18,7 +18,6 @@ members = [ "ol-openedx-chat-xblock", "ol-openedx-checkout-external", "ol-openedx-course-export", - "ol-openedx-course-outline-api", "ol-openedx-course-structure-api", "ol-openedx-course-sync", "ol-openedx-course-translations", @@ -2185,28 +2184,6 @@ requires-dist = [ { name = "edx-opaque-keys" }, ] -[[package]] -name = "ol-openedx-course-outline-api" -version = "0.1.0" -source = { editable = "src/ol_openedx_course_outline_api" } -dependencies = [ - { name = "django", version = "5.2.12", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.12'" }, - { name = "django", version = "6.0.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.12'" }, - { name = "djangorestframework" }, - { name = "edx-django-utils" }, - { name = "edx-drf-extensions" }, - { name = "edx-opaque-keys" }, -] - -[package.metadata] -requires-dist = [ - { name = "django", specifier = ">=4.0" }, - { name = "djangorestframework", specifier = ">=3.14.0" }, - { name = "edx-django-utils", specifier = ">4.0.0" }, - { name = "edx-drf-extensions", specifier = ">=10.0.0" }, - { name = "edx-opaque-keys" }, -] - [[package]] name = "ol-openedx-course-structure-api" version = "0.2.0"