# File-viewer metadata (not part of the program): Python, 264 lines, 16 KiB.
# Master language table. Every entry is a dict with three string fields:
#   "name"          - human-readable language name (script noted in parentheses
#                     where the same language appears in several scripts)
#   "nllb"          - NLLB-style identifier of the form "<lang>_<Script>"
#   "language_code" - shorter code for the same language; for some entries it is
#                     identical to the "nllb" value
# NOTE(review): the "language_code" values look like codes of an external
# localisation system (e.g. "es-ES", "zh-CN") - confirm which one before editing.
# The lookup tables and helper functions in this module are derived from this
# list, so entry order and field names matter.
LANGUAGES = [
    {"name": "Acehnese (Arabic script)", "nllb": "ace_Arab", "language_code": "ace_Arab"},
    {"name": "Acehnese (Latin script)", "nllb": "ace_Latn", "language_code": "ace_Latn"},
    {"name": "Mesopotamian Arabic", "nllb": "acm_Arab", "language_code": "acm_Arab"},
    {"name": "Ta'izzi-Adeni Arabic", "nllb": "acq_Arab", "language_code": "acq_Arab"},
    {"name": "Tunisian Arabic", "nllb": "aeb_Arab", "language_code": "aeb_Arab"},
    {"name": "Afrikaans", "nllb": "afr_Latn", "language_code": "af"},
    {"name": "South Levantine Arabic", "nllb": "ajp_Arab", "language_code": "ajp_Arab"},
    {"name": "Akan", "nllb": "aka_Latn", "language_code": "ak"},
    {"name": "Tosk Albanian", "nllb": "als_Latn", "language_code": "als"},
    {"name": "Amharic", "nllb": "amh_Ethi", "language_code": "am"},
    {"name": "North Levantine Arabic", "nllb": "apc_Arab", "language_code": "apc_Arab"},
    {"name": "Modern Standard Arabic", "nllb": "arb_Arab", "language_code": "ar"},
    {"name": "Modern Standard Arabic (Romanized)", "nllb": "arb_Latn", "language_code": "arb_Latn"},
    {"name": "Najdi Arabic", "nllb": "ars_Arab", "language_code": "ars_Arab"},
    {"name": "Moroccan Arabic", "nllb": "ary_Arab", "language_code": "ary_Arab"},
    {"name": "Egyptian Arabic", "nllb": "arz_Arab", "language_code": "arz_Arab"},
    {"name": "Assamese", "nllb": "asm_Beng", "language_code": "as"},
    {"name": "Asturian", "nllb": "ast_Latn", "language_code": "ast"},
    {"name": "Awadhi", "nllb": "awa_Deva", "language_code": "awa"},
    {"name": "Central Aymara", "nllb": "ayr_Latn", "language_code": "ay"},
    {"name": "South Azerbaijani", "nllb": "azb_Arab", "language_code": "azb"},
    {"name": "North Azerbaijani", "nllb": "azj_Latn", "language_code": "az"},
    {"name": "Bashkir", "nllb": "bak_Cyrl", "language_code": "ba"},
    {"name": "Bambara", "nllb": "bam_Latn", "language_code": "bm"},
    {"name": "Balinese", "nllb": "ban_Latn", "language_code": "ban"},
    {"name": "Belarusian", "nllb": "bel_Cyrl", "language_code": "be"},
    {"name": "Bemba", "nllb": "bem_Latn", "language_code": "bem"},
    {"name": "Bengali", "nllb": "ben_Beng", "language_code": "bn"},
    {"name": "Bhojpuri", "nllb": "bho_Deva", "language_code": "bho"},
    {"name": "Banjar (Arabic script)", "nllb": "bjn_Arab", "language_code": "bjn_Arab"},
    {"name": "Banjar (Latin script)", "nllb": "bjn_Latn", "language_code": "bjn_Latn"},
    {"name": "Standard Tibetan", "nllb": "bod_Tibt", "language_code": "bo"},
    {"name": "Bosnian", "nllb": "bos_Latn", "language_code": "bs"},
    {"name": "Buginese", "nllb": "bug_Latn", "language_code": "bug"},
    {"name": "Bulgarian", "nllb": "bul_Cyrl", "language_code": "bg"},
    {"name": "Catalan", "nllb": "cat_Latn", "language_code": "ca"},
    {"name": "Cebuano", "nllb": "ceb_Latn", "language_code": "ceb"},
    {"name": "Czech", "nllb": "ces_Latn", "language_code": "cs"},
    {"name": "Chokwe", "nllb": "cjk_Latn", "language_code": "cjk"},
    {"name": "Central Kurdish", "nllb": "ckb_Arab", "language_code": "ckb"},
    {"name": "Crimean Tatar", "nllb": "crh_Latn", "language_code": "crh"},
    {"name": "Welsh", "nllb": "cym_Latn", "language_code": "cy"},
    {"name": "Danish", "nllb": "dan_Latn", "language_code": "da"},
    {"name": "German", "nllb": "deu_Latn", "language_code": "de"},
    {"name": "Southwestern Dinka", "nllb": "dik_Latn", "language_code": "dik"},
    {"name": "Dyula", "nllb": "dyu_Latn", "language_code": "dyu"},
    {"name": "Dzongkha", "nllb": "dzo_Tibt", "language_code": "dz"},
    {"name": "Greek", "nllb": "ell_Grek", "language_code": "el"},
    {"name": "English", "nllb": "eng_Latn", "language_code": "en"},
    {"name": "Esperanto", "nllb": "epo_Latn", "language_code": "eo"},
    {"name": "Estonian", "nllb": "est_Latn", "language_code": "et"},
    {"name": "Basque", "nllb": "eus_Latn", "language_code": "eu"},
    {"name": "Ewe", "nllb": "ewe_Latn", "language_code": "ee"},
    {"name": "Faroese", "nllb": "fao_Latn", "language_code": "fo"},
    {"name": "Fijian", "nllb": "fij_Latn", "language_code": "fj"},
    {"name": "Finnish", "nllb": "fin_Latn", "language_code": "fi"},
    {"name": "Fon", "nllb": "fon_Latn", "language_code": "fon"},
    {"name": "French", "nllb": "fra_Latn", "language_code": "fr"},
    {"name": "Friulian", "nllb": "fur_Latn", "language_code": "fur-IT"},
    {"name": "Nigerian Fulfulde", "nllb": "fuv_Latn", "language_code": "fuv"},
    {"name": "West Central Oromo", "nllb": "gaz_Latn", "language_code": "om"},
    {"name": "Scottish Gaelic", "nllb": "gla_Latn", "language_code": "gd"},
    {"name": "Irish", "nllb": "gle_Latn", "language_code": "ga-IE"},
    {"name": "Galician", "nllb": "glg_Latn", "language_code": "gl"},
    {"name": "Guarani", "nllb": "grn_Latn", "language_code": "gn"},
    {"name": "Gujarati", "nllb": "guj_Gujr", "language_code": "gu-IN"},
    {"name": "Haitian Creole", "nllb": "hat_Latn", "language_code": "ht"},
    {"name": "Hausa", "nllb": "hau_Latn", "language_code": "ha"},
    {"name": "Hebrew", "nllb": "heb_Hebr", "language_code": "he"},
    {"name": "Hindi", "nllb": "hin_Deva", "language_code": "hi"},
    {"name": "Chhattisgarhi", "nllb": "hne_Deva", "language_code": "hne"},
    {"name": "Croatian", "nllb": "hrv_Latn", "language_code": "hr"},
    {"name": "Hungarian", "nllb": "hun_Latn", "language_code": "hu"},
    {"name": "Armenian", "nllb": "hye_Armn", "language_code": "hy-AM"},
    {"name": "Igbo", "nllb": "ibo_Latn", "language_code": "ig"},
    {"name": "Ilocano", "nllb": "ilo_Latn", "language_code": "ilo"},
    {"name": "Indonesian", "nllb": "ind_Latn", "language_code": "id"},
    {"name": "Icelandic", "nllb": "isl_Latn", "language_code": "is"},
    {"name": "Italian", "nllb": "ita_Latn", "language_code": "it"},
    {"name": "Javanese", "nllb": "jav_Latn", "language_code": "jv"},
    {"name": "Japanese", "nllb": "jpn_Jpan", "language_code": "ja"},
    {"name": "Kabyle", "nllb": "kab_Latn", "language_code": "kab"},
    {"name": "Jingpho", "nllb": "kac_Latn", "language_code": "kac"},
    {"name": "Kamba", "nllb": "kam_Latn", "language_code": "kam"},
    {"name": "Kannada", "nllb": "kan_Knda", "language_code": "kn"},
    {"name": "Kashmiri (Arabic script)", "nllb": "kas_Arab", "language_code": "kas_Arab"},
    {"name": "Kashmiri (Devanagari script)", "nllb": "kas_Deva", "language_code": "kas_Deva"},
    {"name": "Georgian", "nllb": "kat_Geor", "language_code": "ka"},
    {"name": "Kazakh", "nllb": "kaz_Cyrl", "language_code": "kk"},
    {"name": "Kabiyè", "nllb": "kbp_Latn", "language_code": "kbp"},
    {"name": "Kabuverdianu", "nllb": "kea_Latn", "language_code": "kea"},
    {"name": "Halh Mongolian", "nllb": "khk_Cyrl", "language_code": "mn"},
    {"name": "Khmer", "nllb": "khm_Khmr", "language_code": "km"},
    {"name": "Kikuyu", "nllb": "kik_Latn", "language_code": "ki"},
    {"name": "Kinyarwanda", "nllb": "kin_Latn", "language_code": "rw"},
    {"name": "Kyrgyz", "nllb": "kir_Cyrl", "language_code": "ky"},
    {"name": "Kimbundu", "nllb": "kmb_Latn", "language_code": "kmb"},
    {"name": "Northern Kurdish", "nllb": "kmr_Latn", "language_code": "kmr"},
    {"name": "Central Kanuri (Arabic script)", "nllb": "knc_Arab", "language_code": "knc_Arab"},
    {"name": "Central Kanuri (Latin script)", "nllb": "knc_Latn", "language_code": "knc_Latn"},
    {"name": "Kikongo", "nllb": "kon_Latn", "language_code": "kg"},
    {"name": "Korean", "nllb": "kor_Hang", "language_code": "ko"},
    {"name": "Lao", "nllb": "lao_Laoo", "language_code": "lo"},
    {"name": "Ligurian", "nllb": "lij_Latn", "language_code": "lij"},
    {"name": "Limburgish", "nllb": "lim_Latn", "language_code": "li"},
    {"name": "Lingala", "nllb": "lin_Latn", "language_code": "ln"},
    {"name": "Lithuanian", "nllb": "lit_Latn", "language_code": "lt"},
    {"name": "Lombard", "nllb": "lmo_Latn", "language_code": "lmo"},
    {"name": "Latgalian", "nllb": "ltg_Latn", "language_code": "ltg"},
    {"name": "Luxembourgish", "nllb": "ltz_Latn", "language_code": "lb"},
    {"name": "Luba-Kasai", "nllb": "lua_Latn", "language_code": "lua"},
    {"name": "Ganda", "nllb": "lug_Latn", "language_code": "lg"},
    {"name": "Luo", "nllb": "luo_Latn", "language_code": "luo"},
    {"name": "Mizo", "nllb": "lus_Latn", "language_code": "lus"},
    {"name": "Standard Latvian", "nllb": "lvs_Latn", "language_code": "lv"},
    {"name": "Magahi", "nllb": "mag_Deva", "language_code": "mag"},
    {"name": "Maithili", "nllb": "mai_Deva", "language_code": "mai"},
    {"name": "Malayalam", "nllb": "mal_Mlym", "language_code": "ml-IN"},
    {"name": "Marathi", "nllb": "mar_Deva", "language_code": "mr"},
    {"name": "Minangkabau (Arabic script)", "nllb": "min_Arab", "language_code": "min_Arab"},
    {"name": "Minangkabau (Latin script)", "nllb": "min_Latn", "language_code": "min_Latn"},
    {"name": "Macedonian", "nllb": "mkd_Cyrl", "language_code": "mk"},
    {"name": "Maltese", "nllb": "mlt_Latn", "language_code": "mt"},
    {"name": "Meitei (Bengali script)", "nllb": "mni_Beng", "language_code": "mni"},
    {"name": "Mossi", "nllb": "mos_Latn", "language_code": "mos"},
    {"name": "Maori", "nllb": "mri_Latn", "language_code": "mi"},
    {"name": "Burmese", "nllb": "mya_Mymr", "language_code": "my"},
    {"name": "Dutch", "nllb": "nld_Latn", "language_code": "nl"},
    {"name": "Norwegian Nynorsk", "nllb": "nno_Latn", "language_code": "nn-NO"},
    {"name": "Norwegian Bokmål", "nllb": "nob_Latn", "language_code": "nb"},
    {"name": "Nepali", "nllb": "npi_Deva", "language_code": "ne-NP"},
    {"name": "Northern Sotho", "nllb": "nso_Latn", "language_code": "nso"},
    {"name": "Nuer", "nllb": "nus_Latn", "language_code": "nus"},
    {"name": "Nyanja", "nllb": "nya_Latn", "language_code": "ny"},
    {"name": "Occitan", "nllb": "oci_Latn", "language_code": "oc"},
    {"name": "Odia", "nllb": "ory_Orya", "language_code": "or"},
    {"name": "Pangasinan", "nllb": "pag_Latn", "language_code": "pag"},
    {"name": "Eastern Panjabi", "nllb": "pan_Guru", "language_code": "pa"},
    {"name": "Papiamento", "nllb": "pap_Latn", "language_code": "pap"},
    {"name": "Southern Pashto", "nllb": "pbt_Arab", "language_code": "pbt"},
    {"name": "Western Persian", "nllb": "pes_Arab", "language_code": "fa"},
    {"name": "Plateau Malagasy", "nllb": "plt_Latn", "language_code": "mg"},
    {"name": "Polish", "nllb": "pol_Latn", "language_code": "pl"},
    {"name": "Portuguese", "nllb": "por_Latn", "language_code": "pt-PT"},
    {"name": "Dari", "nllb": "prs_Arab", "language_code": "fa-AF"},
    {"name": "Ayacucho Quechua", "nllb": "quy_Latn", "language_code": "qu"},
    {"name": "Romanian", "nllb": "ron_Latn", "language_code": "ro"},
    {"name": "Rundi", "nllb": "run_Latn", "language_code": "rn"},
    {"name": "Russian", "nllb": "rus_Cyrl", "language_code": "ru"},
    {"name": "Sango", "nllb": "sag_Latn", "language_code": "sg"},
    {"name": "Sanskrit", "nllb": "san_Deva", "language_code": "sa"},
    {"name": "Santali", "nllb": "sat_Olck", "language_code": "sat"},
    {"name": "Sicilian", "nllb": "scn_Latn", "language_code": "scn"},
    {"name": "Shan", "nllb": "shn_Mymr", "language_code": "shn"},
    {"name": "Sinhala", "nllb": "sin_Sinh", "language_code": "si-LK"},
    {"name": "Slovak", "nllb": "slk_Latn", "language_code": "sk"},
    {"name": "Slovenian", "nllb": "slv_Latn", "language_code": "sl"},
    {"name": "Samoan", "nllb": "smo_Latn", "language_code": "sm"},
    {"name": "Shona", "nllb": "sna_Latn", "language_code": "sn"},
    {"name": "Sindhi", "nllb": "snd_Arab", "language_code": "sd"},
    {"name": "Somali", "nllb": "som_Latn", "language_code": "so"},
    {"name": "Southern Sotho", "nllb": "sot_Latn", "language_code": "st"},
    {"name": "Spanish", "nllb": "spa_Latn", "language_code": "es-ES"},
    {"name": "Sardinian", "nllb": "srd_Latn", "language_code": "sc"},
    {"name": "Serbian", "nllb": "srp_Cyrl", "language_code": "sr"},
    {"name": "Swati", "nllb": "ssw_Latn", "language_code": "ss"},
    {"name": "Sundanese", "nllb": "sun_Latn", "language_code": "su"},
    {"name": "Swedish", "nllb": "swe_Latn", "language_code": "sv-SE"},
    {"name": "Swahili", "nllb": "swh_Latn", "language_code": "sw"},
    {"name": "Silesian", "nllb": "szl_Latn", "language_code": "szl"},
    {"name": "Tamil", "nllb": "tam_Taml", "language_code": "ta"},
    {"name": "Tamasheq (Latin script)", "nllb": "taq_Latn", "language_code": "taq_Latn"},
    {"name": "Tamasheq (Tifinagh script)", "nllb": "taq_Tfng", "language_code": "taq_Tfng"},
    {"name": "Tatar", "nllb": "tat_Cyrl", "language_code": "tt-RU"},
    {"name": "Telugu", "nllb": "tel_Telu", "language_code": "te"},
    {"name": "Tajik", "nllb": "tgk_Cyrl", "language_code": "tg"},
    {"name": "Tagalog", "nllb": "tgl_Latn", "language_code": "tl"},
    {"name": "Thai", "nllb": "tha_Thai", "language_code": "th"},
    {"name": "Tigrinya", "nllb": "tir_Ethi", "language_code": "ti"},
    {"name": "Tok Pisin", "nllb": "tpi_Latn", "language_code": "tpi"},
    {"name": "Tswana", "nllb": "tsn_Latn", "language_code": "tn"},
    {"name": "Tsonga", "nllb": "tso_Latn", "language_code": "ts"},
    {"name": "Turkmen", "nllb": "tuk_Latn", "language_code": "tk"},
    {"name": "Tumbuka", "nllb": "tum_Latn", "language_code": "tum"},
    {"name": "Turkish", "nllb": "tur_Latn", "language_code": "tr"},
    {"name": "Twi", "nllb": "twi_Latn", "language_code": "tw"},
    {"name": "Central Atlas Tamazight", "nllb": "tzm_Tfng", "language_code": "tzm"},
    {"name": "Uyghur", "nllb": "uig_Arab", "language_code": "ug"},
    {"name": "Ukrainian", "nllb": "ukr_Cyrl", "language_code": "uk"},
    {"name": "Umbundu", "nllb": "umb_Latn", "language_code": "umb"},
    {"name": "Urdu", "nllb": "urd_Arab", "language_code": "ur"},
    {"name": "Northern Uzbek", "nllb": "uzn_Latn", "language_code": "uz"},
    {"name": "Venetian", "nllb": "vec_Latn", "language_code": "vec"},
    {"name": "Vietnamese", "nllb": "vie_Latn", "language_code": "vi"},
    {"name": "Waray", "nllb": "war_Latn", "language_code": "war"},
    {"name": "Wolof", "nllb": "wol_Latn", "language_code": "wo"},
    {"name": "Xhosa", "nllb": "xho_Latn", "language_code": "xh"},
    {"name": "Eastern Yiddish", "nllb": "ydd_Hebr", "language_code": "yi"},
    {"name": "Yoruba", "nllb": "yor_Latn", "language_code": "yo"},
    {"name": "Yue Chinese", "nllb": "yue_Hant", "language_code": "yue"},
    {"name": "Chinese (Simplified)", "nllb": "zho_Hans", "language_code": "zh-CN"},
    {"name": "Chinese (Traditional)", "nllb": "zho_Hant", "language_code": "zh-TW"},
    {"name": "Standard Malay", "nllb": "zsm_Latn", "language_code": "ms"},
    {"name": "Zulu", "nllb": "zul_Latn", "language_code": "zu"},
]
|
|
|
|
# Derived lookup tables. Each one is built from LANGUAGES in list order and is
# named <key field>_TO_<value field>. Should two entries ever share a key, the
# later entry in LANGUAGES wins (normal dict-comprehension behaviour).

# Keyed by display name.
NAME_TO_NLLB = {entry["name"]: entry["nllb"] for entry in LANGUAGES}
NAME_TO_LANGUAGE_CODE = {entry["name"]: entry["language_code"] for entry in LANGUAGES}

# Conversions between the two code systems.
LANGUAGE_CODE_TO_NLLB = {entry["language_code"]: entry["nllb"] for entry in LANGUAGES}
NLLB_TO_LANGUAGE_CODE = {entry["nllb"]: entry["language_code"] for entry in LANGUAGES}

# Code -> display name.
LANGUAGE_CODE_TO_NAME = {entry["language_code"]: entry["name"] for entry in LANGUAGES}
NLLB_TO_NAME = {entry["nllb"]: entry["name"] for entry in LANGUAGES}
|
|
|
|
|
|
def get_nllb_code(language_code_code):
    """Translate a language code into its NLLB code.

    Args:
        language_code_code: A key of ``LANGUAGE_CODE_TO_NLLB``.

    Returns:
        The mapped NLLB code, or ``None`` when the key is not in the table.
    """
    try:
        return LANGUAGE_CODE_TO_NLLB[language_code_code]
    except KeyError:
        return None
|
|
|
|
|
|
def get_language_code_code(nllb_code):
    """Translate an NLLB code into its language code.

    Args:
        nllb_code: A key of ``NLLB_TO_LANGUAGE_CODE``.

    Returns:
        The mapped language code, or ``None`` when the key is not in the table.
    """
    if nllb_code in NLLB_TO_LANGUAGE_CODE:
        return NLLB_TO_LANGUAGE_CODE[nllb_code]
    return None
|
|
|
|
|
|
def get_language_name_by_language_code(language_code_code):
    """Return the display name registered for a language code.

    Args:
        language_code_code: A key of ``LANGUAGE_CODE_TO_NAME``.

    Returns:
        The language name, or ``None`` when the key is not in the table.
    """
    language_name = LANGUAGE_CODE_TO_NAME.get(language_code_code, None)
    return language_name
|
|
|
|
|
|
def get_language_name_by_nllb(nllb_code):
    """Return the display name registered for an NLLB code.

    Args:
        nllb_code: A key of ``NLLB_TO_NAME``.

    Returns:
        The language name, or ``None`` when the key is not in the table.
    """
    try:
        language_name = NLLB_TO_NAME[nllb_code]
    except KeyError:
        language_name = None
    return language_name
|
|
|
|
|
|
def get_language_info(identifier, identifier_type="auto"):
    """Find the full ``LANGUAGES`` entry matching an identifier.

    Args:
        identifier: The value to look for. Names are compared
            case-insensitively (via ``str.lower``); NLLB codes and language
            codes are compared exactly.
        identifier_type: Which field(s) to compare against:
            ``"name"``, ``"nllb"``, ``"language_code"``, or ``"auto"``
            (the default) to accept a match on any of the three.

    Returns:
        The first entry of ``LANGUAGES`` (in list order) that matches, i.e.
        the dict with ``"name"``, ``"nllb"`` and ``"language_code"`` keys, or
        ``None`` when nothing matches. ``None`` is also returned for an
        unrecognised ``identifier_type`` and for a non-string ``identifier``
        (such as ``None``), which can never equal a name or code.
    """
    if identifier_type == "auto":
        fields = ("name", "nllb", "language_code")
    elif identifier_type in ("name", "nllb", "language_code"):
        fields = (identifier_type,)
    else:
        # Unknown selector: keep the historical "no match" result.
        return None

    # Only strings can be lower-cased; guarding here keeps a None/int
    # identifier from raising AttributeError, in line with the dict-based
    # getters in this module, which return None for unknown keys.
    is_text = isinstance(identifier, str)
    # Hoisted so the identifier is lower-cased once, not once per entry.
    lowered = identifier.lower() if is_text else None

    for lang in LANGUAGES:
        for field in fields:
            if field == "name":
                if is_text and lang["name"].lower() == lowered:
                    return lang
            elif lang[field] == identifier:
                return lang

    return None
|
|
|
|
|
|
def list_all_languages():
    """Return the display names of all entries, in ``LANGUAGES`` order.

    Returns:
        A new list of the ``"name"`` value of every entry.
    """
    names = []
    for entry in LANGUAGES:
        names.append(entry["name"])
    return names
|
|
|
|
|
|
def list_all_nllb_codes():
    """Return the NLLB codes of all entries, in ``LANGUAGES`` order.

    Returns:
        A new list of the ``"nllb"`` value of every entry.
    """
    nllb_codes = [entry["nllb"] for entry in LANGUAGES]
    return nllb_codes
|
|
|
|
|
|
def list_all_language_code_codes():
    """Return the language codes of all entries, in ``LANGUAGES`` order.

    Returns:
        A new list of the ``"language_code"`` value of every entry.
    """
    return list(map(lambda entry: entry["language_code"], LANGUAGES))
|