From 2c91ba2513cd1b11beaa4bf45cf5b4d341129b1c Mon Sep 17 00:00:00 2001 From: y5nw <37980625+y5nw@users.noreply.github.com> Date: Sun, 23 Feb 2025 23:34:05 +0100 Subject: [PATCH] Implement language code parsing --- src/unittest/CMakeLists.txt | 1 + src/unittest/test_langcode.cpp | 16 ++++++++++++ src/util/CMakeLists.txt | 1 + src/util/langcode.cpp | 47 ++++++++++++++++++++++++++++++++++ src/util/langcode.h | 9 +++++++ src/util/string.h | 14 +++++++--- 6 files changed, 85 insertions(+), 3 deletions(-) create mode 100644 src/unittest/test_langcode.cpp create mode 100644 src/util/langcode.cpp create mode 100644 src/util/langcode.h diff --git a/src/unittest/CMakeLists.txt b/src/unittest/CMakeLists.txt index c31be6e4e4..c175d65cf5 100644 --- a/src/unittest/CMakeLists.txt +++ b/src/unittest/CMakeLists.txt @@ -19,6 +19,7 @@ set (UNITTEST_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/test_irrptr.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_irr_matrix4.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_irr_rotation.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/test_langcode.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_logging.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_lbmmanager.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_lua.cpp diff --git a/src/unittest/test_langcode.cpp b/src/unittest/test_langcode.cpp new file mode 100644 index 0000000000..e131bc636c --- /dev/null +++ b/src/unittest/test_langcode.cpp @@ -0,0 +1,16 @@ +// Luanti +// SPDX-License-Identifier: LGPL-2.1-or-later + +#include "util/langcode.h" +#include "catch.h" + +TEST_CASE("test langcode") +{ + SECTION("test language list") + { + CHECK(expand_language_list(L"de_DE@euro.UTF-8:fr") == L"de_DE@euro:de_DE:de:fr"); + CHECK(expand_language_list(L"zh_HK:yue_HK:zh_TW") == L"zh_HK:yue_HK:yue:zh_TW:zh"); + CHECK(expand_language_list(L"de_DE:fr:de_CH:en:de:de_AT") == L"de_DE:fr:de_CH:en:de:de_AT"); + CHECK(expand_language_list(L".UTF-8:de:.ISO-8859-1:fr:.GB2312") == L"de:fr"); + } +} diff --git a/src/util/CMakeLists.txt b/src/util/CMakeLists.txt index 0cbf0eaa15..205ae645d4 100644 --- a/src/util/CMakeLists.txt +++ b/src/util/CMakeLists.txt @@ -11,6 +11,7 @@ set(util_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/guid.cpp ${CMAKE_CURRENT_SOURCE_DIR}/hashing.cpp ${CMAKE_CURRENT_SOURCE_DIR}/ieee_float.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/langcode.cpp ${CMAKE_CURRENT_SOURCE_DIR}/metricsbackend.cpp ${CMAKE_CURRENT_SOURCE_DIR}/numeric.cpp ${CMAKE_CURRENT_SOURCE_DIR}/pointedthing.cpp diff --git a/src/util/langcode.cpp b/src/util/langcode.cpp new file mode 100644 index 0000000000..e5e5ccd7f3 --- /dev/null +++ b/src/util/langcode.cpp @@ -0,0 +1,47 @@ +// Luanti +// SPDX-License-Identifier: LGPL-2.1-or-later + +#include +#include "util/string.h" + +std::vector parse_language_list(const std::wstring &lang) +{ + std::unordered_map added_by; + std::vector> expanded; + + for (const auto &name: str_split(lang, L':')) { + auto pos = name.find(L'.'); // strip encoding information + const auto realname = pos == name.npos ? name : name.substr(0, pos); + if (realname.empty()) + continue; + + std::vector basenames = {}; + auto base = realname; + do { + if (added_by[base] == base) + break; + added_by[base] = realname; + basenames.push_back(base); + + pos = base.find_last_of(L"_@"); + base = base.substr(0, pos); + } while (pos != base.npos); + if (!basenames.empty()) + expanded.push_back(std::move(basenames)); + } + + std::vector langlist; + for (auto &basenames: expanded) + { + auto first = basenames.front(); + for (auto &&name: basenames) + if (added_by[name] == first) + langlist.push_back(std::move(name)); + } + return langlist; +} + +std::wstring expand_language_list(const std::wstring &lang) +{ + return str_join(parse_language_list(lang), L":"); +} diff --git a/src/util/langcode.h b/src/util/langcode.h new file mode 100644 index 0000000000..800a0dda0b --- /dev/null +++ b/src/util/langcode.h @@ -0,0 +1,9 @@ +// Luanti +// SPDX-License-Identifier: LGPL-2.1-or-later + +#pragma once +#include +#include + +std::vector parse_language_list(const std::wstring &lang); +std::wstring expand_language_list(const std::wstring &lang); diff --git a/src/util/string.h b/src/util/string.h index 6674aae8a0..ca965decac 100644 --- a/src/util/string.h +++ b/src/util/string.h @@ -746,11 +746,12 @@ inline const std::string duration_to_string(int sec) * * @return A std::string */ +template [[nodiscard]] -inline std::string str_join(const std::vector &list, - std::string_view delimiter) +inline std::basic_string str_join(const std::vector> &list, + std::basic_string_view delimiter) { - std::ostringstream oss; + std::basic_ostringstream oss; bool first = true; for (const auto &part : list) { if (!first) @@ -761,6 +762,13 @@ inline std::string str_join(const std::vector &list, return oss.str(); } +template +inline std::basic_string str_join(const std::vector> &list, + const T *delimiter) +{ + return str_join(list, std::basic_string_view(delimiter)); +} + #if IS_CLIENT_BUILD /** * Create a UTF8 std::string from an core::stringw.