From cc6b56b034c72fef0855b3791139661ffb71fa28 Mon Sep 17 00:00:00 2001 From: y5nw <37980625+y5nw@users.noreply.github.com> Date: Sun, 14 Sep 2025 22:56:40 +0200 Subject: [PATCH] Minor refactor to the `Plural-Forms` parser (#16489) --- src/gettext_plural_form.cpp | 225 ++++++++++++----------------- src/gettext_plural_form.h | 40 +++-- src/unittest/test_translations.cpp | 67 ++++++--- 3 files changed, 158 insertions(+), 174 deletions(-) diff --git a/src/gettext_plural_form.cpp b/src/gettext_plural_form.cpp index 89dd721a44..a356e33e8e 100644 --- a/src/gettext_plural_form.cpp +++ b/src/gettext_plural_form.cpp @@ -1,136 +1,77 @@ -// Minetest +// Luanti // SPDX-License-Identifier: LGPL-2.1-or-later +/* This file implements a recursive descent parser for gettext plural forms. + * Left recursion (for left-associative operators) is implemented by parse_ltr, which iteratively attempts to reduce + * expressions from operations of the same precedence. This should not be confused with reduce_ltr, which recurses + * through a list of operators with the same precedence (not the input string!) to search for a match. + * Note that this only implements a subset of C expressions. See: + * https://git.savannah.gnu.org/gitweb/?p=gettext.git;a=blob;f=gettext-runtime/intl/plural.y + */ + #include "gettext_plural_form.h" #include "util/string.h" +#include -static size_t minsize(const GettextPluralForm::Ptr &form) +static GettextPluralForm::NumT identity(GettextPluralForm::NumT n) { - return form ? form->size() : 0; + return n; } -static size_t minsize(const GettextPluralForm::Ptr &f, const GettextPluralForm::Ptr &g) +static GettextPluralForm::NumT ternary_op(GettextPluralForm::NumT n, const GettextPluralForm::Function &cond, + const GettextPluralForm::Function &val, const GettextPluralForm::Function &alt) { - if (sizeof(g) > 0) - return std::min(minsize(f), minsize(g)); - return f ? f->size() : 0; + return cond(n) ? 
val(n) : alt(n); } -class Identity: public GettextPluralForm +template typename Func, class... Args> +static GettextPluralForm::Function wrap_op(Args&&... args) { - public: - Identity(size_t nplurals): GettextPluralForm(nplurals) {}; - NumT operator()(const NumT n) const override - { - return n; - } -}; + return std::bind(Func(), std::bind(std::move(args), std::placeholders::_1)...); +} -class ConstValue: public GettextPluralForm -{ - public: - ConstValue(size_t nplurals, NumT val): GettextPluralForm(nplurals), value(val) {}; - NumT operator()(const NumT n) const override - { - return value; - } - private: - NumT value; -}; +typedef std::pair ParserResult; +typedef ParserResult (*Parser)(std::wstring_view); -template typename F> -class UnaryOperation: public GettextPluralForm -{ - public: - UnaryOperation(const Ptr &op): - GettextPluralForm(minsize(op)), op(op) {} - NumT operator()(const NumT n) const override - { - if (operator bool()) - return func((*op)(n)); - return 0; - } - private: - Ptr op; - static constexpr F func = {}; -}; - -template typename F> -class BinaryOperation: public GettextPluralForm -{ - public: - BinaryOperation(const Ptr &lhs, const Ptr &rhs): - GettextPluralForm(minsize(lhs, rhs)), - lhs(lhs), rhs(rhs) {} - NumT operator()(const NumT n) const override - { - if (operator bool()) - return func((*lhs)(n), (*rhs)(n)); - return 0; - } - private: - Ptr lhs, rhs; - static constexpr F func = {}; -}; - -class TernaryOperation: public GettextPluralForm -{ - public: - TernaryOperation(const Ptr &cond, const Ptr &val, const Ptr &alt): - GettextPluralForm(std::min(minsize(cond), minsize(val, alt))), - cond(cond), val(val), alt(alt) {} - NumT operator()(const NumT n) const override - { - if (operator bool()) - return (*cond)(n) ? 
(*val)(n) : (*alt)(n); - return 0; - } - private: - Ptr cond, val, alt; -}; - -typedef std::pair ParserResult; -typedef ParserResult (*Parser)(const size_t, std::wstring_view); - -static ParserResult parse_expr(const size_t nplurals, std::wstring_view str); +static ParserResult parse_expr(std::wstring_view str); template typename Operator> -static ParserResult reduce_ltr(const size_t nplurals, const ParserResult &res, const wchar_t* pattern) +static ParserResult reduce_ltr_single(const ParserResult &res, const std::wstring &pattern) { if (!str_starts_with(res.second, pattern)) return ParserResult(nullptr, res.second); - auto next = Parser(nplurals, trim(res.second.substr(std::char_traits::length(pattern)))); + auto next = Parser(trim(res.second.substr(pattern.size()))); if (!next.first) return next; - next.first = GettextPluralForm::Ptr(new BinaryOperation(res.first, next.first)); + next.first = wrap_op(res.first, next.first); next.second = trim(next.second); return next; } template -static ParserResult reduce_ltr(const size_t nplurals, const ParserResult &res, const wchar_t**) +static ParserResult reduce_ltr(const ParserResult &res) { return ParserResult(nullptr, res.second); } template typename Operator, template typename... Operators> -static ParserResult reduce_ltr(const size_t nplurals, const ParserResult &res, const wchar_t** patterns) +static ParserResult reduce_ltr(const ParserResult &res, const std::wstring &pattern, const typename std::conditional<1,std::wstring,Operators>::type&... patterns) { - auto next = reduce_ltr(nplurals, res, patterns[0]); + auto next = reduce_ltr_single(res, pattern); if (next.first || next.second != res.second) return next; - return reduce_ltr(nplurals, res, patterns+1); + return reduce_ltr(res, patterns...); } -template typename Operator, template typename... Operators> -static ParserResult parse_ltr(const size_t nplurals, std::wstring_view str, const wchar_t** patterns) +template typename... 
Operators> +static ParserResult parse_ltr(std::wstring_view str, const typename std::conditional<1,std::wstring,Operators>::type&... patterns) { - auto &&pres = Parser(nplurals, str); + auto &&pres = Parser(str); if (!pres.first) return pres; pres.second = trim(pres.second); while (!pres.second.empty()) { - auto next = reduce_ltr(nplurals, pres, patterns); + auto next = reduce_ltr(pres, patterns...); if (!next.first) return pres; next.second = trim(next.second); @@ -139,25 +80,26 @@ static ParserResult parse_ltr(const size_t nplurals, std::wstring_view str, cons return pres; } -static ParserResult parse_atomic(const size_t nplurals, std::wstring_view str) +static ParserResult parse_atomic(std::wstring_view str) { if (str.empty()) return ParserResult(nullptr, str); if (str[0] == 'n') - return ParserResult(new Identity(nplurals), trim(str.substr(1))); + return ParserResult(identity, trim(str.substr(1))); wchar_t* endp; auto val = wcstoul(str.data(), &endp, 10); - return ParserResult(new ConstValue(nplurals, val), trim(str.substr(endp-str.data()))); + return ParserResult([val](GettextPluralForm::NumT _) -> GettextPluralForm::NumT { return val; }, + trim(str.substr(endp-str.data()))); } -static ParserResult parse_parenthesized(const size_t nplurals, std::wstring_view str) +static ParserResult parse_parenthesized(std::wstring_view str) { if (str.empty()) return ParserResult(nullptr, str); if (str[0] != '(') - return parse_atomic(nplurals, str); - auto result = parse_expr(nplurals, str.substr(1)); + return parse_atomic(str); + auto result = parse_expr(str.substr(1)); if (result.first) { if (result.second.empty() || result.second[0] != ')') result.first = nullptr; @@ -167,90 +109,101 @@ static ParserResult parse_parenthesized(const size_t nplurals, std::wstring_view return result; } -static ParserResult parse_negation(const size_t nplurals, std::wstring_view str) +static ParserResult parse_negation(std::wstring_view str) { if (str.empty()) return ParserResult(nullptr, 
str); if (str[0] != '!') - return parse_parenthesized(nplurals, str); - auto result = parse_negation(nplurals, trim(str.substr(1))); + return parse_parenthesized(str); + auto result = parse_negation(trim(str.substr(1))); if (result.first) - result.first = GettextPluralForm::Ptr(new UnaryOperation(result.first)); + result.first = wrap_op(result.first); return result; } -static ParserResult parse_multiplicative(const size_t nplurals, std::wstring_view str) +template struct safe_divides { + T operator()(T lhs, T rhs) const + { + return rhs == 0 ? 0 : (lhs / rhs); + } +}; + +template struct safe_modulus { + T operator()(T lhs, T rhs) const + { + return rhs == 0 ? 0 : (lhs % rhs); + } +}; + +static ParserResult parse_multiplicative(std::wstring_view str) { - static const wchar_t *patterns[] = { L"*", L"/", L"%" }; - return parse_ltr(nplurals, str, patterns); + return parse_ltr(str, L"*", L"/", L"%"); } -static ParserResult parse_additive(const size_t nplurals, std::wstring_view str) +static ParserResult parse_additive(std::wstring_view str) { - static const wchar_t *patterns[] = { L"+", L"-" }; - return parse_ltr(nplurals, str, patterns); + return parse_ltr(str, L"+", L"-"); } -static ParserResult parse_comparison(const size_t nplurals, std::wstring_view str) +static ParserResult parse_comparison(std::wstring_view str) { - static const wchar_t *patterns[] = { L"<=", L">=", L"<", L">" }; - return parse_ltr(nplurals, str, patterns); + return parse_ltr(str, L"<=", L">=", L"<", L">"); } -static ParserResult parse_equality(const size_t nplurals, std::wstring_view str) +static ParserResult parse_equality(std::wstring_view str) { - static const wchar_t *patterns[] = { L"==", L"!=" }; - return parse_ltr(nplurals, str, patterns); + return parse_ltr(str, L"==", L"!="); } -static ParserResult parse_conjunction(const size_t nplurals, std::wstring_view str) +static ParserResult parse_conjunction(std::wstring_view str) { - static const wchar_t *and_pattern[] = { L"&&" }; - return 
parse_ltr(nplurals, str, and_pattern); + return parse_ltr(str, L"&&"); } -static ParserResult parse_disjunction(const size_t nplurals, std::wstring_view str) +static ParserResult parse_disjunction(std::wstring_view str) { - static const wchar_t *or_pattern[] = { L"||" }; - return parse_ltr(nplurals, str, or_pattern); + return parse_ltr(str, L"||"); } -static ParserResult parse_ternary(const size_t nplurals, std::wstring_view str) +static ParserResult parse_ternary(std::wstring_view str) { - auto pres = parse_disjunction(nplurals, str); + auto pres = parse_disjunction(str); if (pres.second.empty() || pres.second[0] != '?') // no ? : return pres; auto cond = pres.first; - pres = parse_ternary(nplurals, trim(pres.second.substr(1))); + pres = parse_ternary(trim(pres.second.substr(1))); if (pres.second.empty() || pres.second[0] != ':') return ParserResult(nullptr, pres.second); auto val = pres.first; - pres = parse_ternary(nplurals, trim(pres.second.substr(1))); - return ParserResult(new TernaryOperation(cond, val, pres.first), pres.second); + pres = parse_ternary(trim(pres.second.substr(1))); + return ParserResult(std::bind(ternary_op, std::placeholders::_1, + std::move(cond), std::move(val), std::move(pres.first)), pres.second); } -static ParserResult parse_expr(const size_t nplurals, std::wstring_view str) +static ParserResult parse_expr(std::wstring_view str) { - return parse_ternary(nplurals, trim(str)); + return parse_ternary(trim(str)); } -GettextPluralForm::Ptr GettextPluralForm::parse(const size_t nplurals, std::wstring_view str) +static GettextPluralForm::Function parse(std::wstring_view str) { - if (nplurals == 0) - return nullptr; - auto result = parse_expr(nplurals, str); + auto result = parse_expr(str); if (!result.second.empty()) return nullptr; return result.first; } -GettextPluralForm::Ptr GettextPluralForm::parseHeaderLine(std::wstring_view str) +GettextPluralForm::GettextPluralForm(std::wstring_view str) { if (!str_starts_with(str, L"Plural-Forms: 
nplurals=") || !str_ends_with(str, L";")) - return nullptr; - auto nplurals = wcstoul(str.data()+23, nullptr, 10); + return; + auto size = wcstoul(str.data()+23, nullptr, 10); auto pos = str.find(L"plural="); if (pos == str.npos) - return nullptr; - return parse(nplurals, str.substr(pos+7, str.size()-pos-8)); + return; + auto result = parse(str.substr(pos+7, str.size()-pos-8)); + if (size > 0 && result) { + nplurals = size; + func = result; + } } diff --git a/src/gettext_plural_form.h b/src/gettext_plural_form.h index 1d3195e9a2..cfb6017503 100644 --- a/src/gettext_plural_form.h +++ b/src/gettext_plural_form.h @@ -1,33 +1,45 @@ -// Minetest +// Luanti // SPDX-License-Identifier: LGPL-2.1-or-later #pragma once #include #include +#include -// Note that this only implements a subset of C expressions. See: -// https://git.savannah.gnu.org/gitweb/?p=gettext.git;a=blob;f=gettext-runtime/intl/plural.y class GettextPluralForm { public: using NumT = unsigned long; + using Function = std::function; using Ptr = std::shared_ptr; + GettextPluralForm(std::wstring_view str); + size_t size() const { return nplurals; }; - virtual NumT operator()(const NumT) const = 0; - virtual operator bool() const - { - return size() > 0; - } - virtual ~GettextPluralForm() {}; - static GettextPluralForm::Ptr parse(const size_t nplurals, std::wstring_view str); - static GettextPluralForm::Ptr parseHeaderLine(std::wstring_view str); -protected: - GettextPluralForm(size_t nplurals): nplurals(nplurals) {}; + // Note that this function does not perform any bounds check as the number of plural + // translations provided by the translation file may deviate from nplurals. + NumT operator()(const NumT n) const { + return func ? func(n) : 0; + } + + operator bool() const + { + return nplurals > 0; + } + + static Ptr parseHeaderLine(std::wstring_view str) { + return Ptr(new GettextPluralForm(str)); + } private: - const size_t nplurals; + // The number of plural forms. 
+ size_t nplurals = 0; + + // The formula for determining the plural form based on the input value; see + // https://www.gnu.org/software/gettext/manual/html_node/Translating-plural-forms.html + // for details. + Function func = nullptr; }; diff --git a/src/unittest/test_translations.cpp b/src/unittest/test_translations.cpp index 5bab3e15cc..96f5acb9b6 100644 --- a/src/unittest/test_translations.cpp +++ b/src/unittest/test_translations.cpp @@ -25,40 +25,59 @@ TEST_CASE("test translations") { SECTION("Plural-Forms function for translations") { -#define REQUIRE_FORM_SIZE(x) {REQUIRE(form); REQUIRE(form->size() == (x));} - // Test cases from https://www.gnu.org/software/gettext/manual/html_node/Plural-forms.html - auto form = GettextPluralForm::parseHeaderLine(L"Plural-Forms: nplurals=2; plural=n != 1;"); +#define REQUIRE_FORM_SIZE(x) {REQUIRE(form); REQUIRE(form.size() == (x));} + // Basic test cases + auto form = GettextPluralForm(L"Plural-Forms: nplurals=2; plural=1;"); REQUIRE_FORM_SIZE(2); - CHECK((*form)(0) == 1); - CHECK((*form)(1) == 0); - CHECK((*form)(2) == 1); + CHECK(form(0) == 1); - form = GettextPluralForm::parseHeaderLine(L"Plural-Forms: nplurals=3; plural=n%10==1 && n%100!=11 ? 0 : n != 0 ? 1 : 2;"); + form = GettextPluralForm(L""); + REQUIRE(form.size() == 0); + CHECK(form(0) == 0); + + // Test cases from https://www.gnu.org/software/gettext/manual/html_node/Plural-forms.html + form = GettextPluralForm(L"Plural-Forms: nplurals=2; plural=n != 1;"); + REQUIRE_FORM_SIZE(2); + CHECK(form(0) == 1); + CHECK(form(1) == 0); + CHECK(form(2) == 1); + + form = GettextPluralForm(L"Plural-Forms: nplurals=3; plural=n%10==1 && n%100!=11 ? 0 : n != 0 ? 
1 : 2;"); REQUIRE_FORM_SIZE(3); - CHECK((*form)(0) == 2); - CHECK((*form)(1) == 0); - CHECK((*form)(102) == 1); - CHECK((*form)(111) == 1); + CHECK(form(0) == 2); + CHECK(form(1) == 0); + CHECK(form(102) == 1); + CHECK(form(111) == 1); - form = GettextPluralForm::parseHeaderLine(L"Plural-Forms: nplurals=3; " + form = GettextPluralForm(L"Plural-Forms: nplurals=3; " "plural=n%10==1 && n%100!=11 ? 0 : " "n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2;"); REQUIRE_FORM_SIZE(3); - CHECK((*form)(0) == 2); - CHECK((*form)(1) == 0); - CHECK((*form)(102) == 1); - CHECK((*form)(104) == 1); - CHECK((*form)(111) == 2); - CHECK((*form)(112) == 2); - CHECK((*form)(121) == 0); - CHECK((*form)(122) == 1); + CHECK(form(0) == 2); + CHECK(form(1) == 0); + CHECK(form(102) == 1); + CHECK(form(104) == 1); + CHECK(form(111) == 2); + CHECK(form(112) == 2); + CHECK(form(121) == 0); + CHECK(form(122) == 1); // Edge cases - form = GettextPluralForm::parseHeaderLine(L"Plural-Forms: nplurals=3; plural= (n-1+1)<=1 ? n||1?0:1 : 1?(!!2):2;"); + form = GettextPluralForm(L"Plural-Forms: nplurals=3; plural= (n-1+1)<=1 ? n||1?0:1 : 1?(!!2):2;"); REQUIRE_FORM_SIZE(3); - CHECK((*form)(0) == 0); - CHECK((*form)(1) == 0); - CHECK((*form)(2) == 1); + CHECK(form(0) == 0); + CHECK(form(1) == 0); + CHECK(form(2) == 1); + + form = GettextPluralForm(L"Plural-Forms: nplurals=2; plural=4/n;"); + REQUIRE_FORM_SIZE(2); + CHECK(form(1) == 4); + CHECK(form(0) == 0); + + form = GettextPluralForm(L"Plural-Forms: nplurals=2; plural=7%n;"); + REQUIRE_FORM_SIZE(2); + CHECK(form(3) == 1); + CHECK(form(0) == 0); #undef REQUIRE_FORM_SIZE }