1
0
Fork 0
mirror of https://github.com/luanti-org/luanti.git synced 2025-09-15 18:57:08 +00:00

Minor refactor to the Plural-Forms parser (#16489)

This commit is contained in:
y5nw 2025-09-14 22:56:40 +02:00 committed by GitHub
parent 053ca6287a
commit cc6b56b034
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 158 additions and 174 deletions

View file

@ -1,136 +1,77 @@
// Minetest // Luanti
// SPDX-License-Identifier: LGPL-2.1-or-later // SPDX-License-Identifier: LGPL-2.1-or-later
/* This file implements a recursive descent parser for gettext plural forms.
* Left recursion (for left-associative operators) is implemented by parse_ltr, which iteratively attempts to reduce
* expressions from operations of the same precedence. This should not be confused with reduce_ltr, which recurses
* through a list of operators with the same precedence (not the input string!) to search for a match.
* Note that this only implements a subset of C expressions. See:
* https://git.savannah.gnu.org/gitweb/?p=gettext.git;a=blob;f=gettext-runtime/intl/plural.y
*/
#include "gettext_plural_form.h" #include "gettext_plural_form.h"
#include "util/string.h" #include "util/string.h"
#include <type_traits>
static size_t minsize(const GettextPluralForm::Ptr &form) static GettextPluralForm::NumT identity(GettextPluralForm::NumT n)
{ {
return form ? form->size() : 0; return n;
} }
static size_t minsize(const GettextPluralForm::Ptr &f, const GettextPluralForm::Ptr &g) static GettextPluralForm::NumT ternary_op(GettextPluralForm::NumT n, const GettextPluralForm::Function &cond,
const GettextPluralForm::Function &val, const GettextPluralForm::Function &alt)
{ {
if (sizeof(g) > 0) return cond(n) ? val(n) : alt(n);
return std::min(minsize(f), minsize(g));
return f ? f->size() : 0;
} }
class Identity: public GettextPluralForm template<template<typename> typename Func, class... Args>
static GettextPluralForm::Function wrap_op(Args&&... args)
{ {
public: return std::bind(Func<GettextPluralForm::NumT>(), std::bind(std::move(args), std::placeholders::_1)...);
Identity(size_t nplurals): GettextPluralForm(nplurals) {}; }
NumT operator()(const NumT n) const override
{
return n;
}
};
class ConstValue: public GettextPluralForm typedef std::pair<GettextPluralForm::Function, std::wstring_view> ParserResult;
{ typedef ParserResult (*Parser)(std::wstring_view);
public:
ConstValue(size_t nplurals, NumT val): GettextPluralForm(nplurals), value(val) {};
NumT operator()(const NumT n) const override
{
return value;
}
private:
NumT value;
};
template<template<typename> typename F> static ParserResult parse_expr(std::wstring_view str);
class UnaryOperation: public GettextPluralForm
{
public:
UnaryOperation(const Ptr &op):
GettextPluralForm(minsize(op)), op(op) {}
NumT operator()(const NumT n) const override
{
if (operator bool())
return func((*op)(n));
return 0;
}
private:
Ptr op;
static constexpr F<NumT> func = {};
};
template<template<typename> typename F>
class BinaryOperation: public GettextPluralForm
{
public:
BinaryOperation(const Ptr &lhs, const Ptr &rhs):
GettextPluralForm(minsize(lhs, rhs)),
lhs(lhs), rhs(rhs) {}
NumT operator()(const NumT n) const override
{
if (operator bool())
return func((*lhs)(n), (*rhs)(n));
return 0;
}
private:
Ptr lhs, rhs;
static constexpr F<NumT> func = {};
};
class TernaryOperation: public GettextPluralForm
{
public:
TernaryOperation(const Ptr &cond, const Ptr &val, const Ptr &alt):
GettextPluralForm(std::min(minsize(cond), minsize(val, alt))),
cond(cond), val(val), alt(alt) {}
NumT operator()(const NumT n) const override
{
if (operator bool())
return (*cond)(n) ? (*val)(n) : (*alt)(n);
return 0;
}
private:
Ptr cond, val, alt;
};
typedef std::pair<GettextPluralForm::Ptr, std::wstring_view> ParserResult;
typedef ParserResult (*Parser)(const size_t, std::wstring_view);
static ParserResult parse_expr(const size_t nplurals, std::wstring_view str);
template<Parser Parser, template<typename> typename Operator> template<Parser Parser, template<typename> typename Operator>
static ParserResult reduce_ltr(const size_t nplurals, const ParserResult &res, const wchar_t* pattern) static ParserResult reduce_ltr_single(const ParserResult &res, const std::wstring &pattern)
{ {
if (!str_starts_with(res.second, pattern)) if (!str_starts_with(res.second, pattern))
return ParserResult(nullptr, res.second); return ParserResult(nullptr, res.second);
auto next = Parser(nplurals, trim(res.second.substr(std::char_traits<wchar_t>::length(pattern)))); auto next = Parser(trim(res.second.substr(pattern.size())));
if (!next.first) if (!next.first)
return next; return next;
next.first = GettextPluralForm::Ptr(new BinaryOperation<Operator>(res.first, next.first)); next.first = wrap_op<Operator>(res.first, next.first);
next.second = trim(next.second); next.second = trim(next.second);
return next; return next;
} }
template<Parser Parser> template<Parser Parser>
static ParserResult reduce_ltr(const size_t nplurals, const ParserResult &res, const wchar_t**) static ParserResult reduce_ltr(const ParserResult &res)
{ {
return ParserResult(nullptr, res.second); return ParserResult(nullptr, res.second);
} }
template<Parser Parser, template<typename> typename Operator, template<typename> typename... Operators> template<Parser Parser, template<typename> typename Operator, template<typename> typename... Operators>
static ParserResult reduce_ltr(const size_t nplurals, const ParserResult &res, const wchar_t** patterns) static ParserResult reduce_ltr(const ParserResult &res, const std::wstring &pattern, const typename std::conditional<1,std::wstring,Operators<GettextPluralForm::NumT>>::type&... patterns)
{ {
auto next = reduce_ltr<Parser, Operator>(nplurals, res, patterns[0]); auto next = reduce_ltr_single<Parser, Operator>(res, pattern);
if (next.first || next.second != res.second) if (next.first || next.second != res.second)
return next; return next;
return reduce_ltr<Parser, Operators...>(nplurals, res, patterns+1); return reduce_ltr<Parser, Operators...>(res, patterns...);
} }
template<Parser Parser, template<typename> typename Operator, template<typename> typename... Operators> template<Parser Parser, template<typename> typename... Operators>
static ParserResult parse_ltr(const size_t nplurals, std::wstring_view str, const wchar_t** patterns) static ParserResult parse_ltr(std::wstring_view str, const typename std::conditional<1,std::wstring,Operators<GettextPluralForm::NumT>>::type&... patterns)
{ {
auto &&pres = Parser(nplurals, str); auto &&pres = Parser(str);
if (!pres.first) if (!pres.first)
return pres; return pres;
pres.second = trim(pres.second); pres.second = trim(pres.second);
while (!pres.second.empty()) { while (!pres.second.empty()) {
auto next = reduce_ltr<Parser, Operator, Operators...>(nplurals, pres, patterns); auto next = reduce_ltr<Parser, Operators...>(pres, patterns...);
if (!next.first) if (!next.first)
return pres; return pres;
next.second = trim(next.second); next.second = trim(next.second);
@ -139,25 +80,26 @@ static ParserResult parse_ltr(const size_t nplurals, std::wstring_view str, cons
return pres; return pres;
} }
static ParserResult parse_atomic(const size_t nplurals, std::wstring_view str) static ParserResult parse_atomic(std::wstring_view str)
{ {
if (str.empty()) if (str.empty())
return ParserResult(nullptr, str); return ParserResult(nullptr, str);
if (str[0] == 'n') if (str[0] == 'n')
return ParserResult(new Identity(nplurals), trim(str.substr(1))); return ParserResult(identity, trim(str.substr(1)));
wchar_t* endp; wchar_t* endp;
auto val = wcstoul(str.data(), &endp, 10); auto val = wcstoul(str.data(), &endp, 10);
return ParserResult(new ConstValue(nplurals, val), trim(str.substr(endp-str.data()))); return ParserResult([val](GettextPluralForm::NumT _) -> GettextPluralForm::NumT { return val; },
trim(str.substr(endp-str.data())));
} }
static ParserResult parse_parenthesized(const size_t nplurals, std::wstring_view str) static ParserResult parse_parenthesized(std::wstring_view str)
{ {
if (str.empty()) if (str.empty())
return ParserResult(nullptr, str); return ParserResult(nullptr, str);
if (str[0] != '(') if (str[0] != '(')
return parse_atomic(nplurals, str); return parse_atomic(str);
auto result = parse_expr(nplurals, str.substr(1)); auto result = parse_expr(str.substr(1));
if (result.first) { if (result.first) {
if (result.second.empty() || result.second[0] != ')') if (result.second.empty() || result.second[0] != ')')
result.first = nullptr; result.first = nullptr;
@ -167,90 +109,101 @@ static ParserResult parse_parenthesized(const size_t nplurals, std::wstring_view
return result; return result;
} }
static ParserResult parse_negation(const size_t nplurals, std::wstring_view str) static ParserResult parse_negation(std::wstring_view str)
{ {
if (str.empty()) if (str.empty())
return ParserResult(nullptr, str); return ParserResult(nullptr, str);
if (str[0] != '!') if (str[0] != '!')
return parse_parenthesized(nplurals, str); return parse_parenthesized(str);
auto result = parse_negation(nplurals, trim(str.substr(1))); auto result = parse_negation(trim(str.substr(1)));
if (result.first) if (result.first)
result.first = GettextPluralForm::Ptr(new UnaryOperation<std::logical_not>(result.first)); result.first = wrap_op<std::logical_not>(result.first);
return result; return result;
} }
static ParserResult parse_multiplicative(const size_t nplurals, std::wstring_view str) template<typename T> struct safe_divides {
T operator()(T lhs, T rhs) const
{
return rhs == 0 ? 0 : (lhs / rhs);
}
};
template<typename T> struct safe_modulus {
T operator()(T lhs, T rhs) const
{
return rhs == 0 ? 0 : (lhs % rhs);
}
};
static ParserResult parse_multiplicative(std::wstring_view str)
{ {
static const wchar_t *patterns[] = { L"*", L"/", L"%" }; return parse_ltr<parse_negation, std::multiplies, safe_divides, safe_modulus>(str, L"*", L"/", L"%");
return parse_ltr<parse_negation, std::multiplies, std::divides, std::modulus>(nplurals, str, patterns);
} }
static ParserResult parse_additive(const size_t nplurals, std::wstring_view str) static ParserResult parse_additive(std::wstring_view str)
{ {
static const wchar_t *patterns[] = { L"+", L"-" }; return parse_ltr<parse_multiplicative, std::plus, std::minus>(str, L"+", L"-");
return parse_ltr<parse_multiplicative, std::plus, std::minus>(nplurals, str, patterns);
} }
static ParserResult parse_comparison(const size_t nplurals, std::wstring_view str) static ParserResult parse_comparison(std::wstring_view str)
{ {
static const wchar_t *patterns[] = { L"<=", L">=", L"<", L">" }; return parse_ltr<parse_additive, std::less_equal, std::greater_equal, std::less, std::greater>(str, L"<=", L">=", L"<", L">");
return parse_ltr<parse_additive, std::less_equal, std::greater_equal, std::less, std::greater>(nplurals, str, patterns);
} }
static ParserResult parse_equality(const size_t nplurals, std::wstring_view str) static ParserResult parse_equality(std::wstring_view str)
{ {
static const wchar_t *patterns[] = { L"==", L"!=" }; return parse_ltr<parse_comparison, std::equal_to, std::not_equal_to>(str, L"==", L"!=");
return parse_ltr<parse_comparison, std::equal_to, std::not_equal_to>(nplurals, str, patterns);
} }
static ParserResult parse_conjunction(const size_t nplurals, std::wstring_view str) static ParserResult parse_conjunction(std::wstring_view str)
{ {
static const wchar_t *and_pattern[] = { L"&&" }; return parse_ltr<parse_equality, std::logical_and>(str, L"&&");
return parse_ltr<parse_equality, std::logical_and>(nplurals, str, and_pattern);
} }
static ParserResult parse_disjunction(const size_t nplurals, std::wstring_view str) static ParserResult parse_disjunction(std::wstring_view str)
{ {
static const wchar_t *or_pattern[] = { L"||" }; return parse_ltr<parse_conjunction, std::logical_or>(str, L"||");
return parse_ltr<parse_conjunction, std::logical_or>(nplurals, str, or_pattern);
} }
static ParserResult parse_ternary(const size_t nplurals, std::wstring_view str) static ParserResult parse_ternary(std::wstring_view str)
{ {
auto pres = parse_disjunction(nplurals, str); auto pres = parse_disjunction(str);
if (pres.second.empty() || pres.second[0] != '?') // no ? : if (pres.second.empty() || pres.second[0] != '?') // no ? :
return pres; return pres;
auto cond = pres.first; auto cond = pres.first;
pres = parse_ternary(nplurals, trim(pres.second.substr(1))); pres = parse_ternary(trim(pres.second.substr(1)));
if (pres.second.empty() || pres.second[0] != ':') if (pres.second.empty() || pres.second[0] != ':')
return ParserResult(nullptr, pres.second); return ParserResult(nullptr, pres.second);
auto val = pres.first; auto val = pres.first;
pres = parse_ternary(nplurals, trim(pres.second.substr(1))); pres = parse_ternary(trim(pres.second.substr(1)));
return ParserResult(new TernaryOperation(cond, val, pres.first), pres.second); return ParserResult(std::bind(ternary_op, std::placeholders::_1,
std::move(cond), std::move(val), std::move(pres.first)), pres.second);
} }
static ParserResult parse_expr(const size_t nplurals, std::wstring_view str) static ParserResult parse_expr(std::wstring_view str)
{ {
return parse_ternary(nplurals, trim(str)); return parse_ternary(trim(str));
} }
GettextPluralForm::Ptr GettextPluralForm::parse(const size_t nplurals, std::wstring_view str) static GettextPluralForm::Function parse(std::wstring_view str)
{ {
if (nplurals == 0) auto result = parse_expr(str);
return nullptr;
auto result = parse_expr(nplurals, str);
if (!result.second.empty()) if (!result.second.empty())
return nullptr; return nullptr;
return result.first; return result.first;
} }
GettextPluralForm::Ptr GettextPluralForm::parseHeaderLine(std::wstring_view str) GettextPluralForm::GettextPluralForm(std::wstring_view str)
{ {
if (!str_starts_with(str, L"Plural-Forms: nplurals=") || !str_ends_with(str, L";")) if (!str_starts_with(str, L"Plural-Forms: nplurals=") || !str_ends_with(str, L";"))
return nullptr; return;
auto nplurals = wcstoul(str.data()+23, nullptr, 10); auto size = wcstoul(str.data()+23, nullptr, 10);
auto pos = str.find(L"plural="); auto pos = str.find(L"plural=");
if (pos == str.npos) if (pos == str.npos)
return nullptr; return;
return parse(nplurals, str.substr(pos+7, str.size()-pos-8)); auto result = parse(str.substr(pos+7, str.size()-pos-8));
if (size > 0 && result) {
nplurals = size;
func = result;
}
} }

View file

@ -1,33 +1,45 @@
// Minetest // Luanti
// SPDX-License-Identifier: LGPL-2.1-or-later // SPDX-License-Identifier: LGPL-2.1-or-later
#pragma once #pragma once
#include <string_view> #include <string_view>
#include <memory> #include <memory>
#include <functional>
// Note that this only implements a subset of C expressions. See:
// https://git.savannah.gnu.org/gitweb/?p=gettext.git;a=blob;f=gettext-runtime/intl/plural.y
class GettextPluralForm class GettextPluralForm
{ {
public: public:
using NumT = unsigned long; using NumT = unsigned long;
using Function = std::function<NumT(NumT)>;
using Ptr = std::shared_ptr<GettextPluralForm>; using Ptr = std::shared_ptr<GettextPluralForm>;
GettextPluralForm(std::wstring_view str);
size_t size() const size_t size() const
{ {
return nplurals; return nplurals;
}; };
virtual NumT operator()(const NumT) const = 0;
virtual operator bool() const
{
return size() > 0;
}
virtual ~GettextPluralForm() {};
static GettextPluralForm::Ptr parse(const size_t nplurals, std::wstring_view str); // Note that this function does not perform any bounds check as the number of plural
static GettextPluralForm::Ptr parseHeaderLine(std::wstring_view str); // translations provided by the translation file may deviate from nplurals,
protected: NumT operator()(const NumT n) const {
GettextPluralForm(size_t nplurals): nplurals(nplurals) {}; return func ? func(n) : 0;
}
operator bool() const
{
return nplurals > 0;
}
static Ptr parseHeaderLine(std::wstring_view str) {
return Ptr(new GettextPluralForm(str));
}
private: private:
const size_t nplurals; // The number of plural forms.
size_t nplurals = 0;
// The formula for determining the plural form based on the input value; see
// https://www.gnu.org/software/gettext/manual/html_node/Translating-plural-forms.html
// for details.
Function func = nullptr;
}; };

View file

@ -25,40 +25,59 @@ TEST_CASE("test translations")
{ {
SECTION("Plural-Forms function for translations") SECTION("Plural-Forms function for translations")
{ {
#define REQUIRE_FORM_SIZE(x) {REQUIRE(form); REQUIRE(form->size() == (x));} #define REQUIRE_FORM_SIZE(x) {REQUIRE(form); REQUIRE(form.size() == (x));}
// Test cases from https://www.gnu.org/software/gettext/manual/html_node/Plural-forms.html // Basic test cases
auto form = GettextPluralForm::parseHeaderLine(L"Plural-Forms: nplurals=2; plural=n != 1;"); auto form = GettextPluralForm(L"Plural-Forms: nplurals=2; plural=1;");
REQUIRE_FORM_SIZE(2); REQUIRE_FORM_SIZE(2);
CHECK((*form)(0) == 1); CHECK(form(0) == 1);
CHECK((*form)(1) == 0);
CHECK((*form)(2) == 1);
form = GettextPluralForm::parseHeaderLine(L"Plural-Forms: nplurals=3; plural=n%10==1 && n%100!=11 ? 0 : n != 0 ? 1 : 2;"); form = GettextPluralForm(L"");
REQUIRE(form.size() == 0);
CHECK(form(0) == 0);
// Test cases from https://www.gnu.org/software/gettext/manual/html_node/Plural-forms.html
form = GettextPluralForm(L"Plural-Forms: nplurals=2; plural=n != 1;");
REQUIRE_FORM_SIZE(2);
CHECK(form(0) == 1);
CHECK(form(1) == 0);
CHECK(form(2) == 1);
form = GettextPluralForm(L"Plural-Forms: nplurals=3; plural=n%10==1 && n%100!=11 ? 0 : n != 0 ? 1 : 2;");
REQUIRE_FORM_SIZE(3); REQUIRE_FORM_SIZE(3);
CHECK((*form)(0) == 2); CHECK(form(0) == 2);
CHECK((*form)(1) == 0); CHECK(form(1) == 0);
CHECK((*form)(102) == 1); CHECK(form(102) == 1);
CHECK((*form)(111) == 1); CHECK(form(111) == 1);
form = GettextPluralForm::parseHeaderLine(L"Plural-Forms: nplurals=3; " form = GettextPluralForm(L"Plural-Forms: nplurals=3; "
"plural=n%10==1 && n%100!=11 ? 0 : " "plural=n%10==1 && n%100!=11 ? 0 : "
"n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2;"); "n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2;");
REQUIRE_FORM_SIZE(3); REQUIRE_FORM_SIZE(3);
CHECK((*form)(0) == 2); CHECK(form(0) == 2);
CHECK((*form)(1) == 0); CHECK(form(1) == 0);
CHECK((*form)(102) == 1); CHECK(form(102) == 1);
CHECK((*form)(104) == 1); CHECK(form(104) == 1);
CHECK((*form)(111) == 2); CHECK(form(111) == 2);
CHECK((*form)(112) == 2); CHECK(form(112) == 2);
CHECK((*form)(121) == 0); CHECK(form(121) == 0);
CHECK((*form)(122) == 1); CHECK(form(122) == 1);
// Edge cases // Edge cases
form = GettextPluralForm::parseHeaderLine(L"Plural-Forms: nplurals=3; plural= (n-1+1)<=1 ? n||1?0:1 : 1?(!!2):2;"); form = GettextPluralForm(L"Plural-Forms: nplurals=3; plural= (n-1+1)<=1 ? n||1?0:1 : 1?(!!2):2;");
REQUIRE_FORM_SIZE(3); REQUIRE_FORM_SIZE(3);
CHECK((*form)(0) == 0); CHECK(form(0) == 0);
CHECK((*form)(1) == 0); CHECK(form(1) == 0);
CHECK((*form)(2) == 1); CHECK(form(2) == 1);
form = GettextPluralForm(L"Plural-Forms: nplurals=2; plural=4/n;");
REQUIRE_FORM_SIZE(2);
CHECK(form(1) == 4);
CHECK(form(0) == 0);
form = GettextPluralForm(L"Plural-Forms: nplurals=2; plural=7%n;");
REQUIRE_FORM_SIZE(2);
CHECK(form(3) == 1);
CHECK(form(0) == 0);
#undef REQUIRE_FORM_SIZE #undef REQUIRE_FORM_SIZE
} }