1
0
Fork 0
mirror of https://github.com/luanti-org/luanti.git synced 2025-09-15 18:57:08 +00:00

Minor refactor to the Plural-Forms parser (#16489)

This commit is contained in:
y5nw 2025-09-14 22:56:40 +02:00 committed by GitHub
parent 053ca6287a
commit cc6b56b034
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 158 additions and 174 deletions

View file

@ -1,136 +1,77 @@
// Minetest
// Luanti
// SPDX-License-Identifier: LGPL-2.1-or-later
/* This file implements a recursive descent parser for gettext plural forms.
* Left recursion (for left-associative operators) is implemented by parse_ltr, which iteratively attempts to reduce
* expressions from operations of the same precedence. This should not be confused with reduce_ltr, which recurses
* through a list of operators with the same precedence (not the input string!) to search for a match.
* Note that this only implements a subset of C expressions. See:
* https://git.savannah.gnu.org/gitweb/?p=gettext.git;a=blob;f=gettext-runtime/intl/plural.y
*/
#include "gettext_plural_form.h"
#include "util/string.h"
#include <type_traits>
// Both revisions of this spot are shown interleaved:
//  - minsize(form) (nplurals-based revision): the form's size(), or 0 for a null pointer.
//  - identity(n) (std::function-based revision): returns its argument unchanged;
//    parse_atomic uses it as the function for the variable `n`.
static size_t minsize(const GettextPluralForm::Ptr &form)
static GettextPluralForm::NumT identity(GettextPluralForm::NumT n)
{
return form ? form->size() : 0;
return n;
}
// Both revisions of this spot are shown interleaved:
//  - minsize(f, g) (nplurals-based revision): the smaller of the two single-argument
//    minsize() results.
//  - ternary_op(n, cond, val, alt) (std::function-based revision): evaluates cond(n)
//    and then only the selected branch, mirroring the C `?:` operator.
static size_t minsize(const GettextPluralForm::Ptr &f, const GettextPluralForm::Ptr &g)
static GettextPluralForm::NumT ternary_op(GettextPluralForm::NumT n, const GettextPluralForm::Function &cond,
const GettextPluralForm::Function &val, const GettextPluralForm::Function &alt)
{
// NOTE(review): sizeof(g) is a non-zero compile-time constant, so this condition is
// always true and the `return f ? ...` fallback below can never run.
if (sizeof(g) > 0)
return std::min(minsize(f), minsize(g));
return f ? f->size() : 0;
return cond(n) ? val(n) : alt(n);
}
// Both revisions of this spot are shown interleaved:
//  - class Identity (nplurals-based revision): a form whose value is the input n itself.
//  - wrap_op<Func>(args...) (std::function-based revision): builds a Function that calls
//    every argument function with the input value and feeds the results to Func<NumT>.
//    Each argument is wrapped in its own std::bind so that it receives placeholder _1.
// NOTE(review): nested bind expressions are evaluated before the outer callable is
// invoked, so operators such as && and || built through wrap_op evaluate both operands
// (no short-circuiting) -- confirm this is acceptable.
// NOTE(review): std::move is applied to forwarding references, so lvalue arguments are
// moved from; the call sites visible in this file overwrite the moved-from variable
// with the result.
class Identity: public GettextPluralForm
template<template<typename> typename Func, class... Args>
static GettextPluralForm::Function wrap_op(Args&&... args)
{
public:
Identity(size_t nplurals): GettextPluralForm(nplurals) {};
NumT operator()(const NumT n) const override
{
return n;
}
};
return std::bind(Func<GettextPluralForm::NumT>(), std::bind(std::move(args), std::placeholders::_1)...);
}
class ConstValue: public GettextPluralForm
{
public:
ConstValue(size_t nplurals, NumT val): GettextPluralForm(nplurals), value(val) {};
NumT operator()(const NumT n) const override
{
return value;
}
private:
NumT value;
};
typedef std::pair<GettextPluralForm::Function, std::wstring_view> ParserResult;
typedef ParserResult (*Parser)(std::wstring_view);
template<template<typename> typename F>
class UnaryOperation: public GettextPluralForm
{
public:
UnaryOperation(const Ptr &op):
GettextPluralForm(minsize(op)), op(op) {}
NumT operator()(const NumT n) const override
{
if (operator bool())
return func((*op)(n));
return 0;
}
private:
Ptr op;
static constexpr F<NumT> func = {};
};
template<template<typename> typename F>
class BinaryOperation: public GettextPluralForm
{
public:
BinaryOperation(const Ptr &lhs, const Ptr &rhs):
GettextPluralForm(minsize(lhs, rhs)),
lhs(lhs), rhs(rhs) {}
NumT operator()(const NumT n) const override
{
if (operator bool())
return func((*lhs)(n), (*rhs)(n));
return 0;
}
private:
Ptr lhs, rhs;
static constexpr F<NumT> func = {};
};
class TernaryOperation: public GettextPluralForm
{
public:
TernaryOperation(const Ptr &cond, const Ptr &val, const Ptr &alt):
GettextPluralForm(std::min(minsize(cond), minsize(val, alt))),
cond(cond), val(val), alt(alt) {}
NumT operator()(const NumT n) const override
{
if (operator bool())
return (*cond)(n) ? (*val)(n) : (*alt)(n);
return 0;
}
private:
Ptr cond, val, alt;
};
typedef std::pair<GettextPluralForm::Ptr, std::wstring_view> ParserResult;
typedef ParserResult (*Parser)(const size_t, std::wstring_view);
static ParserResult parse_expr(const size_t nplurals, std::wstring_view str);
static ParserResult parse_expr(std::wstring_view str);
// Tries to extend an already parsed left operand `res` with ONE binary operator.
// (Both revisions are interleaved: the nplurals-taking lines build a BinaryOperation,
// the other lines build a Function through wrap_op.)
//  - If the remaining input res.second does not start with `pattern`, a null result is
//    returned together with the untouched remaining input.
//  - Otherwise the text after the pattern is parsed with `Parser` as the right operand;
//    a failed right operand is returned as-is, a successful one is combined with
//    res.first through `Operator`.
template<Parser Parser, template<typename> typename Operator>
static ParserResult reduce_ltr(const size_t nplurals, const ParserResult &res, const wchar_t* pattern)
static ParserResult reduce_ltr_single(const ParserResult &res, const std::wstring &pattern)
{
if (!str_starts_with(res.second, pattern))
return ParserResult(nullptr, res.second);
auto next = Parser(nplurals, trim(res.second.substr(std::char_traits<wchar_t>::length(pattern))));
auto next = Parser(trim(res.second.substr(pattern.size())));
if (!next.first)
return next;
next.first = GettextPluralForm::Ptr(new BinaryOperation<Operator>(res.first, next.first));
next.first = wrap_op<Operator>(res.first, next.first);
next.second = trim(next.second);
return next;
}
// Base case of the recursion over the operator list: no operator is left to try, so
// report "no match" (null result) and hand back the remaining input unchanged.
// (Both revisions of the signature are shown interleaved.)
template<Parser Parser>
static ParserResult reduce_ltr(const size_t nplurals, const ParserResult &res, const wchar_t**)
static ParserResult reduce_ltr(const ParserResult &res)
{
return ParserResult(nullptr, res.second);
}
// Recursive case over the operator list: try the first operator/pattern pair and fall
// through to the remaining pairs only if it did not match at all.
// (Both revisions are interleaved: one walks a `patterns` array, the other a parameter pack.)
// In the pack-based signature, std::conditional<1, std::wstring, ...>::type is always
// std::wstring; Operators is mentioned only so that the pack expands to exactly one
// pattern string per operator.
template<Parser Parser, template<typename> typename Operator, template<typename> typename... Operators>
static ParserResult reduce_ltr(const size_t nplurals, const ParserResult &res, const wchar_t** patterns)
static ParserResult reduce_ltr(const ParserResult &res, const std::wstring &pattern, const typename std::conditional<1,std::wstring,Operators<GettextPluralForm::NumT>>::type&... patterns)
{
auto next = reduce_ltr<Parser, Operator>(nplurals, res, patterns[0]);
auto next = reduce_ltr_single<Parser, Operator>(res, pattern);
// Stop on success, and also when the remaining input changed: the operator matched
// but its right operand failed to parse, so no other operator should be tried.
if (next.first || next.second != res.second)
return next;
return reduce_ltr<Parser, Operators...>(nplurals, res, patterns+1);
return reduce_ltr<Parser, Operators...>(res, patterns...);
}
template<Parser Parser, template<typename> typename Operator, template<typename> typename... Operators>
static ParserResult parse_ltr(const size_t nplurals, std::wstring_view str, const wchar_t** patterns)
template<Parser Parser, template<typename> typename... Operators>
static ParserResult parse_ltr(std::wstring_view str, const typename std::conditional<1,std::wstring,Operators<GettextPluralForm::NumT>>::type&... patterns)
{
auto &&pres = Parser(nplurals, str);
auto &&pres = Parser(str);
if (!pres.first)
return pres;
pres.second = trim(pres.second);
while (!pres.second.empty()) {
auto next = reduce_ltr<Parser, Operator, Operators...>(nplurals, pres, patterns);
auto next = reduce_ltr<Parser, Operators...>(pres, patterns...);
if (!next.first)
return pres;
next.second = trim(next.second);
@ -139,25 +80,26 @@ static ParserResult parse_ltr(const size_t nplurals, std::wstring_view str, cons
return pres;
}
// Parses an atom: either the variable `n` or an unsigned decimal literal. Returns the
// parsed expression together with the trimmed remaining input; empty input yields a
// null result. (Both revisions are shown interleaved.)
static ParserResult parse_atomic(const size_t nplurals, std::wstring_view str)
static ParserResult parse_atomic(std::wstring_view str)
{
if (str.empty())
return ParserResult(nullptr, str);
if (str[0] == 'n')
return ParserResult(new Identity(nplurals), trim(str.substr(1)));
return ParserResult(identity, trim(str.substr(1)));
// NOTE(review): wcstoul stops at the first non-digit or NUL, not at the end of the
// view -- this assumes the buffer behind `str` is NUL-terminated; confirm.
// NOTE(review): when no digit is consumed (endp == str.data()) a constant 0 is still
// returned with the input left unconsumed instead of a null result -- confirm intended.
wchar_t* endp;
auto val = wcstoul(str.data(), &endp, 10);
return ParserResult(new ConstValue(nplurals, val), trim(str.substr(endp-str.data())));
return ParserResult([val](GettextPluralForm::NumT _) -> GettextPluralForm::NumT { return val; },
trim(str.substr(endp-str.data())));
}
static ParserResult parse_parenthesized(const size_t nplurals, std::wstring_view str)
static ParserResult parse_parenthesized(std::wstring_view str)
{
if (str.empty())
return ParserResult(nullptr, str);
if (str[0] != '(')
return parse_atomic(nplurals, str);
auto result = parse_expr(nplurals, str.substr(1));
return parse_atomic(str);
auto result = parse_expr(str.substr(1));
if (result.first) {
if (result.second.empty() || result.second[0] != ')')
result.first = nullptr;
@ -167,90 +109,101 @@ static ParserResult parse_parenthesized(const size_t nplurals, std::wstring_view
return result;
}
// Parses an optional chain of prefix `!` operators followed by a parenthesized or atomic
// expression. Without a leading `!` the input is delegated to parse_parenthesized;
// otherwise the rest is parsed recursively and, on success, wrapped in std::logical_not.
// Empty input yields a null result. (Both revisions are shown interleaved.)
static ParserResult parse_negation(const size_t nplurals, std::wstring_view str)
static ParserResult parse_negation(std::wstring_view str)
{
if (str.empty())
return ParserResult(nullptr, str);
if (str[0] != '!')
return parse_parenthesized(nplurals, str);
auto result = parse_negation(nplurals, trim(str.substr(1)));
return parse_parenthesized(str);
auto result = parse_negation(trim(str.substr(1)));
if (result.first)
result.first = GettextPluralForm::Ptr(new UnaryOperation<std::logical_not>(result.first));
result.first = wrap_op<std::logical_not>(result.first);
return result;
}
static ParserResult parse_multiplicative(const size_t nplurals, std::wstring_view str)
// Division functor shaped like std::divides, except that a zero divisor yields 0
// instead of performing the division.
template<typename T>
struct safe_divides {
	T operator()(T numerator, T denominator) const
	{
		if (denominator == 0)
			return 0;
		return numerator / denominator;
	}
};
// Remainder functor shaped like std::modulus, except that a zero divisor yields 0
// instead of performing the operation.
template<typename T>
struct safe_modulus {
	T operator()(T numerator, T denominator) const
	{
		if (denominator == 0)
			return 0;
		return numerator % denominator;
	}
};
// Parses a left-to-right chain of `*`, `/` and `%` whose operands are parsed by
// parse_negation. Operators and patterns are listed in matching order.
// (Both revisions of the body are interleaved: the `patterns` array version uses
// std::divides / std::modulus, the other version uses safe_divides / safe_modulus.)
static ParserResult parse_multiplicative(std::wstring_view str)
{
static const wchar_t *patterns[] = { L"*", L"/", L"%" };
return parse_ltr<parse_negation, std::multiplies, std::divides, std::modulus>(nplurals, str, patterns);
return parse_ltr<parse_negation, std::multiplies, safe_divides, safe_modulus>(str, L"*", L"/", L"%");
}
// Parses a left-to-right chain of `+` and `-` whose operands are parsed by
// parse_multiplicative. (Both revisions are shown interleaved.)
static ParserResult parse_additive(const size_t nplurals, std::wstring_view str)
static ParserResult parse_additive(std::wstring_view str)
{
static const wchar_t *patterns[] = { L"+", L"-" };
return parse_ltr<parse_multiplicative, std::plus, std::minus>(nplurals, str, patterns);
return parse_ltr<parse_multiplicative, std::plus, std::minus>(str, L"+", L"-");
}
// Parses a left-to-right chain of relational operators whose operands are parsed by
// parse_additive. The two-character patterns `<=` / `>=` are listed before `<` / `>`,
// so they are tried first. (Both revisions are shown interleaved.)
static ParserResult parse_comparison(const size_t nplurals, std::wstring_view str)
static ParserResult parse_comparison(std::wstring_view str)
{
static const wchar_t *patterns[] = { L"<=", L">=", L"<", L">" };
return parse_ltr<parse_additive, std::less_equal, std::greater_equal, std::less, std::greater>(nplurals, str, patterns);
return parse_ltr<parse_additive, std::less_equal, std::greater_equal, std::less, std::greater>(str, L"<=", L">=", L"<", L">");
}
// Parses a left-to-right chain of `==` and `!=` whose operands are parsed by
// parse_comparison. (Both revisions are shown interleaved.)
static ParserResult parse_equality(const size_t nplurals, std::wstring_view str)
static ParserResult parse_equality(std::wstring_view str)
{
static const wchar_t *patterns[] = { L"==", L"!=" };
return parse_ltr<parse_comparison, std::equal_to, std::not_equal_to>(nplurals, str, patterns);
return parse_ltr<parse_comparison, std::equal_to, std::not_equal_to>(str, L"==", L"!=");
}
// Parses a left-to-right chain of `&&` whose operands are parsed by parse_equality.
// (Both revisions are shown interleaved.)
static ParserResult parse_conjunction(const size_t nplurals, std::wstring_view str)
static ParserResult parse_conjunction(std::wstring_view str)
{
static const wchar_t *and_pattern[] = { L"&&" };
return parse_ltr<parse_equality, std::logical_and>(nplurals, str, and_pattern);
return parse_ltr<parse_equality, std::logical_and>(str, L"&&");
}
// Parses a left-to-right chain of `||` whose operands are parsed by parse_conjunction.
// (Both revisions are shown interleaved.)
static ParserResult parse_disjunction(const size_t nplurals, std::wstring_view str)
static ParserResult parse_disjunction(std::wstring_view str)
{
static const wchar_t *or_pattern[] = { L"||" };
return parse_ltr<parse_conjunction, std::logical_or>(nplurals, str, or_pattern);
return parse_ltr<parse_conjunction, std::logical_or>(str, L"||");
}
// Parses `cond ? val : alt`, or a plain disjunction when no `?` follows. Both branches
// are parsed by recursing into parse_ternary, which makes the operator nest to the
// right. A missing `:` after the first branch yields a null result.
// (Both revisions are interleaved: one builds a TernaryOperation, the other binds
// ternary_op.)
static ParserResult parse_ternary(const size_t nplurals, std::wstring_view str)
static ParserResult parse_ternary(std::wstring_view str)
{
auto pres = parse_disjunction(nplurals, str);
auto pres = parse_disjunction(str);
if (pres.second.empty() || pres.second[0] != '?') // no ? :
return pres;
auto cond = pres.first;
pres = parse_ternary(nplurals, trim(pres.second.substr(1)));
pres = parse_ternary(trim(pres.second.substr(1)));
if (pres.second.empty() || pres.second[0] != ':')
return ParserResult(nullptr, pres.second);
auto val = pres.first;
pres = parse_ternary(nplurals, trim(pres.second.substr(1)));
return ParserResult(new TernaryOperation(cond, val, pres.first), pres.second);
pres = parse_ternary(trim(pres.second.substr(1)));
// NOTE(review): val and the alternative (pres.first) are bound without a null check.
// The std::bind result always converts to a non-empty Function, so input such as
// "n?1:" appears to yield a form that throws std::bad_function_call once the empty
// branch is selected -- confirm and consider returning a null result instead.
return ParserResult(std::bind(ternary_op, std::placeholders::_1,
std::move(cond), std::move(val), std::move(pres.first)), pres.second);
}
// Entry point of the expression grammar: trims the input and parses a full expression
// starting at the ternary level. (Both revisions are shown interleaved.)
static ParserResult parse_expr(const size_t nplurals, std::wstring_view str)
static ParserResult parse_expr(std::wstring_view str)
{
return parse_ternary(nplurals, trim(str));
return parse_ternary(trim(str));
}
// Parses a complete plural expression and returns it, or nullptr if any input is left
// over after parsing. (Both revisions are interleaved: the nplurals-taking one is a
// static member that also rejects nplurals == 0, the other is a file-local helper
// returning a Function.)
GettextPluralForm::Ptr GettextPluralForm::parse(const size_t nplurals, std::wstring_view str)
static GettextPluralForm::Function parse(std::wstring_view str)
{
if (nplurals == 0)
return nullptr;
auto result = parse_expr(nplurals, str);
auto result = parse_expr(str);
if (!result.second.empty())
return nullptr;
return result.first;
}
// Parses a "Plural-Forms: nplurals=N; plural=EXPR;" header line.
// (Both revisions are interleaved: parseHeaderLine returns a Ptr or nullptr, while the
// constructor leaves the object in its default state when the line is rejected.)
GettextPluralForm::Ptr GettextPluralForm::parseHeaderLine(std::wstring_view str)
GettextPluralForm::GettextPluralForm(std::wstring_view str)
{
if (!str_starts_with(str, L"Plural-Forms: nplurals=") || !str_ends_with(str, L";"))
return nullptr;
// 23 is the length of the prefix "Plural-Forms: nplurals=" checked above.
auto nplurals = wcstoul(str.data()+23, nullptr, 10);
return;
auto size = wcstoul(str.data()+23, nullptr, 10);
auto pos = str.find(L"plural=");
if (pos == str.npos)
return nullptr;
// Skip the 7 characters of "plural=" and drop the trailing ';' (hence size-pos-8).
return parse(nplurals, str.substr(pos+7, str.size()-pos-8));
return;
auto result = parse(str.substr(pos+7, str.size()-pos-8));
// Only commit the parsed values when both the count and the formula are usable.
if (size > 0 && result) {
nplurals = size;
func = result;
}
}

View file

@ -1,33 +1,45 @@
// Minetest
// Luanti
// SPDX-License-Identifier: LGPL-2.1-or-later
#pragma once
#include <string_view>
#include <memory>
#include <functional>
// Note that this only implements a subset of C expressions. See:
// https://git.savannah.gnu.org/gitweb/?p=gettext.git;a=blob;f=gettext-runtime/intl/plural.y
// Represents the Plural-Forms rule of a gettext translation: the number of plural
// forms plus the formula that maps a count to a plural index.
// (Both revisions of the class are shown interleaved: an abstract base with virtual
// operator() and a protected size_t constructor, and a concrete class that stores the
// formula in a std::function.)
class GettextPluralForm
{
public:
using NumT = unsigned long;
using Function = std::function<NumT(NumT)>;
using Ptr = std::shared_ptr<GettextPluralForm>;
// Builds the form from a "Plural-Forms: ..." header line.
GettextPluralForm(std::wstring_view str);
// Number of plural forms (0 if none could be determined).
size_t size() const
{
return nplurals;
};
virtual NumT operator()(const NumT) const = 0;
virtual operator bool() const
{
return size() > 0;
}
virtual ~GettextPluralForm() {};
static GettextPluralForm::Ptr parse(const size_t nplurals, std::wstring_view str);
static GettextPluralForm::Ptr parseHeaderLine(std::wstring_view str);
protected:
GettextPluralForm(size_t nplurals): nplurals(nplurals) {};
// Evaluates the plural formula for n; returns 0 when no formula is set.
// Note that this function does not perform any bounds check as the number of plural
// translations provided by the translation file may deviate from nplurals.
NumT operator()(const NumT n) const {
return func ? func(n) : 0;
}
// True when at least one plural form is defined.
operator bool() const
{
return nplurals > 0;
}
// Convenience wrapper that constructs the form on the heap from a header line.
static Ptr parseHeaderLine(std::wstring_view str) {
return Ptr(new GettextPluralForm(str));
}
private:
const size_t nplurals;
// The number of plural forms.
size_t nplurals = 0;
// The formula for determining the plural form based on the input value; see
// https://www.gnu.org/software/gettext/manual/html_node/Translating-plural-forms.html
// for details.
Function func = nullptr;
};

View file

@ -25,40 +25,59 @@ TEST_CASE("test translations")
{
SECTION("Plural-Forms function for translations")
{
#define REQUIRE_FORM_SIZE(x) {REQUIRE(form); REQUIRE(form->size() == (x));}
// Test cases from https://www.gnu.org/software/gettext/manual/html_node/Plural-forms.html
auto form = GettextPluralForm::parseHeaderLine(L"Plural-Forms: nplurals=2; plural=n != 1;");
#define REQUIRE_FORM_SIZE(x) {REQUIRE(form); REQUIRE(form.size() == (x));}
// Basic test cases
auto form = GettextPluralForm(L"Plural-Forms: nplurals=2; plural=1;");
REQUIRE_FORM_SIZE(2);
CHECK((*form)(0) == 1);
CHECK((*form)(1) == 0);
CHECK((*form)(2) == 1);
CHECK(form(0) == 1);
form = GettextPluralForm::parseHeaderLine(L"Plural-Forms: nplurals=3; plural=n%10==1 && n%100!=11 ? 0 : n != 0 ? 1 : 2;");
form = GettextPluralForm(L"");
REQUIRE(form.size() == 0);
CHECK(form(0) == 0);
// Test cases from https://www.gnu.org/software/gettext/manual/html_node/Plural-forms.html
form = GettextPluralForm(L"Plural-Forms: nplurals=2; plural=n != 1;");
REQUIRE_FORM_SIZE(2);
CHECK(form(0) == 1);
CHECK(form(1) == 0);
CHECK(form(2) == 1);
form = GettextPluralForm(L"Plural-Forms: nplurals=3; plural=n%10==1 && n%100!=11 ? 0 : n != 0 ? 1 : 2;");
REQUIRE_FORM_SIZE(3);
CHECK((*form)(0) == 2);
CHECK((*form)(1) == 0);
CHECK((*form)(102) == 1);
CHECK((*form)(111) == 1);
CHECK(form(0) == 2);
CHECK(form(1) == 0);
CHECK(form(102) == 1);
CHECK(form(111) == 1);
form = GettextPluralForm::parseHeaderLine(L"Plural-Forms: nplurals=3; "
form = GettextPluralForm(L"Plural-Forms: nplurals=3; "
"plural=n%10==1 && n%100!=11 ? 0 : "
"n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2;");
REQUIRE_FORM_SIZE(3);
CHECK((*form)(0) == 2);
CHECK((*form)(1) == 0);
CHECK((*form)(102) == 1);
CHECK((*form)(104) == 1);
CHECK((*form)(111) == 2);
CHECK((*form)(112) == 2);
CHECK((*form)(121) == 0);
CHECK((*form)(122) == 1);
CHECK(form(0) == 2);
CHECK(form(1) == 0);
CHECK(form(102) == 1);
CHECK(form(104) == 1);
CHECK(form(111) == 2);
CHECK(form(112) == 2);
CHECK(form(121) == 0);
CHECK(form(122) == 1);
// Edge cases
form = GettextPluralForm::parseHeaderLine(L"Plural-Forms: nplurals=3; plural= (n-1+1)<=1 ? n||1?0:1 : 1?(!!2):2;");
form = GettextPluralForm(L"Plural-Forms: nplurals=3; plural= (n-1+1)<=1 ? n||1?0:1 : 1?(!!2):2;");
REQUIRE_FORM_SIZE(3);
CHECK((*form)(0) == 0);
CHECK((*form)(1) == 0);
CHECK((*form)(2) == 1);
CHECK(form(0) == 0);
CHECK(form(1) == 0);
CHECK(form(2) == 1);
form = GettextPluralForm(L"Plural-Forms: nplurals=2; plural=4/n;");
REQUIRE_FORM_SIZE(2);
CHECK(form(1) == 4);
CHECK(form(0) == 0);
form = GettextPluralForm(L"Plural-Forms: nplurals=2; plural=7%n;");
REQUIRE_FORM_SIZE(2);
CHECK(form(3) == 1);
CHECK(form(0) == 0);
#undef REQUIRE_FORM_SIZE
}