Gettext and plural support for client-side translations (#14726)

--------- Co-authored-by: Ekdohibs <nathanael.courant@laposte.net> Co-authored-by: y5nw <y5nw@protonmail.com> Co-authored-by: rubenwardy <rw@rubenwardy.com>
2025-09-30 19:22:14 +00:00 · 2024-10-13 11:29:08 +02:00 · 2024-10-13 11:29:08 +02:00 · e3aa79cffb
commit e3aa79cffb
parent dbbe0ca065
28 changed files with 1360 additions and 74 deletions
--- a/src/util/string.cpp
+++ b/src/util/string.cpp
@ -154,6 +154,16 @@ std::string wide_to_utf8(std::wstring_view input)
 	return out;
 }

+// Appends one Unicode codepoint to `result` as a single wchar_t unit.
+// Values that are not Unicode scalar values (the surrogate range
+// U+D800..U+DFFF, or anything above U+10FFFF) are appended as
+// U+FFFD REPLACEMENT CHARACTER instead.
+void wide_add_codepoint(std::wstring &result, char32_t codepoint)
+{
+	const bool is_surrogate = codepoint >= 0xD800 && codepoint <= 0xDFFF;
+	const bool beyond_unicode = codepoint > 0x10FFFF;
+
+	if (!is_surrogate && !beyond_unicode) {
+		result.push_back(codepoint);
+		return;
+	}
+
+	// Invalid codepoint: substitute the replacement character.
+	result.push_back(0xFFFD);
+}
+
 #else // _WIN32

 std::wstring utf8_to_wide(std::string_view input)
@ -180,6 +190,29 @@ std::string wide_to_utf8(std::wstring_view input)
 	return out;
 }

+// Appends one Unicode codepoint to `result` using UTF-16 code units:
+// codepoints below U+10000 take one unit, everything else is written as a
+// high/low surrogate pair. Values that are not Unicode scalar values (the
+// surrogate range U+D800..U+DFFF, or anything above U+10FFFF) are appended
+// as U+FFFD REPLACEMENT CHARACTER instead.
+void wide_add_codepoint(std::wstring &result, char32_t codepoint)
+{
+	const bool is_surrogate = codepoint >= 0xD800 && codepoint <= 0xDFFF;
+	const bool beyond_unicode = codepoint > 0x10FFFF;
+	if (is_surrogate || beyond_unicode) {
+		// Invalid codepoint: substitute the replacement character.
+		result.push_back(0xFFFD);
+		return;
+	}
+
+	if (codepoint < 0x10000) {
+		// Fits in a single 16-bit unit.
+		result.push_back(static_cast<wchar_t>(codepoint));
+		return;
+	}
+
+	// Supplementary plane: split the 20-bit offset from U+10000 into the
+	// upper 10 bits (high surrogate) and lower 10 bits (low surrogate).
+	const char32_t offset = codepoint - 0x10000;
+	const char32_t high = 0xD800 | (offset >> 10);
+	const char32_t low = 0xDC00 | (offset & 0x3FF);
+	result.push_back(static_cast<wchar_t>(high));
+	result.push_back(static_cast<wchar_t>(low));
+}
+
 #endif // _WIN32


@ -668,13 +701,20 @@ std::string wrap_rows(std::string_view from, unsigned row_len, bool has_color_co
 * We get the argument "White", translated, and create a template string with "@1" instead of it.
 * We finally get the template "@1 Wool" that was used in the beginning, which we translate
 * before filling it again.
+ *
+ * The \x1bT marking the beginning of a translated string allows two '@'-separated arguments:
+ * - The first one is the textdomain/context in which the string is to be translated. Most often,
+ *   this is the name of the mod which asked for the translation.
+ * - The second argument, if present, should be an integer; it is used to decide which plural form
+ *   to use, for languages containing several plural forms.
 */

 static void translate_all(std::wstring_view s, size_t &i,
 		Translations *translations, std::wstring &res);

 static void translate_string(std::wstring_view s, Translations *translations,
-		const std::wstring &textdomain, size_t &i, std::wstring &res)
+		const std::wstring &textdomain, size_t &i, std::wstring &res,
+		bool use_plural, unsigned long int number)
 {
 	std::vector<std::wstring> args;
 	int arg_number = 1;
@ -751,8 +791,17 @@ static void translate_string(std::wstring_view s, Translations *translations,
 	}

 	// Translate the template.
-	const std::wstring &toutput = translations ?
-		translations->getTranslation(textdomain, output) : output;
+	std::wstring toutput;
+	if (translations != nullptr) {
+		if (use_plural)
+			toutput = translations->getPluralTranslation(
+					textdomain, output, number);
+		else
+			toutput = translations->getTranslation(
+					textdomain, output);
+	} else {
+		toutput = output;
+	}

 	// Put back the arguments in the translated template.
 	size_t j = 0;
@ -835,10 +884,37 @@ static void translate_all(std::wstring_view s, size_t &i,
 		} else if (parts[0] == L"T") {
 			// Beginning of translated string.
 			std::wstring textdomain;
+			bool use_plural = false;
+			unsigned long int number = 0;
 			if (parts.size() > 1)
 				textdomain = parts[1];
+			if (parts.size() > 2 && parts[2] != L"") {
+				// parts[2] should contain a number used for selecting
+				// the plural form.
+				// However, we can't blindly cast it to an unsigned long int,
+				// as it might be too large for that.
+				//
+				// We follow the advice of gettext and reduce integers larger than 1000000
+				// to something in the range [1000000, 2000000), with the same last 6 digits.
+				//
+				// https://www.gnu.org/software/gettext/manual/html_node/Plural-forms.html
+				constexpr unsigned long int max = 1000000;
+
+				use_plural = true;
+				number = 0;
+				for (char c : parts[2]) {
+					if (L'0' <= c && c <= L'9') {
+						number = (10 * number + (unsigned long int)(c - L'0'));
+						if (number >= 2 * max) number = (number % max) + max;
+					} else {
+						// Invalid number
+						use_plural = false;
+						break;
+					}
+				}
+			}
 			std::wstring translated;
-			translate_string(s, translations, textdomain, i, translated);
+			translate_string(s, translations, textdomain, i, translated, use_plural, number);
 			res.append(translated);
 		} else {
 			// Another escape sequence, such as colors. Preserve it.
--- a/src/util/string.h
+++ b/src/util/string.h
@ -32,6 +32,7 @@ with this program; if not, write to the Free Software Foundation, Inc.,
 #include <sstream>
 #include <iomanip>
 #include <cctype>
+#include <cwctype>
 #include <unordered_map>

 class Translations;
@ -87,6 +88,8 @@ struct FlagDesc {
 std::wstring utf8_to_wide(std::string_view input);
 std::string wide_to_utf8(std::wstring_view input);

+void wide_add_codepoint(std::wstring &result, char32_t codepoint);
+
 std::string urlencode(std::string_view str);
 std::string urldecode(std::string_view str);

@ -325,19 +328,30 @@ inline std::string lowercase(std::string_view str)
 }


+/**
+ * Narrow-character whitespace test, used by the generic trim() helpers.
+ * @param c
+ * @return true if std::isspace classifies \p c as whitespace.
+ */
+inline bool my_isspace(const char c)
+{
+	// std::isspace is only defined for values representable as unsigned char
+	// (or EOF). Plain char may be signed, so bytes >= 0x80 (e.g. UTF-8 lead or
+	// continuation bytes) would otherwise be passed as negative values.
+	return std::isspace(static_cast<unsigned char>(c)) != 0;
+}
+
+/**
+ * Wide-character whitespace test, used by the generic trim() helpers.
+ * @param c
+ * @return true if std::iswspace classifies \p c as whitespace.
+ */
+inline bool my_isspace(const wchar_t c)
+{
+	const int classified = std::iswspace(c);
+	return classified != 0;
+}
+
 /**
 * @param str
 * @return A view of \p str with leading and trailing whitespace removed.
 */
-inline std::string_view trim(std::string_view str)
+template<typename T>
+inline std::basic_string_view<T> trim(const std::basic_string_view<T> &str)
 {
 	size_t front = 0;
 	size_t back = str.size();

-	while (front < back && std::isspace(str[front]))
+	while (front < back && my_isspace(str[front]))
 		++front;

-	while (back > front && std::isspace(str[back - 1]))
+	while (back > front && my_isspace(str[back - 1]))
 		--back;

 	return str.substr(front, back - front);
@ -351,16 +365,24 @@ inline std::string_view trim(std::string_view str)
 * @param str
 * @return A copy of \p str with leading and trailing whitespace removed.
 */
-inline std::string trim(std::string &&str)
+// T is the character type. `std::basic_string<T> &&` is a plain rvalue
+// reference here (T is deduced inside basic_string), so this overload is
+// chosen only for rvalue strings.
+template<typename T>
+inline std::basic_string<T> trim(std::basic_string<T> &&str)
 {
-	std::string ret(trim(std::string_view(str)));
+	// Build an owning string from the trimmed view rather than returning the
+	// view itself; presumably because the rvalue argument may be a temporary
+	// that does not outlive the call.
+	std::basic_string<T> ret(trim(std::basic_string_view<T>(str)));
 	return ret;
 }

-// The above declaration causes ambiguity with char pointers so we have to fix that:
-inline std::string_view trim(const char *str)
+/**
+ * Overload for string lvalues. Template argument deduction does not apply the
+ * implicit std::basic_string -> std::basic_string_view conversion, so the
+ * view-taking trim() cannot be selected for a std::basic_string argument.
+ *
+ * @param str
+ * @return A view of \p str with leading and trailing whitespace removed.
+ *         The view refers to the storage of \p str.
+ */
+template<typename T>
+inline std::basic_string_view<T> trim(const std::basic_string<T> &str)
 {
-	return trim(std::string_view(str));
+	return trim(std::basic_string_view<T>(str));
+}
+
+// Template argument deduction cannot look through the implicit conversion from
+// a character pointer (e.g. a string literal) to std::basic_string_view<T>, so
+// pointers need a dedicated overload. The std::basic_string_view constructor
+// used here expects \p str to point to a null-terminated string.
+template<typename T>
+inline std::basic_string_view<T> trim(const T *str)
+{
+	return trim(std::basic_string_view<T>(str));
 }