1
0
Fork 0
mirror of https://github.com/luanti-org/luanti.git synced 2025-06-27 16:36:03 +00:00
This commit is contained in:
Lars Müller 2025-06-27 12:40:09 +07:00 committed by GitHub
commit 84084025b3
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
10 changed files with 376 additions and 0 deletions

View file

@ -290,6 +290,7 @@ if(NOT USE_LUAJIT)
add_subdirectory(lib/bitop) add_subdirectory(lib/bitop)
endif() endif()
add_subdirectory(lib/sha256) add_subdirectory(lib/sha256)
add_subdirectory(lib/lutf8)
if(BUILD_UNITTESTS OR BUILD_BENCHMARKS) if(BUILD_UNITTESTS OR BUILD_BENCHMARKS)
add_subdirectory(lib/catch2) add_subdirectory(lib/catch2)

View file

@ -11994,6 +11994,23 @@ Functions: bit.tobit, bit.tohex, bit.bnot, bit.band, bit.bor, bit.bxor, bit.lshi
See http://bitop.luajit.org/ for advanced information. See http://bitop.luajit.org/ for advanced information.
UTF-8 Library
-------------
Constants:
* `utf8.charpattern`
Functions:
* `utf8.char(...)`
* `utf8.codes(s [, lax])`
* `utf8.codepoint(s [, i [, j [, lax]]])`
* `utf8.len(s [, i [, j [, lax]]])`
* `utf8.offset(s, n [, i])`
See [the Lua 5.4 reference manual](https://www.lua.org/manual/5.4/manual.html#6.5) for more information.
Tracy Profiler Tracy Profiler
-------------- --------------

View file

@ -341,3 +341,10 @@ local function test_ipc_poll(cb)
print("delta: " .. (core.get_us_time() - t0) .. "us") print("delta: " .. (core.get_us_time() - t0) .. "us")
end end
unittests.register("test_ipc_poll", test_ipc_poll) unittests.register("test_ipc_poll", test_ipc_poll)
-- Smoke test for the bundled `utf8` library: checks `utf8.charpattern`
-- against a two-byte sequence and a NUL byte, and `utf8.char` against a
-- hand-encoded byte string.
local function test_utf8()
	-- 0xC3 0xA4 is a two-byte sequence; the pattern must consume both bytes.
	local two_bytes = string.char(0xc3, 0xa4)
	local matched = two_bytes:match(utf8.charpattern)
	assert(#matched == 2)

	-- A lone NUL byte is matched as a single character.
	local nul = "\0"
	assert(nul:match(utf8.charpattern) == nul)

	-- Two one-byte code points followed by a four-byte one (0x10abcd).
	local expected = string.char(0x11, 0x22, 0xf4, 0x8a, 0xaf, 0x8d)
	local encoded = utf8.char(0x11, 0x22, 0x10abcd)
	assert(encoded == expected)
end
unittests.register("test_utf8", test_utf8)

5
lib/lutf8/CMakeLists.txt Normal file
View file

@ -0,0 +1,5 @@
# Static library built from the vendored Lua UTF-8 module source.
add_library(lutf8 STATIC
	lutf8.c
)

# lutf8.c includes "lua.h" and "lauxlib.h", so it needs the Lua headers;
# they are only required to compile this target, hence PRIVATE.
target_include_directories(lutf8
	PRIVATE ${LUA_INCLUDE_DIR}
)

# Hand the target name and the header location (lutf8.h lives next to this
# file) back to the including directory.
set(LUA_UTF8_LIBRARY lutf8 PARENT_SCOPE)
set(LUA_UTF8_INCLUDE_DIR ${CMAKE_CURRENT_SOURCE_DIR} PARENT_SCOPE)

20
lib/lutf8/LICENSE.txt Normal file
View file

@ -0,0 +1,20 @@
Copyright (C) 1994-2025 Lua.org, PUC-Rio.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

308
lib/lutf8/lutf8.c Normal file
View file

@ -0,0 +1,308 @@
// PUC Lua UTF-8 library, with minor modifications for integration in Luanti.
// Taken from https://github.com/lua/lua/blob/c15543b9afa31ab5dc564511ae11acd808405e8f/lutf8lib.c
// MIT-licensed, see LICENSE.txt
#define lutf8lib_c
#define LUA_LIB
#include "lutf8.h"
#include <assert.h>
#include <limits.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include "lua.h"
#include "lauxlib.h"
#define MAXUNICODE 0x10FFFFu
#define MAXUTF 0x7FFFFFFFu
#define MSGInvalid "invalid UTF-8 code"
#define iscont(c) (((c) & 0xC0) == 0x80)
#define iscontp(p) iscont(*(p))
typedef uint32_t l_uint32;
typedef uint64_t lua_Unsigned;
/* from strlib */
/*
** Translate a relative string position into an absolute one.
** Non-negative positions are returned unchanged. A negative position
** counts back from the end of a string of 'len' bytes (-1 is the last
** byte); if it reaches back past the start of the string, 0 is returned.
*/
static lua_Integer u_posrelat (lua_Integer pos, size_t len) {
  if (pos < 0) {
    /* magnitude of 'pos', computed in unsigned arithmetic */
    size_t back = 0u - (size_t)pos;
    return (back > len) ? 0 : (lua_Integer)len + pos + 1;
  }
  return pos;
}
/*
** Decode one UTF-8 sequence starting at 's'.
**
** On success, stores the decoded value in '*val' (when 'val' is not
** NULL) and returns a pointer to the byte right after the sequence.
** Returns NULL if the byte sequence is invalid.
**
** The array 'limits' stores the minimum value for each sequence length,
** to check for overlong representations. Its first entry forces an error
** for non-ascii bytes with no continuation bytes (count == 0).
**
** When 'strict' is non-zero, values above MAXUNICODE and surrogates
** (0xD800-0xDFFF) are rejected as well.
*/
static const char *utf8_decode (const char *s, l_uint32 *val, int strict) {
  static const l_uint32 limits[] =
        {~(l_uint32)0, 0x80, 0x800, 0x10000u, 0x200000u, 0x4000000u};
  unsigned int c = (unsigned char)s[0];
  l_uint32 res = 0;  /* final result */
  if (c < 0x80)  /* ascii? */
    res = c;
  else {
    int count = 0;  /* to count number of continuation bytes */
    for (; c & 0x40; c <<= 1) {  /* while it needs continuation bytes... */
      unsigned int cc = (unsigned char)s[++count];  /* read next byte */
      if (!iscont(cc))  /* not a continuation byte? */
        return NULL;  /* invalid byte sequence */
      res = (res << 6) | (cc & 0x3F);  /* add lower 6 bits from cont. byte */
    }
    /*
    ** Reject over-long sequences before using 'count' as a shift amount:
    ** a 0xFF lead byte followed by continuation bytes yields count == 7,
    ** and shifting a 32-bit value by 35 bits is undefined behaviour.
    ** (This also keeps 'count' a valid index into 'limits'.)
    */
    if (count > 5)
      return NULL;  /* invalid byte sequence */
    res |= ((l_uint32)(c & 0x7F) << (count * 5));  /* add first byte */
    if (res > MAXUTF || res < limits[count])
      return NULL;  /* invalid byte sequence */
    s += count;  /* skip continuation bytes read */
  }
  if (strict) {
    /* check for invalid code points; too large or surrogates */
    if (res > MAXUNICODE || (0xD800u <= res && res <= 0xDFFFu))
      return NULL;
  }
  if (val) *val = res;
  return s + 1;  /* +1 to include first byte */
}
/*
** utf8len(s [, i [, j [, lax]]]) --> number of characters that
** start in the byte range [i,j] of 's'. If a sequence in that range
** cannot be decoded, returns nil plus the (1-based) position of the
** offending byte instead. 'lax' disables the strict code point checks
** of the decoder.
*/
static int utflen (lua_State *L) {
  size_t len;  /* string length in bytes */
  const char *s = luaL_checklstring(L, 1, &len);
  lua_Integer posi = u_posrelat(luaL_optinteger(L, 2, 1), len);
  lua_Integer posj = u_posrelat(luaL_optinteger(L, 3, -1), len);
  const int strict = !lua_toboolean(L, 4);
  lua_Integer count;  /* characters decoded so far */
  /* both checks also convert the positions to 0-based offsets */
  luaL_argcheck(L, 1 <= posi && --posi <= (lua_Integer)len, 2,
                   "initial position out of bounds");
  luaL_argcheck(L, --posj < (lua_Integer)len, 3,
                   "final position out of bounds");
  for (count = 0; posi <= posj; count++) {
    const char *next = utf8_decode(s + posi, NULL, strict);
    if (next == NULL) {  /* conversion error? */
      lua_pushnil(L);  /* return fail ... */
      lua_pushinteger(L, posi + 1);  /* ... and current position */
      return 2;
    }
    posi = (size_t)(next - s);  /* continue right after this character */
  }
  lua_pushinteger(L, count);
  return 1;
}
/*
** codepoint(s, [i, [j [, lax]]]) -> returns the code points of all
** characters that start in the byte range [i,j] of 's' ('j' defaults
** to 'i'). Raises an error on an invalid byte sequence; 'lax' disables
** the strict code point checks of the decoder.
*/
static int codepoint (lua_State *L) {
  size_t len;
  const char *s = luaL_checklstring(L, 1, &len);
  lua_Integer first = u_posrelat(luaL_optinteger(L, 2, 1), len);
  lua_Integer last = u_posrelat(luaL_optinteger(L, 3, first), len);
  const int strict = !lua_toboolean(L, 4);
  const char *cur;   /* next byte to decode */
  const char *stop;  /* one past the last byte of the interval */
  int nres = 0;      /* number of values pushed */
  luaL_argcheck(L, first >= 1, 2, "out of bounds");
  luaL_argcheck(L, last <= (lua_Integer)len, 3, "out of bounds");
  if (first > last)
    return 0;  /* empty interval; return no values */
  if (last - first >= INT_MAX)  /* (lua_Integer -> int) overflow? */
    return luaL_error(L, "string slice too long");
  /* the interval length in bytes is an upper bound for the results */
  luaL_checkstack(L, (int)(last - first) + 1, "string slice too long");
  stop = s + last;
  cur = s + (first - 1);
  while (cur < stop) {
    l_uint32 code;
    cur = utf8_decode(cur, &code, strict);
    if (cur == NULL)
      return luaL_error(L, MSGInvalid);
    lua_pushinteger(L, code);
    nres++;
  }
  return nres;
}
/* size of the scratch buffer filled by 'luaO_utf8esc' */
#define UTF8BUFFSZ 8

// Taken from https://github.com/lua/lua/blob/c15543b9afa31ab5dc564511ae11acd808405e8f/lobject.c#L385-L400
/*
** Encode 'x' as UTF-8. The bytes are written backwards so that the
** sequence ends at buff[UTF8BUFFSZ - 1]; the return value is the number
** of bytes written, i.e. the sequence starts at buff[UTF8BUFFSZ - n].
*/
static int luaO_utf8esc(char *buff, l_uint32 x) {
  int n = 1;  /* number of bytes put in buffer (backwards) */
  if (x < 0x80) {  /* ascii? a single byte is enough */
    buff[UTF8BUFFSZ - 1] = (char)(x);
    return n;
  }
  /* need continuation bytes */
  unsigned int first_max = 0x3f;  /* maximum that fits in first byte */
  do {
    /* emit the low 6 bits as a continuation byte */
    buff[UTF8BUFFSZ - n] = (char)(0x80 | (x & 0x3f));
    n++;
    x >>= 6;  /* remove added bits */
    first_max >>= 1;  /* one less bit available in first byte */
  } while (x > first_max);  /* still needs continuation byte? */
  buff[UTF8BUFFSZ - n] = (char)((~first_max << 1) | x);  /* add first byte */
  return n;
}
/*
** Push the UTF-8 encoding of the integer argument 'arg' as a string.
** Raises an argument error when the value is larger than MAXUTF
** (negative integers become huge after the unsigned conversion and are
** rejected the same way).
*/
static void pushutfchar (lua_State *L, int arg) {
  char buf[UTF8BUFFSZ];
  int nbytes;
  lua_Unsigned code = (lua_Unsigned)luaL_checkinteger(L, arg);
  luaL_argcheck(L, code <= MAXUTF, arg, "value out of range");
  nbytes = luaO_utf8esc(buf, (l_uint32)code);
  /* the encoder fills the buffer from its end */
  lua_pushlstring(L, buf + (UTF8BUFFSZ - nbytes), nbytes);
}
/*
** utfchar(n1, n2, ...) -> char(n1)..char(n2)...
** Returns one string holding the UTF-8 encodings of all arguments.
*/
static int utfchar (lua_State *L) {
  const int nargs = lua_gettop(L);  /* number of arguments */
  if (nargs == 1) {  /* optimize common case of single char */
    pushutfchar(L, 1);
    return 1;
  }
  /* general case: concatenate every encoded argument through a buffer */
  luaL_Buffer b;
  int arg;
  luaL_buffinit(L, &b);
  for (arg = 1; arg <= nargs; ++arg) {
    pushutfchar(L, arg);
    luaL_addvalue(&b);
  }
  luaL_pushresult(&b);
  return 1;
}
/*
** offset(s, n, [i]) -> indices where n-th character counting from
** position 'i' starts and ends; 0 means character at 'i'.
**
** 'i' defaults to 1 when 'n' is non-negative and to #s + 1 otherwise.
** Returns nil when the requested character does not exist. Raises an
** error when 'i' is out of bounds or when the selected start position
** is a continuation byte.
*/
static int byteoffset (lua_State *L) {
  size_t len;
  const char *s = luaL_checklstring(L, 1, &len);
  lua_Integer n = luaL_checkinteger(L, 2);
  lua_Integer posi = (n >= 0) ? 1 : (lua_Integer)(len + 1);
  posi = u_posrelat(luaL_optinteger(L, 3, posi), len);
  /* the check also converts 'posi' to a 0-based offset */
  luaL_argcheck(L, 1 <= posi && --posi <= (lua_Integer)len, 3,
                   "position out of bounds");
  if (n == 0) {
    /* find beginning of current byte sequence */
    while (posi > 0 && iscontp(s + posi)) posi--;
  }
  else {
    if (iscontp(s + posi))
      return luaL_error(L, "initial position is a continuation byte");
    if (n < 0) {
      while (n < 0 && posi > 0) {  /* move back */
        do {  /* find beginning of previous character */
          posi--;
        } while (posi > 0 && iscontp(s + posi));
        n++;
      }
    }
    else {
      n--;  /* do not move for 1st character */
      while (n > 0 && posi < (lua_Integer)len) {
        do {  /* find beginning of next character */
          posi++;
        } while (iscontp(s + posi));  /* (cannot pass final '\0') */
        n--;
      }
    }
  }
  if (n != 0) {  /* did not find given character? */
    lua_pushnil(L);
    return 1;
  }
  lua_pushinteger(L, posi + 1);  /* initial position */
  if ((s[posi] & 0x80) != 0) {  /* multi-byte character? */
    /*
    ** The backward scans above stop at offset 0 even when that byte is a
    ** continuation byte (string starting in the middle of a sequence);
    ** there is no character starting here, so report it like the check
    ** on the initial position does.
    */
    if (iscont(s[posi]))
      return luaL_error(L, "initial position is a continuation byte");
    /*
    ** Advance only while the *next* byte is a continuation byte. An
    ** unconditional first step would move past a lead byte that has no
    ** continuation bytes and then inspect s[len + 1], one byte beyond
    ** the string's terminating '\0'.
    */
    while (iscontp(s + posi + 1))
      posi++;  /* skip to last continuation byte */
  }
  /* else one-byte character: final position is the initial one */
  lua_pushinteger(L, posi + 1);  /* 'posi' now is the final position */
  return 2;
}
/*
** Iteration step shared by the strict and lax iterators returned by
** 'iter_codes'. Argument 1 is the string, argument 2 the control value
** (taken as a byte offset). Returns the 1-based position and the code
** point of the next character, nothing when the string is exhausted,
** and raises an error on an invalid byte sequence.
*/
static int iter_aux (lua_State *L, int strict) {
  size_t len;
  const char *s = luaL_checklstring(L, 1, &len);
  lua_Unsigned pos = (lua_Unsigned)lua_tointeger(L, 2);
  l_uint32 code;
  const char *after;  /* first byte following the decoded character */
  if (pos < len) {
    while (iscontp(s + pos)) pos++;  /* go to next character */
  }
  if (pos >= len)  /* (also handles original control value being negative) */
    return 0;  /* no more codepoints */
  after = utf8_decode(s + pos, &code, strict);
  /* a stray continuation byte right after the character is an error too */
  if (after == NULL || iscontp(after))
    return luaL_error(L, MSGInvalid);
  lua_pushinteger(L, (lua_Integer)(pos + 1));
  lua_pushinteger(L, (lua_Integer)code);
  return 2;
}
/* Iterator step with strict decoding enabled (see 'iter_aux'). */
static int iter_auxstrict (lua_State *L) {
  const int strict = 1;
  return iter_aux(L, strict);
}

/* Iterator step with strict decoding disabled (see 'iter_aux'). */
static int iter_auxlax (lua_State *L) {
  const int strict = 0;
  return iter_aux(L, strict);
}
/*
** codes(s [, lax]) -> iterator function, 's', 0
** Returns the triple expected by a generic 'for' loop. Raises an
** argument error if 's' starts with a continuation byte.
*/
static int iter_codes (lua_State *L) {
  int lax = lua_toboolean(L, 2);
  const char *s = luaL_checkstring(L, 1);
  lua_CFunction step;
  luaL_argcheck(L, !iscontp(s), 1, MSGInvalid);
  if (lax)
    step = iter_auxlax;
  else
    step = iter_auxstrict;
  lua_pushcfunction(L, step);  /* iterator function */
  lua_pushvalue(L, 1);         /* state: the string itself */
  lua_pushinteger(L, 0);       /* initial control value */
  return 3;
}
/*
** Pattern to match a single UTF-8 character: one lead byte followed by
** any number of continuation bytes.
**
** The NUL byte is written as the class '%z'. Inside a set, a class
** escape cannot be the start of a range, so the remaining single-byte
** characters need their own explicit range starting at \x01; writing
** "%z-\x7F" would only add the literal bytes '-' and 0x7F to the set and
** no ordinary ASCII character would match.
*/
#define UTF8PATT "[%z\x01-\x7F\xC2-\xFD][\x80-\xBF]*"
/*
** Functions exported in the library table, as name/implementation pairs.
** The list is terminated by a {NULL, NULL} sentinel.
*/
static const luaL_Reg funcs[] = {
  {"char",      utfchar},
  {"codepoint", codepoint},
  {"codes",     iter_codes},
  {"len",       utflen},
  {"offset",    byteoffset},
  {NULL, NULL}
};
/*
** Open the library: registers the functions of 'funcs' under the name
** LUA_UTF8LIBNAME, stores the character pattern in the field
** 'charpattern' of that table, and returns the table.
*/
LUALIB_API int luaopen_utf8 (lua_State *L) {
  static const char charpattern[] = UTF8PATT;
  luaL_register(L, LUA_UTF8LIBNAME, funcs);
  /* explicit length: everything but the array's terminating '\0' */
  lua_pushlstring(L, charpattern, sizeof(charpattern) - 1);
  lua_setfield(L, -2, "charpattern");
  return 1;
}

6
lib/lutf8/lutf8.h Normal file
View file

@ -0,0 +1,6 @@
#pragma once
#include "lua.h"
// Name under which the library's function table is registered.
#define LUA_UTF8LIBNAME "utf8"
// Opens the UTF-8 library in the given Lua state: registers its functions
// under LUA_UTF8LIBNAME, sets the `charpattern` field and leaves the
// library table on the stack (returns 1).
LUALIB_API int luaopen_utf8(lua_State *L);

View file

@ -573,6 +573,7 @@ include_directories(SYSTEM
${GMP_INCLUDE_DIR} ${GMP_INCLUDE_DIR}
${JSON_INCLUDE_DIR} ${JSON_INCLUDE_DIR}
${LUA_BIT_INCLUDE_DIR} ${LUA_BIT_INCLUDE_DIR}
${LUA_UTF8_INCLUDE_DIR}
# on Android, Luanti depends on SDL2 directly # on Android, Luanti depends on SDL2 directly
# on other platforms, only IrrlichtMt depends on SDL2 # on other platforms, only IrrlichtMt depends on SDL2
"$<$<PLATFORM_ID:Android>:${SDL2_INCLUDE_DIRS}>" "$<$<PLATFORM_ID:Android>:${SDL2_INCLUDE_DIRS}>"
@ -695,6 +696,7 @@ if(BUILD_CLIENT)
${GMP_LIBRARY} ${GMP_LIBRARY}
${JSON_LIBRARY} ${JSON_LIBRARY}
${LUA_BIT_LIBRARY} ${LUA_BIT_LIBRARY}
${LUA_UTF8_LIBRARY}
sha256 sha256
${FREETYPE_LIBRARY} ${FREETYPE_LIBRARY}
${PLATFORM_LIBS} ${PLATFORM_LIBS}
@ -785,6 +787,7 @@ if(BUILD_SERVER)
${JSON_LIBRARY} ${JSON_LIBRARY}
${LUA_LIBRARY} ${LUA_LIBRARY}
${LUA_BIT_LIBRARY} ${LUA_BIT_LIBRARY}
${LUA_UTF8_LIBRARY}
sha256 sha256
${GMP_LIBRARY} ${GMP_LIBRARY}
${PLATFORM_LIBS} ${PLATFORM_LIBS}

View file

@ -29,6 +29,7 @@ extern "C" {
#else #else
#include "bit.h" #include "bit.h"
#endif #endif
#include "lutf8.h"
} }
#include <cstdio> #include <cstdio>
@ -84,6 +85,10 @@ ScriptApiBase::ScriptApiBase(ScriptingType type):
lua_pushstring(m_luastack, LUA_BITLIBNAME); lua_pushstring(m_luastack, LUA_BITLIBNAME);
lua_call(m_luastack, 1, 0); lua_call(m_luastack, 1, 0);
// Load utf8 library
lua_pushcfunction(m_luastack, luaopen_utf8);
lua_call(m_luastack, 0, 0);
#if BUILD_WITH_TRACY #if BUILD_WITH_TRACY
// Load tracy lua bindings // Load tracy lua bindings
tracy::LuaRegister(m_luastack); tracy::LuaRegister(m_luastack);

View file

@ -3,6 +3,7 @@
// Copyright (C) 2013 celeron55, Perttu Ahola <celeron55@gmail.com> // Copyright (C) 2013 celeron55, Perttu Ahola <celeron55@gmail.com>
#include "cpp_api/s_security.h" #include "cpp_api/s_security.h"
#include "log.h"
#include "lua_api/l_base.h" #include "lua_api/l_base.h"
#include "filesys.h" #include "filesys.h"
#include "porting.h" #include "porting.h"
@ -38,6 +39,7 @@ static void shallow_copy_table(lua_State *L, int from=-2, int to=-1)
if (from < 0) from = lua_gettop(L) + from + 1; if (from < 0) from = lua_gettop(L) + from + 1;
if (to < 0) to = lua_gettop(L) + to + 1; if (to < 0) to = lua_gettop(L) + to + 1;
lua_pushnil(L); lua_pushnil(L);
assert(lua_istable(L, from));
while (lua_next(L, from) != 0) { while (lua_next(L, from) != 0) {
assert(lua_type(L, -1) != LUA_TTABLE); assert(lua_type(L, -1) != LUA_TTABLE);
// duplicate key and value for lua_rawset // duplicate key and value for lua_rawset
@ -96,6 +98,7 @@ void ScriptApiSecurity::initializeSecurity()
"table", "table",
"math", "math",
"bit", "bit",
"utf8",
// Not sure if completely safe. But if someone enables tracy, they'll // Not sure if completely safe. But if someone enables tracy, they'll
// know what they do. // know what they do.
#if BUILD_WITH_TRACY #if BUILD_WITH_TRACY
@ -296,6 +299,7 @@ void ScriptApiSecurity::initializeSecurityClient()
"table", "table",
"math", "math",
"bit", "bit",
"utf8",
// Not sure if completely safe. But if someone enables tracy, they'll // Not sure if completely safe. But if someone enables tracy, they'll
// know what they do. // know what they do.
#if BUILD_WITH_TRACY #if BUILD_WITH_TRACY