1
0
Fork 0
mirror of https://github.com/luanti-org/luanti.git synced 2025-07-22 17:18:39 +00:00

Cache grouped sparse buffers (#15594)

continuation of #15531
This commit is contained in:
sfan5 2025-01-14 23:40:57 +01:00 committed by GitHub
parent 7053348e31
commit cf074dd271
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 178 additions and 61 deletions

View file

@ -1841,7 +1841,7 @@ mesh_generation_threads (Mapblock mesh generation threads) int 0 0 8
# All mesh buffers with less than this number of vertices will be merged # All mesh buffers with less than this number of vertices will be merged
# during map rendering. This improves rendering performance. # during map rendering. This improves rendering performance.
mesh_buffer_min_vertices (Minimum vertex count for mesh buffers) int 100 0 1000 mesh_buffer_min_vertices (Minimum vertex count for mesh buffers) int 300 0 1000
# True = 256 # True = 256
# False = 128 # False = 128

View file

@ -564,7 +564,8 @@ void Client::step(float dtime)
std::vector<MinimapMapblock*> minimap_mapblocks; std::vector<MinimapMapblock*> minimap_mapblocks;
bool do_mapper_update = true; bool do_mapper_update = true;
MapSector *sector = m_env.getMap().emergeSector(v2s16(r.p.X, r.p.Z)); ClientMap &map = m_env.getClientMap();
MapSector *sector = map.emergeSector(v2s16(r.p.X, r.p.Z));
MapBlock *block = sector->getBlockNoCreateNoEx(r.p.Y); MapBlock *block = sector->getBlockNoCreateNoEx(r.p.Y);
@ -576,6 +577,8 @@ void Client::step(float dtime)
if (block) { if (block) {
// Delete the old mesh // Delete the old mesh
if (block->mesh)
map.invalidateMapBlockMesh(block->mesh);
delete block->mesh; delete block->mesh;
block->mesh = nullptr; block->mesh = nullptr;
block->solid_sides = r.solid_sides; block->solid_sides = r.solid_sides;
@ -590,9 +593,9 @@ void Client::step(float dtime)
if (r.mesh->getMesh(l)->getMeshBufferCount() != 0) if (r.mesh->getMesh(l)->getMeshBufferCount() != 0)
is_empty = false; is_empty = false;
if (is_empty) if (is_empty) {
delete r.mesh; delete r.mesh;
else { } else {
// Replace with the new mesh // Replace with the new mesh
block->mesh = r.mesh; block->mesh = r.mesh;
if (r.urgent) if (r.urgent)

View file

@ -122,6 +122,12 @@ namespace {
} }
} }
/**
 * Calls drop() on every mesh buffer held by this cache entry.
 *
 * The pointer list is emptied afterwards so that the entry does not keep
 * pointers to buffers it no longer holds, and so that calling drop() a
 * second time is a harmless no-op instead of a double drop.
 */
void CachedMeshBuffer::drop()
{
	for (auto *it : buf)
		it->drop();
	// Nothing in here may be used anymore, so forget the pointers too.
	buf.clear();
}
/* /*
ClientMap ClientMap
*/ */
@ -191,6 +197,9 @@ void ClientMap::onSettingChanged(std::string_view name, bool all)
ClientMap::~ClientMap() ClientMap::~ClientMap()
{ {
g_settings->deregisterAllChangedCallbacks(this); g_settings->deregisterAllChangedCallbacks(this);
for (auto &it : m_dynamic_buffers)
it.second.drop();
} }
void ClientMap::updateCamera(v3f pos, v3f dir, f32 fov, v3s16 offset, video::SColor light_color) void ClientMap::updateCamera(v3f pos, v3f dir, f32 fov, v3s16 offset, video::SColor light_color)
@ -788,27 +797,24 @@ void MeshBufListMaps::addFromBlock(v3s16 block_pos, MapBlockMesh *block_mesh,
* @param src buffer list * @param src buffer list
* @param dst draw order * @param dst draw order
* @param get_world_pos returns translation for a buffer * @param get_world_pos returns translation for a buffer
* @param buffer_trash output container for temporary mesh buffers * @param dynamic_buffers cache structure for merged buffers
* @return number of buffers that were merged * @return number of buffers that were merged
*/ */
template <typename F, typename C> template <typename F>
static u32 transformBuffersToDrawOrder( static u32 transformBuffersToDrawOrder(
const MeshBufListMaps::MeshBufList &src, DrawDescriptorList &draw_order, const MeshBufListMaps::MeshBufList &src, DrawDescriptorList &draw_order,
F get_world_pos, C &buffer_trash) F get_world_pos, CachedMeshBuffers &dynamic_buffers)
{ {
/** /**
* This is a tradeoff between time spent merging buffers and time spent * This is a tradeoff between time spent merging buffers and time spent
* due to excess drawcalls. * due to excess drawcalls.
* Testing has shown that the ideal value is in the low hundreds, as extra * Testing has shown that the ideal value is in the low hundreds, as extra
* CPU work quickly eats up the benefits. * CPU work quickly eats up the benefits (though alleviated by a cache).
* In MTG landscape scenes this was found to save around 20-40% of drawcalls. * In MTG landscape scenes this was found to save around 20-40% of drawcalls.
* *
* NOTE: if you attempt to test this with quicktune, it won't give you valid * NOTE: if you attempt to test this with quicktune, it won't give you valid
* results since HW buffers stick around and Irrlicht handles large amounts * results since HW buffers stick around and Irrlicht handles large amounts
* inefficiently. * inefficiently.
*
* TODO: as a next step we should cache merged meshes, so they do not need
* to be re-built *and* can be kept in GPU memory.
*/ */
const u32 target_min_vertices = g_settings->getU32("mesh_buffer_min_vertices"); const u32 target_min_vertices = g_settings->getU32("mesh_buffer_min_vertices");
@ -826,23 +832,8 @@ static u32 transformBuffersToDrawOrder(
} }
} }
scene::SMeshBuffer *tmp = nullptr;
const auto &finish_buf = [&] () {
if (tmp) {
draw_order.emplace_back(v3f(0), tmp);
total_vtx = subtract_or_zero(total_vtx, tmp->getVertexCount());
total_idx = subtract_or_zero(total_idx, tmp->getIndexCount());
// Upload buffer here explicitly to give the driver some
// extra time to get it ready before drawing.
tmp->setHardwareMappingHint(scene::EHM_STREAM);
driver->updateHardwareBuffer(tmp->getVertexBuffer());
driver->updateHardwareBuffer(tmp->getIndexBuffer());
}
tmp = nullptr;
};
// iterate in reverse to get closest blocks first // iterate in reverse to get closest blocks first
std::vector<std::pair<v3f, scene::IMeshBuffer*>> to_merge;
for (auto it = src.rbegin(); it != src.rend(); ++it) { for (auto it = src.rbegin(); it != src.rend(); ++it) {
v3f translate = get_world_pos(it->first); v3f translate = get_world_pos(it->first);
auto *buf = it->second; auto *buf = it->second;
@ -850,25 +841,82 @@ static u32 transformBuffersToDrawOrder(
draw_order.emplace_back(translate, buf); draw_order.emplace_back(translate, buf);
continue; continue;
} }
to_merge.emplace_back(translate, buf);
bool new_buffer = false; }
if (!tmp)
new_buffer = true; /*
else if (tmp->getVertexCount() + buf->getVertexCount() > U16_MAX) * Tracking buffers, their contents and modifications would be quite complicated
new_buffer = true; * so we opt for something simple here: We identify buffers by their location
if (new_buffer) { * in memory.
finish_buf(); * This imposes the following assumptions:
tmp = new scene::SMeshBuffer(); * - buffers don't move in memory
buffer_trash.push_back(tmp); * - vertex and index data is immutable
assert(tmp->getPrimitiveType() == buf->getPrimitiveType()); * - we know when to invalidate (invalidateMapBlockMesh does this)
tmp->Material = buf->getMaterial(); */
// preallocate std::sort(to_merge.begin(), to_merge.end(), [] (const auto &l, const auto &r) {
tmp->Vertices->Data.reserve(total_vtx); return static_cast<void*>(l.second) < static_cast<void*>(r.second);
tmp->Indices->Data.reserve(total_idx); });
} // cache key is a string of sorted raw pointers
appendToMeshBuffer(tmp, buf, translate); std::string key;
key.reserve(sizeof(void*) * to_merge.size());
for (auto &it : to_merge)
key.append(reinterpret_cast<const char*>(&it.second), sizeof(void*));
// try to take from cache
auto it2 = dynamic_buffers.find(key);
if (it2 != dynamic_buffers.end()) {
g_profiler->avg("CM::transformBuffersToDO: cache hit rate", 1);
const auto &use_mat = to_merge.front().second->getMaterial();
for (auto *buf : it2->second.buf) {
// material is not part of the cache key, so make sure it still matches
buf->getMaterial() = use_mat;
draw_order.emplace_back(v3f(0), buf);
}
it2->second.age = 0;
} else if (!key.empty()) {
g_profiler->avg("CM::transformBuffersToDO: cache hit rate", 0);
// merge and save to cache
auto &put_buffers = dynamic_buffers[key];
scene::SMeshBuffer *tmp = nullptr;
const auto &finish_buf = [&] () {
if (tmp) {
draw_order.emplace_back(v3f(0), tmp);
total_vtx = subtract_or_zero(total_vtx, tmp->getVertexCount());
total_idx = subtract_or_zero(total_idx, tmp->getIndexCount());
// Upload buffer here explicitly to give the driver some
// extra time to get it ready before drawing.
tmp->setHardwareMappingHint(scene::EHM_STREAM);
driver->updateHardwareBuffer(tmp->getVertexBuffer());
driver->updateHardwareBuffer(tmp->getIndexBuffer());
}
tmp = nullptr;
};
for (auto &it : to_merge) {
v3f translate = it.first;
auto *buf = it.second;
bool new_buffer = false;
if (!tmp)
new_buffer = true;
else if (tmp->getVertexCount() + buf->getVertexCount() > U16_MAX)
new_buffer = true;
if (new_buffer) {
finish_buf();
tmp = new scene::SMeshBuffer();
put_buffers.buf.push_back(tmp);
assert(tmp->getPrimitiveType() == buf->getPrimitiveType());
tmp->Material = buf->getMaterial();
// preallocate approximately
tmp->Vertices->Data.reserve(MYMIN(U16_MAX, total_vtx));
tmp->Indices->Data.reserve(total_idx);
}
appendToMeshBuffer(tmp, buf, translate);
}
finish_buf();
assert(!put_buffers.buf.empty());
} }
finish_buf();
// first call needs to set the material // first call needs to set the material
if (draw_order.size() > draw_order_pre) if (draw_order.size() > draw_order_pre)
@ -921,7 +969,6 @@ void ClientMap::renderMap(video::IVideoDriver* driver, s32 pass)
TimeTaker tt_collect(""); TimeTaker tt_collect("");
MeshBufListMaps grouped_buffers; MeshBufListMaps grouped_buffers;
std::vector<scene::IMeshBuffer*> buffer_trash;
DrawDescriptorList draw_order; DrawDescriptorList draw_order;
auto is_frustum_culled = m_client->getCamera()->getFrustumCuller(); auto is_frustum_culled = m_client->getCamera()->getFrustumCuller();
@ -979,7 +1026,7 @@ void ClientMap::renderMap(video::IVideoDriver* driver, s32 pass)
for (auto &map : grouped_buffers.maps) { for (auto &map : grouped_buffers.maps) {
for (auto &list : map) { for (auto &list : map) {
merged_count += transformBuffersToDrawOrder( merged_count += transformBuffersToDrawOrder(
list.second, draw_order, get_block_wpos, buffer_trash); list.second, draw_order, get_block_wpos, m_dynamic_buffers);
} }
} }
@ -1036,6 +1083,20 @@ void ClientMap::renderMap(video::IVideoDriver* driver, s32 pass)
if (pass == scene::ESNRP_SOLID) { if (pass == scene::ESNRP_SOLID) {
g_profiler->avg("renderMap(): animated meshes [#]", mesh_animate_count); g_profiler->avg("renderMap(): animated meshes [#]", mesh_animate_count);
g_profiler->avg(prefix + "merged buffers [#]", merged_count); g_profiler->avg(prefix + "merged buffers [#]", merged_count);
u32 cached_count = 0;
for (auto it = m_dynamic_buffers.begin(); it != m_dynamic_buffers.end(); ) {
// prune aggressively since every new/changed block or camera
// rotation can have big effects
if (++it->second.age > 1) {
it->second.drop();
it = m_dynamic_buffers.erase(it);
} else {
cached_count += it->second.buf.size();
it++;
}
}
g_profiler->avg(prefix + "merged buffers in cache [#]", cached_count);
} }
if (pass == scene::ESNRP_TRANSPARENT) { if (pass == scene::ESNRP_TRANSPARENT) {
@ -1045,9 +1106,51 @@ void ClientMap::renderMap(video::IVideoDriver* driver, s32 pass)
g_profiler->avg(prefix + "vertices drawn [#]", vertex_count); g_profiler->avg(prefix + "vertices drawn [#]", vertex_count);
g_profiler->avg(prefix + "drawcalls [#]", drawcall_count); g_profiler->avg(prefix + "drawcalls [#]", drawcall_count);
g_profiler->avg(prefix + "material swaps [#]", material_swaps); g_profiler->avg(prefix + "material swaps [#]", material_swaps);
}
for (auto &x : buffer_trash) void ClientMap::invalidateMapBlockMesh(MapBlockMesh *mesh)
x->drop(); {
// find all buffers for this block
MeshBufListMaps tmp;
tmp.addFromBlock(v3s16(), mesh, getSceneManager()->getVideoDriver());
std::vector<void*> to_delete;
void *maxp = 0;
for (auto &it : tmp.maps) {
for (auto &it2 : it) {
for (auto &it3 : it2.second) {
void *const p = it3.second; // explicit downcast
to_delete.push_back(p);
maxp = std::max(maxp, p);
}
}
}
if (to_delete.empty())
return;
// we know which buffers were used to produce a merged buffer
// so go through the cache and drop any entries that match
const auto &match_any = [&] (const std::string &key) {
assert(key.size() % sizeof(void*) == 0);
void *v;
for (size_t off = 0; off < key.size(); off += sizeof(void*)) {
// no alignment guarantee so *(void**)&key[off] is not allowed!
memcpy(&v, &key[off], sizeof(void*));
if (v > maxp) // early exit, since it's sorted
break;
if (CONTAINS(to_delete, v))
return true;
}
return false;
};
for (auto it = m_dynamic_buffers.begin(); it != m_dynamic_buffers.end(); ) {
if (match_any(it->first)) {
it->second.drop();
it = m_dynamic_buffers.erase(it);
} else {
it++;
}
}
} }
static bool getVisibleBrightness(Map *map, const v3f &p0, v3f dir, float step, static bool getVisibleBrightness(Map *map, const v3f &p0, v3f dir, float step,
@ -1263,7 +1366,6 @@ void ClientMap::renderMapShadows(video::IVideoDriver *driver,
}; };
MeshBufListMaps grouped_buffers; MeshBufListMaps grouped_buffers;
std::vector<scene::IMeshBuffer*> buffer_trash;
DrawDescriptorList draw_order; DrawDescriptorList draw_order;
std::size_t count = 0; std::size_t count = 0;
@ -1308,7 +1410,7 @@ void ClientMap::renderMapShadows(video::IVideoDriver *driver,
for (auto &map : grouped_buffers.maps) { for (auto &map : grouped_buffers.maps) {
for (auto &list : map) { for (auto &list : map) {
transformBuffersToDrawOrder( transformBuffersToDrawOrder(
list.second, draw_order, get_block_wpos, buffer_trash); list.second, draw_order, get_block_wpos, m_dynamic_buffers);
} }
} }
@ -1373,9 +1475,6 @@ void ClientMap::renderMapShadows(video::IVideoDriver *driver,
g_profiler->avg(prefix + "vertices drawn [#]", vertex_count); g_profiler->avg(prefix + "vertices drawn [#]", vertex_count);
g_profiler->avg(prefix + "drawcalls [#]", drawcall_count); g_profiler->avg(prefix + "drawcalls [#]", drawcall_count);
g_profiler->avg(prefix + "material swaps [#]", material_swaps); g_profiler->avg(prefix + "material swaps [#]", material_swaps);
for (auto &x : buffer_trash)
x->drop();
} }
/* /*

View file

@ -36,6 +36,16 @@ namespace irr::video
class IVideoDriver; class IVideoDriver;
} }
/// One entry of the merged mesh buffer cache used by ClientMap.
struct CachedMeshBuffer {
/// Merged buffers produced for one cache key. They are appended when the
/// merge is first built and re-added to the draw order on a cache hit.
std::vector<scene::IMeshBuffer*> buf;
/// Reset to 0 whenever the entry is used. ClientMap::renderMap() increments
/// it during the solid pass and prunes the entry once it exceeds 1.
u8 age = 0;

/// Calls drop() on every buffer in `buf`.
void drop();
};
/// Cache of merged mesh buffers. The key is a byte string built by
/// concatenating the raw pointers of the source buffers, sorted by address.
using CachedMeshBuffers = std::unordered_map<std::string, CachedMeshBuffer>;
/* /*
ClientMap ClientMap
@ -95,6 +105,8 @@ public:
void renderPostFx(CameraMode cam_mode); void renderPostFx(CameraMode cam_mode);
void invalidateMapBlockMesh(MapBlockMesh *mesh);
// For debug printing // For debug printing
void PrintInfo(std::ostream &out) override; void PrintInfo(std::ostream &out) override;
@ -151,6 +163,7 @@ private:
std::vector<MapBlock*> m_keeplist; std::vector<MapBlock*> m_keeplist;
std::map<v3s16, MapBlock*> m_drawlist_shadow; std::map<v3s16, MapBlock*> m_drawlist_shadow;
bool m_needs_update_drawlist; bool m_needs_update_drawlist;
CachedMeshBuffers m_dynamic_buffers;
bool m_cache_trilinear_filter; bool m_cache_trilinear_filter;
bool m_cache_bilinear_filter; bool m_cache_bilinear_filter;

View file

@ -170,13 +170,11 @@ private:
/* /*
Holds a mesh for a mapblock. Holds a mesh for a mapblock.
Besides the SMesh*, this contains information used for animating Besides the SMesh*, this contains information used for transparency sorting
the vertex positions, colors and texture coordinates of the mesh. and texture animation.
For example: For example:
- cracks [implemented] - cracks
- day/night transitions [implemented] - day/night transitions
- animated flowing liquids [not implemented]
- animating vertex positions for e.g. axles [not implemented]
*/ */
class MapBlockMesh class MapBlockMesh
{ {
@ -193,13 +191,17 @@ public:
// Returns true if anything has been changed. // Returns true if anything has been changed.
bool animate(bool faraway, float time, int crack, u32 daynight_ratio); bool animate(bool faraway, float time, int crack, u32 daynight_ratio);
/// @warning ClientMap requires that the vertex and index data is not modified
scene::IMesh *getMesh() scene::IMesh *getMesh()
{ {
return m_mesh[0].get(); return m_mesh[0].get();
} }
/// @param layer layer index
/// @warning ClientMap requires that the vertex and index data is not modified
scene::IMesh *getMesh(u8 layer) scene::IMesh *getMesh(u8 layer)
{ {
assert(layer < MAX_TILE_LAYERS);
return m_mesh[layer].get(); return m_mesh[layer].get();
} }

View file

@ -103,7 +103,7 @@ void set_default_settings()
settings->setDefault("sound_extensions_blacklist", ""); settings->setDefault("sound_extensions_blacklist", "");
settings->setDefault("mesh_generation_interval", "0"); settings->setDefault("mesh_generation_interval", "0");
settings->setDefault("mesh_generation_threads", "0"); settings->setDefault("mesh_generation_threads", "0");
settings->setDefault("mesh_buffer_min_vertices", "100"); settings->setDefault("mesh_buffer_min_vertices", "300");
settings->setDefault("free_move", "false"); settings->setDefault("free_move", "false");
settings->setDefault("pitch_move", "false"); settings->setDefault("pitch_move", "false");
settings->setDefault("fast_move", "false"); settings->setDefault("fast_move", "false");