diff --git a/builtin/settingtypes.txt b/builtin/settingtypes.txt index c6b84fde7..7707a369b 100644 --- a/builtin/settingtypes.txt +++ b/builtin/settingtypes.txt @@ -1841,7 +1841,7 @@ mesh_generation_threads (Mapblock mesh generation threads) int 0 0 8 # All mesh buffers with less than this number of vertices will be merged # during map rendering. This improves rendering performance. -mesh_buffer_min_vertices (Minimum vertex count for mesh buffers) int 100 0 1000 +mesh_buffer_min_vertices (Minimum vertex count for mesh buffers) int 300 0 1000 # True = 256 # False = 128 diff --git a/src/client/client.cpp b/src/client/client.cpp index b709f8cbf..34d05d256 100644 --- a/src/client/client.cpp +++ b/src/client/client.cpp @@ -564,7 +564,8 @@ void Client::step(float dtime) std::vector minimap_mapblocks; bool do_mapper_update = true; - MapSector *sector = m_env.getMap().emergeSector(v2s16(r.p.X, r.p.Z)); + ClientMap &map = m_env.getClientMap(); + MapSector *sector = map.emergeSector(v2s16(r.p.X, r.p.Z)); MapBlock *block = sector->getBlockNoCreateNoEx(r.p.Y); @@ -576,6 +577,8 @@ void Client::step(float dtime) if (block) { // Delete the old mesh + if (block->mesh) + map.invalidateMapBlockMesh(block->mesh); delete block->mesh; block->mesh = nullptr; block->solid_sides = r.solid_sides; @@ -590,9 +593,9 @@ void Client::step(float dtime) if (r.mesh->getMesh(l)->getMeshBufferCount() != 0) is_empty = false; - if (is_empty) + if (is_empty) { delete r.mesh; - else { + } else { // Replace with the new mesh block->mesh = r.mesh; if (r.urgent) diff --git a/src/client/clientmap.cpp b/src/client/clientmap.cpp index ee05d8816..d6ab137b5 100644 --- a/src/client/clientmap.cpp +++ b/src/client/clientmap.cpp @@ -122,6 +122,12 @@ namespace { } } +void CachedMeshBuffer::drop() +{ + for (auto *it : buf) + it->drop(); +} + /* ClientMap */ @@ -191,6 +197,9 @@ void ClientMap::onSettingChanged(std::string_view name, bool all) ClientMap::~ClientMap() { g_settings->deregisterAllChangedCallbacks(this); + 
+ for (auto &it : m_dynamic_buffers) + it.second.drop(); } void ClientMap::updateCamera(v3f pos, v3f dir, f32 fov, v3s16 offset, video::SColor light_color) @@ -788,27 +797,24 @@ void MeshBufListMaps::addFromBlock(v3s16 block_pos, MapBlockMesh *block_mesh, * @param src buffer list * @param dst draw order * @param get_world_pos returns translation for a buffer - * @param buffer_trash output container for temporary mesh buffers + * @param dynamic_buffers cache structure for merged buffers * @return number of buffers that were merged */ -template +template static u32 transformBuffersToDrawOrder( const MeshBufListMaps::MeshBufList &src, DrawDescriptorList &draw_order, - F get_world_pos, C &buffer_trash) + F get_world_pos, CachedMeshBuffers &dynamic_buffers) { /** * This is a tradeoff between time spent merging buffers and time spent * due to excess drawcalls. * Testing has shown that the ideal value is in the low hundreds, as extra - * CPU work quickly eats up the benefits. + * CPU work quickly eats up the benefits (though alleviated by a cache). * In MTG landscape scenes this was found to save around 20-40% of drawcalls. * * NOTE: if you attempt to test this with quicktune, it won't give you valid * results since HW buffers stick around and Irrlicht handles large amounts * inefficiently. - * - * TODO: as a next step we should cache merged meshes, so they do not need - * to be re-built *and* can be kept in GPU memory. */ const u32 target_min_vertices = g_settings->getU32("mesh_buffer_min_vertices"); @@ -826,23 +832,8 @@ static u32 transformBuffersToDrawOrder( } } - scene::SMeshBuffer *tmp = nullptr; - const auto &finish_buf = [&] () { - if (tmp) { - draw_order.emplace_back(v3f(0), tmp); - total_vtx = subtract_or_zero(total_vtx, tmp->getVertexCount()); - total_idx = subtract_or_zero(total_idx, tmp->getIndexCount()); - - // Upload buffer here explicitly to give the driver some - // extra time to get it ready before drawing. 
- tmp->setHardwareMappingHint(scene::EHM_STREAM); - driver->updateHardwareBuffer(tmp->getVertexBuffer()); - driver->updateHardwareBuffer(tmp->getIndexBuffer()); - } - tmp = nullptr; - }; - // iterate in reverse to get closest blocks first + std::vector> to_merge; for (auto it = src.rbegin(); it != src.rend(); ++it) { v3f translate = get_world_pos(it->first); auto *buf = it->second; @@ -850,25 +841,82 @@ static u32 transformBuffersToDrawOrder( draw_order.emplace_back(translate, buf); continue; } - - bool new_buffer = false; - if (!tmp) - new_buffer = true; - else if (tmp->getVertexCount() + buf->getVertexCount() > U16_MAX) - new_buffer = true; - if (new_buffer) { - finish_buf(); - tmp = new scene::SMeshBuffer(); - buffer_trash.push_back(tmp); - assert(tmp->getPrimitiveType() == buf->getPrimitiveType()); - tmp->Material = buf->getMaterial(); - // preallocate - tmp->Vertices->Data.reserve(total_vtx); - tmp->Indices->Data.reserve(total_idx); - } - appendToMeshBuffer(tmp, buf, translate); + to_merge.emplace_back(translate, buf); + } + + /* + * Tracking buffers, their contents and modifications would be quite complicated + * so we opt for something simple here: We identify buffers by their location + * in memory. 
+ * This imposes the following assumptions: + * - buffers don't move in memory + * - vertex and index data is immutable + * - we know when to invalidate (invalidateMapBlockMesh does this) + */ + std::sort(to_merge.begin(), to_merge.end(), [] (const auto &l, const auto &r) { + return static_cast(l.second) < static_cast(r.second); + }); + // cache key is a string of sorted raw pointers + std::string key; + key.reserve(sizeof(void*) * to_merge.size()); + for (auto &it : to_merge) + key.append(reinterpret_cast(&it.second), sizeof(void*)); + + // try to take from cache + auto it2 = dynamic_buffers.find(key); + if (it2 != dynamic_buffers.end()) { + g_profiler->avg("CM::transformBuffersToDO: cache hit rate", 1); + const auto &use_mat = to_merge.front().second->getMaterial(); + for (auto *buf : it2->second.buf) { + // material is not part of the cache key, so make sure it still matches + buf->getMaterial() = use_mat; + draw_order.emplace_back(v3f(0), buf); + } + it2->second.age = 0; + } else if (!key.empty()) { + g_profiler->avg("CM::transformBuffersToDO: cache hit rate", 0); + // merge and save to cache + auto &put_buffers = dynamic_buffers[key]; + scene::SMeshBuffer *tmp = nullptr; + const auto &finish_buf = [&] () { + if (tmp) { + draw_order.emplace_back(v3f(0), tmp); + total_vtx = subtract_or_zero(total_vtx, tmp->getVertexCount()); + total_idx = subtract_or_zero(total_idx, tmp->getIndexCount()); + + // Upload buffer here explicitly to give the driver some + // extra time to get it ready before drawing. 
+ tmp->setHardwareMappingHint(scene::EHM_STREAM); + driver->updateHardwareBuffer(tmp->getVertexBuffer()); + driver->updateHardwareBuffer(tmp->getIndexBuffer()); + } + tmp = nullptr; + }; + + for (auto &it : to_merge) { + v3f translate = it.first; + auto *buf = it.second; + + bool new_buffer = false; + if (!tmp) + new_buffer = true; + else if (tmp->getVertexCount() + buf->getVertexCount() > U16_MAX) + new_buffer = true; + if (new_buffer) { + finish_buf(); + tmp = new scene::SMeshBuffer(); + put_buffers.buf.push_back(tmp); + assert(tmp->getPrimitiveType() == buf->getPrimitiveType()); + tmp->Material = buf->getMaterial(); + // preallocate approximately + tmp->Vertices->Data.reserve(MYMIN(U16_MAX, total_vtx)); + tmp->Indices->Data.reserve(total_idx); + } + appendToMeshBuffer(tmp, buf, translate); + } + finish_buf(); + assert(!put_buffers.buf.empty()); } - finish_buf(); // first call needs to set the material if (draw_order.size() > draw_order_pre) @@ -921,7 +969,6 @@ void ClientMap::renderMap(video::IVideoDriver* driver, s32 pass) TimeTaker tt_collect(""); MeshBufListMaps grouped_buffers; - std::vector buffer_trash; DrawDescriptorList draw_order; auto is_frustum_culled = m_client->getCamera()->getFrustumCuller(); @@ -979,7 +1026,7 @@ void ClientMap::renderMap(video::IVideoDriver* driver, s32 pass) for (auto &map : grouped_buffers.maps) { for (auto &list : map) { merged_count += transformBuffersToDrawOrder( - list.second, draw_order, get_block_wpos, buffer_trash); + list.second, draw_order, get_block_wpos, m_dynamic_buffers); } } @@ -1036,6 +1083,20 @@ void ClientMap::renderMap(video::IVideoDriver* driver, s32 pass) if (pass == scene::ESNRP_SOLID) { g_profiler->avg("renderMap(): animated meshes [#]", mesh_animate_count); g_profiler->avg(prefix + "merged buffers [#]", merged_count); + + u32 cached_count = 0; + for (auto it = m_dynamic_buffers.begin(); it != m_dynamic_buffers.end(); ) { + // prune aggressively since every new/changed block or camera + // rotation can have 
big effects + if (++it->second.age > 1) { + it->second.drop(); + it = m_dynamic_buffers.erase(it); + } else { + cached_count += it->second.buf.size(); + it++; + } + } + g_profiler->avg(prefix + "merged buffers in cache [#]", cached_count); } if (pass == scene::ESNRP_TRANSPARENT) { @@ -1045,9 +1106,51 @@ void ClientMap::renderMap(video::IVideoDriver* driver, s32 pass) g_profiler->avg(prefix + "vertices drawn [#]", vertex_count); g_profiler->avg(prefix + "drawcalls [#]", drawcall_count); g_profiler->avg(prefix + "material swaps [#]", material_swaps); +} - for (auto &x : buffer_trash) - x->drop(); +void ClientMap::invalidateMapBlockMesh(MapBlockMesh *mesh) +{ + // find all buffers for this block + MeshBufListMaps tmp; + tmp.addFromBlock(v3s16(), mesh, getSceneManager()->getVideoDriver()); + + std::vector to_delete; + void *maxp = 0; + for (auto &it : tmp.maps) { + for (auto &it2 : it) { + for (auto &it3 : it2.second) { + void *const p = it3.second; // explicit downcast + to_delete.push_back(p); + maxp = std::max(maxp, p); + } + } + } + if (to_delete.empty()) + return; + + // we know which buffers were used to produce a merged buffer + // so go through the cache and drop any entries that match + const auto &match_any = [&] (const std::string &key) { + assert(key.size() % sizeof(void*) == 0); + void *v; + for (size_t off = 0; off < key.size(); off += sizeof(void*)) { + // no alignment guarantee so *(void**)&key[off] is not allowed! 
+ memcpy(&v, &key[off], sizeof(void*)); + if (v > maxp) // early exit, since it's sorted + break; + if (CONTAINS(to_delete, v)) + return true; + } + return false; + }; + for (auto it = m_dynamic_buffers.begin(); it != m_dynamic_buffers.end(); ) { + if (match_any(it->first)) { + it->second.drop(); + it = m_dynamic_buffers.erase(it); + } else { + it++; + } + } } static bool getVisibleBrightness(Map *map, const v3f &p0, v3f dir, float step, @@ -1263,7 +1366,6 @@ void ClientMap::renderMapShadows(video::IVideoDriver *driver, }; MeshBufListMaps grouped_buffers; - std::vector buffer_trash; DrawDescriptorList draw_order; std::size_t count = 0; @@ -1308,7 +1410,7 @@ void ClientMap::renderMapShadows(video::IVideoDriver *driver, for (auto &map : grouped_buffers.maps) { for (auto &list : map) { transformBuffersToDrawOrder( - list.second, draw_order, get_block_wpos, buffer_trash); + list.second, draw_order, get_block_wpos, m_dynamic_buffers); } } @@ -1373,9 +1475,6 @@ void ClientMap::renderMapShadows(video::IVideoDriver *driver, g_profiler->avg(prefix + "vertices drawn [#]", vertex_count); g_profiler->avg(prefix + "drawcalls [#]", drawcall_count); g_profiler->avg(prefix + "material swaps [#]", material_swaps); - - for (auto &x : buffer_trash) - x->drop(); } /* diff --git a/src/client/clientmap.h b/src/client/clientmap.h index 760a1a4db..daa9d80f1 100644 --- a/src/client/clientmap.h +++ b/src/client/clientmap.h @@ -36,6 +36,16 @@ namespace irr::video class IVideoDriver; } +struct CachedMeshBuffer { + std::vector buf; + u8 age = 0; + + void drop(); +}; + +using CachedMeshBuffers = std::unordered_map; + + /* ClientMap @@ -95,6 +105,8 @@ public: void renderPostFx(CameraMode cam_mode); + void invalidateMapBlockMesh(MapBlockMesh *mesh); + // For debug printing void PrintInfo(std::ostream &out) override; @@ -151,6 +163,7 @@ private: std::vector m_keeplist; std::map m_drawlist_shadow; bool m_needs_update_drawlist; + CachedMeshBuffers m_dynamic_buffers; bool m_cache_trilinear_filter; 
bool m_cache_bilinear_filter; diff --git a/src/client/mapblock_mesh.h b/src/client/mapblock_mesh.h index 55aa172bd..61fefd284 100644 --- a/src/client/mapblock_mesh.h +++ b/src/client/mapblock_mesh.h @@ -170,13 +170,11 @@ private: /* Holds a mesh for a mapblock. - Besides the SMesh*, this contains information used for animating - the vertex positions, colors and texture coordinates of the mesh. + Besides the SMesh*, this contains information used for transparency sorting + and texture animation. For example: - - cracks [implemented] - - day/night transitions [implemented] - - animated flowing liquids [not implemented] - - animating vertex positions for e.g. axles [not implemented] + - cracks + - day/night transitions */ class MapBlockMesh { @@ -193,13 +191,17 @@ public: // Returns true if anything has been changed. bool animate(bool faraway, float time, int crack, u32 daynight_ratio); + /// @warning ClientMap requires that the vertex and index data is not modified scene::IMesh *getMesh() { return m_mesh[0].get(); } + /// @param layer layer index + /// @warning ClientMap requires that the vertex and index data is not modified scene::IMesh *getMesh(u8 layer) { + assert(layer < MAX_TILE_LAYERS); return m_mesh[layer].get(); } diff --git a/src/defaultsettings.cpp b/src/defaultsettings.cpp index ad36456f5..d9e1356fd 100644 --- a/src/defaultsettings.cpp +++ b/src/defaultsettings.cpp @@ -103,7 +103,7 @@ void set_default_settings() settings->setDefault("sound_extensions_blacklist", ""); settings->setDefault("mesh_generation_interval", "0"); settings->setDefault("mesh_generation_threads", "0"); - settings->setDefault("mesh_buffer_min_vertices", "100"); + settings->setDefault("mesh_buffer_min_vertices", "300"); settings->setDefault("free_move", "false"); settings->setDefault("pitch_move", "false"); settings->setDefault("fast_move", "false");