- 
                Notifications
    You must be signed in to change notification settings 
- Fork 67
Smooth normal calculation Update! #941
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from 9 commits
efa37a7
              cc4a219
              c018762
              42ccf48
              05ee8ed
              f4b8ac6
              0dd02e8
              ce99287
              28f73f5
              5acb40d
              0feb4e9
              7424828
              d078b2c
              e72e0ef
              e91aba1
              509b359
              a30ef6f
              c580d72
              5af4a9b
              39e288c
              File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | 
|---|---|---|
|  | @@ -13,6 +13,13 @@ concept HashGridVertexData = requires(T obj, T const cobj, uint32_t hash) { | |
| { cobj.getPosition() } -> std::same_as<hlsl::float32_t3>; | ||
| }; | ||
|  | ||
| template <typename Fn, typename T> | ||
| concept HashGridIteratorFn = HashGridVertexData<T> && requires(Fn && fn, T const cobj) | ||
| { | ||
| // return whether the hash grid should continue the iteration | ||
| { std::invoke(std::forward<Fn>(fn), cobj) } -> std::same_as<bool>; | ||
| }; | ||
|  | ||
| template <HashGridVertexData VertexData> | ||
| class CVertexHashGrid | ||
| { | ||
|  | @@ -26,43 +33,40 @@ class CVertexHashGrid | |
| collection_t::const_iterator end; | ||
| }; | ||
|  | ||
| CVertexHashGrid(size_t _vertexCount, uint32_t _hashTableMaxSize, float _cellSize) : | ||
| m_sorter(createSorter(_vertexCount)), | ||
| m_hashTableMaxSize(_hashTableMaxSize), | ||
| m_cellSize(_cellSize) | ||
| CVertexHashGrid(size_t cellSize, uint32_t hashTableMaxSizeLog2, float vertexCountReserve) : | ||
| m_cellSize(cellSize), | ||
| m_hashTableMaxSize(1llu << hashTableMaxSizeLog2), | ||
| m_sorter(createSorter(vertexCountReserve)) | ||
| { | ||
| assert((core::isPoT(m_hashTableMaxSize))); | ||
|  | ||
| m_vertices.reserve(_vertexCount); | ||
| m_vertices.reserve(vertexCountReserve); | ||
| } | ||
|  | ||
| //inserts vertex into hash table | ||
| void add(VertexData&& vertex) | ||
| { | ||
| vertex.setHash(hash(vertex)); | ||
| m_vertices.push_back(vertex); | ||
| m_vertices.push_back(std::move(vertex)); | ||
| } | ||
|  | ||
| void validate() | ||
| void bake() | ||
|          | ||
| { | ||
| const auto oldSize = m_vertices.size(); | ||
| m_vertices.resize(oldSize*2u); | ||
| auto finalSortedOutput = std::visit( [&](auto& sorter) { return sorter(m_vertices.data(), m_vertices.data() + oldSize, oldSize, KeyAccessor()); },m_sorter ); | ||
| auto scratchBuffer = collection_t(m_vertices.size()); | ||
|  | ||
| auto finalSortedOutput = std::visit( [&](auto& sorter) | ||
| { | ||
| return sorter(m_vertices.data(), scratchBuffer.data(), m_vertices.size(), KeyAccessor()); | ||
| }, m_sorter ); | ||
|  | ||
| if (finalSortedOutput != m_vertices.data()) | ||
| m_vertices.erase(m_vertices.begin(), m_vertices.begin() + oldSize); | ||
| else | ||
| m_vertices.resize(oldSize); | ||
| m_vertices = std::move(scratchBuffer); | ||
| } | ||
|  | ||
| const collection_t& vertices() const { return m_vertices; } | ||
|  | ||
| collection_t& vertices(){ return m_vertices; } | ||
|  | ||
| inline uint32_t getVertexCount() const { return m_vertices.size(); } | ||
|  | ||
| template <typename Fn> | ||
| void iterateBroadphaseCandidates(const VertexData& vertex, Fn fn) const | ||
| template <HashGridIteratorFn<VertexData> Fn> | ||
| void forEachBroadphaseNeighborCandidates(const VertexData& vertex, Fn&& fn) const | ||
| { | ||
| std::array<uint32_t, 8> neighboringCells; | ||
| const auto cellCount = getNeighboringCellHashes(neighboringCells.data(), vertex); | ||
|  | @@ -76,7 +80,7 @@ class CVertexHashGrid | |
| { | ||
| const vertex_data_t& neighborVertex = *bounds.begin; | ||
| if (&vertex != &neighborVertex) | ||
| There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. imho the "not checking against oneself" thing could be done in the  | ||
| if (!fn(neighborVertex)) break; | ||
| if (!std::invoke(std::forward<Fn>(fn), neighborVertex)) break; | ||
| } | ||
| } | ||
|  | ||
|  | @@ -85,7 +89,7 @@ class CVertexHashGrid | |
| private: | ||
| struct KeyAccessor | ||
| { | ||
| _NBL_STATIC_INLINE_CONSTEXPR size_t key_bit_count = 32ull; | ||
| constexpr static size_t key_bit_count = 32ull; | ||
| There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 
 | ||
|  | ||
| template<auto bit_offset, auto radix_mask> | ||
| inline decltype(radix_mask) operator()(const VertexData& item) const | ||
|  | @@ -98,21 +102,19 @@ class CVertexHashGrid | |
| static constexpr uint32_t primeNumber2 = 19349663; | ||
| static constexpr uint32_t primeNumber3 = 83492791; | ||
|  | ||
| static constexpr uint32_t invalidHash = 0xFFFFFFFF; | ||
|  | ||
| using sorter_t = std::variant< | ||
| core::LSBSorter<KeyAccessor::key_bit_count, uint16_t>, | ||
| core::LSBSorter<KeyAccessor::key_bit_count, uint32_t>, | ||
| core::LSBSorter<KeyAccessor::key_bit_count, size_t>>; | ||
| core::RadixLsbSorter<KeyAccessor::key_bit_count, uint16_t>, | ||
| core::RadixLsbSorter<KeyAccessor::key_bit_count, uint32_t>, | ||
| core::RadixLsbSorter<KeyAccessor::key_bit_count, size_t>>; | ||
| sorter_t m_sorter; | ||
|  | ||
| static sorter_t createSorter(size_t vertexCount) | ||
| { | ||
| if (vertexCount < (0x1ull << 16ull)) | ||
| return core::LSBSorter<KeyAccessor::key_bit_count, uint16_t>(); | ||
| return core::RadixLsbSorter<KeyAccessor::key_bit_count, uint16_t>(); | ||
| if (vertexCount < (0x1ull << 32ull)) | ||
| return core::LSBSorter<KeyAccessor::key_bit_count, uint32_t>(); | ||
| return core::LSBSorter<KeyAccessor::key_bit_count, size_t>(); | ||
| return core::RadixLsbSorter<KeyAccessor::key_bit_count, uint32_t>(); | ||
| return core::RadixLsbSorter<KeyAccessor::key_bit_count, size_t>(); | ||
| } | ||
|  | ||
| collection_t m_vertices; | ||
|  | @@ -122,10 +124,8 @@ class CVertexHashGrid | |
| uint32_t hash(const VertexData& vertex) const | ||
| { | ||
| const hlsl::float32_t3 position = floor(vertex.getPosition() / m_cellSize); | ||
|  | ||
| return ((static_cast<uint32_t>(position.x) * primeNumber1) ^ | ||
| (static_cast<uint32_t>(position.y) * primeNumber2) ^ | ||
| (static_cast<uint32_t>(position.z) * primeNumber3))& (m_hashTableMaxSize - 1); | ||
| const auto position_uint32 = hlsl::uint32_t3(position.x, position.y, position.z); | ||
| return hash(position_uint32); | ||
| } | ||
|  | ||
| uint32_t hash(const hlsl::uint32_t3& position) const | ||
|  | @@ -137,6 +137,7 @@ class CVertexHashGrid | |
|  | ||
| uint8_t getNeighboringCellHashes(uint32_t* outNeighbors, const VertexData& vertex) const | ||
| { | ||
| // both 0.x and -0.x would be converted to 0 if we directly cast the position to an unsigned integer, causing the 0 cell to be more crowded than the rest of the cells. So we use floor here to spread the vertices more uniformly. | ||
| hlsl::float32_t3 cellfloatcoord = floor(vertex.getPosition() / m_cellSize - hlsl::float32_t3(0.5)); | ||
| hlsl::uint32_t3 baseCoord = hlsl::uint32_t3(static_cast<uint32_t>(cellfloatcoord.x), static_cast<uint32_t>(cellfloatcoord.y), static_cast<uint32_t>(cellfloatcoord.z)); | ||
|  | ||
|  | @@ -167,12 +168,9 @@ class CVertexHashGrid | |
|  | ||
| BucketBounds getBucketBoundsByHash(uint32_t hash) const | ||
| { | ||
| if (hash == invalidHash) | ||
| return { m_vertices.end(), m_vertices.end() }; | ||
|  | ||
| const auto skipListBound = std::visit([&](auto& sorter) | ||
| { | ||
| auto hashBound = sorter.getHashBound(hash); | ||
| auto hashBound = sorter.getMostSignificantRadixBound(hash); | ||
| return std::pair<collection_t::const_iterator, collection_t::const_iterator>(m_vertices.begin() + hashBound.first, m_vertices.begin() + hashBound.second); | ||
| }, m_sorter); | ||
|  | ||
|  | @@ -182,7 +180,7 @@ class CVertexHashGrid | |
| hash, | ||
| [](const VertexData& vertex, uint32_t hash) | ||
| { | ||
| return vertex.hash < hash; | ||
| return vertex.getHash() < hash; | ||
| }); | ||
|  | ||
| auto end = std::upper_bound( | ||
|  | @@ -191,7 +189,7 @@ class CVertexHashGrid | |
| hash, | ||
| [](uint32_t hash, const VertexData& vertex) | ||
| { | ||
| return hash < vertex.hash; | ||
| return hash < vertex.getHash(); | ||
| }); | ||
|  | ||
| const auto beginIx = begin - m_vertices.begin(); | ||
|  | ||
| Original file line number | Diff line number | Diff line change | 
|---|---|---|
|  | @@ -11,33 +11,137 @@ namespace nbl::asset { | |
| class CVertexWelder { | ||
|  | ||
| public: | ||
| using WeldPredicateFn = std::function<bool(const ICPUPolygonGeometry* geom, uint64_t idx1, uint64_t idx2)>; | ||
| using WeldPredicateFn = std::function<bool(const ICPUPolygonGeometry* geom, uint32_t idx1, uint32_t idx2)>; | ||
| There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. at this point it can be a pure virtual interface class with pure virtual  Because the init needs to check validity of the polygon geometry, e.g. your default predicate should fail the initialization if there are any attribute views which are not formatted (format is unknown) because it doesn't know what data in the buffer belongs to what vertex (because there's no clear mapping) See #921 (comment) | ||
|  | ||
| class DefaultWeldPredicate | ||
| { | ||
| private: | ||
| static bool isAttributeValEqual(const ICPUPolygonGeometry::SDataView& view, uint32_t index1, uint32_t index2, float epsilon) | ||
| { | ||
| if (!view) return true; | ||
| const auto channelCount = getFormatChannelCount(view.composed.format); | ||
| switch (view.composed.rangeFormat) | ||
| { | ||
| case IGeometryBase::EAABBFormat::U64: | ||
| case IGeometryBase::EAABBFormat::U32: | ||
| { | ||
| hlsl::uint64_t4 val1, val2; | ||
| view.decodeElement<hlsl::uint64_t4>(index1, val1); | ||
| view.decodeElement<hlsl::uint64_t4>(index2, val2); | ||
| for (auto channel_i = 0u; channel_i < channelCount; channel_i++) | ||
| if (val1[channel_i] != val2[channel_i]) return false; | ||
| break; | ||
| } | ||
| case IGeometryBase::EAABBFormat::S64: | ||
| case IGeometryBase::EAABBFormat::S32: | ||
| { | ||
| hlsl::int64_t4 val1, val2; | ||
| view.decodeElement<hlsl::int64_t4>(index1, val1); | ||
| view.decodeElement<hlsl::int64_t4>(index2, val2); | ||
| for (auto channel_i = 0u; channel_i < channelCount; channel_i++) | ||
| if (val1[channel_i] != val2[channel_i]) return false; | ||
| break; | ||
| } | ||
| default: | ||
| { | ||
| // TODO: Handle 16,32,64 bit float vectors once the pixel encode/decode functions get reimplemented in HLSL and decodeElement can actually benefit from that. | ||
| hlsl::float64_t4 val1, val2; | ||
| view.decodeElement<hlsl::float64_t4>(index1, val1); | ||
| view.decodeElement<hlsl::float64_t4>(index2, val2); | ||
| for (auto channel_i = 0u; channel_i < channelCount; channel_i++) | ||
| { | ||
| const auto diff = abs(val1[channel_i] - val2[channel_i]); | ||
| if (diff > epsilon) return false; | ||
| } | ||
| break; | ||
| } | ||
| } | ||
| return true; | ||
| } | ||
|  | ||
| static bool isAttributeDirEqual(const ICPUPolygonGeometry::SDataView& view, uint32_t index1, uint32_t index2, float epsilon) | ||
| { | ||
| if (!view) return true; | ||
| const auto channelCount = getFormatChannelCount(view.composed.format); | ||
| switch (view.composed.rangeFormat) | ||
| { | ||
| case IGeometryBase::EAABBFormat::U64: | ||
| case IGeometryBase::EAABBFormat::U32: | ||
| { | ||
| hlsl::uint64_t4 val1, val2; | ||
| view.decodeElement<hlsl::uint64_t4>(index1, val1); | ||
| view.decodeElement<hlsl::uint64_t4>(index2, val2); | ||
| return (1.0 - hlsl::dot(val1, val2)) < epsilon; | ||
| } | ||
| case IGeometryBase::EAABBFormat::S64: | ||
| case IGeometryBase::EAABBFormat::S32: | ||
| { | ||
| hlsl::int64_t4 val1, val2; | ||
| view.decodeElement<hlsl::int64_t4>(index1, val1); | ||
| view.decodeElement<hlsl::int64_t4>(index2, val2); | ||
| return (1.0 - hlsl::dot(val1, val2)) < epsilon; | ||
| } | ||
| 
      Comment on lines
    
      +28
     to 
      +83
    
   There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. for formats which don't decode to floats you need to use  There's a  There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. | ||
| default: | ||
| { | ||
| hlsl::float64_t4 val1, val2; | ||
| view.decodeElement<hlsl::float64_t4>(index1, val1); | ||
| view.decodeElement<hlsl::float64_t4>(index2, val2); | ||
| return (1.0 - hlsl::dot(val1, val2)) < epsilon; | ||
| } | ||
| } | ||
| return true; | ||
| } | ||
|  | ||
| float m_epsilon; | ||
|  | ||
| public: | ||
|  | ||
| DefaultWeldPredicate(float epsilon) : m_epsilon(epsilon) {} | ||
|  | ||
| bool operator()(const ICPUPolygonGeometry* polygon, uint32_t index1, uint32_t index2) | ||
| { | ||
| if (!isAttributeValEqual(polygon->getPositionView(), index1, index2, m_epsilon)) | ||
| return false; | ||
| if (!isAttributeDirEqual(polygon->getNormalView(), index1, index2, m_epsilon)) | ||
| return false; | ||
| for (const auto& jointWeightView : polygon->getJointWeightViews()) | ||
| { | ||
| if (!isAttributeValEqual(jointWeightView.indices, index1, index2, m_epsilon)) return false; | ||
| if (!isAttributeValEqual(jointWeightView.weights, index1, index2, m_epsilon)) return false; | ||
| } | ||
| for (const auto& auxAttributeView : polygon->getAuxAttributeViews()) | ||
| if (!isAttributeValEqual(auxAttributeView, index1, index2, m_epsilon)) return false; | ||
|  | ||
| return true; | ||
| } | ||
|  | ||
| }; | ||
|  | ||
| template <typename AccelStructureT> | ||
| There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. there should be some concept you require here | ||
| static core::smart_refctd_ptr<ICPUPolygonGeometry> weldVertices(const ICPUPolygonGeometry* polygon, const AccelStructureT& as, WeldPredicateFn shouldWeldFn) { | ||
| auto outPolygon = core::move_and_static_cast<ICPUPolygonGeometry>(polygon->clone(0u)); | ||
| outPolygon->setIndexing(IPolygonGeometryBase::TriangleList()); | ||
|  | ||
| core::vector<uint64_t> vertexIndexToAsIndex(as.getVertexCount()); | ||
| core::vector<uint32_t> vertexIndexToAsIndex(as.getVertexCount()); | ||
|  | ||
| for (uint64_t vertexData_i = 0u; vertexData_i < as.getVertexCount(); vertexData_i++) | ||
| for (uint32_t vertexData_i = 0u; vertexData_i < as.getVertexCount(); vertexData_i++) | ||
| { | ||
| const auto& vertexData = as.vertices()[vertexData_i]; | ||
| vertexIndexToAsIndex[vertexData.index] = vertexData.index; | ||
| There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. but this is the same as initializing your  There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. it should be vertexIndexToAsIndex[vertexData.index] = vertexData_i. | ||
| } | ||
|  | ||
| static constexpr auto INVALID_INDEX = std::numeric_limits<uint64_t>::max(); | ||
| core::vector<uint64_t> remappedVertexIndexes(as.getVertexCount()); | ||
| static constexpr auto INVALID_INDEX = std::numeric_limits<uint32_t>::max(); | ||
| core::vector<uint32_t> remappedVertexIndexes(as.getVertexCount()); | ||
| std::fill(remappedVertexIndexes.begin(), remappedVertexIndexes.end(), INVALID_INDEX); | ||
|  | ||
| uint64_t maxRemappedIndex = 0; | ||
| uint32_t maxRemappedIndex = 0; | ||
| // iterate by index, so that we always use the smallest index when multiple vertexes can be welded together | ||
| for (uint64_t index = 0; index < as.getVertexCount(); index++) | ||
| for (uint32_t index = 0; index < as.getVertexCount(); index++) | ||
| { | ||
| const auto asIndex = vertexIndexToAsIndex[index]; | ||
| There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why have that whole  There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. also the  There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this needs a different construction https://github.com/Devsh-Graphics-Programming/Nabla/pull/941/files?diff=unified&w=1#r2466545963 
 Now any vertex that's not in the acceleration structure itself or has no equivalent will be set to Invalid in the  when you later iterate over the index buffer to create a new one, if you encounter such an invalid remapping it means the vertex was never in the Acceleration Structure and therefore you can abort with an error There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done. | ||
| const auto& vertexData = as.vertices()[asIndex]; | ||
| auto& remappedVertexIndex = remappedVertexIndexes[index]; | ||
| There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. if you went through vertices in order from 0 to max vertex reference, then you could initialize to INVALID here, and not  | ||
| as.iterateBroadphaseCandidates(vertexData, [&, polygon, index](const typename AccelStructureT::vertex_data_t& neighbor) { | ||
| as.forEachBroadphaseNeighborCandidates(vertexData, [&, polygon, index](const typename AccelStructureT::vertex_data_t& neighbor) { | ||
| const auto neighborRemappedIndex = remappedVertexIndexes[neighbor.index]; | ||
| if (shouldWeldFn(polygon, index, neighbor.index) && neighborRemappedIndex != INVALID_INDEX) { | ||
| There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. check the neighbour remap for being initialized first, remember  | ||
| remappedVertexIndex = neighborRemappedIndex; | ||
| There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. btw because of funny cache and aliasing effects (tapping same array), you want to  make the  | ||
|  | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
why is cellSize now an integer and vertex count a
float!?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
add a default value for
vertexCountReservelike 8kThere was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done.