<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE root>
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:ali="http://www.niso.org/schemas/ali/1.0/" article-type="research-article" dtd-version="1.2" xml:lang="en"><front><journal-meta><journal-id journal-id-type="publisher-id">Programming and Computer Software</journal-id><journal-title-group><journal-title xml:lang="en">Programming and Computer Software</journal-title><trans-title-group xml:lang="ru"><trans-title>Программирование</trans-title></trans-title-group></journal-title-group><issn publication-format="print">0132-3474</issn><issn publication-format="electronic">3034-5847</issn><publisher><publisher-name xml:lang="en">The Russian Academy of Sciences</publisher-name></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">688125</article-id><article-id pub-id-type="doi">10.31857/S0132347425030088</article-id><article-id pub-id-type="edn">GRMVKR</article-id><article-categories><subj-group subj-group-type="toc-heading" xml:lang="en"><subject>COMPUTER GRAFICS AND VISUALIZATION</subject></subj-group><subj-group subj-group-type="toc-heading" xml:lang="ru"><subject>КОМПЬЮТЕРНАЯ ГРАФИКА И ВИЗУАЛИЗАЦИЯ</subject></subj-group><subj-group subj-group-type="article-type"><subject>Research Article</subject></subj-group></article-categories><title-group><article-title xml:lang="en">Research on Methods for Traversing Two-Level BVH Trees on Graphics Processors</article-title><trans-title-group xml:lang="ru"><trans-title>Исследование методов обхода двухуровневых BVH-деревьев на графических процессорах</trans-title></trans-title-group></title-group><contrib-group><contrib contrib-type="author"><contrib-id contrib-id-type="orcid">https://orcid.org/0000-0001-5385-4841</contrib-id><name-alternatives><name xml:lang="en"><surname>Smirnov</surname><given-names>L. M.</given-names></name><name xml:lang="ru"><surname>Смирнов</surname><given-names>Л. М.</given-names></name></name-alternatives><address><country country="RU">Russian Federation</country></address><email>lyovasmirnov@gmail.com</email><xref ref-type="aff" rid="aff1"/></contrib><contrib contrib-type="author"><contrib-id contrib-id-type="orcid">https://orcid.org/0000-0001-8829-9884</contrib-id><name-alternatives><name xml:lang="en"><surname>Frolov</surname><given-names>V. A.</given-names></name><name xml:lang="ru"><surname>Фролов</surname><given-names>В. А.</given-names></name></name-alternatives><address><country country="RU">Russian Federation</country></address><email>vladimir.frolov@graphics.cs.msu.ru</email><xref ref-type="aff" rid="aff2"/><xref ref-type="aff" rid="aff1"/><xref ref-type="aff" rid="aff3"/></contrib><contrib contrib-type="author"><contrib-id contrib-id-type="orcid">https://orcid.org/0009-0009-5676-0475</contrib-id><name-alternatives><name xml:lang="en"><surname>Kryachko</surname><given-names>Y. A.</given-names></name><name xml:lang="ru"><surname>Крячко</surname><given-names>Ю. А.</given-names></name></name-alternatives><address><country country="RU">Russian Federation</country></address><email>yuri.kryachko@gmail.com</email><xref ref-type="aff" rid="aff1"/></contrib><contrib contrib-type="author"><contrib-id contrib-id-type="orcid">https://orcid.org/0000-0003-1252-8294</contrib-id><name-alternatives><name xml:lang="en"><surname>Voloboy</surname><given-names>A. G.</given-names></name><name xml:lang="ru"><surname>Волобой</surname><given-names>А. Г.</given-names></name></name-alternatives><address><country country="RU">Russian Federation</country></address><email>voloboy@gin.keldysh.ru</email><xref ref-type="aff" rid="aff3"/></contrib></contrib-group><aff-alternatives id="aff1"><aff><institution xml:lang="en">Lomonosov Moscow State University, Faculty of Computational Mathematics and Cybernetics</institution></aff><aff><institution xml:lang="ru">Московский государственный университет им. М. В. Ломоносова, факультет высшей математики и кибернетики</institution></aff></aff-alternatives><aff-alternatives id="aff2"><aff><institution xml:lang="en">Institute of Artificial Intelligence, Moscow State University Leninskie Gory</institution></aff><aff><institution xml:lang="ru">Институт искусственного интеллекта Московского государственного университета им. М. В. Ломоносова</institution></aff></aff-alternatives><aff-alternatives id="aff3"><aff><institution xml:lang="en">Keldysh Institute of Applied Mathematics, Russian Academy of Sciences</institution></aff><aff><institution xml:lang="ru">Институт прикладной математики им. М. В. Келдыша РАН</institution></aff></aff-alternatives><pub-date date-type="pub" iso-8601-date="2025-07-04" publication-format="electronic"><day>04</day><month>07</month><year>2025</year></pub-date><issue>3</issue><issue-title xml:lang="en"/><issue-title xml:lang="ru"/><fpage>80</fpage><lpage>101</lpage><history><date date-type="received" iso-8601-date="2025-07-22"><day>22</day><month>07</month><year>2025</year></date><date date-type="accepted" iso-8601-date="2025-07-22"><day>22</day><month>07</month><year>2025</year></date></history><permissions><copyright-statement xml:lang="en">Copyright ©; 2025, Russian Academy of Sciences</copyright-statement><copyright-statement xml:lang="ru">Copyright ©; 2025, Российская академия наук</copyright-statement><copyright-year>2025</copyright-year><copyright-holder xml:lang="en">Russian Academy of Sciences</copyright-holder><copyright-holder xml:lang="ru">Российская академия наук</copyright-holder></permissions><self-uri xlink:href="https://transsyst.ru/0132-3474/article/view/688125">https://transsyst.ru/0132-3474/article/view/688125</self-uri><abstract xml:lang="en"><p>A key part of the most common ray tracing methods is the traversal/search for intersections with a hierarchical structure – BVH, which describes the scene geometry. This paper presents a comparative analysis of the performance of several BVH tree traversal methods on stationary and mobile graphics processors. We investigated BVH trees with varying depths and numbers of child nodes, implemented several stack-based traversal algorithms, and two different stackless traversal algorithms; we proposed our own stackless traversal variant, which is more performant than existing ones in some cases. We proposed our own compression method for BVH trees with 2 nodes, losing no more than 15% performance. We identified a common problem that occurs in almost all algorithms when implemented on graphics processors. We believe that our analysis will help developers of ray tracing hardware accelerators create more economical hardware solutions, not limited solely to ray tracing. More specifically, our experimental results suggest that up to a 5x speedup can be achieved by changing the L2 cache mechanism, and this has likely already been implemented in stationary GPUs with hardware-accelerated ray tracing, not only within the ray tracing mechanism itself but also in a more general context.</p></abstract><trans-abstract xml:lang="ru"><p>Ключевой частью наиболее распространенных методов трассировки лучей является обход/поиск пересечения с иерархической структурой – BVH, описывающей геометрию сцены. В данной работе представлен сравнительный анализ производительности нескольких методов обхода BVH-деревьев на стационарных и мобильных графических процессорах. Мы исследовали BVH-деревья с различной глубиной и количеством дочерних узлов, реализовали несколько алгоритмов обхода со стэком и два различных алгоритма безстэкового обхода; предложили свой вариант безстэкового обхода, более производительный чем существующие в ряде случаев. Предложили свой вариант сжатия BVH-дерева с двумя узлами, теряющий не более 15% производительности. Мы выявили некоторую общую проблему, встречающуюся почти во всех алгоритмах при их реализации на графических процессорах. Мы полагаем, что наш анализ поможет разработчикам аппаратных ускорителей трассировки лучей создавать более экономное аппаратное решение, не ограничиваемое при этом только лишь трассировкой лучей. Если говорить более конкретно, результаты наших экспериментов говорят о том, что можно получить ускорение до 5 раз с помощью изменения механизма работы L2-кэша, причем на стационарных GPU с аппаратным ускорением трассировки лучей это, по-видимому, уже сделано не только в рамках непосредственно механизма аппаратного ускорения трассировки лучей, но и в более общем случае.</p></trans-abstract><kwd-group xml:lang="en"><kwd>ray tracing</kwd><kwd>GPU</kwd><kwd>BVH trees</kwd></kwd-group><kwd-group xml:lang="ru"><kwd>трассировка лучей</kwd><kwd>GPU</kwd><kwd>BVH-деревья</kwd></kwd-group><funding-group/></article-meta></front><body></body><back><ref-list><ref id="B1"><label>1.</label><mixed-citation>Beets K. The six levels of ray tracing acceleration. Imagination white paper. https://forums.macrumors.com/attachments/imagination-raytracing-primer-sept2020-pdf.1973926/</mixed-citation></ref><ref id="B2"><label>2.</label><mixed-citation>Meister D., Bittne J. Performance Comparison of Bounding Volume Hierarchies for GPU Ray Tracing, Journal of Computer Graphics Techniques (JCGT). 2022. V. 11. № 4. Р. 1–19. http://jcgt.org/published/0011/04/01/</mixed-citation></ref><ref id="B3"><label>3.</label><mixed-citation>Meister D., Ogaki S., Benthin C., Michael J., Doyle M.J., Guthe М., Bittner J. A Survey on Bounding Volume Hierarchies for Ray Tracing. Computer Graphics Forum. 2021. V. 40. P. 683–712.</mixed-citation></ref><ref id="B4"><label>4.</label><mixed-citation>Aila T., Laine S. Understanding the Efficiency of Ray Traversal on GPUs. In Proceedings of HPG. 2009. Р. 145–149. 16, 17, 23.</mixed-citation></ref><ref id="B5"><label>5.</label><mixed-citation>Aila T., Karras T. Architecture Considerations for Tracing Incoherent Rays. In Proceedings of PG. 2010. Р. 113–122. 18, 19.</mixed-citation></ref><ref id="B6"><label>6.</label><mixed-citation>Aila T., Karras T., Laine S. On Quality Metrics of Bounding Volume Hierarchies. HPG. 2013. Р. 101–108. 4, 5, 10.</mixed-citation></ref><ref id="B7"><label>7.</label><mixed-citation>Lier A., Stamminger M., Selgrad K. CPU-style SIMD ray traversal on GPUs. In Proceedings of the Conference on High-Performance Graphics, Association for Computing Machinery, 7:1–7:4. 2018. https://doi.org/10.1145/3231578. 3231583. 7, 8, 11, 15</mixed-citation></ref><ref id="B8"><label>8.</label><mixed-citation>Ernst M., Greiner G. Early Split Clipping for Bounding Volume Hierarchies. In Proceedings of Symposium on Interactive Ray Tracing. 2007. Р. 73–78. 9, 10.</mixed-citation></ref><ref id="B9"><label>9.</label><mixed-citation>Stich M., Friedrich H., Dietrich A. Spatial Splits in Bounding Volume Hierarchies. In Proceedings of the High-Performance Graphics. 2009. Р. 7–13. 10, 22, 23.</mixed-citation></ref><ref id="B10"><label>10.</label><mixed-citation>Segivia B., Ernst M. Memory Efficient Ray Tracing with Hierarchical Mesh Quantization. In Proceedings of Graphics Interface. 2010. Р. 153–160. 12, 13.</mixed-citation></ref><ref id="B11"><label>11.</label><mixed-citation>Mahovsky J., Wyvill B. Memory-Conserving Bounding Volume Hierarchies with Coherent Raytracing. Computer Graphics Forum 25. 2006. № 2. Р. 173–182. 12.</mixed-citation></ref><ref id="B12"><label>12.</label><mixed-citation>Liktor G., Vaidyanathan K. Bandwidth-efficient BVH Layout for Incremental Hardware Traversal. In Proceedings of High-Performance Graphics. 2016. Р. 51–61. 8, 18, 19.</mixed-citation></ref><ref id="B13"><label>13.</label><mixed-citation>Kalojanov J., Billeter M., Slusallek P. Two‐level grids for ray tracing on GPUs // Computer Graphics Forum. Oxford, UK: Blackwell Publishing Ltd. 2011. V. 30. № 2. P. 307–314.</mixed-citation></ref><ref id="B14"><label>14.</label><mixed-citation>Bartels P., Harada T. Combining GPU Tracing Methods within a Single Ray Query. In SIGGRAPH Asia 2022 Technical Communications (SA '22). Association for Computing Machinery, New York, NY, USA, 2022. Article 17, 1–4. https://doi.org/10.1145/3550340.3564231</mixed-citation></ref><ref id="B15"><label>15.</label><mixed-citation>Zellmann S., Wu Q., Ma K.L., Wald I. Memory-Efficient GPU Volume Path Tracing of AMR Data Using the Dual Mesh. 2023.</mixed-citation></ref><ref id="B16"><label>16.</label><mixed-citation>Garanzha K., Bel A., Premoze S., Galaktionov V. (2011). Out-of-core GPU ray tracing of complex scenes. In ACM SIGGRAPH 2011 Talks (pp. 1-1).</mixed-citation></ref><ref id="B17"><label>17.</label><mixed-citation>Roberto T. et al. Ray casting using a roped BVH with CUDA. Spring conference on Computer graphics. 2009.</mixed-citation></ref><ref id="B18"><label>18.</label><mixed-citation>Binder N., Keller A. Efficient Stackless Hierarchy Traversal on GPUs with Backtracking in Constant Time. In Proceedings of High-Performance Graphics. 2016. Р. 41–50. 17.</mixed-citation></ref><ref id="B19"><label>19.</label><mixed-citation>Laine S., Karra T., Aila T. Megakernels Considered Harmful: Wavefront Path Tracing on GPUs. High-Performance Graphics 2013.</mixed-citation></ref><ref id="B20"><label>20.</label><mixed-citation>Wald I., Woop S., Benthin C., Johnson G.S. &amp; Ernst M. (2014). Embree: a kernel framework for efficient CPU ray tracing. ACM Transactions on Graphics (TOG). 2014. № 33(4). Р. 1–8.</mixed-citation></ref><ref id="B21"><label>21.</label><mixed-citation>Wald I., Benthin C., Boulos S. Getting Rid of Packets – Efficient SIMD Single-Ray Traversal using Multi-Branching BVHs. In Symposium on Interactive Ray Tracing. 2008. Р. 49–57. 10, 14, 22, 23.</mixed-citation></ref><ref id="B22"><label>22.</label><mixed-citation>Popov S., Georgiev I., Dimov R., Slusallek P. Object Partitioning Considered Harmful: Space Subdivision for BVHs. In Proceedings of High-Performance Graphics. 2009. Р. 15–22. 4, 5, 10, 22.</mixed-citation></ref><ref id="B23"><label>23.</label><mixed-citation>Ylitie H., Karras T., Laine S. Efficient Incoherent Ray Traversal on GPUs Through Compressed Wide BVHs. In Proceedings of High-Performance Graphics. 2017. Р. 4: 1–4: 13, 10, 12, 16, 17, 22, 23</mixed-citation></ref><ref id="B24"><label>24.</label><mixed-citation>Yoon S.E., Manocha D. Cache-Efficient Layouts of Bounding Volume Hierarchies. Computer Graphics Forum. 2006. 8.</mixed-citation></ref><ref id="B25"><label>25.</label><mixed-citation>Wachter C., Keller A. Instant Ray Tracing: The Bounding Interval Hierarchy. In Proceedings Eurographics Symposium on Rendering. 2006. Р. 139–149. 13.</mixed-citation></ref><ref id="B26"><label>26.</label><mixed-citation>Eisemann M., Woizischke C., Magnor M. Ray Tracing with the Single Slab Hierarchy. In Proceedings of Vision, Modeling, and Visualization. 2008. Р. 373–381. 13.</mixed-citation></ref><ref id="B27"><label>27.</label><mixed-citation>Lin D., Vasiou E., Yuksel C., Kopta D., Brunvand E. Hardware-Accelerated Dual-Split Trees. Proceedings of the ACM on Computer Graphics and Interactive Techniques 3, 2 (2020). 13.</mixed-citation></ref><ref id="B28"><label>28.</label><mixed-citation>Weier P., Rath A., Michel É., Georgiev I., Slusallek P., Boubekeur T. N-BVH: Neural ray queries with bounding volume hierarchies. In ACM SIGGRAPH 2024 Conference Papers (SIGGRAPH '24). Association for Computing Machinery, New York, NY, USA, Article 99, 1–11. DOI: 10.1145/3641519.3657464.</mixed-citation></ref><ref id="B29"><label>29.</label><mixed-citation>Frolov V., Sanzharov V., Garifullin A., Raenchuk M., Voloboy A. CrossRT: A cross platform programming technology for hardware-accelerated ray tracing in CG and CV applications // arXiv:2409.12617</mixed-citation></ref></ref-list></back></article>
