<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article>
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:ali="http://www.niso.org/schemas/ali/1.0/" article-type="research-article" dtd-version="1.2" xml:lang="en"><front><journal-meta><journal-id journal-id-type="publisher-id">ARTIFICIAL INTELLIGENCE AND DECISION MAKING</journal-id><journal-title-group><journal-title xml:lang="en">ARTIFICIAL INTELLIGENCE AND DECISION MAKING</journal-title><trans-title-group xml:lang="ru"><trans-title>Искусственный интеллект и принятие решений</trans-title></trans-title-group></journal-title-group><issn publication-format="print">2071-8594</issn></journal-meta><article-meta><article-id pub-id-type="publisher-id">270353</article-id><article-id pub-id-type="doi">10.14357/20718594230310</article-id><article-categories><subj-group subj-group-type="toc-heading" xml:lang="en"><subject>Analysis of Signals, Audio and Video Information</subject></subj-group><subj-group subj-group-type="toc-heading" xml:lang="ru"><subject>Анализ сигналов, аудио и видео информации</subject></subj-group><subj-group subj-group-type="article-type"><subject>Research Article</subject></subj-group></article-categories><title-group><article-title xml:lang="en">Method for Processing Photo and Video Data from Camera Traps Using a Two-Stage Neural Network Approach</article-title><trans-title-group xml:lang="ru"><trans-title>Метод обработки фото- и видеоданных с фотоловушек с использованием двухстадийного нейросетевого подхода</trans-title></trans-title-group></title-group><contrib-group><contrib contrib-type="author"><name-alternatives><name xml:lang="en"><surname>Efremov</surname><given-names>Vladislav A.</given-names></name><name xml:lang="ru"><surname>Ефремов</surname><given-names>Владислав Александрович</given-names></name></name-alternatives><address><country country="RU">Russian Federation</country></address><bio xml:lang="en"><p>Postgraduate student, programmer of the Laboratory of Digital Systems for Special Purposes</p></bio><bio xml:lang="ru"><p>Аспирант. Программист лаборатории цифровых систем специального назначения</p></bio><email>efremov.va@phystech.edu</email><xref ref-type="aff" rid="aff1"/></contrib><contrib contrib-type="author"><name-alternatives><name xml:lang="en"><surname>Leus</surname><given-names>Andrey V.</given-names></name><name xml:lang="ru"><surname>Леус</surname><given-names>Андрей Владимирович</given-names></name></name-alternatives><address><country country="RU">Russian Federation</country></address><bio xml:lang="en"><p>Candidate of Technical Sciences, Leading Programmer of the Laboratory of Digital Systems for Special Purposes</p></bio><bio xml:lang="ru"><p>Кандидат технических наук. Ведущий программист лаборатории цифровых систем специального назначения</p></bio><email>leus.av@mipt.ru</email><xref ref-type="aff" rid="aff1"/></contrib><contrib contrib-type="author"><name-alternatives><name xml:lang="en"><surname>Gavrilov</surname><given-names>Dmitry A.</given-names></name><name xml:lang="ru"><surname>Гаврилов</surname><given-names>Дмитрий Александрович</given-names></name></name-alternatives><address><country country="RU">Russian Federation</country></address><bio xml:lang="en"><p>Doctor of Technical Sciences, Director of the Phystech School of Radio Engineering and Computer Technology (FRCT)</p></bio><bio xml:lang="ru"><p>Доктор технических наук. 
Директор Физтех-школы ФРКТ</p></bio><email>gavrilov.da@mipt.ru</email><xref ref-type="aff" rid="aff1"/></contrib><contrib contrib-type="author"><name-alternatives><name xml:lang="en"><surname>Mangazeev</surname><given-names>Daniil I.</given-names></name><name xml:lang="ru"><surname>Мангазеев</surname><given-names>Даниил Игоревич</given-names></name></name-alternatives><address><country country="RU">Russian Federation</country></address><bio xml:lang="en"><p>Master, programmer of the Laboratory of Digital Systems for Special Purposes</p></bio><bio xml:lang="ru"><p>Магистр. Программист лаборатории цифровых систем специального назначения</p></bio><email>mangazeev.di@phystech.edu</email><xref ref-type="aff" rid="aff1"/></contrib><contrib contrib-type="author"><name-alternatives><name xml:lang="en"><surname>Kholodnyak</surname><given-names>Ivan V.</given-names></name><name xml:lang="ru"><surname>Холодняк</surname><given-names>Иван Витальевич</given-names></name></name-alternatives><address><country country="RU">Russian Federation</country></address><bio xml:lang="en"><p>Master</p></bio><bio xml:lang="ru"><p>Магистр</p></bio><email>kholodnyak.iv@phystech.edu</email><xref ref-type="aff" rid="aff1"/></contrib><contrib contrib-type="author"><name-alternatives><name xml:lang="en"><surname>Radysh</surname><given-names>Alexandra S.</given-names></name><name xml:lang="ru"><surname>Радыш</surname><given-names>Александра Сергеевна</given-names></name></name-alternatives><bio xml:lang="en"><p>Master</p></bio><bio xml:lang="ru"><p>Магистр</p></bio><email>radysh.as@phystech.edu</email><xref ref-type="aff" rid="aff1"/></contrib><contrib contrib-type="author"><name-alternatives><name xml:lang="en"><surname>Zuev</surname><given-names>Viktor A.</given-names></name><name xml:lang="ru"><surname>Зуев</surname><given-names>Виктор Александрович</given-names></name></name-alternatives><address><country country="RU">Russian Federation</country></address><bio xml:lang="en"><p>Master</p></bio><bio xml:lang="ru"><p>Магистр</p></bio><email>zuev.va@phystech.edu</email><xref ref-type="aff" rid="aff1"/></contrib><contrib contrib-type="author"><name-alternatives><name xml:lang="en"><surname>Vodichev</surname><given-names>Nikita A.</given-names></name><name xml:lang="ru"><surname>Водичев</surname><given-names>Никита Алексеевич</given-names></name></name-alternatives><address><country country="RU">Russian Federation</country></address><bio xml:lang="en"><p>Master</p></bio><bio xml:lang="ru"><p>Магистр</p></bio><email>vodichev.na@phystech.edu</email><xref ref-type="aff" rid="aff1"/></contrib></contrib-group><aff-alternatives id="aff1"><aff><institution xml:lang="en">Moscow Institute of Physics and Technology (National Research University)</institution></aff><aff><institution xml:lang="ru">Московский физико-технический институт (национальный исследовательский университет)</institution></aff></aff-alternatives><pub-date date-type="pub" iso-8601-date="2023-08-15" publication-format="electronic"><day>15</day><month>08</month><year>2023</year></pub-date><issue>3</issue><issue-title xml:lang="en"/><issue-title xml:lang="ru"/><fpage>98</fpage><lpage>108</lpage><history><date date-type="received" iso-8601-date="2024-11-15"><day>15</day><month>11</month><year>2024</year></date><date date-type="accepted" iso-8601-date="2024-11-15"><day>15</day><month>11</month><year>2024</year></date></history><permissions><copyright-statement xml:lang="en">Copyright © 2023, ФИЦ ИУ РАН</copyright-statement><copyright-statement xml:lang="ru">Copyright © 
2023, ФИЦ ИУ РАН</copyright-statement><copyright-year>2023</copyright-year><copyright-holder xml:lang="en">ФИЦ ИУ РАН</copyright-holder></permissions><self-uri xlink:href="https://journals.rcsi.science/2071-8594/article/view/270353">https://journals.rcsi.science/2071-8594/article/view/270353</self-uri><abstract xml:lang="en"><p>The paper proposes a technology for analyzing camera trap data using two-stage neural network processing. The first stage separates empty images from non-empty ones. To solve this problem, a comparative analysis of the YOLOv5, YOLOR, and YOLOX architectures was carried out and the optimal detector model was identified. The second stage classifies the objects found by the detector; the EfficientNetV2, SeResNet, ResNeSt, ReXNet, and ResNet models were compared. To train the detector and the classifier, a data preparation approach was developed that removes duplicate images from the sample. The method was extended with agglomerative clustering to split the sample into training, validation, and test subsets. In object detection, the YOLOv5-L6 model performed best, with a detection accuracy of 98.5% on the dataset. In classification of the detected objects, the ResNeSt-101 architecture performed best, with a recognition quality of 98.339% on the test data.</p></abstract><trans-abstract xml:lang="ru"><p>В работе предложена технология анализа данных с фотоловушек с помощью двухстадийной нейросетевой обработки. Задача первого этапа состоит в отделении пустых изображений от непустых. Для решения задачи проведен сравнительный анализ архитектур YOLOv5, YOLOR, YOLOX и выявлена наиболее оптимальная модель детектора. Задача второго этапа заключается в классификации объектов, найденных детектором. Сравнивались модели EfficientNetV2, SeResNet, ResNeSt, ReXNet, ResNet. Для обучения модели детектора и классификатора разработан подход подготовки данных, заключающийся в удалении изображений-дубликатов из выборки. Метод был модифицирован с помощью агломеративной кластеризации для разделения выборки на обучение, валидацию и тест. В задаче обнаружения объектов лучшим на наборе данных оказался алгоритм YOLOv5-L6 с точностью нахождения 98,5%. В задаче классификации найденных объектов лучше всех себя показала архитектура ResNeSt-101 с качеством распознавания 98,339% на тестовых данных.</p></trans-abstract><kwd-group xml:lang="en"><kwd>camera trap images</kwd><kwd>agglomerative clustering</kwd><kwd>deep convolutional neural networks</kwd><kwd>detection</kwd><kwd>classification</kwd><kwd>two-stage approach</kwd></kwd-group><kwd-group xml:lang="ru"><kwd>изображения с фотоловушек</kwd><kwd>агломеративная кластеризация</kwd><kwd>глубокие сверточные нейронные сети</kwd><kwd>детекция</kwd><kwd>классификация</kwd><kwd>двухстадийный подход</kwd></kwd-group><funding-group/></article-meta></front><body></body><back><ref-list><ref id="B1"><label>1.</label><citation-alternatives><mixed-citation xml:lang="en">O’Connell A. F., Nichols J. D., Karanth K. U. Camera traps in animal ecology: Methods and analyses. – Berlin, Germany: Springer Science &amp; Business Media. 2011. 279 p.</mixed-citation><mixed-citation xml:lang="ru">O’Connell A. F., Nichols J. D., Karanth K. U. Camera traps in animal ecology: Methods and analyses. – Berlin, Germany: Springer Science &amp; Business Media. 2011. 279 p.</mixed-citation></citation-alternatives></ref><ref id="B2"><label>2.</label><citation-alternatives><mixed-citation xml:lang="en">He K., Zhang X., Ren S., Sun J. 
Deep residual learning for image recognition // Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. 2016. P. 770–778.</mixed-citation><mixed-citation xml:lang="ru">He K., Zhang X., Ren S., Sun J. Deep residual learning for image recognition // Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. 2016. P. 770–778.</mixed-citation></citation-alternatives></ref><ref id="B3"><label>3.</label><citation-alternatives><mixed-citation xml:lang="en">Gavrilov D. A., Lovtsov D. A. Automated processing of visual information using artificial intelligence technologies // Artificial Intelligence and Decision Making. 2020. No. 4. P. 33–46.</mixed-citation><mixed-citation xml:lang="ru">Гаврилов Д. А., Ловцов Д. А. Автоматизированная переработка визуальной информации с помощью технологий искусственного интеллекта // Искусственный интеллект и принятие решений. 2020. № 4. С. 33–46.</mixed-citation></citation-alternatives></ref><ref id="B4"><label>4.</label><citation-alternatives><mixed-citation xml:lang="en">Lovtsov D. A., Gavrilov D. A. Automated special purpose optical electronic system’s functional diagnosis // Proc. Int. Semin. Electron Devices Des. Prod. SED-2019 (23–24 April 2019). – Prague, Czech Repub. IEEE, 2019. P. 70–73.</mixed-citation><mixed-citation xml:lang="ru">Lovtsov D. A., Gavrilov D. A. Automated special purpose optical electronic system’s functional diagnosis // Proc. Int. Semin. Electron Devices Des. Prod. SED-2019 (23–24 April 2019). Prague, Czech Repub. IEEE. 2019. P. 70–73.</mixed-citation></citation-alternatives></ref><ref id="B5"><label>5.</label><citation-alternatives><mixed-citation xml:lang="en">Yu X., Wang J., Kays R., Jansen P. A., Wang T., Huang T. Automated identification of animal species in camera trap images // EURASIP Journal on Image and Video Processing. 2013. No. 1. P. 52.</mixed-citation><mixed-citation xml:lang="ru">Yu X., Wang J., Kays R., Jansen P. A., Wang T., Huang T. Automated identification of animal species in camera trap images // EURASIP Journal on Image and Video Processing. 2013. No. 1. P. 52.</mixed-citation></citation-alternatives></ref><ref id="B6"><label>6.</label><mixed-citation>Chen G., Han T. X., He Z., Kays R., Forrester T. Deep convolutional neural network based species recognition for wild animal monitoring // IEEE International Conference on Image Processing (ICIP). 2014. P. 858–862.</mixed-citation></ref><ref id="B7"><label>7.</label><citation-alternatives><mixed-citation xml:lang="en">Gomez-Villa A., Salazar A., Vargas F. Towards automatic wild animal monitoring: Identification of animal species in camera-trap images using very deep convolutional neural networks // Ecological Informatics. 2017. No. 41. P. 24–32.</mixed-citation><mixed-citation xml:lang="ru">Gomez-Villa A., Salazar A., Vargas F. Towards automatic wild animal monitoring: Identification of animal species in camera-trap images using very deep convolutional neural networks // Ecological Informatics. 2017. No. 41. P. 24–32.</mixed-citation></citation-alternatives></ref><ref id="B8"><label>8.</label><mixed-citation>Nguyen H., Maclagan S. J., Nguyen T. D., Nguyen T., Flemons P., Andrews K., Ritchie E. G., Phung D. Animal recognition and identification with deep convolutional neural networks for automated wildlife monitoring // International Conference on Data Science and Advanced Analytics (DSAA 2017). Tokyo, Japan, 19–21 October 2017. P. 
40–49.</mixed-citation></ref><ref id="B9"><label>9.</label><mixed-citation>Beery S., Van Horn G., Perona P. Recognition in Terra Incognita // Computer Vision. ECCV 2018. Lecture Notes in Computer Science. Vol. 11220.</mixed-citation></ref><ref id="B10"><label>10.</label><mixed-citation>Norouzzadeh M. S., Morris D., Beery S., Joshi N., Jojic N., Clune J. A deep active learning system for species identification and counting in camera trap images // Methods in Ecology and Evolution. 2021. Vol. 12 (1). P. 150–161.</mixed-citation></ref><ref id="B11"><label>11.</label><citation-alternatives><mixed-citation xml:lang="en">Whytock R. C., Świeżewski J., Zwerts J. A. Robust ecological analysis of camera trap data labelled by a machine learning model // Methods in Ecology and Evolution. 2021. No. 12 (6). P. 1080–1092.</mixed-citation><mixed-citation xml:lang="ru">Whytock R. C., Świeżewski J., Zwerts J. A. Robust ecological analysis of camera trap data labelled by a machine learning model // Methods in Ecology and Evolution. 2021. No. 12 (6). P. 1080–1092.</mixed-citation></citation-alternatives></ref><ref id="B12"><label>12.</label><mixed-citation>Leus A. V., Efremov V. A. Computer vision methods application for camera traps image analysis within the software for the reserves environmental state monitoring // Proceedings of the Mordovia State Nature Reserve. 2021. Vol. 28. P. 121–129.</mixed-citation></ref><ref id="B13"><label>13.</label><citation-alternatives><mixed-citation xml:lang="en">Tabak M. A., Norouzzadeh M. S., Wolfson D. W., Sweeney S. J., VerCauteren K. C., Snow N. P., Halseth J. M., Di Salvo P. A., Lewis J. S., White M. D., Teton B., Beasley J. C., Schlichting P. E., Boughton R. K., Wight B., Newkirk E. S., Ivan R.S. Machine learning to classify animal species in camera trap images: Applications in ecology // Methods in Ecology and Evolution. 2018. No. 10 (4). P. 585–590.</mixed-citation><mixed-citation xml:lang="ru">Tabak M. A., Norouzzadeh M. S., Wolfson D. W., Sweeney S. J., VerCauteren K. C., Snow N. P., Halseth J. M., Di Salvo P. A., Lewis J. S., White M. D., Teton B., Beasley J. C., Schlichting P. E., Boughton R. K., Wight B., Newkirk E. S., Ivan R.S. Machine learning to classify animal species in camera trap images: Applications in ecology // Methods in Ecology and Evolution. 2018. No. 10 (4). P. 585–590.</mixed-citation></citation-alternatives></ref><ref id="B14"><label>14.</label><mixed-citation>Jocher G. YOLOv5 release v6.1. 2021. https://github.com/ultralytics/yolov5/releases/tag/v6.1.</mixed-citation></ref><ref id="B15"><label>15.</label><mixed-citation>Wang C., Yeh I., Liao H. M. You Only Learn One Representation: Unified Network for Multiple Tasks. 2021.</mixed-citation></ref><ref id="B16"><label>16.</label><citation-alternatives><mixed-citation xml:lang="en">Ge Z., Liu S., Wang F., Li Z., Sun J. YOLOX: Exceeding YOLO Series in 2021. 2021.</mixed-citation><mixed-citation xml:lang="ru">Ge Z., Liu S., Wang F., Li Z., Sun J. YOLOX: Exceeding YOLO Series in 2021. 2021.</mixed-citation></citation-alternatives></ref><ref id="B17"><label>17.</label><mixed-citation>Lin T. Y., Maire M., Belongie S., Hays J., Perona P., Ramanan D., Zitnick C. L. Microsoft COCO: Common objects in context // European Conference on Computer Vision. 2014. P. 740–755.</mixed-citation></ref><ref id="B18"><label>18.</label><citation-alternatives><mixed-citation xml:lang="en">Hu J., Shen L., Albanie S., Sun G., Wu E. Squeeze-and-Excitation Networks // IEEE Transactions on Pattern Analysis and Machine Intelligence. 2020. Vol. 
42. No. 8. P. 2011–2023.</mixed-citation><mixed-citation xml:lang="ru">Hu J., Shen L., Albanie S., Sun G., Wu E. Squeeze-and-Excitation Networks // IEEE Transactions on Pattern Analysis and Machine Intelligence. 2020. Vol. 42. No. 8. P. 2011–2023.</mixed-citation></citation-alternatives></ref><ref id="B19"><label>19.</label><mixed-citation>Zhang H., Wu C., Zhang Z., Zhu Y., Zhang Z., Lin H., Sun Y., He T., Mueller J., Manmatha R., Li M., Smola A. ResNeSt: Split-Attention Networks. 2020.</mixed-citation></ref><ref id="B20"><label>20.</label><mixed-citation>Han D., Yun S., Heo B., Yoo Y. J. ReXNet: Diminishing Representational Bottleneck on Convolutional Neural Network. 2020.</mixed-citation></ref><ref id="B21"><label>21.</label><mixed-citation>Tan M., Le Q. V. EfficientNetV2: Smaller Models and Faster Training. 2021.</mixed-citation></ref><ref id="B22"><label>22.</label><mixed-citation>Tan M., Le Q. V. EfficientNet: Rethinking model scaling for convolutional neural networks. 2020.</mixed-citation></ref><ref id="B23"><label>23.</label><citation-alternatives><mixed-citation xml:lang="en">Sibson R. SLINK: An Optimally Efficient Algorithm for the Single-Link Cluster Method // Comput. J. 1973. No. 16. P. 30–34.</mixed-citation><mixed-citation xml:lang="ru">Sibson R. SLINK: An Optimally Efficient Algorithm for the Single-Link Cluster Method // Comput. J. 1973. No. 16. P. 30–34.</mixed-citation></citation-alternatives></ref></ref-list></back></article>
