<script src="https://bibbase.org/show?bib=https%3A%2F%2Fflorentfo.rest%2Ffiles%2Fpublications.bib&jsonp=1"></script>
<?php
// Server-side embed: fetch the BibBase rendering of publications.bib and emit it verbatim.
$contents = file_get_contents("https://bibbase.org/show?bib=https%3A%2F%2Fflorentfo.rest%2Ffiles%2Fpublications.bib");
if ($contents === false) {
    // file_get_contents() returns false on failure; log it and emit nothing,
    // which matches what the page previously showed in that case.
    error_log("BibBase fetch failed for publications.bib");
} else {
    // The fetched payload is a string, so echo outputs it exactly as print_r did.
    echo $contents;
}
?>
<iframe src="https://bibbase.org/show?bib=https%3A%2F%2Fflorentfo.rest%2Ffiles%2Fpublications.bib"></iframe>
For more details see the documentation.
To the site owner:
Action required! Mendeley is changing its API. In order to keep using Mendeley with BibBase past April 14th, you need to:
% Journal article (Sensors, vol. 24, no. 23): Calibrated Adaptive Teacher (CAT).
% The percent sign in the abstract is escaped, matching the other entries in this file.
@article{forest2023calibratedadaptive,
  author       = {Forest, Florent and Fink, Olga},
  title        = {Calibrated {Adaptive} {Teacher} for {Domain}-{Adaptive} {Intelligent} {Fault} {Diagnosis}},
  journal      = {Sensors},
  volume       = {24},
  number       = {23},
  month        = nov,
  year         = {2024},
  issn         = {1424-8220},
  doi          = {10.3390/s24237539},
  abstract     = {Intelligent fault diagnosis (IFD) based on deep learning can achieve high accuracy from raw condition monitoring signals. However, models usually perform well on the training distribution only, and experience severe performance drops when applied to a different distribution. This is also observed in fault diagnosis, where assets are often operated in working conditions different from the ones in which the labeled data have been collected. The scenario where labeled data are available in a source domain and only unlabeled data are available in a target domain has been addressed recently by unsupervised domain adaptation (UDA) approaches for IFD. Recent methods have relied on self-training with confident pseudo-labels for the unlabeled target samples. However, the confidence-based selection of pseudo-labels is hindered by poorly calibrated uncertainty estimates in the target domain, primarily due to over-confident predictions, which limits the quality of pseudo-labels and leads to error accumulation. In this paper, we propose a novel method called Calibrated Adaptive Teacher (CAT), where we propose to calibrate the predictions of the teacher network on target samples throughout the self-training process, leveraging post hoc calibration techniques. We evaluate CAT on domain-adaptive IFD and perform extensive experiments on the Paderborn University (PU) benchmark for fault diagnosis of rolling bearings under varying operating conditions, using both time- and frequency-domain inputs. We compare four different calibration techniques within our framework, where temperature scaling is both the most effective and lightweight one. The resulting method—CAT+TempScaling—achieves state-of-the-art performance on most transfer tasks, with on average 7.5\% higher accuracy and 4 times lower calibration error compared to domain-adversarial neural networks (DANNs) across the twelve PU transfer tasks.},
  url_Link     = {https://www.mdpi.com/1424-8220/24/23/7539},
  url_Code     = {https://github.com/EPFL-IMOS/CAT},
  bibbase_note = {<img src="assets/img/papers/cat.png">},
}
% Journal article (IJPHM, vol. 15, no. 2): survey of knowledge-based and expert systems in PHM.
@article{bouhadra_knowledge-based_2024,
  author       = {Bouhadra, Kalil and Forest, Florent},
  title        = {Knowledge-based and {Expert} {Systems} in {Prognostics} and {Health} {Management}: a {Survey}},
  shorttitle   = {Knowledge-based and {Expert} {Systems} in {Prognostics} and {Health} {Management}},
  journal      = {International Journal of Prognostics and Health Management},
  volume       = {15},
  number       = {2},
  month        = oct,
  year         = {2024},
  issn         = {2153-2648},
  doi          = {10.36001/ijphm.2024.v15i2.3986},
  abstract     = {Prognostics and Health Management (PHM) has become increasingly popular in recent years, and data-driven methods and artificial intelligence have emerged as dominant tools within the PHM field. This trend is mainly due to the increasing use of sensors and the ability of machine learning techniques to leverage condition monitoring data. However, despite their utility and effectiveness, these techniques are not without drawbacks. One major issue is that data-driven methods often lack transparency in their reasoning, which is crucial for understanding fault occurrences and diagnostics. Additionally, the availability of data can be a challenge. In some cases, data are scarce or hard to obtain, either due to the cost of installing necessary sensors or the rarity of the required information. Lastly, the insights derived from data can sometimes diverge from those obtained through expert analysis and established norms. This contrasts with knowledge-based approaches such as expert systems, which formally organize the knowledge acquired from norms and experts, and then deduce the desired conclusion. While research is increasingly exploring data-driven techniques, industry tends to still frequently employ knowledge-based methods. To fill this gap, this paper offers a detailed survey of knowledge-based and expert systems in PHM, examining methodologies such as propositional logic, fuzzy logic, Dempster-Shafer theory and Bayesian networks. It assesses the integration and impact of these techniques in PHM for fault detection, diagnosis and prognosis, highlighting their strengths, limitations, and potential future developments. The study provides a thorough evaluation of current developments and contributes significant insights into the current capabilities and future directions of knowledge-based techniques in enhancing decision-making processes in PHM.},
  url_Link     = {http://papers.phmsociety.org/index.php/ijphm/article/view/3986},
  url_Paper    = {http://papers.phmsociety.org/index.php/ijphm/article/download/3986/2543},
  bibbase_note = {<img src="assets/img/papers/es-phm.png">},
}
% Conference paper (ECCV 2024): simple self-training strategies for source-free object detection.
% The percent sign in the abstract is escaped, matching the other entries in this file.
@inproceedings{hao2024simplifying,
  author       = {Hao, Yan and Forest, Florent and Fink, Olga},
  title        = {Simplifying {Source}-{Free} {Domain} {Adaptation} for {Object} {Detection}: {Effective} {Self}-{Training} {Strategies} and {Performance} {Insights}},
  shorttitle   = {Simplifying {Source}-{Free} {Domain} {Adaptation} for {Object} {Detection}},
  booktitle    = {{ECCV 2024}},
  month        = oct,
  year         = {2024},
  doi          = {10.1007/978-3-031-72949-2_12},
  abstract     = {This paper focuses on source-free domain adaptation for object detection in computer vision. This task is challenging and of great practical interest, due to the cost of obtaining annotated data sets for every new domain. Recent research has proposed various solutions for Source-Free Object Detection (SFOD), most being variations of teacher-student architectures with diverse feature alignment, regularization and pseudo-label selection strategies. Our work investigates simpler approaches and their performance compared to more complex SFOD methods in several adaptation scenarios. We highlight the importance of batch normalization layers in the detector backbone, and show that adapting only the batch statistics is a strong baseline for SFOD. We propose a simple extension of a Mean Teacher with strong-weak augmentation in the source-free setting, Source-Free Unbiased Teacher (SF-UT), and show that it actually outperforms most of the previous SFOD methods. Additionally, we showcase that an even simpler strategy consisting in training on a fixed set of pseudo-labels can achieve similar performance to the more complex teacher-student mutual learning, while being computationally efficient and mitigating the major issue of teacher-student collapse. We conduct experiments on several adaptation tasks using benchmark driving datasets including (Foggy)Cityscapes, Sim10k and KITTI, and achieve a notable improvement of 4.7\% AP50 on Cityscapes->Foggy-Cityscapes compared with the latest state-of-the-art in SFOD. Source code is available at https://github.com/EPFL-IMOS/simple-SFOD.},
  url_Link     = {https://link.springer.com/chapter/10.1007/978-3-031-72949-2_12},
  url_Paper    = {http://arxiv.org/pdf/2407.07586.pdf},
  url_Code     = {https://github.com/EPFL-IMOS/simple-SFOD},
  bibbase_note = {<img src="assets/img/papers/sfod.png">},
}
% arXiv preprint 2410.22383: BuildNet3D, building envelope characteristics from semantic scene reconstruction.
% doi and publisher follow the same pattern as the other arXiv preprints in this file.
@misc{xu_exploiting_2024,
  author       = {Xu, Chenghao and Mielle, Malcolm and Laborde, Antoine and Waseem, Ali and Forest, Florent and Fink, Olga},
  title        = {Exploiting {Semantic} {Scene} {Reconstruction} for {Estimating} {Building} {Envelope} {Characteristics}},
  publisher    = {arXiv},
  month        = oct,
  year         = {2024},
  doi          = {10.48550/arXiv.2410.22383},
  note         = {arXiv:2410.22383},
  abstract     = {Achieving the EU's climate neutrality goal requires retrofitting existing buildings to reduce energy use and emissions. A critical step in this process is the precise assessment of geometric building envelope characteristics to inform retrofitting decisions. Previous methods for estimating building characteristics, such as window-to-wall ratio, building footprint area, and the location of architectural elements, have primarily relied on applying deep-learning-based detection or segmentation techniques on 2D images. However, these approaches tend to focus on planar facade properties, limiting their accuracy and comprehensiveness when analyzing complete building envelopes in 3D. While neural scene representations have shown exceptional performance in indoor scene reconstruction, they remain under-explored for external building envelope analysis. This work addresses this gap by leveraging cutting-edge neural surface reconstruction techniques based on signed distance function (SDF) representations for 3D building analysis. We propose BuildNet3D, a novel framework to estimate geometric building characteristics from 2D image inputs. By integrating SDF-based representation with semantic modality, BuildNet3D recovers fine-grained 3D geometry and semantics of building envelopes, which are then used to automatically extract building characteristics. Our framework is evaluated on a range of complex building structures, demonstrating high accuracy and generalizability in estimating window-to-wall ratio and building footprint. The results underscore the effectiveness of BuildNet3D for practical applications in building analysis and retrofitting.},
  url_Link     = {http://arxiv.org/abs/2410.22383},
  url_Paper    = {http://arxiv.org/pdf/2410.22383.pdf},
  bibbase_note = {<img src="assets/img/papers/buildnet3d.png">},
}
% Journal article (Automation in Construction, vol. 165): crack segmentation derived from classifier explanations.
@article{forest2024classification,
  author       = {Forest, Florent and Porta, Hugo and Tuia, Devis and Fink, Olga},
  title        = {From classification to segmentation with explainable {AI}: {A} study on crack detection and growth monitoring},
  shorttitle   = {From classification to segmentation with explainable {AI}},
  journal      = {Automation in Construction},
  volume       = {165},
  pages        = {105497},
  month        = sep,
  year         = {2024},
  issn         = {0926-5805},
  doi          = {10.1016/j.autcon.2024.105497},
  keywords     = {Attribution maps, Crack detection, Crack segmentation, Deep learning, Explainable AI, Growth monitoring, Severity quantification},
  abstract     = {Monitoring surface cracks in infrastructure is crucial for structural health monitoring. Automatic visual inspection offers an effective solution, especially in hard-to-reach areas. Machine learning approaches have proven their effectiveness but typically require large annotated datasets for supervised training. Once a crack is detected, monitoring its severity often demands precise segmentation of the damage. However, pixel-level annotation of images for segmentation is labor-intensive. To mitigate this cost, one can leverage explainable artificial intelligence (XAI) to derive segmentations from the explanations of a classifier, requiring only weak image-level supervision. This paper proposes applying this methodology to segment and monitor surface cracks. We evaluate the performance of various XAI methods and examine how this approach facilitates severity quantification and growth monitoring. Results reveal that while the resulting segmentation masks may exhibit lower quality than those produced by supervised methods, they remain meaningful and enable severity monitoring, thus reducing substantial labeling costs. Code and data available at https://github.com/EPFL-IMOS/crack-explanations.},
  url_Link     = {https://www.sciencedirect.com/science/article/pii/S0926580524002334},
  url_Code     = {https://github.com/EPFL-IMOS/crack-explanations},
  bibbase_note = {<img src="assets/img/papers/crack-explain.png">},
}
% Conference paper (IJCAI, 2024, pp. 8737--8740): AESim, a data-driven aircraft engine simulator.
% The booktitle acronym is brace-protected like the other venue acronyms in this file.
@inproceedings{madane_aesim_2024,
  author       = {Madane, Abdellah and Forest, Florent and Azzag, Hanane and Lebbah, Mustapha and Lacaille, Jérôme},
  title        = {{AESim}: {A} {Data}-{Driven} {Aircraft} {Engine} {Simulator}},
  shorttitle   = {{AESim}},
  booktitle    = {{IJCAI}},
  pages        = {8737--8740},
  month        = aug,
  year         = {2024},
  url          = {https://www.ijcai.org/proceedings/2024/1021.pdf},
  url_Link     = {https://www.ijcai.org/proceedings/2024/1021},
  url_Paper    = {https://www.ijcai.org/proceedings/2024/1021.pdf},
  bibbase_note = {<img src="assets/img/papers/aesim.png">},
}
% Conference paper (IJCNN 2024): MTS-CGAN, one-pass conditional generation of multivariate time series.
% The last author's given name is spelled as in the other entries so that author grouping is consistent.
@inproceedings{madane_one-pass_2024,
  author       = {Madane, Abdellah and Forest, Florent and Azzag, Hanane and Lebbah, Mustapha and Lacaille, Jérôme},
  title        = {One-{Pass} {Generation} of {Multivariate} {Time} {Series} through {Conditional} {Multivariate} {Modeling}},
  booktitle    = {2024 {International} {Joint} {Conference} on {Neural} {Networks} ({IJCNN})},
  pages        = {1--9},
  month        = jun,
  year         = {2024},
  doi          = {10.1109/IJCNN60899.2024.10651016},
  url          = {https://ieeexplore.ieee.org/abstract/document/10651016},
  abstract     = {In recent years, exploring deep generative models for generating time series has garnered significant interest within the research community. These models have found wide-ranging applications in areas such as data augmentation, scenario simulation, and the imputation of missing data. The authenticity of the generated time series has seen remarkable advancements with the integration of recurrent neural networks (RNNs) and generative adversarial networks (GANs). RNNs used to represent the state-of-the-art (SOA) in processing sequence dependencies until the advent of Transformers, which redefined the SOA, especially in Natural Language Processing and Computer Vision. The introduction of a transformer-based GAN represented an innovative step forward, aiming to address the limitations inherent in RNNs. However, this model’s efficacy is constrained when faced with unimodal data distribution assumptions, leading to arbitrary outputs in complex distribution scenarios. This paper introduces a novel Multivariate Time Series Conditional GAN (MTS-CGAN), that leverages transformer-based architectures in generator and discriminator networks. MTS-CGAN conditions the generation process on a specific encoded context (categorical and MTS inputs), enabling one-pass generation of multivariate time series, and accommodating mixed distribution frameworks, outperforming existing models. We evaluate MTS-CGAN using quantitative metrics across multiple multivariate time series datasets. Furthermore, we propose also an innovative adaptation of the Frechet Inception Distance (FID), tailored for time series, to assess the quality of the generated data. This research demonstrates the potential of MTS-CGAN in generating high-fidelity multivariate time series.},
  url_Link     = {https://ieeexplore.ieee.org/abstract/document/10651016},
  bibbase_note = {<img src="assets/img/papers/mtscgan.png">},
}
% Conference paper (34th ESREL, 2024): content- and perspective-aware cut-and-paste augmentation for road damage detection.
@inproceedings{siripathitti2024cutandpaste,
  author       = {Siripathitti, Punnawat and Forest, Florent and Fink, Olga},
  title        = {Cut-and-{Paste} with {Precision}: a {Content} and {Perspective}-aware {Data} {Augmentation} for {Road} {Damage} {Detection}},
  shorttitle   = {Cut-and-{Paste} with {Precision}},
  booktitle    = {Proceedings of the 34th {European} {Safety} and {Reliability} {Conference} ({ESREL})},
  month        = jun,
  year         = {2024},
  doi          = {10.48550/arXiv.2406.18586},
  abstract     = {Damage to road pavement can develop into cracks, potholes, spallings, and other issues posing significant challenges to the integrity, safety, and durability of the road structure. Detecting and monitoring the evolution of these damages is crucial for maintaining the condition and structural health of road infrastructure. In recent years, researchers have explored various data-driven methods for image-based damage detection in road monitoring applications. The field gained attention with the introduction of the Road Damage Detection Challenge (RDDC2018), encouraging competition in developing object detectors on street-view images from various countries. Leading teams have demonstrated the effectiveness of ensemble models, mostly based on the YOLO and Faster R-CNN series. Data augmentations have also shown benefits in object detection within the computer vision field, including transformations such as random flipping, cropping, cutting out patches, as well as cut-and-pasting object instances. Applying cut-and-paste augmentation to road damages appears to be a promising approach to increase data diversity. However, the standard cut-and-paste technique, which involves sampling an object instance from a random image and pasting it at a random location onto the target image, has demonstrated limited effectiveness for road damage detection. This method overlooks the location of the road and disregards the difference in perspective between the sampled damage and the target image, resulting in unrealistic augmented images. In this work, we propose an improved Cut-and-Paste augmentation technique that is both content-aware (i.e. considers the true location of the road in the image) and perspective-aware (i.e. takes into account the difference in perspective between the injected damage and the target image).},
  url_Link     = {http://arxiv.org/abs/2406.18586},
  url_Paper    = {http://arxiv.org/pdf/2406.18586.pdf},
  bibbase_note = {<img src="assets/img/papers/cut-paste.png">},
}
% arXiv preprint 2405.17575: Concept Bottleneck Models applied to remaining-useful-life prediction.
@misc{forest2024interpretable,
  author       = {Forest, Florent and Rombach, Katharina and Fink, Olga},
  title        = {Interpretable {Prognostics} with {Concept} {Bottleneck} {Models}},
  publisher    = {arXiv},
  month        = may,
  year         = {2024},
  doi          = {10.48550/arXiv.2405.17575},
  note         = {arXiv:2405.17575 [cs, eess, stat]},
  abstract     = {Deep learning approaches have recently been extensively explored for the prognostics of industrial assets. However, they still suffer from a lack of interpretability, which hinders their adoption in safety-critical applications. To improve their trustworthiness, explainable AI (XAI) techniques have been applied in prognostics, primarily to quantify the importance of input variables for predicting the remaining useful life (RUL) using post-hoc attribution methods. In this work, we propose the application of Concept Bottleneck Models (CBMs), a family of inherently interpretable neural network architectures based on concept explanations, to the task of RUL prediction. Unlike attribution methods, which explain decisions in terms of low-level input features, concepts represent high-level information that is easily understandable by users. Moreover, once verified in actual applications, CBMs enable domain experts to intervene on the concept activations at test-time. We propose using the different degradation modes of an asset as intermediate concepts. Our case studies on the New Commercial Modular AeroPropulsion System Simulation (N-CMAPSS) aircraft engine dataset for RUL prediction demonstrate that the performance of CBMs can be on par or superior to black-box models, while being more interpretable, even when the available labeled concepts are limited. Code available at https://github.com/EPFL-IMOS/concept-prognostics.},
  url_Link     = {http://arxiv.org/abs/2405.17575},
  url_Paper    = {http://arxiv.org/pdf/2405.17575.pdf},
  url_Code     = {https://github.com/EPFL-IMOS/concept-prognostics},
  bibbase_note = {<img src="assets/img/papers/concept-prognostics.png">},
}
% Journal article (IEEE Transactions on Industrial Electronics, 2024): battery health prediction under unseen working conditions.
@article{che2024health,
  author       = {Che, Yunhong and Forest, Florent and Zheng, Yusheng and Xu, Le and Teodorescu, Remus},
  title        = {Health {Prediction} for {Lithium}-{Ion} {Batteries} {Under} {Unseen} {Working} {Conditions}},
  journal      = {IEEE Transactions on Industrial Electronics},
  pages        = {1--11},
  month        = apr,
  year         = {2024},
  issn         = {1557-9948},
  doi          = {10.1109/TIE.2024.3379664},
  keywords     = {Aging, Batteries, Battery, Degradation, domain adaptation (DA), Feature extraction, health and trajectory prediction, Loading, multi-task learning, Predictive models, Testing, transfer learning},
  abstract     = {Battery health prediction is significant while challenging for intelligent battery management. This article proposes a general framework for both short-term and long-term predictions of battery health under unseen dynamic loading and temperature conditions using domain-adaptive multitask learning (MTL) with long-term regularization. First, features extracted from partial charging curves are utilized for short-term state of health predictions. Then, the long-term degradation trajectory is directly predicted by recursively using the predicted features within the multitask framework, enhancing the model integrity and lowering the complexity. Then, domain adaptation (DA) is adopted to reduce the discrepancies between different working conditions. Additionally, a long-term regularization is introduced to address the shortcoming that arises when the model is extrapolated recursively for future health predictions. Thus, the short-term prediction ability is maintained while the long-term prediction performance is enhanced. Finally, predictions are validated through aging experiments under various dynamic loading profiles. By using partial charging capacity–voltage data, the results show that the early-stage long-term predictions are accurate and stable under various working profiles, with root mean square errors below 2\% and fitting coefficients surpassing 0.86.},
  url_Link     = {https://ieeexplore.ieee.org/document/10500447},
  bibbase_note = {<img src="assets/img/papers/battery-conditions.png">},
}
% arXiv preprint 2403.12154: ThermoNeRF, joint RGB and thermal novel view synthesis.
% The temperature unit in the abstract uses a superscript circle so it typesets as a degree sign.
@misc{hassan2024thermonerf,
  author       = {Hassan, Mariam and Forest, Florent and Fink, Olga and Mielle, Malcolm},
  title        = {{ThermoNeRF}: {Multimodal} {Neural} {Radiance} {Fields} for {Thermal} {Novel} {View} {Synthesis}},
  shorttitle   = {{ThermoNeRF}},
  publisher    = {arXiv},
  month        = mar,
  year         = {2024},
  doi          = {10.48550/arXiv.2403.12154},
  note         = {arXiv:2403.12154 [cs]},
  copyright    = {All rights reserved},
  abstract     = {Thermal scene reconstruction exhibit great potential for applications across a broad spectrum of fields, including building energy consumption analysis and non-destructive testing. However, existing methods typically require dense scene measurements and often rely on RGB images for 3D geometry reconstruction, with thermal information being projected post-reconstruction. This two-step strategy, adopted due to the lack of texture in thermal images, can lead to disparities between the geometry and temperatures of the reconstructed objects and those of the actual scene. To address this challenge, we propose ThermoNeRF, a novel multimodal approach based on Neural Radiance Fields, capable of rendering new RGB and thermal views of a scene jointly. To overcome the lack of texture in thermal images, we use paired RGB and thermal images to learn scene density, while distinct networks estimate color and temperature information. Furthermore, we introduce ThermoScenes, a new dataset to palliate the lack of available RGB+thermal datasets for scene reconstruction. Experimental results validate that ThermoNeRF achieves accurate thermal image synthesis, with an average mean absolute error of 1.5$^\circ$C, an improvement of over 50\% compared to using concatenated RGB+thermal data with Nerfacto, a state-of-the-art NeRF method.},
  url_Link     = {http://arxiv.org/abs/2403.12154},
  url_Paper    = {http://arxiv.org/pdf/2403.12154.pdf},
  url_Code     = {https://github.com/SchindlerEPFL/thermo-nerf},
  bibbase_note = {<img src="assets/img/papers/thermonerf.png">},
}
% arXiv preprint 2401.13721: uncertainty-guided alignment for unsupervised domain adaptation in regression.
@misc{nejjar2024uncertainty,
  author       = {Nejjar, Ismail and Frusque, Gaëtan and Forest, Florent and Fink, Olga},
  title        = {Uncertainty-{Guided} {Alignment} for {Unsupervised} {Domain} {Adaptation} in {Regression}},
  publisher    = {arXiv},
  month        = jan,
  year         = {2024},
  doi          = {10.48550/arXiv.2401.13721},
  note         = {arXiv:2401.13721 [cs]},
  copyright    = {All rights reserved},
  abstract     = {Unsupervised Domain Adaptation for Regression (UDAR) aims to adapt a model from a labeled source domain to an unlabeled target domain for regression tasks. Recent successful works in UDAR mostly focus on subspace alignment, involving the alignment of a selected subspace within the entire feature space. This contrasts with the feature alignment methods used for classification, which aim at aligning the entire feature space and have proven effective but are less so in regression settings. Specifically, while classification aims to identify separate clusters across the entire embedding dimension, regression induces less structure in the data representation, necessitating additional guidance for efficient alignment. In this paper, we propose an effective method for UDAR by incorporating guidance from uncertainty. Our approach serves a dual purpose: providing a measure of confidence in predictions and acting as a regularization of the embedding space. Specifically, we leverage the Deep Evidential Learning framework, which outputs both predictions and uncertainties for each input sample. We propose aligning the parameters of higher-order evidential distributions between the source and target domains using traditional alignment methods at the feature or posterior level. Additionally, we propose to augment the feature space representation by mixing source samples with pseudo-labeled target samples based on label similarity. This cross-domain mixing strategy produces more realistic samples than random mixing and introduces higher uncertainty, facilitating further alignment. We demonstrate the effectiveness of our approach on four benchmarks for UDAR, on which we outperform existing methods.},
  url_Link     = {http://arxiv.org/abs/2401.13721},
  url_Paper    = {http://arxiv.org/pdf/2401.13721.pdf},
  bibbase_note = {<img src="assets/img/papers/uga.png">},
}
% Conference paper (33rd ESREL, 2023, pp. 3406--3407): calibrated self-training for cross-domain bearing fault diagnosis.
% ISBN hyphenation follows the ISBN embedded in the DOI; the ESREL acronym is brace-protected.
@inproceedings{forest2023calibratedself,
  author       = {Forest, Florent and Fink, Olga},
  title        = {Calibrated {Self}-{Training} for {Cross}-{Domain} {Bearing} {Fault} {Diagnosis}},
  booktitle    = {Proceedings of the 33rd {European} {Safety} and {Reliability} {Conference} ({ESREL})},
  pages        = {3406--3407},
  year         = {2023},
  isbn         = {978-981-18-8071-1},
  doi          = {10.3850/978-981-18-8071-1_P249-cd},
  copyright    = {All rights reserved},
  abstract     = {Fault diagnosis of rolling bearings is a crucial task in Prognostics and Health Management, as rolling elements are ubiquitous in industrial assets. Data-driven approaches based on deep neural networks have made significant progress in this area. However, they require collecting large representative labeled data sets. However, in industrial settings, assets are often operated in conditions different from the ones in which labeled data were collected, requiring a transfer between working conditions. In this work, we tackle the classification of bearing fault types and severity levels in the setting of unsupervised domain adaptation (UDA), where labeled data are available in a source domain and only unlabeled data are available in a different but related target domain. We focus on UDA with self-training methods, based on pseudo-labeling of target samples. One major challenge in these methods is to avoid error accumulation due to low-quality pseudo-labels. To address this challenge, we propose incorporating post-hoc calibration, such as the well-known temperature scaling, into the self-training process to increase the quality of selected pseudo-labels. We implement our proposed calibration approach in two self-training algorithms, Calibrated Pseudo-Labeling and Calibrated Adaptive Teacher, and demonstrate their competitive results on the Paderborn University (PU) benchmark for fault diagnosis of rolling bearings under varying operating conditions.},
  url_Link     = {https://www.rpsonline.com.sg/proceedings/esrel2023/html/P249.html},
  url_Paper    = {https://www.rpsonline.com.sg/proceedings/esrel2023/pdf/P249.pdf},
}
% Conference paper (33rd ESREL, 2023, pp. 1392--1393): crack segmentation from post-hoc classifier explanations.
% ISBN hyphenation follows the ISBN embedded in the DOI; the ESREL acronym is brace-protected.
@inproceedings{forest2023segmenting,
  author       = {Forest, Florent and Porta, Hugo and Tuia, Devis and Fink, Olga},
  title        = {Segmenting {Without} {Annotating}: {Crack} {Segmentation} and {Monitoring} via {Post}-{Hoc} {Classifier} {Explanations}},
  shorttitle   = {Segmenting {Without} {Annotating}},
  booktitle    = {Proceedings of the 33rd {European} {Safety} and {Reliability} {Conference} ({ESREL})},
  pages        = {1392--1393},
  year         = {2023},
  isbn         = {978-981-18-8071-1},
  doi          = {10.3850/978-981-18-8071-1_P290-cd},
  copyright    = {All rights reserved},
  abstract     = {Monitoring the cracks in walls, roads and other types of infrastructure is essential to ensure the safety of a structure, and plays an important role in structural health monitoring. Automatic visual inspection allows an efficient, cost-effective and safe health monitoring, especially in hard-to-reach locations. To this aim, data-driven approaches based on machine learning have demonstrated their effectiveness, at the expense of annotating large sets of images for supervised training. Once a damage has been detected, one also needs to monitor the evolution of its severity, in order to trigger a timely maintenance operation and avoid any catastrophic consequence. This evaluation requires a precise segmentation of the damage. However, pixel-level annotation of images for segmentation is labor-intensive. On the other hand, labeling images for a classification task is relatively cheap in comparison. To circumvent the cost of annotating images for segmentation, recent works inspired by explainable AI (XAI) have proposed to use the post-hoc explanations of a classifier to obtain a segmentation of the input image. In this work, we study the application of XAI techniques to the detection and monitoring of cracks in masonry wall surfaces. We benchmark different post-hoc explainability methods in terms of segmentation quality and accuracy of the damage severity quantification (for example, the width of a crack), thus enabling timely decision-making.},
  url_Link     = {https://www.rpsonline.com.sg/proceedings/esrel2023/html/P290.html},
  url_Paper    = {https://www.rpsonline.com.sg/proceedings/esrel2023/pdf/P290.pdf},
}
% Journal article (Reliability Engineering & System Safety, 2023): probabilistic battery degradation prediction and accelerating-aging detection.
@article{che2023predictive,
  author       = {Che, Yunhong and Zheng, Yusheng and Forest, Florent and Sui, Xin and Hu, Xiaosong and Teodorescu, Remus},
  title        = {Predictive {Health} {Assessment} for {Lithium}-ion {Batteries} with {Probabilistic} {Degradation} {Prediction} and {Accelerating} {Aging} {Detection}},
  journal      = {Reliability Engineering \& System Safety},
  pages        = {109603},
  month        = aug,
  year         = {2023},
  issn         = {0951-8320},
  doi          = {10.1016/j.ress.2023.109603},
  copyright    = {All rights reserved},
  keywords     = {Transfer learning, Battery degradation prediction, Knee point detection, Multi-task learning, Predictive health assessment, Probabilistic prediction},
  abstract     = {Predictive health assessment is of vital importance for smarter battery management to ensure optimal and safe operations and thus make the most use of battery life. This paper proposes a general framework for battery aging prognostics in order to provide the predictions of battery knee, lifetime, state of health degradation, and aging rate variations, as well as the assessment of battery health. Early information is used to predict knee slope and other life-related information via deep multi-task learning, where the convolutional-long-short-term memory-bayesian neural network is proposed. The structure is also used for online state of health and degradation rate predictions for the detection of accelerating aging. The two probabilistic predicted boundaries identify the accelerating aging regions for battery health assessment. To avoid wrong and premature alarms, the empirical model is used for data preprocessing and the slope is predicted together with the state of health via multi-task learning. A cloud-edge framework is considered where fine-tuning is adopted for performance improvement during cycling. The proposed general framework is flexible for adjustment to different practical requirements and can be extrapolated to other batteries aged under different conditions. The results indicate that the early predictions are improved using the proposed method compared to multiple single feature-based benchmarks, and that integration of the algorithm is improved. The sequence prediction is reliable for different predicted lengths with root mean square errors of less than 1.41\%, and the detection of accelerating aging can guide reliable predictive health management.},
  url_Link     = {https://www.sciencedirect.com/science/article/pii/S0951832023005173},
  bibbase_note = {<img src="assets/img/papers/battery-aging.png">},
}
@inproceedings{mourer2023selecting,
  author        = {Mourer, Alex and Forest, Florent and Lebbah, Mustapha and Azzag, Hanane and Lacaille, J{\'{e}}r{\^{o}}me},
  title         = {Selecting the {Number} of {Clusters} {$K$} with a {Stability} {Trade}-off: an {Internal} {Validation} {Criterion}},
  shorttitle    = {Selecting the {Number} of {Clusters} {$K$} with a {Stability} {Trade}-off},
  booktitle     = {{PAKDD}},
  address       = {Osaka, Japan},
  month         = may,
  year          = {2023},
  doi           = {10.48550/arXiv.2006.08530},
  note          = {arXiv:2006.08530 [cs, stat]},
  keywords      = {validity index,clustering,model selection,stability analysis},
  copyright     = {All rights reserved},
  abstract      = {Model selection is a major challenge in non-parametric clustering. There is no universally admitted way to evaluate clustering results for the obvious reason that no ground truth is available. The difficulty to find a universal evaluation criterion is a consequence of the ill-defined objective of clustering. In this perspective, clustering stability has emerged as a natural and model-agnostic principle: an algorithm should find stable structures in the data. If data sets are repeatedly sampled from the same underlying distribution, an algorithm should find similar partitions. However, stability alone is not well-suited to determine the number of clusters. For instance, it is unable to detect if the number of clusters is too small. We propose a new principle: a good clustering should be stable, and within each cluster, there should exist no stable partition. This principle leads to a novel clustering validation criterion based on between-cluster and within-cluster stability, overcoming limitations of previous stability-based methods. We empirically demonstrate the effectiveness of our criterion to select the number of clusters and compare it with existing methods. Code is available at https://github.com/FlorentF9/skstab.},
  url_Link      = {http://arxiv.org/abs/2006.08530},
  url_Paper     = {http://arxiv.org/pdf/2006.08530.pdf},
  url_Code      = {https://github.com/FlorentF9/skstab},
  bibbase_note  = {<img src="assets/img/papers/stadion.png">}
}
@inproceedings{madane_transformer-based_2023,
  author        = {Madane, Abdellah and Dilmi, Mohamed-djallel and Forest, Florent and Azzag, Hanane and Lebbah, Mustapha and Lacaille, J{\'{e}}r{\^{o}}me},
  title         = {Transformer-based conditional generative adversarial network for multivariate time series generation},
  booktitle     = {{PAKDD} {International} {Workshop} on {Temporal} {Analytics}},
  month         = may,
  year          = {2023},
  abstract      = {Conditional generation of time-dependent data is a task that has much interest, whether for data augmentation, scenario simulation, completing missing data, or other purposes. Recent works proposed a Transformer-based Time series generative adversarial network (TTS-GAN) to address the limitations of recurrent neural networks. However, this model assumes a unimodal distribution and tries to generate samples around the expectation of the real data distribution. One of its limitations is that it may generate a random multivariate time series; it may fail to generate samples in the presence of multiple sub-components within an overall distribution. One could train models to fit each sub-component separately to overcome this limitation. Our work extends the TTS-GAN by conditioning its generated output on a particular encoded context allowing the use of one model to fit a mixture distribution with multiple sub-components. Technically, it is a conditional generative adversarial network that models realistic multivariate time series under different types of conditions, such as categorical variables or multivariate time series. We evaluate our model on UniMiB Dataset, which contains acceleration data following the XYZ axes of human activities collected using Smartphones. We use qualitative evaluations and quantitative metrics such as Principal Component Analysis (PCA), and we introduce a modified version of the Frechet inception distance (FID) to measure the performance of our model and the statistical similarities between the generated and the real data distributions. We show that this transformer-based CGAN can generate realistic high-dimensional and long data sequences under different kinds of conditions.},
  url_Link      = {http://arxiv.org/abs/2210.02089},
  url_Paper     = {http://arxiv.org/pdf/2210.02089.pdf}
}
@article{forest2021deepembedded,
  author        = {Forest, Florent and Lebbah, Mustapha and Azzag, Hanane and Lacaille, J{\'{e}}r{\^{o}}me},
  title         = {{Deep Embedded Self-Organizing Map for Joint Representation Learning and Topology-Preserving Clustering}},
  journal       = {Neural Computing and Applications},
  year          = {2021},
  issn          = {1433-3058},
  doi           = {10.1007/s00521-021-06331-w},
  keywords      = {autoencoder,clustering,deep learning,representation learning,self-organizing maps,visualization},
  abstract      = {A recent research area in unsupervised learning is the combination of representation learning with deep neural networks and data clustering. The success of deep learning for supervised tasks is widely established. However, recent research has demonstrated how neural networks are able to learn representations to improve clustering in their intermediate feature space, using specific regularizations. By considering representation learning and clustering as a joint task, models learn clustering-friendly spaces and outperform two-stage approaches where dimensionality reduction and clustering are performed separately. Recently, this idea has been extended to topology-preserving clustering models, known as self-organizing maps (SOM). This work is a thorough study on the deep embedded self-organizing map (DESOM), a model composed of an autoencoder and a SOM layer, training jointly the code vectors and network weights to learn SOM-friendly representations. In other words, SOM induces a form a regularization to improve the quality of quantization and topology in latent space. After detailing the architecture, loss and training algorithm, we study hyperparameters with a series of experiments. Different SOM-based models are evaluated in terms of clustering, visualization and classification on benchmark datasets. We study benefits and trade-offs of joint representation learning and self-organization. DESOM achieves competitive results, requires no pretraining and produces topologically organized visualizations.},
  url_Link      = {https://link.springer.com/article/10.1007/s00521-021-06331-w},
  url_Paper     = {https://www.researchgate.net/journal/Neural-Computing-and-Applications-1433-3058/publication/353679111_Deep_embedded_self-organizing_maps_for_joint_representation_learning_and_topology-preserving_clustering/links/610a2059169a1a0103daf991/Deep-embedded-self-organizing-maps-for-joint-representation-learning-and-topology-preserving-clustering.pdf},
  url_Code      = {https://github.com/FlorentF9/DESOM},
  bibbase_note  = {<img src="assets/img/papers/desom.png">}
}
@phdthesis{forest2021unsupervised,
  author        = {Forest, Florent},
  title         = {{Unsupervised Learning of Data Representations and Cluster Structures: Applications to Large-scale Health Monitoring of Turbofan Aircraft Engines}},
  school        = {Universit{\'{e}} Sorbonne Paris Nord},
  type          = {PhD thesis},
  pages         = {326},
  year          = {2021},
  url_Link      = {http://theses.fr/s194400},
  url_Paper     = {Forest2021-manuscrit.pdf},
  url_Slides    = {Forest2021-defense.pdf},
  bibbase_note  = {<img src="assets/img/papers/these.png">}
}
@inproceedings{forest2020invariance,
  author        = {Forest, Florent and Mourer, Alex and Lebbah, Mustapha and Azzag, Hanane and Lacaille, J{\'{e}}r{\^{o}}me},
  title         = {{An Invariance-guided Stability Criterion for Time Series Clustering Validation}},
  booktitle     = {International Conference on Pattern Recognition (ICPR)},
  year          = {2020},
  abstract      = {Time series clustering is a challenging task due to the specificities of this type of data. Temporal correlation and invariance to transformations such as shifting, warping or noise prevent the use of standard data mining methods. Time series clustering has been mostly studied under the angle of finding efficient algorithms and distance metrics adapted to the specific nature of time series data. Much less attention has been devoted to the general problem of model selection. Clustering stability has emerged as a universal and model-agnostic principle for clustering model selection. This principle can be stated as follows: an algorithm should find a structure in the data that is resilient to perturbation by sampling or noise. We propose to apply stability analysis to time series by leveraging prior knowledge on the nature and invariances of the data. These invariances determine the perturbation process used to assess stability. Based on a recently introduced criterion combining between-cluster and within-cluster stability, we propose an invariance-guided method for model selection, applicable to a wide range of clustering algorithms. Experiments conducted on artificial and benchmark data sets demonstrate the ability of our criterion to discover structure and select the correct number of clusters, whenever data invariances are known beforehand.},
  url_Link      = {https://ieeexplore.ieee.org/abstract/document/9412020},
  url_Paper     = {ICPR-2020-InvarianceGuidedStabilityTSC-full-paper.pdf},
  url_Slides    = {pres-ICPR-2020.pdf},
  bibbase_note  = {<img src="assets/img/papers/ts-stab.png">}
}
@inproceedings{forest2020largescale,
  author        = {Forest, Florent and Cochard, Quentin and Noyer, Cecile and Cabut, Adrien and Joncour, Marc and Lacaille, J{\'{e}}r{\^{o}}me and Lebbah, Mustapha and Azzag, Hanane},
  title         = {{Large-scale Vibration Monitoring of Aircraft Engines from Operational Data using Self-organized Models}},
  booktitle     = {Annual Conference of the PHM Society},
  year          = {2020},
  doi           = {10.36001/phmconf.2020.v12i1.1131},
  keywords      = {aircraft engine, vibration analysis, health monitoring, big data, clustering, self-organizing map},
  abstract      = {Vibration analysis is an important component of industrial equipment health monitoring. Aircraft engines in particular are complex rotating machines where vibrations, mainly caused by unbalance, misalignment, or damaged bearings, put engine parts under dynamic structural stress. Thus, monitoring the vibratory behavior of engines is essential to detect anomalies and trends, avoid faults and improve availability. Intrinsic properties of parts can be described by the evolution of vibration as function of rotation speed, called a vibration signature. This work presents a methodology for large-scale vibration monitoring on operating civil aircraft engines, based on unsupervised learning algorithms and a flight recorder database. Firstly, we present a pipeline for massive extraction of vibration signatures from raw flight data, consisting in time-domain medium-frequency sensor measurements. Then, signatures are classified and visualized using interpretable self-organized clustering algorithms, yielding a visual cartography of vibration profiles. Domain experts can then extract various insights from resulting models. An abnormal temporal evolution of a signature gives early warning before failure of an engine. In a post-finding situation after an event has occurred, similar at-risk engines are detectable. The approach is global, end-to-end and scalable, which is yet uncommon in our industry, and has been tested on real flight data.},
  url_Link      = {https://www.phmpapers.org/index.php/phmconf/article/view/1131},
  url_Paper     = {https://www.phmpapers.org/index.php/phmconf/article/download/1131/913},
  url_Slides    = {pres-PHM-2020.pdf},
  bibbase_note  = {<img src="assets/img/papers/vib.png">}
}
@unpublished{forest2020survey,
  author        = {Forest, Florent and Lebbah, Mustapha and Azzag, Hanane and Lacaille, J{\'{e}}r{\^{o}}me},
  title         = {{A Survey and Implementation of Performance Metrics for Self-Organized Maps}},
  month         = nov,
  year          = {2020},
  archivePrefix = {arXiv},
  eprint        = {2011.05847},
  arxivId       = {2011.05847},
  note          = {arXiv:2011.05847 [cs]},
  abstract      = {Self-Organizing Map algorithms have been used for almost 40 years across various application domains such as biology, geology, healthcare, industry and humanities as an interpretable tool to explore, cluster and visualize high-dimensional data sets. In every application, practitioners need to know whether they can \textit{trust} the resulting mapping, and perform model selection to tune algorithm parameters (e.g. the map size). Quantitative evaluation of self-organizing maps (SOM) is a subset of clustering validation, which is a challenging problem as such. Clustering model selection is typically achieved by using clustering validity indices. While they also apply to self-organized clustering models, they ignore the topology of the map, only answering the question: do the SOM code vectors approximate well the data distribution? Evaluating SOM models brings in the additional challenge of assessing their topology: does the mapping preserve neighborhood relationships between the map and the original data? The problem of assessing the performance of SOM models has already been tackled quite thoroughly in literature, giving birth to a family of quality indices incorporating neighborhood constraints, called \textit{topographic} indices. Commonly used examples of such metrics are the topographic error, neighborhood preservation or the topographic product. However, open-source implementations are almost impossible to find. This is the issue we try to solve in this work: after a survey of existing SOM performance metrics, we implemented them in Python and widely used numerical libraries, and provide them as an open-source library, SOMperf. This paper introduces each metric available in our module along with usage examples.},
  url_Link      = {https://arxiv.org/abs/2011.05847},
  url_Paper     = {https://arxiv.org/pdf/2011.05847.pdf},
  url_Code      = {https://github.com/FlorentF9/SOMperf},
  bibbase_note  = {<img src="assets/img/papers/somperf.png">}
}
@inproceedings{forest2020carte,
  author        = {Forest, Florent and Lebbah, Mustapha and Azzag, Hanane and Lacaille, J{\'{e}}r{\^{o}}me},
  title         = {{Carte SOM profonde : Apprentissage joint de repr{\'{e}}sentations et auto-organisation}},
  booktitle     = {CAp2020: Conf{\'{e}}rence d'Apprentissage},
  year          = {2020},
  keywords      = {autoencoder,clustering,deep learning,self-organizing map},
  abstract      = {Dans la lign{\'{e}}e des r{\'{e}}centes avanc{\'{e}}es en apprentissage profond de repr{\'{e}}sentations pour le clustering, ce travail (pr{\'{e}}c{\'{e}}demment publi{\'{e}} en anglais) pr{\'{e}}sente le mod{\`{e}}le DESOM (Deep Embedded SOM), combinant l'apprentissage non supervis{\'{e}} de repr{\'{e}}sentations et d'une carte auto-organis{\'{e}}e de Kohonen (SOM). Le mod{\`{e}}le, compos{\'{e}} d'un auto-encodeur et d'une couche SOM, est optimis{\'{e}} conjointement, afin de r{\'{e}}gulariser l'espace latent et am{\'{e}}liorer la performance de la carte SOM. Nous {\'{e}}valuons les performances de classification et de visualisation ainsi que les b{\'{e}}n{\'{e}}fices de l'apprentissage joint. Mots-clef : carte auto-organis{\'{e}}e, clustering, apprentissage profond, auto-encodeur.},
  url_Link      = {https://hal.archives-ouvertes.fr/hal-02859997},
  url_Paper     = {https://hal.archives-ouvertes.fr/hal-02859997/document}
}
@inproceedings{forest2019deeparchitectures,
  author        = {Forest, Florent and Lebbah, Mustapha and Azzag, Hanane and Lacaille, J{\'{e}}r{\^{o}}me},
  title         = {{Deep Architectures for Joint Clustering and Visualization with Self-Organizing Maps}},
  booktitle     = {Workshop on Learning Data Representations for Clustering (LDRC), PAKDD},
  year          = {2019},
  doi           = {10.1007/978-3-030-26142-9_10},
  keywords      = {autoencoder,clustering,deep learning,representation learning,self-organizing map},
  abstract      = {Recent research has demonstrated how deep neural networks are able to learn representations to improve data clustering. By considering representation learning and clustering as a joint task, models learn clustering-friendly spaces and achieve superior performance, compared with standard two-stage approaches where dimensionality reduction and clustering are performed separately. We extend this idea to topology-preserving clustering models, known as self-organizing maps (SOM). First, we present the Deep Embedded Self-Organizing Map (DESOM), a model composed of a fully-connected autoencoder and a custom SOM layer, where the SOM code vectors are learnt jointly with the autoencoder weights. Then, we show that this generic architecture can be extended to image and sequence data by using convolutional and recurrent architectures, and present variants of these models. First results demonstrate advantages of the DESOM architecture in terms of clustering performance, visualization and training time.},
  url_Link      = {https://link.springer.com/chapter/10.1007/978-3-030-26142-9_10},
  url_Paper     = {LDRC-2019-DeepArchitecturesJointClusteringVisualization-full-paper.pdf},
  url_Code      = {https://github.com/FlorentF9/DESOM},
  bibbase_note  = {<img src="assets/img/papers/convdesom.png">}
}
@inproceedings{forest2019deepembedded,
  author        = {Forest, Florent and Lebbah, Mustapha and Azzag, Hanane and Lacaille, J{\'{e}}r{\^{o}}me},
  title         = {{Deep Embedded SOM: Joint Representation Learning and Self-Organization}},
  booktitle     = {European Symposium on Artificial Neural Networks, Computational Intelligence and Machine Learning (ESANN)},
  year          = {2019},
  keywords      = {autoencoder,clustering,deep learning,representation learning,self-organizing map},
  abstract      = {In the wake of recent advances in joint clustering and deep learning, we introduce the Deep Embedded Self-Organizing Map, a model that jointly learns representations and the code vectors of a self-organizing map. Our model is composed of an autoencoder and a custom SOM layer that are optimized in a joint training procedure, motivated by the idea that the SOM prior could help learning SOM-friendly representations. We evaluate SOM-based models in terms of clustering quality and unsupervised clustering accuracy, and study the benefits of joint training.},
  url_Link      = {https://www.esann.org/proceedings/2019},
  url_Paper     = {https://www.elen.ucl.ac.be/Proceedings/esann/esannpdf/es2019-30.pdf},
  url_Slides    = {ESANN-2019-DeepEmbeddedSOM-pres.pdf},
  url_Code      = {https://github.com/FlorentF9/DESOM},
  bibbase_note  = {<img src="assets/img/papers/desom-maps.png">}
}
@inproceedings{forest2018generic,
  author        = {Forest, Florent and Lacaille, J{\'{e}}r{\^{o}}me and Lebbah, Mustapha and Azzag, Hanane},
  title         = {{A Generic and Scalable Pipeline for Large-Scale Analytics of Continuous Aircraft Engine Data}},
  booktitle     = {IEEE International Conference on Big Data},
  year          = {2018},
  isbn          = {9781538650356},
  doi           = {10.1109/BigData.2018.8622297},
  keywords      = {big data,aircraft engine,aviation,generic,hadoop,health monitoring,scalable,spark},
  abstract      = {A major application of data analytics for aircraft engine manufacturers is engine health monitoring, which consists in improving availability and operation of engines by leveraging operational data and past events. Traditional tools can no longer handle the increasing volume and velocity of data collected on modern aircraft. We propose a generic and scalable pipeline for large-scale analytics of operational data from a recent type of aircraft engine, oriented towards health monitoring applications. Based on Hadoop and Spark, our approach enables domain experts to scale their algorithms and extract features from tens of thousands of flights stored on a cluster. All computations are performed using the Spark framework, however custom functions and algorithms can be integrated without knowledge of distributed programming. Unsupervised learning algorithms are integrated for clustering and dimensionality reduction of the flight features, in order to allow efficient visualization and interpretation through a dedicated web application. The use case guiding our work is a methodology for engine fleet monitoring with a self-organizing map. Finally, this pipeline is meant to be end-to-end, fully customizable and ready for use in an industrial setting.},
  url_Link      = {https://ieeexplore.ieee.org/document/8622297},
  url_Paper     = {IEEEBigData-2018-ForestLacailleLebbahAzzag-full-paper.pdf},
  bibbase_note  = {<img src="assets/img/papers/pipeline.png">}
}
@article{knodlseder2016gammalib,
  author        = {Kn{\"{o}}dlseder, J. and Mayer, M. and Deil, C. and Cayrou, J. B. and Owen, E. and Kelley-Hoskins, N. and Lu, C. C. and Buehler, R. and Forest, F. and Louge, T. and Siejkowski, H. and Kosack, K. and Gerard, L. and Schulz, A. and Martin, P. and Sanchez, D. and Ohm, S. and Hassan, T. and Brau-Nogu{\'{e}}, S.},
  title         = {{GammaLib and ctools: A software framework for the analysis of astronomical gamma-ray data}},
  journal       = {Astronomy and Astrophysics},
  volume        = {593},
  pages         = {1--19},
  year          = {2016},
  issn          = {1432-0746},
  doi           = {10.1051/0004-6361/201628822},
  archivePrefix = {arXiv},
  eprint        = {1606.00393},
  arxivId       = {1606.00393},
  keywords      = {data analysis,virtual observatory tools},
  abstract      = {The field of gamma-ray astronomy has seen important progress during the last decade, yet to date no common software framework has been developed for the scientific analysis of gamma-ray telescope data. We propose to fill this gap by means of the GammaLib software, a generic library that we have developed to support the analysis of gamma-ray event data. GammaLib was written in C++ and all functionality is available in Python through an extension module. Based on this framework we have developed the ctools software package, a suite of software tools that enables flexible workflows to be built for the analysis of Imaging Air Cherenkov Telescope event data. The ctools are inspired by science analysis software available for existing high-energy astronomy instruments, and they follow the modular ftools model developed by the High Energy Astrophysics Science Archive Research Center. The ctools were written in Python and C++, and can be either used from the command line via shell scripts or directly from Python. In this paper we present the GammaLib and ctools software versions 1.0 that were released at the end of 2015. GammaLib and ctools are ready for the science analysis of Imaging Air Cherenkov Telescope event data, and also support the analysis of Fermi-LAT data and the exploitation of the COMPTEL legacy data archive. We propose using ctools as the science tools software for the Cherenkov Telescope Array Observatory.},
  url_Link      = {https://www.aanda.org/articles/aa/abs/2016/09/aa28822-16/aa28822-16.html},
  url_Paper     = {https://www.aanda.org/articles/aa/pdf/2016/09/aa28822-16.pdf},
  url_Code      = {http://cta.irap.omp.eu/gammalib/},
  bibbase_note  = {<img src="assets/img/papers/gammalib.png">}
}