Embed this publication list in your own page with one of the following options.

JavaScript:
<script src="https://bibbase.org/show?bib=https%3A%2F%2Fbibbase.org%2Fzotero%2Fmh_lenguyen&jsonp=1"></script>

PHP (server-side include):
<?php
// Fetch the rendered publication list from BibBase and print it into the page.
$contents = file_get_contents("https://bibbase.org/show?bib=https%3A%2F%2Fbibbase.org%2Fzotero%2Fmh_lenguyen");
print_r($contents);
?>

iFrame:
<iframe src="https://bibbase.org/show?bib=https%3A%2F%2Fbibbase.org%2Fzotero%2Fmh_lenguyen"></iframe>

For more details, see the BibBase documentation.
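The same rendered list can also be fetched from a script. Below is a minimal Python sketch using only the standard library; it mirrors the PHP snippet above and is a sketch rather than an official BibBase client, with the URL taken directly from the embed options.

# Fetch the server-rendered publication list from BibBase and print the returned HTML fragment.
from urllib.request import urlopen

BIBBASE_URL = "https://bibbase.org/show?bib=https%3A%2F%2Fbibbase.org%2Fzotero%2Fmh_lenguyen"

with urlopen(BIBBASE_URL) as response:
    print(response.read().decode("utf-8"))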
@article{heine_techniques_2023, title = {Techniques to produce and evaluate realistic multivariate synthetic data}, volume = {13}, copyright = {2023 The Author(s)}, issn = {2045-2322}, url = {https://www.nature.com/articles/s41598-023-38832-0}, doi = {10.1038/s41598-023-38832-0}, abstract = {Data modeling requires a sufficient sample size for reproducibility. A small sample size can inhibit model evaluation. A synthetic data generation technique addressing this small sample size problem is evaluated: from the space of arbitrarily distributed samples, a subgroup (class) has a latent multivariate normal characteristic; synthetic data can be generated from this class with univariate kernel density estimation (KDE); and synthetic samples are statistically like their respective samples. Three samples (n = 667) were investigated with 10 input variables (X). KDE was used to augment the sample size in X. Maps produced univariate normal variables in Y. Principal component analysis in Y produced uncorrelated variables in T, where the probability density functions were approximated as normal and characterized; synthetic data was generated with normally distributed univariate random variables in T. Reversing each step produced synthetic data in Y and X. All samples were approximately multivariate normal in Y, permitting the generation of synthetic data. Probability density function and covariance comparisons showed similarity between samples and synthetic samples. A class of samples has a latent normal characteristic. For such samples, this approach offers a solution to the small sample size problem. Further studies are required to understand this latent class.}, language = {en}, number = {1}, urldate = {2023-10-17}, journal = {Scientific Reports}, author = {Heine, John and Fowler, Erin E. E. and Berglund, Anders and Schell, Michael J. and Eschrich, Steven}, month = jul, year = {2023}, note = {Number: 1 Publisher: Nature Publishing Group}, keywords = {Applied mathematics, Computational science, Data processing, Predictive medicine, Scientific data, Statistical methods, Statistics}, pages = {12266}, }
@misc{wen_transformers_2023, title = {Transformers in {Time} {Series}: {A} {Survey}}, shorttitle = {Transformers in {Time} {Series}}, url = {http://arxiv.org/abs/2202.07125}, doi = {10.48550/arXiv.2202.07125}, abstract = {Transformers have achieved superior performances in many tasks in natural language processing and computer vision, which also triggered great interest in the time series community. Among multiple advantages of Transformers, the ability to capture long-range dependencies and interactions is especially attractive for time series modeling, leading to exciting progress in various time series applications. In this paper, we systematically review Transformer schemes for time series modeling by highlighting their strengths as well as limitations. In particular, we examine the development of time series Transformers in two perspectives. From the perspective of network structure, we summarize the adaptations and modifications that have been made to Transformers in order to accommodate the challenges in time series analysis. From the perspective of applications, we categorize time series Transformers based on common tasks including forecasting, anomaly detection, and classification. Empirically, we perform robust analysis, model size analysis, and seasonal-trend decomposition analysis to study how Transformers perform in time series. Finally, we discuss and suggest future directions to provide useful research guidance. To the best of our knowledge, this paper is the first work to comprehensively and systematically summarize the recent advances of Transformers for modeling time series data. We hope this survey will ignite further research interests in time series Transformers.}, urldate = {2023-10-17}, publisher = {arXiv}, author = {Wen, Qingsong and Zhou, Tian and Zhang, Chaoli and Chen, Weiqi and Ma, Ziqing and Yan, Junchi and Sun, Liang}, month = may, year = {2023}, note = {arXiv:2202.07125 [cs, eess, stat]}, keywords = {Computer Science - Artificial Intelligence, Computer Science - Machine Learning, Electrical Engineering and Systems Science - Signal Processing, Statistics - Machine Learning}, }
@misc{ma_survey_2023, title = {A {Survey} on {Time}-{Series} {Pre}-{Trained} {Models}}, url = {http://arxiv.org/abs/2305.10716}, doi = {10.48550/arXiv.2305.10716}, abstract = {Time-Series Mining (TSM) is an important research area since it shows great potential in practical applications. Deep learning models that rely on massive labeled data have been utilized for TSM successfully. However, constructing a large-scale well-labeled dataset is difficult due to data annotation costs. Recently, Pre-Trained Models have gradually attracted attention in the time series domain due to their remarkable performance in computer vision and natural language processing. In this survey, we provide a comprehensive review of Time-Series Pre-Trained Models (TS-PTMs), aiming to guide the understanding, applying, and studying TS-PTMs. Specifically, we first briefly introduce the typical deep learning models employed in TSM. Then, we give an overview of TS-PTMs according to the pre-training techniques. The main categories we explore include supervised, unsupervised, and self-supervised TS-PTMs. Further, extensive experiments are conducted to analyze the advantages and disadvantages of transfer learning strategies, Transformer-based models, and representative TS-PTMs. Finally, we point out some potential directions of TS-PTMs for future work.}, urldate = {2023-10-17}, publisher = {arXiv}, author = {Ma, Qianli and Liu, Zhen and Zheng, Zhenjing and Huang, Ziyang and Zhu, Siying and Yu, Zhongzhong and Kwok, James T.}, month = may, year = {2023}, note = {arXiv:2305.10716 [cs]}, keywords = {Computer Science - Artificial Intelligence, Computer Science - Machine Learning}, }
@article{sehri_university_2023, title = {University of {Ottawa} constant load and speed rolling-element bearing vibration and acoustic fault signature datasets}, volume = {49}, issn = {2352-3409}, url = {https://www.sciencedirect.com/science/article/pii/S2352340923004456}, doi = {10.1016/j.dib.2023.109327}, abstract = {The collection and analysis of data play a critical role in detecting and diagnosing faults in bearings. However, the availability of large open-access rolling-element bearing datasets for fault diagnosis is limited. To overcome this challenge, the University of Ottawa Rolling-element Bearing Vibration and Acoustic Fault Signature Datasets Operating under Constant Load and Speed Conditions are introduced to provide supplementary data that can be combined or merged with existing bearing datasets to increase the amount of data available to researchers. This data utilizes various sensors such as an accelerometer, a microphone, a load cell, a hall effect sensor, and thermocouples to gather quality data on bearing health. By incorporating vibration and acoustic signals, the datasets enable both traditional and machine learning-based approaches for rolling-element bearing fault diagnosis. Furthermore, this dataset offers valuable insights into the accelerated deterioration of bearing life under constant loads, making it an invaluable resource for research in this domain. Ultimately, these datasets deliver high quality data for the detection and diagnosis of faults in rolling-element bearings, thereby holding significant implications for machinery operation and maintenance.}, urldate = {2023-10-04}, journal = {Data in Brief}, author = {Sehri, Mert and Dumond, Patrick and Bouchard, Michel}, month = aug, year = {2023}, keywords = {Fault detection/Diagnosis, Machine condition monitoring, Signal processing, Vibration}, pages = {109327}, }
@article{jung_vibration_2023, title = {Vibration and current dataset of three-phase permanent magnet synchronous motors with stator faults}, volume = {47}, issn = {2352-3409}, url = {https://www.sciencedirect.com/science/article/pii/S2352340923000707}, doi = {10.1016/j.dib.2023.108952}, abstract = {Permanent magnet synchronous motors (PMSM) are widely used in industry applications such as home appliances, manufacturing process, high-speed trains, and electric vehicles. Unexpected faults of PMSM are directly related to the significant losses in the engineered systems. The majority of motor faults are bearing fault (mechanical) and stator fault (electrical). This article reports vibration and driving current dataset of three-phase PMSM with three different motor powers under eight different severities of stator fault. PMSM conditions including normal, inter-coil short circuit fault, and inter-turn short circuit fault in three motors are demonstrated with different powers of 1.0 kW, 1.5 kW and 3.0 kW, respectively. The PMSMs are operated under the same torque load condition and rotating speed. Dataset is acquired using one integrated electronics piezo-electric (IEPE) based accelerometer and three current transformers (CT) with National Instruments (NI) data acquisition (DAQ) board under international organization for standardization standard (ISO 10816-1:1995). Established dataset can be used to verify newly developed state-of-the-art methods for PMSM stator fault diagnosis. Mendeley Data. DOI: 10.17632/rgn5brrgrn.5}, urldate = {2023-10-04}, journal = {Data in Brief}, author = {Jung, Wonho and Yun, Sung-Hyun and Lim, Yoon-Seop and Cheong, Sungjin and Park, Yong-Hwa}, month = apr, year = {2023}, keywords = {Condition Monitoring, Current, Fault Diagnosis, Stator Fault, Three-Phase Permanent Magnet Synchronous Motors, Vibration}, pages = {108952}, }
@article{ahern_dataset_2023, title = {A dataset for fault detection and diagnosis of an air handling unit from a real industrial facility}, volume = {48}, issn = {2352-3409}, url = {https://www.sciencedirect.com/science/article/pii/S235234092300327X}, doi = {10.1016/j.dib.2023.109208}, abstract = {This dataset was collected for the purpose of applying fault detection and diagnosis (FDD) techniques to real data from an industrial facility. The data for an air handling unit (AHU) is extracted from a building management system (BMS) and aligned with the Project Haystack naming convention. This dataset differs from other publicly available datasets in three main ways. Firstly, the dataset does not contain fault detection ground truth. The lack of labelled datasets in the industrial setting is a significant limitation to the application of FDD techniques found in the literature. Secondly, unlike other publicly available datasets that typically record values every 1 min or 5 min, this dataset captures measurements at a lower frequency of every 15 min, which is due to data storage constraints. Thirdly, the dataset contains a myriad of data issues. For example, there are missing features, missing time intervals, and inaccurate data. Therefore, we hope this dataset will encourage the development of robust FDD techniques that are more suitable for real world applications.}, urldate = {2023-10-04}, journal = {Data in Brief}, author = {Ahern, Michael and O'Sullivan, Dominic T. J. and Bruton, Ken}, month = jun, year = {2023}, keywords = {Detection, HVAC data, Real data, Time series}, pages = {109208}, }
@article{jung_vibration_2023-1, title = {Vibration, acoustic, temperature, and motor current dataset of rotating machine under varying operating conditions for fault diagnosis}, volume = {48}, issn = {2352-3409}, url = {https://www.sciencedirect.com/science/article/pii/S2352340923001671}, doi = {10.1016/j.dib.2023.109049}, abstract = {Rotating machines are often operated under various operating conditions. However, the characteristics of the data varies with their operating conditions. This article presents the time-series dataset, including vibration, acoustic, temperature, and driving current data of rotating machines under varying operating conditions. The dataset was acquired using four ceramic shear ICP based accelerometers, one microphone, two thermocouples, and three current transformer (CT) based on the international organization for standardization (ISO) standard. The conditions of the rotating machine consisted of normal, bearing faults (inner and outer races), shaft misalignment, and rotor unbalance with three different torque load conditions (0 Nm, 2 Nm, and 4 Nm). This article also reports the vibration and driving current dataset of a rolling element bearing under varying speed conditions (680 RPM to 2460 RPM). The established dataset can be used to verify newly developed state-of-the-art methods for fault diagnosis of rotating machines. Mendeley Data. DOI:10.17632/ztmf3m7h5x.6, DOI:10.17632/vxkj334rzv.7, DOI:10.17632/x3vhp8t6hg.7, DOI:10.17632/j8d8pfkvj2.7}, urldate = {2023-10-04}, journal = {Data in Brief}, author = {Jung, Wonho and Kim, Seong-Hu and Yun, Sung-Hyun and Bae, Jaewoong and Park, Yong-Hwa}, month = jun, year = {2023}, keywords = {Ball Bearing, Condition Monitoring, Load Fluctuation, Misalignment, Speed Fluctuation, Unbalance}, pages = {109049}, }
@article{ogaili_wind_2023, title = {Wind turbine blades fault diagnosis based on vibration dataset analysis}, volume = {49}, issn = {2352-3409}, url = {https://www.sciencedirect.com/science/article/pii/S2352340923005152}, doi = {10.1016/j.dib.2023.109414}, abstract = {Globally, wind turbines play a significant role in generating sustainable and clean energy. Ensuring optimal performance and reliability is crucial to minimize failures and reduce operating and maintenance costs. However, due to their conventional design, identifying faults in wind turbines is challenging. This dataset provides vibration data for faulty wind turbine blades, which covers common vibration excitation mechanisms associated with various faults and operating conditions, including wind speed. The introduced faults in the wind turbine blades include surface erosion, cracked blade, mass imbalance, and twist blade fault. This data article serves as a valuable resource for validating condition monitoring methods in industrial wind turbine applications and facilitates a better understanding of vibration signal characteristics associated with different faults.}, urldate = {2023-10-04}, journal = {Data in Brief}, author = {Ogaili, Ahmed Ali Farhan and Abdulhady Jaber, Alaa and Hamzah, Mohsin Noori}, month = aug, year = {2023}, keywords = {Condition monitoring, Fault diagnosis, Vibration signal analysis, Wind turbine blade}, pages = {109414}, }
@article{de_pater_developing_2023, title = {Developing health indicators and {RUL} prognostics for systems with few failure instances and varying operating conditions using a {LSTM} autoencoder}, volume = {117}, issn = {0952-1976}, url = {https://www.sciencedirect.com/science/article/pii/S0952197622005723}, doi = {10.1016/j.engappai.2022.105582}, abstract = {Most Remaining Useful Life (RUL) prognostics are obtained using supervised learning models trained with many labelled data samples (i.e., the true RUL is known). In aviation, however, aircraft systems are often preventively replaced before failure. There are thus very few labelled data samples available. We therefore propose a Long Short-Term Memory (LSTM) autoencoder with attention to develop health indicators for an aircraft system instead. This autoencoder is trained with unlabelled data samples (i.e., the true RUL is unknown). Since aircraft fly under various operating conditions (varying altitude, speed, etc.), these conditions are also integrated in the autoencoder. We show that the consideration of the operating conditions leads to robust health indicators and improves significantly the monotonicity, trendability and prognosability of these indicators. These health indicators are further used to predict the RUL of the aircraft system using a similarity-based matching approach. We illustrate our approach for turbofan engines. We show that the consideration of the operating conditions improves the monotonicity of the health indicators by 97\%. Also, our approach leads to accurate RUL estimates with a Root Mean Square Error (RMSE) of 2.67 flights only. Moreover, a 19\% reduction in the RMSE is obtained using our approach in comparison to existing supervised learning models.}, urldate = {2023-09-28}, journal = {Engineering Applications of Artificial Intelligence}, author = {de Pater, Ingeborg and Mitici, Mihaela}, month = jan, year = {2023}, keywords = {Attention, Autoencoder, Health indicators, Remaining Useful Life prognostics, Unlabelled data samples, Varying operating conditions}, pages = {105582}, }
@phdthesis{nguyen_online_2023, type = {phdthesis}, title = {Online machine learning-based predictive maintenance for the railway industry}, url = {https://theses.hal.science/tel-04164338}, abstract = {Being an effective long-distance mass transit, the railway will continue to flourish for its limited carbon footprint in the environment. Ensuring the equipment's reliability and passenger safety brings forth the need for efficient maintenance. Apart from the prevalence of corrective and periodic maintenance, predictive maintenance has come into prominence lately. Recent advances in machine learning and the abundance of data drive practitioners to data-driven predictive maintenance. The common practice is to collect data to train a machine learning model, then deploy the model for production and keep it unchanged afterward. We argue that such practice is suboptimal on a data stream. The unboundedness of the stream makes the model prone to incomplete learning. Dynamic changes on the stream introduce novel concepts unseen by the model and decrease its accuracy. The velocity of the stream makes manual labeling infeasible and disables supervised learning algorithms. Therefore, switching from a static, offline learning paradigm to an adaptive, online one is necessary, especially when new generations of connected trains continuously generating sensor data have already been a reality. We investigate the applicability of online machine learning for predictive maintenance on typical complex systems in the railway. First, we develop InterCE as an active learning-based framework that extracts cycles from an unlabeled stream by interacting with a human expert. Then, we implement a long short-term memory autoencoder to transform the extracted cycles into feature vectors that are more compact yet remain representative. Finally, we design CheMoc as a framework that continuously monitors the condition of the systems using online adaptive clustering. Our methods are evaluated on the passenger access systems on two fleets of passenger trains managed by the national railway company SNCF of France.}, language = {en}, urldate = {2023-09-15}, school = {Institut Polytechnique de Paris}, author = {Nguyen, Minh Huong Le}, month = jun, year = {2023}, }
@inproceedings{huber_physics-informed_2023, title = {Physics-{Informed} {Machine} {Learning} for {Predictive} {Maintenance}: {Applied} {Use}-{Cases}}, shorttitle = {Physics-{Informed} {Machine} {Learning} for {Predictive} {Maintenance}}, doi = {10.1109/SDS57534.2023.00016}, abstract = {The combination of physics and engineering information with data-driven methods like machine learning (ML) and deep learning is gaining attention in various research fields. One of the promising practical applications of such hybrid methods is for supporting maintenance decision making in the form of condition-based and predictive maintenance. In this paper we focus on the potential of physics-informed data augmentation for ML algorithms. We demonstrate possible implementations of the concept using three use cases, differing in their technical systems, their algorithms and their tasks ranging from anomaly detection, through fault diagnostics up to prognostics of the remaining useful life. We elaborate on the benefits and prerequisites of each technique and provide guidelines for future practical implementations in other systems.}, booktitle = {2023 10th {IEEE} {Swiss} {Conference} on {Data} {Science} ({SDS})}, author = {Huber, Lilach Goren and Palmé, Thomas and Chao, Manuel Arias}, month = jun, year = {2023}, note = {ISSN: 2835-3420}, keywords = {Anomaly Detection, ConditionBased Maintenance, Data science, Decision making, Deep Learning., Deep learning, Distance measurement, Fault Diagnostics, Fault Prognostics, Machine learning algorithms, Prediction algorithms, Predictive Maintenance, Task analysis, physics-informed Machine Learning}, pages = {66--72}, }
@misc{pashami_explainable_2023, title = {Explainable {Predictive} {Maintenance}}, url = {http://arxiv.org/abs/2306.05120}, doi = {10.48550/arXiv.2306.05120}, abstract = {Explainable Artificial Intelligence (XAI) fills the role of a critical interface fostering interactions between sophisticated intelligent systems and diverse individuals, including data scientists, domain experts, end-users, and more. It aids in deciphering the intricate internal mechanisms of ``black box'' Machine Learning (ML), rendering the reasons behind their decisions more understandable. However, current research in XAI primarily focuses on two aspects; ways to facilitate user trust, or to debug and refine the ML model. The majority of it falls short of recognising the diverse types of explanations needed in broader contexts, as different users and varied application areas necessitate solutions tailored to their specific needs. One such domain is Predictive Maintenance (PdM), an exploding area of research under the Industry 4.0 \& 5.0 umbrella. This position paper highlights the gap between existing XAI methodologies and the specific requirements for explanations within industrial applications, particularly the Predictive Maintenance field. Despite explainability's crucial role, this subject remains a relatively under-explored area, making this paper a pioneering attempt to bring relevant challenges to the research community's attention. We provide an overview of predictive maintenance tasks and accentuate the need and varying purposes for corresponding explanations. We then list and describe XAI techniques commonly employed in the literature, discussing their suitability for PdM tasks. Finally, to make the ideas and claims more concrete, we demonstrate XAI applied in four specific industrial use cases: commercial vehicles, metro trains, steel plants, and wind farms, spotlighting areas requiring further research.}, urldate = {2023-08-30}, publisher = {arXiv}, author = {Pashami, Sepideh and Nowaczyk, Slawomir and Fan, Yuantao and Jakubowski, Jakub and Paiva, Nuno and Davari, Narjes and Bobek, Szymon and Jamshidi, Samaneh and Sarmadi, Hamid and Alabdallah, Abdallah and Ribeiro, Rita P. and Veloso, Bruno and Sayed-Mouchaweh, Moamar and Rajaoarisoa, Lala and Nalepa, Grzegorz J. and Gama, João}, month = jun, year = {2023}, note = {arXiv:2306.05120 [cs]}, keywords = {Computer Science - Artificial Intelligence, I.2.1}, }
@article{mitici_prognostics_2023, title = {Prognostics for {Lithium}-ion batteries for electric {Vertical} {Take}-off and {Landing} aircraft using data-driven machine learning}, volume = {12}, issn = {2666-5468}, url = {https://www.sciencedirect.com/science/article/pii/S2666546823000058}, doi = {10.1016/j.egyai.2023.100233}, abstract = {The health management of batteries is a key enabler for the adoption of Electric Vertical Take-off and Landing vehicles (eVTOLs). Currently, few studies consider the health management of eVTOL batteries. One distinct characteristic of batteries for eVTOLs is that the discharge rates are significantly larger during take-off and landing, compared with the battery discharge rates needed for automotives. Such discharge protocols are expected to impact the long-run health of batteries. This paper proposes a data-driven machine learning framework to estimate the state-of-health and remaining-useful-lifetime of eVTOL batteries under varying flight conditions and taking into account the entire flight profile of the eVTOLs. Three main features are considered for the assessment of the health of the batteries: charge, discharge and temperature. The importance of these features is also quantified. Considering battery charging before flight, a selection of missions for state-of-health and remaining-useful-lifetime prediction is performed. The results show that indeed, discharge-related features have the highest importance when predicting battery state-of-health and remaining-useful-lifetime. Using several machine learning algorithms, it is shown that the battery state-of-health and remaining-useful-life are well estimated using Random Forest regression and Extreme Gradient Boosting, respectively.}, language = {en}, urldate = {2023-06-07}, journal = {Energy and AI}, author = {Mitici, Mihaela and Hennink, Birgitte and Pavel, Marilena and Dong, Jianning}, month = apr, year = {2023}, keywords = {Electric Vertical Take-off and Landing vehicles, Lithium-ion battery, Machine learning, Remaining-useful-life, State-of-health}, pages = {100233}, }
@misc{hurtado_continual_2023, title = {Continual {Learning} for {Predictive} {Maintenance}: {Overview} and {Challenges}}, shorttitle = {Continual {Learning} for {Predictive} {Maintenance}}, url = {http://arxiv.org/abs/2301.12467}, doi = {10.48550/arXiv.2301.12467}, abstract = {Machine learning techniques have become one of the main propellers for solving many engineering problems effectively and efficiently. In Predictive Maintenance, for instance, Data-Driven methods have been used to improve predictions of when maintenance is needed on different machines and operative contexts. However, one of the limitations of these methods is that they are trained on a fixed distribution that does not change over time, which seldom happens in real-world applications. When internal or external factors alter the data distribution, the model performance may decrease or even fail unpredictably, resulting in severe consequences for machine maintenance. Continual Learning methods propose ways of adapting prediction models and incorporating new knowledge after deployment. The main objective of these methods is to avoid the plasticity-stability dilemma by updating the parametric model while not forgetting previously learned tasks. In this work, we present the current state of the art in applying Continual Learning to Predictive Maintenance, with an extensive review of both disciplines. We first introduce the two research themes independently, then discuss the current intersection of Continual Learning and Predictive Maintenance. Finally, we discuss the main research directions and conclusions.}, urldate = {2023-06-03}, publisher = {arXiv}, author = {Hurtado, Julio and Salvati, Dario and Semola, Rudy and Bosio, Mattia and Lomonaco, Vincenzo}, month = jan, year = {2023}, note = {arXiv:2301.12467 [cs]}, keywords = {Computer Science - Machine Learning}, }
@article{mercha_machine_2023, title = {Machine learning and deep learning for sentiment analysis across languages: {A} survey}, volume = {531}, issn = {0925-2312}, shorttitle = {Machine learning and deep learning for sentiment analysis across languages}, url = {https://www.sciencedirect.com/science/article/pii/S0925231223001546}, doi = {10.1016/j.neucom.2023.02.015}, abstract = {The inception and rapid growth of the Web, social media, and other online forums have resulted in the continuous and rapid generation of opinionated textual data. Several real-world applications have been focusing on determining the sentiments expressed in these data. Owing to the multilinguistic nature of the generated data, there exists an increasing need to perform sentiment analysis on data in diverse languages. This study presents an overview of the methods used to perform sentiment analysis across languages. We primarily focus on multilingual and cross-lingual approaches. This survey covers the early approaches and current advancements that employ machine learning and deep learning models. We categorize these methods and techniques and provide new research directions. Our findings reveal that deep learning techniques have been widely used in both approaches and yield the best results. Additionally, the scarcity of multilingual annotated datasets limits the progress of multilingual and cross-lingual sentiment analyses, and therefore increases the complexity in comparing these techniques and determining the ones with the best performance.}, language = {en}, urldate = {2023-03-21}, journal = {Neurocomputing}, author = {Mercha, El Mahdi and Benbrahim, Houda}, month = apr, year = {2023}, keywords = {Cross-lingual sentiment analysis, Deep learning, Machine learning, Multilingual sentiment analysis, Sentiment analysis}, pages = {195--216}, }
@article{nanath_investigation_2023, title = {An investigation of crowdsourcing methods in enhancing the machine learning approach for detecting online recruitment fraud}, volume = {3}, issn = {2667-0968}, url = {https://www.sciencedirect.com/science/article/pii/S2667096823000149}, doi = {10.1016/j.jjimei.2023.100167}, abstract = {Misinformation on the web has become a problem of significant impact in an information-driven society. Persistent and large volumes of fake content are being injected, and hence the content (news, articles, jobs, facts) available online is often questionable. This research reviews a range of machine learning algorithms to tackle a specific case of online recruitment fraud (ORF). A model with content features of job posting is tested with five supervised machine learning (ML) algorithms. It then investigates various crowdsourcing techniques that could enhance prediction accuracy and add human insights to machine learning automation. Each crowdsourcing method (explored as human signals online) was tested across the same ML algorithms to test its effectiveness in predicting fake job postings. The testing was conducted by comparing the hybrid models of machine learning and crowdsourced inputs. This study revealed that the best ML algorithm was different in the automated model compared to the hybrid model. Results also indicated that the net promoter type crowdsourced question resulted in the best accuracy in classifying fraudulent and legitimate jobs. The decision tree and generalized linear model demonstrated the highest accuracy among all the tested models.}, language = {en}, number = {1}, urldate = {2023-03-08}, journal = {International Journal of Information Management Data Insights}, author = {Nanath, Krishnadas and Olney, Liting}, month = apr, year = {2023}, keywords = {Crowdsourcing, Fake content, Machine learning, Misinformation, Online recruitment fraud}, pages = {100167}, }
@article{yao_deep_2023, title = {Deep adaptive arbitrary polynomial chaos expansion: {A} mini-data-driven semi-supervised method for uncertainty quantification}, volume = {229}, issn = {0951-8320}, shorttitle = {Deep adaptive arbitrary polynomial chaos expansion}, url = {https://www.sciencedirect.com/science/article/pii/S095183202200432X}, doi = {10.1016/j.ress.2022.108813}, abstract = {All kinds of uncertainties influence the reliability of the engineering system. Thus, uncertainty quantification is significant to the system reliability analysis. Polynomial chaos expansion (PCE) is an effective method for uncertainty quantification while it requires sufficient labeled data to quantify uncertainty accurately. To overcome this problem, this paper proposes the adaptive arbitrary polynomial chaos (aPC) and proves two properties of the adaptive expansion coefficients. Sequentially, a semi-supervised deep adaptive arbitrary polynomial chaos expansion (Deep aPCE) method is proposed based on the adaptive aPC and the deep neural network (DNN). The Deep aPCE method uses two properties of the adaptive aPC to assist in training the DNN by a small amount of labeled data and abundant unlabeled data, significantly reducing the training data cost. On the other hand, the Deep aPCE method adopts the DNN to fine-tune the adaptive expansion coefficients dynamically, improving the accuracy of uncertainty quantification. Besides, the Deep aPCE method can directly construct accurate surrogate models of the high dimensional stochastic systems. Five numerical examples are used to verify the effectiveness of the Deep aPCE method. Finally, the Deep aPCE method is applied to the reliability analysis of an axisymmetric conical aircraft.}, language = {en}, urldate = {2022-10-29}, journal = {Reliability Engineering \& System Safety}, author = {Yao, Wen and Zheng, Xiaohu and Zhang, Jun and Wang, Ning and Tang, Guijian}, month = jan, year = {2023}, keywords = {Arbitrary polynomial chaos expansion, Deep learning, Mini-data, Semi-supervised, Uncertainty quantification}, pages = {108813}, }
@article{tohme_reliable_2023, title = {Reliable neural networks for regression uncertainty estimation}, volume = {229}, issn = {0951-8320}, url = {https://www.sciencedirect.com/science/article/pii/S0951832022004306}, doi = {10.1016/j.ress.2022.108811}, abstract = {While deep neural networks are highly performant and successful in a wide range of real-world problems, estimating their predictive uncertainty remains a challenging task. To address this challenge, we propose and implement a loss function for regression uncertainty estimation based on the Bayesian Validation Metric (BVM) framework while using ensemble learning. The proposed loss reproduces maximum likelihood estimation in the limiting case. A series of experiments on in-distribution data show that the proposed method is competitive with existing state-of-the-art methods. Experiments on out-of-distribution data show that the proposed method is robust to statistical change and exhibits superior predictive capability.}, language = {en}, urldate = {2022-10-29}, journal = {Reliability Engineering \& System Safety}, author = {Tohme, Tony and Vanslette, Kevin and Youcef-Toumi, Kamal}, month = jan, year = {2023}, keywords = {Neural networks, Predictive uncertainty estimation, Regression, Reliability}, pages = {108811}, }
@article{shangguan_train_2023, title = {Train wheel degradation generation and prediction based on the time series generation adversarial network}, volume = {229}, issn = {0951-8320}, url = {https://www.sciencedirect.com/science/article/pii/S0951832022004355}, doi = {10.1016/j.ress.2022.108816}, abstract = {To ensure the safe operation of high-speed railways, it is necessary to assess the reliability of its key components. Among them, as wheels are prone to wear degradation and the wear data acquisition process has the disadvantages of high cost and long cycle. There are few wheels degradation samples, which in turn makes the wheel degradation prediction have large errors. Hence, this paper uses the time series generator adversarial network (TimeGAN) to generate synthetic wheel degradation, in which the original data is segmented through a sliding window to obtain more input sets, and the noise distribution in the generator network is combined with the stationary gamma process (SGP). Then, the wheel degradation at measured distance k is predicted by the Gated Recurrent Unit (GRU) network. To evaluate the effectiveness of the proposed method, different methods in this paper are conducted for the experiment comparison. The experiment result shows that the proposed method has a better effect on the generation of train wheel degradation, and the Kullback-Leibler (KL) divergence and the prediction error are the smallest in the comparison. Hence, the proposed method can provide support for the further reliability analysis of railways and further ensure their operational safety.}, language = {en}, urldate = {2022-10-29}, journal = {Reliability Engineering \& System Safety}, author = {Shangguan, Anqi and Xie, Guo and Fei, Rong and Mu, Lingxia and Hei, Xinhong}, month = jan, year = {2023}, keywords = {Data generation, Degradation analysis, Rail safety, Train wheel}, pages = {108816}, }
@article{ma_multiple_2023, title = {Multiple health indicators fusion-based health prognostic for lithium-ion battery using transfer learning and hybrid deep learning method}, volume = {229}, issn = {0951-8320}, url = {https://www.sciencedirect.com/science/article/pii/S0951832022004379}, doi = {10.1016/j.ress.2022.108818}, abstract = {Accurate state of health (SOH) estimation of lithium-ion battery provides a guarantee for the safe driving of electric vehicles. Most SOH estimation methods based on the machine learning assume that the training and testing data follow the uniform distribution. However, the distribution of the datasets obtained at the different working conditions has discrepancy, which also increases its inherently large computational burden. Therefore, a novel SOH estimation method based on multiple health indicators (HIs) fusion using transfer learning and deep belief network (DBN)-long short-term memory (LSTM) hybrid network is proposed. Transfer learning is used to learn the shared features in the source domain and the target domain. Then, aiming at the insufficiency of shallow network in mining data features, DBN is utilized for SOH estimation. And considering the influence of historical information on future prediction, LSTM cell is used to replace the traditional BP neural network structure. Comparative study is conducted by applying deep and shallow network on the measured data for monitoring SOH of the battery in applications. The experimental results show that the method proposed in this paper is effective, and the performance of knowledge transferring under single domain and cross domain is also verified.}, language = {en}, urldate = {2022-10-29}, journal = {Reliability Engineering \& System Safety}, author = {Ma, Yan and Shan, Ce and Gao, Jinwu and Chen, Hong}, month = jan, year = {2023}, keywords = {Deep belief network, Lithium-ion batteries, Long short term memory, Multiple health indicators, Transfer learning}, pages = {108818}, }
@article{zhou_generic_2023, title = {A generic physics-informed neural network-based framework for reliability assessment of multi-state systems}, volume = {229}, issn = {0951-8320}, url = {https://www.sciencedirect.com/science/article/pii/S0951832022004537}, doi = {10.1016/j.ress.2022.108835}, abstract = {In this paper, we develop a generic physics-informed neural network (PINN)-based framework to assess the reliability of multi-state systems (MSSs). The proposed framework follows a two-step procedure. In the first step, we recast the reliability assessment of MSS as a machine learning problem using the framework of PINN. A feedforward neural network with two individual loss groups is constructed to encode the initial condition and the state transitions governed by ordinary differential equations in MSS, respectively. Next, we tackle the problem of high imbalance in the magnitudes of back-propagated gradients from a multi-task learning perspective and establish a continuous latent function for system reliability assessment. Particularly, we regard each element of the loss function as an individual learning task and project a task’s gradient onto the norm plane of any other task with a conflicting gradient by taking the projecting conflicting gradients (PCGrad) method. We demonstrate the applications of the proposed framework for MSS reliability assessment in a variety of scenarios, including time-independent or dependent state transitions, where system scales increase from small to medium. The computational results indicate that PINN-based framework reveals a promising performance in MSS reliability assessment and incorporation of PCGrad into PINN substantially improves the solution quality and convergence speed of the algorithm.}, language = {en}, urldate = {2022-10-29}, journal = {Reliability Engineering \& System Safety}, author = {Zhou, Taotao and Zhang, Xiaoge and Droguett, Enrique Lopez and Mosleh, Ali}, month = jan, year = {2023}, keywords = {Gradient projection, Markov process, Multi-state systems, Physics-informed neural network, Reliability assessment}, pages = {108835}, }
@article{zhou_availability_2023, title = {Availability analysis of shared bikes using abnormal trip data}, volume = {229}, issn = {0951-8320}, url = {https://www.sciencedirect.com/science/article/pii/S0951832022004616}, doi = {10.1016/j.ress.2022.108844}, abstract = {The users’ cancelling rental data in the bike-sharing system (BSS) is usually regarded as abnormal trip data and is ignored. Abnormal trip data may have implicit information about the availability of shared bikes. So this paper presents an approach based on functional principal components analysis (FPCA) and clustering to advance the shared-bike availability analysis and maintenance strategy optimization using the abnormal trip data. In the proposed approach, the ratio of the cancelling rental number to the total rental number is scored as an index. Their values reflect a smooth variation in availability. The FPCA method is performed to explore the long-term availability variation modes of shared bikes. Then the dominant modes of availability variations are determined using the k-means algorithm. The effectiveness of the proposed approach is illustrated on the real-world trip data of a BSS. The analysis result indicates that the long-term availability level of the referred BSS has decreased from the initial 0.907 to 0.861. In the definite availability variation modes, the availability of one of the variation modes even has decreased to 0.709. Finally, the preventive maintenance model is presented to prevent the deterioration or availability decrease of shared bikes based on the mean functions of availability variation modes.}, language = {en}, urldate = {2022-10-29}, journal = {Reliability Engineering \& System Safety}, author = {Zhou, Yu and Kou, Gang and Guo, Zhen-Zhu and Xiao, Hui}, month = jan, year = {2023}, keywords = {Abnormal trip data, Availability, Clustering, Functional principal component analysis, Preventive maintenance, Shared bike}, pages = {108844}, }
@article{yang_data_2023, title = {Data {Regeneration} {Based} on {Multiple} {Degradation} {Processes} for {Remaining} {Useful} {Life} {Estimation}}, volume = {229}, issn = {0951-8320}, url = {https://www.sciencedirect.com/science/article/pii/S0951832022004847}, doi = {10.1016/j.ress.2022.108867}, abstract = {Remaining useful life prediction based on deep learning for critical components demands sufficient and varied degradation samples. However, the field acquisition or laboratory preparation is generally cumbersome or the samples obtained are stereotyped. The paper proposes a data regeneration method based on multiple degradation processes to deal with the dilemma, which consists of three parts: state identification, regeneration rules from run to failure and state databases. In the first part, a global gain index and a local gain index are proposed to identify the different states of components. In the second part, an identical transformation method, a probability distribution of degradation states and data regeneration criteria are proposed to serve regeneration process of samples from run to failure. In the third part, an augmentation framework based on conditional generative adversarial networks is proposed to enrich the samples of the state database, which makes state samples more diverse. The practicability of regenerated samples obtained by the proposed method was verified by two experiments. In each experiment, initial samples, regenerated samples and hybrid samples were established respectively. Experiments with different training samples based on the same network were carried out to verify the effectiveness of the regenerated samples.}, language = {en}, urldate = {2022-10-29}, journal = {Reliability Engineering \& System Safety}, author = {Yang, Ningning and Wang, Zhijian and Cai, Wenan and Li, Yanfeng}, month = jan, year = {2023}, keywords = {Data regeneration, Deep learning, Regeneration rules, Remaining useful life, State identification}, pages = {108867}, }
@article{xia_toward_2022, title = {Toward cognitive predictive maintenance: {A} survey of graph-based approaches}, volume = {64}, issn = {0278-6125}, shorttitle = {Toward cognitive predictive maintenance}, url = {https://www.sciencedirect.com/science/article/pii/S0278612522000978}, doi = {10.1016/j.jmsy.2022.06.002}, abstract = {Predictive Maintenance (PdM) has continually attracted interest from the manufacturing community due to its significant potential in reducing unexpected machine downtime and related cost. Much attention to existing PdM research has been paid to perceiving the fault, while the identification and estimation processes are affected by many factors. Many existing approaches have not been able to manage the existing knowledge effectively for reasoning the causal relationship of fault. Meanwhile, complete correlation analysis of identified faults and the corresponding root causes is often missing. To address this problem, graph-based approaches (GbA) with cognitive intelligence are proposed, because the GbA are superior in semantic causal inference, heterogeneous association, and visualized explanation. In addition, GbA can achieve promising performance on PdM’s perception tasks by revealing the dependency relationship among parts/components of the equipment. However, despite its advantages, few papers discuss cognitive inference in PdM, let alone GbA. Aiming to fill this gap, this paper concentrates on GbA, and carries out a comprehensive survey organized by the sequential stages in PdM, i.e., anomaly detection, diagnosis, prognosis, and maintenance decision-making. Firstly, GbA and their corresponding graph construction methods are introduced. Secondly, the implementation strategies and instances of GbA in PdM are presented. Finally, challenges and future works toward cognitive PdM are proposed. It is hoped that this work can provide a fundamental basis for researchers and industrial practitioners in adopting GbA-based PdM, and initiate several future research directions to achieve the cognitive PdM.}, urldate = {2023-10-26}, journal = {Journal of Manufacturing Systems}, author = {Xia, Liqiao and Zheng, Pai and Li, Xinyu and Gao, Robert. X. and Wang, Lihui}, month = jul, year = {2022}, keywords = {Bayesian network, Cognitive computing, Graph neural network, Knowledge graph, Predictive maintenance}, pages = {107--120}, }
@article{veloso_metropt_2022, title = {The {MetroPT} dataset for predictive maintenance}, volume = {9}, copyright = {2022 The Author(s)}, issn = {2052-4463}, url = {https://www.nature.com/articles/s41597-022-01877-3}, doi = {10.1038/s41597-022-01877-3}, abstract = {The paper describes the MetroPT data set, an outcome of a Predictive Maintenance project with an urban metro public transportation service in Porto, Portugal. The data was collected in 2022 to develop machine learning methods for online anomaly detection and failure prediction. Several analog sensor signals (pressure, temperature, current consumption), digital signals (control signals, discrete signals), and GPS information (latitude, longitude, and speed) provide a framework that can be easily used and help the development of new machine learning methods. This dataset contains some interesting characteristics and can be a good benchmark for predictive maintenance models.}, language = {en}, number = {1}, urldate = {2023-10-09}, journal = {Scientific Data}, author = {Veloso, Bruno and Ribeiro, Rita P. and Gama, João and Pereira, Pedro Mota}, month = dec, year = {2022}, keywords = {Computer science, Scientific data}, pages = {764}, }
@article{dangut_application_2022, title = {Application of deep reinforcement learning for extremely rare failure prediction in aircraft maintenance}, volume = {171}, issn = {0888-3270}, url = {https://www.sciencedirect.com/science/article/pii/S0888327022000693}, doi = {10.1016/j.ymssp.2022.108873}, abstract = {The use of aircraft operational logs to predict potential failure that may lead to disruption poses many challenges and has yet to be fully explored. Given that aircraft are high-integrity assets, failures are extremely rare, and hence the distribution of relevant log data containing prior indicators will be highly skewed to the normal (healthy) case. This will present a significant challenge in using data-driven techniques because the model will be biased to the heavily weighted no-fault outcomes. This paper presents a novel approach for predicting unscheduled aircraft maintenance action based on deep reinforcement learning techniques using aircraft central maintenance system logs. The algorithm transforms the rare failure prediction problem into a sequential decision-making process that is optimised using a reward system that penalises proposed predictions that result in a false diagnosis and preferentially favours predictions that result in the right diagnosis. The validation data is directly associated with the physical health aspects of the aircraft components. The influence of extremely rare failure prediction on the proposed method is analysed. The effectiveness of the new approach is verified by comparison with previous studies, cost-sensitive and oversampling methods. Performance was evaluated based on G-mean and false-positives rates. The proposed approach shows the superior performance of 20.3\% improvement in G-mean and 97\% reduction in false-positive rate.}, language = {en}, urldate = {2022-03-03}, journal = {Mechanical Systems and Signal Processing}, author = {Dangut, Maren David and Jennions, Ian K. and King, Steve and Skaf, Zakwan}, month = may, year = {2022}, keywords = {Aircraft maintenance, Deep reinforcement learning, Extremely rare event, Imbalance classification}, pages = {108873}, }
@inproceedings{ye_continual_2022, address = {Cham}, series = {Lecture {Notes} in {Computer} {Science}}, title = {Continual {Variational} {Autoencoder} {Learning} via {Online} {Cooperative} {Memorization}}, isbn = {978-3-031-20050-2}, doi = {10.1007/978-3-031-20050-2_31}, abstract = {Due to their inference, data representation and reconstruction properties, Variational Autoencoders (VAE) have been successfully used in continual learning classification tasks. However, their ability to generate images with specifications corresponding to the classes and databases learned during Continual Learning (CL) is not well understood and catastrophic forgetting remains a significant challenge. In this paper, we firstly analyze the forgetting behaviour of VAEs by developing a new theoretical framework that formulates CL as a dynamic optimal transport problem. This framework proves approximate bounds to the data likelihood without requiring the task information and explains how the prior knowledge is lost during the training process. We then propose a novel memory buffering approach, namely the Online Cooperative Memorization (OCM) framework, which consists of a Short-Term Memory (STM) that continually stores recent samples to provide future information for the model, and a Long-Term Memory (LTM) aiming to preserve a wide diversity of samples. The proposed OCM transfers certain samples from STM to LTM according to the information diversity selection criterion without requiring any supervised signals. The OCM framework is then combined with a dynamic VAE expansion mixture network for further enhancing its performance.}, language = {en}, booktitle = {Computer {Vision} – {ECCV} 2022}, publisher = {Springer Nature Switzerland}, author = {Ye, Fei and Bors, Adrian G.}, editor = {Avidan, Shai and Brostow, Gabriel and Cissé, Moustapha and Farinella, Giovanni Maria and Hassner, Tal}, year = {2022}, keywords = {Continual learning, Lifelong generative modelling, VAE}, pages = {531--549}, }
@inproceedings{davari_fault_2022, address = {Cham}, series = {Lecture {Notes} in {Computer} {Science}}, title = {A {Fault} {Detection} {Framework} {Based} on {LSTM} {Autoencoder}: {A} {Case} {Study} for {Volvo} {Bus} {Data} {Set}}, isbn = {978-3-031-01333-1}, shorttitle = {A {Fault} {Detection} {Framework} {Based} on {LSTM} {Autoencoder}}, doi = {10.1007/978-3-031-01333-1_4}, abstract = {This study applies a data-driven anomaly detection framework based on a Long Short-Term Memory (LSTM) autoencoder network for several subsystems of a public transport bus. The proposed framework efficiently detects abnormal data, significantly reducing the false alarm rate compared to available alternatives. Using historical repair records, we demonstrate how detection of abnormal sequences in the signals can be used for predicting equipment failures. The deviations from normal operation patterns are detected by analysing the data collected from several on-board sensors (e.g., wet tank air pressure, engine speed, engine load) installed on the bus. The performance of LSTM autoencoder (LSTM-AE) is compared against the multi-layer autoencoder (mlAE) network in the same anomaly detection framework. The experimental results show that the performance indicators of the LSTM-AE network, in terms of F1 Score, Recall, and Precision, are better than those of the mlAE network.}, language = {en}, booktitle = {Advances in {Intelligent} {Data} {Analysis} {XX}}, publisher = {Springer International Publishing}, author = {Davari, Narjes and Pashami, Sepideh and Veloso, Bruno and Nowaczyk, Sławomir and Fan, Yuantao and Pereira, Pedro Mota and Ribeiro, Rita P. and Gama, João}, editor = {Bouadi, Tassadit and Fromont, Elisa and Hüllermeier, Eyke}, year = {2022}, keywords = {Autoencoder, Fault detection, LSTM, Outliers, Time series}, pages = {39--52}, }
@article{calikus_wisdom_2022, title = {Wisdom of the contexts: active ensemble learning for contextual anomaly detection}, volume = {36}, issn = {1573-756X}, shorttitle = {Wisdom of the contexts}, url = {https://doi.org/10.1007/s10618-022-00868-7}, doi = {10.1007/s10618-022-00868-7}, abstract = {In contextual anomaly detection, an object is only considered anomalous within a specific context. Most existing methods use a single context based on a set of user-specified contextual features. However, identifying the right context can be very challenging in practice, especially in datasets with a large number of attributes. Furthermore, in real-world systems, there might be multiple anomalies that occur in different contexts and, therefore, require a combination of several “useful” contexts to unveil them. In this work, we propose a novel approach, called wisdom of the contexts (WisCon), to effectively detect complex contextual anomalies in situations where the true contextual and behavioral attributes are unknown. Our method constructs an ensemble of multiple contexts, with varying importance scores, based on the assumption that not all useful contexts are equally so. We estimate the importance of each context using an active learning approach with a novel query strategy. Experiments show that WisCon significantly outperforms existing baselines in different categories (i.e., active learning methods, unsupervised contextual and non-contextual anomaly detectors) on 18 datasets. Furthermore, the results support our initial hypothesis that there is no single perfect context that successfully uncovers all kinds of contextual anomalies, and leveraging the “wisdom” of multiple contexts is necessary.}, language = {en}, number = {6}, urldate = {2023-05-21}, journal = {Data Mining and Knowledge Discovery}, author = {Calikus, Ece and Nowaczyk, Sławomir and Bouguelia, Mohamed-Rafik and Dikmen, Onur}, month = nov, year = {2022}, keywords = {Active learning, Anomaly detection, Contextual anomaly detection, Ensemble learning}, pages = {2410--2458}, }
@article{gomes_survey_2022, title = {A {Survey} on {Semi}-supervised {Learning} for {Delayed} {Partially} {Labelled} {Data} {Streams}}, volume = {55}, issn = {0360-0300}, url = {https://dl.acm.org/doi/10.1145/3523055}, doi = {10.1145/3523055}, abstract = {Unlabelled data appear in many domains and are particularly relevant to streaming applications, where even though data is abundant, labelled data is rare. To address the learning problems associated with such data, one can ignore the unlabelled data and focus only on the labelled data (supervised learning); use the labelled data and attempt to leverage the unlabelled data (semi-supervised learning); or assume some labels will be available on request (active learning). The first approach is the simplest, yet the amount of labelled data available will limit the predictive performance. The second relies on finding and exploiting the underlying characteristics of the data distribution. The third depends on an external agent to provide the required labels in a timely fashion. This survey pays special attention to methods that leverage unlabelled data in a semi-supervised setting. We also discuss the delayed labelling issue, which impacts both fully supervised and semi-supervised methods. We propose a unified problem setting, discuss the learning guarantees and existing methods, and explain the differences between related problem settings. Finally, we review the current benchmarking practices and propose adaptations to enhance them.}, number = {4}, urldate = {2023-03-31}, journal = {ACM Computing Surveys}, author = {Gomes, Heitor Murilo and Grzenda, Maciej and Mello, Rodrigo and Read, Jesse and Le Nguyen, Minh Huong and Bifet, Albert}, month = nov, year = {2022}, keywords = {Semi-supervised learning, concept drift, data streams, delayed labeling, verification latency}, pages = {75:1--75:42}, }
@inproceedings{montiel_online_2022, address = {New York, NY, USA}, series = {{KDD} '22}, title = {Online {Clustering}: {Algorithms}, {Evaluation}, {Metrics}, {Applications} and {Benchmarking}}, isbn = {978-1-4503-9385-0}, shorttitle = {Online {Clustering}}, url = {https://doi.org/10.1145/3534678.3542600}, doi = {10.1145/3534678.3542600}, abstract = {Online clustering algorithms play a critical role in data science, especially with the advantages regarding time, memory usage and complexity, while maintaining a high performance compared to traditional clustering methods. This tutorial serves, first, as a survey on online machine learning and, in particular, data stream clustering methods. During this tutorial, state-of-the-art algorithms and the associated core research threads will be presented by identifying different categories based on distance, density grids and hidden statistical models. Clustering validity indices, an important part of the clustering process which are usually neglected or replaced with classification metrics, resulting in misleading interpretation of final results, will also be deeply investigated. Then, this introduction will be put into the context with River, a go-to Python library merged between Creme and scikit-multiflow. It is also the first open-source project to include an online clustering module that can facilitate reproducibility and allow direct further improvements. From this, we propose methods of clustering configuration, applications and settings for benchmarking, using real-world problems and datasets.}, urldate = {2023-03-31}, booktitle = {Proceedings of the 28th {ACM} {SIGKDD} {Conference} on {Knowledge} {Discovery} and {Data} {Mining}}, publisher = {Association for Computing Machinery}, author = {Montiel, Jacob and Ngo, Hoang-Anh and Le-Nguyen, Minh-Huong and Bifet, Albert}, month = aug, year = {2022}, keywords = {benchmarking, data streams, decision support, online clustering, stream clustering, stream learning}, pages = {4808--4809}, }
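Since the tutorial above centers on the online clustering module of River, a small usage sketch may help; the toy points and the KMeans hyperparameters below are illustrative assumptions, not taken from the tutorial.

from river import cluster

points = [
    {"x": 1.0, "y": 2.0},
    {"x": 1.1, "y": 1.9},
    {"x": 8.0, "y": 8.2},
    {"x": 7.9, "y": 8.1},
]

model = cluster.KMeans(n_clusters=2, halflife=0.5, seed=42)

for p in points:
    model.learn_one(p)                 # update the clusterer one instance at a time
    print(model.predict_one(p))        # index of the cluster assigned to this instance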
@article{elhaik_principal_2022, title = {Principal {Component} {Analyses} ({PCA})-based findings in population genetic studies are highly biased and must be reevaluated}, volume = {12}, copyright = {2022 The Author(s)}, issn = {2045-2322}, url = {https://www.nature.com/articles/s41598-022-14395-4}, doi = {10.1038/s41598-022-14395-4}, abstract = {Principal Component Analysis (PCA) is a multivariate analysis that reduces the complexity of datasets while preserving data covariance. The outcome can be visualized on colorful scatterplots, ideally with only a minimal loss of information. PCA applications, implemented in well-cited packages like EIGENSOFT and PLINK, are extensively used as the foremost analyses in population genetics and related fields (e.g., animal and plant or medical genetics). PCA outcomes are used to shape study design, identify, and characterize individuals and populations, and draw historical and ethnobiological conclusions on origins, evolution, dispersion, and relatedness. The replicability crisis in science has prompted us to evaluate whether PCA results are reliable, robust, and replicable. We analyzed twelve common test cases using an intuitive color-based model alongside human population data. We demonstrate that PCA results can be artifacts of the data and can be easily manipulated to generate desired outcomes. PCA adjustment also yielded unfavorable outcomes in association studies. PCA results may not be reliable, robust, or replicable as the field assumes. Our findings raise concerns about the validity of results reported in the population genetics literature and related fields that place a disproportionate reliance upon PCA outcomes and the insights derived from them. We conclude that PCA may have a biasing role in genetic investigations and that 32,000-216,000 genetic studies should be reevaluated. An alternative mixed-admixture population genetic model is discussed.}, language = {en}, number = {1}, urldate = {2023-03-08}, journal = {Scientific Reports}, author = {Elhaik, Eran}, month = aug, year = {2022}, note = {Number: 1 Publisher: Nature Publishing Group}, keywords = {Computational models, Population genetics}, pages = {14683}, }
@article{emmert-streib_taxonomy_2022, title = {Taxonomy of machine learning paradigms: {A} data-centric perspective}, volume = {12}, issn = {1942-4795}, shorttitle = {Taxonomy of machine learning paradigms}, url = {https://onlinelibrary.wiley.com/doi/abs/10.1002/widm.1470}, doi = {10.1002/widm.1470}, abstract = {Machine learning is a field composed of various pillars. Traditionally, supervised learning (SL), unsupervised learning (UL), and reinforcement learning (RL) are the dominating learning paradigms that inspired the field since the 1950s. Based on these, thousands of different methods have been developed during the last seven decades used in nearly all application domains. However, recently, other learning paradigms are gaining momentum which complement and extend the above learning paradigms significantly. These are multi-label learning (MLL), semi-supervised learning (SSL), one-class classification (OCC), positive-unlabeled learning (PUL), transfer learning (TL), multi-task learning (MTL), and one-shot learning (OSL). The purpose of this article is a systematic discussion of these modern learning paradigms and their connection to the traditional ones. We discuss each of the learning paradigms formally by defining key constituents and paying particular attention to the data requirements for allowing an easy connection to applications. That means, we assume a data-driven perspective. This perspective will also allow a systematic identification of relations between the individual learning paradigms in the form of a learning-paradigm graph (LP-graph). Overall, the LP-graph establishes a taxonomy among 10 different learning paradigms. This article is categorized under: Technologies {\textgreater} Machine Learning Application Areas {\textgreater} Science and Technology Fundamental Concepts of Data and Knowledge {\textgreater} Key Design Issues in Data Mining}, language = {en}, number = {5}, urldate = {2023-03-08}, journal = {WIREs Data Mining and Knowledge Discovery}, author = {Emmert-Streib, Frank and Dehmer, Matthias}, year = {2022}, note = {\_eprint: https://onlinelibrary.wiley.com/doi/pdf/10.1002/widm.1470}, keywords = {artificial intelligence, machine learning, multi-label learning, multi-task learning, transfer learning}, pages = {e1470}, }
@inproceedings{le-nguyen_real-time_2022, address = {Lisbon, Portugal}, title = {Real-time learning for real-time data: online machine learning for predictive maintenance of railway systems}, booktitle = {Transport {Research} {Arena} ({TRA})}, author = {Le-Nguyen, Minh-Huong and Turgis, Fabien and Fayemi, Pierre-Emmanuel and Bifet, Albert}, month = nov, year = {2022}, }
@inproceedings{turgis_health_2022, address = {Birmingham, United Kingdom}, title = {Health state characterization using clustering algorithms for railway maintenance}, abstract = {To perform maintenance of a large rolling fleet, with operational constraints due to mass transit, a mixed maintenance solution combining real-time data analysis and condition-based maintenance was integrated into the SNCF maintenance process in 2017. Based on a prognostic expert system, this solution relies on constant signaling thresholds defined using technical knowledge and physical models to assess the health state of a system. As the health of a system differs from one train to another and independently evolves in time, constant signaling thresholds do not always take into account maintenance load, maintenance infrastructure availability and the effect of aging during the lifetime of the system. To overcome these limitations and enhance the current maintenance solution, an upgrade of the existing expert system has been developed and funded by SNCF Materiel, using dynamic signaling thresholds based on the study of the distribution of health indicators across the whole fleet. This article describes the concept of this new hybrid system, mixing an expert system and machine learning tools. It shows how dynamic thresholds can be computed and how clustering algorithms can be used to identify and characterize potential failure modes.}, author = {Turgis, Fabien and Audier, Pierre and Nemoz, Valentin and Marion, Rémy}, year = {2022}, }
@inproceedings{le-nguyen_continuous_2022, title = {Continuous {Health} {Monitoring} of {Machinery} using {Online} {Clustering} on {Unlabeled} {Data} {Streams}}, doi = {10.1109/BigData55660.2022.10021002}, abstract = {Maintenance is an important support function to ensure the reliability, safety, and availability in the railway. Lately, machine learning has become a major player and allows practitioners to build intricate learning models for machinery maintenance. Commonly, a model is trained on static data and is retrained on new data that exhibit novelties unknown to the model. On the contrary, online machine learning is a learning paradigm that adapts the models to new data, thus enabling adaptive, lifelong learning. Our goal is to leverage online learning on unlabeled data streams to enhance railway machinery maintenance. We propose Continuous Health Monitoring using Online Clustering (CheMoc) as an unsupervised method that learns the health profiles of the systems incrementally, assesses their working condition continuously via an adaptive health score, and works efficiently on streaming data. We evaluate CheMoc on a real-world data set from a national railway company. The results show that CheMoc discovered relevant health clusters, as confirmed by a domain expert, and processed the data of an entire year under two hours using only 600 MB of memory.}, booktitle = {2022 {IEEE} {International} {Conference} on {Big} {Data} ({Big} {Data})}, author = {Le-Nguyen, Minh-Huong and Turgis, Fabien and Fayemi, Pierre-Emmanuel and Bifet, Albert}, month = dec, year = {2022}, keywords = {Adaptation models, Companies, Employee welfare, Machine learning, Maintenance engineering, Memory management, Rail transportation, maintenance, online clustering, railway}, pages = {1866--1873}, }
@inproceedings{libera_right_2022, address = {Cham}, series = {Lecture {Notes} in {Computer} {Science}}, title = {‘{Right} to {Be} {Forgotten}’: {Analyzing} the {Impact} of {Forgetting} {Data} {Using} {K}-{NN} {Algorithm} in {Data} {Stream} {Learning}}, isbn = {978-3-031-15086-9}, shorttitle = {‘{Right} to {Be} {Forgotten}’}, doi = {10.1007/978-3-031-15086-9_34}, abstract = {New international regulations concerning personal management data guarantee the ‘Right to Be Forgotten’. One might request to have their data erased from third-party tools and services. This requirement is especially challenging when considering the behavior of machine learning estimators that will need to forget portions of their knowledge. In this paper, we investigate the impact of these learning and forgetting policies in data stream learning. In data stream mining, the sheer volume of instances typically makes it unfeasible to store the data or retraining the learning models from scratch. Hence, more efficient solutions are needed to deal with the dynamic nature of online machine learning. We modify an incremental k-NN classifier to enable it to erase its past data and we also investigate the impact of data forgetting in the obtained predictive performance. Our proposal is compared against the original k-NN algorithm using seven non-stationary stream datasets. Our results show that the forgetting-enabled algorithm can achieve similar prediction patterns compared to the vanilla one, although it yields lower predictive performance at the beginning of the learning process. Such a scenario is a typical cold-start behavior often observed in data stream mining applications, and not necessarily related to the employed forgetting mechanisms.}, language = {en}, booktitle = {Electronic {Government}}, publisher = {Springer International Publishing}, author = {Libera, Caio and Miranda, Leandro and Bernardini, Flávia and Mastelini, Saulo and Viterbo, José}, editor = {Janssen, Marijn and Csáki, Csaba and Lindgren, Ida and Loukis, Euripidis and Melin, Ulf and Viale Pereira, Gabriela and Rodríguez Bolívar, Manuel Pedro and Tambouris, Efthimios}, year = {2022}, keywords = {Data stream, K-NN, Lazy learning, Right to be forgotten, Stream learning}, pages = {530--542}, }
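To make the forgetting idea concrete, here is a plain-Python sketch of a sliding-window k-NN that can also erase individual stored instances on request; it is an illustration under simplifying assumptions, not the modified classifier evaluated in the paper.

import numpy as np
from collections import Counter

class ForgettingKNN:
    def __init__(self, k=3, window_size=1000):
        self.k, self.window_size = k, window_size
        self.X, self.y, self.ids = [], [], []

    def learn_one(self, x, y, instance_id):
        self.X.append(np.asarray(x, dtype=float)); self.y.append(y); self.ids.append(instance_id)
        if len(self.X) > self.window_size:                   # usual sliding-window forgetting
            self.X.pop(0); self.y.pop(0); self.ids.pop(0)

    def forget(self, instance_id):
        # 'Right to be forgotten': drop every stored copy of the given instance.
        keep = [i for i, stored in enumerate(self.ids) if stored != instance_id]
        self.X = [self.X[i] for i in keep]
        self.y = [self.y[i] for i in keep]
        self.ids = [self.ids[i] for i in keep]

    def predict_one(self, x):
        if not self.X:
            return None                                       # cold start: nothing stored yet
        distances = np.linalg.norm(np.asarray(self.X) - np.asarray(x, dtype=float), axis=1)
        nearest = np.argsort(distances)[: self.k]
        return Counter(self.y[i] for i in nearest).most_common(1)[0][0]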
@misc{nozawa_empirical_2022, title = {Empirical {Evaluation} and {Theoretical} {Analysis} for {Representation} {Learning}: {A} {Survey}}, shorttitle = {Empirical {Evaluation} and {Theoretical} {Analysis} for {Representation} {Learning}}, url = {http://arxiv.org/abs/2204.08226}, doi = {10.48550/arXiv.2204.08226}, abstract = {Representation learning enables us to automatically extract generic feature representations from a dataset to solve another machine learning task. Recently, extracted feature representations by a representation learning algorithm and a simple predictor have exhibited state-of-the-art performance on several machine learning tasks. Despite its remarkable progress, there exist various ways to evaluate representation learning algorithms depending on the application because of the flexibility of representation learning. To understand the current representation learning, we review evaluation methods of representation learning algorithms and theoretical analyses. On the basis of our evaluation survey, we also discuss the future direction of representation learning. Note that this survey is the extended version of Nozawa and Sato (2022).}, urldate = {2022-07-29}, publisher = {arXiv}, author = {Nozawa, Kento and Sato, Issei}, month = apr, year = {2022}, note = {arXiv:2204.08226 [cs]}, keywords = {Computer Science - Computer Vision and Pattern Recognition, Computer Science - Machine Learning}, }
@article{sui_dynamic_2022, title = {Dynamic {Sparse} {Subspace} {Clustering} for {Evolving} {High}-{Dimensional} {Data} {Streams}}, volume = {52}, issn = {2168-2275}, doi = {10.1109/TCYB.2020.3023973}, abstract = {In an era of ubiquitous large-scale evolving data streams, data stream clustering (DSC) has received lots of attention because the scale of the data streams far exceeds the ability of expert human analysts. It has been observed that high-dimensional data are usually distributed in a union of low-dimensional subspaces. In this article, we propose a novel sparse representation-based DSC algorithm, called evolutionary dynamic sparse subspace clustering (EDSSC). It can cope with the time-varying nature of subspaces underlying the evolving data streams, such as subspace emergence, disappearance, and recurrence. The proposed EDSSC consists of two phases: 1) static learning and 2) online clustering. During the first phase, a data structure for storing the statistic summary of data streams, called EDSSC summary, is proposed which can better address the dilemma between the two conflicting goals: 1) saving more points for accuracy of subspace clustering (SC) and 2) discarding more points for the efficiency of DSC. By further proposing an algorithm to estimate the subspace number, the proposed EDSSC does not need to know the number of subspaces. In the second phase, a more suitable index, called the average sparsity concentration index (ASCI), is proposed, which dramatically promotes the clustering accuracy compared to the conventionally utilized SCI index. In addition, the subspace evolution detection model based on the Page-Hinkley test is proposed where the appearing, disappearing, and recurring subspaces can be detected and adapted. Extensive experiments on real-world data streams show that the EDSSC outperforms the state-of-the-art online SC approaches.}, number = {6}, journal = {IEEE Transactions on Cybernetics}, author = {Sui, Jinping and Liu, Zhen and Liu, Li and Jung, Alexander and Li, Xiang}, month = jun, year = {2022}, note = {Conference Name: IEEE Transactions on Cybernetics}, keywords = {Adaptation models, Clustering algorithms, Data models, Data stream clustering (DSC), Data structures, Heuristic algorithms, Indexes, Task analysis, high-dimensional data stream, sparse representation, subspace clustering (SC)}, pages = {4173--4186}, }
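The subspace-evolution detector in EDSSC builds on the Page-Hinkley test; the sketch below shows that test in isolation (detecting an increase in the mean of a monitored statistic), with illustrative parameter values.

class PageHinkley:
    def __init__(self, delta=0.005, threshold=50.0):
        self.delta, self.threshold = delta, threshold
        self.mean, self.n, self.cum, self.min_cum = 0.0, 0, 0.0, 0.0

    def update(self, x):
        self.n += 1
        self.mean += (x - self.mean) / self.n              # running mean of the stream
        self.cum += x - self.mean - self.delta             # cumulative deviation from the mean
        self.min_cum = min(self.min_cum, self.cum)
        return (self.cum - self.min_cum) > self.threshold  # True => change detected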
@article{yu_meta-add_2022, title = {Meta-{ADD}: {A} meta-learning based pre-trained model for concept drift active detection}, volume = {608}, issn = {0020-0255}, shorttitle = {Meta-{ADD}}, url = {https://www.sciencedirect.com/science/article/pii/S0020025522007125}, doi = {10.1016/j.ins.2022.07.022}, abstract = {Concept drift is a phenomenon that commonly occurs in data streams and needs to be detected, because it means that the statistical properties of the target variable, which the model is trying to predict, change over time in an unforeseen way. Most current detection methods are based on a hypothesis-test framework. As a result, a hypothesis test needs to be configured for these methods and, more importantly, they cannot identify the type of drift. Configuring a hypothesis test requires an understanding of the data stream, and the inability to identify the type of concept drift results in a loss of drift information. Hence, in this paper, to remove the need for a hypothesis test and to obtain the type of concept drift, we propose Active Drift Detection based on Meta learning (Meta-ADD), a novel framework that learns to classify concept drift by pre-training a model offline on data streams with known drifts and then fine-tuning the model online to improve detection accuracy. Specifically, in the pre-training phase, we extract meta-features based on the error rates of various concept drifts, after which a pre-trained model called the meta-detector is developed via a prototypical neural network by representing the various concept drift classes as corresponding prototypes. In the detection phase, the meta-detector is fine-tuned to adapt to the real data stream via simple stream-based active learning. Hence, Meta-ADD does not need a hypothesis test to detect concept drifts and identifies their types automatically, which directly supports drift understanding. The experimental results verify the effectiveness of Meta-ADD.}, language = {en}, urldate = {2022-07-12}, journal = {Information Sciences}, author = {Yu, Hang and Zhang, Qingyong and Liu, Tianyu and Lu, Jie and Wen, Yimin and Zhang, Guangquan}, month = aug, year = {2022}, keywords = {Concept drift, Drift detection, Pre-trained model, Prototypical neural networks}, pages = {996--1009}, }
@article{zhang_online_2022, title = {Online {Learning} of {Wearable} {Sensing} for {Human} {Activity} {Recognition}}, issn = {2327-4662}, doi = {10.1109/JIOT.2022.3188785}, abstract = {This paper presents a novel semi-supervised learning method for wearable sensors to recognize human activities. The proposed method, termed tri-VFDT (Very Fast Decision Tree), is a more efficient version of the Hoeffding tree: three VFDTs are generated from the original labeled example set and refined using unlabeled examples. Based on the heuristic growth characteristics of the VFDT, a tri-training framework is proposed that uses unlabeled data to update the model without labeled data. This significantly reduces the computational time and storage required for data processing. In addition, the proposed method is embedded into wearable devices for online learning, where the incoming test data stream is treated as unlabeled data for updating the model. The experiment collects a 16-minute data stream with frequent motion-state switching while the wearable devices recognize motions in real time. An experimental comparison between computation on the wearable device and on a desktop computer has also been undertaken for performance evaluation. The obtained results show only a minor difference in F1-score between running the proposed method online or offline, which is a prominent characteristic for wearable computing within the Internet of Things (IoT).}, journal = {IEEE Internet of Things Journal}, author = {Zhang, Yiwei and Gao, Bin and Yang, Daili and Woo, Wai Lok and Wen, Houlai}, year = {2022}, note = {Conference Name: IEEE Internet of Things Journal}, keywords = {Classification algorithms, Computational modeling, Feature extraction, Histograms, Predictive models, Training, Wearable computers, online learning, real-time activity recognition, semi-supervised learning, wearable device}, pages = {1--1}, }
@article{gonzalez-muniz_health_2022, title = {Health indicator for machine condition monitoring built in the latent space of a deep autoencoder}, volume = {224}, issn = {0951-8320}, url = {https://www.sciencedirect.com/science/article/pii/S0951832022001417}, doi = {10.1016/j.ress.2022.108482}, abstract = {The construction of effective health indicators plays a key role in the engineering systems field: they reflect the degradation degree of the system under study, thus providing vital information for critical tasks ranging from anomaly detection to remaining useful life estimation, with benefits such as reduced maintenance costs, improved productivity or increased machine availability. The reconstruction error of deep autoencoders has been widely used in the literature for this purpose, but this approach does not fully exploit the hierarchical nature of deep models. Instead, we propose to take advantage of the disentangled representations of data that are available in the latent space of autoencoders, by using the latent reconstruction error as machine health indicator. We have tested our proposal on three different datasets, considering two types of autoencoders (deep autoencoder and variational autoencoder), and comparing its performance with that of state-of-the-art approaches in terms of well-known quality metrics. The results of the research demonstrate the capability of our health indicator to outperform conventional approaches, in the three datasets, and regardless of the type of autoencoder used to generate the residuals. In addition, we provide some intuition on the suitability of latent spaces for the monitoring of machinery condition.}, language = {en}, urldate = {2022-05-14}, journal = {Reliability Engineering \& System Safety}, author = {González-Muñiz, Ana and Díaz, Ignacio and Cuadrado, Abel A. and García-Pérez, Diego}, month = aug, year = {2022}, keywords = {Anomaly detection, Deep autoencoder, Engineering systems, Health indicator, Latent space}, pages = {108482}, }
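One plausible reading of the latent reconstruction error is sketched below: encode an input, decode it, re-encode the reconstruction, and use the distance between the two latent codes as the health indicator. The encoder and decoder are assumed to be an already trained autoencoder; this is an illustration, not the authors' implementation.

import numpy as np

def latent_health_indicator(encoder, decoder, x):
    z = np.asarray(encoder(x))                 # latent code of the raw measurement
    x_hat = decoder(z)                         # reconstruction in input space
    z_hat = np.asarray(encoder(x_hat))         # latent code of the reconstruction
    return float(np.linalg.norm(z - z_hat))    # larger distance => poorer health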
@article{reinartz_pytep_2022, title = {{pyTEP}: {A} {Python} package for interactive simulations of the {Tennessee} {Eastman} process}, volume = {18}, issn = {2352-7110}, shorttitle = {{pyTEP}}, url = {https://www.sciencedirect.com/science/article/pii/S2352711022000449}, doi = {10.1016/j.softx.2022.101053}, abstract = {pyTEP is an open-source simulation API for the Tennessee Eastman process in Python. It facilitates the setup of complex simulation scenarios and provides the option of interactive simulation. The Tennessee Eastman process has been the go-to benchmark for statistical process monitoring and machine learning based fault-detection approaches for continuous chemical processes in recent years, but its potential outside these domains remains largely untapped. Existing simulators are tailored towards simulations of stationary operating conditions in the presence of faults, but further extensions for more complex simulation scenarios are time-consuming, which may discourage researchers from adopting the process. Through pyTEPs API, users can configure simulations, change operating conditions and store simulation data without being exposed to the underlying mechanics of the simulator. In addition to the newly introduced features, pyTEP promises more versatility and more straightforward usage than existing TEP simulators.}, language = {en}, urldate = {2022-05-04}, journal = {SoftwareX}, author = {Reinartz, Christopher and Enevoldsen, Thomas T.}, month = jun, year = {2022}, keywords = {Chemical process simulation, Python, Simulation framework, Tennessee Eastman process}, pages = {101053}, }
@article{haug_standardized_2022, title = {Standardized {Evaluation} of {Machine} {Learning} {Methods} for {Evolving} {Data} {Streams}}, url = {http://arxiv.org/abs/2204.13625}, abstract = {Due to the unspecified and dynamic nature of data streams, online machine learning requires powerful and flexible solutions. However, evaluating online machine learning methods under realistic conditions is difficult. Existing work therefore often draws on different heuristics and simulations that do not necessarily produce meaningful and reliable results. Indeed, in the absence of common evaluation standards, it often remains unclear how online learning methods will perform in practice or in comparison to similar work. In this paper, we propose a comprehensive set of properties for high-quality machine learning in evolving data streams. In particular, we discuss sensible performance measures and evaluation strategies for online predictive modelling, online feature selection and concept drift detection. As one of the first works, we also look at the interpretability of online learning methods. The proposed evaluation standards are provided in a new Python framework called float. Float is completely modular and allows the simultaneous integration of common libraries, such as scikit-multiflow or river, with custom code. Float is open-sourced and can be accessed at https://github.com/haugjo/float. In this sense, we hope that our work will contribute to more standardized, reliable and realistic testing and comparison of online machine learning methods.}, urldate = {2022-05-03}, journal = {arXiv:2204.13625 [cs, stat]}, author = {Haug, Johannes and Tramountani, Effi and Kasneci, Gjergji}, month = apr, year = {2022}, note = {arXiv: 2204.13625}, keywords = {Computer Science - Machine Learning, Statistics - Machine Learning}, }
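Independently of float's own API, the evaluation strategy such frameworks standardize is the prequential (test-then-train) protocol; the generic sketch below assumes a river-style estimator with predict_one/learn_one and is only meant to illustrate the protocol.

def prequential_accuracy(model, stream):
    # stream yields (features, label) pairs in arrival order
    correct, seen = 0, 0
    for x, y in stream:
        y_pred = model.predict_one(x)   # test on the instance first ...
        if y_pred is not None:
            correct += int(y_pred == y)
            seen += 1
        model.learn_one(x, y)           # ... then train on the very same instance
    return correct / max(seen, 1)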
@article{guo_online_2022, title = {Online {Anomaly} {Detection} of {Industrial} {IoT} {Based} on {Hybrid} {Machine} {Learning} {Architecture}}, volume = {2022}, issn = {1687-5265}, url = {https://www.hindawi.com/journals/cin/2022/8568917/}, doi = {10.1155/2022/8568917}, abstract = {Industrial IoT (IIoT) in Industry 4.0 integrates everything at the information-technology level with the operational-technology level and aims to improve Business to Business (B2B) services (from production to public services). It includes Machine to Machine (M2M) interaction either for process control (e.g., factory processes, fleet tracking) or as part of self-organizing cyber-physical distributed control systems without human intervention. A critical factor in completing the abovementioned actions is the development of intelligent software systems in the context of automatic control of the business environment, with the ability to analyze the existing equipment in real time through the available interfaces (hardware-in-the-loop). In this spirit, this paper presents an advanced intelligent approach to real-time monitoring of the operation of industrial equipment. A novel hybrid methodology is used that combines memory neural networks with Bayesian methods, examining a variety of characteristic quantities of vibration signals extracted in the time domain, with the aim of detecting abnormalities in active IIoT equipment in real time.}, language = {en}, urldate = {2022-05-03}, journal = {Computational Intelligence and Neuroscience}, author = {Guo, Jia and Shen, Yue}, month = apr, year = {2022}, note = {Publisher: Hindawi}, pages = {e8568917}, }
@article{andersen_easy_2022, title = {An easy to use {GUI} for simulating big data using {Tennessee} {Eastman} process}, volume = {38}, issn = {1099-1638}, url = {https://onlinelibrary.wiley.com/doi/abs/10.1002/qre.2975}, doi = {10.1002/qre.2975}, abstract = {Data-driven process monitoring and control techniques and their application to industrial chemical processes are gaining popularity due to the current focus on Industry 4.0, digitalization and the Internet of Things. However, for the development of such techniques, there are significant barriers that must be overcome in obtaining sufficiently large and reliable datasets. As a result, the use of real plant and process data in developing and testing data-driven process monitoring and control tools can be difficult without investing significant efforts in acquiring, treating, and interpreting the data. Therefore, researchers need a tool that effortlessly generates large amounts of realistic and reliable process data without the requirement for additional data treatment or interpretation. In this work, we propose a data generation platform based on the Tennessee Eastman Process simulation benchmark. A graphical user interface (GUI) developed in MATLAB Simulink is presented that enables users to generate massive amounts of data for testing applicability of big data concepts in the realm of process control for continuous time-dependent processes. An R-Shiny app that interacts with the data generation tool is also presented for illustration purposes. The app can visualize the results generated by the Tennessee Eastman Process and can carry out a standard fault detection and diagnosis studies based on PCA. The data generator GUI is available free of charge for research purposes at https://github.com/dtuprodana/TEP.}, language = {en}, number = {1}, urldate = {2022-05-02}, journal = {Quality and Reliability Engineering International}, author = {Andersen, Emil B. and Udugama, Isuru A. and Gernaey, Krist V. and Khan, Abdul R. and Bayer, Christoph and Kulahci, Murat}, year = {2022}, note = {\_eprint: https://onlinelibrary.wiley.com/doi/pdf/10.1002/qre.2975}, keywords = {chemical process, digitalization, industry 4.0, process monitoring and control, process simulator, process surveillance}, pages = {264--282}, }
@article{watson_sequential_2022, title = {Sequential detection of a temporary change in multivariate time series}, volume = {127}, issn = {1051-2004}, url = {https://www.sciencedirect.com/science/article/pii/S1051200422001622}, doi = {10.1016/j.dsp.2022.103545}, abstract = {In this work, we aim to provide a new and efficient recursive detection method for temporarily monitored signals. Motivated by the case of the propagation of an event over a field of sensors, we assumed that the change in the statistical properties in the monitored signals can only be temporary. Unfortunately, to our best knowledge, existing recursive and simple detection techniques such as the ones based on the cumulative sum (CUSUM) do not consider the temporary aspect of the change in a multivariate time series. In this paper, we propose a novel simple and efficient sequential detection algorithm, named Temporary-Event-CUSUM (TE-CUSUM). By combining with a new adaptive way to aggregate local CUSUM variables from each data stream, we empirically show that the TE-CUSUM has a very good detection rate in the case of an event passing through a field of sensors in a very noisy environment.}, language = {en}, urldate = {2022-04-09}, journal = {Digital Signal Processing}, author = {Watson, Victor and Septier, François and Armand, Patrick and Duchenne, Christophe}, month = jul, year = {2022}, keywords = {CUSUM, Multivariate time series, Sequential detection, Temporary event}, pages = {103545}, }
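For context, a textbook one-sided CUSUM on a single stream is sketched below; TE-CUSUM itself adds the temporary-change model and the adaptive aggregation of local CUSUM statistics across sensors described in the abstract. The drift and threshold values are illustrative.

def cusum_upper(stream, target_mean=0.0, drift=0.5, threshold=5.0):
    # Yield (index, alarm) pairs; alarm is True while the upper CUSUM statistic exceeds the threshold.
    s = 0.0
    for i, x in enumerate(stream):
        s = max(0.0, s + (x - target_mean - drift))
        yield i, s > threshold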
@article{malik_human_2022, title = {Human action interpretation using convolutional neural network: a survey}, volume = {33}, issn = {1432-1769}, shorttitle = {Human action interpretation using convolutional neural network}, url = {https://doi.org/10.1007/s00138-022-01291-0}, doi = {10.1007/s00138-022-01291-0}, abstract = {Human action interpretation (HAI) is one of the trending domains in the era of computer vision. It can further be divided into human action recognition (HAR) and human action detection (HAD). The HAR analyzes frames and provides label(s) to overall video, whereas the HAD localizes actor first, in each frame, and then estimates the action score for the detected region. The effectiveness of a HAI model is highly dependent on the representation of spatiotemporal features and the model’s architectural design. For the effective representation of these features, various studies have been carried out. Moreover, to better learn these features and to get the action score on the basis of these features, different designs of deep architectures have also been proposed. Among various deep architectures, convolutional neural network (CNN) is relatively more explored for HAI due to its lesser computational cost. To provide overview of these efforts, various surveys have been published to date; however, none of these surveys is focusing the features’ representation and design of proposed architectures in detail. Secondly, none of these studies is focusing the pose assisted HAI techniques. This study provides a more detailed survey on existing CNN-based HAI techniques by incorporating the frame level as well as pose level spatiotemporal features-based techniques. Besides these, it offers comparative study on different publicly available datasets used to evaluate HAI models based on various spatiotemporal features’ representations. Furthermore, it also discusses the limitations and challenges of the HAI and concludes that human action interpretation from visual data is still very far from the actual interpretation of human action in realistic videos which are continuous in nature and may contain multiple human beings performing multiple actions sequentially or in parallel.}, language = {en}, number = {3}, urldate = {2022-03-25}, journal = {Machine Vision and Applications}, author = {Malik, Zainab and Shapiai, Mohd Ibrahim Bin}, month = mar, year = {2022}, pages = {37}, }
@article{han_streamdfp_2022, title = {{StreamDFP}: {A} {General} {Stream} {Mining} {Framework} for {Adaptive} {Disk} {Failure} {Prediction}}, issn = {1557-9956}, shorttitle = {{StreamDFP}}, doi = {10.1109/TC.2022.3160365}, abstract = {We explore machine learning for accurately predicting imminent disk failures and hence providing proactive fault tolerance for modern large-scale storage systems. Current disk failure prediction approaches are mostly offline and assume that the disk logs required for training learning models are available a priori. However, disk logs are often continuously generated as an evolving data stream, in which the statistical patterns vary over time (also known as concept drift). Such a challenge motivates the need of online techniques that perform training and prediction on the incoming stream of disk logs in real time, while being adaptive to concept drift. We first measure and demonstrate the existence of concept drift on various disk models from Backblaze and Alibaba Cloud. Motivated by our study, we design STREAMDFP, a general stream mining framework for disk failure prediction with concept-drift adaption based on three key techniques, namely online labeling, concept-drift-aware training, and general prediction, with a primary objective of supporting various machine learning algorithms. We extend STREAMDFP to support online transfer learning for minority disk models with concept-drift adaptation. Our evaluation shows that STREAMDFP improves the prediction accuracy significantly compared to without concept-drift adaptation under various settings, and achieves reasonably high stream processing performance.}, journal = {IEEE Transactions on Computers}, author = {Han, Shujie and Lee, Patrick P. C. and Shen, Zhirong and He, Cheng and Liu, Yi and Huang, Tao}, year = {2022}, note = {Conference Name: IEEE Transactions on Computers}, keywords = {Adaptation models, Machine learning algorithms, Prediction algorithms, Predictive models, Production, Random forests, Training, and online transfer learning, concept drift, disk failure prediction, stream mining}, pages = {1--1}, }
@article{zhang_real-time_2022, title = {Real-time prediction of rate of penetration by combining attention-based gated recurrent unit network and fully connected neural networks}, issn = {0920-4105}, url = {https://www.sciencedirect.com/science/article/pii/S0920410522002820}, doi = {10.1016/j.petrol.2022.110396}, abstract = {Data-driven models are widely used to predict rate of penetration. However, there are still challenges on real-time predictions considering influences of formation properties and bit wear. In this paper, a novel data-driven model is proposed to tackle this problem by combining an attention-based Gated Recurrent Unit network and fully connected neural networks. At first, input features of the model are elaborately selected by physical drilling laws and statistical analyzes. Then, four sub-networks are employed to construct the whole model structure, where formation properties are assessed using well-logging data and bit wear is evaluated by introducing an attention-based Gated Recurrent Unit network. Next, the model is dynamically updated with data streams by implementing the sliding window method to realize real-time predictions. Finally, the model performance is thoroughly analyzed based on ten field drilling datasets after optimizing model hyperparameters using the orthogonal experiment method. Results indicate that the model is accurate and robust to give predictions after training with the first several data streams. Compared with the conventional data-driven models, the proposed model shows great superiority due to the sub-network structure, the Gated Recurrent Unit network, and the attention mechanism. The model proposed herein opens opportunities for real-time prediction of rate of penetration in the field with high accuracy and robustness.}, language = {en}, urldate = {2022-03-19}, journal = {Journal of Petroleum Science and Engineering}, author = {Zhang, Chengkai and Song, Xianzhi and Su, Yinao and Li, Gensheng}, month = mar, year = {2022}, keywords = {Attention mechanism, Data-driven model, Gated recurrent unit (GRU), Neural network, Rate of penetration, Real-time prediction}, pages = {110396}, }
@article{spanninger_review_2022, title = {A review of train delay prediction approaches}, volume = {22}, issn = {2210-9706}, url = {https://www.sciencedirect.com/science/article/pii/S2210970622000166}, doi = {10.1016/j.jrtpm.2022.100312}, abstract = {Railway operations are vulnerable to delays. Accurate predictions of train arrival and departure delays improve the passenger service quality and are essential for real-time railway traffic management to minimise their further spreading. This review provides a synoptic overview and discussion covering the breadth of diverse approaches to predict train delays. We first categorise research contributions based on their underlying modelling paradigm (data-driven and event-driven) and their mathematical model. We then distinguish between very short to long-term predictions and classify different input data sources that have been considered in the literature. We further discuss advantages and disadvantages of producing deterministic versus stochastic predictions, the applicability of different approaches during disruptions and their interpretability. By comparing the results of the included contributions, we can indicate that the prediction error generally increases when broadening the prediction horizon. We find that data-driven approaches might have the edge on event-driven approaches in terms of prediction accuracy, whereas event-driven approaches that explicitly model the dynamics and dependencies of railway traffic have their strength in providing interpretable predictions, and are more robust concerning disruption scenarios. The growing availability of railway operations data is expected to increase the appeal of big-data and machine learning methods.}, language = {en}, urldate = {2022-03-19}, journal = {Journal of Rail Transport Planning \& Management}, author = {Spanninger, Thomas and Trivella, Alessio and Büchel, Beda and Corman, Francesco}, month = jun, year = {2022}, keywords = {Forecasting, Prediction, Railways, Train delay}, pages = {100312}, }
@article{balakrishna_d-acsm_2022, title = {D-{ACSM}: a technique for dynamically assigning and adjusting cluster patterns for {IoT} data analysis}, issn = {1573-0484}, shorttitle = {D-{ACSM}}, url = {https://doi.org/10.1007/s11227-022-04427-1}, doi = {10.1007/s11227-022-04427-1}, abstract = {With rapid advancements in wireless communications and sensor technologies, the Internet of Things (IoT) has advanced dramatically in past years. In IoT, the data created by a large number of sensors are extremely intricate, diverse, and enormous, and it is unprocessed. These may have underlying patterns that are not visible that must be discovered to do large-scale data analysis. Several clustering algorithms have been developed and proved effective in data analysis in recent decades; however, they are intentionally designed for dealing with static data and infeasible for processing huge data in IoT environments. As a result, this research proposes a Density-based Adaptive Cluster Split and Merge (D-ACSM) technique for dynamically assigning and changing cluster patterns for IoT data processing to solve this challenge. For successful cluster analysis, the local density and minimum distance between dynamic data objects were first measured. In addition, the D-ACSM technique used Cluster Splitting and Merging (CSM) to alter cluster patterns between surrounding dynamic data objects. In addition, the suggested D-ACSM technique’s results were evaluated using four IoT benchmarked datasets that varied in the number of arriving data objects. Finally, the proposed D-ACSM technique improves the results of the performance metrics by 4\%, 5\%, 3\%, and 6\% on the BWS-AS dataset, CRAWDAD dataset, Minute\_Weather dataset, and LinkedSensorData dataset, respectively, when compared to the AC-ICSM, IMMFC, and IAPNA techniques used for cluster analysis in all data chunks.}, language = {en}, urldate = {2022-03-17}, journal = {The Journal of Supercomputing}, author = {Balakrishna, Sivadi}, month = mar, year = {2022}, }
@article{liu_aircraft_2022, title = {Aircraft engine remaining useful life estimation via a double attention-based data-driven architecture}, volume = {221}, issn = {0951-8320}, url = {https://www.sciencedirect.com/science/article/pii/S0951832022000102}, doi = {10.1016/j.ress.2022.108330}, abstract = {Remaining useful life (RUL) estimation has been intensively studied, given its important role in prognostics and health management (PHM) of industry. Recently, data-driven structures such as convolutional neural networks (CNNs), have achieved outstanding RUL prediction performance. However, conventional CNNs do not include an adequate mechanism for adaptively weighing input features. In this paper, we propose a double attention-based data-driven framework for aircraft engine RUL prognostics. Specifically, a channel attention-based CNN was utilized to apply greater weights to more significant features. Next, a Transformer was used to focus attention on these features at critical time steps. We validated the effectiveness of the proposed framework on benchmark datasets for aircraft engine RUL estimation. The experimental results indicate that the proposed double attention-based architecture outperformed the existing state-of-the-art (SOTA) algorithms. The double attention-based RUL prediction method can detect the risk of equipment failure and reduce loss.}, language = {en}, urldate = {2022-03-15}, journal = {Reliability Engineering \& System Safety}, author = {Liu, Lu and Song, Xiao and Zhou, Zhetao}, month = may, year = {2022}, keywords = {Aircraft engine, Double attention, Remaining useful life estimation, Transformer network}, pages = {108330}, }
@article{de_pater_alarm-based_2022, title = {Alarm-based predictive maintenance scheduling for aircraft engines with imperfect {Remaining} {Useful} {Life} prognostics}, volume = {221}, issn = {0951-8320}, url = {https://www.sciencedirect.com/science/article/pii/S0951832022000175}, doi = {10.1016/j.ress.2022.108341}, abstract = {The increasing availability of condition monitoring data for aircraft components has incentivized the development of Remaining Useful Life (RUL) prognostics in the past years. However, only few studies consider the integration of such prognostics into maintenance planning. In this paper we propose a dynamic, predictive maintenance scheduling framework for a fleet of aircraft taking into account imperfect RUL prognostics. These prognostics are periodically updated. Based on the evolution of the prognostics over time, alarms are triggered. The scheduling of maintenance tasks is initiated only after these alarms are triggered. Alarms ensure that maintenance tasks are not rescheduled multiple times. A maintenance task is scheduled using a safety factor, to account for potential errors in the RUL prognostics and thus avoid component failures. We illustrate our approach for a fleet of 20 aircraft, each equipped with 2 turbofan engines. A Convolution Neural Network is proposed to obtain RUL prognostics. An integer linear program is used to schedule aircraft for maintenance. With our alarm-based maintenance framework, the costs with engine failures account for only 7.4\% of the total maintenance costs. In general, we provide a roadmap to integrate imperfect RUL prognostics into the maintenance planning of a fleet of vehicles.}, language = {en}, urldate = {2022-03-15}, journal = {Reliability Engineering \& System Safety}, author = {de Pater, Ingeborg and Reijns, Arthur and Mitici, Mihaela}, month = may, year = {2022}, keywords = {Aircraft maintenance, Fleet of aircraft, Predictive maintenance planning, RUL prognostics, Turbofan engines}, pages = {108341}, }
@article{lara-benitez_data_2022, title = {Data streams classification using deep learning under different speeds and drifts}, issn = {1367-0751}, url = {https://doi.org/10.1093/jigpal/jzac033}, doi = {10.1093/jigpal/jzac033}, abstract = {Processing data streams arriving at high speed requires the development of models that can provide fast and accurate predictions. Although deep neural networks are the state-of-the-art for many machine learning tasks, their performance in real-time data streaming scenarios is a research area that has not yet been fully addressed. Nevertheless, much effort has been put into the adaption of complex deep learning (DL) models to streaming tasks by reducing the processing time. The design of the asynchronous dual-pipeline DL framework allows making predictions of incoming instances and updating the model simultaneously, using two separate layers. The aim of this work is to assess the performance of different types of DL architectures for data streaming classification using this framework. We evaluate models such as multi-layer perceptrons, recurrent, convolutional and temporal convolutional neural networks over several time series datasets that are simulated as streams at different speeds. In addition, we evaluate how the different architectures react to concept drifts typically found in evolving data streams. The obtained results indicate that convolutional architectures achieve a higher performance in terms of accuracy and efficiency, but are also the most sensitive to concept drifts.}, urldate = {2022-03-14}, journal = {Logic Journal of the IGPL}, author = {Lara-Benítez, Pedro and Carranza-García, Manuel and Gutiérrez-Avilés, David and Riquelme, José C}, month = feb, year = {2022}, pages = {jzac033}, }
@article{chen_physics-informed_2022, title = {Physics-{Informed} {LSTM} hyperparameters selection for gearbox fault detection}, volume = {171}, issn = {0888-3270}, url = {https://www.sciencedirect.com/science/article/pii/S0888327022000942}, doi = {10.1016/j.ymssp.2022.108907}, abstract = {A situation often encountered in the condition monitoring (CM) and health management of gearboxes is that a large volume of CM data (e.g., vibration signal) collected from a healthy state is available but CM data from a faulty state unavailable. Fault detection under such a situation is usually tackled by modeling the baseline CM data and then detect the fault by examining any deviation of the baseline model versus newly monitored data. Given that the CM data is mostly time series, the long-short term memory (LSTM) neural network can be employed for baseline CM data modeling. The LSTM is free from the choice of the number of lagged input time series and can also store both long-term and short-term time series dependency information. However, we found that an LSTM with its hyperparameters selected whilst minimizing validation mean squared error (VAMSE) does not differentiate the faulty and healthy states well. There is still room for detectability improvement. In this paper, we propose a physics-informed hyperparameters selection strategy for the LSTM identification and subsequently the fault detection of gearboxes. The key idea of the proposed strategy is to select hyperparameters based on maximizing the discrepancy between healthy and physics-informed faulty states, as opposed to minimizing VAMSE. Case studies have been conducted to detect the gear tooth crack and tooth wear using laboratory test rigs. Results have shown that the proposed physics-informed hyperparameters selection strategy returns an LSTM that can better detect these faults than the LSTM returned from minimizing VAMSE.}, language = {en}, urldate = {2022-03-14}, journal = {Mechanical Systems and Signal Processing}, author = {Chen, Yuejian and Rao, Meng and Feng, Ke and Zuo, Ming J.}, month = may, year = {2022}, keywords = {Fault Detection, Gearbox, Long-Short Term Memory, Physics-Informed Hyperparameters Selection}, pages = {108907}, }
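The selection rule described above can be written schematically as follows: instead of minimizing validation MSE, keep the hyperparameter set whose baseline LSTM maximizes the residual gap between healthy data and physics-informed faulty data. The helpers train_lstm and residual are hypothetical placeholders for the model fitting and residual computation.

def select_hyperparameters(grid, healthy_train, healthy_val, faulty_simulated,
                           train_lstm, residual):
    best_params, best_gap = None, float("-inf")
    for params in grid:
        model = train_lstm(healthy_train, **params)          # baseline model of healthy behaviour
        gap = residual(model, faulty_simulated) - residual(model, healthy_val)
        if gap > best_gap:                                   # larger gap => better fault detectability
            best_params, best_gap = params, gap
    return best_params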
@article{wu_condition-based_2022, series = {The 29th {CIRP} {Conference} on {Life} {Cycle} {Engineering}, {April} 4 – 6, 2022, {Leuven}, {Belgium}.}, title = {Condition-{Based} {Monitoring} and {Novel} {Fault} {Detection} {Based} on {Incremental} {Learning} {Applied} to {Rotary} {Systems}}, volume = {105}, issn = {2212-8271}, url = {https://www.sciencedirect.com/science/article/pii/S2212827122001329}, doi = {10.1016/j.procir.2022.02.131}, abstract = {Thanks to the development of new technologies such as sensor networks and advanced computational power, the research field of condition-based monitoring (CBM) has drawn increasing attention in manufacturing. With the aim of enhancing equipment reliability, leading to a reduction in maintenance costs, one of the most crucial challenges dealing with CBM is the detection or prediction of unseen/uncharacterized event during manufacturing system operation. Therefore, the identification of novel fault conditions and learning of new patterns are believed to be an important and mandatory thrust in CBM research. In this work, a convolutional neural networks and autoencoder (CNN-AE) based incremental learning method is presented. It applies CNN-AE to identify various types and severities of faults under scenarios of previously unseen fault conditions. In this method, the spectrograms generated from raw sensory signals are acquired and labelled along with working condition information via inspection. A model composed of two sub-models for novel fault identification is then constructed and trained using the time-frequency spectrograms of available conditions of operation from a rotary system. One sub-model based on CNN is for known fault classification and the other sub-model based on AE is for novelty detection, where the two sub-models share an architecture for improving efficiency and accuracy. Finally, incremental learning is performed to retrain the model with the data identified as a novel fault condition. The performance of this method is validated via an experimental case study conducted on a fault machinery simulator.}, language = {en}, urldate = {2022-03-14}, journal = {Procedia CIRP}, author = {Wu, Haiyue and Huang, Aihua and Sutherland, John W.}, month = jan, year = {2022}, keywords = {condition-based monitoring, incremental learning, novel fault detection, rotary system}, pages = {788--793}, }
@article{kharitonov_comparative_2022, series = {3rd {International} {Conference} on {Industry} 4.0 and {Smart} {Manufacturing}}, title = {Comparative analysis of machine learning models for anomaly detection in manufacturing}, volume = {200}, issn = {1877-0509}, url = {https://www.sciencedirect.com/science/article/pii/S1877050922003398}, doi = {10.1016/j.procs.2022.01.330}, abstract = {The introduction of various technologies in the context of Industry 4.0 allowed collecting monitoring data for various fields in manufacturing. Shop-floor and production data can be used for further analysis to extract knowledge. In this paper, an extensive evaluation of ten Machine Learning (ML) models for anomaly detection in manufacturing is conducted. The evaluation is conducted on multiple distinct ML algorithms, including conventional ML and a representative of Deep Neural Network (DNN) based algorithms. The ML models are trained on real production schedules to detect anomalous behavior in the overall system efficiency as well as violations in the delivery date of jobs. Multiple combinations of relevant features are tested during the training of the models. In essence, the objective of the ML models is to detect anomalous unknown breakdowns in the machines that lead to disruption in the overall performance of the system. The evaluation of the ML models is conducted on independent datasets with artificially injected machine breakdowns to establish the ground truth. These schedules with machine breakdowns are obtained through a simulation model. The results point to the high performance of a conventional ML algorithm, KNN. The performance demonstrated by a DNN-based AutoEncoder, conventional Local Outlier Factor-based Feature Bagging, and a recent Copula-Based Outlier Detection (COPOD) algorithms suggest limited applicability, which strongly depends on the shape of the data.}, language = {en}, urldate = {2022-03-14}, journal = {Procedia Computer Science}, author = {Kharitonov, Andrey and Nahhas, Abdulrahman and Pohl, Matthias and Turowski, Klaus}, month = jan, year = {2022}, keywords = {Anomaly detection, Industry 4.0, Machine learning, Predictive Maintenance}, pages = {1288--1297}, }
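A comparison of this kind can be reproduced with off-the-shelf detectors from the PyOD library, as in the sketch below; the synthetic data standing in for production schedules with injected breakdowns is an illustrative assumption.

import numpy as np
from sklearn.metrics import roc_auc_score
from pyod.models.knn import KNN
from pyod.models.copod import COPOD
from pyod.models.lof import LOF

rng = np.random.default_rng(0)
X_normal = rng.normal(0.0, 1.0, size=(950, 8))       # stand-in for normal schedules
X_anom = rng.normal(4.0, 1.0, size=(50, 8))          # stand-in for injected breakdowns
X = np.vstack([X_normal, X_anom])
y = np.r_[np.zeros(950), np.ones(50)]

for name, detector in [("KNN", KNN()), ("COPOD", COPOD()), ("LOF", LOF())]:
    detector.fit(X)                                   # unsupervised fit on the full data
    scores = detector.decision_function(X)            # higher score = more anomalous
    print(name, round(roc_auc_score(y, scores), 3))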
@article{terziyan_explainable_2022, series = {3rd {International} {Conference} on {Industry} 4.0 and {Smart} {Manufacturing}}, title = {Explainable {AI} for {Industry} 4.0: {Semantic} {Representation} of {Deep} {Learning} {Models}}, volume = {200}, issn = {1877-0509}, shorttitle = {Explainable {AI} for {Industry} 4.0}, url = {https://www.sciencedirect.com/science/article/pii/S1877050922002290}, doi = {10.1016/j.procs.2022.01.220}, abstract = {Artificial Intelligence is an important asset of Industry 4.0. Current discoveries within machine learning and particularly in deep learning enable qualitative change within the industrial processes, applications, systems and products. However, there is an important challenge related to explainability of (and, therefore, trust to) the decisions made by the deep learning models (aka black-boxes) and their poor capacity for being integrated with each other. Explainable artificial intelligence is needed instead but without loss of effectiveness of the deep learning models. In this paper we present the transformation technique between black-box models and explainable (as well as interoperable) classifiers on the basis of semantic rules via automatic recreation of the training datasets and retraining the decision trees (explainable models) in between. Our transformation technique results to explainable rule-based classifiers with good performance and efficient training process due to embedded incremental ignorance discovery and adversarial samples (“corner cases”) generation algorithms. We have also shown the use-case scenario for such explainable and interoperable classifiers, which is collaborative condition monitoring, diagnostics and predictive maintenance of distributed (and isolated) smart industrial assets while preserving data and knowledge privacy of the users.}, language = {en}, urldate = {2022-03-14}, journal = {Procedia Computer Science}, author = {Terziyan, Vagan and Vitko, Oleksandra}, month = jan, year = {2022}, keywords = {Explainable Artificial Intelligence, Industry 4.0, deep learning, predictive maintenance, semantic web}, pages = {216--226}, }
@article{wandji_characterization_2022, series = {The 29th {CIRP} {Conference} on {Life} {Cycle} {Engineering}, {April} 4 – 6, 2022, {Leuven}, {Belgium}.}, title = {Characterization of the state of health of a complex system at the end of use}, volume = {105}, issn = {2212-8271}, url = {https://www.sciencedirect.com/science/article/pii/S2212827122000099}, doi = {10.1016/j.procir.2022.02.009}, abstract = {The state of health (SoH) of an end-of-life product is one of the levers for optimizing the circular economy (CE) process in order to allow the product life-extension. Many approaches have been developed in the literature to estimate the SoH of a complex system (CS). In this study, we asked ourselves the following two questions: First, how to optimize the circular lifecycle scenarios of the components of a product at its end of life? And second, how to estimate the SoH of a product at the end of its life? To answer these questions, we proceeded as follows. First of all, the state of health of a product needs to be considered as an important parameter as well as performance or reliability. To estimate the SoH, it is necessary to identify the product parameters to be observed. The problem here is to choose the most relevant parameters among all those available for a CS. To do this, we have proposed a condition-based maintenance (CBM) approach, which consists in establishing the fault tree of a product. It consists of functionally breaking down a product into its various components and identifying the main failures for each of them. Then, these failures are used to identify the parameters to be monitored. Second, based on the most relevant parameters, the health indicators needed to estimate the SoH of the product are obtained. Then, the Prognostic and Health Management approach (PHM) is proposed in order to estimate the SoH. In the objective of providing a general solution that could be used for estimating the health status of any product, we have proposed a generic framework for the PHM approach. It serves as a guide in choosing the right approach according to the situation. Then, we proposed a decision-making strategy to optimize the process of orienting components in circular loops. This strategy is based only on the technical-functional indicator, which is the SoH of the components. Finally, we showed an example of the implementation of the proposed method for the case of the electrical scooter motor.}, language = {en}, urldate = {2022-03-14}, journal = {Procedia CIRP}, author = {Wandji, Christian and Rejeb, Helmi Ben and Zwolinski, Peggy}, month = jan, year = {2022}, keywords = {Health Management, Health indicator, Prognostic, State of health, circular economy, condition-based maintenance, lifecycle engineering}, pages = {49--54}, }
@article{coelho_predictive_2022, series = {3rd {International} {Conference} on {Industry} 4.0 and {Smart} {Manufacturing}}, title = {Predictive maintenance on sensorized stamping presses by time series segmentation, anomaly detection, and classification algorithms}, volume = {200}, issn = {1877-0509}, url = {https://www.sciencedirect.com/science/article/pii/S1877050922003271}, doi = {10.1016/j.procs.2022.01.318}, abstract = {Sheet metal forming tools, like stamping presses, play a ubiquitous role in the manufacture of several products. With increasing requirements of quality and efficiency, ensuring maximum uptime of these tools is fundamental to marketplace competitiveness. Using anomaly detection and predictive maintenance techniques, it is possible to develop lower risk and more intelligent approaches to maintenance scheduling; however, industrial implementations of these methods remain scarce due to the difficulties of obtaining acceptable results in real-world scenarios, making applications of such techniques in stamping processes seldom found. In this work, we propose a combination of two distinct approaches: (a) time segmentation together with feature dimension reduction and anomaly detection; and (b) machine learning classification algorithms, for effective downtime prediction. The approach (a)+(b) allows for an improvement rate up to 22.971\% of the macro F1-score, when compared to approach (b) alone. A ROC AUC index of 96\% is attained by using Randomized Decision Trees, the best of the twelve classifiers tested. A use case with a decentralized predictive maintenance architecture for the downtime forecasting of a stamping press, which is a critical machine in the manufacturing facilities of Bosch Thermo Technology, is discussed.}, language = {en}, urldate = {2022-03-14}, journal = {Procedia Computer Science}, author = {Coelho, Daniel and Costa, Diogo and Rocha, Eugénio M. and Almeida, Duarte and Santos, José P.}, month = jan, year = {2022}, keywords = {Anomaly Detection, Machine Learning, Predictive Maintenance, Time Segmentation}, pages = {1184--1193}, }
@article{nikfar_two-phase_2022, series = {3rd {International} {Conference} on {Industry} 4.0 and {Smart} {Manufacturing}}, title = {A {Two}-{Phase} {Machine} {Learning} {Approach} for {Predictive} {Maintenance} of {Low} {Voltage} {Industrial} {Motors}}, volume = {200}, issn = {1877-0509}, url = {https://www.sciencedirect.com/science/article/pii/S1877050922002198}, doi = {10.1016/j.procs.2022.01.210}, abstract = {Predictive maintenance and sound operating industrial equipment are essential for nearly any production plant. The absence of a systematic maintenance program and data-driven mindset in making manufacturing decisions may result in serious safety risks, unexpected equipment damages, and financial strain. Condition monitoring and predictive maintenance management systems are commonly used in tandem with the Internet of Things, linking sensors on machines and transmitting the data through a wireless network to a data-logging center that will allow further analysis and support decision making. The system described in this paper measures vibrations using sensors attached to low voltage motors and then utilizes a two-phase machine learning approach for predictive maintenance. In the first phase, we conducted an analysis to look for any abnormal behavior, and in the second phase, we attempted to determine the type of specific faults that may occur. The proposed predictive maintenance system aims to reduce the fault detection time and assist with diagnosing the type of fault occurring. We utilized and tested three machine learning algorithms to detect abnormal motor behavior: support vector machine, backpropagation neural network, and random forest. For predicting the type of specific motor faults that may occur, we used a support vector machine. This two-phase machine learning approach demonstrated promising results in detecting abnormal behavior in low voltage motors. Therefore, integrating this machine learning component as a part of a predictive maintenance system can result in high confidence about the motor condition, reduce maintenance cost, and enhance the safety of the operators and the machines.}, language = {en}, urldate = {2022-03-14}, journal = {Procedia Computer Science}, author = {Nikfar, Mohsen and Bitencourt, Julia and Mykoniatis, Konstantinos}, month = jan, year = {2022}, keywords = {Predictive maintenance, condition monitoring, low voltage industrial motors, machine learning}, pages = {111--120}, }
@article{ong_predictive_2022, title = {Predictive {Maintenance} {Model} for {IIoT}-based {Manufacturing}: {A} {Transferable} {Deep} {Reinforcement} {Learning} {Approach}}, issn = {2327-4662}, shorttitle = {Predictive {Maintenance} {Model} for {IIoT}-based {Manufacturing}}, doi = {10.1109/JIOT.2022.3151862}, abstract = {The Industrial Internet of Things (IIoT) is crucial for accurately assessing the state of complex equipment in order to perform predictive maintenance (PdM) successfully. However, existing IIoT-based PdM frameworks do not consider the influence of various practical yet complex system factors, such as the real-time production states, machine health, and maintenance manpower resources. For this reason, we propose a generic PdM optimization framework to assist maintenance teams in prioritizing and resolving maintenance task conflicts under real-world manufacturing conditions. Specifically, the PdM framework aims to jointly optimize edge-based machine network uptime and allocation of manpower resources in a stochastic IIoT-enabled manufacturing environment using the model-free Deep Reinforcement Learning (DRL) methods. Since DRL requires a significant amount of training data, we propose and demonstrate the use of Transfer Learning (TL) method to assist DRL in learning more efficiently by incorporating expert demonstrations, termed TL with demonstrations (TLD). TLD reduces training wall-time by 58\% compared to baseline methods, and we conduct numerous experiments to illustrate the performance, robustness, and scalability of TLD. Finally, we discuss the general benefits and limitations of the proposed TL method, which are not well addressed in the existing literature but could be beneficial to both researchers and industry practitioners.}, journal = {IEEE Internet of Things Journal}, author = {Ong, Kevin Shen Hoong and Wang, Wenbo and Hieu, Nguyen Quang and Niyato, Dusit and Friedrichs, Thomas}, year = {2022}, note = {Conference Name: IEEE Internet of Things Journal}, keywords = {Industrial Internet of Things, Industrial Internet of Things (IIoT), Maintenance engineering, Manufacturing, Production, Resource management, Task analysis, Transfer learning, decision support, deep reinforcement learning., predictive maintenance, resource management, transfer learning}, pages = {1--1}, }
@article{yao_tool_2022, title = {Tool remaining useful life prediction using deep transfer reinforcement learning based on long short-term memory networks}, volume = {118}, issn = {1433-3015}, url = {https://doi.org/10.1007/s00170-021-07950-2}, doi = {10.1007/s00170-021-07950-2}, abstract = {Tool wear and faults will affect the quality of machined workpiece and damage the continuity of manufacturing. The accurate prediction of remaining useful life (RUL) is significant to guarantee the processing quality and improve the productivity of automatic system. At present, the most commonly used methods for tool RUL prediction are trained by history fault data. However, when researching on new types of tools or processing high value parts, fault datasets are difficult to acquire, which leads to RUL prediction a challenge under limited fault data. To overcome the shortcomings of above prediction methods, a deep transfer reinforcement learning (DTRL) network based on long short-term memory (LSTM) network is presented in this paper. Local features are extracted from consecutive sensor data to track the tool states, and the trained network size can be dynamically adjusted by controlling time sequence length. Then in DTRL network, LSTM network is employed to construct the value function approximation for smoothly processing temporal information and mining long-term dependencies. On this basis, a novel strategy of Q-function update and transfer is presented to transfer the deep reinforcement learning (DRL) network trained by historical fault data to a new tool for RUL prediction. Finally, tool wear experiments are performed to validate effectiveness of the DTRL model. The prediction results demonstrate that the proposed method has high accuracy and generalization for similar tools and cutting conditions.}, language = {en}, number = {3}, urldate = {2022-03-03}, journal = {The International Journal of Advanced Manufacturing Technology}, author = {Yao, Jiachen and Lu, Baochun and Zhang, Junli}, month = jan, year = {2022}, pages = {1077--1086}, }
@article{zhang_health_2022, title = {Health status assessment and prediction for pumped storage units using a novel health degradation index}, volume = {171}, issn = {0888-3270}, url = {https://www.sciencedirect.com/science/article/pii/S0888327022000978}, doi = {10.1016/j.ymssp.2022.108910}, abstract = {To improve the safety and stability of pumped storage units (PSUs), we propose a novel health degradation index (HDI) to achieve real-time health status assessment for PSUs. Based on the HDI, the health degradation trend is predicted by the combination of variational mode decomposition (VMD) and gated recurrent unit (GRU). Firstly, the complex fitting relationship between the operating parameters of the PSU and its shaft vibration is established based on health monitoring data by multi-head self-attentive neural network (MSNN), which is regarded as a health benchmark model (HBM). MSNN reveals the high-dimensional mutually coupled relationship between different factors influencing the vibration. Secondly, to well describe the uncertainty information inherent in the vibration, Gaussian cloud model (GCM) is used to describe the vibration from both quantitative and qualitative perspectives. Then, the HDI is defined by the Kullback-Leibler divergence between the observed GCM and the health GCM predicted by HBM. Finally, VMD is used to decompose the complex HDI series into some simplistic components, while GRU is used to predict separately on the components. The final results are obtained by combining the component prediction results. The proposed method is applied in a PSU in China. The experimental results as well as several comparative studies demonstrate its outstanding performance.}, language = {en}, urldate = {2022-02-15}, journal = {Mechanical Systems and Signal Processing}, author = {Zhang, Xiaoyuan and Jiang, Yajun and Li, Chaoshun and Zhang, Jinhao}, month = may, year = {2022}, keywords = {Gated recurrent units, Gaussian cloud model, Health status assessment and prediction, Multi-head self-attentive mechanism, Pumped storage units, Variational mode decomposition}, pages = {108910}, }
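As a hedged illustration of the entry above (all notation is assumed here, not taken from the paper), the health degradation index it describes is a Kullback-Leibler divergence between the observed Gaussian cloud model and the healthy one predicted by the health benchmark model:
\[
\mathrm{HDI}(t) \;=\; D_{\mathrm{KL}}\!\left(p^{\mathrm{obs}}_{t} \,\middle\|\, p^{\mathrm{health}}_{t}\right) \;=\; \int p^{\mathrm{obs}}_{t}(x)\,\log\frac{p^{\mathrm{obs}}_{t}(x)}{p^{\mathrm{health}}_{t}(x)}\,\mathrm{d}x ,
\]
where \(p^{\mathrm{obs}}_{t}\) is the Gaussian cloud model fitted to the observed shaft vibration at time \(t\) and \(p^{\mathrm{health}}_{t}\) is the one predicted from the operating parameters by the benchmark model; larger values indicate stronger degradation. The resulting HDI series is then decomposed by VMD and each component is forecast separately with a GRU.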
@article{velasco-gallego_real-time_2022, title = {A real-time data-driven framework for the identification of steady states of marine machinery}, volume = {121}, issn = {0141-1187}, url = {https://www.sciencedirect.com/science/article/pii/S0141118722000086}, doi = {10.1016/j.apor.2022.103052}, abstract = {While maritime transportation is the primary means of long-haul transportation of goods to and from the EU, it continues to present a significant number of casualties and fatalities owing to damage to ship equipment; damage attributed to machinery failures during daily ship operations. Therefore, the implementation of state-of-the-art inspection and maintenance activities are of paramount importance to adequately ensure the proper functioning of systems. Accordingly, Internet of Ships paradigm has emerged to guarantee the interconnectivity of maritime objects. Such technology is still in its infancy, and thus several challenges need to be addressed. An example of which is data preparation, critical to ensure data quality while avoiding biased results in further analysis to enhance transportation operations. As part of developing a real-time intelligent system to assist with instant decision-making strategies that enhance ship and systems availability, operability, and profitability, a data-driven framework for the identification of steady states of marine machinery based on image generation and connected component analysis is proposed. The identification of such states is of preeminent importance, as non-operational states may adversely alter the results outlined. A case study of three diesel generators of a tanker ship is introduced to validate the developed framework. Results of this study demonstrated the outperformance of the proposed model in relation to the widely implemented clustering models k-means and GMMs with EM algorithm. As such, the proposed framework can adequately identify steady states appropriately to guarantee the detection of such states in real-time, whilst ensuring computational efficiency and model effectiveness.}, language = {en}, urldate = {2022-02-15}, journal = {Applied Ocean Research}, author = {Velasco-Gallego, Christian and Lazakis, Iraklis}, month = apr, year = {2022}, keywords = {Connected component analysis, Marine machinery, Maritime transportation, Markov chains, Smart maintenance, Steady states identification}, pages = {103052}, }
@article{zhao_bayesian_2022, title = {A {Bayesian} approach to comparing human reliability analysis methods using human performance data}, volume = {219}, issn = {0951-8320}, url = {https://www.sciencedirect.com/science/article/pii/S0951832021006918}, doi = {10.1016/j.ress.2021.108213}, abstract = {Various methods for human reliability analysis have been developed, but a rigorous approach to quantitatively comparing these methods is still lacking. This research proposed a Bayesian approach with an attempt to address this problem. The Bayesian approach is based on ensemble modeling, which outputs the weighted average of the human error probability predictions by the human reliability analysis methods to be compared. Before incorporating any human performance data, the weights in the ensemble model represent the prior probabilities of or one’s prior beliefs in the human reliability analysis methods. Using human performance data, the weights can be updated based on Bayes’ rule to reflect one’s updated beliefs in the human reliability analysis methods. The ensemble model with updated weights itself can be further used as a posterior predictive model for human reliability. The proposed approach is demonstrated using the human performance data collected from the international human reliability analysis empirical study. The results show that the posterior beliefs vary with the data set used in the analysis. Future research using a larger human performance data set is expected to reach more conclusive comparisons.}, language = {en}, urldate = {2021-12-28}, journal = {Reliability Engineering \& System Safety}, author = {Zhao, Yunfei}, month = mar, year = {2022}, keywords = {Analyst-to-analyst variability, Bayesian analysis, Ensemble modeling, Human error probability, Human performance data, Human reliability analysis, Quantitative comparison}, pages = {108213}, }
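A minimal sketch of the Bayesian ensemble weighting described in the entry above, with notation assumed for illustration: given HRA methods \(M_1,\dots,M_K\) with prior weights \(w_k\) and human performance data \(D\),
\[
w_k^{\mathrm{post}} \;=\; \frac{w_k\, p(D \mid M_k)}{\sum_{j=1}^{K} w_j\, p(D \mid M_j)}, \qquad
\hat{p}_{\mathrm{HEP}} \;=\; \sum_{k=1}^{K} w_k^{\mathrm{post}}\, \hat{p}_k ,
\]
so the updated weights express the posterior belief in each method, and the weighted average \(\hat{p}_{\mathrm{HEP}}\) of the methods' human error probability predictions \(\hat{p}_k\) serves as the posterior predictive model.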
@article{yin_quantitative_2022, title = {Quantitative analysis for resilience-based urban rail systems: {A} hybrid knowledge-based and data-driven approach}, volume = {219}, issn = {0951-8320}, shorttitle = {Quantitative analysis for resilience-based urban rail systems}, url = {https://www.sciencedirect.com/science/article/pii/S0951832021006670}, doi = {10.1016/j.ress.2021.108183}, abstract = {The rapid expansions of urban rail networks are faced with the growing number of disruptions caused by the complex rail signaling systems, incorrect driving behaviors, and extreme weather. Since urban rail systems are inherently complex and many of these disruptions are usually uncertain and inevitable, the rail managers have gradually paid more attention to the ability to withstand and quickly recover. Nevertheless, only a small number of recent developments have tried to address the ability of an urban rail system to recover from disruptions while considering the inherent structures. In this work, we propose a hybrid knowledge-based and data-driven approach for quantitative analysis of resilience. The aim is to model the causal relationships to quantify the importance of different perturbations to the overall resilience criteria. A set of key features related to the risk assessment and system resilience are summarized according to the historical data in Beijing Metro. Then, we develop a training procedure based on the structure of BN and historical data. Finally, we embed this hybrid approach into software that is applied to Beijing Metro. The results demonstrate the quantitative relationships between system resilience and different types of events.}, language = {en}, urldate = {2021-12-28}, journal = {Reliability Engineering \& System Safety}, author = {Yin, Jiateng and Ren, Xianliang and Liu, Ronghui and Tang, Tao and Su, Shuai}, month = mar, year = {2022}, keywords = {Bayesian network, Quantitative, Resilience, Transportation, Urban rail systems}, pages = {108183}, }
@article{tornyeviadzi_systematic_2022, title = {A systematic framework for dynamic nodal vulnerability assessment of water distribution networks based on multilayer networks}, volume = {219}, issn = {0951-8320}, url = {https://www.sciencedirect.com/science/article/pii/S0951832021006955}, doi = {10.1016/j.ress.2021.108217}, abstract = {Nodal demands vary throughout the day, as such any vulnerability analysis based on static networks, which considers daily average demands cannot realistically represent the criticality of nodes in the network. This study presents a systematic framework, which couples multilayer networks, structural reducibility and a Demand Adjusted Vulnerability Measure for dynamic nodal vulnerability assessment of water distribution networks (WDNs) under extended period simulation. Within this framework, we present the novel idea of characterizing the dynamics of WDNs with multi-slice networks, which captures the state of the network within a predefined temporal window taking into consideration the directional flow in pipes and the operational status of pumps, valves etc. Using a benchmark WDN, Net 3, as a case study we have demonstrated the importance of demand variations and operational status of various components, no matter how minuscule their operational time, on nodal vulnerability assessment in WDNs. The results indicated that the framework evaluates the criticality of all types of nodes, even intermediary nodes with zero base demand, within any temporal window much more realistically than conventional vulnerability analysis methods based on single (static) networks. Structural reducibility unearthed correlations between the operational status of source nodes and pumps on the general dynamics of the distribution system. The multilayer framework opens a new frontier in vulnerability analysis of WDNs and could serve as a tool for stakeholders in accessing node criticality, impact of various failure scenarios and optimal scheduling of maintenance routines.}, language = {en}, urldate = {2021-12-28}, journal = {Reliability Engineering \& System Safety}, author = {Tornyeviadzi, Hoese Michel and Owusu-Ansah, Emmanuel and Mohammed, Hadi and Seidu, Razak}, month = mar, year = {2022}, keywords = {Dynamic vulnerability assessment model, Multilayer networks, Node vulnerability, Structural reducibility, Water distribution networks}, pages = {108217}, }
@article{saraygord_afshari_machine_2022, title = {Machine learning-based methods in structural reliability analysis: {A} review}, volume = {219}, issn = {0951-8320}, shorttitle = {Machine learning-based methods in structural reliability analysis}, url = {https://www.sciencedirect.com/science/article/pii/S0951832021007018}, doi = {10.1016/j.ress.2021.108223}, abstract = {Structural Reliability analysis (SRA) is one of the prominent fields in civil and mechanical engineering. However, an accurate SRA in most cases deals with complex and costly numerical problems. Machine learning-based (ML) techniques have been introduced to the SRA problems to deal with this huge computational cost and increase accuracy. This paper presents a review of the development and use of ML models in SRA. The review includes the most common types of ML methods used in SRA. More specifically, the application of artificial neural networks (ANN), support vector machines (SVM), Bayesian methods and Kriging estimation with active learning perspective in SRA are explained, and a state-of-the-art review of the prominent literature in these fields is presented. Aiming towards a fast and accurate SRA, the review also covers the ML techniques adopted for the approximation of the limit state function with Monte Carlo simulation (MCS), first/second-order reliability methods (FORM/SORM) or MCS with importance sampling, as well as the methods for efficiently computing the probabilities of rare events in complex structural systems. In this regard, the focus of the current manuscript is on the different models’ structures and diverse applications of each ML method in different aspects of SRA. Moreover, imperative considerations on the management of samples in the Monte Carlo simulation for SRA purposes and the treatment of the SRA problem as pattern recognition or classification task are provided. This review helps the researchers in civil and mechanical engineering, especially those who are focused on reliability and structural analysis or dealing with product assurance problems.}, language = {en}, urldate = {2021-12-28}, journal = {Reliability Engineering \& System Safety}, author = {Saraygord Afshari, Sajad and Enayatollahi, Fatemeh and Xu, Xiangyang and Liang, Xihui}, month = mar, year = {2022}, keywords = {Artificial neural networks, Bayesian analysis, Kriging estimation, Monte Carlo simulation, Response surface method, Structural reliability, Support vector machines, Surrogate modeling}, pages = {108223}, }
@article{hesabi_deep_2022, title = {A deep learning predictive model for selective maintenance optimization}, volume = {219}, issn = {0951-8320}, url = {https://www.sciencedirect.com/science/article/pii/S095183202100675X}, doi = {10.1016/j.ress.2021.108191}, abstract = {This paper develops a predictive selective maintenance framework using deep learning and mathematical programming. We consider a multi-component system executing consecutive production missions with scheduled intermission maintenance breaks. During the intermission breaks, several maintenance actions can improve each component's remaining useful life at a given cost. An optimization model is developed to identify a subset of maintenance actions to perform on the components. The objective is to minimize the total cost under intermission break time limitation. The total cost is composed of maintenance and failure costs; it depends on the success probabilities of the subsequent missions. To estimate these probabilities, the optimization model interacts with a long short-term memory network. The resulting predictive selective maintenance framework is validated using a benchmarking data set provided by NASA for a Modular Aero-Propulsion System Simulation of a Commercial Turbofan Engine. Its performance is highlighted when compared with the model-based approach. The results illustrate the advantages of the predictive selective maintenance framework to predict the health condition of each component with accuracy and deal with the selective maintenance of series systems.}, language = {en}, urldate = {2021-12-28}, journal = {Reliability Engineering \& System Safety}, author = {Hesabi, Hadis and Nourelfath, Mustapha and Hajji, Adnène}, month = mar, year = {2022}, keywords = {Deep learning, Optimization, Predictive models, Remaining useful life, Selective maintenance}, pages = {108191}, }
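The selective-maintenance optimisation sketched in the abstract above can be summarised, under assumed notation, as choosing a subset of maintenance actions to perform during an intermission break:
\[
\min_{a \in \{0,1\}^{n}} \;\; \sum_{i=1}^{n} c_i\, a_i \;+\; C_f \sum_{m} \bigl(1 - \hat{R}_m(a)\bigr)
\quad \text{s.t.} \quad \sum_{i=1}^{n} d_i\, a_i \;\le\; T_{\mathrm{break}},
\]
where \(c_i\) and \(d_i\) are the cost and duration of maintenance action \(i\), \(C_f\) is the failure cost, \(T_{\mathrm{break}}\) is the break time limit, and \(\hat{R}_m(a)\) is the success probability of subsequent mission \(m\) estimated by the long short-term memory network given the chosen actions.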
@article{he_semi-supervised_2022, title = {A semi-supervised {GAN} method for {RUL} prediction using failure and suspension histories}, volume = {168}, issn = {0888-3270}, url = {https://www.sciencedirect.com/science/article/pii/S0888327021009833}, doi = {10.1016/j.ymssp.2021.108657}, abstract = {Deep learning methods have shown great potential to provide reliable remaining useful life (RUL) predictions in Prognostics and Health Management applications. However, deep learning models, particularly supervised learning methods, are strongly dependent on a large number of failure histories. In practice, engineering assets are generally replaced by new ones before failure during planned maintenance, resulting in a small number of failure histories and often times more than twice as many suspension histories. In this paper, a semi-supervised generative adversarial network (GAN) regression model is developed to consider both failure and suspension histories for RUL predictions. The proposed GAN model utilizes conditional multi-task objective functions to capture useful information from suspension histories to improve prediction accuracy, instead of simply treating them as unlabeled data. The method will not directly predict the failure times of suspension histories, but match the statistical information between similar failure and suspension histories to the greatest extent for model training. As a result, the failure information of suspension histories will not only rely on the failure histories but also on the generated data, thereby improving the model generalization, especially when the amount of data is limited. In addition, a robustness evaluation method is proposed to assess the uncertainty of the prognostic model caused by the scarce failure data. The accuracy and credibility of the proposed approach are validated by using two case studies.}, language = {en}, urldate = {2021-12-09}, journal = {Mechanical Systems and Signal Processing}, author = {He, Rui and Tian, Zhigang and Zuo, Ming J.}, month = apr, year = {2022}, keywords = {Generative adversarial network, Prediction, Remaining useful life, Semi-supervised learning, Suspension history}, pages = {108657}, }
@article{li_emerging_2022, title = {The emerging graph neural networks for intelligent fault diagnostics and prognostics: {A} guideline and a benchmark study}, volume = {168}, issn = {0888-3270}, shorttitle = {The emerging graph neural networks for intelligent fault diagnostics and prognostics}, url = {https://www.sciencedirect.com/science/article/pii/S0888327021009791}, doi = {10.1016/j.ymssp.2021.108653}, abstract = {Deep learning (DL)-based methods have advanced the field of Prognostics and Health Management (PHM) in recent years, because of their powerful feature representation ability. The data in PHM are typically regular data represented in the Euclidean space. Nevertheless, there are an increasing number of applications that consider the relationships and interdependencies of data and represent the data in the form of graphs. Such kind of irregular data in non-Euclidean space pose a huge challenge to the existing DL-based methods, making some important operations (e.g., convolutions) easily applied to Euclidean space but difficult to model graph data in non-Euclidean space. Recently, graph neural networks (GNNs), as the emerging neural networks, have been utilized to model and analyze the graph data. However, there still lacks a guideline on leveraging GNNs for realizing intelligent fault diagnostics and prognostics. To fill this research gap, a practical guideline is proposed in this paper, and a novel intelligent fault diagnostics and prognostics framework based on GNN is established to illustrate how the proposed guideline works. In this framework, three types of graph construction methods are provided, and seven kinds of graph convolutional networks (GCNs) with four different graph pooling methods are investigated. To afford benchmark results for helping further study, a comprehensive evaluation of these models is performed on eight datasets, including six fault diagnosis datasets and two prognosis datasets. Finally, four issues related to the performance of GCNs are discussed and potential research directions are provided. The code library is available at: https://github.com/HazeDT/PHMGNNBenchmark.}, language = {en}, urldate = {2021-12-09}, journal = {Mechanical Systems and Signal Processing}, author = {Li, Tianfu and Zhou, Zheng and Li, Sinan and Sun, Chuang and Yan, Ruqiang and Chen, Xuefeng}, month = apr, year = {2022}, keywords = {Benchmark results, Graph neural networks, Intelligent fault diagnostics and prognostics, Practical guideline, Prognostics and health management}, pages = {108653}, }
@article{ding_self-supervised_2022, title = {Self-supervised pretraining via contrast learning for intelligent incipient fault detection of bearings}, volume = {218}, issn = {0951-8320}, url = {https://www.sciencedirect.com/science/article/pii/S0951832021006207}, doi = {10.1016/j.ress.2021.108126}, abstract = {Data-driven approaches for prognostic and health management (PHM) increasingly rely on massive historical data, yet annotations are expensive and time-consuming. Learning approaches that utilize semi-labeled or unlabeled data are becoming increasingly popular. In this paper, a self-supervised pre-training via contrast learning (SSPCL) is introduced to learn discriminative representations from unlabeled bearing datasets. Specifically, the SSPCL employs momentum contrast learning (MCL) to investigate the local representation in terms of instance-level discrimination contrast. Further, we propose a specific architecture for SSPCL deployment on bearing vibration signals by presenting several data augmentations for 1D sequences. On this basis, we put forward an incipient fault detection method based on SSPCL for run-to-failure cycle of rolling bearings. This approach transfers the SSPCL pre-trained model to a specific semi-supervised downstream task, effectively utilizing all unlabeled data and relying on only a little priori knowledge. A case study on FEMTO-ST datasets shows that the fine-tuned model is competent for incipient fault detection, outperforming other state-of-the-art methods. Furthermore, a supplemental case on a self-built fault datasets further demonstrate the great potential and superiority of our proposed SSPCL method in PHM.}, language = {en}, urldate = {2021-11-29}, journal = {Reliability Engineering \& System Safety}, author = {Ding, Yifei and Zhuang, Jichao and Ding, Peng and Jia, Minping}, month = feb, year = {2022}, keywords = {Data augmentation, Fault diagnosis, Incipient fault detection, Prognostic and health management, Self-supervised pretraining, Unsupervised learning}, pages = {108126}, }
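The entry above builds on momentum contrast with data augmentations for 1D vibration sequences. The following Python sketch only illustrates that general idea; the toy augmentations, the linear stand-in encoder and all hyperparameters are assumptions, not the authors' implementation.
# Hedged sketch: instance-level contrastive pretraining on 1D vibration
# windows, in the spirit of momentum contrast (MoCo-style training).
import numpy as np

rng = np.random.default_rng(0)

def augment(x):
    """Random jitter + amplitude scaling of a 1D window (illustrative)."""
    jitter = rng.normal(0.0, 0.01, size=x.shape)
    scale = rng.uniform(0.8, 1.2)
    return scale * x + jitter

def encode(x, W):
    """Toy linear encoder followed by L2 normalisation (stand-in for a CNN)."""
    z = W @ x
    return z / (np.linalg.norm(z) + 1e-12)

def info_nce(q, k_pos, k_negs, tau=0.07):
    """InfoNCE loss for one query against its positive key and a negative queue."""
    logits = np.array([q @ k_pos] + [q @ k for k in k_negs]) / tau
    logits -= logits.max()                      # numerical stability
    return -np.log(np.exp(logits[0]) / np.exp(logits).sum())

# toy data: a batch of raw vibration windows
batch = [rng.standard_normal(256) for _ in range(8)]
W_q = rng.standard_normal((32, 256)) * 0.05     # query encoder
W_k = W_q.copy()                                # momentum (key) encoder

queue = [encode(augment(x), W_k) for x in batch[1:]]   # negatives from other instances
q = encode(augment(batch[0]), W_q)                     # query view of instance 0
k = encode(augment(batch[0]), W_k)                     # positive key view of instance 0
print("InfoNCE loss:", info_nce(q, k, queue))

# In MoCo-style training the key encoder is updated as a moving average:
m = 0.999
W_k = m * W_k + (1 - m) * W_q
The pretrained encoder would then be fine-tuned on a small labelled set for the downstream incipient fault detection task.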
@article{dindar_hierarchical_2022, title = {A hierarchical {Bayesian}-based model for hazard analysis of climate effect on failures of railway turnout components}, volume = {218}, issn = {0951-8320}, url = {https://www.sciencedirect.com/science/article/pii/S0951832021006220}, doi = {10.1016/j.ress.2021.108130}, abstract = {There has been a considerable increase in derailment investigations, in particular at railway turnouts (RTs), as the majority of derailments lead to lengthy disruptions to the appropriate rail operation and catastrophic consequences, being potentially severely hazardous to human safety and health, as well as rail equipment. This paper investigates the impact of climates with different features across the US on the derailments to light up a scientific way for understanding importance of climatic impact. To achieve this, official derailment reports over the last five years are examined in detail. By means of geographic segmentation associated with spatial analysis, different exposure levels of various regions have been identified and implemented into a Bayesian hierarchical model using samples by the M–H algorithm. As a result, the paper reaches interesting scientific findings of climate behaviour on turnout-related component failures resulting in derailments. The findings show extreme climate patterns impact considerably the component failures of rail turnouts. Therefore, it is indicated that turnout-related failure estimates on a large-scale region with extreme cold and hot zones could be investigated when the suggested methodology of this paper is considered.}, language = {en}, urldate = {2021-11-29}, journal = {Reliability Engineering \& System Safety}, author = {Dindar, Serdar and Kaewunruen, Sakdirat and An, Min}, month = feb, year = {2022}, keywords = {Bayesian network, Climate effect, Derailment, Railway operation}, pages = {108130}, }
@article{zhou_maintenance_2022, title = {Maintenance optimisation of multicomponent systems using hierarchical coordinated reinforcement learning}, volume = {217}, issn = {0951-8320}, url = {https://www.sciencedirect.com/science/article/pii/S0951832021005767}, doi = {10.1016/j.ress.2021.108078}, abstract = {The Markov decision process (MDP) is a widely used method to optimise the maintenance of multicomponent systems, which can provide a system-level maintenance action at each decision point to address various dependences among components. However, MDP suffers from the “curse of dimensionality” and can only process small-scale systems. This paper develops a hierarchical coordinated reinforcement learning (HCRL) algorithm to optimise the maintenance of large-scale multicomponent systems. Both parameters of agents and the coordination relationship among agents are designed based on system characteristics. Furthermore, the hierarchical structure of agents is established according to the structural importance measures of components. The effectiveness of the proposed HCRL algorithm is validated using two maintenance optimisation problems, one on a natural gas plant system and the other using a 12-component series system under dependant competing risks. Results show that the proposed HCRL outperforms methods in two recently published papers and other benchmark approaches including the new emerging deep reinforcement learning.}, language = {en}, urldate = {2021-11-15}, journal = {Reliability Engineering \& System Safety}, author = {Zhou, Yifan and Li, Bangcheng and Lin, Tian Ran}, month = jan, year = {2022}, keywords = {Condition based maintenance, Coordinated reinforcement learning, Deep reinforcement learning, Hierarchical multiagent reinforcement learning}, pages = {108078}, }
@article{dinh_multi-level_2022, title = {Multi-level opportunistic predictive maintenance for multi-component systems with economic dependence and assembly/disassembly impacts}, volume = {217}, issn = {0951-8320}, url = {https://www.sciencedirect.com/science/article/pii/S0951832021005573}, doi = {10.1016/j.ress.2021.108055}, abstract = {For maintenance optimization of multi-component systems, opportunistic maintenance has been addressed in many studies since it allows considering the advantages of dependences between components in maintenance decision-making process. In the literature, economic dependence, which implies that joint maintenance of several components can reduce the maintenance cost, has been widely studied in the framework of opportunistic maintenance. There are however very few existing studies considering the advantages of structural dependence, whereby maintenance of a component requires disassembly of other components, in maintenance optimization. To face this issue, the objective of this paper is to propose a multi-level opportunistic predictive maintenance approach considering both economic and structural dependence. In that way, the economic and structural dependences between components are firstly formulated. A degradation model considering disassembly impacts is then developed. For opportunistic maintenance decision-making, two opportunistic thresholds are introduced. When corrective/preventive maintenance occurs, the first opportunistic threshold (eRo) is defined to select non-disassembled components for opportunistic maintenance. This first opportunistic decision allows considering the economic dependence between components. In addition, the maintenance of the selected components may require disassembly of other components which could be also good candidates to be opportunistically maintained. So, the second opportunistic threshold, sRo(sRo≥eRo), is then developed to select one or several disassembled components to be opportunistically maintained. To evaluate the performance of the proposed opportunistic maintenance approach, a cost model is developed. Particle swarm optimization algorithm is then applied to find the optimal decision variables. Finally, the proposed opportunistic maintenance approach is illustrated through a conveyor system to show its feasibility and added value in maintenance optimization framework.}, language = {en}, urldate = {2021-11-15}, journal = {Reliability Engineering \& System Safety}, author = {Dinh, Duc-Hanh and Do, Phuc and Iung, Benoit}, month = jan, year = {2022}, keywords = {Economic dependence, Maintenance optimization, Multi-component system, Opportunistic maintenance, Structural dependence}, pages = {108055}, }
@article{jia_reliability_2022, title = {Reliability analysis for complex system with multi-source data integration and multi-level data transmission}, volume = {217}, issn = {0951-8320}, url = {https://www.sciencedirect.com/science/article/pii/S0951832021005536}, doi = {10.1016/j.ress.2021.108050}, abstract = {The Bayesian theory is appealing for reliability analysis of complex system using the data incorporation. However, there are still gaps among existing methods, such as that the common method is applicable to two-level system, multi-source data for components are not considered, k-out-of-n and standby structures are not studied with series and parallel simultaneously, the reliability is usually estimated as discrete variable, etc. For these problems, a Bayesian-based method is proposed on system under multiple levels. First, multi-source data for each target are integrated in lower level by Bayesian theory to derive the posterior distribution for reliability. Next, they are transmitted to higher level through the deterministic function concerning reliability depending on system structure. Further, the transmitted data from lower level are transformed to induced prior distribution for parameters in distribution of lifetime and integrated with native data in higher level to obtain the posterior for reliability. Finally, the reliability and remaining useful lifetime with respect to times in system-level are presented after the successive information propagation. An illustrative example is given to show the application of proposed method. Together with the sensitivity analysis, it proves that this method is feasible, practical and robust.}, language = {en}, urldate = {2021-11-15}, journal = {Reliability Engineering \& System Safety}, author = {Jia, Xiang and Guo, Bo}, month = jan, year = {2022}, keywords = {Data integration, Data transmission, Multi-level data, Multi-source data, Reliability analysis, System}, pages = {108050}, }
@article{arias_chao_fusing_2022, title = {Fusing physics-based and deep learning models for prognostics}, volume = {217}, issn = {0951-8320}, url = {https://www.sciencedirect.com/science/article/pii/S0951832021004725}, doi = {10.1016/j.ress.2021.107961}, abstract = {Physics-based and data-driven models for remaining useful lifetime (RUL) prediction typically suffer from two major challenges that limit their applicability to complex real-world domains: (1) the incompleteness of physics-based models and (2) the limited representativeness of the training dataset for data-driven models. Combining the advantages of these two approaches while overcoming some of their limitations, we propose a novel hybrid framework for fusing the information from physics-based performance models with deep learning algorithms for prognostics of complex safety-critical systems. In the proposed framework, we use physics-based performance models to infer unobservable model parameters related to a system’s components health by solving a calibration problem. These parameters are subsequently combined with sensor readings and used as input to a deep neural network, thereby generating a data-driven prognostics model with physics-augmented features. The performance of the hybrid framework is evaluated on an extensive case study comprising run-to-failure degradation trajectories from a fleet of nine turbofan engines under real flight conditions. The experimental results show that the hybrid framework outperforms purely data-driven approaches by extending the prediction horizon by nearly 127\%. Furthermore, it requires less training data and is less sensitive to the limited representativeness of the dataset as compared to purely data-driven approaches. Furthermore, we demonstrated the feasibility of the proposed framework on the original CMAPSS dataset, thereby confirming its superior performance.}, language = {en}, urldate = {2021-11-15}, journal = {Reliability Engineering \& System Safety}, author = {Arias Chao, Manuel and Kulkarni, Chetan and Goebel, Kai and Fink, Olga}, month = jan, year = {2022}, keywords = {CMAPSS, Deep learning, Hybrid model, Prognostics}, pages = {107961}, }
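A hedged sketch of the fusion idea described above: calibrate a physics-based performance model to infer unobservable health parameters, then append them to the sensor readings as physics-augmented features for a data-driven prognostics model. The toy model form, parameter names and the linear stand-in regressor below are assumptions, not the authors' code.
# Hedged sketch: physics-augmented features for data-driven prognostics.
import numpy as np

rng = np.random.default_rng(1)

def physics_model(u, theta):
    """Toy performance model: predicted sensor response for operating
    condition u and a scalar health parameter theta (e.g., efficiency loss)."""
    return (1.0 - theta) * np.sqrt(u)

def calibrate(u, y_meas, grid=np.linspace(0.0, 0.5, 501)):
    """Infer theta by minimising the squared residual over a coarse grid."""
    residuals = [np.sum((y_meas - physics_model(u, th)) ** 2) for th in grid]
    return grid[int(np.argmin(residuals))]

# synthetic snapshots: operating conditions, true degradation, noisy sensors
u = rng.uniform(1.0, 4.0, size=(100, 5))
theta_true = np.linspace(0.0, 0.3, 100)
y = (1.0 - theta_true[:, None]) * np.sqrt(u) + rng.normal(0, 0.01, u.shape)

theta_hat = np.array([calibrate(u[i], y[i]) for i in range(len(u))])
X_hybrid = np.hstack([y, theta_hat[:, None]])   # sensors + inferred health parameter

# the hybrid features would feed a deep network in the paper's framework;
# a linear least-squares fit stands in here for the RUL regressor
rul = 100 - np.arange(100, dtype=float)
coef, *_ = np.linalg.lstsq(np.hstack([X_hybrid, np.ones((100, 1))]), rul, rcond=None)
print("calibration error:", float(np.mean(np.abs(theta_hat - theta_true))))
print("weight on inferred health parameter:", float(coef[-2]))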
@article{cao_kspmi_2022, title = {{KSPMI}: {A} {Knowledge}-based {System} for {Predictive} {Maintenance} in {Industry} 4.0}, volume = {74}, issn = {0736-5845}, shorttitle = {{KSPMI}}, url = {https://www.sciencedirect.com/science/article/pii/S0736584521001617}, doi = {10.1016/j.rcim.2021.102281}, abstract = {In the context of Industry 4.0, smart factories use advanced sensing and data analytic technologies to understand and monitor the manufacturing processes. To enhance production efficiency and reliability, statistical Artificial Intelligence (AI) technologies such as machine learning and data mining are used to detect and predict potential anomalies within manufacturing processes. However, due to the heterogeneous nature of industrial data, sometimes the knowledge extracted from industrial data is presented in a complex structure. This brings the semantic gap issue which stands for the lack of interoperability among different manufacturing systems. Furthermore, as the Cyber-Physical Systems (CPS) are becoming more knowledge-intensive, uniform knowledge representation of physical resources and real-time reasoning capabilities for analytic tasks are needed to automate the decision-making processes for these systems. These requirements highlight the potential of using symbolic AI for predictive maintenance. To automate and facilitate predictive analytics in Industry 4.0, in this paper, we present a novel Knowledge-based System for Predictive Maintenance in Industry 4.0 (KSPMI). KSPMI is developed based on a novel hybrid approach that leverages both statistical and symbolic AI technologies. The hybrid approach involves using statistical AI technologies such as machine learning and chronicle mining (a special type of sequential pattern mining approach) to extract machine degradation models from industrial data. On the other hand, symbolic AI technologies, especially domain ontologies and logic rules, will use the extracted chronicle patterns to query and reason on system input data with rich domain and contextual knowledge. This hybrid approach uses Semantic Web Rule Language (SWRL) rules generated from chronicle patterns together with domain ontologies to perform ontology reasoning, which enables the automatic detection of machinery anomalies and the prediction of future events’ occurrence. KSPMI is evaluated and tested on both real-world and synthetic data sets.}, language = {en}, urldate = {2021-11-15}, journal = {Robotics and Computer-Integrated Manufacturing}, author = {Cao, Qiushi and Zanni-Merk, Cecilia and Samet, Ahmed and Reich, Christoph and Beuvron, François de Bertrand de and Beckmann, Arnold and Giannetti, Cinzia}, month = apr, year = {2022}, keywords = {Chronicle mining, Industry 4.0, Knowledge-based system, Ontology reasoning, Predictive maintenance}, pages = {102281}, }
@article{giordano_data-driven_2022, title = {Data-driven strategies for predictive maintenance: {Lesson} learned from an automotive use case}, volume = {134}, issn = {0166-3615}, shorttitle = {Data-driven strategies for predictive maintenance}, url = {https://www.sciencedirect.com/science/article/pii/S0166361521001615}, doi = {10.1016/j.compind.2021.103554}, abstract = {Predictive maintenance is an ever-growing topic of interest, spanning different fields and approaches. In the automotive domain, thanks to on-board sensors and the possibility to transmit collected data to the cloud, car manufacturers can deploy predictive maintenance solutions to prevent components malfunctioning and eventually recall to the service the vehicle before the customer experiences the failure. In this paper we present PREPIPE, a data-driven pipeline for predictive maintenance. Given the raw time series of signals recorded by the on-board engine control unit of diesel engines, we exploit PREPIPE to predict the clogging status of the oxygen sensor, a key component of the exhaust system to control combustion efficiency and pollutant emissions. In the design of PREPIPE we deeply investigate: (i) how to choose the best subset of signals to best capture the sensor status, (ii) how much data needs to be collected to make the most accurate prediction, (iii) how to transform the original time series into features suitable for state-of-art classifiers, (iv) how to select the most important features, (v) how to include historical features to predict the clogging status of the sensor. We thoroughly assess PREPIPE performance and compare it with state-of-art deep learning architectures. Our results show that PREPIPE correctly identifies critical situations before the sensor reaches critical conditions. Furthermore, PREPIPE supports domain experts in optimizing the design of data-driven predictive maintenance pipelines with performance comparable to deep learning methodologies while keeping a degree of interpretability.}, language = {en}, urldate = {2021-11-15}, journal = {Computers in Industry}, author = {Giordano, Danilo and Giobergia, Flavio and Pastor, Eliana and La Macchia, Antonio and Cerquitelli, Tania and Baralis, Elena and Mellia, Marco and Tricarico, Davide}, month = jan, year = {2022}, keywords = {Automotive, Data-driven, Machine learning, Predictive maintenance, Time series}, pages = {103554}, }
@article{arena_novel_2022, title = {A novel decision support system for managing predictive maintenance strategies based on machine learning approaches}, volume = {146}, issn = {0925-7535}, url = {https://www.sciencedirect.com/science/article/pii/S0925753521003726}, doi = {10.1016/j.ssci.2021.105529}, abstract = {Nowadays, the industrial environment is characterised by growing competitiveness, short response times, cost reduction and reliability of production to meet customer needs. Thus, the new industrial paradigm of Industry 4.0 has gained interest worldwide, leading many manufacturers to a significant digital transformation. Digital technologies have enabled a novel approach to decision-making processes based on data-driven strategies, where knowledge extraction relies on the analysis of a large amount of data from sensor-equipped factories. In this context, Predictive Maintenance (PdM) based on Machine Learning (ML) is one of the most prominent data-driven analytical approaches for monitoring industrial systems aiming to maximise reliability and efficiency. In fact, PdM aims not only to reduce equipment failure rates but also to minimise operating costs by maximising equipment life. When considering industrial applications, industries deal with different issues and constraints relating to process digitalisation. The main purpose of this study is to develop a new decision support system based on decision trees (DTs) that guides the decision-making process of PdM implementation, considering context-aware information, quality and maturity of collected data, severity, occurrence and detectability of potential failures (identified through FMECA analysis) and direct and indirect maintenance costs. The decision trees allow the study of different scenarios to identify the conditions under which a PdM policy, based on the ML algorithm, is economically profitable compared to corrective maintenance, considered to be the current scenario. The results show that the proposed methodology is a simple and easy way to implement tool to support the decision process by assessing the different levels of occurrence and severity of failures. For each level, savings and the potential costs have been evaluated at leaf nodes of the trees aimed at defining the most suitable maintenance strategy implementation. Finally, the proposed DTs are applied to a real industrial case to illustrate their applicability and robustness.}, language = {en}, urldate = {2021-10-25}, journal = {Safety Science}, author = {Arena, S. and Florian, E. and Zennaro, I. and Orrù, P. F. and Sgarbossa, F.}, month = feb, year = {2022}, keywords = {Decision support system, Decision tree, FMECA, Machine learning, Predictive maintenance, Profitability}, pages = {105529}, }
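As a hedged reading of the cost comparison evaluated at the leaf nodes of the decision trees above (symbols are illustrative assumptions, not the paper's notation), a machine-learning-based predictive policy is preferred over corrective maintenance at a leaf when
\[
\underbrace{N_f \,\bigl(C_{\mathrm{corr}} - C_{\mathrm{pred}}\bigr)}_{\text{expected saving from avoided failures}} \;>\; \underbrace{C_{\mathrm{impl}} + C_{\mathrm{op}}}_{\text{cost of the PdM policy}},
\]
where \(N_f\) is the expected number of failures at the given occurrence and severity level, \(C_{\mathrm{corr}}\) and \(C_{\mathrm{pred}}\) are the per-failure costs under corrective and predictive maintenance, and \(C_{\mathrm{impl}}\) and \(C_{\mathrm{op}}\) are the implementation and operating costs of the PdM solution.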
@article{davari_survey_2021, title = {A {Survey} on {Data}-{Driven} {Predictive} {Maintenance} for the {Railway} {Industry}}, volume = {21}, copyright = {http://creativecommons.org/licenses/by/3.0/}, issn = {1424-8220}, url = {https://www.mdpi.com/1424-8220/21/17/5739}, doi = {10.3390/s21175739}, abstract = {In the last few years, many works have addressed Predictive Maintenance (PdM) by the use of Machine Learning (ML) and Deep Learning (DL) solutions, especially the latter. The monitoring and logging of industrial equipment events, like temporal behavior and fault events—anomaly detection in time-series—can be obtained from records generated by sensors installed in different parts of an industrial plant. However, such progress is incipient because we still have many challenges, and the performance of applications depends on the appropriate choice of the method. This article presents a survey of existing ML and DL techniques for handling PdM in the railway industry. This survey discusses the main approaches for this specific application within a taxonomy defined by the type of task, employed methods, metrics of evaluation, the specific equipment or process, and datasets. Lastly, we conclude and outline some suggestions for future research.}, language = {en}, number = {17}, urldate = {2023-10-26}, journal = {Sensors}, author = {Davari, Narjes and Veloso, Bruno and Costa, Gustavo de Assis and Pereira, Pedro Mota and Ribeiro, Rita P. and Gama, João}, month = jan, year = {2021}, note = {Number: 17 Publisher: Multidisciplinary Digital Publishing Institute}, keywords = {artificial intelligence, condition-based maintenance, deep learning, machine learning, predictive maintenance, railway industry}, pages = {5739}, }
@article{ji_autonomous_2021, title = {Autonomous underwater vehicle fault diagnosis dataset}, volume = {39}, issn = {2352-3409}, url = {https://www.sciencedirect.com/science/article/pii/S2352340921007587}, doi = {10.1016/j.dib.2021.107477}, abstract = {The dataset contains 1225 data samples for 5 fault types (labels). We divided the dataset into the training set and the test set through random stratified sampling. The test set accounted for 20\% of the total dataset. Our experimental subject is ‘Haizhe’, which is a small quadrotor AUV developed in the laboratory. For each fault type, ‘Haizhe’ was tested several times. For each time, ‘Haizhe’ ran the same program and sailed underwater for 10–20 s to ensure that state data was long enough. The state data recorded in each test were then used as a data sample, and the corresponding fault type was the true label of the data sample. The dataset was used to validate a model-free fault diagnosis method proposed in our paper [1] and the complete dynamic model of ‘Haizhe’ AUV was reported in [2].}, urldate = {2023-10-04}, journal = {Data in Brief}, author = {Ji, Daxiong and Yao, Xin and Li, Shuo and Tang, Yuangui and Tian, Yu}, month = dec, year = {2021}, keywords = {Autonomous underwater vehicles (AUV), Fault diagnosis, Fault type, Model-free, State data}, pages = {107477}, }
@article{inturi_comprehensive_2021, title = {Comprehensive fault diagnostics of wind turbine gearbox through adaptive condition monitoring scheme}, volume = {174}, issn = {0003-682X}, url = {https://www.sciencedirect.com/science/article/pii/S0003682X20308422}, doi = {10.1016/j.apacoust.2020.107738}, abstract = {The current work reports a multi-level classification to envisage the location, type/category and severity level of local defects at different stages of speed in a wind turbine gearbox with minimal human intervention. Experiments are conducted by subjecting a three-stage gearbox to fluctuating speeds with multiple sensors recording the real-time information generated. Wavelet coefficients are employed to extract the statistical features from the raw signatures decomposed through wavelet transform. A decision tree algorithm is used to identify features of significance and an integrated multi-variable feature data set is devised based on feature-level data fusion. The intended multi-level classification on the integrated feature data set is accomplished with the help of machine-learning algorithms. The results reveal that the adaptive neuro-fuzzy inference system (ANFIS) performs the intended four-level classification on the wind turbine gearbox with a classification accuracy of 92\%. Thus, the integration of multi-sensor information in conjunction with ANFIS as a classification algorithm, owing to its efficiency in predicting every possible detail about the health/condition of the different gearbox components, demonstrates its potential to be used as an adaptive condition monitoring scheme.}, language = {en}, urldate = {2023-06-07}, journal = {Applied Acoustics}, author = {Inturi, Vamsi and Shreyas, N. and Chetti, Karthick and Sabareesh, G. R.}, month = mar, year = {2021}, keywords = {ANFIS, Condition monitoring, Fault diagnosis, Multi-level classification, Wind turbine gearbox}, pages = {107738}, }
@article{mutlu_virtual_2021, title = {The virtual and the physical: two frames of mind}, volume = {24}, issn = {2589-0042}, shorttitle = {The virtual and the physical}, url = {https://www.sciencedirect.com/science/article/pii/S2589004220311627}, doi = {10.1016/j.isci.2020.101965}, abstract = {Virtual and physical embodiments of interactive artificial agents utilize similar core technologies for perception, planning, and interaction and engage with people in similar ways. Thus, designers have typically considered these embodiments to be broadly interchangeable, and the choice of embodiment primarily depends on the practical demands of an application. This paper makes the case that virtual and physical embodiments elicit fundamentally different “frames of mind” in the users of the technology and follow different metaphors for interaction, resulting in diverging expectations, forms of engagement, and eventually interaction outcomes. It illustrates these differences through the lens of five key mechanisms: “situativity, interactivity, agency, proxemics, and believability”. It also outlines the design implications of the two frames of mind, arguing for different domains of interaction serving as appropriate context for virtual and physical embodiments.}, language = {en}, number = {2}, urldate = {2023-03-08}, journal = {iScience}, author = {Mutlu, Bilge}, month = feb, year = {2021}, keywords = {Artificial Intelligence, Cognitive Neuroscience, Psychology, Social Sciences}, pages = {101965}, }
@article{vinanzi_collaborative_2021, title = {The collaborative mind: intention reading and trust in human-robot interaction}, volume = {24}, issn = {2589-0042}, shorttitle = {The collaborative mind}, url = {https://www.sciencedirect.com/science/article/pii/S2589004221000985}, doi = {10.1016/j.isci.2021.102130}, abstract = {Robots are likely to become important social actors in our future and so require more human-like ways of assisting us. We state that collaboration between humans and robots is fostered by two cognitive skills: intention reading and trust. An agent possessing these abilities would be able to infer the non-verbal intentions of others and to evaluate how likely they are to achieve their goals, jointly understanding what kind and which degree of collaboration they require. For this reason, we propose a developmental artificial cognitive architecture that integrates unsupervised machine learning and probabilistic models to imbue a humanoid robot with intention reading and trusting capabilities. Our experimental results show that the synergistic implementation of these cognitive skills enable the robot to cooperate in a meaningful way, with the intention reading model allowing a correct goal prediction and with the trust component enhancing the likelihood of a positive outcome for the task.}, language = {en}, number = {2}, urldate = {2023-03-08}, journal = {iScience}, author = {Vinanzi, Samuele and Cangelosi, Angelo and Goerick, Christian}, month = feb, year = {2021}, keywords = {Artificial Intelligence, Human-Centered Computing, Human-Computer Interaction}, pages = {102130}, }
@article{lepri_ethical_2021, title = {Ethical machines: {The} human-centric use of artificial intelligence}, volume = {24}, issn = {2589-0042}, shorttitle = {Ethical machines}, url = {https://www.sciencedirect.com/science/article/pii/S2589004221002170}, doi = {10.1016/j.isci.2021.102249}, abstract = {Today's increased availability of large amounts of human behavioral data and advances in artificial intelligence (AI) are contributing to a growing reliance on algorithms to make consequential decisions for humans, including those related to access to credit or medical treatments, hiring, etc. Algorithmic decision-making processes might lead to more objective decisions than those made by humans who may be influenced by prejudice, conflicts of interest, or fatigue. However, algorithmic decision-making has been criticized for its potential to lead to privacy invasion, information asymmetry, opacity, and discrimination. In this paper, we describe available technical solutions in three large areas that we consider to be of critical importance to achieve a human-centric AI: (1) privacy and data ownership; (2) accountability and transparency; and (3) fairness. We also highlight the criticality and urgency to engage multi-disciplinary teams of researchers, practitioners, policy makers, and citizens to co-develop and evaluate in the real-world algorithmic decision-making processes designed to maximize fairness, accountability, and transparency while respecting privacy.}, language = {en}, number = {3}, urldate = {2023-03-08}, journal = {iScience}, author = {Lepri, Bruno and Oliver, Nuria and Pentland, Alex}, month = mar, year = {2021}, keywords = {Algorithms, Artificial Intelligence, Computer Privacy}, pages = {102249}, }
@article{franklin_blaming_2021, title = {Blaming automated vehicles in difficult situations}, volume = {24}, issn = {2589-0042}, url = {https://www.sciencedirect.com/science/article/pii/S2589004221002200}, doi = {10.1016/j.isci.2021.102252}, abstract = {Automated vehicles (AVs) have made huge strides toward large-scale deployment. Despite this progress, AVs continue to make mistakes, some resulting in death. Although some mistakes are avoidable, others are hard to avoid even by highly skilled drivers. As these mistakes continue to shape attitudes toward AVs, we need to understand whether people differentiate between them. We ask the following two questions. When an AV makes a mistake, does the perceived difficulty or novelty of the situation predict blame attributed to it? How does that blame attribution compare to a human driving a car? Through two studies, we find that the amount of blame people attribute to AVs and human drivers is sensitive to situation difficulty. However, while some situations could be more difficult for AVs and others for human drivers, people blamed AVs more, regardless. Our results provide novel insights in understanding psychological barriers influencing the public's view of AVs.}, language = {en}, number = {4}, urldate = {2023-03-08}, journal = {iScience}, author = {Franklin, Matija and Awad, Edmond and Lagnado, David}, month = apr, year = {2021}, keywords = {Artificial Intelligence, Psychology, Research Methodology Social Sciences}, pages = {102252}, }
@article{nonaka_brain_2021, title = {Brain hierarchy score: {Which} deep neural networks are hierarchically brain-like?}, volume = {24}, issn = {2589-0042}, shorttitle = {Brain hierarchy score}, url = {https://www.sciencedirect.com/science/article/pii/S2589004221009810}, doi = {10.1016/j.isci.2021.103013}, abstract = {Achievement of human-level image recognition by deep neural networks (DNNs) has spurred interest in whether and how DNNs are brain-like. Both DNNs and the visual cortex perform hierarchical processing, and correspondence has been shown between hierarchical visual areas and DNN layers in representing visual features. Here, we propose the brain hierarchy (BH) score as a metric to quantify the degree of hierarchical correspondence based on neural decoding and encoding analyses where DNN unit activations and human brain activity are predicted from each other. We find that BH scores for 29 pre-trained DNNs with various architectures are negatively correlated with image recognition performance, thus indicating that recently developed high-performance DNNs are not necessarily brain-like. Experimental manipulations of DNN models suggest that single-path sequential feedforward architecture with broad spatial integration is critical to brain-like hierarchy. Our method may provide new ways to design DNNs in light of their representational homology to the brain.}, language = {en}, number = {9}, urldate = {2023-03-08}, journal = {iScience}, author = {Nonaka, Soma and Majima, Kei and Aoki, Shuntaro C. and Kamitani, Yukiyasu}, month = sep, year = {2021}, keywords = {Human-centered computing, Neural networks, Neuroscience}, pages = {103013}, }
@inproceedings{turgis_prognostic_2021, address = {Angers, France}, title = {Prognostic {Expert} {System} for {Railway} {Fleet} {Maintenance}}, doi = {10.3850/978-981-18-2016-8_391-cd}, booktitle = {Proceedings of the 31st {European} {Safety} and {Reliability} {Conference}}, author = {Turgis, Fabien and Audier, Pierre and Marion, Rémy}, month = sep, year = {2021}, pages = {2111}, }
@misc{lee_predicting_2021, title = {Predicting {What} {You} {Already} {Know} {Helps}: {Provable} {Self}-{Supervised} {Learning}}, shorttitle = {Predicting {What} {You} {Already} {Know} {Helps}}, url = {http://arxiv.org/abs/2008.01064}, doi = {10.48550/arXiv.2008.01064}, abstract = {Self-supervised representation learning solves auxiliary prediction tasks (known as pretext tasks) without requiring labeled data to learn useful semantic representations. These pretext tasks are created solely using the input features, such as predicting a missing image patch, recovering the color channels of an image from context, or predicting missing words in text; yet predicting this {\textbackslash}textit\{known\} information helps in learning representations effective for downstream prediction tasks. We posit a mechanism exploiting the statistical connections between certain \{{\textbackslash}em reconstruction-based\} pretext tasks that guarantee to learn a good representation. Formally, we quantify how the approximate independence between the components of the pretext task (conditional on the label and latent variables) allows us to learn representations that can solve the downstream task by just training a linear layer on top of the learned representation. We prove the linear layer yields small approximation error even for complex ground truth function class and will drastically reduce labeled sample complexity. Next, we show a simple modification of our method leads to nonlinear CCA, analogous to the popular SimSiam algorithm, and show similar guarantees for nonlinear CCA.}, urldate = {2022-12-07}, publisher = {arXiv}, author = {Lee, Jason D. and Lei, Qi and Saunshi, Nikunj and Zhuo, Jiacheng}, month = nov, year = {2021}, note = {arXiv:2008.01064 [cs, stat]}, keywords = {Computer Science - Machine Learning, Statistics - Machine Learning}, }
@article{putina_online_2021, title = {Online {Anomaly} {Detection} {Leveraging} {Stream}-{Based} {Clustering} and {Real}-{Time} {Telemetry}}, volume = {18}, issn = {1932-4537}, doi = {10.1109/TNSM.2020.3037019}, abstract = {Recent technology evolution allows network equipment to continuously stream a wealth of “telemetry” information, which pertains to multiple protocols and layers of the stack, at a very fine spatial-grain and high-frequency. This deluge of telemetry data clearly offers new opportunities for network control and troubleshooting, but also poses a serious challenge for what concerns its real-time processing. We tackle this challenge by applying streaming machine-learning techniques to the continuous flow of control and data-plane telemetry data, with the purpose of real-time detection of anomalies. In particular, we implement an anomaly detection engine that leverages DenStream, an unsupervised clustering technique, and apply it to features collected from a large-scale testbed comprising tens of routers traversed up to 3Terabit/s worth of real application traffic. We contrast DenStream with offline algorithms such as DBScan and Local Outlier Factor (LOF), as well as online algorithms such as the windowed version of DBScan, ExactSTORM, Continuous Outlier Detection (COD) and Robust Random Cut Forest (RRCF). Our experimental campaign compares these seven algorithms under both accuracy and computational complexity viewpoints: results testify that DenStream (i) achieves detection results on par with RRCF, the best performing algorithm and (ii) is significantly faster than other approaches, notably over two orders of magnitude faster than RRCF. In spirit with the recent trend toward reproducibility of results, we make our code available as open source to the scientific community.}, number = {1}, journal = {IEEE Transactions on Network and Service Management}, author = {Putina, Andrian and Rossi, Dario}, month = mar, year = {2021}, note = {Conference Name: IEEE Transactions on Network and Service Management}, keywords = {Anomaly detection, Anomaly detection algorithms, Feature extraction, Principal component analysis, Protocols, Real-time systems, Support vector machines, Telemetry, machine learning, model driven telemetry, network monitoring and measurements, stream learning}, pages = {839--854}, }
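The DenStream-based pipeline summarized above maintains decaying micro-clusters over the telemetry stream and flags points that fit no existing cluster. Below is a minimal, self-contained sketch of that general idea (decaying micro-clusters with a radius test); it is not the paper's implementation nor the exact DenStream algorithm, and the radius, decay factor, and feature dimension are illustrative assumptions.

import numpy as np

class MicroCluster:
    def __init__(self, x, t):
        self.center, self.weight, self.last_update = np.asarray(x, float), 1.0, t

    def decay(self, t, lam=0.01):
        self.weight *= 2 ** (-lam * (t - self.last_update))
        self.last_update = t

    def absorb(self, x):
        x = np.asarray(x, float)
        self.center = (self.center * self.weight + x) / (self.weight + 1.0)
        self.weight += 1.0

class StreamingAnomalyDetector:
    """Toy stream clustering: a point far from every live micro-cluster is flagged."""
    def __init__(self, radius=2.0, lam=0.01, min_weight=0.5):
        self.radius, self.lam, self.min_weight = radius, lam, min_weight
        self.clusters, self.t = [], 0

    def update(self, x):
        self.t += 1
        for c in self.clusters:
            c.decay(self.t, self.lam)
        self.clusters = [c for c in self.clusters if c.weight >= self.min_weight]
        if self.clusters:
            dists = [np.linalg.norm(c.center - x) for c in self.clusters]
            nearest = int(np.argmin(dists))
            if dists[nearest] <= self.radius:
                self.clusters[nearest].absorb(x)
                return False                    # fits an existing cluster: normal
        self.clusters.append(MicroCluster(x, self.t))
        return True                             # started a new cluster: anomaly candidate

# Illustrative usage on a synthetic telemetry stream of 4 features.
rng = np.random.default_rng(1)
detector = StreamingAnomalyDetector()
for i in range(1000):
    point = rng.normal(size=4) + (10 if i == 500 else 0)   # inject one outlier
    if detector.update(point):
        print("anomaly candidate at sample", i)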
@inproceedings{winkler_big_2021, title = {Big {Data} {Needs} and {Challenges} in {Smart} {Manufacturing}: {An} {Industry}-{Academia} {Survey}}, shorttitle = {Big {Data} {Needs} and {Challenges} in {Smart} {Manufacturing}}, doi = {10.1109/ETFA45728.2021.9613600}, abstract = {The increasing availability of data in Smart Manufacturing opens new challenges and required capabilities in the area of big data in industry and academia. Various organizations have started initiatives to collect and analyse data in their individual contexts with specific goals, e.g., for monitoring, optimization, or decision support in order to reduce risks and costs in their manufacturing systems. However, the variety of available application areas require to focus on most promising activities. Therefore, we see the need for investigating common challenges and priorities in academia and industry from expert and management perspective to identify the state of the practice and promising application areas for driving future research directions. The goal of this paper is to report on an industry-academia survey to capture the current state of the art, required capabilities and priorities in the area of big data applications. Therefore, we conducted a survey in winter 2020/21 in industry and academia. We received 22 responses from different application domains highlighting the need for supporting (a) fault detection and (b) fault classification based on (c) historical and (d) real-time data analysis concepts. Therefore, the survey results reveals current and upcoming challenges in big data applications, such as defect handling based on historical and real-time data.}, booktitle = {2021 26th {IEEE} {International} {Conference} on {Emerging} {Technologies} and {Factory} {Automation} ({ETFA} )}, author = {Winkler, Dietmar and Korobeinykov, Alexander and Novák, Petr and Lüder, Arndt and Biffl, Stefan}, month = sep, year = {2021}, keywords = {Big Data Application, Big Data applications, Fault detection, Industries, Monitoring, Optimization, Organizations, Real-time systems, Required Capabilities, Smart Manufacturing, State of the Practice, Survey}, pages = {1--8}, }
@article{pappaterra_systematic_2021, title = {A {Systematic} {Review} of {Artificial} {Intelligence} {Public} {Datasets} for {Railway} {Applications}}, volume = {6}, copyright = {http://creativecommons.org/licenses/by/3.0/}, issn = {2412-3811}, url = {https://www.mdpi.com/2412-3811/6/10/136}, doi = {10.3390/infrastructures6100136}, abstract = {The aim of this paper is to review existing publicly available and open artificial intelligence (AI) oriented datasets in different domains and subdomains of the railway sector. The contribution of this paper is an overview of AI-oriented railway data published under Creative Commons (CC) or any other copyright type that entails public availability and freedom of use. These data are of great value for open research and publications related to the application of AI in the railway sector. This paper includes insights on the public railway data: we distinguish different subdomains, including maintenance and inspection, traffic planning and management, safety and security and type of data including numerical, string, image and other. The datasets reviewed cover the last three decades, from January 1990 to January 2021. The study revealed that the number of open datasets is very small in comparison with the available literature related to AI applications in the railway industry. Another shortcoming is the lack of documentation and metadata on public datasets, including information related to missing data, collection schemes and other limitations. This study also presents quantitative data, such as the number of available open datasets divided by railway application, type of data and year of publication. This review also reveals that there are openly available APIs—maintained by government organizations and train operating companies (TOCs)—that can be of great use for data harvesting and can facilitate the creation of large public datasets. These data are usually well-curated real-time data that can greatly contribute to the accuracy of AI models. Furthermore, we conclude that the extension of AI applications in the railway sector merits a centralized hub for publicly available datasets and open APIs.}, language = {en}, number = {10}, urldate = {2022-05-15}, journal = {Infrastructures}, author = {Pappaterra, Mauro José and Flammini, Francesco and Vittorini, Valeria and Bešinović, Nikola}, month = oct, year = {2021}, note = {Number: 10 Publisher: Multidisciplinary Digital Publishing Institute}, keywords = {intelligent transportation, machine learning, predictive maintenance, public datasets, railways}, pages = {136}, }
@article{lomov_fault_2021, title = {Fault detection in {Tennessee} {Eastman} process with temporal deep learning models}, volume = {23}, issn = {2452-414X}, url = {https://www.sciencedirect.com/science/article/pii/S2452414X21000145}, doi = {10.1016/j.jii.2021.100216}, abstract = {Automated early process fault detection and prediction remains a challenging problem in industrial processes. Traditionally it has been done by multivariate statistical analysis of sensor readings and, more recently, with the help of machine learning methods. The quality of machine learning models strongly depends on feature engineering, that in turn heavily relies on expertise of the process engineers and model developers. With the recent advent of deep learning neural network methods and abundance of available sensor data, it became possible to develop advanced approaches to early fault detection and prediction that do not require feature engineering and provide more accurate and timely results. In this paper we investigate a wide range of recurrent and convolutional architectures on the publicly available simulated Tennessee Eastman Process extended TEP dataset for the fault detection in chemical processes. We have selected the best architecture for the task and proposed a novel temporal CNN1D2D architecture that achieves overall better performance on the dataset than any referenced method. We have also proposed to use Generative Adversarial Network GAN to extend and enrich data used in training.}, language = {en}, urldate = {2022-05-02}, journal = {Journal of Industrial Information Integration}, author = {Lomov, Ildar and Lyubimov, Mark and Makarov, Ilya and Zhukov, Leonid E.}, month = sep, year = {2021}, keywords = {Chemical processes, Deep learning, Fault detection, Generative adversarial networks, Industrial data integration, Industrial data management, Industrial machine learning, Tennessee Eastman process}, pages = {100216}, }
@article{zubaroglu_data_2021, title = {Data stream clustering: a review}, volume = {54}, issn = {1573-7462}, shorttitle = {Data stream clustering}, url = {https://doi.org/10.1007/s10462-020-09874-x}, doi = {10.1007/s10462-020-09874-x}, abstract = {Number of connected devices is steadily increasing and these devices continuously generate data streams. Real-time processing of data streams is arousing interest despite many challenges. Clustering is one of the most suitable methods for real-time data stream processing, because it can be applied with less prior information about the data and it does not need labeled instances. However, data stream clustering differs from traditional clustering in many aspects and it has several challenging issues. Here, we provide information regarding the concepts and common characteristics of data streams, such as concept drift, data structures for data streams, time window models and outlier detection. We comprehensively review recent data stream clustering algorithms and analyze them in terms of the base clustering technique, computational complexity and clustering accuracy. A comparison of these algorithms is given along with still open problems. We indicate popular data stream repositories and datasets, stream processing tools and platforms. Open problems about data stream clustering are also discussed.}, language = {en}, number = {2}, urldate = {2022-03-25}, journal = {Artificial Intelligence Review}, author = {Zubaroğlu, Alaettin and Atalay, Volkan}, month = feb, year = {2021}, pages = {1201--1236}, }
@incollection{cerri_new_2021, address = {New York, NY, USA}, title = {A new self-organizing map based algorithm for multi-label stream classification}, isbn = {978-1-4503-8104-8}, url = {https://doi.org/10.1145/3412841.3441922}, abstract = {Several algorithms have been proposed for offline multi-label classification. However, applications in areas such as traffic monitoring, social networks, and sensors produce data continuously, the so called data streams, posing challenges to batch multi-label learning. With the lack of stationarity in the distribution of data streams, new algorithms are needed to online adapt to such changes (concept drift). Also, in realistic applications, changes occur in scenarios with infinitely delayed labels, where the true classes of the arrival instances are never available. We propose an online unsupervised incremental method based on self-organizing maps for multi-label stream classification in scenarios with infinitely delayed labels. We consider the existence of an initial set of labeled instances to train a self-organizing map for each label. The learned models are then used and adapted in an evolving stream to classify new instances, considering that their classes will never be available. We adapt to incremental concept drifts by online updating the weight vectors of winner neurons and the dataset label cardinality. Predictions are obtained using the Bayes rule and the outputs of each neuron, adapting the prior probabilities and conditional probabilities of the classes in the stream. Experiments using synthetic and real datasets show that our method is highly competitive with several ones from the literature, in both stationary and concept drift scenarios.}, urldate = {2022-03-17}, booktitle = {Proceedings of the 36th {Annual} {ACM} {Symposium} on {Applied} {Computing}}, publisher = {Association for Computing Machinery}, author = {Cerri, Ricardo and Junior, Joel David C. and Faria, Elaine. R. and Gama, João}, month = mar, year = {2021}, keywords = {classification, concept drift, data streams, machine learning, multi-label, self-organizing maps}, pages = {418--426}, }
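The entry above trains one self-organizing map per label and combines neuron outputs through Bayes' rule under infinitely delayed labels. A heavily simplified sketch of the per-label-SOM idea (without the Bayesian combination or the online weight updates) could look as follows, assuming the third-party minisom package; the map size, distance threshold, and synthetic data are illustrative.

import numpy as np
from minisom import MiniSom

rng = np.random.default_rng(2)
n_features, n_labels = 8, 3

# Hypothetical initial labeled set: Y[i, k] == 1 if instance i carries label k.
X_init = rng.normal(size=(300, n_features))
Y_init = (rng.random((300, n_labels)) < 0.3).astype(int)

# One small SOM per label, trained only on the instances carrying that label.
soms = []
for k in range(n_labels):
    som = MiniSom(5, 5, n_features, sigma=1.0, learning_rate=0.5, random_seed=0)
    positives = X_init[Y_init[:, k] == 1]
    som.train_random(positives, 500)
    soms.append(som)

def predict_labels(x, threshold=3.0):
    """Assign label k if x is close enough to its SOM's best-matching unit."""
    pred = []
    for k, som in enumerate(soms):
        i, j = som.winner(x)
        dist = np.linalg.norm(som.get_weights()[i, j] - x)
        pred.append(int(dist < threshold))
    return pred

# Stream phase: the true labels of arriving instances are assumed never to be revealed.
for x in rng.normal(size=(5, n_features)):
    print(predict_labels(x))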
@inproceedings{bahri_survey_2021, address = {Yokohama, Yokohama, Japan}, series = {{IJCAI}'20}, title = {Survey on feature transformation techniques for data streams}, isbn = {978-0-9992411-6-5}, abstract = {Mining high-dimensional data streams poses a fundamental challenge to machine learning as the presence of high numbers of attributes can remarkably degrade any mining task's performance. In the past several years, dimension reduction (DR) approaches have been successfully applied for different purposes (e.g., visualization). Due to their high-computational costs and numerous passes over large data, these approaches pose a hindrance when processing infinite data streams that are potentially high-dimensional. The latter increases the resource-usage of algorithms that could suffer from the curse of dimensionality. To cope with these issues, some techniques for incremental DR have been proposed. In this paper, we provide a survey on reduction approaches designed to handle data streams and highlight the key benefits of using these approaches for stream mining algorithms.}, urldate = {2022-03-15}, booktitle = {Proceedings of the {Twenty}-{Ninth} {International} {Joint} {Conference} on {Artificial} {Intelligence}}, author = {Bahri, Maroua and Bifet, Albert and Maniu, Silviu and Gomes, Heitor Murilo}, month = jan, year = {2021}, pages = {4796--4802}, }
@article{zhong_long_2021, title = {Long short-term memory self-adapting online random forests for evolving data stream regression}, volume = {457}, issn = {0925-2312}, url = {https://www.sciencedirect.com/science/article/pii/S0925231221007797}, doi = {10.1016/j.neucom.2021.05.026}, abstract = {Evolving data stream, especially with concept drift is generally accepted as a challenging data type for regression task, because it usually makes machine learning models trained on old data not adapting to new data, and leads to dramatic performance degradation as a result. Moreover, the behavior of a data stream may change in different modes and therefore introduces various concept drifts, e.g., abrupt, incremental, gradual, recurring, even more, complex concept drifts. Although there are some algorithms that can adapt to stationary data streams or a specific type of concept drift in non-stationary data streams, a wide range of practical applications call for machine learning regression models to handle multi-type of data streams. In this work, we propose an online learning strategy called adaptive long and short-term memories online Random Forests regression(ALSM-RFR), where an adaptive memory activation mechanism is designed to make the model switch adaptively between long-term and hybrid (long-term plus short-term) memory modes in the face of stationary data streams or non-stationary data streams with different types of concept drift. In particular, leaf and tree weights in random forests are used to learn information at different timescales, namely, long-term and short-term memories. Moreover, we devise an adaptive memory activation mechanism to formulate the switch decision of memory modes as a classification problem. Numerical experiments show remarkable improvements of the proposed method in the adaptability of stream types and predictive accuracy in data streams across several real datasets and synthetic datasets, compared to the state-of-the-art online approaches. Besides, the convergence and the influence of the parameters involved in our method are evaluated.}, language = {en}, urldate = {2022-03-14}, journal = {Neurocomputing}, author = {Zhong, Yuan and Yang, Hongyu and Zhang, Yanci and Li, Ping and Ren, Cheng}, month = oct, year = {2021}, keywords = {Adaptive memory activation mechanism, Concept drift, Data stream, Long-term and short-term memory, Online learning, Online random forests regression}, pages = {265--276}, }
@article{duan_novel_2021, title = {A novel {ResNet}-based model structure and its applications in machine health monitoring}, volume = {27}, issn = {1077-5463}, url = {https://doi.org/10.1177/1077546320936506}, doi = {10.1177/1077546320936506}, abstract = {Machine health monitoring has become increasingly important in modern manufacturers because of its ability to reduce downtime of the machine and cut down the production cost. Enormous signals acquired from machinery are capable of reflecting current working conditions by in-depth analysis with various data-driven methods. Hand-crafted feature extraction and representation from the traditional methods are essential but daunting tasks, and these methods may not be suitable for these massive data. Compared with traditional methods, deep learning ones are able to extract the best feature combination during model training without any artificial intervention, which makes it easier, more efficient, and more effective to monitor machine health, but the training cost and training time hamper its application. The short-time Fourier transform is adopted as the data preprocessing method to cut down the training cost and boost the training procedure. Inspired by the great achievements of ResNet, the new optimized model based on ResNet has been proposed with layer-by-layer dimension reduction of the feature maps. The proposed model is also able to avoid information loss in the conventional pooling layer. All the potential candidate model blocks are introduced and compared, and the best one is selected as the final one. Repeated model block layers are adapted for the best feature combinations, followed by a two-layer full connection layer for the final targets. The proposed method is validated by conducting experiments on bearing fault diagnosis and tool wear prediction dataset. The final results show that the proposed model achieves the best accuracy rate in the classification task and the lowest root mean squared error in the prediction task.}, language = {en}, number = {9-10}, urldate = {2022-02-09}, journal = {Journal of Vibration and Control}, author = {Duan, Jian and Shi, Tielin and Zhou, Hongdi and Xuan, Jianping and Wang, Shuhua}, month = may, year = {2021}, note = {Publisher: SAGE Publications Ltd STM}, keywords = {ResNet, bearing, convolution neural network, machine health monitoring, tool Wear}, pages = {1036--1050}, }
@article{zhu_decoupled_2021, title = {Decoupled {Feature}-{Temporal} {CNN}: {Explaining} {Deep} {Learning}-{Based} {Machine} {Health} {Monitoring}}, volume = {70}, issn = {1557-9662}, shorttitle = {Decoupled {Feature}-{Temporal} {CNN}}, doi = {10.1109/TIM.2021.3084310}, abstract = {Machine learning, especially deep learning, has been extensively applied and studied in the area of machine health monitoring. For machine health monitoring systems (MHMS), major efforts have been put into designing and deploying more and more complex machine learning models. Those black-box models are nontransparent toward their working mechanism. However, this research trend brings huge potential risks in real life. Since machine health monitoring itself belongs to high stake decision applications, the outputs of the autonomous monitoring systems should be trustworthy and reliable, which refers to obtain explainability. Then, it comes to the following key question: why the deployed MHMS predicts what they predict. In this article, we shed some light on this meaningful research direction: explainable MHMSs (EMHMS). In EMHMS, the machine doctor could act like a real doctor who can not only make a diagnosis but also describe the patient's symptoms. First, we propose a specific convolutional neural network (CNN) structure, named DecouplEd Feature-Temporal CNN (DEFT-CNN), to balance precision-explainability tradeoff. Specifically, feature information and temporal information have been encoded in different stages of our model. The spatial attention module is added to boost the performance of the model. Then, to explain the decision of the model, we adopt gradient-based methods to generate features and temporal saliency maps highlighting which kinds of features and time steps are keys for the model's predictions. Finally, we conduct the experimental studies on two real datasets to verify the effectiveness of our proposed framework.}, journal = {IEEE Transactions on Instrumentation and Measurement}, author = {Zhu, Chaoyi and Chen, Zhenghua and Zhao, Rui and Wang, Jinjiang and Yan, Ruqiang}, year = {2021}, note = {Conference Name: IEEE Transactions on Instrumentation and Measurement}, keywords = {Attention mechanism, Computational modeling, Data models, Deep learning, Monitoring, Neural networks, Predictive models, Time-frequency analysis, convolutional neural network (CNN), deep learning, ecml, explainable machine learning, machine health monitoring}, pages = {1--13}, }
@article{liu_lstm-gan-ae_2021, title = {{LSTM}-{GAN}-{AE}: {A} {Promising} {Approach} for {Fault} {Diagnosis} in {Machine} {Health} {Monitoring}}, issn = {1557-9662}, shorttitle = {{LSTM}-{GAN}-{AE}}, doi = {10.1109/TIM.2021.3135328}, abstract = {Recent years have witnessed that, real-time health monitoring for machine gains more and more importance with the goal of achieving fault diagnosis and predictive maintenance. Conventional diagnosis methods face formidable challenges imposed by the high requirement for expert knowledge and extensive labor. The diagnosis scheme based on deep learning (DL) models has served as a promising solution and achieved great success. However, many of these DL-based models are fail to extract critical temporal information thoroughly. In addition, it is difficult to apply them to machine health monitoring (MHM) in real-time as those methods take long time for diagnosis in practice. To address the aforementioned issues, this paper introduces a novel intelligent fault diagnosis algorithm with three stages for MHM. It is a hybrid framework that combines generative adversarial networks (GAN) and auto-encoder (AE) based on the bi-directional long short-term memory (bi-LSTM). Firstly, GAN is employed to obtain the reconstruction residual and learn the discriminative representation. Then, AE is used to perform the critical temporal features extraction and dimension reduction. Finally, the supervised learning model is constructed to integrate feature information and predict diagnosis results. To verify the effectiveness of the proposed algorithm, typical rolling bearing datasets are taken as trial data. Preliminary simulation results demonstrate that the proposed algorithm achieves superior performance compared to the competing methods.}, journal = {IEEE Transactions on Instrumentation and Measurement}, author = {Liu, Haoqiang and Zhao, Hongbo and Wang, Jiayue and Yuan, Shuai and Feng, Wenquan}, year = {2021}, note = {Conference Name: IEEE Transactions on Instrumentation and Measurement}, keywords = {Data mining, Deep learning, Fault diagnosis, Feature extraction, Generative adversarial networks, Logic gates, Monitoring, Support vector machines, ecml, fault diagnosis, long short-term memory (LSTM), machine health monitoring}, pages = {1--1}, }
@article{hou_adaptive_2021, title = {Adaptive {Weighted} {Signal} {Preprocessing} {Technique} for {Machine} {Health} {Monitoring}}, volume = {70}, issn = {1557-9662}, doi = {10.1109/TIM.2020.3033471}, abstract = {Machine health monitoring (MHM) aims to timely detect the incipient faults and monotonically assess the machine degradation tendency for prediction of remaining useful life (RUL), which is the basis of condition-based maintenance. Construction of a health index (HI) is a core step to realize the aforementioned purposes. Among the existing HIs, sparsity measures (SMs), including kurtosis, smoothness index, Gini index, and negative entropy, have shown promising applications in fault diagnosis of rotating machines because they are widely used to quantify the repetitive transients caused by rotating faults. However, drawbacks of SMs for MHM still exist and they are that: 1) SMs are too fluctuating to detect the incipient faults; 2) SMs are prone to be affected by impulsive noise; and 3) SMs might not exhibit monotonic degradation tendency. To enhance the abilities of SMs as HIs for MHM, some improvements on SMs, coined as an adaptive weighted signal preprocessing technique (AWSPT), are proposed in this article. Subsequently, theoretical values of AWSPT-based SMs under healthy states are investigated. Numerical experiments reveal that AWSPT-based SMs can quantify the cyclostationarity and they are robust to the effects of impulsive noise. Bearing and gear run-to-failure data sets are used to show that the proposed AWSPT-based SMs can simultaneously detect the early bearing and gear faults and provide the monotonic degradation tendency. Moreover, AWSPT-based SMs are more effective in selecting an optimal envelope demodulation frequency band than traditional SMs.}, journal = {IEEE Transactions on Instrumentation and Measurement}, author = {Hou, Bingchang and Wang, Dong and Wang, Yi and Yan, Tongtong and Peng, Zhike and Tsui, Kwok-Leung}, year = {2021}, note = {Conference Name: IEEE Transactions on Instrumentation and Measurement}, keywords = {Degradation, Gaussian noise, Gears, Health index (HI), Indexes, Monitoring, Probability density function, Transient analysis, ecml, health monitoring, incipient fault detection, monotonic degradation assessment, sparsity measures (SMs)}, pages = {1--11}, }
@article{han_online_2021, title = {Online fault diagnosis for sucker rod pumping well by optimized density peak clustering}, issn = {0019-0578}, url = {https://www.sciencedirect.com/science/article/pii/S0019057821001634}, doi = {10.1016/j.isatra.2021.03.022}, abstract = {Online diagnosis for sucker rod pumping well has great significances for rapidly grasping operations of the oil well. Feature extraction of the working condition and determination of the online diagnostic algorithm are two indispensable parts. In this paper, five feature vectors are extracted using Freeman chain codes. Then, an optimized density peak clustering (DPC) method is proposed to realize online diagnosis solved by an improved brain storm optimization (BSO) algorithm, in which the cloud model is adopted to generate new solutions in the searching space. During the online diagnosis process, a new cluster updating strategy is used to update the cluster centers online. According to the proposed online diagnostic method, various samples are automatically classified into different classifications by the unsupervised learning. The simulation results verify that the proposed online diagnosis method is satisfactory, which can give a higher and more stable diagnostic accuracy.}, language = {en}, urldate = {2022-01-14}, journal = {ISA Transactions}, author = {Han, Ying and Li, Kun and Ge, Fawei and Wang, Yi’an and Xu, Wensu}, month = mar, year = {2021}, keywords = {Density peak clustering, Dynamometer card, Fault diagnosis, Optimization, Sucker rod pumping well}, }
@article{pham_data-driven_2021, title = {Data-driven fault detection of open circuits in multi-phase inverters based on current polarity using {Auto}-adaptive and {Dynamical} {Clustering}}, volume = {113}, issn = {0019-0578}, url = {https://www.sciencedirect.com/science/article/pii/S0019057820302561}, doi = {10.1016/j.isatra.2020.06.009}, abstract = {This paper proposes a data-driven method for the detection and isolation of open-circuit faults in multi-phase inverters using measurements of the motor currents. First, feature variables are formulated in terms of the averages of the phase currents and their absolute values. Next, by using an AUto-adaptive and Dynamical Clustering (AUDyC) based on Gaussian Mixture Models, feature data is clustered into different classes characterizing normal and faulty operation modes. Afterwards, these classes are used for deriving appropriate conditions for detecting and labelling faults. The proposed method requires minimal knowledge about the system operation. Furthermore, it allows us to update our knowledge of existing faults online, thus making it possible to detect unknown faults. Moreover, conditions are formulated to describe the influence of the method parameters on the detection time. Once parameters are tuned, the accuracy of the proposed method is illustrated on various experimental data sets, where single and double faults are detected with detection times in the order of the fundamental signal period.}, language = {en}, urldate = {2022-01-14}, journal = {ISA Transactions}, author = {Pham, Thanh-Hung and Lefteriu, Sanda and Duviella, Eric and Lecoeuche, Stéphane}, month = jul, year = {2021}, pages = {185--195}, }
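The abstract above builds feature variables from the averages of the phase currents and of their absolute values, then clusters them with a Gaussian-mixture-based method (AUDyC). The sketch below uses a plain, non-adaptive scikit-learn GaussianMixture as a stand-in for AUDyC, purely to illustrate the feature construction; the window length, number of phases, fault injection, and number of modes are illustrative assumptions.

import numpy as np
from sklearn.mixture import GaussianMixture

def current_features(currents, window=200):
    """currents: array of shape (n_samples, n_phases) of measured phase currents.
    Returns, per non-overlapping window, the mean of each phase current and the
    mean of its absolute value (the feature variables described in the abstract)."""
    n = (len(currents) // window) * window
    windows = currents[:n].reshape(-1, window, currents.shape[1])
    mean_i = windows.mean(axis=1)               # sensitive to a missing current polarity
    mean_abs_i = np.abs(windows).mean(axis=1)
    return np.hstack([mean_i, mean_abs_i])

# Synthetic three-phase currents; an open-circuit-like fault removes the negative
# half-wave of phase 0 in the second half of the record (illustrative only).
rng = np.random.default_rng(3)
t = np.arange(20000)
currents = np.sin(2 * np.pi * t[:, None] / 100 + np.array([0, 2, 4]) * np.pi / 3)
currents += 0.05 * rng.normal(size=currents.shape)
faulty = currents.copy()
faulty[10000:, 0] = np.clip(faulty[10000:, 0], 0, None)

X = current_features(faulty)
gmm = GaussianMixture(n_components=2, random_state=0).fit(X)
print(gmm.predict(X))   # windows should split into a normal mode and a faulty mode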
@article{wei_learning_2021, title = {Learning the health index of complex systems using dynamic conditional variational autoencoders}, volume = {216}, issn = {0951-8320}, url = {https://www.sciencedirect.com/science/article/pii/S0951832021005147}, doi = {10.1016/j.ress.2021.108004}, abstract = {Recent advances in sensing technologies have enabled engineers to collect big data to predict the remaining useful life (RUL) of complex systems. Current modeling techniques for RUL predictions are usually not able to quantify the degradation behavior of a complex system through a health index. Although some studies have been conducted to learn the health index of degradation systems, most of the existing methods are highly dependent on pre-defined assumptions which may not be consistent with the real degradation behaviors. To address this issue, we introduce a time-dependent directed graphical model to characterize the probabilistic relationships among sensor signals, RUL, operational conditions, and health index. Based on the graphical model, a dynamic conditional variational autoencoder is proposed to learn the health index. The experimental results have shown that the proposed method can learn an effective and reliable health index that measures complex system degradation behavior. Moreover, the learned health index improves the accuracy of RUL predictions.}, language = {en}, urldate = {2022-01-13}, journal = {Reliability Engineering \& System Safety}, author = {Wei, Yupeng and Wu, Dazhong and Terpenny, Janis}, month = dec, year = {2021}, keywords = {Data fusion, Deep learning, Degradation modeling, Multiple sensors, Remaining useful life, sigkdd-rw}, pages = {108004}, }
@article{melani_framework_2021, title = {A framework to automate fault detection and diagnosis based on moving window principal component analysis and {Bayesian} network}, volume = {215}, issn = {0951-8320}, url = {https://www.sciencedirect.com/science/article/pii/S0951832021003574}, doi = {10.1016/j.ress.2021.107837}, abstract = {Through Condition-Based Maintenance strategy, planners can monitor the health of the machinery and recommend actions based on the information obtained. Nevertheless, this approach depends on the successful establishment of Fault Detection and Diagnosis (FDD) processes. Although FDD is a research area in full growth with the development of several methods and heuristics, the availability of data from systems under a fault condition is still scarce in many applications, mainly related to complex systems. In many circumstances, only data from the system in healthy conditions is available and the applied FDD method should be able to detect variations in system conditions and diagnose faults without the need for previous labeled fault data. In this context, this article proposes a hybrid framework to automate FDD based on Moving Window Principal Component Analysis (MWPCA) and Bayesian Network (BN). First, the knowledge base on technical systems is organized to support the next steps of the framework. Then, the detection and diagnosis processes are performed sequentially through MWPCA and BN. The framework was implemented in the analysis of a simplified model of a hydrogenerator, considering real and simulated data. The results showed that the proposed method was able to detect and diagnose several simulated failures.}, language = {en}, urldate = {2022-01-13}, journal = {Reliability Engineering \& System Safety}, author = {Melani, Arthur Henrique de Andrade and Michalski, Miguel Angelo de Carvalho and da Silva, Renan Favarão and de Souza, Gilberto Francisco Martha}, month = nov, year = {2021}, keywords = {Adaptative principal component analysis, Bayesian network, Fault detection and diagnosis, MWPCA, Principal component analysis, sigkdd-rw}, pages = {107837}, }
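To make the detection half of the MWPCA-plus-Bayesian-network framework above more concrete, here is a minimal moving-window PCA sketch that refits a PCA model on a sliding window of recent healthy data and raises an alarm when the squared reconstruction error (SPE/Q statistic) of a new sample exceeds a data-driven threshold; the window size, number of components, and threshold rule are illustrative assumptions, and the Bayesian-network diagnosis step is omitted.

import numpy as np
from collections import deque
from sklearn.decomposition import PCA

def spe(pca, x):
    """Squared prediction error of x under the current PCA model."""
    x_hat = pca.inverse_transform(pca.transform(x.reshape(1, -1)))
    return float(np.sum((x - x_hat.ravel()) ** 2))

window, n_components = 200, 3
buffer = deque(maxlen=window)
rng = np.random.default_rng(4)

alarms = []
for k in range(1200):
    x = rng.normal(size=8)
    if k >= 1000:                       # inject a drift on two sensors
        x[:2] += 4.0
    if len(buffer) == window:
        pca = PCA(n_components=n_components).fit(np.array(buffer))
        threshold = np.percentile([spe(pca, b) for b in buffer], 99)
        if spe(pca, x) > threshold:
            alarms.append(k)
            continue                    # do not let a suspect sample update the model
    buffer.append(x)

print("first alarms at/after the drift:", [a for a in alarms if a >= 1000][:3])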
@article{de_lima_healthmon_2021, title = {{HealthMon}: {An} approach for monitoring machines degradation using time-series decomposition, clustering, and metaheuristics}, volume = {162}, issn = {0360-8352}, shorttitle = {{HealthMon}}, url = {https://www.sciencedirect.com/science/article/pii/S0360835221006136}, doi = {10.1016/j.cie.2021.107709}, abstract = {Monitoring the degradation of machines to anticipate potential failures represents a significant challenge. In Industry 4.0, this task is critical when the costs associated with maintenance and stoppages on the productive processes are high. Nowadays, many preventive maintenance techniques employ supervised or unsupervised machine learning algorithms. However, the definition of which features should be processed by such algorithms is not a simple task, being crucial to the proposed technique’s success. Against this background, we consider whether unsupervised algorithms combined with time-series decomposition can enhance the estimate of a machine’s health. This article proposes HealthMon as a novel approach to compute a health index of machines based on sensor measurements. HealthMon extracts time-series from such sensors, which are decomposed in an unsupervised way to present the health state along time. The health index is related to the degradation of the considered machine, thus optimizing the machine maintenance schedule. This work advances the state-of-the-art in the following ways: (i) it proposes a novel index of machines health, which yields a more direct and intuitive view of machine degradation; (ii) it devises the first approach capable of estimating the health index of a machine in a completely unsupervised way; (iii) it generalizes vibrating and rotating machines, thus being able to monitor a wide range of industrial equipment. We evaluated our method using both simulated and real data. The results show that the evolution of vibrating machines’ failures can be effectively detected under various input workloads. Finally, through HealthMon, industry decision-makers benefit from the guidelines for preventive actions at appropriate times, thus meeting Industry 4.0.}, language = {en}, urldate = {2021-10-04}, journal = {Computers \& Industrial Engineering}, author = {de Lima, Miromar Jose and Paredes Crovato, Cesar David and Goytia Mejia, Rodrigo Ivan and da Rosa Righi, Rodrigo and de Oliveira Ramos, Gabriel and André da Costa, Cristiano and Pesenti, Giovani}, month = dec, year = {2021}, keywords = {Health index, Machine learning, Monitoring, Prediction, Preventive maintenance, Time-series, Unsupervised learning, sigkdd-rw}, pages = {107709}, }
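HealthMon, as summarized above, combines time-series decomposition and unsupervised clustering into a health index. The following is a rough, hypothetical sketch of that combination (STL decomposition of a sensor summary series, K-Means on decomposition features, and a normalized distance-to-healthy-centroid as the index); it is not the HealthMon algorithm, and the seasonal period, feature choice, and scaling are assumptions.

import numpy as np
from statsmodels.tsa.seasonal import STL
from sklearn.cluster import KMeans

rng = np.random.default_rng(5)
hours = 24 * 60                                   # 60 days of hourly RMS vibration
rms = 1.0 + 0.1 * np.sin(2 * np.pi * np.arange(hours) / 24)          # daily cycle
rms += 0.002 * np.maximum(0, np.arange(hours) - 800)                 # slow degradation
rms += 0.05 * rng.normal(size=hours)

decomp = STL(rms, period=24).fit()

# Daily features from the decomposition: mean trend level and residual spread.
trend = np.asarray(decomp.trend).reshape(-1, 24).mean(axis=1)
resid_std = np.asarray(decomp.resid).reshape(-1, 24).std(axis=1)
features = np.column_stack([trend, resid_std])

km = KMeans(n_clusters=3, n_init=10, random_state=0).fit(features)
first_week_label = np.bincount(km.predict(features[:7])).argmax()  # assume first week is healthy
healthy = km.cluster_centers_[first_week_label]

# Health index in [0, 1]: 1 near the healthy centroid, decreasing with distance.
dist = np.linalg.norm(features - healthy, axis=1)
health_index = 1.0 - (dist - dist.min()) / (dist.max() - dist.min())
print(health_index[:5], health_index[-5:])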
@article{pang_deep_2021, title = {Deep {Learning} for {Anomaly} {Detection}: {A} {Review}}, volume = {54}, issn = {0360-0300}, shorttitle = {Deep {Learning} for {Anomaly} {Detection}}, url = {https://doi.org/10.1145/3439950}, doi = {10.1145/3439950}, abstract = {Anomaly detection, a.k.a. outlier detection or novelty detection, has been a lasting yet active research area in various research communities for several decades. There are still some unique problem complexities and challenges that require advanced approaches. In recent years, deep learning enabled anomaly detection, i.e., deep anomaly detection, has emerged as a critical direction. This article surveys the research of deep anomaly detection with a comprehensive taxonomy, covering advancements in 3 high-level categories and 11 fine-grained categories of the methods. We review their key intuitions, objective functions, underlying assumptions, advantages, and disadvantages and discuss how they address the aforementioned challenges. We further discuss a set of possible future opportunities and new perspectives on addressing the challenges.}, number = {2}, urldate = {2021-11-27}, journal = {ACM Computing Surveys}, author = {Pang, Guansong and Shen, Chunhua and Cao, Longbing and Hengel, Anton Van Den}, month = mar, year = {2021}, keywords = {Anomaly detection, deep learning, novelty detection, one-class classification, outlier detection}, pages = {38:1--38:38}, }
@article{foorthuis_nature_2021, title = {On the nature and types of anomalies: a review of deviations in data}, volume = {12}, issn = {2364-4168}, shorttitle = {On the nature and types of anomalies}, url = {https://doi.org/10.1007/s41060-021-00265-1}, doi = {10.1007/s41060-021-00265-1}, abstract = {Anomalies are occurrences in a dataset that are in some way unusual and do not fit the general patterns. The concept of the anomaly is typically ill defined and perceived as vague and domain-dependent. Moreover, despite some 250 years of publications on the topic, no comprehensive and concrete overviews of the different types of anomalies have hitherto been published. By means of an extensive literature review this study therefore offers the first theoretically principled and domain-independent typology of data anomalies and presents a full overview of anomaly types and subtypes. To concretely define the concept of the anomaly and its different manifestations, the typology employs five dimensions: data type, cardinality of relationship, anomaly level, data structure, and data distribution. These fundamental and data-centric dimensions naturally yield 3 broad groups, 9 basic types, and 63 subtypes of anomalies. The typology facilitates the evaluation of the functional capabilities of anomaly detection algorithms, contributes to explainable data science, and provides insights into relevant topics such as local versus global anomalies.}, language = {en}, number = {4}, urldate = {2021-11-27}, journal = {International Journal of Data Science and Analytics}, author = {Foorthuis, Ralph}, month = oct, year = {2021}, pages = {297--331}, }
@article{tchaghe_anomaly_2021, title = {Anomaly explanation: {A} review}, issn = {0169-023X}, shorttitle = {Anomaly explanation}, url = {https://www.sciencedirect.com/science/article/pii/S0169023X21000720}, doi = {10.1016/j.datak.2021.101946}, abstract = {Anomaly detection has been studied intensively by the data mining community for several years. As a result, many methods to detect anomalies have emerged, and others are still under development. But during the recent years, anomaly detection, just like a lot of machine learning tasks, is facing a wall. This wall, erected by the lack of trust of the final users, has slowed down the usage of these algorithms in the real-world situations for which they are designed. Having the best empirical accuracy is not enough anymore; there is a need for algorithms to explain their outputs to the users in order to increase their trust. Consequently, a new expression has emerged recently: eXplainable Artificial Intelligence (XAI). This expression, which gathers all the methods that provide explanations to the output of algorithms has gained popularity, especially with the outbreak of deep learning. A lot of work has been devoted to anomaly detection in the literature, but not as much to anomaly explanation. There is so much work on anomaly detection that several reviews can be found on the topic. In contrast, we were not able to find a survey on anomaly explanation in particular, while there are a lot of surveys on XAI in general or on XAI for neural networks for example. With this paper, we want to provide a comprehensive review of the anomaly explanation field. After a brief recall of some important anomaly detection algorithms, the anomaly explanation methods that we discovered in the literature will be classified according to a taxonomy that we define. This taxonomy stems from an analysis of what is really important when trying to explain anomalies.}, language = {en}, urldate = {2021-11-26}, journal = {Data \& Knowledge Engineering}, author = {Tchaghe, Véronne Yepmo and Smits, Grégory and Pivert, Olivier}, month = nov, year = {2021}, keywords = {Anomaly detection, Anomaly explanation, Explainable Artificial Intelligence (XAI), Interpretability, Outlier interpretation}, pages = {101946}, }
@inproceedings{le_nguyen_complete_2021, title = {A {Complete} {Streaming} {Pipeline} for {Real}-time {Monitoring} and {Predictive} {Maintenance}}, doi = {10.3850/978-981-18-2016-8_400-cd}, booktitle = {Proceedings of the 31st {European} {Safety} and {Reliability} {Conference}}, author = {Le Nguyen, Minh Huong and Turgis, Fabien and Fayemi, Pierre-Emmanuel and Bifet, Albert}, year = {2021}, pages = {2119}, }

@article{balbi_relevance_2021, series = {17th {IFAC} {Symposium} on {Information} {Control} {Problems} in {Manufacturing} {INCOM} 2021}, title = {On the relevance of clustering strategies for collaborative prognostics}, volume = {54}, issn = {2405-8963}, url = {https://www.sciencedirect.com/science/article/pii/S2405896321007023}, doi = {10.1016/j.ifacol.2021.08.004}, abstract = {The innovative concept of Social Internet of Industrial Things is opening a promising perspective for collaborative prognostics in order to improve maintenance and operational policies. Given this context, the present work studies the exploitation of historical and collaborative information for on-line prognostic assessment. In particular, while aiming at a cost-effective prognostic algorithm, with an efficient use of the available data and a proper prediction accuracy, the work remarks the relevance of an optimized clustering strategy for the selection of the useful information.}, language = {en}, number = {1}, urldate = {2021-11-15}, journal = {IFAC-PapersOnLine}, author = {Balbi, Matteo and Cattaneo, Laura and Nucera, Domenico Daniele and Macchi, Marco}, month = jan, year = {2021}, keywords = {Collaborative prognostics, RUL prediction, clustering, data-driven prognostics}, pages = {37--42}, }
@article{nucera_data-driven_2021, series = {17th {IFAC} {Symposium} on {Information} {Control} {Problems} in {Manufacturing} {INCOM} 2021}, title = {Data-{Driven} {State} {Detection} for an asset working at heterogenous regimens}, volume = {54}, issn = {2405-8963}, url = {https://www.sciencedirect.com/science/article/pii/S2405896321009137}, doi = {10.1016/j.ifacol.2021.08.149}, abstract = {The current trend of industrial digitalization paved the way to Machine Learning applications which are adding value to data coming from the assets. In this context, the case study of a State Detection in an asset characterized by heterogeneous working regimens is proposed, with the aim of automatically recognizing the type of the ongoing production and of identifying its different operating conditions. The activity is executed by exploiting the data available on the asset controller and applying and comparing two different clustering algorithms, namely K-Means and HDBSCAN. The paper describes hence the application case and the adopted approaches, while providing insights on the most preferable choice for any of the two objectives, in order to pave the ground for condition-based maintenance activities.}, language = {en}, number = {1}, urldate = {2021-11-15}, journal = {IFAC-PapersOnLine}, author = {Nucera, Domenico Daniele and Quadrini, Walter and Fumagalli, Luca and Scipioni, Marcello Paolo}, month = jan, year = {2021}, note = {This work is supported by Lombardy funded project SMART4CPPS (ID: 236789, CUP: E19I18000000009)}, keywords = {Clustering, HDBSCAN, K-Means, Production activity control, Quality assurance, State Detection, maintenance}, pages = {1248--1253}, }
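For the K-Means versus HDBSCAN comparison described in the entry above, a minimal sketch of how such a comparison might be set up on controller signals is given below (not the authors' code); it assumes scikit-learn 1.3+ for cluster.HDBSCAN, uses synthetic regimen data, and contrasts the two labelings with a silhouette score as one illustrative criterion.

import numpy as np
from sklearn.cluster import KMeans, HDBSCAN   # HDBSCAN requires scikit-learn >= 1.3
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import silhouette_score

# Hypothetical controller signals: spindle speed, feed rate, power draw
# for three production regimens plus idle periods.
rng = np.random.default_rng(6)
regimes = [np.array(c) + 0.1 * rng.normal(size=(300, 3))
           for c in ([1, 1, 1], [3, 2, 5], [6, 1, 8], [0, 0, 0.2])]
X = StandardScaler().fit_transform(np.vstack(regimes))

kmeans_labels = KMeans(n_clusters=4, n_init=10, random_state=0).fit_predict(X)
hdbscan_labels = HDBSCAN(min_cluster_size=50).fit_predict(X)

print("k-means silhouette:", silhouette_score(X, kmeans_labels))
mask = hdbscan_labels != -1                       # HDBSCAN marks noise as -1
print("hdbscan silhouette (non-noise):", silhouette_score(X[mask], hdbscan_labels[mask]))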
@article{wellsandt_towards_2021, series = {17th {IFAC} {Symposium} on {Information} {Control} {Problems} in {Manufacturing} {INCOM} 2021}, title = {Towards {Using} {Digital} {Intelligent} {Assistants} to {Put} {Humans} in the {Loop} of {Predictive} {Maintenance} {Systems}}, volume = {54}, issn = {2405-8963}, url = {https://www.sciencedirect.com/science/article/pii/S2405896321007047}, doi = {10.1016/j.ifacol.2021.08.005}, abstract = {Predictive maintenance systems are socio-technical systems where the interaction between maintenance personnel and the technical system is critical to achieving maintenance goals. Employees who use a predictive maintenance system should explore, modify, and verify their analysis and decision-making methods and rules. Conventional modes of interaction make this difficult since they are often hard to understand, obtrusive and unintuitive. Digital Intelligent Assistants (DIAs) provide fast, intuitive, and potentially hands-free access to systems through voice-based interaction and cognitive assistance. This paper introduces a novel approach to interact with predictive maintenance systems through DIAs. The aim is to integrate human knowledge more effectively into the predictive maintenance process to create a hybrid-intelligence system. In such systems, humans and computers complement and evolve together.}, language = {en}, number = {1}, urldate = {2021-11-15}, journal = {IFAC-PapersOnLine}, author = {Wellsandt, Stefan and Klein, Konstantin and Hribernik, Karl and Lewandowski, Marco and Bousdekis, Alexandros and Mentzas, Gregoris and Thoben, Klaus-Dieter}, month = jan, year = {2021}, keywords = {Engineering Applications of Artificial Intelligence, Human-Automation Integration, Hybrid Intelligence Systems, Predictive Maintenance}, pages = {49--54}, }
@article{davari_survey_2021, title = {A {Survey} on {Data}-{Driven} {Predictive} {Maintenance} for the {Railway} {Industry}}, volume = {21}, doi = {10.3390/s21175739}, abstract = {In the last few years, many works have addressed Predictive Maintenance (PdM) by the use of Machine Learning (ML) and Deep Learning (DL) solutions, especially the latter. The monitoring and logging of industrial equipment events, like temporal behavior and fault events-anomaly detection in time-series-can be obtained from records generated by sensors installed in different parts of an industrial plant. However, such progress is incipient because we still have many challenges, and the performance of applications depends on the appropriate choice of the method. This article presents a survey of existing ML and DL techniques for handling PdM in the railway industry. This survey discusses the main approaches for this specific application within a taxonomy defined by the type of task, employed methods, metrics of evaluation, the specific equipment or process, and datasets. Lastly, we conclude and outline some suggestions for future research.}, journal = {Sensors}, author = {Davari, Narjes and Veloso, Bruno and De Assis Costa, Gustavo and Pereira, Pedro and Ribeiro, Rita and Gama, João}, month = sep, year = {2021}, keywords = {data-driven pdm, pdm, railway, survey}, pages = {5739}, }
@article{michalowska_anomaly_2021, series = {13th {IFAC} {Conference} on {Control} {Applications} in {Marine} {Systems}, {Robotics}, and {Vehicles} {CAMS} 2021}, title = {Anomaly {Detection} with {Unknown} {Anomalies}: {Application} to {Maritime} {Machinery}}, volume = {54}, issn = {2405-8963}, shorttitle = {Anomaly {Detection} with {Unknown} {Anomalies}}, url = {https://www.sciencedirect.com/science/article/pii/S2405896321014828}, doi = {10.1016/j.ifacol.2021.10.080}, abstract = {We present a framework for deriving anomaly detection algorithms on timeseries data when the time and expression of anomalous behaviour is unknown. The framework is suited for problems in which individual machine learning paradigms cannot be directly implemented: supervised learning is not applicable due to the lack of labelled data, unsupervised learning is not effective since the normal operations are insufficiently defined and take complex and diverse forms, and deep learning risks confusing problematic behaviours for expected ones due to the possible repetitiveness of similar anomalies. The proposed approach is comprised of two phases: unsupervised discovery of anomalies, and semi-supervised construction and tuning of the anomaly detection algorithm. By leveraging data exploration methods and expert knowledge, the resulting algorithms are interpretable and detect a wide range of anomalous behaviours. The approach is applied to the early detection of wear and tear of maritime propulsion and manoeuvring machinery. We show that the final algorithm is able to detect different types of anomalies, including an actual internal leakage in a thruster which is otherwise overlooked by the present rule-based alarm system.}, language = {en}, number = {16}, urldate = {2021-11-08}, journal = {IFAC-PapersOnLine}, author = {Michałowska, Katarzyna and Riemer-Sørensen, Signe and Sterud, Camilla and Hjellset, Ole Magnus}, month = jan, year = {2021}, keywords = {anomaly detection, condition-based monitoring, diagnosis, fault detection, grey-box modelling, machine learning, predictive maintenance}, pages = {105--111}, }
@article{cai_novel_2021, title = {A novel {RUL} prognosis methodology of multilevel system with cascading failure: {Subsea} oil and gas transportation systems as a case study}, volume = {242}, issn = {0029-8018}, shorttitle = {A novel {RUL} prognosis methodology of multilevel system with cascading failure}, url = {https://www.sciencedirect.com/science/article/pii/S0029801821014621}, doi = {10.1016/j.oceaneng.2021.110141}, abstract = {Cascading failure has a great negative impact on the operation of multilevel systems. Although the frequency of this kind of failure is lower than that of general failure, it may cause outage and considerable human and economic losses. In this paper, a novel modeling methodology of cascading failure based on position importance and function importance is proposed by using dynamic Bayesian networks, and the remaining useful life (RUL) of multilevel systems considering cascading failure is estimated. By detecting the working state of the nodes, the position and function importance of the nodes are determined, and the performance of the next layer of related nodes is calculated. Through calculating iteratively to the last level, the number of failure nodes is finally determined, and the overall performance of the multilevel systems is evaluated. A subsea transportation system with three-level network and four nodes in each level is used to demonstrate the application of the proposed methodology, and the feasibility of the methodology is analyzed. The results show that the RUL of the systems is significantly reduced when considering the cascading failure, which is quite different from the degradation of the non-cascading failure mode.}, language = {en}, urldate = {2021-11-08}, journal = {Ocean Engineering}, author = {Cai, Baoping and Shao, Xiaoyan and Yuan, Xiaobing and Liu, Yonghong and Chen, Guoming and Feng, Qiang and Liu, Yiqi and Ren, Yi}, month = dec, year = {2021}, keywords = {Cascading failure, Dynamic Bayesian network, Multilevel systems, Position and function importance, Remaining useful life}, pages = {110141}, }
@phdthesis{aleskog_graph-based_2021, address = {Sweden}, title = {Graph-based {Multi}-view {Clustering} for {Continuous} {Pattern} {Mining}}, url = {http://urn.kb.se/resolve?urn=urn:nbn:se:bth-21850}, abstract = {Background. In many smart monitoring applications, such as smart healthcare, smart building, autonomous cars etc., data are collected from multiple sources and contain information about different perspectives/views of the monitored phenomenon, physical object, system. In addition, in many of those applications the availability of relevant labelled data is often low or even non-existing. Inspired by this, in this thesis study we propose a novel algorithm for multi-view stream clustering. The algorithm can be applied for continuous pattern mining and labeling of streaming data. Objectives. The main objective of this thesis is to develop and implement a novel multi-view stream clustering algorithm. In addition, the potential of the proposed algorithm is studied and evaluated on two datasets: synthetic and real-world. The conducted experiments study the new algorithm’s performance compared to a single-view clustering algorithm and an algorithm without transferring knowledge between chunks. Finally, the obtained results are analyzed, discussed and interpreted. Methods. Initially, we study the state-of-the-art multi-view (stream) clustering algorithms. Then we develop our multi-view clustering algorithm for streaming data by implementing transfer of knowledge feature. We present and explain in details the developed algorithm by motivating each choice made during the algorithm design phase. Finally, discussion of the algorithm configuration, experimental setup and the datasets chosen for the experiments are presented and motivated. Results. Different configurations of the proposed algorithm have been studied and evaluated under different experimental scenarios on two different datasets: synthetic and real-world. The proposed multi-view clustering algorithm has demonstrated higher performance on the synthetic data than on the real-world dataset. This is mainly due to not very good quality of the used real-world data. Conclusions. The proposed algorithm has demonstrated higher performance results on the synthetic dataset than on the real-world dataset. It can generate high-quality clustering solutions with respect to the used evaluation metrics. In addition, the transfer of knowledge feature has been shown to have a positive effect on the algorithm performance. A further study of the proposed algorithm on other richer and more suitable datasets, e.g., data collected from numerous sensors used for monitoring some phenomenon, is planned to be conducted in the future work.}, language = {eng}, urldate = {2021-11-07}, school = {Blekinge Institute of Technology}, author = {Åleskog, Christoffer}, year = {2021}, }
@article{ang_efficient_2021, title = {Efficient linear predictive model with short term features for lithium-ion batteries state of health estimation}, volume = {44}, issn = {2352-152X}, url = {https://www.sciencedirect.com/science/article/pii/S2352152X21010951}, doi = {10.1016/j.est.2021.103409}, abstract = {The need to predict the State of Health (SoH) of lithium-ion batteries accurately and efficiently is rising with growing use of such batteries in safety critical applications. In this paper, an intuitive and efficient predictive algorithm that can estimate the SoH of lithium-ion batteries with accuracy on par with more complex and computationally demanding models is presented. The predictive algorithm uses the battery's temperature and voltage time discharge profile to predict its current SoH with root mean square error (RMSE) of 1\%. It is shown that the crux in achieving good prediction accuracy lies in data preprocessing, which are cleansing, normalization and retaining of key features that are rich in information from the raw measured data. Finally, a simplified version of the algorithm using only voltage time profiles for model training is proposed that provides less than 12\% RMSE error, comparable with current state of the art. This algorithm can be easily implemented in most applications since only measured voltage data is required. Throughout this paper, the proposed algorithm is tested with publicly available dataset and comparison is done with existing literature results to benchmark the proposed algorithm performance.}, language = {en}, urldate = {2021-10-25}, journal = {Journal of Energy Storage}, author = {Ang, Elisa Y. M. and Paw, Yew Chai}, month = dec, year = {2021}, keywords = {Battery state of health, Data analytics, Linear predictive model, Lithium-ion battery, Machine learning, Predictive maintenance}, pages = {103409}, }
@article{dikshit_explainable_2021, title = {Explainable {AI} in drought forecasting}, issn = {2666-8270}, url = {https://www.sciencedirect.com/science/article/pii/S2666827021000967}, doi = {10.1016/j.mlwa.2021.100192}, abstract = {Droughts are one of the disastrous natural hazards which has severe impacts on agricultural production, economy, and society. One of the critical steps for effective drought management is developing a robust forecasting model and understanding how the variables affect the model outcomes. The present study forecasts SPI-12 at a lead time of 3 months, using the Long Short-Term Memory (LSTM) model, and further interprets the spatial and temporal relationship between variables and forecasting results using SHapley Additive exPlanations (SHAP). The developed model is tested in four different regions in New South Wales (NSW), Australia. SPI-12 was computed using monthly rainfall data collected from Scientific Information for Land Owners (SILO) for 1901–2018. The model was trained from 1901–2000 and tested from 2001–2018, and the performance was measured using Coefficient of Determination (R2), Nash–Sutcliffe Efficiency (NSE) and Root-Mean-Square-Error (RMSE). To understand the underlying impact of variables on the model outcomes, SHAPley values were calculated for the entire testing period and also at three different temporal ranges, which are during the Millennium Drought (2001–2010), post drought period (2011–2018) and at a seasonal scale (summer months). The comparison of the results shows a significant variation in the impact of variables on forecasting, both temporally and spatially. It also shows the need to study the model outcomes for specific regions and for a shorter duration than the entire testing period. This is a first of its kind study towards interpreting the forecasting model in drought studies, which could help understand the behaviour of drought variables.}, language = {en}, urldate = {2021-10-25}, journal = {Machine Learning with Applications}, author = {Dikshit, Abhirup and Pradhan, Biswajeet}, month = oct, year = {2021}, keywords = {Deep learning, Drought forecasting, Explainable AI, Standard precipitation index}, pages = {100192}, }
@inproceedings{mistry_aedbscanadaptive_2021, address = {Singapore}, series = {Advances in {Intelligent} {Systems} and {Computing}}, title = {{AEDBSCAN}—{Adaptive} {Epsilon} {Density}-{Based} {Spatial} {Clustering} of {Applications} with {Noise}}, isbn = {9789811563539}, doi = {10.1007/978-981-15-6353-9_20}, abstract = {The objectives of this research are related to study the DBSCAN algorithm and engineer an enhancement to this algorithm addressing its flaws. DBSCAN is criticized for its requirement to input two parameters, namely—epsilon radius (ϵ) and minimum number of points (MinPts). It is difficult to know beforehand the optimum value of both parameters, and hence many trials are required until desired clusters are obtained. Also, in a dataset, a cluster’s density can vary. DBSCAN fails to identify clusters with density variations present. The proposed algorithm Adaptive Epsilon DBSCAN (AEDBSCAN), generates epsilon dynamically in accordance with the neighborhood of a point and thereafter adopts DBSCAN clustering with the corresponding epsilon to obtain the clusters. Experimental results are obtained from testing AEDBSCAN on artificial datasets. The experimental results confirm that the proposed AEDBSCAN algorithm efficiently carries out multi-density clustering than the original DBSCAN.}, language = {en}, booktitle = {Progress in {Advanced} {Computing} and {Intelligent} {Engineering}}, publisher = {Springer}, author = {Mistry, Vidhi and Pandya, Urja and Rathwa, Anjana and Kachroo, Himani and Jivani, Anjali}, editor = {Panigrahi, Chhabi Rani and Pati, Bibudhendu and Mohapatra, Prasant and Buyya, Rajkumar and Li, Kuan-Ching}, year = {2021}, keywords = {Adaptive epsilon, DBSCAN, Data mining, Density-based clustering, Multi-density}, pages = {213--226}, }
@article{zhang_multi-label_2021, title = {Multi-label learning with label-specific features via weighting and label entropy guided clustering ensemble}, volume = {419}, issn = {0925-2312}, url = {https://www.sciencedirect.com/science/article/pii/S0925231220313059}, doi = {10.1016/j.neucom.2020.07.107}, abstract = {Multi-label learning has attracted more and more researchers’ attention. It deals with the problem where each instance is associated with multiple labels simultaneously. Some methods improve the performance by constructing label-specific features. Specifically, the LIFTACE method constructs label-specific features by clustering ensemble techniques, which ignores the importance of label vectors and does not explore label correlations when constructing the classification model. In this paper, we propose a multi-label learning method called LF-LELC, which considers the importance of label vectors and constructs the classification model by considering label correlations. Firstly, it performs clustering on the positive instances and negative instances respectively. The number of clusters is set by the information contained in the label vectors. After that, it employs clustering ensemble techniques that consider label correlations to make the clustering results more stable and effective. Then, it constructs label-specific features for each label. Finally, it builds the classification model by exploring label correlations. The label set for each test example is predicted by the classification model. Experiments show that LF-LELC can achieve better performance by considering the importance of label vectors and the correlations among labels.}, language = {en}, urldate = {2021-10-18}, journal = {Neurocomputing}, author = {Zhang, Chunyu and Li, Zhanshan}, month = jan, year = {2021}, keywords = {Label correlation, Label entropy, Label-specific features, Multi-label learning}, pages = {59--69}, }
@article{phan_dataset_2021, title = {Dataset of {Vietnamese} students’ academic perfectionism and school alienation}, volume = {39}, issn = {2352-3409}, url = {https://www.sciencedirect.com/science/article/pii/S2352340921007459}, doi = {10.1016/j.dib.2021.107463}, abstract = {Across the steadily superseding world, the younger generation is coming under pressure for an increase in the standard and a highly growing demand on their life themselves. This could lead to a variety of problems, including academic perfectionism and school alienation. To gain more insights into these phenomena, we conducted two research projects on students from eight upper secondary schools in Ho Chi Minh City, Vietnam using online surveys, and obtained two datasets. Dataset A covers (i) the level of students' perfectionism; (ii) belief in school meritocracy; (iii) The competitiveness among students; and (iv) the intrinsic motivation to achieve. Dataset B contains students' self-reports about (i) their perceptions of parents' and teachers' academic conditional regard; (ii) academic contingent self-worth; and (iii) school alienation. The numbers of respondents of dataset A and dataset B are 2942 and 2970, respectively.}, language = {en}, urldate = {2021-10-18}, journal = {Data in Brief}, author = {Phan, Thanh-Thao Thi and Nguyen, Linh-Chi and Nguyen, Ngoc-Quang and Nguyen, Yen-Chi}, month = dec, year = {2021}, keywords = {Classroom competitiveness, Contingent self-worth, Meritocracy belief, Parental conditional regard, School alienation, Student academic perfectionism, Teacher conditional regard, Vietnam}, pages = {107463}, }
@article{hoi_online_2021, title = {Online learning: {A} comprehensive survey}, volume = {459}, issn = {0925-2312}, shorttitle = {Online learning}, url = {https://www.sciencedirect.com/science/article/pii/S0925231221006706}, doi = {10.1016/j.neucom.2021.04.112}, abstract = {Online learning represents a family of machine learning methods, where a learner attempts to tackle some predictive (or any type of decision-making) task by learning from a sequence of data instances one by one at each time. The goal of online learning is to maximize the accuracy/correctness for the sequence of predictions/decisions made by the online learner given the knowledge of correct answers to previous prediction/learning tasks and possibly additional information. This is in contrast to traditional batch or offline machine learning methods that are often designed to learn a model from the entire training data set at once. Online learning has become a promising technique for learning from continuous streams of data in many real-world applications. This survey aims to provide a comprehensive survey of the online machine learning literature through a systematic review of basic ideas and key principles and a proper categorization of different algorithms and techniques. Generally speaking, according to the types of learning tasks and the forms of feedback information, the existing online learning works can be classified into three major categories: (i) online supervised learning where full feedback information is always available, (ii) online learning with limited feedback, and (iii) online unsupervised learning where no feedback is available. Due to space limitation, the survey will be mainly focused on the first category, but also briefly cover some basics of the other two categories. Finally, we also discuss some open issues and attempt to shed light on potential future research directions in this field.}, language = {en}, urldate = {2021-10-15}, journal = {Neurocomputing}, author = {Hoi, Steven C. H. and Sahoo, Doyen and Lu, Jing and Zhao, Peilin}, month = oct, year = {2021}, keywords = {Online convex optimization, Online learning, Sequential decision making}, pages = {249--289}, }
@article{lv_application_2021, title = {Application of {Multilayer} {Network} {Models} in {Bioinformatics}}, volume = {12}, issn = {1664-8021}, url = {https://www.frontiersin.org/article/10.3389/fgene.2021.664860}, doi = {10.3389/fgene.2021.664860}, abstract = {Multilayer networks provide an efficient tool for studying complex systems, and with current, dramatic development of bioinformatics tools and accumulation of data, researchers have applied network concepts to all aspects of research problems in the field of biology. Addressing the combination of multilayer networks and bioinformatics, through summarizing the applications of multilayer network models in bioinformatics, this review classifies applications and presents a summary of the latest results. Among them, we classify the applications of multilayer networks according to the object of study. Furthermore, because of the systemic nature of biology, we classify the subjects into several hierarchical categories, such as cells, tissues, organs, and groups, according to the hierarchical nature of biological composition. On the basis of the complexity of biological systems, we selected brain research for a detailed explanation. We describe the application of multilayer networks and chronological networks in brain research to demonstrate the primary ideas associated with the application of multilayer networks in biological studies. Finally, we mention a quality assessment method focusing on multilayer and single-layer networks as an evaluation method emphasizing network studies.}, urldate = {2021-10-14}, journal = {Frontiers in Genetics}, author = {Lv, Yuanyuan and Huang, Shan and Zhang, Tianjiao and Gao, Bo}, year = {2021}, pages = {380}, }
@article{galvez_fault_2021, title = {Fault {Detection} and {RUL} {Estimation} for {Railway} {HVAC} {Systems} {Using} a {Hybrid} {Model}-{Based} {Approach}}, volume = {13}, copyright = {http://creativecommons.org/licenses/by/3.0/}, url = {https://www.mdpi.com/2071-1050/13/12/6828}, doi = {10.3390/su13126828}, abstract = {Heating, ventilation, and air conditioning (HVAC) systems installed in a passenger train carriage are critical systems, whose failures can affect people or the environment. This, together with restrictive regulations, results in the replacement of critical components in initial stages of degradation, as well as a lack of data on advanced stages of degradation. This paper proposes a hybrid model-based approach (HyMA) to overcome the lack of failure data on a HVAC system installed in a passenger train carriage. The proposed HyMA combines physics-based models with data-driven models to deploy diagnostic and prognostic processes for a complex and critical system. The physics-based model generates data on healthy and faulty working conditions; the faults are generated in different levels of degradation and can appear individually or together. A fusion of synthetic data and measured data is used to train, validate, and test the proposed hybrid model (HyM) for fault detection and diagnostics (FDD) of the HVAC system. The model obtains an accuracy of 92.60\%. In addition, the physics-based model generates run-to-failure data for the HVAC air filter to develop a remaining useful life (RUL) prediction model; the RUL estimations performed obtained an accuracy in the range of 95.21–97.80\%. Both models obtain a remarkable accuracy. The development presented will result in a tool which provides relevant information on the health state of the HVAC system, extends its useful life, reduces its life cycle cost, and improves its reliability and availability; thus enhancing the sustainability of the system.}, language = {en}, number = {12}, urldate = {2021-10-12}, journal = {Sustainability}, author = {Gálvez, Antonio and Diez-Olivan, Alberto and Seneviratne, Dammika and Galar, Diego}, month = jan, year = {2021}, note = {Number: 12 Publisher: Multidisciplinary Digital Publishing Institute}, keywords = {HVAC systems, fault detection, fault modelling, hvac, hybrid modelling, predictive maintenance, railway, rul, soft sensing, synthetic data}, pages = {6828}, }
@article{ciani_condition-based_2021, title = {Condition-{Based} {Maintenance} of {HVAC} on a {High}-{Speed} {Train} for {Fault} {Detection}}, volume = {10}, copyright = {http://creativecommons.org/licenses/by/3.0/}, url = {https://www.mdpi.com/2079-9292/10/12/1418}, doi = {10.3390/electronics10121418}, abstract = {Reliability-centered maintenance (RCM) is a well-established method for preventive maintenance planning. This paper focuses on the optimization of a maintenance plan for an HVAC (heating, ventilation and air conditioning) system located on high-speed trains. The first steps of the RCM procedure help in identifying the most critical items of the system in terms of safety and availability by means of a failure modes and effects analysis. Then, RCM proposes the optimal maintenance tasks for each item making up the system. However, the decision-making diagram that leads to the maintenance choice is extremely generic, with a consequent high subjectivity in the task selection. This paper proposes a new fuzzy-based decision-making diagram to minimize the subjectivity of the task choice and preserve the cost-efficiency of the procedure. It uses a case from the railway industry to illustrate the suggested approach, but the procedure could be easily applied to different industrial and technological fields. The results of the proposed fuzzy approach highlight the importance of an accurate diagnostics (with an overall 86\% of the task as diagnostic-based maintenance) and condition monitoring strategy (covering 54\% of the tasks) to optimize the maintenance plan and to maximize the system availability. The findings show that the framework strongly mitigates the issues related to the classical RCM procedure, notably the high subjectivity of experts. It lays the groundwork for a general fuzzy-based reliability-centered maintenance method.}, language = {en}, number = {12}, urldate = {2021-10-12}, journal = {Electronics}, author = {Ciani, Lorenzo and Guidi, Giulia and Patrizi, Gabriele and Galar, Diego}, month = jan, year = {2021}, note = {Number: 12 Publisher: Multidisciplinary Digital Publishing Institute}, keywords = {cbm, condition-based maintenance, fault detection, fuzzy logic, hvac, railway, rcm, reliability, reliability-centered maintenance}, pages = {1418}, }
@article{tan_multi-label_2021, title = {Multi-label classification for simultaneous fault diagnosis of marine machinery: {A} comparative study}, volume = {239}, issn = {0029-8018}, shorttitle = {Multi-label classification for simultaneous fault diagnosis of marine machinery}, url = {https://www.sciencedirect.com/science/article/pii/S0029801821010921}, doi = {10.1016/j.oceaneng.2021.109723}, abstract = {Fault diagnosis of marine machinery is of utmost importance in modern ships. The widely used machine learning techniques have made it possible to realize intelligent diagnosis by using large amounts of sensory data. However, the detection of simultaneous faults is still a challenge in the absence of simultaneous fault data. Multi-label classification has recently gained popularity in simultaneous fault diagnosis with promising results. The contribution of this work is to carry out a comparative study of several state-of-the-art multi-label classification algorithms for simultaneous fault diagnosis of marine machinery based on single fault data. The proposed method is experimentally validated with a dataset generated from a real data validated simulator of a Frigate. The experimental results show the effectiveness of the proposed method, which can provide decision support for the application of multi-label classification in the simultaneous fault diagnosis of similar marine systems.}, language = {en}, urldate = {2021-10-11}, journal = {Ocean Engineering}, author = {Tan, Yanghui and Zhang, Jundong and Tian, Hui and Jiang, Dingyu and Guo, Lei and Wang, Gaoming and Lin, Yejin}, month = nov, year = {2021}, keywords = {Marine machinery, Multi-label classification, Simultaneous fault diagnosis}, pages = {109723}, }
@article{manjunath_time-distributed_2021, title = {Time-{Distributed} {Feature} {Learning} in {Network} {Traffic} {Classification} for {Internet} of {Things}}, url = {http://arxiv.org/abs/2109.14696}, abstract = {The plethora of Internet of Things (IoT) devices leads to explosive network traffic. The network traffic classification (NTC) is an essential tool to explore behaviours of network flows, and NTC is required for Internet service providers (ISPs) to manage the performance of the IoT network. We propose a novel network data representation, treating the traffic data as a series of images. Thus, the network data is realized as a video stream to employ time-distributed (TD) feature learning. The intra-temporal information within the network statistical data is learned using convolutional neural networks (CNN) and long short-term memory (LSTM), and the inter pseudo-temporal feature among the flows is learned by TD multi-layer perceptron (MLP). We conduct experiments using a large data-set with more number of classes. The experimental result shows that the TD feature learning elevates the network classification performance by 10\%.}, urldate = {2021-10-07}, journal = {arXiv:2109.14696 [cs]}, author = {Manjunath, Yoga Suhas Kuruba and Zhao, Sihao and Zhang, Xiao-Ping}, month = sep, year = {2021}, note = {arXiv: 2109.14696}, keywords = {Computer Science - Machine Learning, Computer Science - Networking and Internet Architecture}, }
@inproceedings{sandhya_madhuri_review_2021, address = {Singapore}, series = {Lecture {Notes} in {Networks} and {Systems}}, title = {Review {Paper} on {Anomaly} {Detection} in {Data} {Streams}}, isbn = {9789811619410}, doi = {10.1007/978-981-16-1941-0_72}, abstract = {Anomaly is in general defined as deviation or diversion from the normal. The word anomaly came from the Greek word anomalia which means “uneven” or “irregular”. In our day-to-day lives, we have seen many such irregularities or deviations from normalcy. For example, a condition monitoring system beeps an alarm when it detects any value or parameter of the machine away from the minimum value limit to the maximum value limit, or a credit card fraud alerts the bank and the customer immediately. Now, the crucial task here is how we detect anomalies in data streams. When there is streaming data that is continuously generated from any source, it is called a data stream. The task of finding anomalies from such a stream of data will be a challenging job. In this paper, we will discuss elaborately about data streams and anomaly detection in data streams by reviewing several papers and articles written on this topic.}, language = {en}, booktitle = {Proceedings of the 2nd {International} {Conference} on {Computational} and {Bio} {Engineering}}, publisher = {Springer}, author = {Sandhya Madhuri, G. and {Yamuna} and Usha Rani, M.}, editor = {Jyothi, S. and Mamatha, D. M. and Zhang, Yu-Dong and Raju, K. Srujan}, year = {2021}, keywords = {Anomaly, Anomaly detection algorithms, Data streams, Outliers}, pages = {721--728}, }
@article{sejr_explainable_2021, title = {Explainable outlier detection: {What}, for {Whom} and {Why}?}, issn = {2666-8270}, shorttitle = {Explainable outlier detection}, url = {https://www.sciencedirect.com/science/article/pii/S2666827021000864}, doi = {10.1016/j.mlwa.2021.100172}, abstract = {Outlier algorithms are becoming increasingly complex. Thereby, they become much less interpretable to the data scientists applying the algorithms in real-life settings and to end-users using their predictions. We argue that outliers are context-dependent and, therefore, can only be detected via domain knowledge, algorithm insight, and interaction with end-users. As outlier detection is equivalent to unsupervised semantic binary classification, at the core of interpreting an outlier algorithm we find the semantics of the classes, i.e., the algorithm’s conceptual outlier definition. We investigate current interpretable and explainable outlier algorithms: what they are, for whom they are, and what their value proposition is. We then discuss how interpretation and explanation and user involvement have the potential to provide the missing link to bring modern complex outlier algorithms from computer science labs into real-life applications and the challenges they induce.}, language = {en}, urldate = {2021-10-04}, journal = {Machine Learning with Applications}, author = {Sejr, Jonas Herskind and Schneider-Kamp, Anna}, month = oct, year = {2021}, keywords = {Explainable artificial intelligence, Unsupervised outlier detection}, pages = {100172}, }
@article{jia_condition_2021, title = {Condition monitoring and performance forecasting of wind turbines based on denoising autoencoder and novel convolutional neural networks}, volume = {7}, issn = {2352-4847}, url = {https://www.sciencedirect.com/science/article/pii/S2352484721008854}, doi = {10.1016/j.egyr.2021.09.080}, abstract = {With the proportion of wind power in the grid increasing, the monitoring and maintenance of wind turbines is becoming more and more important for the reliability of the grid. In this study, a data-driven modelling framework based on deep convolutional neural networks is constructed for wind turbines condition monitoring (CM) and performance forecasting (PF). For CM, a robust denoising autoencoder (DAE) model is introduced to output the reconstruction error (RE) of raw signals. The RE is processed to a state indicator by exponentially weighted moving average (EWMA) and monitored on a control chart. For PF, two multi-steps ahead forecasting models are constructed for the forecasting of generator bearing and transformer temperature. To prevent overfitting caused by abundant features, the marginal effect analysis based on random forests is implemented to measure the importance of features. Besides, novel residual attention module (RAM) and training strategies are used improve their representation power of DAE and PF models. Experiments on a real wind turbine dataset prove the effectiveness of the proposed models and methods.}, language = {en}, urldate = {2021-10-04}, journal = {Energy Reports}, author = {Jia, Xiongjie and Han, Yang and Li, Yanjun and Sang, Yichen and Zhang, Guolei}, month = nov, year = {2021}, keywords = {Condition monitoring, Denoising autoencoder, Performance forecasting, Residual attention module, Wind turbine}, pages = {6354--6365}, }
@article{hoque_data_2021, title = {Data driven analysis of lithium-ion battery internal resistance towards reliable state of health prediction}, volume = {513}, issn = {0378-7753}, url = {https://www.sciencedirect.com/science/article/pii/S037877532101020X}, doi = {10.1016/j.jpowsour.2021.230519}, abstract = {Accurately predicting the lifetime of lithium-ion batteries in the early stage is critical for faster battery production, tuning the production line, and predictive maintenance of energy storage systems and battery-powered devices. Diverse usage patterns, variability in the devices housing the batteries, and diversity in their operating conditions pose significant challenges for this task. The contributions of this paper are three-fold. First, a public dataset is used to characterize the behavior of battery internal resistance. Internal resistance has non-linear dynamics as the battery ages, making it an excellent candidate for reliable battery health prediction during early cycles. Second, using these findings, battery health prediction models for different operating conditions are developed. The best models are more than 95\% accurate in predicting battery health using the internal resistance dynamics of 100 cycles at room temperature. Thirdly, instantaneous voltage drops due to multiple pulse discharge loads are shown to be capable of characterizing battery heterogeneity in as few as five cycles. The results pave the way toward improved battery models and better efficiency within the production and use of lithium-ion batteries.}, language = {en}, urldate = {2021-10-04}, journal = {Journal of Power Sources}, author = {Hoque, Mohammad A. and Nurmi, Petteri and Kumar, Arjun and Varjonen, Samu and Song, Junehwa and Pecht, Michael G. and Tarkoma, Sasu}, month = nov, year = {2021}, keywords = {Battery capacity, Internal resistance, Lithium-ion battery, State of health, health prediction, physical models}, pages = {230519}, }
@article{xiang_multicellular_2021, title = {Multicellular {LSTM}-based deep learning model for aero-engine remaining useful life prediction}, volume = {216}, issn = {0951-8320}, url = {https://www.sciencedirect.com/science/article/pii/S0951832021004439}, doi = {10.1016/j.ress.2021.107927}, abstract = {The prediction of aero-engine remaining useful life (RUL) is helpful for its operation and maintenance. Aiming at the challenge that most neural networks (NNs), including long short-term memory (LSTM), cannot process the input data in different update modes based on its importance degree, a novel variant of LSTM named multicellular LSTM (MCLSTM) is constructed. The level division unit is proposed to determine the importance degree of input data, and then multiple cellular units are designed to update the cell states according to the data level. Thus, MCLSTM can well mine different levels of degradation trends. Based on MCLSTM and a deep NN (DNN), a deep learning model for RUL prediction is set up, where MCLSTM and a branch of the DNN is used to extract health indicators (HIs) of aero-engine from raw data, and the other part of the DNN is applied to generate the HIs from human-made features and predict the RUL based on the concatenated HIs. The proposed RUL prediction model is successfully applied to predict the RULs of aero-engines via the Commercial Modular Aero Propulsion System Simulation datasets, and the comparative results show that it has a better comprehensive prediction performance than the commonly-used machine learning methods.}, language = {en}, urldate = {2021-10-02}, journal = {Reliability Engineering \& System Safety}, author = {Xiang, Sheng and Qin, Yi and Luo, Jun and Pu, Huayan and Tang, Baoping}, month = dec, year = {2021}, keywords = {Data level, Degradation trend, Health feature, Multi-resource data, RUL prediction}, pages = {107927}, }
@article{chen_two-phase_2021, title = {Two-phase degradation data analysis with change-point detection based on {Gaussian} process degradation model}, volume = {216}, issn = {0951-8320}, url = {https://www.sciencedirect.com/science/article/pii/S0951832021004324}, doi = {10.1016/j.ress.2021.107916}, abstract = {Degradation paths of the products exhibiting two-phase patterns are commonly seen in practice due to the changeable internal mechanisms and external environments. In this paper, we propose a two-phase Gaussian process (TPGP) degradation model with a change-point, which comprises the Wiener process-based change-point models as special cases, to describe the degradation paths with two-phase patterns. The change-point is used to represent the transition of degradation characteristics. The degradation rates and variations in the two phases are assumed to be different. Therefore, both monotonically increasing and decreasing or nonmonotonic dispersion trends and complicated auto-correlations in the degradation measurements can be captured by TPGP. Joint methods of the parameter estimation and change-point detection is developed for two different engineering scenarios. The distributions of the first passage time and the remaining useful life are derived in closed-form to promote the mathematical trackability and the applicability of the TPGP model. A comprehensive simulation study shows the effectiveness and validity of the proposed model and method. Finally, we use two real applications to demonstrate the proposed models and methods.}, language = {en}, urldate = {2021-10-02}, journal = {Reliability Engineering \& System Safety}, author = {Chen, Zhen and Li, Yaping and Zhou, Di and Xia, Tangbin and Pan, Ershun}, month = dec, year = {2021}, keywords = {Change-point, First passage time, Gaussian process, Two-phase degradation}, pages = {107916}, }
@article{braga_multivariate_2021, title = {Multivariate statistical aggregation and dimensionality reduction techniques to improve monitoring and maintenance in railways: {The} wheelset component}, volume = {216}, issn = {0951-8320}, shorttitle = {Multivariate statistical aggregation and dimensionality reduction techniques to improve monitoring and maintenance in railways}, url = {https://www.sciencedirect.com/science/article/pii/S0951832021004488}, doi = {10.1016/j.ress.2021.107932}, abstract = {Reliable monitoring and assessment of wear evolutions are critical for performing effective railway maintenance. Several characteristics and variables are used to quantify a worn condition of railway wheelsets. To measure all these wear quantities, emerging inspection technologies are being designed with increasingly complex architectures, working mechanisms and associated high costs. Moreover, data-driven models to support condition-based maintenance to the wheelset easily increase their complexity when too many variables are taken into account and may not provide a straightforward guideline to maintenance decision-makers. The purpose of this paper is to reduce the complexity when describing the wear level, by applying multivariate statistical techniques to real degradation data from railway wheelsets. Several wheelset condition variables and their relationships are analysed. Variables are synthetized through a principal component analysis (PCA) where the varimax rotation effect can be observed. A cluster analysis, which uses the principal components, allows identifying characteristics that lead to different wear evolutions. A strong correlation between the flange thickness and flange slope in the wear process is identified. Differences in wear trajectories between motor and trailer wheelsets are strongly significant. The findings are expected to support the improvement of state monitoring techniques and predictive maintenance optimization models.}, language = {en}, urldate = {2021-10-02}, journal = {Reliability Engineering \& System Safety}, author = {Braga, Joaquim A. P. and Andrade, António R.}, month = dec, year = {2021}, keywords = {Cluster analysis, Condition monitoring, Principal component analysis, Railway maintenance, Wheelset inspection, Wheelset wear}, pages = {107932}, }
@article{sun_hierarchical_2021, title = {A hierarchical modeling approach for degradation data with mixed-type covariates and latent heterogeneity}, volume = {216}, issn = {0951-8320}, url = {https://www.sciencedirect.com/science/article/pii/S0951832021004440}, doi = {10.1016/j.ress.2021.107928}, abstract = {Successful modeling of degradation data with covariates is essential for accurate reliability assessment of highly reliable product units. Due to the influences of different types of covariates, such as the external factors (e.g. accelerated operating conditions) and the internal factors (e.g. material microstructure characteristics), as well as latent heterogeneity due to the influences of the unobserved or unknown factors shared within each product unit, the degradation measurements of product units are highly heterogeneous over time. Many of existing degradation models often failed to simultaneously consider the influences of (i) both external accelerated conditions and internal material information, (ii) latent heterogeneity, and (iii) multiple material types. In this work, we propose a generic degradation modeling approach with mixed-type (e.g. both scalar and functional) covariates and latent heterogeneity to account for both the influences of observed internal and external factors as well as their interaction, and the influences of unobserved factors. Effective estimation algorithm is developed under expectation–maximization framework to jointly quantify the influences of mixed-type covariates and individual latent heterogeneity. The proposed algorithms further enables closed-form updating of model parameters at each iteration to ensure the estimation convenience. A real case study is provided to illustrate the proposed modeling approach and to demonstrate its effectiveness from both model prediction and interpretation perspectives.}, language = {en}, urldate = {2021-10-02}, journal = {Reliability Engineering \& System Safety}, author = {Sun, Xuxue and Cai, Wenjun and Li, Mingyang}, month = dec, year = {2021}, keywords = {Data augmentation, Degradation data, Functional data analysis, Latent heterogeneity, Mixed-type covariates}, pages = {107928}, }
@article{fecarotti_mathematical_2021, title = {A mathematical programming model to select maintenance strategies in railway networks}, volume = {216}, issn = {0951-8320}, url = {https://www.sciencedirect.com/science/article/pii/S0951832021004518}, doi = {10.1016/j.ress.2021.107940}, abstract = {This paper presents a nonlinear integer programming model to support the selection of maintenance strategies to implement on different segments of a railway network. Strategies are selected which collectively minimise the impact of sections’ conditions on service, given network availability and budget constraints. Different metrics related to the network topology, sections’ availability, service frequency, performance requirements and maintenance costs, are combined into a quantitative approach with a holistic view. The main contribution is to provide a simple yet effective modelling approach and solution method which are suitable for large networks and make use of standard solvers. Both an ad hoc heuristic solution and relaxation methods are developed, the latter enabling the quality of the heuristic solution to be estimated. The availability of railway lines is computed by exploiting the analogy with series–parallel networks. By varying the model parameters, a scenario analysis is performed to give insight into the influence of the system parameters on the selection of strategies, thus enabling more informed decisions. For its simple structure, the model is versatile to address similar problems arising in the maintenance of other types of networks, such as road and bridges networks, when deciding on the strategic allocation of maintenance efforts.}, language = {en}, urldate = {2021-10-02}, journal = {Reliability Engineering \& System Safety}, author = {Fecarotti, Claudia and Andrews, John and Pesenti, Raffaele}, month = dec, year = {2021}, keywords = {Availability, Maintenance optimisation, Mathematical programming, Railway networks}, pages = {107940}, }
@article{zhuang_temporal_2021, title = {Temporal convolution-based transferable cross-domain adaptation approach for remaining useful life estimation under variable failure behaviors}, volume = {216}, issn = {0951-8320}, url = {https://www.sciencedirect.com/science/article/pii/S0951832021004592}, doi = {10.1016/j.ress.2021.107946}, abstract = {Many data-driven models normally assume that the training and test data are independent and identically distributed to predict the remaining useful life (RUL) of industrial machines. However, different failure models caused by variable failure behaviors may lead to a domain shift. Meanwhile, conventional methods lack comprehensive attention to temporal information, resulting in a limitation. To handle the aforementioned challenges, a transferable cross-domain approach for RUL estimation is proposed. The hidden features are extracted adaptively by a temporal convolution network in which residual self-attention is able to fully consider the contextual degradation information. Furthermore, a new cross-domain adaption architecture with the contrastive loss and multi-kernel maximum mean discrepancy is designed to learn the domain invariant features. The effectiveness and superiority of the proposed method are proved by the case study on IEEE PHM challenge 2012 bearing dataset and the comparison with other methods.}, language = {en}, urldate = {2021-10-02}, journal = {Reliability Engineering \& System Safety}, author = {Zhuang, Jichao and Jia, Minping and Ding, Yifei and Ding, Peng}, month = dec, year = {2021}, keywords = {Cross-domain adaptation, Remaining useful life estimation, Rolling bearing, Temporal convolutional network, Transfer learning, Variable failure behaviors}, pages = {107946}, }
@article{duraj_outlier_2021, series = {Knowledge-{Based} and {Intelligent} {Information} \& {Engineering} {Systems}: {Proceedings} of the 25th {International} {Conference} {KES2021}}, title = {Outlier {Detection} in {Data} {Streams} — {A} {Comparative} {Study} of {Selected} {Methods}}, volume = {192}, issn = {1877-0509}, url = {https://www.sciencedirect.com/science/article/pii/S1877050921017841}, doi = {10.1016/j.procs.2021.09.047}, abstract = {Outlier detection is an increasingly important and intensively developing area of research. This paper focuses on the problem of outlier detection in data streams. It presents a performance comparison of selected statistical algorithms: AutoRegressive Integrated Moving Average (ARIMA), Exponential Smoothing State Space Model (ETS), Seasonal Hybrid Extreme Studentized Deviation (SHESD), Non-parametric methodology (NMV), and Chen-Liu method (CHL). Based on four data streams from the Kaggle Repository and DataHub Repository, the study provides results concerning the number of outliers detected by each algorithm and the algorithms’ operation times. The experiments were performed on data streams of different lengths (from a few hundred to 1200 records), characterized by the presence of different types of outliers.}, language = {en}, urldate = {2021-10-02}, journal = {Procedia Computer Science}, author = {Duraj, Agnieszka and Szczepaniak, Piotr S.}, month = jan, year = {2021}, keywords = {comparative study, outlier detection, stream data analysis}, pages = {2769--2778}, }
@article{dubey_comprehensive_2021, title = {A {Comprehensive} {Survey} and {Performance} {Analysis} of {Activation} {Functions} in {Deep} {Learning}}, url = {http://arxiv.org/abs/2109.14545}, abstract = {Neural networks have shown tremendous growth in recent years to solve numerous problems. Various types of neural networks have been introduced to deal with different types of problems. However, the main goal of any neural network is to transform the non-linearly separable input data into more linearly separable abstract features using a hierarchy of layers. These layers are combinations of linear and nonlinear functions. The most popular and common non-linearity layers are activation functions (AFs), such as Logistic Sigmoid, Tanh, ReLU, ELU, Swish and Mish. In this paper, a comprehensive overview and survey is presented for AFs in neural networks for deep learning. Different classes of AFs such as Logistic Sigmoid and Tanh based, ReLU based, ELU based, and Learning based are covered. Several characteristics of AFs such as output range, monotonicity, and smoothness are also pointed out. A performance comparison is also performed among 18 state-of-the-art AFs with different networks on different types of data. The insights of AFs are presented to benefit the researchers for doing further research and practitioners to select among different choices. The code used for experimental comparison is released at: {\textbackslash}url\{https://github.com/shivram1987/ActivationFunctions\}.}, urldate = {2021-10-01}, journal = {arXiv:2109.14545 [cs]}, author = {Dubey, Shiv Ram and Singh, Satish Kumar and Chaudhuri, Bidyut Baran}, month = sep, year = {2021}, note = {arXiv: 2109.14545}, keywords = {Computer Science - Machine Learning, Computer Science - Neural and Evolutionary Computing}, }
@inproceedings{mallouk_machine_2021, title = {Machine learning approach for predictive maintenance of transport systems}, doi = {10.1109/TST52996.2021.00023}, abstract = {Transportation companies must face to a huge competition and must reduce downtime and the associated costs. This can be achieved through predictive maintenance (PM), which defines maintenance actions based on the health of the system and its environment. Relevant information can be extracted from massive data related to health prognosis and management (PHM) by applying artificial intelligence (AI) techniques. This paper proposes a Machine Learning approach to develop a prediction model based on a supervised learning by comparing several regression algorithms. The model is then applied to the Remaining useful mileage prediction of trucks tires for a transport application of dangerous substances.}, booktitle = {2021 {Third} {International} {Conference} on {Transportation} and {Smart} {Technologies} ({TST})}, author = {Mallouk, Issam and Sallez, Yves and El Majd, Badr Abou}, month = may, year = {2021}, keywords = {Machine Learning, Machine learning, Prediction algorithms, Prediction model, Predictive models, Stakeholders, Supervised learning, Tires, Transportation, Useful life, regression, transportation}, pages = {96--100}, }
@article{de_pater_predictive_2021, title = {Predictive maintenance for multi-component systems of repairables with {Remaining}-{Useful}-{Life} prognostics and a limited stock of spare components}, volume = {214}, issn = {0951-8320}, url = {https://www.sciencedirect.com/science/article/pii/S095183202100288X}, doi = {10.1016/j.ress.2021.107761}, abstract = {Aircraft maintenance is undergoing a paradigm shift towards predictive maintenance, where the use of sensor data and Remaining-Useful-Life prognostics are central. This paper proposes an integrated approach for predictive aircraft maintenance planning for multiple multi-component systems, where the components are repairables. First, model-based Remaining-Useful-Life prognostics are developed. These prognostics are updated over time, as more sensor data become available. Then, a rolling horizon integer linear program is developed for the maintenance planning of multiple multi-component systems. This model integrates the Remaining-Useful-Life prognostics with the management of a limited stock of spare repairable components. The maintenance of the multiple systems is linked through the availability of spare components and shared maintenance time slots. Our approach is illustrated for a fleet of aircraft, each equipped with a Cooling System consisting of four Cooling Units. For an aircraft to be operational, a minimum of two Cooling Units out of the four need to be operational. The maintenance planning results show that our integrated approach reduces the costs with maintenance by 48\% relative to a corrective maintenance strategy and by 30\% relative to a preventive maintenance strategy. Moreover, using predictive maintenance, components are replaced in anticipation of failure without wasting their useful life. In general, our approach provides a roadmap from Remaining-Useful-Life prognostics to maintenance planning for multiple multi-component systems of repairables with a limited stock of spares.}, language = {en}, urldate = {2021-09-28}, journal = {Reliability Engineering \& System Safety}, author = {de Pater, Ingeborg and Mitici, Mihaela}, month = oct, year = {2021}, keywords = {Aircraft Cooling Units, Aircraft predictive maintenance of repairables, Management of spare components, Multiple multi-component systems, RUL prognostics}, pages = {107761}, }
@article{thuerey_physics-based_2021, title = {Physics-based {Deep} {Learning}}, url = {http://arxiv.org/abs/2109.05237}, abstract = {This digital book contains a practical and comprehensive introduction of everything related to deep learning in the context of physical simulations. As much as possible, all topics come with hands-on code examples in the form of Jupyter notebooks to quickly get started. Beyond standard supervised learning from data, we'll look at physical loss constraints, more tightly coupled learning algorithms with differentiable simulations, as well as reinforcement learning and uncertainty modeling. We live in exciting times: these methods have a huge potential to fundamentally change what computer simulations can achieve.}, urldate = {2021-09-15}, journal = {arXiv:2109.05237 [physics]}, author = {Thuerey, Nils and Holl, Philipp and Mueller, Maximilian and Schnell, Patrick and Trost, Felix and Um, Kiwon}, month = sep, year = {2021}, note = {arXiv: 2109.05237}, keywords = {Computer Science - Machine Learning, Physics - Computational Physics}, }
@article{chen_combining_2021, title = {Combining empirical mode decomposition and deep recurrent neural networks for predictive maintenance of lithium-ion battery}, volume = {50}, issn = {1474-0346}, url = {https://www.sciencedirect.com/science/article/pii/S1474034621001579}, doi = {10.1016/j.aei.2021.101405}, abstract = {Predictive maintenance of lithium-ion batteries has been one of the popular research subjects in recent years. Lithium-ion batteries can be used as the energy supply for industrial equipment, such as automated guided vehicles and battery electric vehicles. Predictive maintenance plays an important role in the application of smart manufacturing. This mechanism can provide different levels of pre-diagnosis for machines or components. Remaining useful life (RUL) prediction is crucial for the implementation of predictive maintenance strategies. RUL refers to the estimated useful life remaining before the machine cannot operate after a certain period of operation. This study develops a hybrid data science model based on empirical mode decomposition (EMD), grey relational analysis (GRA), and deep recurrent neural networks (RNN) for the RUL prediction of lithium-ion batteries. The EMD and GRA methods are first adopted to extract the characteristics of time series data. Then, various deep RNNs, including vanilla RNN, gated recurrent unit, long short-term memory network (LSTM), and bidirectional LSTM, are established to forecast state of health (SOH) and the RUL of lithium-ion batteries. Bayesian optimization is also used to find the best hyperparameters of deep RNNs. Experimental results with the lithium-ion batteries data of NASA Ames Prognostics Data Repository show that the proposed hybrid data science model can accurately predict the SOH and RUL of lithium-ion batteries. The LSTM network has the optimal results. The proposed hybrid data science model with multiple artificial intelligence-based technologies also demonstrates critical digital-technology enablers for digital transformation of smart manufacturing and transportation.}, language = {en}, urldate = {2021-09-13}, journal = {Advanced Engineering Informatics}, author = {Chen, James C. and Chen, Tzu-Li and Liu, Wei-Jun and Cheng, C. C. and Li, Meng-Gung}, month = oct, year = {2021}, keywords = {Bayesian optimization, Deep recurrent neural network, Empirical mode decomposition, Lithium-Ion battery, Predictive maintenance, Remaining useful life}, pages = {101405}, }
@article{muller_online_2021, series = {16th {IFAC} {Symposium} on {Advanced} {Control} of {Chemical} {Processes} {ADCHEM} 2021}, title = {Online wear detection for joints in progressing cavity pumps}, volume = {54}, issn = {2405-8963}, url = {https://www.sciencedirect.com/science/article/pii/S2405896321010119}, doi = {10.1016/j.ifacol.2021.08.240}, abstract = {The wear and failure of pin joints integrated in coupling rods of process equipment can lead to unpredictable breakdown with severe consequences. We present a simple algorithm for a non-invasive online detection of wear in pin joints where a progressing cavity pump serves as an example. The algorithm only requires a pressure sensor and a binary speed signal of the motor. We verify the algorithm with a laboratory test setup and an embedded system. The results show that the proposed algorithm can reliably monitor joint wear during regular pump operation.}, language = {en}, number = {3}, urldate = {2021-09-13}, journal = {IFAC-PapersOnLine}, author = {Müller, J. and Leonow, S. and Schulz, J. and Hansen, C. and Mönnigmann, M.}, month = jan, year = {2021}, keywords = {fault detection, joint wear, phase-locked loop, positive displacement pumps, predictive maintenance, progressing cavity pump}, pages = {188--193}, }
@article{budd_survey_2021, title = {A survey on active learning and human-in-the-loop deep learning for medical image analysis}, volume = {71}, issn = {1361-8415}, url = {https://www.sciencedirect.com/science/article/pii/S1361841521001080}, doi = {10.1016/j.media.2021.102062}, abstract = {Fully automatic deep learning has become the state-of-the-art technique for many tasks including image acquisition, analysis and interpretation, and for the extraction of clinically useful information for computer-aided detection, diagnosis, treatment planning, intervention and therapy. However, the unique challenges posed by medical image analysis suggest that retaining a human end-user in any deep learning enabled system will be beneficial. In this review we investigate the role that humans might play in the development and deployment of deep learning enabled diagnostic applications and focus on techniques that will retain a significant input from a human end user. Human-in-the-Loop computing is an area that we see as increasingly important in future research due to the safety-critical nature of working in the medical domain. We evaluate four key areas that we consider vital for deep learning in the clinical practice: (1) Active Learning to choose the best data to annotate for optimal model performance; (2) Interaction with model outputs - using iterative feedback to steer models to optima for a given prediction and offering meaningful ways to interpret and respond to predictions; (3) Practical considerations - developing full scale applications and the key considerations that need to be made before deployment; (4) Future Prospective and Unanswered Questions - knowledge gaps and related research fields that will benefit human-in-the-loop computing as they evolve. We offer our opinions on the most promising directions of research and how various aspects of each area might be unified towards common goals.}, language = {en}, urldate = {2021-09-13}, journal = {Medical Image Analysis}, author = {Budd, Samuel and Robinson, Emma C. and Kainz, Bernhard}, month = jul, year = {2021}, keywords = {Active learning, Deep Learning, Human-in-the-Loop, Medical image analysis}, pages = {102062}, }
@article{he_automl_2021, title = {{AutoML}: {A} {Survey} of the {State}-of-the-{Art}}, volume = {212}, issn = {0950-7051}, shorttitle = {{AutoML}}, url = {http://arxiv.org/abs/1908.00709}, doi = {10.1016/j.knosys.2020.106622}, abstract = {Deep learning (DL) techniques have penetrated all aspects of our lives and brought us great convenience. However, building a high-quality DL system for a specific task highly relies on human expertise, hindering the applications of DL to more areas. Automated machine learning (AutoML) becomes a promising solution to build a DL system without human assistance, and a growing number of researchers focus on AutoML. In this paper, we provide a comprehensive and up-to-date review of the state-of-the-art (SOTA) in AutoML. First, we introduce AutoML methods according to the pipeline, covering data preparation, feature engineering, hyperparameter optimization, and neural architecture search (NAS). We focus more on NAS, as it is currently a very active sub-topic of AutoML. We summarize the performance of the representative NAS algorithms on the CIFAR-10 and ImageNet datasets and further discuss several directions of NAS methods worth studying: one/two-stage NAS, one-shot NAS, and joint hyperparameter and architecture optimization. Finally, we discuss some open problems of the existing AutoML methods for future research.}, urldate = {2021-08-19}, journal = {Knowledge-Based Systems}, author = {He, Xin and Zhao, Kaiyong and Chu, Xiaowen}, month = jan, year = {2021}, note = {arXiv: 1908.00709}, keywords = {Computer Science - Computer Vision and Pattern Recognition, Computer Science - Machine Learning, Statistics - Machine Learning}, pages = {106622}, }
@article{alfeo_degradation_2021, title = {Degradation stage classification via interpretable feature learning}, issn = {0278-6125}, url = {https://www.sciencedirect.com/science/article/pii/S027861252100100X}, doi = {10.1016/j.jmsy.2021.05.003}, abstract = {Predictive maintenance (PdM) advocates for the usage of machine learning technologies to monitor asset's health conditions and plan maintenance activities accordingly. However, according to the specific degradation process, some health-related measures (e.g. temperature) may be not informative enough to reliably assess the health stage. Moreover, each measure needs to be properly treated to extract the information linked to the health stage. Those issues are usually addressed by performing a manual feature engineering, which results in high management cost and poor generalization capability of those approaches. In this work, we address this issue by coupling a health stage classifier with a feature learning mechanism. With feature learning, minimally processed data are automatically transformed into informative features. Many effective feature learning approaches are based on deep learning. With those, the features are obtained as a non-linear combination of the inputs, thus it is difficult to understand the input's contribution to the classification outcome and so the reasoning behind the model. Still, these insights are increasingly required to interpret the results and assess the reliability of the model. In this regard, we propose a feature learning approach able to (i) effectively extract high-quality features by processing different input signals, and (ii) provide useful insights about the most informative domain transformations (e.g. Fourier transform or probability density function) of the input signals (e.g. vibration or temperature). The effectiveness of the proposed approach is tested with publicly available real-world datasets about bearings' progressive deterioration and compared with the traditional feature engineering approach.}, language = {en}, urldate = {2021-05-22}, journal = {Journal of Manufacturing Systems}, author = {Alfeo, Antonio L. and Cimino, Mario G. C. A. and Vaglini, Gigliola}, month = may, year = {2021}, keywords = {Autoencoder, Deep learning, Explainable artificial intelligence, Feature learning, Interpretable machine learning, Predictive maintenance}, }
@article{guo_study_2021, title = {Study on {Landscape} {Architecture} {Model} {Design} {Based} on {Big} {Data} {Intelligence}}, issn = {2214-5796}, url = {https://www.sciencedirect.com/science/article/pii/S2214579621000368}, doi = {10.1016/j.bdr.2021.100219}, abstract = {Because of the rapid development of Internet technology in recent years, information data are growing ever faster, and using Internet data for landscape architecture analysis and research has become a main direction of industry development. Landscape planners are involved in a wide range of projects, and architectural objects often have very complex layouts; projects are usually composed of multiple components. Directly reconstructing the point clouds that correspond to these objects is relatively complex and requires reasonable, macroscopic planning at the regional scale. Generally, plots should be designed in three-dimensional space, and planning and design at different scales present different contents. Addressing multi-level regional planning and design, this paper focuses on parametric design technology for landscape architecture. It starts from the design of large areas, analyzes the status quo at the regional scale, and then designs with the help of a three-dimensional model. The coarsely segmented clusters of the 3D model are further split into more regular parts; according to the characteristics of the model data, point cloud data are estimated from the differential information of the 3D model, and the normal vectors and curvature of the point cloud are calculated. Point cloud registration unifies point clouds captured from different viewing angles into the same coordinate system, and a big-data landscape algorithm detects geometric and image feature points. Detailed point cloud processing algorithms then separate the different objects that fall within the same cluster of the coarse regional-scale segmentation, and panoramic image segmentation extracts geometric structure information for indoor point clouds. The data can be classified intelligently, the corresponding point cloud data can be labeled, and the geometric structure information can be attached to the point clouds to be segmented through point cloud matching, so as to enhance knowledge reserves and to find and solve problems in a timely manner.}, language = {en}, urldate = {2021-02-28}, journal = {Big Data Research}, author = {Guo, Shiyun and Tang, Jinping and Liu, Huabin and Gu, Xinren}, month = feb, year = {2021}, keywords = {Big data, Cluster analysis, Geographic information systems, Landscape architecture, Parameter model construction, Programming}, pages = {100219}, }
@article{maciag_unsupervised_2021, title = {Unsupervised {Anomaly} {Detection} in stream data with {Online} evolving {Spiking} {Neural} {Networks}}, issn = {0893-6080}, url = {https://www.sciencedirect.com/science/article/pii/S0893608021000599}, doi = {10.1016/j.neunet.2021.02.017}, abstract = {Unsupervised anomaly discovery in stream data is a research topic with many practical applications. However, in many cases, it is not easy to collect enough training data with labeled anomalies for supervised learning of an anomaly detector in order to deploy it later for identification of real anomalies in streaming data. It is thus important to design anomalies detectors that can correctly detect anomalies without access to labeled training data. Our idea is to adapt the Online evolving Spiking Neural Network (OeSNN) classifier to the anomaly detection task. As a result, we offer an Online evolving Spiking Neural Network for Unsupervised Anomaly Detection algorithm (OeSNN-UAD), which, unlike OeSNN, works in an unsupervised way and does not separate output neurons into disjoint decision classes. OeSNN-UAD uses our proposed new two-step anomaly detection method. Also, we derive new theoretical properties of neuronal model and input layer encoding of OeSNN, which enable more effective and efficient detection of anomalies in our OeSNN-UAD approach. The proposed OeSNN-UAD detector was experimentally compared with state-of-the-art unsupervised and semi-supervised detectors of anomalies in stream data from the Numenta Anomaly Benchmark and Yahoo Anomaly Datasets repositories. Our approach outperforms the other solutions provided in the literature in the case of data streams from the Numenta Anomaly Benchmark repository. Also, in the case of real data files of the Yahoo Anomaly Benchmark repository, OeSNN-UAD outperforms other selected algorithms, whereas in the case of Yahoo Anomaly Benchmark synthetic data files, it provides competitive results to the results recently reported in the literature.}, language = {en}, urldate = {2021-02-28}, journal = {Neural Networks}, author = {Maciąg, Piotr S. and Kryszkiewicz, Marzena and Bembenik, Robert and Lobo, Jesus L. and Del Ser, Javier}, month = feb, year = {2021}, keywords = {Anomaly detection, Evolving Spiking Neural Networks, Online learning, Outliers detection, Stream data, Time series data, Unsupervised anomaly detection}, }
@article{zhang_transfer_2021, title = {Transfer learning using deep representation regularization in remaining useful life prediction across operating conditions}, issn = {0951-8320}, url = {https://www.sciencedirect.com/science/article/pii/S0951832021001095}, doi = {10.1016/j.ress.2021.107556}, abstract = {Intelligent data-driven system prognostic methods have been popularly developed in the recent years. Despite the promising results, most approaches assume the training and testing data are from the same operating condition. In the real industries, it is quite common that different machine entities work under different scenarios, that results in performance deteriorations of the data-driven prognostic methods. This paper proposes a transfer learning method for remaining useful life predictions using deep representation regularization. The practical and challenging scenario is investigated, where the training and testing data are from different machinery operating conditions, and no target-domain run-to-failure data is available for training. In the deep learning framework, data alignment schemes are proposed in the representation sub-space, including healthy state alignment, degradation direction alignment, degradation level regularization and degradation fusion. In this way, the life-cycle data of different machine entities across domains can follow the same degradation trace, thus achieving prognostic knowledge transfer. Extensive experiments on the aero-engine dataset validate the effectiveness of the proposed method, which offers a promising solution for industrial prognostics.}, language = {en}, urldate = {2021-02-23}, journal = {Reliability Engineering \& System Safety}, author = {Zhang, Wei and Li, Xiang and Ma, Hui and Luo, Zhong and Li, Xu}, month = feb, year = {2021}, keywords = {Data alignment, Deep learning, Prognosis, Remaining useful life prediction, Representation learning}, pages = {107556}, }
@article{munirathinam_drift_2021, series = {Proceedings of the 2nd {International} {Conference} on {Industry} 4.0 and {Smart} {Manufacturing} ({ISM} 2020)}, title = {Drift {Detection} {Analytics} for {IoT} {Sensors}}, volume = {180}, issn = {1877-0509}, url = {https://www.sciencedirect.com/science/article/pii/S1877050921003951}, doi = {10.1016/j.procs.2021.01.341}, abstract = {The Industrial Internet of Things (IoT) has a unique opportunity to have a greater impact on the manufacturing sector. Monitoring the health of expensive equipment in the factory is critical for the business and is one of the opportunities where IoT can be truly transformational. Most often, industries monitor this equipment in a primitive way, using Statistical Process Control (SPC) limits. The major flaw in this monitoring approach is that it cannot detect drifts within the static limits, and once the limits are triggered it is usually too late for the team on the manufacturing floor to take preventive actions before the system goes down. In this paper, we develop a generic model for detecting drifts and identifying potential outliers. The model uses a double linear regression method to identify both aggressive and progressive drift, as well as an adjusted boxplot method to detect outliers in both symmetric and skewed distributions. Unlike conventional drift detection approaches, this model has low computational complexity and can be applied to both batch and stream data. This paper also introduces the infrastructure and architecture for enabling near real-time analytics using the IoT platform and streaming cluster, which reduces the data latency available for analysis to 10 minutes. Enabling real-time monitoring allows the end users to react to the alarms in a timely manner. This system has proven able to provide detection earlier than the existing system, before the impact is observed. The manufacturing operations team could establish a new business process to respond to early drift alarms using a quality shift-left approach.}, language = {en}, urldate = {2021-02-22}, journal = {Procedia Computer Science}, author = {Munirathinam, Sathyan}, month = jan, year = {2021}, keywords = {Analytics, Big Data, Concept Drift, Drift, IoT, Machine Learning, Sensor, Statistical Control Limits}, pages = {903--912}, }
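Editor's note: the two ingredients named in this abstract can be sketched as follows. "Double linear regression" is read here as comparing a long-window and a short-window trend slope, which is an assumption about the paper's method; the adjusted-boxplot fences follow the standard Hubert-Vandervieren (medcouple-based) formulation. Window sizes and thresholds are placeholders, not the paper's values.

# Hedged sketch of slope-based drift detection and adjusted-boxplot outlier flags.
import numpy as np
from statsmodels.stats.stattools import medcouple

def slope(y):
    x = np.arange(len(y))
    return np.polyfit(x, y, 1)[0]

def drift_detected(series, long_win=200, short_win=50, ratio=3.0):
    """Flag a drift when the recent slope departs strongly from the long-run slope."""
    s_long = slope(series[-long_win:])
    s_short = slope(series[-short_win:])
    return abs(s_short - s_long) > ratio * (abs(s_long) + 1e-9)

def adjusted_boxplot_outliers(x):
    """Outliers under the adjusted boxplot, which skews the fences by the medcouple."""
    x = np.asarray(x, dtype=float)
    q1, q3 = np.percentile(x, [25, 75])
    iqr = q3 - q1
    mc = medcouple(x)
    if mc >= 0:
        lo, hi = q1 - 1.5 * np.exp(-4 * mc) * iqr, q3 + 1.5 * np.exp(3 * mc) * iqr
    else:
        lo, hi = q1 - 1.5 * np.exp(-3 * mc) * iqr, q3 + 1.5 * np.exp(4 * mc) * iqr
    return (x < lo) | (x > hi)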
@article{gallo_industry_2021, series = {Proceedings of the 2nd {International} {Conference} on {Industry} 4.0 and {Smart} {Manufacturing} ({ISM} 2020)}, title = {Industry 4.0 and human factor: {How} is technology changing the role of the maintenance operator?}, volume = {180}, issn = {1877-0509}, shorttitle = {Industry 4.0 and human factor}, url = {https://www.sciencedirect.com/science/article/pii/S1877050921004415}, doi = {10.1016/j.procs.2021.01.364}, abstract = {Industry 4.0 is revolutionizing not only the manufacturing industry but also maintenance strategies. As a consequence of the introduction of Industry 4.0 technologies, new skills are demanded of maintenance operators, who have to be able to interact, for instance, with Cyber Physical Systems and robots. In this paper, we first investigate the state-of-the-art of Industry 4.0 technologies that are transforming operations and production management, and then discuss how the role of maintenance operators has changed in such a digitalized environment. We found that the maintenance Operator 4.0 should be able to find relevant information and predict events through a proper use of Big Data analytics, in addition to being able to interact with computers, digital databases and robots. Finally, the ability to rapidly adapt one's skills to innovations is also strongly demanded.}, language = {en}, urldate = {2021-02-22}, journal = {Procedia Computer Science}, author = {Gallo, Tommaso and Santolamazza, Annalisa}, month = jan, year = {2021}, keywords = {Industry 4.0, maintenance, maintenance Operator 4.0, smart factory}, pages = {388--393}, }
@article{bona_implementation_2021, series = {Proceedings of the 2nd {International} {Conference} on {Industry} 4.0 and {Smart} {Manufacturing} ({ISM} 2020)}, title = {Implementation of {Industry} 4.0 technology: {New} opportunities and challenges for maintenance strategy}, volume = {180}, issn = {1877-0509}, shorttitle = {Implementation of {Industry} 4.0 technology}, url = {https://www.sciencedirect.com/science/article/pii/S187705092100301X}, doi = {10.1016/j.procs.2021.01.258}, abstract = {Industry 4.0 is revolutionizing decision-making processes within the manufacturing industry. Maintenance strategies play a crucial role in progressively improving technical performance and economic savings. The introduction of Industry 4.0 technology results in relevant innovations that affect maintenance policies. Moreover, innovative solutions can be introduced, such as “remote maintenance” and “self-maintenance”. In this paper, we investigate the state-of-the-art of technologies in a “smart factory” with the aim of understanding how Industry 4.0 technologies are affecting maintenance policies and of discussing their implications for maintenance strategies. We found important trends in maintenance policies, such as “remote maintenance” and the attractive option of “autonomous maintenance”. This study represents the first comprehensive investigation of these research themes, and it aims to produce broader insight into and knowledge of current trends and the main difficulties, highlighting critical aspects and disadvantages for the implementation of innovative policies.}, language = {en}, urldate = {2021-02-22}, journal = {Procedia Computer Science}, author = {Bona, Gianpaolo Di and Cesarotti, Vittorio and Arcese, Gabriella and Gallo, Tommaso}, month = jan, year = {2021}, keywords = {Industry 4.0, maintenance policies, maintenance strategies, smart factory}, pages = {424--429}, }
@article{sajid_data_2021, title = {Data science applications for predictive maintenance and materials science in context to {Industry} 4.0}, issn = {2214-7853}, url = {https://www.sciencedirect.com/science/article/pii/S221478532100448X}, doi = {10.1016/j.matpr.2021.01.357}, abstract = {With the revolutionising of the industry to the next generations, machines have become more complicated. If they are not put to regular maintenance then there is more breakdown and disruption in the production line. These days, data science techniques have applications over almost every field and likewise are being applied to Industry 4.0. In this advanced setup, massive data is created and stored every second. Experts with expertise in advanced mathematical and computational skills are in demand to identify root causes of failures and quality deviations of a machine, contributing to minimising a loss in time and money. Moreover, new elements with tailored properties can be discovered with material theories and computational skills. The integration of data science with industry 4.0 will increase efficiency and will be helpful to predict the quality of material minimising the production line cost and time. Different research articles on industry 4.0, data science and predictive maintenance are identified and studied. This paper identifies five critical processes of data scientists for predictive maintenance and discussed briefly through a literature review. Data science uses various processes, scientific methods, and algorithms to extract knowledge from a large amount of data. It can collect a massive amount of industrial data, which is further used to improve the manufacturing systems' efficiency and reliability. It helps analyse the data and become essential for Industry 4.0.}, language = {en}, urldate = {2021-02-22}, journal = {Materials Today: Proceedings}, author = {Sajid, Sufiyan and Haleem, Abid and Bahl, Shashi and Javaid, Mohd and Goyal, Tarun and Mittal, Manoj}, month = feb, year = {2021}, keywords = {Data science, Decision making, Industry 4.0, Machine learning, Materials, Predictive maintenance}, }
@article{drakaki_recent_2021, series = {Proceedings of the 2nd {International} {Conference} on {Industry} 4.0 and {Smart} {Manufacturing} ({ISM} 2020)}, title = {Recent {Developments} {Towards} {Industry} 4.0 {Oriented} {Predictive} {Maintenance} in {Induction} {Motors}}, volume = {180}, issn = {1877-0509}, url = {https://www.sciencedirect.com/science/article/pii/S1877050921003999}, doi = {10.1016/j.procs.2021.01.345}, abstract = {Predictive maintenance (PdM) for smart manufacturing and Industry 4.0 has been associated with manufacturing intelligence supported by Artificial Intelligence (AI). Therefore, PdM also relies on the smart manufacturing technologies including cyber-physical system (CPS) and big data analytics. The multi-agent system (MAS) technology and deep learning (DL) have shown the capacity to provide efficient tools for the implementation of PdM in a CPS enabled smart industrial production system gaining feedback from big data analytics. Induction motors (IM) constitute the main power source in the industrial production environment and therefore their maintenance and early fault detection and diagnosis (FD/D) is a critical process. Neural network (NN) based FD/D of IM has been widely used in order to identify different fault types. DL methods have recently emerged for FD/D of IM and can efficiently analyze massive data coming from different machine sensors. The MAS has recently been used in combination with artificial NNs as a decision support tool for FD/D of IM. This paper aims to provide a review of recent trends in PdM of IM focusing on MAS and DL based FD/D methods that have emerged in the last 5 years due to their potential to be implemented in a smart manufacturing system. A discussion of the presented methods is given in order to present the recent developments and trends and provide future directions for research.}, language = {en}, urldate = {2021-02-22}, journal = {Procedia Computer Science}, author = {Drakaki, Maria and Karnavas, Yannis L. and Tzionas, Panagiotis and Chasiotis, Ioannis D.}, month = jan, year = {2021}, keywords = {Predictive maintenance, deep learning, fault detection, fault diagnosis, induction motor, machinery health management, multi-agent system, neural networks}, pages = {943--949}, }
@article{han_remaining_2021, title = {Remaining useful life prediction and predictive maintenance strategies for multi-state manufacturing systems considering functional dependence}, volume = {210}, issn = {0951-8320}, url = {https://www.sciencedirect.com/science/article/pii/S0951832021001137}, doi = {10.1016/j.ress.2021.107560}, abstract = {The performance states of the manufacturing equipment and the quality states of the manufactured products are important indicators for the operational state evaluation and maintenance decision of the multi-state system. Further, the performance degradation of manufacturing components shows some dependence on the decline in product quality. However, the traditional remaining useful life (RUL) prediction and maintenance strategy of manufacturing system are limited to the dependence of the manufacturing components performance degradation. Based on the RUL prediction model that considers the components dependence for product quality requirements, a system predictive maintenance method based on the component functional importance is proposed. First, the connotation of degradation mechanism, functional dependence and RUL for manufacturing system is expounded. Second, a mission reliability oriented RUL prediction method for manufacturing systems is developed based on the functional dependence of components. Third, an approach for average maintenance cost calculation is proposed based on dynamic RUL prediction after each maintenance action, and the functional importance is applied to prioritize the predictive maintenance component-sets. Finally, the case results show that the proposed approach can ensure the ability of manufacturing systems to complete production tasks with high quality product, and reduce the maintenance cost in the production cycle simultaneously.}, language = {en}, urldate = {2021-02-22}, journal = {Reliability Engineering \& System Safety}, author = {Han, Xiao and Wang, Zili and Xie, Min and He, Yihai and Li, Yao and Wang, Wenzhuo}, month = jun, year = {2021}, keywords = {Functional dependence, average maintenance cost, mission reliability, predictive maintenance, remaining useful life}, pages = {107560}, }
@article{ingemarsdotter_challenges_2021, title = {Challenges and {Solutions} in condition-based maintenance implementation - a multiple case study}, issn = {0959-6526}, url = {https://www.sciencedirect.com/science/article/pii/S0959652621006405}, doi = {10.1016/j.jclepro.2021.126420}, abstract = {Previous literature has highlighted many opportunities for digital technologies, such as the Internet of Things (IoT) and data analytics, to enable circular strategies, i.e., strategies which support the transition to a circular economy (CE). As one of the key circular strategies for which the digital opportunities are apparent, maintenance is selected as the focus area for this study. In the field of maintenance, IoT and data analytics enable companies to implement condition-based maintenance (CBM), i.e., maintenance based on monitoring the actual condition of products in the field. CBM can lead to more timely and efficient maintenance, better performing products-in-use, reduced downtime in operations, and longer product lifetimes. Despite these benefits, CBM implementation in practice is still limited. The aim of this research is thus to understand the challenges related to CBM implementation in practice, and to extract solutions which companies have applied to address these challenges. Towards this aim, a multiple case study is conducted at three original equipment manufacturers (OEMs). A framework is derived which allows for a broad analysis of challenges and solutions in the cases. We identify 19 challenges and 16 solutions and translate these into a set of actionable recommendations. Our findings contribute to the field of CBM with a comprehensive view of challenges and solutions in practice, from the OEM’s point of view. Moreover, we contribute to CE literature with a concrete case study about IoT-enabled circular strategy implementation.}, language = {en}, urldate = {2021-02-22}, journal = {Journal of Cleaner Production}, author = {Ingemarsdotter, Emilia and Kambanou, Marianna Lena and Jamsin, Ella and Sakao, Tomohiko and Balkenende, Ruud}, month = feb, year = {2021}, keywords = {Case study, Circular Economy, Condition-Based Maintenance, Digitalization, Internet of Things}, pages = {126420}, }
@article{soares_unsupervised_2021, title = {Unsupervised {Machine} {Learning} {Techniques} to {Prevent} {Faults} in {Railroad} {Switch} {Machines}}, issn = {1874-5482}, url = {https://www.sciencedirect.com/science/article/pii/S1874548221000159}, doi = {10.1016/j.ijcip.2021.100423}, abstract = {Railroad switch machines are essential electromechanical equipment in a railway network, and the occurrence of failures in such equipment can cause railroad interruptions and lead to potential economic losses. Thus, early diagnosis of these failures can represent a reduction in costs and an increase in productivity. This paper aims to propose a predictive model based on computational intelligence techniques, to solve this problem. The applied methodology includes feature extraction and selection procedures based on hypothesis tests and unsupervised machine learning models. The proposed model was tested in a database made available by a Brazilian railway company and proved to be efficient once it has considered critical operations conducted in the vicinity of the ones classified as faults.}, language = {en}, urldate = {2021-02-15}, journal = {International Journal of Critical Infrastructure Protection}, author = {Soares, Nielson and Aguiar, Eduardo Pestana de and Souza, Amanda Campos and Goliatt, Leonardo}, month = feb, year = {2021}, keywords = {Computational Intelligence, Failure Prediction, Machine Learning, Railroad switch}, pages = {100423}, }
@article{gbadamosi_iot_2021, title = {{IoT} for predictive assets monitoring and maintenance: {An} implementation strategy for the {UK} rail industry}, volume = {122}, issn = {0926-5805}, shorttitle = {{IoT} for predictive assets monitoring and maintenance}, url = {http://www.sciencedirect.com/science/article/pii/S0926580520310669}, doi = {10.1016/j.autcon.2020.103486}, abstract = {With about 100\% increase in rail service usage over the last 20 years, it is pertinent that rail infrastructure continues to function at an optimal level to avoid service disruptions, cancellations or delays due to unforeseen asset breakdown. In an endeavour to propose a strategy for the implementation of Internet of Things (IoT) in rail asset maintenance, a qualitative methodology was adopted through a series of focus-group workshops to identify the priority areas and enabling digital technologies for IoT implementation. The methods of data collection included audio recording, note-taking, and concept mapping. The audio records were transcribed and used for thematic analysis, while the concept maps were integrated for conceptual modelling and analysis. This paper presents an implementation strategy for IoT for rail assets maintenance with focus on priority areas such as real-time condition monitoring using IoT sensors, predictive maintenance, remote inspection, and integrated asset data management platform.}, language = {en}, urldate = {2020-12-08}, journal = {Automation in Construction}, author = {Gbadamosi, Abdul-Quayyum and Oyedele, Lukumon O. and Delgado, Juan Manuel Davila and Kusimo, Habeeb and Akanbi, Lukman and Olawale, Oladimeji and Muhammed-yakubu, Naimah}, month = feb, year = {2021}, keywords = {Augmented reality, Internet of things, Predictive maintenance, Rail assets, Remote inspection}, pages = {103486}, }
@article{pinciroli_semi-supervised_2021, title = {A semi-supervised method for the characterization of degradation of nuclear power plants steam generators}, volume = {131}, issn = {0149-1970}, url = {http://www.sciencedirect.com/science/article/pii/S0149197020303279}, doi = {10.1016/j.pnucene.2020.103580}, abstract = {The digitalization of nuclear power plants, with the rapid growth of information technology, opens the door to the development of new methods of condition-based maintenance. In this work, a semi-supervised method for characterizing the level of degradation of nuclear power plant components using measurements collected during plant operational transients is proposed. It is based on the fusion of selected features extracted from the monitored signals. Feature selection is formulated as a multi-objective optimization problem. The objectives are the maximization of the feature monotonicity and trendability, and the maximization of a novel measure of correlation between the feature values and the results of non-destructive tests performed to assess the component degradation. The features of the Pareto optimal set are normalized and the component degradation level is defined as the median of the obtained values. The developed method is applied to real data collected from steam generators of pressurized water reactors. It is shown able to identify degradation level with errors comparable to those obtained by ad-hoc non-destructive tests.}, language = {en}, urldate = {2020-12-08}, journal = {Progress in Nuclear Energy}, author = {Pinciroli, Luca and Baraldi, Piero and Shokry, Ahmed and Zio, Enrico and Seraoui, Redouane and Mai, Carole}, month = jan, year = {2021}, keywords = {Condition-based maintenance, Degradation assessment, Feature selection, Nuclear power plant, Semi-supervised, Steam generator}, pages = {103580}, }
@article{voronov_forest-based_2021, title = {A forest-based algorithm for selecting informative variables using {Variable} {Depth} {Distribution}}, volume = {97}, issn = {0952-1976}, url = {http://www.sciencedirect.com/science/article/pii/S0952197620303341}, doi = {10.1016/j.engappai.2020.104073}, abstract = {Predictive maintenance of systems and their components in technical systems is a promising approach to optimize system usage and reduce system downtime. Various sensor data are logged during system operation for different purposes, but sometimes not directly related to the degradation of a specific component. Variable selection algorithms are necessary to reduce model complexity and improve interpretability of diagnostic and prognostic algorithms. This paper presents a forest-based variable selection algorithm that analyzes the distribution of a variable in the decision tree structure, called Variable Depth Distribution, to measure its importance. The proposed variable selection algorithm is developed for datasets with correlated variables that pose problems for existing forest-based variable selection methods. The proposed variable selection method is evaluated and analyzed using three case studies: survival analysis of lead–acid batteries in heavy-duty vehicles, engine misfire detection, and a simulated prognostics dataset. The results show the usefulness of the proposed algorithm, with respect to existing forest-based methods, and its ability to identify important variables in different applications. As an example, the battery prognostics case study shows that similar predictive performance is achieved when only 17\% percent of the variables are used compared to all measured signals.}, language = {en}, urldate = {2020-11-30}, journal = {Engineering Applications of Artificial Intelligence}, author = {Voronov, Sergii and Jung, Daniel and Frisk, Erik}, month = jan, year = {2021}, keywords = {Automotive, Random Forest, Random Survival Forest, Variable selection}, pages = {104073}, }
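Editor's note: the sketch below is only loosely inspired by the depth-based importance idea in this entry, not a reimplementation of the Variable Depth Distribution algorithm. It records, for every tree in a random forest, the shallowest depth at which each feature is used for a split and averages over trees; the dataset and the penalty for unused features are assumptions.

# Hedged sketch: depth-based variable importance from scikit-learn tree internals.
import numpy as np
from sklearn.datasets import make_regression
from sklearn.ensemble import RandomForestRegressor

def mean_min_split_depth(forest, n_features):
    per_tree = []
    for est in forest.estimators_:
        tree = est.tree_
        min_depth = np.full(n_features, tree.max_depth + 1.0)  # penalty for unused features
        stack = [(0, 0)]                                        # (node_id, depth)
        while stack:
            node, depth = stack.pop()
            if tree.children_left[node] != -1:                  # internal (split) node
                f = tree.feature[node]
                min_depth[f] = min(min_depth[f], depth)
                stack.append((tree.children_left[node], depth + 1))
                stack.append((tree.children_right[node], depth + 1))
        per_tree.append(min_depth)
    return np.mean(per_tree, axis=0)        # lower mean depth ~ more informative variable

X, y = make_regression(n_samples=500, n_features=10, n_informative=3, random_state=0)
rf = RandomForestRegressor(n_estimators=100, random_state=0).fit(X, y)
print(np.argsort(mean_min_split_depth(rf, X.shape[1]))[:3])     # likely the informative ones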
@article{sun_online_2021, title = {Online oil debris monitoring of rotating machinery: {A} detailed review of more than three decades}, volume = {149}, issn = {0888-3270}, shorttitle = {Online oil debris monitoring of rotating machinery}, url = {http://www.sciencedirect.com/science/article/pii/S0888327020307275}, doi = {10.1016/j.ymssp.2020.107341}, abstract = {Oil debris monitoring has played an irreplaceable role in ascertaining the health condition of rotating machinery (e.g. engine, gearbox). Although many sensing methods for detecting wear-generated particles of rotating elements have been presented, a comprehensive review paper on these technologies is still missing. To this end, this paper provides a detailed survey of the advances in oil debris monitoring for the online health monitoring of rotating machinery. According to the detection mechanism, these sensing technologies are classified under varying categories (magnetic: magnetic chip detectors and inductive sensors, electrical: resistive-capacitive sensors and electrostatic sensors, optical: photoelectric sensors and imaging sensors, acoustic). The systematic analysis and commentary on each sensing method are conducted, and real applications also be reviewed. These sensing technologies are not confined to research-related activities, of which some have already been patented and commercialized. Finally, future works are presented to meet the challenges faced by online monitoring of rotating machinery.}, language = {en}, urldate = {2020-10-19}, journal = {Mechanical Systems and Signal Processing}, author = {Sun, Jiayi and Wang, Liming and Li, Jianfeng and Li, Fangyi and Li, Jianyong and Lu, Haiyang}, month = feb, year = {2021}, keywords = {Lubricating oil, Oil debris monitoring, Online health monitoring, Rotating machinery, Wear debris sensor}, pages = {107341}, }
@article{wen_generalized_2021, title = {A generalized remaining useful life prediction method for complex systems based on composite health indicator}, volume = {205}, issn = {0951-8320}, url = {http://www.sciencedirect.com/science/article/pii/S0951832020307419}, doi = {10.1016/j.ress.2020.107241}, abstract = {As one of the key techniques in Prognostics and Health Management (PHM), accurate Remaining Useful Life (RUL) prediction can effectively reduce the number of downtime maintenance and significantly improve economic benefits. In this paper, a generalized RUL prediction method is proposed for complex systems with multiple Condition Monitoring (CM) signals. A stochastic degradation model is proposed to characterize the system degradation behavior, based on which the respective reliability characteristics such as the RUL and its Confidence Interval (CI) are explicitly derived. Considering the degradation model, two desirable properties of the Health Indicator (HI) are put forward and their respective quantitative evaluation methods are developed. With the desirable properties, a nonlinear data fusion method based on Genetic Programming (GP) is proposed to construct a superior composite HI. In this way, the multiple CM signals are fused to provide a better prediction capability. Finally, the proposed integrated methodology is validated on the C-MAPSS data set of aircraft turbine engines.}, language = {en}, urldate = {2020-10-05}, journal = {Reliability Engineering \& System Safety}, author = {Wen, Pengfei and Zhao, Shuai and Chen, Shaowei and Li, Yong}, month = jan, year = {2021}, keywords = {Data fusion, Degradation modeling, Multiple sensors, Prognostics, Remaining useful life}, pages = {107241}, }
@article{liu_complex_2020, title = {Complex engineered system health indexes extraction using low frequency raw time-series data based on deep learning methods}, volume = {161}, issn = {0263-2241}, url = {https://www.sciencedirect.com/science/article/pii/S0263224120304280}, doi = {10.1016/j.measurement.2020.107890}, abstract = {Data analysis methods based on deep learning are attracting more and more attention in the field of health monitoring, fault diagnosis and failure prognostics of complex systems, such as aircraft airborne systems and engines. In this study, several deep-learning-based health monitoring methods are demonstrated on a real data set from an airborne system of commercial aircraft, where Health Indexes (HIs) are derived from the raw sensor data to characterize the health state of the system in service. Determining the optimal degradation evaluation index is the key to further failure prognostics. Therefore, a set of metrics to characterize the suitability of different HI-deriving methods has been proposed. These metrics include monotonicity, prognosability, and trendability. The HI thus selected can effectively characterize the health state of the aircraft air conditioning system, which is helpful for further failure prognostics and for converting scheduled maintenance into condition-based maintenance.}, urldate = {2023-10-12}, journal = {Measurement}, author = {Liu, Cui and Sun, Jianzhong and Liu, He and Lei, Shiying and Hu, Xinhua}, month = sep, year = {2020}, keywords = {Air conditioning system, Condition-based maintenance, Deep learning, Long short term memory, System health monitoring}, pages = {107890}, }
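Editor's note: the three suitability metrics named in this abstract have common textbook formulations in the PHM literature, sketched below for a set of run-to-failure health-index trajectories. These are generic definitions and not necessarily the exact ones used in the paper; the toy trajectories are assumptions.

# Hedged sketch: monotonicity, trendability and prognosability of health indexes.
import numpy as np
from scipy.stats import spearmanr

def monotonicity(hi):
    """Imbalance of positive vs. negative increments, in [0, 1]."""
    d = np.diff(hi)
    return abs(np.sum(d > 0) - np.sum(d < 0)) / (len(hi) - 1)

def trendability(hi):
    """Absolute rank correlation between the health index and time, in [0, 1]."""
    rho, _ = spearmanr(hi, np.arange(len(hi)))
    return abs(rho)

def prognosability(trajectories):
    """Spread of end-of-life values relative to the overall degradation range."""
    finals = np.array([t[-1] for t in trajectories])
    ranges = np.array([abs(t[0] - t[-1]) for t in trajectories])
    return float(np.exp(-np.std(finals) / (np.mean(ranges) + 1e-12)))

# Toy usage: three noisy degradation trajectories of different lengths.
rng = np.random.default_rng(0)
trajs = [np.linspace(1.0, 0.2, n) + 0.02 * rng.standard_normal(n) for n in (120, 150, 180)]
print(np.mean([monotonicity(t) for t in trajs]),
      np.mean([trendability(t) for t in trajs]),
      prognosability(trajs))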
@inproceedings{maschler_continual_2020, title = {Continual {Learning} of {Fault} {Prediction} for {Turbofan} {Engines} using {Deep} {Learning} with {Elastic} {Weight} {Consolidation}}, volume = {1}, doi = {10.1109/ETFA46521.2020.9211903}, abstract = {Fault prediction based upon deep learning algorithms has great potential in industrial automation: By automatically adapting to different usage contexts, it would greatly expand the usefulness of current predictive maintenance solutions. However, restrictions regarding the centralized accumulation of data necessary for such automatic adaption call for a distributed approach to training these algorithms. Therefore, in this paper, a continual learning based algorithm for fault prediction is presented, allowing for distributed, cooperative learning by elastic weight consolidation. This algorithm is then evaluated on a large NASA turbofan engine dataset and shows promising results regarding the performant training on decentral sub-datasets for industrial automation scenarios.}, booktitle = {2020 25th {IEEE} {International} {Conference} on {Emerging} {Technologies} and {Factory} {Automation} ({ETFA})}, author = {Maschler, Benjamin and Vietz, Hannes and Jazdi, Nasser and Weyrich, Michael}, month = sep, year = {2020}, note = {ISSN: 1946-0759}, keywords = {Continual learning, Deep learning, Elastic weight consolidation, Engines, Fault prognostics, Industrial Automation, Machine learning, NASA, Neural networks, Prediction algorithms, Prediction methods, Task analysis, Training, Training data, Transfer learning}, pages = {959--966}, }
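Editor's note: the elastic weight consolidation penalty used in this entry can be sketched compactly. After training on one task or sub-dataset, a diagonal Fisher information estimate is built per parameter, and training on the next task penalizes movement away from the old weights. The model, data loaders, loss function and the penalty strength `lam` below are placeholders, not values from the paper.

# Hedged sketch of an EWC penalty in PyTorch.
import torch

def fisher_diagonal(model, loader, loss_fn):
    fisher = {n: torch.zeros_like(p) for n, p in model.named_parameters() if p.requires_grad}
    model.eval()
    for x, y in loader:
        model.zero_grad()
        loss_fn(model(x), y).backward()
        for n, p in model.named_parameters():
            if p.grad is not None:
                fisher[n] += p.grad.detach() ** 2
    return {n: f / max(len(loader), 1) for n, f in fisher.items()}

def ewc_penalty(model, fisher, old_params, lam=100.0):
    penalty = 0.0
    for n, p in model.named_parameters():
        if n in fisher:
            penalty = penalty + (fisher[n] * (p - old_params[n]) ** 2).sum()
    return lam * penalty

# Illustrative training step on the new task (placeholder names):
# old_params = {n: p.detach().clone() for n, p in model.named_parameters()}
# fisher = fisher_diagonal(model, task_a_loader, torch.nn.functional.mse_loss)
# loss = task_b_loss + ewc_penalty(model, fisher, old_params)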
@inproceedings{fanaee-t_cyclefootprint_2020, address = {Cham}, series = {Communications in {Computer} and {Information} {Science}}, title = {{CycleFootprint}: {A} {Fully} {Automated} {Method} for {Extracting} {Operation} {Cycles} from {Historical} {Raw} {Data} of {Multiple} {Sensors}}, isbn = {978-3-030-66770-2}, shorttitle = {{CycleFootprint}}, doi = {10.1007/978-3-030-66770-2_3}, abstract = {Extracting operation cycles from the historical reading of sensors is an essential step in IoT data analytics. For instance, we can exploit the obtained cycles for learning the normal states to feed into semi-supervised models or dictionaries for efficient real-time anomaly detection on the sensors. However, this is a difficult problem due to this fact that we may have different types of cycles, each of which with varying lengths. Current approaches are highly dependent on manual efforts by the aid of visualization and knowledge of domain experts, which is not feasible on a large scale. We propose a fully automated method called CycleFootprint that can: 1) identify the most relevant signal that has the most obvious recurring patterns among multiple signals; and 2) automatically find the cycles from the selected signal. The main idea behind CycleFootprint is mining footprints in the cycles. We assume that there should be a unique pattern in each cycle that shows up repeatedly in each cycle. By mining those footprints, we can identify cycles. We evaluate our method with existing labeled ground truth data of a real separator in marine application equipped with multiple health monitoring sensors. 86\% of cycles extracted by our method match fully or with at least 99\% overlap with true cycles, which sounds promising given its unsupervised and fully automated nature.}, language = {en}, booktitle = {{IoT} {Streams} for {Data}-{Driven} {Predictive} {Maintenance} and {IoT}, {Edge}, and {Mobile} for {Embedded} {Machine} {Learning}}, publisher = {Springer International Publishing}, author = {Fanaee-T, Hadi and Bouguelia, Mohamed-Rafik and Rahat, Mahmoud and Blixt, Jonathan and Singh, Harpal}, editor = {Gama, Joao and Pashami, Sepideh and Bifet, Albert and Sayed-Mouchawe, Moamar and Fröning, Holger and Pernkopf, Franz and Schiele, Gregor and Blott, Michaela}, year = {2020}, keywords = {Cycle detection, IoT, Sensors}, pages = {30--44}, }
@article{calikus_no_2020, title = {No free lunch but a cheaper supper: {A} general framework for streaming anomaly detection}, volume = {155}, issn = {0957-4174}, shorttitle = {No free lunch but a cheaper supper}, url = {https://www.sciencedirect.com/science/article/pii/S0957417420302773}, doi = {10.1016/j.eswa.2020.113453}, abstract = {In recent years, research interest in detecting anomalies in temporal streaming data has increased significantly. A variety of algorithms are being developed in the data mining community. They can be broadly divided into two categories, namely general-purpose and ad hoc ones. In most cases, general approaches assume a one-size-fits-all solution model, and strive to design a single “optimal” anomaly detector which can detect all anomalies in any domain. To date, there exists no universal method that has been shown to outperform the others across different anomaly types, use cases and datasets. In this paper, we propose SAFARI, a framework created by abstracting and unifying the fundamental tasks within the streaming anomaly detection. SAFARI provides a flexible and extensible anomaly detection procedure to overcome the limitations of one-size-fits-all solutions. Such abstraction helps to facilitate more elaborate algorithm comparisons by allowing us to isolate the effects of shared and unique characteristics of diverse algorithms on the performance. Using the framework, we have identified a research gap that motivated us to propose a novel learning strategy. We implemented twenty different anomaly detectors and conducted an extensive evaluation study, comparing their performances using real-world benchmark datasets with different properties. The results indicate that there is no single superior detector which works perfectly for every case, proving our hypothesis that “there is no free lunch” in the streaming anomaly detection world. Finally, we discuss the benefits and drawbacks of each method in-depth, drawing a set of conclusions and guidelines to guide future users of SAFARI.}, language = {en}, urldate = {2023-05-21}, journal = {Expert Systems with Applications}, author = {Calikus, Ece and Nowaczyk, Sławomir and Sant’Anna, Anita and Dikmen, Onur}, month = oct, year = {2020}, keywords = {Anomaly detection, Online learning, Reservoir sampling, Stream mining}, pages = {113453}, }
@article{fan_transfer_2020, title = {Transfer learning for remaining useful life prediction based on consensus self-organizing models}, volume = {203}, issn = {0951-8320}, url = {https://www.sciencedirect.com/science/article/pii/S0951832020305998}, doi = {10.1016/j.ress.2020.107098}, abstract = {The traditional paradigm for developing machine prognostics usually relies on generalization from data acquired in experiments under controlled conditions prior to deployment of the equipment. Detecting or predicting failures and estimating machine health in this way assumes that future field data will have a very similar distribution to the experiment data. However, many complex machines operate under dynamic environmental conditions and are used in many different ways. This makes collecting comprehensive data very challenging, and the assumption that pre-deployment data and post-deployment data follow very similar distributions is unlikely to hold. In this work, we present a feature-representation based transfer learning (TL) method for predicting Remaining Useful Life (RUL) of equipment, under scenarios that samples with previously unseen conditions are presented in the target domain and the labels are available only for the source domain, but not the target domain. This setting corresponds to generalizing from a limited number of run-to-failure experiments performed prior to deployment into making prognostics with data coming from deployed equipment that is being used under multiple new operating conditions and experiencing previously unseen faults. We employ a deviation detection method, Consensus Self-Organizing Models (COSMO), to create transferable features for building the RUL regression model. These features capture how different a particular equipment is in comparison to its peers. The efficiency of the proposed TL method is demonstrated using the NASA Turbofan Engine Degradation Simulation Data Set. Models using the COSMO transferable features show better performance than other methods on predicting RUL when the target domain is more complex than the source domain.}, language = {en}, urldate = {2023-05-21}, journal = {Reliability Engineering \& System Safety}, author = {Fan, Yuantao and Nowaczyk, Sławomir and Rögnvaldsson, Thorsteinn}, month = nov, year = {2020}, keywords = {Consensus self-organizing models, Domain adaptation, Feature-Representation transfer, Remaining useful life prediction, Transfer learning}, pages = {107098}, }
@article{schulz_computational_2020, title = {Computational {Psychiatry} for {Computers}}, volume = {23}, issn = {2589-0042}, url = {https://www.sciencedirect.com/science/article/pii/S258900422030969X}, doi = {10.1016/j.isci.2020.101772}, abstract = {Computational psychiatry is a nascent field that attempts to use multi-level analyses of the underlying computational problems that we face in navigating a complex, uncertain and changing world to illuminate mental dysfunction and disease. Two particular foci of the field are the costs and benefits of environmental adaptivity and the danger and necessity of heuristics. Here, we examine the extent to which these foci and others can be used to study the actual and potential flaws of the artificial computational devices that we are increasingly inventing and empowering to navigate this very same environment on our behalf.}, language = {en}, number = {12}, urldate = {2023-03-08}, journal = {iScience}, author = {Schulz, Eric and Dayan, Peter}, month = dec, year = {2020}, keywords = {Computer Science, Human-Computer Interaction, Psychology}, pages = {101772}, }
@article{shanahan_artificial_2020, title = {Artificial {Intelligence} and the {Common} {Sense} of {Animals}}, volume = {24}, issn = {1364-6613}, url = {https://www.sciencedirect.com/science/article/pii/S1364661320302163}, doi = {10.1016/j.tics.2020.09.002}, abstract = {The problem of common sense remains a major obstacle to progress in artificial intelligence. Here, we argue that common sense in humans is founded on a set of basic capacities that are possessed by many other animals, capacities pertaining to the understanding of objects, space, and causality. The field of animal cognition has developed numerous experimental protocols for studying these capacities and, thanks to progress in deep reinforcement learning (RL), it is now possible to apply these methods directly to evaluate RL agents in 3D environments. Besides evaluation, the animal cognition literature offers a rich source of behavioural data, which can serve as inspiration for RL tasks and curricula.}, language = {en}, number = {11}, urldate = {2023-03-08}, journal = {Trends in Cognitive Sciences}, author = {Shanahan, Murray and Crosby, Matthew and Beyret, Benjamin and Cheke, Lucy}, month = nov, year = {2020}, pages = {862--872}, }
@article{hadsell_embracing_2020, title = {Embracing {Change}: {Continual} {Learning} in {Deep} {Neural} {Networks}}, volume = {24}, issn = {1364-6613}, shorttitle = {Embracing {Change}}, url = {https://www.sciencedirect.com/science/article/pii/S1364661320302199}, doi = {10.1016/j.tics.2020.09.004}, abstract = {Artificial intelligence research has seen enormous progress over the past few decades, but it predominantly relies on fixed datasets and stationary environments. Continual learning is an increasingly relevant area of study that asks how artificial systems might learn sequentially, as biological systems do, from a continuous stream of correlated data. In the present review, we relate continual learning to the learning dynamics of neural networks, highlighting the potential it has to considerably improve data efficiency. We further consider the many new biologically inspired approaches that have emerged in recent years, focusing on those that utilize regularization, modularity, memory, and meta-learning, and highlight some of the most promising and impactful directions.}, language = {en}, number = {12}, urldate = {2023-03-05}, journal = {Trends in Cognitive Sciences}, author = {Hadsell, Raia and Rao, Dushyant and Rusu, Andrei A. and Pascanu, Razvan}, month = dec, year = {2020}, keywords = {artificial intelligence, lifelong, memory, meta-learning, non-stationary}, pages = {1028--1040}, }
@inproceedings{le_nguyen_challenges_2020, address = {Cham}, series = {Communications in {Computer} and {Information} {Science}}, title = {Challenges of {Stream} {Learning} for {Predictive} {Maintenance} in the {Railway} {Sector}}, isbn = {978-3-030-66770-2}, doi = {10.1007/978-3-030-66770-2_2}, abstract = {Smart trains nowadays are equipped with sensors that generate an abundance of data during operation. Such data may, directly or indirectly, reflect the health state of the trains. Thus, it is of interest to analyze these data in a timely manner, preferably on-the-fly as they are being generated, to make maintenance operations more proactive and efficient. This paper provides a brief overview of predictive maintenance and stream learning, with the primary goal of leveraging stream learning in order to enhance maintenance operations in the railway sector. We justify the applicability and promising benefits of stream learning via the example of a real-world railway dataset of the train doors.}, language = {en}, booktitle = {{IoT} {Streams} for {Data}-{Driven} {Predictive} {Maintenance} and {IoT}, {Edge}, and {Mobile} for {Embedded} {Machine} {Learning}}, publisher = {Springer International Publishing}, author = {Le Nguyen, Minh Huong and Turgis, Fabien and Fayemi, Pierre-Emmanuel and Bifet, Albert}, editor = {Gama, Joao and Pashami, Sepideh and Bifet, Albert and Sayed-Mouchawe, Moamar and Fröning, Holger and Pernkopf, Franz and Schiele, Gregor and Blott, Michaela}, year = {2020}, keywords = {Predictive maintenance, Railway, Stream learning}, pages = {14--29}, }
@inproceedings{miyata_concept_2020, address = {New York, NY, USA}, series = {{WIMS} 2020}, title = {Concept {Drift} {Detection} on {Data} {Stream} for {Revising} {DBSCAN} {Cluster}}, isbn = {978-1-4503-7542-9}, url = {https://doi.org/10.1145/3405962.3405990}, doi = {10.1145/3405962.3405990}, abstract = {Data stream mining of IoT data can help operators immediately isolate causes of equipment alarms. The challenge, however, is how to keep the classifiers high-purity (i.e., keep data of the same class in the right cluster) while dealing with the concept drifting ascribed to differences between alarm models and entities. We propose continuously revising the classification model in accordance with the data distribution and trend changes. Evaluations showed there was no purity deterioration for oscillation condition data with a drifting rate of 1\%. This result demonstrates that our approach can help operators improve their decision making.}, urldate = {2023-02-16}, booktitle = {Proceedings of the 10th {International} {Conference} on {Web} {Intelligence}, {Mining} and {Semantics}}, publisher = {Association for Computing Machinery}, author = {Miyata, Yasushi and Ishikawa, Hiroshi}, month = aug, year = {2020}, keywords = {Concept Drift, DBSCAN, clustering, data stream, power grid}, pages = {104--110}, }
@article{faouzi_pyts_2020, title = {pyts: {A} {Python} {Package} for {Time} {Series} {Classification}}, volume = {21}, issn = {1533-7928}, shorttitle = {pyts}, url = {http://jmlr.org/papers/v21/19-763.html}, abstract = {pyts is an open-source Python package for time series classification. This versatile toolbox provides implementations of many algorithms published in the literature, preprocessing functionalities, and data set loading utilities. pyts relies on the standard scientific Python packages numpy, scipy, scikit-learn, joblib, and numba, and is distributed under the BSD-3-Clause license. Documentation contains installation instructions, a detailed user guide, a full API description, and concrete self-contained examples.}, number = {46}, urldate = {2023-02-12}, journal = {Journal of Machine Learning Research}, author = {Faouzi, Johann and Janati, Hicham}, year = {2020}, pages = {1--6}, }
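A minimal usage sketch for the pyts package cited above, assuming the load_gunpoint loader and BOSSVS classifier documented in pyts; the window_size value is illustrative rather than taken from the cited paper.

from pyts.classification import BOSSVS
from pyts.datasets import load_gunpoint

# Load a bundled univariate time series classification benchmark.
X_train, X_test, y_train, y_test = load_gunpoint(return_X_y=True)

# Bag-of-SFA-Symbols in Vector Space classifier.
clf = BOSSVS(window_size=28)
clf.fit(X_train, y_train)
print("test accuracy:", clf.score(X_test, y_test))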
@article{benschoten_mpa_2020, title = {{MPA}: a novel cross-language {API} for time series analysis}, volume = {5}, issn = {2475-9066}, shorttitle = {{MPA}}, url = {https://joss.theoj.org/papers/10.21105/joss.02179}, doi = {10.21105/joss.02179}, abstract = {Van Benschoten et al., (2020). MPA: a novel cross-language API for time series analysis. Journal of Open Source Software, 5(49), 2179, https://doi.org/10.21105/joss.02179}, language = {en}, number = {49}, urldate = {2023-02-12}, journal = {Journal of Open Source Software}, author = {Benschoten, Andrew H. Van and Ouyang, Austin and Bischoff, Francisco and Marrs, Tyler W.}, month = may, year = {2020}, pages = {2179}, }
@article{charte_analysis_2020, title = {An analysis on the use of autoencoders for representation learning: {Fundamentals}, learning task case studies, explainability and challenges}, volume = {404}, issn = {0925-2312}, shorttitle = {An analysis on the use of autoencoders for representation learning}, url = {https://www.sciencedirect.com/science/article/pii/S092523122030624X}, doi = {10.1016/j.neucom.2020.04.057}, abstract = {In many machine learning tasks, learning a good representation of the data can be the key to building a well-performant solution. This is because most learning algorithms operate with the features in order to find models for the data. For instance, classification performance can improve if the data is mapped to a space where classes are easily separated, and regression can be facilitated by finding a manifold of data in the feature space. As a general rule, features are transformed by means of statistical methods such as principal component analysis, or manifold learning techniques such as Isomap or locally linear embedding. From a plethora of representation learning methods, one of the most versatile tools is the autoencoder. In this paper we aim to demonstrate how to influence its learned representations to achieve the desired learning behavior. To this end, we present a series of learning tasks: data embedding for visualization, image denoising, semantic hashing, detection of abnormal behaviors and instance generation. We model them from the representation learning perspective, following the state of the art methodologies in each field. A solution is proposed for each task employing autoencoders as the only learning method. The theoretical developments are put into practice using a selection of datasets for the different problems and implementing each solution, followed by a discussion of the results in each case study and a brief explanation of other six learning applications. We also explore the current challenges and approaches to explainability in the context of autoencoders. All of this helps conclude that, thanks to alterations in their structure as well as their objective function, autoencoders may be the core of a possible solution to many problems which can be modeled as a transformation of the feature space.}, language = {en}, urldate = {2022-12-07}, journal = {Neurocomputing}, author = {Charte, David and Charte, Francisco and del Jesus, María J. and Herrera, Francisco}, month = sep, year = {2020}, keywords = {Autoencoders, Deep learning, Feature extraction, Representation learning}, pages = {93--107}, }
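As a shallow stand-in for the autoencoders surveyed in this entry, the toy sketch below trains a scikit-learn MLPRegressor to reproduce its input through a narrow bottleneck and inspects the reconstruction error; the paper's case studies rely on deeper, task-specific autoencoders, so this is only an assumed illustration of the reconstruction objective.

import numpy as np
from sklearn.neural_network import MLPRegressor

rng = np.random.RandomState(0)
X = rng.normal(size=(1000, 10))                         # placeholder feature matrix

# Train the network to map X back onto X; the 3-unit hidden layer acts as the code layer.
ae = MLPRegressor(hidden_layer_sizes=(3,), max_iter=2000, random_state=0)
ae.fit(X, X)

reconstruction_error = np.mean((ae.predict(X) - X) ** 2, axis=1)
print("mean reconstruction error:", reconstruction_error.mean())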
@inproceedings{boniol_automated_2020, title = {Automated {Anomaly} {Detection} in {Large} {Sequences}}, doi = {10.1109/ICDE48307.2020.00182}, abstract = {Subsequence anomaly (or outlier) detection in long sequences is an important problem with applications in a wide range of domains. However, current approaches have severe limitations: they either require prior domain knowledge, or become cumbersome and expensive to use in situations with recurrent anomalies of the same type. In this work, we address these problems, and propose NorM, a novel approach, suitable for domain-agnostic anomaly detection. NorM is based on a new data series primitive, which permits to detect anomalies based on their (dis)similarity to a model that represents normal behavior. The experimental results on several real datasets demonstrate that the proposed approach outperforms by a large margin the current state-of-the art algorithms in terms of accuracy, while being orders of magnitude faster.}, booktitle = {2020 {IEEE} 36th {International} {Conference} on {Data} {Engineering} ({ICDE})}, author = {Boniol, Paul and Linardi, Michele and Roncallo, Federico and Palpanas, Themis}, month = apr, year = {2020}, note = {ISSN: 2375-026X}, keywords = {Anomaly detection, Anomaly discovery, Computational modeling, Conferences, Data Series, Data engineering, Data models, Electrocardiography, Time Series, Time series analysis}, pages = {1834--1837}, }
@article{udugama_novel_2020, title = {A novel use for an old problem: {The} {Tennessee} {Eastman} challenge process as an activating teaching tool}, volume = {30}, issn = {1749-7728}, shorttitle = {A novel use for an old problem}, url = {https://www.sciencedirect.com/science/article/pii/S1749772819300545}, doi = {10.1016/j.ece.2019.09.002}, abstract = {The domains of process design, operations and control are highly interdependent and thus affect operational efficiency and robustness of industrial facilities. Despite this, they are mostly kept in isolation from each other even in commercial projects, which is probably due to the perceived increase in complexity and aligned with prior experience from university education. Process design covers mostly the steady state, whereas process control is generally taught with a strong focus on classical control theory, which can be difficult to apply in industry. A reason for not combining the three domains of process design, operations and control is the lack of simple, student friendly teaching aids that can be used within time constraints of (under-)graduate teaching. The Tennessee Eastman Process (TEP) challenge, introduced in the early 90′s, originally as a process control benchmark simulation, can be used as a teaching aid to impart practical understanding of process design, dynamics and control to students. To this end, we will discuss the unique properties of the TEP, which make it an excellent tool to introduce process dynamics and control while reinforcing understanding of fundamentals, unit operations and the complexities and consequences of combining unit operations. We will then identify key developments that are needed to make the TEP a useful tool for teaching and discuss developments that have been carried out by the authors in this respect. Subsequently, it is shown how the modified model creates learning opportunities with respect to elementary as well as advanced control techniques and design assessments. The paper concludes with an outline of how this tool is currently used in a process design course with a strong emphasis on dynamics and control and in a classical undergraduate course on process control.}, language = {en}, urldate = {2022-05-17}, journal = {Education for Chemical Engineers}, author = {Udugama, Isuru A. and Gernaey, Krist V. and Taube, Michael A. and Bayer, Christoph}, month = jan, year = {2020}, keywords = {Control education, Operator training, Teaching software, Process design, Process dynamics}, pages = {20--31}, }
@article{mariscotti_data_2020, title = {Data sets of measured pantograph voltage and current of {European} {AC} railways}, volume = {30}, issn = {2352-3409}, url = {https://www.sciencedirect.com/science/article/pii/S2352340920303711}, doi = {10.1016/j.dib.2020.105477}, abstract = {AC railways are characterized by peculiar Power Quality phenomena, where moving loads (trains, locomotives, etc.) interact with the supply network that provides electrical energy through the overhead contact line. Distortion, resonances, transients overlap in a complex dynamic scenario, that sees several and various problems of Power Quality, network stability, power and energy metering and disturbance to systems and equipment. For all related studies and analysis raw experimental data are extremely important. The provided data consists of time-domain waveforms of sampled pantograph voltage and current: each recording is tagged with the specific train operating condition (traction, cruising/coasting, braking, standstill), the active power and the speed, to support correlation and clustering of data.}, language = {en}, urldate = {2022-05-16}, journal = {Data in Brief}, author = {Mariscotti, Andrea}, month = jun, year = {2020}, keywords = {Harmonics, Measurement of electrical quantities, Power quality, Railways, Rolling stock}, pages = {105477}, }
@article{signorino_dataset_2020, title = {Dataset of measured and commented pantograph electric arcs in {DC} railways}, volume = {31}, issn = {2352-3409}, url = {https://www.sciencedirect.com/science/article/pii/S2352340920308726}, doi = {10.1016/j.dib.2020.105978}, abstract = {DC railways are characterized by particularly intense arcing caused by pantograph detachment, due to the large current intensity and the general implementation of onboard resonant filters, whose transient response is triggered by electric transients including electric arcs. Electric arc depends on the train speed (the relative speed between the sliding contact over the pantograph and the hot spot on the catenary system), the intensity of the collected pantograph current and the line voltage level. Electric arcs are broadband in nature and can trigger the system transient response dominated by the resonant filter, besides interfering with the operation of onboard equipment (such as for energy conversion and metering).}, language = {en}, urldate = {2022-05-16}, journal = {Data in Brief}, author = {Signorino, Davide and Giordano, Domenico and Mariscotti, Andrea and Gallo, Daniele and Femine, Antonio Delle and Balic, Fabio and Quintana, Jorge and Donadio, Lorenzo and Biancucci, Alfredo}, month = aug, year = {2020}, keywords = {Electric arc, Measurement of electrical quantities, Pantograph, Power quality, Railways, Rolling stock}, pages = {105978}, }
@article{yuan_general_2020, title = {A general end-to-end diagnosis framework for manufacturing systems}, volume = {7}, issn = {2095-5138}, url = {https://doi.org/10.1093/nsr/nwz190}, doi = {10.1093/nsr/nwz190}, abstract = {The manufacturing sector is envisioned to be heavily influenced by artificial-intelligence-based technologies with the extraordinary increases in computational power and data volumes. A central challenge in the manufacturing sector lies in the requirement of a general framework to ensure satisfied diagnosis and monitoring performances in different manufacturing applications. Here, we propose a general data-driven, end-to-end framework for the monitoring of manufacturing systems. This framework, derived from deep-learning techniques, evaluates fused sensory measurements to detect and even predict faults and wearing conditions. This work exploits the predictive power of deep learning to automatically extract hidden degradation features from noisy, time-course data. We have experimented the proposed framework on 10 representative data sets drawn from a wide variety of manufacturing applications. Results reveal that the framework performs well in examined benchmark applications and can be applied in diverse contexts, indicating its potential use as a critical cornerstone in smart manufacturing.}, number = {2}, urldate = {2022-05-14}, journal = {National Science Review}, author = {Yuan, Ye and Ma, Guijun and Cheng, Cheng and Zhou, Beitong and Zhao, Huan and Zhang, Hai-Tao and Ding, Han}, month = feb, year = {2020}, pages = {418--429}, }
@article{fawwaz_real-time_2020, title = {Real-{Time} and {Robust} {Hydraulic} {System} {Fault} {Detection} via {Edge} {Computing}}, volume = {10}, copyright = {http://creativecommons.org/licenses/by/3.0/}, issn = {2076-3417}, url = {https://www.mdpi.com/2076-3417/10/17/5933}, doi = {10.3390/app10175933}, abstract = {We consider fault detection in a hydraulic system that maintains multivariate time-series sensor data. Such a real-world industrial environment could suffer from noisy data resulting from inaccuracies in hardware sensing or external interference. Thus, we propose a real-time and robust fault detection method for hydraulic systems that leverages cooperation between cloud and edge servers. The cloud server employs a new approach that includes a genetic algorithm (GA)-based feature selection that identifies feature-to-label correlations and feature-to-feature redundancies. A GA can efficiently process large search spaces, such as solving a combinatorial optimization problem to identify the optimal feature subset. By using fewer important features that require transmission and processing, this approach reduces detection time and improves model performance. We propose a long short-term memory autoencoder for a robust fault detection model that leverages temporal information on time-series sensor data and effectively handles noisy data. This detection model is then deployed at edge servers that provide computing resources near the data source to reduce latency. Our experimental results suggest that this method outperforms prior approaches by demonstrating lower detection times, higher accuracy, and increased robustness to noisy data. While we have a 63\% reduction of features, our model obtains a high accuracy of approximately 98\% and is robust to noisy data with a signal-to-noise ratio near 0 dB. Our method also performs at an average detection time of only 9.42 ms with a reduced average packet size of 179.98 KB from the maximum of 343.78 KB.}, language = {en}, number = {17}, urldate = {2022-05-14}, journal = {Applied Sciences}, author = {Fawwaz, Dzaky Zakiyal and Chung, Sang-Hwa}, month = jan, year = {2020}, note = {Number: 17 Publisher: Multidisciplinary Digital Publishing Institute}, keywords = {autoencoder, edge computing, fault detection, feature selection, genetic algorithm, hydraulic system, long short-term memory}, pages = {5933}, }
@article{wang_deep_2020, title = {Deep learning for fault-relevant feature extraction and fault classification with stacked supervised auto-encoder}, volume = {92}, issn = {0959-1524}, url = {https://www.sciencedirect.com/science/article/pii/S0959152420302225}, doi = {10.1016/j.jprocont.2020.05.015}, abstract = {Stacked auto-encoder (SAE)-based deep learning has been introduced for fault classification in recent years, which has the potential to extract deep abstract features from the raw input data. However, SAE cannot ensure the relevance of deep features with the fault types due to its unsupervised self-reconstruction in the pretraining stage. To overcome this problem, a stacked supervised auto-encoder is proposed to pretrain the deep network and obtain deep fault-relevant features from raw input data. In each supervised auto-encoder, informative features are learned from the input data with the goal that they can largely distinguish different fault types. By stacking multiple supervised auto-encoders hierarchically, high-level fault-relevant features are gradually learned from raw input data, which can improve the classification accuracy of the classifiers. The proposed SSAE is tested on the Tennessee–Eastman (TE) benchmark process and a real industrial hydrocracking process. The results show the effectiveness and flexibility of SSAE.}, language = {en}, urldate = {2022-05-02}, journal = {Journal of Process Control}, author = {Wang, Yalin and Yang, Haibing and Yuan, Xiaofeng and Shardt, Yuri A. W. and Yang, Chunhua and Gui, Weihua}, month = aug, year = {2020}, keywords = {Deep learning, Fault classification, Process monitoring, Stacked auto-encoder (SAE), Tennessee–Eastman process}, pages = {79--89}, }
@article{zheng_new_2020, title = {A new unsupervised data mining method based on the stacked autoencoder for chemical process fault diagnosis}, volume = {135}, issn = {0098-1354}, url = {https://www.sciencedirect.com/science/article/pii/S009813541930986X}, doi = {10.1016/j.compchemeng.2020.106755}, abstract = {Process monitoring plays an important role in chemical process safety management, and fault diagnosis is a vital step of process monitoring. Among fault diagnosis researches, supervised ones are inappropriate for industrial applications due to the lack of labeled historical data in real situations. Thereby, unsupervised methods which are capable of dealing with unlabeled data should be developed for fault diagnosis. In this work, a new unsupervised data mining method based on deep learning is proposed for isolating different conditions of chemical process, including normal operations and faults, and thus labeled database can be created efficiently for constructing fault diagnosis model. The proposed method mainly consists of three steps: feature extraction by the convolutional stacked autoencoder (SAE), feature visualization by the t-distributed stochastic neighbor embedding (t-SNE) algorithm, and clustering. The benchmark Tennessee Eastman process (TEP) and an industrial hydrocracking instance are utilized to illustrate the effectiveness of the proposed data mining method.}, language = {en}, urldate = {2022-05-02}, journal = {Computers \& Chemical Engineering}, author = {Zheng, Shaodong and Zhao, Jinsong}, month = apr, year = {2020}, keywords = {Clustering, Data mining, Fault diagnosis, The SAE, The TEP, Unsupervised}, pages = {106755}, }
@article{zhang_fault_2020, title = {Fault {Detection} in the {Tennessee} {Eastman} {Benchmark} {Process} {Using} {Principal} {Component} {Difference} {Based} on {K}-{Nearest} {Neighbors}}, volume = {8}, issn = {2169-3536}, doi = {10.1109/ACCESS.2020.2977421}, abstract = {Industrial data usually have nonlinear or multimodal characteristics which do not meet the data assumptions of statistics in principal component analysis (PCA). Therefore, PCA has a lower fault detection rate in industrial processes. Aiming at the above limitations of PCA, a fault detection method using principal component difference based on k-nearest neighbors (Diff-PCA) is proposed in this paper. First, find the k nearest neighbors set of each sample in the training data set and calculate its mean vector. Second, build an augmented vector using each sample and its corresponding mean vector. Third, calculate the loading matrix and score matrix using PCA. Next, calculate the estimated scores using the mean vector of each sample and missing data imputation technique for PCA. At last, build two new statistics using the difference between the real scores and estimated scores to detect faults. In addition, the fault diagnosis method based on contribution plots of monitored variables is also proposed in this paper. In Diff-PCA, the difference skill can eliminate the impact of the nonlinear and multimodal structure on fault detection. Meanwhile, the monitored subspaces by the two new statistics are different from that by T2 and SPE in PCA. The efficiency of the proposed strategy is implemented in two numerical cases (nonlinear and multimode) and the Tennessee Eastman (TE) processes. The fault detection results indicate that Diff-PCA outperforms the conventional PCA, Kernel PCA, dynamic PCA, principal component-based k nearest neighbor rule and k nearest neighbor rule.}, journal = {IEEE Access}, author = {Zhang, Cheng and Guo, Qingxiu and Li, Yuan}, year = {2020}, note = {Conference Name: IEEE Access}, keywords = {Covariance matrices, Fault detection, Fault detection and diagnosis, Fault diagnosis, Loading, Monitoring, Principal component analysis, Tennessee Eastman processes, Training data, k nearest neighbors, principal component analysis, principal component difference}, pages = {49999--50009}, }
@article{van_engelen_survey_2020, title = {A survey on semi-supervised learning}, volume = {109}, issn = {1573-0565}, url = {https://doi.org/10.1007/s10994-019-05855-6}, doi = {10.1007/s10994-019-05855-6}, abstract = {Semi-supervised learning is the branch of machine learning concerned with using labelled as well as unlabelled data to perform certain learning tasks. Conceptually situated between supervised and unsupervised learning, it permits harnessing the large amounts of unlabelled data available in many use cases in combination with typically smaller sets of labelled data. In recent years, research in this area has followed the general trends observed in machine learning, with much attention directed at neural network-based models and generative learning. The literature on the topic has also expanded in volume and scope, now encompassing a broad spectrum of theory, algorithms and applications. However, no recent surveys exist to collect and organize this knowledge, impeding the ability of researchers and engineers alike to utilize it. Filling this void, we present an up-to-date overview of semi-supervised learning methods, covering earlier work as well as more recent advances. We focus primarily on semi-supervised classification, where the large majority of semi-supervised learning research takes place. Our survey aims to provide researchers and practitioners new to the field as well as more advanced readers with a solid understanding of the main approaches and algorithms developed over the past two decades, with an emphasis on the most prominent and currently relevant work. Furthermore, we propose a new taxonomy of semi-supervised classification algorithms, which sheds light on the different conceptual and methodological approaches for incorporating unlabelled data into the training process. Lastly, we show how the fundamental assumptions underlying most semi-supervised learning algorithms are closely connected to each other, and how they relate to the well-known semi-supervised clustering assumption.}, language = {en}, number = {2}, urldate = {2022-04-25}, journal = {Machine Learning}, author = {van Engelen, Jesper E. and Hoos, Holger H.}, month = feb, year = {2020}, keywords = {Classification, Machine learning, Semi-supervised learning}, pages = {373--440}, }
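For readers new to the setting surveyed in this entry, the sketch below shows one simple semi-supervised wrapper, scikit-learn's SelfTrainingClassifier; the unlabelled fraction and base learner are arbitrary choices for illustration, not drawn from the survey.

import numpy as np
from sklearn.datasets import load_iris
from sklearn.semi_supervised import SelfTrainingClassifier
from sklearn.svm import SVC

X, y = load_iris(return_X_y=True)
rng = np.random.RandomState(0)

# Hide 70% of the labels; scikit-learn marks unlabelled samples with -1.
y_partial = y.copy()
y_partial[rng.rand(len(y)) < 0.7] = -1

model = SelfTrainingClassifier(SVC(probability=True, gamma="auto"))
model.fit(X, y_partial)
print("accuracy on fully labelled data:", model.score(X, y))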
@article{wang_generalizing_2020, title = {Generalizing from a {Few} {Examples}: {A} {Survey} on {Few}-shot {Learning}}, volume = {53}, issn = {0360-0300}, shorttitle = {Generalizing from a {Few} {Examples}}, url = {https://doi.org/10.1145/3386252}, doi = {10.1145/3386252}, abstract = {Machine learning has been highly successful in data-intensive applications but is often hampered when the data set is small. Recently, Few-shot Learning (FSL) is proposed to tackle this problem. Using prior knowledge, FSL can rapidly generalize to new tasks containing only a few samples with supervised information. In this article, we conduct a thorough survey to fully understand FSL. Starting from a formal definition of FSL, we distinguish FSL from several relevant machine learning problems. We then point out that the core issue in FSL is that the empirical risk minimizer is unreliable. Based on how prior knowledge can be used to handle this core issue, we categorize FSL methods from three perspectives: (i) data, which uses prior knowledge to augment the supervised experience; (ii) model, which uses prior knowledge to reduce the size of the hypothesis space; and (iii) algorithm, which uses prior knowledge to alter the search for the best hypothesis in the given hypothesis space. With this taxonomy, we review and discuss the pros and cons of each category. Promising directions, in the aspects of the FSL problem setups, techniques, applications, and theories, are also proposed to provide insights for future research.1}, number = {3}, urldate = {2022-04-25}, journal = {ACM Computing Surveys}, author = {Wang, Yaqing and Yao, Quanming and Kwok, James T. and Ni, Lionel M.}, month = jun, year = {2020}, keywords = {Few-shot learning, low-shot learning, meta-learning, one-shot learning, prior knowledge, small sample learning}, pages = {63:1--63:34}, }
@article{yu_continuous_2020, title = {Continuous {Support} {Vector} {Regression} for {Nonstationary} {Streaming} {Data}}, issn = {2168-2275}, doi = {10.1109/TCYB.2020.3015266}, abstract = {Quadratic programming is the process of solving a special type of mathematical optimization problem. Recent advances in online solutions for quadratic programming problems (QPPs) have created opportunities to widen the scope of applications for support vector regression (SVR). In this vein, efforts to make SVR compatible with streaming data have been met with substantial success. However, streaming data with concept drift remain problematic because the trained prediction function in SVR tends to drift as the data distribution drifts. Aiming to contribute a solution to this aspect of SVR's advancement, we have developed continuous SVR (C-SVR) to solve regression problems with nonstationary streaming data, that is, data where the optimal input-output prediction function can drift over time. The basic idea of C-SVR is to continuously learn a series of input-output functions over a series of time windows to make predictions about different periods. However, strikingly, the learning process in different time windows is not independent. An additional similarity term in the QPP, which is solved incrementally, threads the various input-output functions together by conveying some learned knowledge through consecutive time windows. How much learned knowledge is transferred is determined by the extent of the concept drift. Experimental evaluations with both synthetic and real-world datasets indicate that C-SVR has better performance than most existing methods for nonstationary streaming data regression.}, journal = {IEEE Transactions on Cybernetics}, author = {Yu, Hang and Lu, Jie and Zhang, Guangquan}, year = {2020}, note = {Conference Name: IEEE Transactions on Cybernetics}, keywords = {Concept drift, Cybernetics, Microsoft Windows, Prediction algorithms, Quadratic programming, Support vector machines, Training, Vegetation, continuous learning, streaming data, support vector regression (SVR)}, pages = {1--14}, }
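The entry above targets SVR under concept drift; purely as a hedged point of comparison, the sketch below retrains a standard scikit-learn SVR on a sliding window of a drifting synthetic signal. This is the naive forget-and-refit baseline, not the incremental, similarity-linked quadratic programming of C-SVR.

import numpy as np
from sklearn.svm import SVR

rng = np.random.RandomState(0)
t = np.arange(2000, dtype=float)
y = np.sin(0.02 * t) + 0.001 * t + 0.1 * rng.normal(size=t.size)   # signal with slow drift

window = 200
for start in (0, 500, 1000, 1500):
    # Fit only on the most recent window so old concepts are forgotten.
    X_w = t[start:start + window].reshape(-1, 1)
    model = SVR(kernel="rbf", C=10.0).fit(X_w, y[start:start + window])
    x_next = np.array([[t[start + window]]])
    print("t =", int(t[start + window]), "one-step prediction:", float(model.predict(x_next)[0]))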
@article{mcinnes_umap_2020, title = {{UMAP}: {Uniform} {Manifold} {Approximation} and {Projection} for {Dimension} {Reduction}}, shorttitle = {{UMAP}}, url = {http://arxiv.org/abs/1802.03426}, abstract = {UMAP (Uniform Manifold Approximation and Projection) is a novel manifold learning technique for dimension reduction. UMAP is constructed from a theoretical framework based in Riemannian geometry and algebraic topology. The result is a practical scalable algorithm that applies to real world data. The UMAP algorithm is competitive with t-SNE for visualization quality, and arguably preserves more of the global structure with superior run time performance. Furthermore, UMAP has no computational restrictions on embedding dimension, making it viable as a general purpose dimension reduction technique for machine learning.}, urldate = {2022-03-15}, journal = {arXiv:1802.03426 [cs, stat]}, author = {McInnes, Leland and Healy, John and Melville, James}, month = sep, year = {2020}, note = {arXiv: 1802.03426}, keywords = {Computer Science - Computational Geometry, Computer Science - Machine Learning, Statistics - Machine Learning}, }
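A minimal usage sketch for UMAP as distributed in the umap-learn package; the data here is random noise used only to show the fit_transform interface with commonly used hyperparameters.

import numpy as np
import umap  # provided by the umap-learn package

rng = np.random.RandomState(42)
X = rng.normal(size=(500, 20))                 # placeholder high-dimensional data

reducer = umap.UMAP(n_neighbors=15, min_dist=0.1, n_components=2, random_state=42)
embedding = reducer.fit_transform(X)
print(embedding.shape)                         # (500, 2)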
@inproceedings{hoong_ong_predictive_2020, title = {Predictive {Maintenance} for {Edge}-{Based} {Sensor} {Networks}: {A} {Deep} {Reinforcement} {Learning} {Approach}}, shorttitle = {Predictive {Maintenance} for {Edge}-{Based} {Sensor} {Networks}}, doi = {10.1109/WF-IoT48130.2020.9221098}, abstract = {Failure of mission-critical equipment interrupts production and results in monetary loss. The risk of unplanned equipment downtime can be minimized through Predictive Maintenance of revenue generating assets to ensure optimal performance and safe operation of equipment. However, the increased sensorization of the equipment generates a data deluge, and existing machine-learning based predictive model alone becomes inadequate for timely equipment condition predictions. In this paper, a model-free Deep Reinforcement Learning algorithm is proposed for predictive equipment maintenance from an equipment-based sensor network context. Within each equipment, a sensor device aggregates raw sensor data, and the equipment health status is analyzed for anomalous events. Unlike traditional black-box regression models, the proposed algorithm self-learns an optimal maintenance policy and provides actionable recommendation for each equipment. Our experimental results demonstrate the potential for broader range of equipment maintenance applications as an automatic learning framework.}, booktitle = {2020 {IEEE} 6th {World} {Forum} on {Internet} of {Things} ({WF}-{IoT})}, author = {Hoong Ong, Kevin Shen and Niyato, Dusit and Yuen, Chau}, month = jun, year = {2020}, keywords = {Industries, Mission critical systems, Prediction algorithms, Predictive models, Productivity, Reinforcement learning, Schedules}, pages = {1--6}, }
@article{skordilis_deep_2020, title = {A deep reinforcement learning approach for real-time sensor-driven decision making and predictive analytics}, volume = {147}, issn = {0360-8352}, url = {https://www.sciencedirect.com/science/article/pii/S036083522030334X}, doi = {10.1016/j.cie.2020.106600}, abstract = {The increased complexity of sensor-intensive systems with expensive subsystems and costly repairs and failures calls for efficient real-time control and decision making policies. Deep reinforcement learning has demonstrated great potential in addressing highly complex and challenging control and decision making problems. Despite its potential to derive real-time policies using real-time data for dynamic systems, it has been rarely used for sensor-driven maintenance related problems. In this paper, we propose two novel decision making methods in which reinforcement learning and particle filtering are utilized for (i) deriving real-time maintenance policies and (ii) estimating remaining useful life for sensor-monitored degrading systems. The proposed framework introduces a new direction with many potential opportunities for system monitoring. To demonstrate the effectiveness of the proposed methods, numerical experiments are provided from a set of simulated data and a turbofan engine dataset provided by NASA.}, language = {en}, urldate = {2022-03-03}, journal = {Computers \& Industrial Engineering}, author = {Skordilis, Erotokritos and Moghaddass, Ramin}, month = sep, year = {2020}, keywords = {Decision-making, Deep reinforcement learning, Particle filters, Real-time control, Remaining useful life estimation}, pages = {106600}, }
@article{sheikh_battery_2020, title = {A {Battery} {Health} {Monitoring} {Method} {Using} {Machine} {Learning}: {A} {Data}-{Driven} {Approach}}, volume = {13}, copyright = {http://creativecommons.org/licenses/by/3.0/}, issn = {1996-1073}, shorttitle = {A {Battery} {Health} {Monitoring} {Method} {Using} {Machine} {Learning}}, url = {https://www.mdpi.com/1996-1073/13/14/3658}, doi = {10.3390/en13143658}, abstract = {Batteries are combinations of electrochemical cells that generate electricity to power electrical devices. Batteries are continuously converting chemical energy to electrical energy, and require appropriate maintenance to provide maximum efficiency. Management systems having specialized monitoring features; such as charge controlling mechanisms and temperature regulation are used to prevent health, safety, and property hazards that complement the use of batteries. These systems utilize measures of merit to regulate battery performances. Figures such as the state-of-health (SOH) and state-of-charge (SOC) are used to estimate the performance and state of the battery. In this paper, we propose an intelligent method to investigate the aforementioned parameters using a data-driven approach. We use a machine learning algorithm that extracts significant features from the discharge curves to estimate these parameters. Extensive simulations have been carried out to evaluate the performance of the proposed method under different currents and temperatures.}, language = {en}, number = {14}, urldate = {2022-02-09}, journal = {Energies}, author = {Sheikh, Shehzar Shahzad and Anjum, Mahnoor and Khan, Muhammad Abdullah and Hassan, Syed Ali and Khalid, Hassan Abdullah and Gastli, Adel and Ben-Brahim, Lazhar}, month = jan, year = {2020}, note = {Number: 14 Publisher: Multidisciplinary Digital Publishing Institute}, keywords = {battery health monitoring, ecml, feature extraction, knee-point calculation, machine learning, state of health}, pages = {3658}, }
@inproceedings{farbiz_cognitive_2020, title = {A {Cognitive} {Analytics} based {Approach} for {Machine} {Health} {Monitoring}, {Anomaly} {Detection}, and {Predictive} {Maintenance}}, doi = {10.1109/ICIEA48937.2020.9248409}, abstract = {Traditionally, there are two major limitations for machine learning (ML)-assisted manufacturing applications. First, it would require a tremendous amount of manual data annotations for ML models. Second, ML models are often learned offline and unable to capture the machine dynamism and adapt to changes over time. In this paper, we propose a framework based on the concept of cognitive analytics with unsupervised learning for machine health monitoring, anomaly detection and predictive maintenance. The experimental results on an industrial robot demonstrate the effectiveness of the proposed framework in the identified use case.}, booktitle = {2020 15th {IEEE} {Conference} on {Industrial} {Electronics} and {Applications} ({ICIEA})}, author = {Farbiz, Farzam and Miaolong, Yuan and Yu, Zhou}, month = nov, year = {2020}, note = {ISSN: 2158-2297}, keywords = {Adaptation models, Anomaly detection, Machine learning, Manufacturing, Monitoring, Predictive maintenance, Service robots, Unsupervised learning, anomaly detection, cognitive analytics, ecml, machine health monitoring, predictive maintenance}, pages = {1104--1109}, }
@article{dai_machinery_2020, title = {Machinery {Health} {Monitoring} {Based} on {Unsupervised} {Feature} {Learning} via {Generative} {Adversarial} {Networks}}, volume = {25}, issn = {1941-014X}, doi = {10.1109/TMECH.2020.3012179}, abstract = {It confronts great difficulty to apply traditional artificial intelligence (AI) techniques to machinery prognostics and health management in manufacturing systems due to the lack of abnormal samples corresponding to different fault conditions. This article explores an unsupervised feature learning method for machinery health monitoring by proposing a generative adversarial networks (GAN) model that exploits the merits of the autoencoder and the traditional GAN. The major contribution is that the data distribution of the normal samples is accurately learned by the GAN model within both the signal spectrum and latent representation spaces. Specifically, the discriminative feature for machinery health monitoring is learned in an unsupervised manner by the proposed method in three steps. First, the proposed GAN model is trained by the normal samples of the inspected machine with the aim to correctly reconstruct the signal spectrum and its latent representation. Then, the trained model is applied to test the online samples of the same machine with unknown health conditions. Finally, the dissimilarity between the tested samples and their reconstructed ones in the latent representation space is taken as the discriminative feature. The feature value will increase significantly if a fault occurs in the inspected machine because the abnormal samples are never trained in the proposed GAN model. Experimental studies on three different machines are conducted to validate the proposed method and its superiority over the traditional methods in detecting abnormal points and characterizing fault propagation.}, number = {5}, journal = {IEEE/ASME Transactions on Mechatronics}, author = {Dai, Jun and Wang, Jun and Huang, Weiguo and Shi, Juanjuan and Zhu, Zhongkui}, month = oct, year = {2020}, note = {Conference Name: IEEE/ASME Transactions on Mechatronics}, keywords = {Artificial intelligence (AI), Feature extraction, Gallium nitride, Generative adversarial networks, Generators, Monitoring, Training, deep learning, ecml, generative adversarial networks (GAN), machinery health monitoring, smart manufacturing, unsupervised learning}, pages = {2252--2263}, }
@article{yu_multiscale_2020, title = {Multiscale intelligent fault detection system based on agglomerative hierarchical clustering using stacked denoising autoencoder with temporal information}, volume = {95}, issn = {1568-4946}, url = {https://www.sciencedirect.com/science/article/pii/S1568494620304646}, doi = {10.1016/j.asoc.2020.106525}, abstract = {Deep learning-based process monitoring has achieved remarkable progress. Generally, a deep model is empirically selected before the data features are learned. In this study, the interpretability and suitability of stacked denoising autoencoder (SDAE) in process monitoring territory are theoretically analyzed and validated. Considering that the data will show different feature representations at different scales, such as overall outline, local information, and microscopic details, this study utilizes the concept of multiscale analysis to mine the feature information of raw data deeply in different scales. The multiscale analysis is performed on the basis of agglomerative hierarchical clustering and silhouette coefficient, which makes the analysis data characteristics-based and intelligently abandons the intervention of manual prior knowledge. Then, the SDAE models are established under each scale to learn the high-order and robust features from the data with noise and fluctuation, and all monitoring results of the different scales are integrated using the Bayesian inference. Finally, given the temporal information in sequence data, the state representation of previous events is embedded into the current decision through a sliding window. The numerical process, benchmark Tennessee Eastman and real steel plate process are used to analyze the superiority of the proposed method (MSDAE-TP) over other deep learning-based monitoring methods.}, language = {en}, urldate = {2022-01-14}, journal = {Applied Soft Computing}, author = {Yu, Jianbo and Yan, Xuefeng}, month = oct, year = {2020}, keywords = {Multiscale analysis, Process monitoring, Robust features, Stacked denoising autoencoder, Temporal information}, pages = {106525}, }
@article{aydemir_anomaly_2020, title = {Anomaly monitoring improves remaining useful life estimation of industrial machinery}, volume = {56}, issn = {0278-6125}, url = {https://www.sciencedirect.com/science/article/pii/S0278612520301060}, doi = {10.1016/j.jmsy.2020.06.014}, abstract = {Estimating remaining useful life (RUL) of industrial machinery based on their degradation data is very critical for various industries. Machine learning models are powerful and very popular tools for predicting time to failure of such industrial machinery. However, RUL is ill-defined during healthy operation. This paper proposes to use anomaly monitoring during both RUL estimator training and deployment to tackle this problem. In this approach, raw sensor data is monitored and when a statistically significant change is detected, it is taken as the degradation onset point and a data-driven RUL estimation model is triggered. Initial results with a simple anomaly detector, suited for non-varying operating conditions, and multiple RUL estimation models showed that the anomaly-triggered RUL estimation scheme enhances the estimation accuracy on in-house simulation and benchmark C-MAPSS turbofan engine degradation data. The scheme can be employed to varying operating conditions with a suitable anomaly detector.}, language = {en}, urldate = {2021-09-28}, journal = {Journal of Manufacturing Systems}, author = {Aydemir, Gurkan and Acar, Burak}, month = jul, year = {2020}, keywords = {Anomaly detection, Industrial prognostics and health management, Machine learning, Remaining Useful Life (RUL) estimation, sigkdd-rw}, pages = {463--469}, }
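To show the shape of the anomaly-gated scheme described in this entry, the toy sketch below detects a degradation onset with a crude 3-sigma rolling-mean test and only then fits a RUL regressor. The detector, regressor, and data are all assumptions for illustration and are far simpler than the paper's C-MAPSS experiments.

import numpy as np
from sklearn.linear_model import LinearRegression

rng = np.random.RandomState(0)
healthy = rng.normal(1.0, 0.02, 300)                         # stable health indicator
degrading = 1.0 - 0.002 * np.arange(200) + rng.normal(0, 0.02, 200)
signal = np.concatenate([healthy, degrading])

# Crude onset detector: rolling mean drifting beyond 3 sigma of the healthy baseline.
mu, sigma = healthy.mean(), healthy.std()
rolling = np.convolve(signal, np.ones(20) / 20, mode="valid")
onset = int(np.argmax(np.abs(rolling - mu) > 3 * sigma))
print("degradation onset detected near index", onset)

# RUL estimation is triggered only on the post-onset segment.
X_deg = signal[onset:].reshape(-1, 1)
rul = np.arange(len(X_deg), 0, -1, dtype=float)              # toy ground-truth RUL
model = LinearRegression().fit(X_deg, rul)
print("predicted RUL at onset:", model.predict(X_deg[:1])[0])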
@article{scutari_bayesian_2020, title = {Bayesian network models for incomplete and dynamic data}, volume = {74}, issn = {1467-9574}, url = {https://onlinelibrary.wiley.com/doi/abs/10.1111/stan.12197}, doi = {10.1111/stan.12197}, abstract = {Bayesian networks are a versatile and powerful tool to model complex phenomena and the interplay of their components in a probabilistically principled way. Moving beyond the comparatively simple case of completely observed, static data, which has received the most attention in the literature, in this paper, we will review how Bayesian networks can model dynamic data and data with incomplete observations. Such data are the norm at the forefront of research and in practical applications, and Bayesian networks are uniquely positioned to model them due to their explainability and interpretability.}, language = {en}, number = {3}, urldate = {2021-11-27}, journal = {Statistica Neerlandica}, author = {Scutari, Marco}, year = {2020}, note = {\_eprint: https://onlinelibrary.wiley.com/doi/pdf/10.1111/stan.12197}, keywords = {Bayesian networks, dynamic data, incomplete data, inference, structure learning}, pages = {397--419}, }
@article{huang_bayesian_2020, title = {A {Bayesian} network model to predict the effects of interruptions on train operations}, volume = {114}, issn = {0968-090X}, url = {https://www.sciencedirect.com/science/article/pii/S0968090X19311118}, doi = {10.1016/j.trc.2020.02.021}, abstract = {Based on the Bayesian network (BN) paradigm, we propose a hybrid model to predict the three main consequences of disruptions and disturbances during train operations, namely, the primary delay (L), the number of affected trains (N), and the total delay times (T). To obtain an effective BN structure, we first analyze the dependencies of the involved factors on each station and among adjacent stations, given domain knowledge and expertise about operational characteristics. We then put forward four candidate BN structures, integrating expert knowledge, the interdependencies learned from real-world data, and real-time prediction and operational requirements. Next, we train the candidate structures based on a 5-fold cross-validation method, using the operational data from Wuhan-Guangzhou (W-G) and Xiamen-Shenzhen (X-S) high-speed railway (HSR) lines in China. The best performing structure is nominated to predict the consequences of disruptions and disturbances in the two HSR lines. Comparisons results show that the proposed model outperforms three other commonly used predictive models, reaching an average prediction accuracy of 96.6\%, 74.8\%, and 91.0\% on the W-G HSR line, and 94.8\%, 91.1\%, and 87.9\% on the X-S HSR line for variables L, N, and T, respectively.}, language = {en}, urldate = {2021-11-27}, journal = {Transportation Research Part C: Emerging Technologies}, author = {Huang, Ping and Lessan, Javad and Wen, Chao and Peng, Qiyuan and Fu, Liping and Li, Li and Xu, Xinyue}, month = may, year = {2020}, keywords = {Bayesian networks, Disturbances and disruptions, Real-time prediction, Train operation}, pages = {338--358}, }
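The sketch below illustrates the general Bayesian-network workflow referenced in this entry using pgmpy (the class is named BayesianModel in older pgmpy releases); the structure, variable names, and toy records are invented for illustration and are not the candidate structures evaluated on the W-G and X-S lines.

import pandas as pd
from pgmpy.models import BayesianNetwork        # named BayesianModel in older pgmpy versions
from pgmpy.inference import VariableElimination

# Toy discrete records standing in for operational data; columns are illustrative only.
df = pd.DataFrame({
    "cause": ["signal", "rolling_stock", "signal", "weather"] * 25,
    "L":     ["short", "long", "short", "long"] * 25,        # primary delay
    "N":     ["few", "many", "few", "many"] * 25,            # number of affected trains
})

model = BayesianNetwork([("cause", "L"), ("L", "N")])
model.fit(df)                                   # maximum-likelihood CPDs by default
print(VariableElimination(model).query(variables=["N"], evidence={"cause": "weather"}))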
@article{fu_overview_2020, title = {An overview of recent multi-view clustering}, volume = {402}, issn = {0925-2312}, url = {https://www.sciencedirect.com/science/article/pii/S0925231220303222}, doi = {10.1016/j.neucom.2020.02.104}, abstract = {With the widespread deployment of sensors and the Internet-of-Things, multi-view data has become more common and publicly available. Compared to traditional data that describes objects from single perspective, multi-view data is semantically richer, more useful, however more complex. Since traditional clustering algorithms cannot handle such data, multi-view clustering has become a research hotspot. In this paper, we review some of the latest multi-view clustering algorithms, which are reasonably divided into three categories. To evaluate their performance, we perform extensive experiments on seven real-world data sets. Three mainstream metrics are used, including clustering accuracy, normalized mutual information and purity. Based on the experimental results and a large number of literature reading, we also discuss existing problems in current multi-view clustering and point out possible research directions in the future. This research provides some insights for researchers in related fields and may further promote the development of multi-view clustering algorithms.}, language = {en}, urldate = {2021-11-07}, journal = {Neurocomputing}, author = {Fu, Lele and Lin, Pengfei and Vasilakos, Athanasios V. and Wang, Shiping}, month = aug, year = {2020}, keywords = {Graph-based clustering, Machine learning, Multi-view clustering, Space learning, Unsupervised learning, clustering, multiview, multiview clustering}, pages = {148--161}, }
@article{huang_mvstream_2020, title = {{MVStream}: {Multiview} {Data} {Stream} {Clustering}}, volume = {31}, issn = {2162-2388}, shorttitle = {{MVStream}}, doi = {10.1109/TNNLS.2019.2944851}, abstract = {This article studies a new problem of data stream clustering, namely, multiview data stream (MVStream) clustering. Although many data stream clustering algorithms have been developed, they are restricted to the single-view streaming data, and clustering MVStreams still remains largely unsolved. In addition to the many issues encountered by the conventional single-view data stream clustering, such as capturing cluster evolution and discovering clusters of arbitrary shapes under the limited computational resources, the main challenge of MVStream clustering lies in integrating information from multiple views in a streaming manner and abstracting summary statistics from the integrated features simultaneously. In this article, we propose a novel MVStream clustering algorithm for the first time. The main idea is to design a multiview support vector domain description (MVSVDD) model, by which the information from multiple insufficient views can be integrated, and the outputting support vectors (SVs) are utilized to abstract the summary statistics of the historical multiview data objects. Based on the MVSVDD model, a new multiview cluster labeling method is designed, whereby clusters of arbitrary shapes can be discovered for each view. By tracking the cluster labels of SVs in each view, the cluster evolution associated with concept drift can be captured. Since the SVs occupy only a small portion of data objects, the proposed MVStream algorithm is quite efficient with the limited computational resources. Extensive experiments are conducted to demonstrate the effectiveness and efficiency of the proposed method.}, number = {9}, journal = {IEEE Transactions on Neural Networks and Learning Systems}, author = {Huang, Ling and Wang, Chang-Dong and Chao, Hong-Yang and Yu, Philip S.}, month = sep, year = {2020}, note = {Conference Name: IEEE Transactions on Neural Networks and Learning Systems}, keywords = {Clustering, Clustering algorithms, Computer science, Data models, Indexes, Shape, Support vector machines, Task analysis, clusters of arbitrary shapes, data stream, multiview, online, stream, stream learning, support vector (SV)}, pages = {3482--3496}, }
@article{chen_multi-view_2020, title = {Multi-{View} {Clustering} in {Latent} {Embedding} {Space}}, volume = {34}, copyright = {Copyright (c) 2020 Association for the Advancement of Artificial Intelligence}, issn = {2374-3468}, url = {https://ojs.aaai.org/index.php/AAAI/article/view/5756}, doi = {10.1609/aaai.v34i04.5756}, abstract = {Previous multi-view clustering algorithms mostly partition the multi-view data in their original feature space, the efficacy of which heavily and implicitly relies on the quality of the original feature presentation. In light of this, this paper proposes a novel approach termed Multi-view Clustering in Latent Embedding Space (MCLES), which is able to cluster the multi-view data in a learned latent embedding space while simultaneously learning the global structure and the cluster indicator matrix in a unified optimization framework. Specifically, in our framework, a latent embedding representation is firstly discovered which can effectively exploit the complementary information from different views. The global structure learning is then performed based on the learned latent embedding representation. Further, the cluster indicator matrix can be acquired directly with the learned global structure. An alternating optimization scheme is introduced to solve the optimization problem. Extensive experiments conducted on several real-world multi-view datasets have demonstrated the superiority of our approach.}, language = {en}, number = {04}, urldate = {2021-11-07}, journal = {Proceedings of the AAAI Conference on Artificial Intelligence}, author = {Chen, Man-Sheng and Huang, Ling and Wang, Chang-Dong and Huang, Dong}, month = apr, year = {2020}, note = {Number: 04}, pages = {3513--3520}, }
@article{li_esa-stream_2020, title = {{ESA}-{Stream}: {Efficient} {Self}-{Adaptive} {Online} {Data} {Stream} {Clustering}}, issn = {1558-2191}, shorttitle = {{ESA}-{Stream}}, doi = {10.1109/TKDE.2020.2990196}, abstract = {Many big data applications produce a massive amount of high-dimensional, real-time, and evolving streaming data. Clustering such data streams with both effectiveness and efficiency is critical for these applications. Although there are well-known data stream clustering algorithms that are based on the popular online-offline framework, these algorithms still face some major challenges. Several critical questions are still not answered satisfactorily: How to perform dimensionality reduction effectively and efficiently in the online dynamic environment? How to enable the clustering algorithm to achieve complete real-time online processing? How to make algorithm parameters learn in a self-supervised or self-adaptive manner to cope with high-speed evolving streams? In this paper, we focus on tackling these challenges by proposing a fully online stream clustering algorithm (called ESA-Stream) that can learn parameters online dynamically in a self-adaptive manner, speed up dimensionality reduction, and cluster data streams effectively and efficiently in an online and dynamic environment. Experiments on a wide range of synthetic and real-world data streams show that ESA-Stream outperforms state-of-the-art baselines considerably in both effectiveness and efficiency.}, journal = {IEEE Transactions on Knowledge and Data Engineering}, author = {Li, Yanni and Li, Hui and Wang, Zhi and Liu, Bing and Cui, Jiangtao and Fei, Hang}, year = {2020}, note = {Conference Name: IEEE Transactions on Knowledge and Data Engineering}, keywords = {Clustering algorithms, Clustering methods, Data Stream, Dimensionality reduction, Heuristic algorithms, Indexes, Online Clustering, Partitioning algorithms, Real-time systems, Self-Adaptive}, pages = {1--1}, }
@article{brito_da_silva_incremental_2020, title = {Incremental {Cluster} {Validity} {Indices} for {Online} {Learning} of {Hard} {Partitions}: {Extensions} and {Comparative} {Study}}, volume = {8}, issn = {2169-3536}, shorttitle = {Incremental {Cluster} {Validity} {Indices} for {Online} {Learning} of {Hard} {Partitions}}, doi = {10.1109/ACCESS.2020.2969849}, abstract = {Validation is one of the most important aspects of clustering, particularly when the user is designing a trustworthy or explainable system. However, most clustering validation approaches require batch calculation. This is an important gap because of the value of clustering in real-time data streaming and other online learning applications. Therefore, interest has grown in providing online alternatives for validation. This paper extends the incremental cluster validity index (iCVI) family by presenting incremental versions of Calinski-Harabasz (iCH), Pakhira-Bandyopadhyay-Maulik (iPBM), WB index (iWB), Silhouette (iSIL), Negentropy Increment (iNI), Representative Cross Information Potential (irCIP), Representative Cross Entropy (irH), and Conn\_Index (iConn\_Index). This paper also provides a thorough comparative study of correct, under- and over-partitioning on the behavior of these iCVIs, the Partition Separation (PS) index as well as four recently introduced iCVIs: incremental Xie-Beni (iXB), incremental Davies-Bouldin (iDB), and incremental generalized Dunn's indices 43 and 53 (iGD43 and iGD53). Experiments were carried out using a framework that was designed to be as agnostic as possible to the clustering algorithms. The results on synthetic benchmark data sets showed that while evidence of most under-partitioning cases could be inferred from the behaviors of the majority of these iCVIs, over-partitioning was found to be a more challenging problem, detected by fewer of them. Interestingly, over-partitioning, rather than under-partitioning, was more prominently detected on the real-world data experiments within this study. The expansion of iCVIs provides significant novel opportunities for assessing and interpreting the results of unsupervised lifelong learning in real-time, wherein samples cannot be reprocessed due to memory and/or application constraints.}, journal = {IEEE Access}, author = {Brito Da Silva, Leonardo Enzo and Melton, Niklas Max and Wunsch, Donald C.}, year = {2020}, note = {Conference Name: IEEE Access}, keywords = {Clustering, Clustering algorithms, Entropy, Government, Indexes, Partitioning algorithms, Prototypes, Subspace constraints, adaptive resonance theory (ART), cluster validation, cluster validity, data streams, incremental (online) clustering algorithms, incremental cluster validity index (iCVI), validation}, pages = {22025--22047}, }
@article{yilmaz_pysad_2020, title = {{PySAD}: {A} {Streaming} {Anomaly} {Detection} {Framework} in {Python}}, shorttitle = {{PySAD}}, url = {http://arxiv.org/abs/2009.02572}, abstract = {PySAD is an open-source python framework for anomaly detection on streaming data. PySAD serves various state-of-the-art methods for streaming anomaly detection. The framework provides a complete set of tools to design anomaly detection experiments ranging from projectors to probability calibrators. PySAD builds upon popular open-source frameworks such as PyOD and scikit-learn. We enforce software quality by enforcing compliance with PEP8 guidelines, functional testing and using continuous integration. The source code is publicly available on https://github.com/selimfirat/pysad.}, urldate = {2021-10-15}, journal = {arXiv:2009.02572 [cs, stat]}, author = {Yilmaz, Selim F. and Kozat, Suleyman S.}, month = sep, year = {2020}, note = {arXiv: 2009.02572}, keywords = {Computer Science - Machine Learning, Statistics - Machine Learning}, }
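A minimal streaming usage sketch for the framework cited above, assuming the per-instance fit_score_partial interface and the xStream model that the PySAD documentation describes; the data is synthetic and the exact call signature should be checked against the installed PySAD version.

import numpy as np
from pysad.models import xStream   # assumed import path per the PySAD documentation

rng = np.random.RandomState(1)
X = np.vstack([rng.normal(size=(500, 3)),
               rng.normal(loc=5.0, size=(10, 3))])   # a few injected outliers at the end

model = xStream()
scores = []
for x in X:                                          # process the stream one instance at a time
    scores.append(model.fit_score_partial(x))        # higher score = more anomalous

print("mean score, normal vs injected:", np.mean(scores[:500]), np.mean(scores[500:]))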
@article{hammoud_multilayer_2020, title = {Multilayer networks: aspects, implementations, and application in biomedicine}, volume = {5}, issn = {2058-6345}, shorttitle = {Multilayer networks}, url = {https://doi.org/10.1186/s41044-020-00046-0}, doi = {10.1186/s41044-020-00046-0}, abstract = {Modeling and analyses of complex systems using network theory have been an object of study for a long time. They have caught attention in many disciplines such as sociology, epidemiology, ecology, psychology, biology, biomedicine, and other fields. Network theory is especially an efficient tool to model biological networks such as gene co-expression networks, protein-protein interaction networks, or pathways. Considering the enhanced resolutions of complex real-world systems, the interest has been directed to multilayered networks. However, despite this surge of recent attention, the use of the multilayer framework in the biological field is still in its youth. In this paper, we review the different aspects and terminologies of multilayered networks. We also briefly discuss the variant applications of the multilayer framework, and finally, we give an overview of various existing applications of the multilayer model in network biology.}, number = {1}, urldate = {2021-10-14}, journal = {Big Data Analytics}, author = {Hammoud, Zaynab and Kramer, Frank}, month = jul, year = {2020}, keywords = {Graph theory, Multilayered graphs, Network biology}, pages = {2}, }
@inproceedings{wang_railway_2020, title = {Railway {Wagon} {Wheelset} {Fault} {Diagnosis} {Method} {Based} on {DBN}}, doi = {10.1109/PHM-Shanghai49105.2020.9280980}, abstract = {The wheelset is a crucial part of the running and braking systems of railway wagons, and its failure will affect the operational safety of the entire railway system. Fault diagnosis is an important basis for realizing scientific railway maintenance decisions. The deep belief network (DBN) method is applied to process and diagnose fault data of freight wagons. The paper uses dynamic factors to optimize the update mechanism of particle velocity, proposes an active factor controlled particle swarm algorithm (APSO), and uses the APSO algorithm to optimize the selection of DBN weights to form a wheelset fault detection algorithm (APSO-DBN). This speeds up DBN training and improves the overall performance of the diagnostic model. The experimental steps and the effectiveness of the method are verified by experiments.}, booktitle = {2020 {Global} {Reliability} and {Prognostics} and {Health} {Management} ({PHM}-{Shanghai})}, author = {Wang, Hongkun and Li, Honghui and Li, Yusheng and Duan, Yuhang}, month = oct, year = {2020}, keywords = {Classification algorithms, Fault detection, Fault diagnosis, Heuristic algorithms, Particle swarm optimization, Rail transportation, Training, deep belief network, deep learning, fault detection, particle swarm algorithm, railway freighter wheelset}, pages = {1--6}, }
@article{saucedo-dorantes_industrial_2020, title = {Industrial {Data}-{Driven} {Monitoring} {Based} on {Incremental} {Learning} {Applied} to the {Detection} of {Novel} {Faults}}, volume = {16}, issn = {1941-0050}, doi = {10.1109/TII.2020.2973731}, abstract = {The detection of uncharacterized events during electromechanical systems operation represents one of the most critical data challenges dealing with condition-based monitoring under the Industry 4.0 framework. Thus, the detection of novelty conditions and the learning of new patterns are considered as mandatory competencies in modern industrial applications. In this regard, this article proposes a novel multifault detection and identification scheme, based on machine learning, information data-fusion, novelty-detection, and incremental learning. First, statistical time-domain features estimated from multiple physical magnitudes acquired from the electrical motor under inspection are fused under a feature-fusion level scheme. Second, a self-organizing map structure is proposed to construct a data-based model of the available conditions of operation. Third, the incremental learning of the condition-based monitoring scheme is performed adding self-organizing structures and optimizing their projections through a linear discriminant analysis. The performance of the proposed scheme is validated under a complete set of experimental scenarios from two different cases of study, and the results compared with a classical approach.}, number = {9}, journal = {IEEE Transactions on Industrial Informatics}, author = {Saucedo-Dorantes, Juan Jose and Delgado-Prieto, Miguel and Osornio-Rios, Roque Alfredo and Romero-Troncoso, Rene de Jesus}, month = sep, year = {2020}, note = {Conference Name: IEEE Transactions on Industrial Informatics}, keywords = {Condition monitoring, Data models, Electromechanical systems, Fault detection, Industries, Monitoring, Self-organizing feature maps, Training, fault detection, feature extraction, incremental learning, machine learning, novelty detection}, pages = {5985--5995}, }
@article{shen_deep_2020, title = {A {Deep} {Multi}-{Label} {Learning} {Framework} for the {Intelligent} {Fault} {Diagnosis} of {Machines}}, volume = {8}, issn = {2169-3536}, doi = {10.1109/ACCESS.2020.3002826}, abstract = {Deep learning has been applied in intelligent fault diagnosis of machines since it trains deep neural networks to simultaneously learn features and recognize faults. In the intelligent fault diagnosis methods based on deep learning, feature learning and fault recognition are achieved by solving a multi-class classification problem. The multi-class classification, however, has not considered the relationships of fault labels, leading to two weaknesses of these methods. One is that it cannot ensure to learn the correlated features for related faults and the other is that it cannot handle missing label problem. To overcome these weaknesses, we introduce a concept of multi-label classification into intelligent fault diagnosis and propose a deep multi-label learning framework called multi-label convolutional neural network (MLCNN). MLCNN builds the relationship between the labels, and thus it is able to learn the correlated features from mechanical vibration signals and be well trained using the samples with missing labels. A motor bearing diagnosis case and a compound fault diagnosis case are used to verify the proposed method, respectively. The results show that the relationships between features are learned by MLCNN, and the classification accuracies of MLCNN are higher than traditional methods when the missing label problem occurs.}, journal = {IEEE Access}, author = {Shen, Jianjun and Li, Shihao and Jia, Feng and Zuo, Hao and Ma, Junxing}, year = {2020}, note = {Conference Name: IEEE Access}, keywords = {Compounds, Deep learning, Fault diagnosis, Neural networks, Task analysis, Training, Vibrations, bearing, compound fault, intelligent fault diagnosis, missing label problem, multi-label classification, multilabel}, pages = {113557--113566}, }
@article{bechini_tsf-dbscan_2020, title = {{TSF}-{DBSCAN}: a {Novel} {Fuzzy} {Density}-based {Approach} for {Clustering} {Unbounded} {Data} {Streams}}, issn = {1941-0034}, shorttitle = {{TSF}-{DBSCAN}}, doi = {10.1109/TFUZZ.2020.3042645}, abstract = {In recent years, several clustering algorithms have been proposed with the aim of mining knowledge from streams of data generated at a high speed by a variety of hardware platforms and software applications. Among these algorithms, density-based approaches have proved to be particularly attractive, thanks to their capability of handling outliers and capturing clusters with arbitrary shapes. The streaming setting poses additional challenges that need to be addressed as well: data streams are potentially unbounded and affected by concept drift, i.e. a modification over time in the underlying data generation process. In this paper, we propose Temporal Streaming Fuzzy DBSCAN (TSF-DBSCAN), a novel fuzzy clustering algorithm for streaming data. TSF-DBSCAN is an extension of the well-known DBSCAN algorithm, one of the most popular density-based clustering approaches. Fuzziness is introduced in TSF-DBSCAN to model the uncertainty about the distance threshold that defines the neighborhood of an object. As a consequence, TSF-DBSCAN identifies clusters with fuzzy overlapping borders. A fading model, which makes objects less relevant as they become more remote in time, endows TSF-DBSCAN with the capability of adapting to evolving data streams. The integration of the model in a two-stage approach ensures computational and memory efficiency: during the online stage continuously arriving objects are organized in proper data structures that are later exploited in the offline stage to determine a fine-grained partition. An extensive experimental analysis on synthetic and real world datasets shows that TSF-DBSCAN yields competitive performance when compared to other clustering algorithms recently proposed for streaming data.}, journal = {IEEE Transactions on Fuzzy Systems}, author = {Bechini, Alessio and Marcelloni, Francesco and Renda, Alessandro}, year = {2020}, note = {Conference Name: IEEE Transactions on Fuzzy Systems}, keywords = {Adaptation models, Clustering algorithms, DBSCAN, Data models, Data stream clustering, Fading channels, Partitioning algorithms, Proposals, Shape, density-based clustering, fuzzy clustering, streaming data}, pages = {1--1}, }
@article{lei_applications_2020, title = {Applications of machine learning to machine fault diagnosis: {A} review and roadmap}, volume = {138}, issn = {0888-3270}, shorttitle = {Applications of machine learning to machine fault diagnosis}, url = {https://www.sciencedirect.com/science/article/pii/S0888327019308088}, doi = {10.1016/j.ymssp.2019.106587}, abstract = {Intelligent fault diagnosis (IFD) refers to applications of machine learning theories to machine fault diagnosis. This is a promising way to release the contribution from human labor and automatically recognize the health states of machines, thus it has attracted much attention in the last two or three decades. Although IFD has achieved a considerable number of successes, a review still leaves a blank space to systematically cover the development of IFD from the cradle to the bloom, and rarely provides potential guidelines for the future development. To bridge the gap, this article presents a review and roadmap to systematically cover the development of IFD following the progress of machine learning theories and offer a future perspective. In the past, traditional machine learning theories began to weaken the contribution of human labor and brought the era of artificial intelligence to machine fault diagnosis. Over the recent years, the advent of deep learning theories has reformed IFD in further releasing the artificial assistance since the 2010s, which encourages the construction of an end-to-end diagnosis procedure. It means to directly bridge the relationship between the increasingly growing monitoring data and the health states of machines. In the future, transfer learning theories attempt to use the diagnosis knowledge from one or multiple diagnosis tasks to other related ones, which prospectively overcomes the obstacles in applications of IFD to engineering scenarios. Finally, the roadmap of IFD is pictured to show potential research trends when combined with the challenges in this field.}, language = {en}, urldate = {2021-09-30}, journal = {Mechanical Systems and Signal Processing}, author = {Lei, Yaguo and Yang, Bin and Jiang, Xinwei and Jia, Feng and Li, Naipeng and Nandi, Asoke K.}, month = apr, year = {2020}, keywords = {Deep learning, Intelligent fault diagnosis, Machine learning, Machines, Review and roadmap, Transfer learning}, pages = {106587}, }
@article{cheng_online_2020, title = {Online {Bearing} {Remaining} {Useful} {Life} {Prediction} {Based} on a {Novel} {Degradation} {Indicator} and {Convolutional} {Neural} {Networks}}, volume = {25}, issn = {1083-4435, 1941-014X}, url = {http://arxiv.org/abs/1812.03315}, doi = {10.1109/TMECH.2020.2971503}, abstract = {In industrial applications, nearly half the failures of motors are caused by the degradation of rolling element bearings (REBs). Therefore, accurately estimating the remaining useful life (RUL) for REBs is of crucial importance to ensure the reliability and safety of mechanical systems. To tackle this challenge, model-based approaches are often limited by the complexity of mathematical modeling. Conventional data-driven approaches, on the other hand, require massive efforts to extract the degradation features and construct a health index. In this paper, a novel online data-driven framework is proposed to exploit the adoption of deep convolutional neural networks (CNN) in predicting the RUL of bearings. More concretely, the raw vibrations of training bearings are first processed using the Hilbert-Huang transform (HHT) and a novel nonlinear degradation indicator is constructed as the label for learning. The CNN is then employed to identify the hidden pattern between the extracted degradation indicator and the vibration of training bearings, which makes it possible to estimate the degradation of the test bearings automatically. Finally, testing bearings' RULs are predicted by using an $\epsilon$-support vector regression model. The superior performance of the proposed RUL estimation framework, compared with the state-of-the-art approaches, is demonstrated through the experimental results. The generality of the proposed CNN model is also validated by transferring to bearings undergoing different operating conditions.}, number = {3}, urldate = {2021-09-30}, journal = {IEEE/ASME Transactions on Mechatronics}, author = {Cheng, Cheng and Ma, Guijun and Zhang, Yong and Sun, Mingyang and Teng, Fei and Ding, Han and Yuan, Ye}, month = jun, year = {2020}, note = {arXiv: 1812.03315}, keywords = {Computer Science - Machine Learning, Electrical Engineering and Systems Science - Signal Processing, Statistics - Machine Learning}, pages = {1243--1254}, }
@article{wang_integrated_2020, title = {An integrated fault diagnosis and prognosis approach for predictive maintenance of wind turbine bearing with limited samples}, volume = {145}, issn = {0960-1481}, url = {https://www.sciencedirect.com/science/article/pii/S0960148119309371}, doi = {10.1016/j.renene.2019.06.103}, abstract = {Predictive maintenance has raised much research interest to improve the system reliability of a wind turbine. This paper presents a new model based approach of integrated fault diagnosis and prognosis for wind turbine remaining useful life estimation, especially the cases with limited degradation data. Firstly, a wavelet transform based fault diagnosis method is investigated to analyze the bearing incipient defect signatures, and the extracted features are then fused by the Health Index algorithm to represent the bearing defect conditions. Taking the empirical physical knowledge and statistical model in a Bayesian framework, the bearing remaining useful life prediction with uncertainty quantification is achieved by particle filter in a recursive manner. The integrated fault diagnosis and prognosis approach is validated using bearing lifetime test data acquired from a wind turbine in field, and the performance comparison with typical data driven technique outlines the significance of the presented method.}, language = {en}, urldate = {2021-09-28}, journal = {Renewable Energy}, author = {Wang, Jinjiang and Liang, Yuanyuan and Zheng, Yinghao and Gao, Robert X. and Zhang, Fengli}, month = jan, year = {2020}, keywords = {Defect diagnosis, Defect prognosis, Particle filter, Wind turbine bearing}, pages = {642--650}, }
@article{lee_integrated_2020, title = {An integrated assessment of safety and efficiency of aircraft maintenance strategies using agent-based modelling and stochastic {Petri} nets}, volume = {202}, issn = {0951-8320}, url = {https://www.sciencedirect.com/science/article/pii/S0951832020305536}, doi = {10.1016/j.ress.2020.107052}, abstract = {Aircraft maintenance is key for safe and efficient aircraft operations. While most studies propose cost-efficient maintenance strategies, the safety and efficiency of these strategies need to be quantified. This paper proposes a formal framework to assess the safety and efficiency of maintenance strategies by means of agent-based modelling, stochastically and dynamically coloured Petri nets, and Monte Carlo simulation. We model an end-to-end aircraft maintenance process, considering several maintenance stakeholders. We apply our framework for aircraft landing gear brakes, and use a Gamma process to model the degradation trends of the brakes. The numerical results show that applying data-driven strategies reduces the number of inspections by 36\%, while maintaining the same level of safety as in the case of traditional time-based maintenance strategies. Furthermore, in order to discuss the possibility to substitute all inspections by sensor monitoring, an advanced data-driven strategy using prognostics is considered. Overall, our proposed framework is generic and can readily be applied to assess the safety and efficiency of the maintenance of other aircraft components and maintenance strategies.}, language = {en}, urldate = {2021-09-28}, journal = {Reliability Engineering \& System Safety}, author = {Lee, Juseong and Mitici, Mihaela}, month = oct, year = {2020}, keywords = {Aircraft maintenance, Gamma process, Landing gear brake, Safety, Simulation, Stochastically and dynamically coloured Petri nets}, pages = {107052}, }
@inproceedings{tegen_effects_2020, title = {The {Effects} of {Reluctant} and {Fallible} {Users} in {Interactive} {Online} {Machine} {Learning}}, url = {http://urn.kb.se/resolve?urn=urn:nbn:se:mau:diva-17673}, language = {eng}, urldate = {2021-09-05}, publisher = {CEUR Workshops}, author = {Tegen, Agnes and Davidsson, Paul and Persson, Jan A.}, year = {2020}, pages = {55--71}, }
@article{tavenard_tslearn_2020, title = {Tslearn, {A} {Machine} {Learning} {Toolkit} for {Time} {Series} {Data}}, volume = {21}, issn = {1533-7928}, url = {http://jmlr.org/papers/v21/20-091.html}, abstract = {tslearn is a general-purpose Python machine learning library for time series that offers tools for pre-processing and feature extraction as well as dedicated models for clustering, classification and regression. It follows scikit-learn's Application Programming Interface for transformers and estimators, allowing the use of standard pipelines and model selection tools on top of tslearn objects. It is distributed under the BSD-2-Clause license, and its source code is available at https://github.com/tslearn-team/tslearn.}, number = {118}, urldate = {2021-08-27}, journal = {Journal of Machine Learning Research}, author = {Tavenard, Romain and Faouzi, Johann and Vandewiele, Gilles and Divo, Felix and Androz, Guillaume and Holtz, Chester and Payne, Marie and Yurchak, Roman and Rußwurm, Marc and Kolar, Kushal and Woods, Eli}, year = {2020}, pages = {1--6}, }
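Because tslearn follows scikit-learn's estimator API, a typical use is DTW-based clustering with ordinary fit/predict calls. The sketch below is illustrative only; the names used (random_walks, TimeSeriesScalerMeanVariance, TimeSeriesKMeans with metric="dtw") follow the tslearn documentation and the series are synthetic.

# Hedged sketch of tslearn's scikit-learn-style interface on synthetic random walks.
from tslearn.generators import random_walks
from tslearn.preprocessing import TimeSeriesScalerMeanVariance
from tslearn.clustering import TimeSeriesKMeans

X = random_walks(n_ts=50, sz=64, d=1)                 # 50 univariate series of length 64
X = TimeSeriesScalerMeanVariance().fit_transform(X)   # z-normalize each series
km = TimeSeriesKMeans(n_clusters=3, metric="dtw", random_state=0)
labels = km.fit_predict(X)                            # standard estimator interface
print(labels[:10])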
@inproceedings{tamburri_sustainable_2020, title = {Sustainable {MLOps}: {Trends} and {Challenges}}, shorttitle = {Sustainable {MLOps}}, doi = {10.1109/SYNASC51798.2020.00015}, abstract = {Even simply through a Google Trends search it becomes clear that Machine-Learning Operations, or MLOps for short, are climbing in interest from both a scientific and practical perspective. On the one hand, software components and middleware are proliferating to support all manners of MLOps, from AutoML (i.e., software which enables developers with limited machine-learning expertise to train high-quality models specific to their domain or data) to feature-specific ML engineering, e.g., Explainability and Interpretability. On the other hand, the more these platforms penetrate the day-to-day activities of software operations, the greater the risk of AI Software becoming unsustainable from a social, technical, or organisational perspective. This paper offers a concise definition of MLOps and AI Software Sustainability and outlines key challenges in its pursuit.}, booktitle = {2020 22nd {International} {Symposium} on {Symbolic} and {Numeric} {Algorithms} for {Scientific} {Computing} ({SYNASC})}, author = {Tamburri, Damian A.}, month = sep, year = {2020}, keywords = {DataOps, Decision making, MLOps, Machine learning, Machine-Learning Operations, Market research, Middleware, Scientific computing, Software Sustainability, Software systems, Sustainable development}, pages = {17--23}, }
@article{li_survey_2020, title = {A {Survey} of {Data}-driven and {Knowledge}-aware {eXplainable} {AI}}, issn = {1558-2191}, doi = {10.1109/TKDE.2020.2983930}, abstract = {We are witnessing a fast development of Artificial Intelligence (AI), but it has become dramatically challenging to explain AI models over the past decade. “Explanation” is a flexible philosophical concept of “satisfying the subjective curiosity for causal information”, driving a wide spectrum of methods being invented and/or adapted from many aspects and communities, including machine learning, visual analytics, human-computer interaction and so on. Nevertheless, from the view-point of data and knowledge engineering (DKE), a best explaining practice that is cost-effective in terms of extra intelligence acquisition should exploit the causal information and scenarios which are hidden richly in the data itself. In the past several years, there have been plenty of works contributing in this line, but there is a lack of a clear taxonomy and systematic review of the current effort. To this end, we propose this survey, reviewing and taxonomizing existing efforts from the view-point of DKE, summarizing their contribution, technical essence and comparative characteristics. Specifically, we categorize methods into data-driven methods where explanation comes from the task-related data, and knowledge-aware methods where extraneous knowledge is incorporated. Furthermore, in the light of practice, we provide a survey of state-of-the-art evaluation metrics and deployed explanation applications in industrial practice.}, journal = {IEEE Transactions on Knowledge and Data Engineering}, author = {Li, Xiao-Hui and Cao, Caleb Chen and Shi, Yuhan and Bai, Wei and Gao, Han and Qiu, Luyu and Wang, Cong and Gao, Yuanyuan and Zhang, Shenjia and Xue, Xun and Chen, Lei}, year = {2020}, note = {Conference Name: IEEE Transactions on Knowledge and Data Engineering}, keywords = {Algorithms, Data models, Data visualization, Deep Learning, Explainable AI (XAI), Feature extraction, Knowledge-base, Metrics, Predictive models, Task analysis, Taxonomy}, pages = {1--1}, }
@inproceedings{salierno_architecture_2020, address = {Cham}, series = {Lecture {Notes} in {Business} {Information} {Processing}}, title = {An {Architecture} for {Predictive} {Maintenance} of {Railway} {Points} {Based} on {Big} {Data} {Analytics}}, isbn = {978-3-030-49165-9}, doi = {10.1007/978-3-030-49165-9_3}, abstract = {Massive amounts of data produced by railway systems are a valuable resource to enable Big Data analytics. Despite its richness, several challenges arise when dealing with the deployment of a big data architecture into a railway system. In this paper, we propose a four-layers big data architecture with the goal of establishing a data management policy to manage massive amounts of data produced by railway switch points and perform analytical tasks efficiently. An implementation of the architecture is given along with the realization of a Long Short-Term Memory prediction model for detecting failures on the Italian Railway Line of Milano - Monza - Chiasso.}, language = {en}, booktitle = {Advanced {Information} {Systems} {Engineering} {Workshops}}, publisher = {Springer International Publishing}, author = {Salierno, Giulio and Morvillo, Sabatino and Leonardi, Letizia and Cabri, Giacomo}, editor = {Dupuy-Chessa, Sophie and Proper, Henderik A.}, year = {2020}, keywords = {Big data architecture, Predictive maintenance, Railway data}, pages = {29--40}, }
@inproceedings{guderlei_evaluating_2020, address = {Barcelona, Spain (Online)}, title = {Evaluating {Unsupervised} {Representation} {Learning} for {Detecting} {Stances} of {Fake} {News}}, url = {https://www.aclweb.org/anthology/2020.coling-main.558}, doi = {10.18653/v1/2020.coling-main.558}, abstract = {Our goal is to evaluate the usefulness of unsupervised representation learning techniques for detecting stances of Fake News. Therefore we examine several pre-trained language models with respect to their performance on two Fake News related data sets, both consisting of instances with a headline, an associated news article and the stance of the article towards the respective headline. Specifically, the aim is to understand how much hyperparameter tuning is necessary when fine-tuning the pre-trained architectures, how well transfer learning works in this specific case of stance detection and how sensitive the models are to changes in hyperparameters like batch size, learning rate (schedule), sequence length as well as the freezing technique. The results indicate that the computationally more expensive autoregression approach of XLNet (Yang et al., 2019) is outperformed by BERT-based models, notably by RoBERTa (Liu et al., 2019). While the learning rate seems to be the most important hyperparameter, experiments with different freezing techniques indicate that all evaluated architectures had already learned powerful language representations that pose a good starting point for fine-tuning them.}, urldate = {2021-01-29}, booktitle = {Proceedings of the 28th {International} {Conference} on {Computational} {Linguistics}}, publisher = {International Committee on Computational Linguistics}, author = {Guderlei, Maike and Aßenmacher, Matthias}, month = dec, year = {2020}, pages = {6339--6349}, }
@article{verma_edge-cloud_2020, title = {Edge-cloud computing performance benchmarking for {IoT} based machinery vibration monitoring}, issn = {2213-8463}, url = {http://www.sciencedirect.com/science/article/pii/S2213846320301759}, doi = {10.1016/j.mfglet.2020.12.004}, abstract = {Advances in low cost and reliable sensing, connectivity (Internet of Things), computational power, and advanced analytics, are leading to a new wave of innovation in machinery status sensing and condition monitoring. Significant research efforts are directed towards cloud computing architectures. However, given the latency, bandwidth, cost, security, and privacy concerns, further supported by the ever-increasing capabilities of edge computing devices, there is a need to consider both edge and cloud computing together to make informed decisions based upon context and performance. We present an edge-cloud performance evaluation for IoT based machinery vibration monitoring, to foster deployment for the contexts considered}, language = {en}, urldate = {2020-12-15}, journal = {Manufacturing Letters}, author = {Verma, Ankur and Goyal, Ayush and Kumara, Soundar and Kurfess, Thomas}, month = dec, year = {2020}, keywords = {Edge computing, Internet of Things, machinery vibration monitoring, smart manufacturing}, }
@article{zhang_bibliometric_2020, title = {A bibliometric review of a decade of research: {Big} data in business research – {Setting} a research agenda}, issn = {0148-2963}, shorttitle = {A bibliometric review of a decade of research}, url = {http://www.sciencedirect.com/science/article/pii/S0148296320307475}, doi = {10.1016/j.jbusres.2020.11.004}, abstract = {The last several years have witnessed a surge of interest in artificial intelligence (AI). As the foundation of AI technologies, big data has attracted attention of researchers. Big data and data science have been recognized as new tools and methodologies for developing theories in business research (George, 2014). While several qualitative reviews have been conducted, there is still a lack of a quantitative and systematic review of big data in business research. Our review study fills this gap by depicting the development of big data in business research using bibliometric methods, such as publication counts and trends analysis, co-citation analysis, co-authorship analysis and keywords co-occurrence analysis. Based on the sample of 1366 primary focal articles and 55,718 secondary references, we visualize the landscape and evolution of big-data business research and capture the developmental trajectory and trends over time (between 2008 and 2018). Furthermore, based on our analyses, we provide several promising directions for future research. In doing so, we provide scholars with a systematic understanding of the development and panoramic roadmap of big data research in business.}, language = {en}, urldate = {2020-12-12}, journal = {Journal of Business Research}, author = {Zhang, Yucheng and Zhang, Meng and Li, Jing and Liu, Guangjian and Yang, Miles M. and Liu, Siqi}, month = dec, year = {2020}, keywords = {Bibliometric review, Big Data, Management and business, Scientific visualization}, }
@article{pearson_boldly_2020, title = {To {Boldly} {Go} {Where} {No} {Data} {Stream} {Has} {Gone} {Before}}, volume = {1}, issn = {2666-3899}, url = {http://www.sciencedirect.com/science/article/pii/S266638992030235X}, doi = {10.1016/j.patter.2020.100171}, abstract = {As humanity explores the Solar System, the further our spacecraft get from Earth the further their data signals have to travel. We look at some of the biggest obstacles that come up when attempting to transfer data billions of kilometers across space using a power- and weight-limited spacecraft.}, language = {en}, number = {9}, urldate = {2020-12-12}, journal = {Patterns}, author = {Pearson, Ezzy}, month = dec, year = {2020}, pages = {100171}, }
@article{korycki_adaptive_2020, title = {Adaptive {Deep} {Forest} for {Online} {Learning} from {Drifting} {Data} {Streams}}, url = {http://arxiv.org/abs/2010.07340}, abstract = {Learning from data streams is among the most vital fields of contemporary data mining. The online analysis of information coming from those potentially unbounded data sources allows for designing reactive up-to-date models capable of adjusting themselves to continuous flows of data. While a plethora of shallow methods have been proposed for simpler low-dimensional streaming problems, almost none of them addressed the issue of learning from complex contextual data, such as images or texts. The former is represented mainly by adaptive decision trees that have been proven to be very efficient in streaming scenarios. The latter has been predominantly addressed by offline deep learning. In this work, we attempt to bridge the gap between these two worlds and propose Adaptive Deep Forest (ADF) - a natural combination of the successful tree-based streaming classifiers with deep forest, which represents an interesting alternative idea for learning from contextual data. The conducted experiments show that the deep forest approach can be effectively transformed into an online algorithm, forming a model that outperforms all state-of-the-art shallow adaptive classifiers, especially for high-dimensional complex streams.}, urldate = {2020-12-12}, journal = {arXiv:2010.07340 [cs]}, author = {Korycki, Łukasz and Krawczyk, Bartosz}, month = oct, year = {2020}, note = {arXiv: 2010.07340}, keywords = {Computer Science - Machine Learning, I.2.0, I.5.0}, }
@article{pang_bayesian_2020, title = {A {Bayesian} {Inference} for {Remaining} {Useful} {Life} {Estimation} by {Fusing} {Accelerated} {Degradation} {Data} and {Condition} {Monitoring} {Data}}, issn = {0951-8320}, url = {http://www.sciencedirect.com/science/article/pii/S0951832020308334}, doi = {10.1016/j.ress.2020.107341}, abstract = {This article addresses the problem of estimating the remaining useful life (RUL) of degrading products by fusing the accelerated degradation data and condition monitoring (CM) data. The proposed model differs from the existing models in adopting the non-conjugate prior distributions for random-effect parameters. First, a nonlinear diffusion process model is developed to characterize the degradation process of a product. Next, the relationship between the model parameters and accelerated stress level is established, and the accelerated degradation data are used to determine the prior distribution types and estimate the hyperparameters of the model parameters. Then, to fuse the accelerated degradation data and CM data, the Bayesian inference is used to update the posterior distributions of model parameters once the new degradation observations are available. In addition, the Markov Chain Monte Carlo (MCMC) method based on Gibbs sampling is used to obtain the Bayesian solution numerically. Finally, the approximate RUL distribution considering the randomness of model parameters is obtained by the MCMC method based on the concept of the first hitting time. The proposed method is verified by the practical case study of accelerometers. Comparison results demonstrate that the proposed method can obtain higher RUL estimation accuracy and less uncertainty.}, language = {en}, urldate = {2020-12-08}, journal = {Reliability Engineering \& System Safety}, author = {Pang, Zhenan and Si, Xiaosheng and Hu, Changhua and Du, Dangbo and Pei, Hong}, month = dec, year = {2020}, keywords = {Bayesian inference, accelerated degradation data, diffusion model, non-conjugate prior distribution, remaining useful life}, pages = {107341}, }
@article{georgievskaia_predictive_2020, series = {1st {Virtual} {European} {Conference} on {Fracture} - {VECF1}}, title = {Predictive analytics as a way to smart maintenance of hydraulic turbines}, volume = {28}, issn = {2452-3216}, url = {http://www.sciencedirect.com/science/article/pii/S2452321620306004}, doi = {10.1016/j.prostr.2020.10.098}, abstract = {Today, most energy companies face serious problems with the reliability and safety of large power equipment due to the long-term operation at off-design modes. This is especially true for hydraulic units that are traditionally used to ensure the required level of energy output and maintain the stability of a power grid due to their maneuverability. Off-design operational modes cause increased loads and stresses in the unit’s components, accelerate the growth of defects, stimulate premature failures, and can lead to a grave accident with large losses. Standard diagnostic systems for hydraulic units usually do not allow tracking hazardous defects such as fatigue cracks in the runner blades, guide vanes, shaft, etc. The individuality of hydraulic units also excludes the use of any statistical methods to determine the time when the equipment will go to the limit state and its operation will become unreasonably dangerous. Therefore, the predictive analytics system is proposed as an additional approach to forecasting the appearance and growth of dangerous operational defects. The system realizes an analytical algorithm based on evaluating the fatigue strength of hydraulic unit elements under variable operating conditions and allows summing up damage from various external loads and in different time ranges. Input data for this predictive system is information about the actual operating time at every working mode and expected regime parameters for the upcoming period. The output data is information about actual and residual lifetime. All individual features are taken into account by the digital model which is a multidimensional matrix of equipment’s response to external influences. Data is generated in the cells for each unit’s elements with reference to the operating parameters and time scale. The proposed predictive analytics system not only reduces the risk of accidents and unplanned shutdowns but also enables smart maintenance of hydraulic turbines through the development of the most effective, most reasonable, most lifetime-saving strategy of using the equipment.}, language = {en}, urldate = {2020-12-08}, journal = {Procedia Structural Integrity}, author = {Georgievskaia, Evgeniia}, month = jan, year = {2020}, keywords = {crack, failure, hydraulic turbines, lifetime, predictive analytics, reliability, smart maintenance}, pages = {836--842}, }
@article{subramanian_white-box_2020, title = {White-box {Machine} learning approaches to identify governing equations for overall dynamics of manufacturing systems: {A} case study on distillation column}, issn = {2666-8270}, shorttitle = {White-box {Machine} learning approaches to identify governing equations for overall dynamics of manufacturing systems}, url = {http://www.sciencedirect.com/science/article/pii/S2666827020300141}, doi = {10.1016/j.mlwa.2020.100014}, abstract = {Dynamical equations form the basis of design for manufacturing processes and control systems; however, identifying governing equations using a mechanistic approach is tedious. Recently, Machine learning (ML) has shown promise to identify the governing dynamical equations for physical systems faster. This possibility of rapid identification of governing equations provides an exciting opportunity for advancing dynamical systems modeling. However, the applicability of ML approaches to identifying governing mechanisms for the dynamics of complex systems relevant to manufacturing systems has not been tested. We test and compare the efficacy of two white-box ML (SINDy and SymReg) approaches for predicting dynamics and structure of dynamical equations for the overall dynamics of a distillation column. Results demonstrate that a combination of ML approaches should be used to identify the full range of equations. In terms of physical law, a few terms were interpretable as related to Fick’s law of diffusion and Henry’s law in SINDy, whereas SymReg identified energy balance as driving dynamics.}, language = {en}, urldate = {2020-12-04}, journal = {Machine Learning with Applications}, author = {Subramanian, Renganathan and Moar, Raghav Rajesh and Singh, Shweta}, month = dec, year = {2020}, keywords = {ASPEN dynamics, Distillation column, Dynamic equation, Genetic programming, Machine learning, SINDy, Symbolic regression}, pages = {100014}, }
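As a concrete illustration of the sparse-identification idea mentioned above (not the authors' distillation-column study), the sketch below applies sequentially thresholded least squares, the core regression step behind SINDy, to a toy damped oscillator; the candidate library, threshold and toy dynamics are assumptions made for the example.

# From-scratch illustration of the SINDy-style identification loop on made-up data.
import numpy as np

# Simulate a damped linear oscillator: dx/dt = -0.1x + 2y, dy/dt = -2x - 0.1y
dt, n = 0.01, 5000
X = np.zeros((n, 2)); X[0] = [2.0, 0.0]
A = np.array([[-0.1, 2.0], [-2.0, -0.1]])
for k in range(n - 1):
    X[k + 1] = X[k] + dt * (A @ X[k])                       # forward Euler integration

dXdt = np.gradient(X, dt, axis=0)                           # numerical derivatives
x, y = X[:, 0], X[:, 1]
Theta = np.column_stack([np.ones(n), x, y, x*x, x*y, y*y])  # candidate term library
names = ["1", "x", "y", "x^2", "x*y", "y^2"]

def stlsq(Theta, dXdt, threshold=0.05, iters=10):
    # Sequentially thresholded least squares: fit, zero out small terms, refit.
    Xi = np.linalg.lstsq(Theta, dXdt, rcond=None)[0]
    for _ in range(iters):
        small = np.abs(Xi) < threshold
        Xi[small] = 0.0
        for j in range(dXdt.shape[1]):
            big = ~small[:, j]
            if big.any():
                Xi[big, j] = np.linalg.lstsq(Theta[:, big], dXdt[:, j], rcond=None)[0]
    return Xi

Xi = stlsq(Theta, dXdt)
for j, eq in enumerate(["dx/dt", "dy/dt"]):
    terms = [f"{Xi[i, j]:+.2f}*{names[i]}" for i in range(len(names)) if Xi[i, j] != 0]
    print(eq, "=", " ".join(terms))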
@article{quintanar-gago_assessment_2020, title = {Assessment of steam turbine blade failure and damage mechanisms using a {Bayesian} network}, issn = {0951-8320}, url = {http://www.sciencedirect.com/science/article/pii/S095183202030822X}, doi = {10.1016/j.ress.2020.107329}, abstract = {Damage mechanisms that affect components within complex machines are often hard to detect and identify, especially if they are difficult to access, inspect and/or that are under continuous duty, compromising the reliability and performance of systems. In this paper, a Bayesian network model is developed to handle the interactions among common damage mechanisms and failure modes in nuclear steam turbine rotating blades. This model enables maintenance and inspection planning to better predict which portions(s) of the turbine will need repair. To compute the conditional probability tables, the model's unique quantification method combines expert judgement, the Recursive Noisy OR, and a damage mechanism susceptibility ranking that takes into account the synergistic interactions of the damage mechanisms. The approach can be suited to different turbine designs and purposes. The Bayesian network model development is described in detail, validated, and several examples of its application are presented.}, language = {en}, urldate = {2020-11-30}, journal = {Reliability Engineering \& System Safety}, author = {Quintanar-Gago, David A. and Nelson, Pamela F. and Díaz-Sánchez, Ángeles and Boldrick, Michael S.}, month = nov, year = {2020}, keywords = {Bayesian Network, Damage Mechanism, Maintenance, Recursive Noisy OR, Steam Turbine Blade}, pages = {107329}, }
@article{teixeira_condition-based_2020, series = {30th {International} {Conference} on {Flexible} {Automation} and {Intelligent} {Manufacturing} ({FAIM2021})}, title = {Condition-based maintenance implementation: a literature review}, volume = {51}, issn = {2351-9789}, shorttitle = {Condition-based maintenance implementation}, url = {http://www.sciencedirect.com/science/article/pii/S2351978920318886}, doi = {10.1016/j.promfg.2020.10.033}, abstract = {Industrial companies are increasingly dependent on the availability and performance of their equipment to remain competitive. This circumstance demands accurate and timely maintenance actions in alignment with the organizational objectives. Condition-Based Maintenance (CBM) is a strategy that considers information about the equipment condition to recommend appropriate maintenance actions. The main purpose of CBM is to prevent functional failures or a significant performance decrease of the monitored equipment. CBM relies on a wide range of resources and techniques required to detect deviations from the normal operating conditions, diagnose incipient failures or predict the future condition of an asset. To obtain meaningful information for maintenance decision making, relevant data must be collected and properly analyzed. Recent advances in Big Data analytics and Internet of Things (IoT) enable real-time decision making based on abundant data acquired from several different sources. However, each appliance must be designed according to the equipment configuration and considering the nature of specific failure modes. CBM implementation is a complex matter, regardless of the equipment characteristics. Therefore, to ensure cost-effectiveness, it must be addressed in a systematic and organized manner, considering the technical and financial issues involved. This paper presents a literature review on approaches to support CBM implementation. Published studies and standards that provide guidelines to implement CBM are analyzed and compared. For each existing approach, the steps recommended to implement CBM are listed and the main gaps are identified. Based on the literature, factors that can affect the effective implementation of CBM are also highlighted and discussed.}, language = {en}, urldate = {2020-11-23}, journal = {Procedia Manufacturing}, author = {Teixeira, Humberto Nuno and Lopes, Isabel and Braga, Ana Cristina}, month = jan, year = {2020}, keywords = {Condition monitoring (CM), Condition-Based Maintenance (CBM), Health Management (PHM), Prognostics}, pages = {228--235}, }
@article{nikolakis_microservice_2020, series = {30th {International} {Conference} on {Flexible} {Automation} and {Intelligent} {Manufacturing} ({FAIM2021})}, title = {A microservice architecture for predictive analytics in manufacturing}, volume = {51}, issn = {2351-9789}, url = {http://www.sciencedirect.com/science/article/pii/S2351978920320102}, doi = {10.1016/j.promfg.2020.10.153}, abstract = {This paper discusses the design, development and deployment of a flexible and modular platform supporting smart predictive maintenance operations, enabled by microservices architecture and virtualization technologies. Virtualization allows the platform to be deployed in a multi-tenant environment, while facilitating resource isolation and independency from specific technologies or services. Moreover, the proposed platform supports scalable data storage, supporting effective and efficient management of large volumes of Industry 4.0 data. Methodologies of data-driven predictive maintenance are provided to the user as-a-service, facilitating offline training and online execution of pre-trained analytics models, while the connection of the raw data to contextual information supports their understanding and interpretation and guarantees interoperability across heterogeneous systems. A use case related to the predictive maintenance operations of a robotic manipulator is examined to demonstrate the effectiveness and the efficiency of the proposed platform.}, language = {en}, urldate = {2020-11-23}, journal = {Procedia Manufacturing}, author = {Nikolakis, N. and Marguglio, A. and Veneziano, G. and Greco, P. and Panicucci, S. and Cerquitelli, T. and Macii, E. and Andolina, S. and Alexopoulos, K.}, month = jan, year = {2020}, keywords = {Machine learning, Microservice architecture, Plug and Play approach, Robotics industry, Service-oriented platform}, pages = {1091--1097}, }
@article{ferreira_novel_2020, series = {30th {International} {Conference} on {Flexible} {Automation} and {Intelligent} {Manufacturing} ({FAIM2021})}, title = {A novel approach to improve maintenance operations}, volume = {51}, issn = {2351-9789}, url = {http://www.sciencedirect.com/science/article/pii/S2351978920320849}, doi = {10.1016/j.promfg.2020.10.213}, abstract = {In a market constantly changing and more demanding, the need to have any production cycle working without flaws is more important than ever. Thus, maintenance operations have taken an increasingly important role. However, different organizations may take different approaches depending on their area of business, legal aspects, as well as the company policy. Even though some concepts such as Total Productive Maintenance (TPM) and quality tools are normally used in most of the companies, there are no standard procedures or defined ways for using them across industries. Thus, it is usual that many organizations implement statistical tests in order to see whether or not those improvements were successful. This paper intends to establish a procedure to be used across organizations, improving their maintenance procedures. This procedure establishes the correct sequence of tools to be used in the improvement process, conducting the maintenance operations to a higher level of efficiency and effectiveness, allowing them to determine whether the implemented improvements were successful or whether a different approach needs to be implemented.}, language = {en}, urldate = {2020-11-23}, journal = {Procedia Manufacturing}, author = {Ferreira, S. and Martins, L. and Silva, F. J. G. and Casais, R. B. and Campilho, R. D. S. G. and Sá, J. C.}, month = jan, year = {2020}, keywords = {Maintenance, Maintenance efficiency, Maintenance improvement, Maintenance tools, TPM, Total Productive Maintenance}, pages = {1531--1537}, }
@article{liu_enhanced_2020, title = {An enhanced encoder–decoder framework for bearing remaining useful life prediction}, issn = {0263-2241}, url = {http://www.sciencedirect.com/science/article/pii/S0263224120312537}, doi = {10.1016/j.measurement.2020.108753}, abstract = {In recent years, data-driven approaches for remaining useful life (RUL) prognostics have aroused widespread concern. Bearings act as the fundamental component of machinery and their conditioning status is closely associated with the normal operation of equipment. Hence, it is crucial to accurately predict the remaining useful life of bearings. This paper explores the degradation process of bearings and proposes an enhanced encoder–decoder framework. The framework attempts to construct a decoder with the ability to look back and selectively mine underlying information in the encoder. Additionally, trigonometric functions and cumulative operation are employed to enhance the quality of health indicators. To verify the effectiveness of the proposed method, vibration data from PRONOSTIA platform are utilized for RUL prognostics. Compared with several state-of-the-art methods, the experimental results demonstrate the superiority and feasibility of the proposed method.}, language = {en}, urldate = {2020-11-23}, journal = {Measurement}, author = {Liu, Lu and Song, Xiao and Chen, Kai and Hou, Baocun and Chai, Xudong and Ning, Huansheng}, month = nov, year = {2020}, keywords = {Cumulative operation, Encoder–decoder, Fitness analysis, Trigonometric functions}, pages = {108753}, }
@article{altendeitering_scalable_2020, series = {30th {International} {Conference} on {Flexible} {Automation} and {Intelligent} {Manufacturing} ({FAIM2021})}, title = {Scalable {Detection} of {Concept} {Drift}: {A} {Learning} {Technique} {Based} on {Support} {Vector} {Machines}}, volume = {51}, issn = {2351-9789}, shorttitle = {Scalable {Detection} of {Concept} {Drift}}, url = {http://www.sciencedirect.com/science/article/pii/S2351978920319120}, doi = {10.1016/j.promfg.2020.10.057}, abstract = {The issue of concept drift describes how static machine-learning models built on historical data can become unreliable over time and pose a significant challenge to many applications. Although there is a growing body of literature investigating concept drift, existing solutions are often limited to a small number of samples or features and do not work well in Industry 4.0 scenarios. We are proposing a novel algorithm that extends the existing concept drift algorithm FLORA3 by utilizing support vector machines for the classification process. Through this combination of dynamic and static approaches the algorithm is capable of effectively analyzing data streams of high volume. For evaluation, we tested our algorithm on the publicly available data set ‘elec2’, which is based on the energy market in Australia. Our results show that the proposed algorithm needs fewer computational resources compared to other algorithms while maintaining a high level of accuracy.}, language = {en}, urldate = {2020-11-23}, journal = {Procedia Manufacturing}, author = {Altendeitering, Marcel and Dübler, Stephan}, month = jan, year = {2020}, keywords = {FLORA3, SVM, concept drift, energy data, machine learning}, pages = {400--407}, }
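The idea of wrapping a drift-handling scheme around a support vector machine can be illustrated with a generic sliding-window retraining loop; this is not the authors' FLORA3 extension, and the simulated stream, window length and drift point below are invented for the example.

# Generic sliding-window SVM sketch for a drifting stream (illustrative only).
import numpy as np
from sklearn.svm import SVC

rng = np.random.default_rng(1)
window, step, errs = 300, 50, []
X_win, y_win = [], []
clf = None
for t in range(5000):
    drift = 0.0 if t < 2500 else 3.0                 # abrupt concept drift at t = 2500
    x = rng.normal(size=2) + drift
    y = int(x[0] + x[1] > 2 * drift)                 # decision boundary moves with the drift
    if clf is not None:
        errs.append(int(clf.predict([x])[0] != y))   # prequential (test-then-train) error
    X_win.append(x); y_win.append(y)
    X_win, y_win = X_win[-window:], y_win[-window:]  # keep only the most recent window
    if t % step == 0 and len(set(y_win)) > 1:
        clf = SVC(kernel="rbf", gamma="scale").fit(np.array(X_win), y_win)
print("prequential error rate:", np.mean(errs))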
@article{castellanos_accordant_2020, title = {{ACCORDANT}: {A} domain specific model and {DevOps} approach for big data analytics architectures}, issn = {0164-1212}, shorttitle = {{ACCORDANT}}, url = {http://www.sciencedirect.com/science/article/pii/S0164121220302594}, doi = {10.1016/j.jss.2020.110869}, abstract = {Big data analytics (BDA) applications use machine learning algorithms to extract valuable insights from large, fast, and heterogeneous data sources. New software engineering challenges for BDA applications include ensuring performance levels of data-driven algorithms even in the presence of large data volume, velocity, and variety (3Vs). BDA software complexity frequently leads to delayed deployments, longer development cycles and challenging performance assessment. This paper proposes a Domain-Specific Model (DSM), and DevOps practices to design, deploy, and monitor performance metrics in BDA applications. Our proposal includes a design process, and a framework to define architectural inputs, software components, and deployment strategies through integrated high-level abstractions to enable QS monitoring. We evaluate our approach with four use cases from different domains to demonstrate a high level of generalization. Our results show shorter deployment and monitoring times, and a higher gain factor per iteration compared to similar approaches.}, language = {en}, urldate = {2020-11-21}, journal = {Journal of Systems and Software}, author = {Castellanos, Camilo and Varela, Carlos A. and Correal, Dario}, month = nov, year = {2020}, keywords = {Big data analytics deployment, DevOps, Domain specific model, Performance monitoring, Quality scenarios, Software architecture}, pages = {110869}, }
@article{fila_cloud_2020, series = {The 11th {International} {Conference} on {Emerging} {Ubiquitous} {Systems} and {Pervasive} {Networks} ({EUSPN} 2020) / {The} 10th {International} {Conference} on {Current} and {Future} {Trends} of {Information} and {Communication} {Technologies} in {Healthcare} ({ICTH} 2020) / {Affiliated} {Workshops}}, title = {Cloud {Computing} for {Industrial} {Predictive} {Maintenance} {Based} on {Prognostics} and {Health} {Management}}, volume = {177}, issn = {1877-0509}, url = {http://www.sciencedirect.com/science/article/pii/S1877050920323619}, doi = {10.1016/j.procs.2020.10.090}, abstract = {Predictive maintenance is based primarily on Prognostics and Health Management (PHM). The prognosis is a process for learning about the health status of a system and estimating its residual time before failure. A good maintenance decision is the result of a better estimate of the latter. Recently, the emergence of IT systems in the industrial field, and in particular connected objects and cloud computing, has contributed strongly to the improvement of the prognosis process. In this paper, we propose a new prognosis approach based on the Cloud Computing model and the principle of multitenancy in order to present the Prognosis as a Service. This approach provides an effective prognosis solution at the request of a client while ensuring a better quality of service. The effectiveness of our solution depends on the criteria for the performance of the prognosis system based on accuracy, mean squared error and Quality of Service (QoS).}, language = {en}, urldate = {2020-11-16}, journal = {Procedia Computer Science}, author = {Fila, Redouane and Khaili, Mohamed El and Mestari, Mohamed}, month = jan, year = {2020}, keywords = {Cloud Computing, Health Management (PHM), Performance Measurement, Predictive Maintenance, Prognosis as a Service, Prognostics, Quality of Service (QoS), Residual Life (RUL)}, pages = {631--638}, }
@article{kimera_improving_2020, title = {Improving ship yard ballast pumps’ operations: {A} {PCA} approach to predictive maintenance}, volume = {1}, issn = {2666-822X}, shorttitle = {Improving ship yard ballast pumps’ operations}, url = {http://www.sciencedirect.com/science/article/pii/S2666822X20300034}, doi = {10.1016/j.martra.2020.100003}, abstract = {This paper investigates a predictive maintenance approach for marine mechanical systems via an early warning system. A machine learning methodology was used to process and analyze the dock pump back pressure, flow rate, amperage and suction pressure data. Operating parameters for a dock pump were monitored for 40 weeks and the values were manually input into the tool. Unsupervised machine learning was used in order to draw inferences from data via MATLAB. A principal component analysis (PCA) algorithm was used to improve on the selection of the key operating parameters of the dock pumps. The dock pump flow rate and suction pressure were the principal components, which were sufficient to explain 99.707\% of the variation in the data. Using the dataset explained by the PCA, two data classes were later used in the SVM algorithm for a binary classification approach. The developed tool predicted that the dock pump may fail/requires maintenance between the seventh and eighth weeks. This prediction deviated from the actual ten weeks that it took the dock pump to fail. The deviation of the predicted time to failure from the actual one could be attributed to the quality of the historical failure and maintenance data. Nevertheless, with less ambiguity of the data, the maintenance prediction tool can be used as a basis before sensor technology on the dock pumps is implemented.}, language = {en}, urldate = {2020-11-16}, journal = {Maritime Transport Research}, author = {Kimera, David and Nangolo, Filemon N.}, month = jan, year = {2020}, keywords = {Ballast pumps, Floating docks, Machine learning, Predictive maintenance, Principal component analysis}, pages = {100003}, }
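A hedged sketch of the PCA-plus-SVM workflow described above, written with scikit-learn rather than MATLAB; the pump readings (back pressure, flow rate, amperage, suction pressure), class labels and numbers are fabricated for illustration only.

# Illustrative PCA + SVM pipeline on made-up pump readings (not the paper's data).
import numpy as np
from sklearn.decomposition import PCA
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

rng = np.random.default_rng(0)
healthy = rng.normal([5.0, 120.0, 30.0, 1.2], [0.2, 5.0, 1.0, 0.05], size=(200, 4))
degraded = rng.normal([5.5, 100.0, 34.0, 0.9], [0.3, 8.0, 1.5, 0.08], size=(60, 4))
X = np.vstack([healthy, degraded])
y = np.r_[np.zeros(200), np.ones(60)]            # 0 = normal, 1 = needs maintenance

pca = PCA(n_components=2).fit(StandardScaler().fit_transform(X))
print("explained variance ratio:", pca.explained_variance_ratio_)  # which components dominate

clf = make_pipeline(StandardScaler(), PCA(n_components=2), SVC(kernel="rbf"))
clf.fit(X, y)
print("predicted class for a new reading:", clf.predict([[5.4, 102.0, 33.5, 0.95]]))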
@article{geetha_overview_2020, title = {Overview of machine learning and its adaptability in mechanical engineering}, issn = {2214-7853}, url = {http://www.sciencedirect.com/science/article/pii/S2214785320373363}, doi = {10.1016/j.matpr.2020.09.611}, abstract = {For a mechanical engineer, Artificial Intelligence is a means to take a system to the next level and develop a better device. To better understand the physical phenomenon, engineers design the products that we all interact with. Machine learning (ML) plays a central role in translating that technology to make the world a better place. Many of the tools in ML are embedded with these methods and techniques to design machines which are interactive. The present study is an overview of applications of ML in Mechanical Engineering.}, language = {en}, urldate = {2020-11-13}, journal = {Materials Today: Proceedings}, author = {Geetha, N. K. and Bridjesh, P.}, month = nov, year = {2020}, keywords = {Algorithm, Applications, Artificial intelligence, Machine learning, Mechanical engineering}, }
@article{luong_heterogeneous_2020, title = {Heterogeneous ensemble selection for evolving data streams}, issn = {0031-3203}, url = {http://www.sciencedirect.com/science/article/pii/S003132032030546X}, doi = {10.1016/j.patcog.2020.107743}, abstract = {Ensemble learning has been widely applied to both batch data classification and streaming data classification. For the latter setting, most existing ensemble systems are homogenous, which means they are generated from only one type of learning model. In contrast, by combining several types of different learning models, a heterogeneous ensemble system can achieve greater diversity among its members, which helps to improve its performance. Although heterogeneous ensemble systems have achieved many successes in the batch classification setting, it is not trivial to extend them directly to the data stream setting. In this study, we propose a novel HEterogeneous Ensemble Selection (HEES) method, which dynamically selects an appropriate subset of base classifiers to predict data under the stream setting. We are inspired by the observation that a well-chosen subset of good base classifiers may outperform the whole ensemble system. Here, we define a good candidate as one that expresses not only high predictive performance but also high confidence in its prediction. Our selection process is thus divided into two sub-processes: accurate-candidate selection and confident-candidate selection. We define an accurate candidate in the stream context as a base classifier with high accuracy over the current concept, while a confident candidate as one with a confidence score higher than a certain threshold. In the first sub-process, we employ the prequential accuracy to estimate the performance of a base classifier at a specific time, while in the latter sub-process, we propose a new measure to quantify the predictive confidence and provide a method to learn the threshold incrementally. The final ensemble is formed by taking the intersection of the sets of confident classifiers and accurate classifiers. Experiments on a wide range of data streams show that the proposed method achieves competitive performance with lower running time in comparison to the state-of-the-art online ensemble methods.}, language = {en}, urldate = {2020-11-13}, journal = {Pattern Recognition}, author = {Luong, Anh Vu and Nguyen, Tien Thanh and Liew, Alan Wee-Chung and Wang, Shilin}, month = nov, year = {2020}, keywords = {Data streams, Ensemble selection, Heterogeneous ensembles}, pages = {107743}, }
@article{cakir_experimental_2020, title = {The experimental application of popular machine learning algorithms on predictive maintenance and the design of {IIoT} based condition monitoring system}, issn = {0360-8352}, url = {http://www.sciencedirect.com/science/article/pii/S0360835220306252}, doi = {10.1016/j.cie.2020.106948}, abstract = {With the fourth industrial revolution, which has become increasingly widespread in the manufacturing industry, traditional maintenance has been replaced by the industrial internet of things (IIoT) based on condition monitoring system (CMS). The IIoT concept provides easier and reliable maintenance. Unlike traditional maintenance, IIoT systems that perform real-time monitoring can provide great advantages to the company by notifying the related maintenance team members of the factory before a serious failure occurs. It is very important to detect faulty bearings before they reach the critical level during the rotation. In this study, an industry 4.0 compatible, IIoT based and low-cost CMS was created and it consists of three main parts. Firstly experimental setup, secondly IIoT based condition monitoring application (CMA) and finally machine learning (ML) models and their evaluation. The experimental setup contains mechanical and electronic materials. Although the most common method used in the classification of bearing damage is vibration data, it observed that characteristics such as sound level, current, rotational speed, and temperature should be included in the data set in order to increase the success of the classification. All these data were collected from the setup, which is 6203 type bearing connected to the universal motor shaft. The designed CMA provides real-time monitoring and recording of the data, which comes wirelessly from the setup, on a mobile device that has an Android operating system. The CMA can also send SMS and e-mail notifications to maintenance team supervisors over mobile devices in case critical thresholds are exceeded. Lastly, the data collected from the experimental setup was modeled for classification with popular ML algorithms such as support vector machine (SVM), linear discrimination analysis (LDA), random forest (RF), decision tree (DT), and k-nearest neighbor (kNN). The models were evaluated with accuracy, precision, TPR, TNR, FPR, FNR, F1 score and, Kappa metrics. During the evaluation of all models, it was observed that with the increase in the number of features in the data set, the accuracy, sensitivity, TPR, TNR, F1 score and Kappa metrics increased above 99\% at 95\% confidence interval, and FPR and FNR metrics fell below 1\%. Although ML models gave successful results, LDA and DT models gave results much faster than others did. On the other hand, the classification success of the LDA model is relatively low. However, DT model is the optimum choice for CMS due to its convenience in determining threshold values, and its ability to give fast and acceptable classification rates.}, language = {en}, urldate = {2020-11-03}, journal = {Computers \& Industrial Engineering}, author = {Cakir, Mustafa and Guvenc, Mehmet Ali and Mistikoglu, Selcuk}, month = oct, year = {2020}, keywords = {Condition monitoring, Industry 4.0, Internet of things, Machine learning, Predictive maintenance}, pages = {106948}, }
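As a companion to this abstract, here is a minimal scikit-learn sketch of the kind of comparison the study performs across SVM, LDA, RF, DT and kNN. The synthetic dataset and the single F1 score below are placeholders for the bearing data and the full metric set (accuracy, TPR, TNR, FPR, FNR, Kappa) used in the paper; this is not the authors' code.

# Minimal sketch (not the authors' code): comparing the classifier families named in
# the abstract on a generic tabular condition-monitoring dataset.
import numpy as np
from sklearn.datasets import make_classification  # stand-in for the bearing dataset
from sklearn.model_selection import cross_val_score
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier

X, y = make_classification(n_samples=1000, n_features=8, n_informative=6, random_state=0)

models = {
    "SVM": make_pipeline(StandardScaler(), SVC()),
    "LDA": LinearDiscriminantAnalysis(),
    "RF": RandomForestClassifier(n_estimators=100, random_state=0),
    "DT": DecisionTreeClassifier(random_state=0),
    "kNN": make_pipeline(StandardScaler(), KNeighborsClassifier(n_neighbors=5)),
}

for name, model in models.items():
    scores = cross_val_score(model, X, y, cv=5, scoring="f1_macro")
    print(f"{name}: F1 = {scores.mean():.3f} +/- {scores.std():.3f}")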
@article{li_remaining_2020, title = {Remaining {Useful} {Life} {Prediction} based on a {Multi}-{Sensor} {Data} {Fusion} {Model}}, issn = {0951-8320}, url = {http://www.sciencedirect.com/science/article/pii/S0951832020307493}, doi = {10.1016/j.ress.2020.107249}, abstract = {With the rapid development of Industrial Internet of Things, more and more sensors have been used for condition monitoring and prognostics of industrial systems. Big data collected from sensor networks bring abundant information resources as well as technical challenges for remaining useful life (RUL) prediction. The major technical challenges include how to select informative sensors and fuse multi-sensor data to improve the prediction performance. To deal with the challenges, this paper proposes a RUL prediction method based on a multi-sensor data fusion model. In this method, the inherent degradation process of the system state is expressed using a state transition function following a Wiener process. Multi-sensor signals are explicated as various proxies of the inherent system degradation process using a multivariate measurement function. The system state is estimated by fusing multi-sensor signals using particle filtering. Informative sensors are selected by a prioritized sensor group selection algorithm. This algorithm first prioritizes sensors according to their individual performances in RUL prediction, and then selects an optimal sensor group based on their combined performances. The effectiveness of the proposed method is demonstrated using a simulation study and aircraft engine degradation data from NASA repository.}, language = {en}, urldate = {2020-10-26}, journal = {Reliability Engineering \& System Safety}, author = {Li, Naipeng and Gebraeel, Nagi and Lei, Yaguo and Fang, Xiaolei and Cai, Xiao and Yan, Tao}, month = oct, year = {2020}, keywords = {Prognostic degradation modeling, big data, multi-sensor fusion, remaining useful life prediction, state-space model}, pages = {107249}, }
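The state-transition model named in this abstract is a Wiener process. The toy Monte Carlo sketch below (made-up parameters, no multi-sensor fusion and no particle filtering) only shows how a remaining-useful-life distribution follows from such a model as the first-passage time to a failure threshold.

# Toy illustration of a Wiener-process degradation model:
#   X(t+dt) = X(t) + mu*dt + sigma*sqrt(dt)*N(0, 1)
# with RUL defined as the first crossing of a failure threshold. All parameters are
# made up; the paper estimates the latent state by fusing several sensors.
import numpy as np

rng = np.random.default_rng(0)
mu, sigma, dt, threshold = 0.05, 0.1, 1.0, 10.0
x0 = 4.0  # current (estimated) degradation level

n_paths, horizon = 5000, 500
rul = np.full(n_paths, np.nan)
for i in range(n_paths):
    x = x0
    for k in range(1, horizon + 1):
        x += mu * dt + sigma * np.sqrt(dt) * rng.normal()
        if x >= threshold:
            rul[i] = k * dt
            break

print("mean RUL:", np.nanmean(rul),
      "10th/90th percentile:", np.nanpercentile(rul, [10, 90]))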
@article{luo_multiple_2020, title = {Multiple degradation mode analysis via gated recurrent unit mode recognizer and life predictors for complex equipment}, volume = {123}, issn = {0166-3615}, url = {http://www.sciencedirect.com/science/article/pii/S0166361520305662}, doi = {10.1016/j.compind.2020.103332}, abstract = {In order to ensure the reliability and safety of industrial complex equipment, it is necessary to predict and manage the health status of the equipment. Remaining useful life (RUL) prediction is the decision basis of condition-based maintenance (CBM) and one of the main tasks in prognostics and health management (PHM). Complex systems tend to have multiple degradation modes, while similar degradation features may have significantly different RUL labels in different degradation modes, which can be called feature multi-label problem. To solve the problem, a novel RUL prediction method was proposed, which first analyzed the degradation mode and then utilized the predictor for RUL prediction under the specific mode. In particular, a modified de-noising auto-encoder (DAE) was proposed for nonlinear feature extraction and noise reduction. Mode recognizer and life predictors based on gated recurrent unit (GRU) and fuzzy k-means were proposed as the core modules. Case studies of commercial modular aero-propulsion system simulation data and the life cycle data of bearing were conducted to verify the effectiveness of the proposed method. Results show that the proposed method achieved much higher prediction accuracy than other methods.}, language = {en}, urldate = {2020-10-26}, journal = {Computers in Industry}, author = {Luo, Qinyuan and Chang, Yuanhong and Chen, Jinglong and Jing, Hongjie and Lv, Haixin and Pan, Tongyang}, month = dec, year = {2020}, keywords = {Complex equipment, Multiple degradation mode, Recurrent neural network, Remaining useful life}, pages = {103332}, }
@article{erhan_smart_2020, title = {Smart anomaly detection in sensor systems: {A} multi-perspective review}, issn = {1566-2535}, shorttitle = {Smart anomaly detection in sensor systems}, url = {http://www.sciencedirect.com/science/article/pii/S1566253520303717}, doi = {10.1016/j.inffus.2020.10.001}, abstract = {Anomaly detection is concerned with identifying data patterns that deviate remarkably from the expected behaviour. This is an important research problem, due to its broad set of application domains, from data analysis to e-health, cybersecurity, predictive maintenance, fault prevention, and industrial automation. Herein, we review state-of-the-art methods that may be employed to detect anomalies in the specific area of sensor systems, which poses hard challenges in terms of information fusion, data volumes, data speed, and network/energy efficiency, to mention but the most pressing ones. In this context, anomaly detection is a particularly hard problem, given the need to find computing-energy-accuracy trade-offs in a constrained environment. We taxonomize methods ranging from conventional techniques (statistical methods, time-series analysis, signal processing, etc.) to data-driven techniques (supervised learning, reinforcement learning, deep learning, etc.). We also look at the impact that different architectural environments (Cloud, Fog, Edge) can have on the sensors ecosystem. The review points to the most promising intelligent-sensing methods, and pinpoints a set of interesting open issues and challenges.}, language = {en}, urldate = {2020-10-19}, journal = {Information Fusion}, author = {Erhan, L. and Ndubuaku, M. and Di Mauro, M. and Song, W. and Chen, M. and Fortino, G. and Bagdasar, O. and Liotta, A.}, month = oct, year = {2020}, keywords = {Anomaly detection, Intelligent sensing, Internet of things, Machine learning, Sensor systems}, }
@article{zonta_predictive_2020, title = {Predictive maintenance in the {Industry} 4.0: {A} systematic literature review}, volume = {150}, issn = {0360-8352}, shorttitle = {Predictive maintenance in the {Industry} 4.0}, url = {http://www.sciencedirect.com/science/article/pii/S0360835220305787}, doi = {10.1016/j.cie.2020.106889}, abstract = {Industry 4.0 is collaborating directly for the technological revolution. Both machines and managers are daily confronted with decision making involving a massive input of data and customization in the manufacturing process. The ability to predict the need for maintenance of assets at a specific future moment is one of the main challenges in this scope. The possibility of performing predictive maintenance contributes to enhancing machine downtime, costs, control, and quality of production. We observed that surveys and tutorials about Industry 4.0 focus mainly on addressing data analytics and machine learning methods to change production procedures, so not comprising predictive maintenance methods and their organization. In this context, this article presents a systematic literature review of initiatives of predictive maintenance in Industry 4.0, identifying and cataloging methods, standards, and applications. As the main contributions, this survey discusses the current challenges and limitations in predictive maintenance, in addition to proposing a novel taxonomy to classify this research area considering the needs of the Industry 4.0. We concluded that computer science, including artificial intelligence and distributed computing fields, is more and more present in an area where engineering was the dominant expertise, so detaching the importance of a multidisciplinary approach to address Industry 4.0 effectively.}, language = {en}, urldate = {2020-10-12}, journal = {Computers \& Industrial Engineering}, author = {Zonta, Tiago and da Costa, Cristiano André and da Rosa Righi, Rodrigo and de Lima, Miromar José and da Trindade, Eduardo Silveira and Li, Guann Pyng}, month = dec, year = {2020}, keywords = {Artificial intelligence, Conditional-based maintenance, Industry 4.0, Predictive Maintenance, Remaining Useful Life}, pages = {106889}, }
@article{ahmer_unified_2020, series = {53rd {CIRP} {Conference} on {Manufacturing} {Systems} 2020}, title = {A unified approach towards performance monitoring and condition-based maintenance in grinding machines}, volume = {93}, issn = {2212-8271}, url = {http://www.sciencedirect.com/science/article/pii/S2212827120307289}, doi = {10.1016/j.procir.2020.04.094}, abstract = {The process controller in a precision grinder for bearing rings puts high performance demands on the machine to achieve desired quality in production. This paper presents a unique approach of adding additional sensors for machine condition monitoring for the purpose of learning and using high fidelity condition indicators. The consolidation of real-time sensor data and the process control signals yields high-dimensional dataset. Automatic segmentation helps optimize the amount of data for processing and data mining ahead of fault diagnosis. The proposed setup is state of the art for prognostics as part of condition-based maintenance in a production machine.}, language = {en}, urldate = {2020-10-05}, journal = {Procedia CIRP}, author = {Ahmer, Muhammad and Marklund, Pär and Gustafsson, Martin and Berglund, Kim}, month = jan, year = {2020}, keywords = {Analytics, Automation, Condition monitoring, Grinding, Machining, Maintenance, Manufacturing, Measurement, Process Monitoring, Sensor}, pages = {1388--1393}, }
@article{dogan_machine_2020, title = {Machine {Learning} and {Data} {Mining} in {Manufacturing}}, issn = {0957-4174}, url = {http://www.sciencedirect.com/science/article/pii/S095741742030823X}, doi = {10.1016/j.eswa.2020.114060}, abstract = {Manufacturing organizations need to use different kinds of techniques and tools in order to fulfill their foundation goals. In this aspect, using machine learning (ML) and data mining (DM) techniques and tools could be very helpful for dealing with challenges in manufacturing. Therefore, in this paper, a comprehensive literature review is presented to provide an overview of how machine learning techniques can be applied to realize manufacturing mechanisms with intelligent actions. Furthermore, it points to several significant research questions that are unanswered in the recent literature having the same target. Our survey aims to provide researchers with a solid understanding of the main approaches and algorithms used to improve manufacturing processes over the past two decades. It presents the previous ML studies and recent advances in manufacturing by grouping them under four main subjects: scheduling, monitoring, quality, and failure. It comprehensively discusses existing solutions in manufacturing according to various aspects, including tasks (i.e., clustering, classification, regression), algorithms (i.e., support vector machine, neural network), learning types (i.e., ensemble learning, deep learning), and performance metrics (i.e., accuracy, mean absolute error). Furthermore, the main steps of knowledge discovery in databases (KDD) process to be followed in manufacturing applications are explained in detail. In addition, some statistics about the current state are also given from different perspectives. Besides, it explains the advantages of using machine learning techniques in manufacturing, expresses the ways to overcome certain challenges, and offers some possible further research directions.}, language = {en}, urldate = {2020-10-05}, journal = {Expert Systems with Applications}, author = {Dogan, Alican and Birant, Derya}, month = sep, year = {2020}, keywords = {Machine learning, classification, clustering, data mining, manufacturing}, pages = {114060}, }
@article{langone_interpretable_2020, title = {Interpretable {Anomaly} {Prediction}: {Predicting} anomalous behavior in industry 4.0 settings via regularized logistic regression tools}, issn = {0169-023X}, shorttitle = {Interpretable {Anomaly} {Prediction}}, url = {http://www.sciencedirect.com/science/article/pii/S0169023X1830644X}, doi = {10.1016/j.datak.2020.101850}, abstract = {Prediction of anomalous behavior in industrial assets based on sensor reading represents a key focus in modern business practice. As a matter of fact, forecast of forthcoming faults is crucial to implement predictive maintenance, i.e. maintenance decision making based on real time information from components and systems, which allows, among other benefits, to reduce maintenance cost, minimize downtime, increase safety, enhance product quality and productivity. However, building a model able to predict the future occurrence of a failure is challenging for various reasons. First, data are usually highly imbalanced, meaning that patterns describing a faulty regime are much less numerous than normal behavior instances, which makes model design difficult. Second, model predictions should be not only accurate (to avoid false alarms and missed detections) but also explainable to operators responsible for scheduling maintenance or control actions. In this paper we introduce a method called Interpretable Anomaly Prediction (IAP) allowing to handle these issues by using regularized logistic regression as core prediction model. In particular, in contrast to anomaly detection algorithms which permit to identify if the current data are anomalous or not, the proposed technique is able to predict the probability that future data will be abnormal. Furthermore, feature extraction and selection mechanisms give insights on the possible root causes leading to failures. The proposed strategy is validated with a large imbalanced multivariate time-series dataset consisting of measurements of several process variables surrounding an high pressure plunger pump situated in a complex chemical plant.}, language = {en}, urldate = {2020-10-05}, journal = {Data \& Knowledge Engineering}, author = {Langone, Rocco and Cuzzocrea, Alfredo and Skantzos, Nikolaos}, month = aug, year = {2020}, pages = {101850}, }
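The core predictor described in this abstract is a regularized logistic regression on imbalanced data. The sketch below shows that ingredient only, as an L1-penalized, class-weighted scikit-learn model on synthetic data; it is not the full IAP pipeline with its feature extraction and selection, and the penalty strength and class ratio are illustrative assumptions.

# Sketch of the core ingredient named in the abstract: a regularized logistic
# regression on an imbalanced fault dataset, whose nonzero coefficients point at
# candidate root-cause features. Data and dimensions are placeholders.
import numpy as np
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

X, y = make_classification(n_samples=5000, n_features=20,
                           weights=[0.97, 0.03], random_state=0)
X_tr, X_te, y_tr, y_te = train_test_split(X, y, stratify=y, random_state=0)

clf = LogisticRegression(penalty="l1", solver="liblinear", C=0.1,
                         class_weight="balanced")
clf.fit(X_tr, y_tr)

print(classification_report(y_te, clf.predict(X_te)))
# Interpretability: features with nonzero weights are the ones driving the predicted risk.
print("informative features:", np.flatnonzero(clf.coef_[0]))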
@article{shi_dual-lstm_2020, title = {A {Dual}-{LSTM} {Framework} {Combining} {Change} {Point} {Detection} and {Remaining} {Useful} {Life} {Prediction}}, issn = {0951-8320}, url = {http://www.sciencedirect.com/science/article/pii/S0951832020307572}, doi = {10.1016/j.ress.2020.107257}, abstract = {Remaining Useful Life (RUL) prediction is a key task of Condition-based Maintenance (CBM). The massive data collected from multiple sensors enables monitoring the complex systems in near real-time. However, such multiple sensors data environments pose a challenging task of combining the sensor data to infer the quality and RUL of the system. To address this task, we propose a Dual-LSTM framework that leverages Long-Short Term Memory (LSTM) for degradation analysis and RUL prediction. The Dual-LSTM relaxes the strong assumption of the fixed change point and detects the uncertain change point unit by unit at first. Then, the Dual-LSTM predicts the health index beyond the change point which can be leveraged to calculate the RUL. The proposed Dual-LSTM (i) achieves real-time high-precision RUL prediction by connecting the change point detection and RUL prediction with the health index construction, (ii) introduces a novel one-dimension health index function, (iii) leverages historical information to achieve detection and prediction tasks by characterizing both long and short-term dependencies of sensor signals through LSTM network. The effectiveness of the proposed Dual-LSTM framework is validated and compared to state-of-art benchmark methods on two publicly available turbofan engine degradation datasets.}, language = {en}, urldate = {2020-10-05}, journal = {Reliability Engineering \& System Safety}, author = {Shi, Zunya and Chehade, Abdallah}, month = oct, year = {2020}, keywords = {Change point detection, Long short-term memory, Neural networks, Prognosis, Remaining useful life, Sensor fusion}, pages = {107257}, }
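A generic building block of this kind of framework is an LSTM that maps a window of multi-sensor readings to a scalar health index. The PyTorch sketch below shows only that block, with made-up dimensions; it is not the authors' Dual-LSTM architecture and omits the change-point detection logic.

# Minimal, generic sketch (not the authors' model): an LSTM regressor mapping a window
# of multi-sensor readings to a scalar health index.
import torch
import torch.nn as nn

class HealthIndexLSTM(nn.Module):
    def __init__(self, n_sensors, hidden=32):
        super().__init__()
        self.lstm = nn.LSTM(n_sensors, hidden, batch_first=True)
        self.head = nn.Linear(hidden, 1)

    def forward(self, x):             # x: (batch, time, n_sensors)
        out, _ = self.lstm(x)
        return self.head(out[:, -1])  # health index from the last hidden state

model = HealthIndexLSTM(n_sensors=14)
window = torch.randn(8, 30, 14)       # e.g. 8 engines, 30 cycles, 14 sensors
print(model(window).shape)            # torch.Size([8, 1])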
@article{javed_benchmark_2020, title = {A benchmark study on time series clustering}, volume = {1}, issn = {2666-8270}, url = {http://www.sciencedirect.com/science/article/pii/S2666827020300013}, doi = {10.1016/j.mlwa.2020.100001}, abstract = {This paper presents the first time series clustering benchmark utilizing all time series datasets currently available in the University of California Riverside (UCR) archive — the state of the art repository of time series data. Specifically, the benchmark examines eight popular clustering methods representing three categories of clustering algorithms (partitional, hierarchical and density-based) and three types of distance measures (Euclidean, dynamic time warping, and shape-based), while adhering to six restrictions on datasets and methods to make the comparison as unbiased as possible. A phased evaluation approach was then designed for summarizing dataset-level assessment metrics and discussing the results. The benchmark study presented can be a useful reference for the research community on its own; and the dataset-level assessment metrics reported may be used for designing evaluation frameworks to answer different research questions.}, language = {en}, urldate = {2020-10-05}, journal = {Machine Learning with Applications}, author = {Javed, Ali and Lee, Byung Suk and Rizzo, Donna M.}, month = sep, year = {2020}, keywords = {Benchmark, Clustering, Time series, UCR archive}, pages = {100001}, }
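One cell of this benchmark's comparison grid (partitional clustering under Euclidean versus DTW distance) can be reproduced in a few lines. The tslearn library and its bundled "Trace" example set used below are assumptions made for illustration, not necessarily the tooling of the benchmark itself.

# Illustration of one cell of the comparison grid: k-means-style partitional clustering
# of time series under Euclidean vs. DTW distance, using the tslearn library.
import numpy as np
from tslearn.clustering import TimeSeriesKMeans
from tslearn.datasets import CachedDatasets

X, _, _, _ = CachedDatasets().load_dataset("Trace")  # small UCR-style example set
X = X[:50]                                           # keep the demo fast

for metric in ("euclidean", "dtw"):
    km = TimeSeriesKMeans(n_clusters=4, metric=metric, random_state=0)
    labels = km.fit_predict(X)
    print(metric, "cluster sizes:", np.bincount(labels))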
@article{burg_evaluation_2020, title = {An {Evaluation} of {Change} {Point} {Detection} {Algorithms}}, url = {http://arxiv.org/abs/2003.06222}, abstract = {Change point detection is an important part of time series analysis, as the presence of a change point indicates an abrupt and significant change in the data generating process. While many algorithms for change point detection exist, little attention has been paid to evaluating their performance on real-world time series. Algorithms are typically evaluated on simulated data and a small number of commonly-used series with unreliable ground truth. Clearly this does not provide sufficient insight into the comparative performance of these algorithms. Therefore, instead of developing yet another change point detection method, we consider it vastly more important to properly evaluate existing algorithms on real-world data. To achieve this, we present the first data set specifically designed for the evaluation of change point detection algorithms, consisting of 37 time series from various domains. Each time series was annotated by five expert human annotators to provide ground truth on the presence and location of change points. We analyze the consistency of the human annotators, and describe evaluation metrics that can be used to measure algorithm performance in the presence of multiple ground truth annotations. Subsequently, we present a benchmark study where 14 existing algorithms are evaluated on each of the time series in the data set. This study shows that binary segmentation (Scott and Knott, 1974) and Bayesian online change point detection (Adams and MacKay, 2007) are among the best performing methods. Our aim is that this data set will serve as a proving ground in the development of novel change point detection algorithms.}, urldate = {2020-10-02}, journal = {arXiv:2003.06222 [cs, stat]}, author = {Burg, Gerrit J. J. van den and Williams, Christopher K. I.}, month = may, year = {2020}, note = {arXiv: 2003.06222}, keywords = {62M10, Computer Science - Machine Learning, G.3, Statistics - Machine Learning, Statistics - Methodology}, }
@article{truong_selective_2020, title = {Selective review of offline change point detection methods}, volume = {167}, issn = {0165-1684}, url = {http://www.sciencedirect.com/science/article/pii/S0165168419303494}, doi = {10.1016/j.sigpro.2019.107299}, abstract = {This article presents a selective survey of algorithms for the offline detection of multiple change points in multivariate time series. A general yet structuring methodological strategy is adopted to organize this vast body of work. More precisely, detection algorithms considered in this review are characterized by three elements: a cost function, a search method and a constraint on the number of changes. Each of those elements is described, reviewed and discussed separately. Implementations of the main algorithms described in this article are provided within a Python package called ruptures.}, language = {en}, urldate = {2020-10-01}, journal = {Signal Processing}, author = {Truong, Charles and Oudre, Laurent and Vayatis, Nicolas}, month = feb, year = {2020}, keywords = {Change point detection, Segmentation, Statistical signal processing}, pages = {107299}, }
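The abstract points to the Python package ruptures; a minimal example of the review's three design elements (cost function, search method, constraint) might look as follows. The "l2" cost, the toy signal and the penalty value are arbitrary choices, not recommendations from the review.

# Minimal example with the ruptures package named in the abstract: the cost model
# ("l2"), the search method (Pelt vs. Binseg) and the constraint (penalty or fixed
# number of breakpoints) correspond to the three elements of the review.
import ruptures as rpt

# Piecewise-constant toy signal with three true change points.
signal, true_bkps = rpt.pw_constant(n_samples=500, n_features=1, n_bkps=3, noise_std=1.0)

# Penalized search (number of changes unknown).
bkps_pelt = rpt.Pelt(model="l2").fit(signal).predict(pen=10)

# Constrained search (number of changes fixed).
bkps_binseg = rpt.Binseg(model="l2").fit(signal).predict(n_bkps=3)

print("true:", true_bkps, "pelt:", bkps_pelt, "binseg:", bkps_binseg)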
@article{duan_variable-length_2020, title = {Variable-length {Subsequence} {Clustering} in {Time} {Series}}, issn = {1558-2191}, doi = {10.1109/TKDE.2020.2986965}, abstract = {Subsequence clustering is an important issue in time series data mining. Observing that most time series consist of various patterns with different unknown lengths, we propose an optimization framework to adaptively estimate the lengths and representations for different patterns. Our framework minimizes the inner subsequence cluster errors with respect to subsequence clusters and segmentation under time series cover constraint where the subsequence cluster lengths can be variable. To optimize our framework, we first generate abundant initial subsequence clusters with different lengths. Then, three cluster operations, i.e., cluster splitting, combination and removing, are used to iteratively refine the cluster lengths and representations by respectively splitting clusters consisting of different patterns, joining neighboring clusters belonging to the same pattern and removing clusters to the predefined cluster number. During each cluster refinement, we employ an efficient algorithm to alternatively optimize subsequence clusters and segmentation based on dynamic programming. Our method can automatically and efficiently extract the unknown variable-length subsequence clusters in the time series. Comparative results with the state-of-the-art are conducted on various synthetic and real time series, and quantitative and qualitative performances demonstrate the effectiveness of our method.}, journal = {IEEE Transactions on Knowledge and Data Engineering}, author = {Duan, Jiangyong and Guo, Lili}, year = {2020}, note = {Conference Name: IEEE Transactions on Knowledge and Data Engineering}, keywords = {Adaptation models, Clustering algorithms, Clustering methods, Data mining, Feature extraction, Optimization, Time series analysis, subsequence clustering, time series data mining, time series segmentation, variable-length patterns}, pages = {1--1}, }
@article{keriven_newma_2020, title = {{NEWMA}: {A} {New} {Method} for {Scalable} {Model}-{Free} {Online} {Change}-{Point} {Detection}}, volume = {68}, issn = {1941-0476}, shorttitle = {{NEWMA}}, doi = {10.1109/TSP.2020.2990597}, abstract = {We consider the problem of detecting abrupt changes in the distribution of a multi-dimensional time series, with limited computing power and memory. In this paper, we propose a new, simple method for model-free online change-point detection that relies only on fast and light recursive statistics, inspired by the classical Exponential Weighted Moving Average algorithm (EWMA). The proposed idea is to compute two EWMA statistics on the stream of data with different forgetting factors, and to compare them. By doing so, we show that we implicitly compare recent samples with older ones, without the need to explicitly store them. Additionally, we leverage Random Features (RFs) to efficiently use the Maximum Mean Discrepancy as a distance between distributions, furthermore exploiting recent optical hardware to compute high-dimensional RFs in near constant time. We show that our method is significantly faster than usual non-parametric methods for a given accuracy.}, journal = {IEEE Transactions on Signal Processing}, author = {Keriven, Nicolas and Garreau, Damien and Poli, Iacopo}, year = {2020}, note = {Conference Name: IEEE Transactions on Signal Processing}, keywords = {Brain modeling, Change detection algorithms, Computational modeling, Hilbert space, Kernel, Microsoft Windows, Radio frequency, Signal processing algorithms, Streaming media, method of moments, optical computing}, pages = {3515--3528}, }
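The idea summarized in this abstract, comparing two exponentially weighted moving averages of the stream computed with different forgetting factors, reduces to a few lines of NumPy on a scalar stream. The forgetting factors and threshold below are arbitrary, and the paper's random-feature/MMD machinery for multivariate data is omitted.

# Sketch of the core NEWMA idea: two EWMAs of the stream with different forgetting
# factors implicitly compare recent samples with older ones; a change is flagged when
# they drift apart. Plain NumPy on raw samples, not the paper's random-feature version.
import numpy as np

rng = np.random.default_rng(0)
stream = np.concatenate([rng.normal(0, 1, 500), rng.normal(2, 1, 500)])  # change at t=500

lam_fast, lam_slow, threshold = 0.1, 0.01, 0.8
z_fast = z_slow = 0.0
for t, x in enumerate(stream):
    z_fast = (1 - lam_fast) * z_fast + lam_fast * x
    z_slow = (1 - lam_slow) * z_slow + lam_slow * x
    if abs(z_fast - z_slow) > threshold:
        print(f"change flagged at t={t}")
        break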
@article{foerster_dynamic_2020, series = {53rd {CIRP} {Conference} on {Manufacturing} {Systems} 2020}, title = {Dynamic risk consideration of predicted maintenance needs regarding economic efficiency}, volume = {93}, issn = {2212-8271}, url = {http://www.sciencedirect.com/science/article/pii/S2212827120306545}, doi = {10.1016/j.procir.2020.04.067}, abstract = {In the context of predictive maintenance, where machine failures are predicted, it is assumed that a failure prognosis has already taken place. To ensure that such a forecast is adequately implemented, the results must be appropriately integrated into the planning processes. However, this leads to a planning conflict if a machine is addressed by both production and maintenance planning. For this reason, a prioritization of alternative actions is derived based on the concept of risk management. In this way, the appropriate measures can be economically quantified and finally weighed against each other. The result is a quantitative evaluation basis for a economic consideration of impending machine malfunctions on the one hand and value-adding production orders on the other.}, language = {en}, urldate = {2020-09-28}, journal = {Procedia CIRP}, author = {Foerster, Fabian and Nikelowski, Lukas}, month = jan, year = {2020}, keywords = {economic efficiency, maintenance planning, predictive maintenance, production scheduling, risk management}, pages = {915--920}, }
@article{hennig_comparison_2020, series = {53rd {CIRP} {Conference} on {Manufacturing} {Systems} 2020}, title = {Comparison of {Time} {Series} {Clustering} {Algorithms} for {Machine} {State} {Detection}}, volume = {93}, issn = {2212-8271}, url = {http://www.sciencedirect.com/science/article/pii/S2212827120307149}, doi = {10.1016/j.procir.2020.03.084}, abstract = {New developments in domains like mathematics and statistical learning and availability of easy-to-use, often freely accessible software tools offer great potential to transform the manufacturing domain and their grasp on the increased manufacturing data repositories sustainably. One of the most exciting developments is in the area of machine learning. Time series clustering could be utilized in machine state detection which can be used in predictive maintenance or online optimization. This paper presents a comparison of freely available time series clustering algorithms, by applying several combinations of different algorithms to a database of public benchmark technical data.}, language = {en}, urldate = {2020-09-28}, journal = {Procedia CIRP}, author = {Hennig, Martin and Grafinger, Manfred and Gerhard, Detlef and Dumss, Stefan and Rosenberger, Patrick}, month = jan, year = {2020}, keywords = {Industry 4.0, Internet of Things, Machine Learning, Predictive Maintenance, Time Series Clustering, Unsupervised Learning}, pages = {1352--1357}, }
@article{nentwich_data-driven_2020, series = {53rd {CIRP} {Conference} on {Manufacturing} {Systems} 2020}, title = {Data-driven {Models} for {Fault} {Classification} and {Prediction} of {Industrial} {Robots}}, volume = {93}, issn = {2212-8271}, url = {http://www.sciencedirect.com/science/article/pii/S2212827120307642}, doi = {10.1016/j.procir.2020.04.126}, abstract = {Economic data acquisition and storage have been key enablers to pave the way for data-driven predictions of machine downtimes. Regarding industrial robots, such predictions can maximize the robot’s availability and effective life span. This paper focuses on the comparison of different data-driven models for robot fault prediction and classification by applying them to a data set derived from a robot test bed and illuminates the data transformation process from raw sensor data to domain knowledge motivated robot health indicators.}, language = {en}, urldate = {2020-09-28}, journal = {Procedia CIRP}, author = {Nentwich, Corbinian and Junker, Sebastian and Reinhart, Gunther}, month = jan, year = {2020}, pages = {1055--1060}, }
@article{fahle_systematic_2020, series = {53rd {CIRP} {Conference} on {Manufacturing} {Systems} 2020}, title = {Systematic review on machine learning ({ML}) methods for manufacturing processes – {Identifying} artificial intelligence ({AI}) methods for field application}, volume = {93}, issn = {2212-8271}, url = {http://www.sciencedirect.com/science/article/pii/S2212827120307435}, doi = {10.1016/j.procir.2020.04.109}, abstract = {Artificial Intelligence (AI) and especially machine learning (ML) become increasingly more frequently applicable in factory operations. This paper presents a systematic review of today’s applications of ML techniques in the factory environment. The utilization of ML methods related to manufacturing process planning and control, predictive maintenance, quality control, in situ process control and optimization, logistics, robotics, assistance and learning systems for shopfloor employees are being analyzed. Moreover, an overview of ML training concepts in learning factories is given. Furthermore, these concepts will be analyzed regarding the implemented ML method. Finally, research gaps are identified.}, language = {en}, urldate = {2020-09-28}, journal = {Procedia CIRP}, author = {Fahle, Simon and Prinz, Christopher and Kuhlenkötter, Bernd}, month = jan, year = {2020}, keywords = {Artificial Intelligence, factory operation, machine learning, production systems}, pages = {413--418}, }
@article{neto_digital_2020, series = {53rd {CIRP} {Conference} on {Manufacturing} {Systems} 2020}, title = {Digital twins in manufacturing: an assessment of drivers, enablers and barriers to implementation}, volume = {93}, issn = {2212-8271}, shorttitle = {Digital twins in manufacturing}, url = {http://www.sciencedirect.com/science/article/pii/S2212827120307733}, doi = {10.1016/j.procir.2020.04.131}, abstract = {As we live through the fourth industrial revolution, cutting-edge technologies look to change the way manufacturing systems operate. In this context, an important technological framework gaining popularity is the digital twin, which enables a virtual mirror of a real subject, used in manufacturing to assess performance and predict behavior. In this study, we interview experts and review the literature to gain an overview of what exactly drives companies to look for digital twin solutions in the manufacturing environment, what factors enable these initiatives to be successful, and what are the barriers that compromise or slow down implementation efforts.}, language = {en}, urldate = {2020-09-28}, journal = {Procedia CIRP}, author = {Neto, Anis Assad and Deschamps, Fernando and da Silva, Elias Ribeiro and de Lima, Edson Pinheiro}, month = jan, year = {2020}, keywords = {Industry 4.0, digital twin, manufacturing, smart factory}, pages = {210--215}, }
@article{muhlbauer_automated_2020, series = {53rd {CIRP} {Conference} on {Manufacturing} {Systems} 2020}, title = {Automated {Data} {Labeling} and {Anomaly} {Detection} {Using} {Airborne} {Sound} {Analysis}}, volume = {93}, issn = {2212-8271}, url = {http://www.sciencedirect.com/science/article/pii/S2212827120307587}, doi = {10.1016/j.procir.2020.04.121}, abstract = {Anomaly detection is facing the challenge of generating a maximum of information with a limited number of sensors in production machines and minimal effort of data analysis. Machine operators are often able to perceive changes acoustically and based on experience. In order to imitate this intelligent human capability, a systematic methodology has been developed in this work. Firstly, an introduction to this topic will be given and the acoustic sensor set up as well as the data preprocessing will be described. Secondly, an approach for automatic data labeling as an input for the intelligent anomaly detection will be presented. Using a model-based approach, anomaly detection is performed in the next step. This allows not only the detection of gradual and sudden process changes but also systematic problem solving. Finally, the anomaly detection will be validated for cutting processes.}, language = {en}, urldate = {2020-09-28}, journal = {Procedia CIRP}, author = {Mühlbauer, Matthias and Würschinger, Hubert and Polzer, Dominik and Ju, Shu and Hanenkamp, Nico}, month = jan, year = {2020}, keywords = {Airborne Sound, Anomaly Detection, Automated Data Labeling, Cutting Processes, Cyclic Production Processes, Process Monitoring}, pages = {1247--1252}, }
@article{kozjek_multi-objective_2020, series = {53rd {CIRP} {Conference} on {Manufacturing} {Systems} 2020}, title = {Multi-objective adjustment of remaining useful life predictions based on reinforcement learning}, volume = {93}, issn = {2212-8271}, url = {http://www.sciencedirect.com/science/article/pii/S2212827120306582}, doi = {10.1016/j.procir.2020.03.051}, abstract = {Effective tracking of degradation in machine tools or vehicle, ship, and aircraft engines is key to ensure their high utilization, effective maintenance, and safety. Data from the built-in sensors can be used to build models that accurately predict the remaining useful life (RUL) of the observed system. However, existing approaches often lack the ability to incorporate domain-specific knowledge in form of degradation models. This paper proposes a reinforcement-learning based approach for encoding the degradation model used for multi-objective adjustment of RUL predictions. The approach is demonstrated with a case of RUL prediction for aircraft engines.}, language = {en}, urldate = {2020-09-28}, journal = {Procedia CIRP}, author = {Kozjek, Dominik and Malus, Andreja and Vrabič, Rok}, month = jan, year = {2020}, keywords = {predictive maintenance, reinforcement learning, remaining useful life}, pages = {425--430}, }
@article{sahal_big_2020, title = {Big data and stream processing platforms for {Industry} 4.0 requirements mapping for a predictive maintenance use case}, volume = {54}, issn = {0278-6125}, url = {http://www.sciencedirect.com/science/article/pii/S0278612519300937}, doi = {10.1016/j.jmsy.2019.11.004}, language = {en}, urldate = {2020-03-24}, journal = {Journal of Manufacturing Systems}, author = {Sahal, Radhya and Breslin, John G. and Ali, Muhammad Intizar}, month = jan, year = {2020}, keywords = {Big Data, Industry 4.0, Predictive maintenance, Railway, Stream processing, Wind turbines}, pages = {138--151}, }
@article{montero_jimenez_towards_2020, title = {Towards multi-model approaches to predictive maintenance: {A} systematic literature survey on diagnostics and prognostics}, volume = {56}, issn = {0278-6125}, shorttitle = {Towards multi-model approaches to predictive maintenance}, url = {http://www.sciencedirect.com/science/article/pii/S0278612520301187}, doi = {10.1016/j.jmsy.2020.07.008}, abstract = {The use of a modern technological system requires a good engineering approach, optimized operations, and proper maintenance in order to keep the system in an optimal state. Predictive maintenance focuses on the organization of maintenance actions according to the actual health state of the system, aiming at giving a precise indication of when a maintenance intervention will be necessary. Predictive maintenance is normally implemented by means of specialized computational systems that incorporate one of several models to fulfil diagnostics and prognostics tasks. As complexity of technological systems increases over time, single-model approaches hardly fulfil all functions and objectives for predictive maintenance systems. It is increasingly common to find research studies that combine different models in multi-model approaches to overcome complexity of predictive maintenance tasks, considering the advantages and disadvantages of each single model and trying to combine the best of them. These multi-model approaches have not been extensively addressed by previous review studies on predictive maintenance. Besides, many of the possible combinations for multi-model approaches remain unexplored in predictive maintenance applications; this offers a vast field of opportunities when architecting new predictive maintenance systems. This systematic survey aims at presenting the current trends in diagnostics and prognostics giving special attention to multi-model approaches and summarizing the current challenges and research opportunities.}, language = {en}, urldate = {2020-09-19}, journal = {Journal of Manufacturing Systems}, author = {Montero Jimenez, Juan José and Schwartz, Sébastien and Vingerhoeds, Rob and Grabot, Bernard and Salaün, Michel}, month = jul, year = {2020}, keywords = {Diagnostics, Multi-model approaches, Predictive maintenance, Prognostics, Single-model approaches, Systematic literature review}, pages = {539--557}, }
@article{franceschi_unsupervised_2020, title = {Unsupervised {Scalable} {Representation} {Learning} for {Multivariate} {Time} {Series}}, url = {http://arxiv.org/abs/1901.10738}, abstract = {Time series constitute a challenging data type for machine learning algorithms, due to their highly variable lengths and sparse labeling in practice. In this paper, we tackle this challenge by proposing an unsupervised method to learn universal embeddings of time series. Unlike previous works, it is scalable with respect to their length and we demonstrate the quality, transferability and practicability of the learned representations with thorough experiments and comparisons. To this end, we combine an encoder based on causal dilated convolutions with a novel triplet loss employing time-based negative sampling, obtaining general-purpose representations for variable length and multivariate time series.}, urldate = {2020-07-20}, journal = {arXiv:1901.10738 [cs, stat]}, author = {Franceschi, Jean-Yves and Dieuleveut, Aymeric and Jaggi, Martin}, month = jan, year = {2020}, note = {arXiv: 1901.10738}, keywords = {Computer Science - Machine Learning, Computer Science - Neural and Evolutionary Computing, Statistics - Machine Learning}, }
@inproceedings{pirasteh_interactive_2019, address = {New York, NY, USA}, series = {{WIDM}'19}, title = {Interactive feature extraction for diagnostic trouble codes in predictive maintenance: {A} case study from automotive domain}, isbn = {978-1-4503-6296-2}, shorttitle = {Interactive feature extraction for diagnostic trouble codes in predictive maintenance}, url = {https://dl.acm.org/doi/10.1145/3304079.3310288}, doi = {10.1145/3304079.3310288}, abstract = {Predicting future maintenance needs of equipment can be addressed in a variety of ways. Methods based on machine learning approaches provide an interesting platform for mining large data sets to find patterns that might correlate with a given fault. In this paper, we approach predictive maintenance as a classification problem and use Random Forest to separate data readouts within a particular time window into those corresponding to faulty and non-faulty component categories. We utilize diagnostic trouble codes (DTCs) as an example of event-based data, and propose four categories of features that can be derived from DTCs as a predictive maintenance framework. We test the approach using large-scale data from a fleet of heavy duty trucks, and show that DTCs can be used within our framework as indicators of imminent failures in different components.}, urldate = {2023-05-21}, booktitle = {Proceedings of the {Workshop} on {Interactive} {Data} {Mining}}, publisher = {Association for Computing Machinery}, author = {Pirasteh, Parivash and Nowaczyk, Slawomir and Pashami, Sepideh and Löwenadler, Magnus and Thunberg, Klas and Ydreskog, Henrik and Berck, Peter}, month = feb, year = {2019}, keywords = {Predictive maintenance, diagnostic trouble codes, failure detection, feature extraction}, pages = {1--10}, }
@inproceedings{le_nguyen_semi-supervised_2019, title = {Semi-supervised {Learning} over {Streaming} {Data} using {MOA}}, doi = {10.1109/BigData47090.2019.9006217}, abstract = {Machine learning algorithms for data streams usually suppose that all data examples available for learning are strictly labeled. Unfortunately, in real-world scenarios, data examples are not always labeled. Semi-supervised learning is a challenging task to learn using labeled and unlabeled data at the same time. It is especially relevant in the context of data streams, where the data is generated in real-time, and the labels may be missing due to various factors (e.g., network delay, errors during the communication between sensors, expensive labeling process, and others). In this paper, we present two novel approaches to handle missing labels for classification learning in data streams, namely cluster-and-label and self-training. We discuss the strengths and weaknesses of each solution to establish a baseline to evaluate semi-supervised learning techniques in data streams. These methods are implemented inside the MOA (Massive Online Analysis) open-source software as an internal benchmark component, to help researchers to run experimental comparisons on semi-supervised learning on data streams easily.}, booktitle = {2019 {IEEE} {International} {Conference} on {Big} {Data} ({Big} {Data})}, author = {Le Nguyen, Minh Huong and Gomes, Heitor Murilo and Bifet, Albert}, month = dec, year = {2019}, keywords = {Clustering algorithms, Data models, Labeling, Prediction algorithms, Predictive models, Semisupervised learning, Supervised learning, data streams, semi-supervised learning}, pages = {553--562}, }
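MOA is a Java framework; as a language-neutral illustration of the self-training idea discussed in this paper, the batch scikit-learn sketch below retrains a probabilistic base learner on its own confident predictions for unlabeled examples. The 90% missing-label rate and the 0.9 confidence threshold are assumptions, and the streaming aspect of the paper is not reproduced here.

# Batch analogue (not MOA, not streaming) of the self-training approach: a base
# classifier is refit on its own confident predictions for unlabeled examples, which
# scikit-learn marks with the label -1.
import numpy as np
from sklearn.datasets import make_classification
from sklearn.semi_supervised import SelfTrainingClassifier
from sklearn.linear_model import SGDClassifier
from sklearn.metrics import accuracy_score

X, y = make_classification(n_samples=2000, n_features=10, random_state=0)
y_partial = y.copy()
mask = np.random.default_rng(0).random(len(y)) < 0.9  # hide 90% of the labels
y_partial[mask] = -1                                   # -1 marks "unlabeled"

base = SGDClassifier(loss="log_loss", random_state=0)  # incremental learner, as in streams
model = SelfTrainingClassifier(base, threshold=0.9).fit(X, y_partial)
print("accuracy on hidden labels:",
      accuracy_score(y[mask], model.predict(X[mask])))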
@inproceedings{verdun_remote_2019, address = {Tokyo, Japan}, title = {Remote {Diagnosis} and {Condition}-based {Maintenance} for {Rolling} {Stock} at {SNCF}}, abstract = {For the rolling stock, the new generation of highly complex and communicating trains will fundamentally transform the current landscape and the industrial sector in the next few years. As the rolling stock is changing, its maintenance must evolve to meet new challenges. Nowadays, the maintenance system is mostly characterized by a combination of several maintenance types and strategies which goes from preventive systematic maintenance to corrective maintenance. The introduction of on-board/wayside diagnostic systems presents a significant opportunity for reducing maintenance costs, while also having a strong positive impact on reliability, availability and quality of service. Following this way, a new approach based on remote diagnosis and condition based maintenance has been incorporated into the SNCF maintenance process few years ago. This paper gives an overview of the work carried out over the last years, with the challenges encountered and the first results.}, author = {Verdun, Cyril and Turgis, Fabien and Audier, Pierre}, year = {2019}, }
@inproceedings{bouchikhi_kernel_2019, title = {Kernel {Based} {Online} {Change} {Point} {Detection}}, doi = {10.23919/EUSIPCO.2019.8902582}, abstract = {Detecting change points in time series data is a challenging problem, in particular when no prior information on the data distribution and the nature of the change is available. In a former work, we introduced an online non-parametric change-point detection framework built upon direct density ratio estimation over two consecutive time segments, rather than modeling densities separately. This algorithm based on the theory of reproducing kernels showed positive and reliable detection results for a variety of problems. To further improve the detection performance of this approach, we propose in this paper to modify the original cost function in order to achieve unbiasedness of the density ratio estimation under the null hypothesis. Theoretical analysis and numerical simulations confirm the improved behavior of this method, as well as its efficiency compared to a state of the art one. Application to sentiment change detection in Twitter data streams is also presented.}, booktitle = {2019 27th {European} {Signal} {Processing} {Conference} ({EUSIPCO})}, author = {Bouchikhi, Ikram and Ferrari, André and Richard, Cédric and Bourrier, Anthony and Bernot, Marc}, month = sep, year = {2019}, note = {ISSN: 2076-1465}, keywords = {Change detection algorithms, Dictionaries, Estimation, Europe, Kernel, Non-parametric change-point detection, Signal processing, Signal processing algorithms, convergence analysis, kernel least-mean-square algorithm, online learning, reproducing kernel Hilbert space}, pages = {1--5}, }
@article{fahy_dynamic_2019, title = {Dynamic {Feature} {Selection} for {Clustering} {High} {Dimensional} {Data} {Streams}}, volume = {7}, issn = {2169-3536}, doi = {10.1109/ACCESS.2019.2932308}, abstract = {Change in a data stream can occur at the concept level and at the feature level. Change at the feature level can occur if new, additional features appear in the stream or if the importance and relevance of a feature changes as the stream progresses. This type of change has not received as much attention as concept-level change. Furthermore, a lot of the methods proposed for clustering streams (density-based, graph-based, and grid-based) rely on some form of distance as a similarity metric and this is problematic in high-dimensional data where the curse of dimensionality renders distance measurements and any concept of “density” difficult. To address these two challenges we propose combining them and framing the problem as a feature selection problem, specifically a dynamic feature selection problem. We propose a dynamic feature mask for clustering high dimensional data streams. Redundant features are masked and clustering is performed along unmasked, relevant features. If a feature's perceived importance changes, the mask is updated accordingly; previously unimportant features are unmasked and features which lose relevance become masked. The proposed method is algorithm-independent and can be used with any of the existing density-based clustering algorithms which typically do not have a mechanism for dealing with feature drift and struggle with high-dimensional data. We evaluate the proposed method on four density-based clustering algorithms across four high-dimensional streams; two text streams and two image streams. In each case, the proposed dynamic feature mask improves clustering performance and reduces the processing time required by the underlying algorithm. Furthermore, change at the feature level can be observed and tracked.}, journal = {IEEE Access}, author = {Fahy, Conor and Yang, Shengxiang}, year = {2019}, note = {Conference Name: IEEE Access}, keywords = {Clustering algorithms, Correlation, Data stream clustering, Feature extraction, Heuristic algorithms, Measurement, Microsoft Windows, Streaming media, dynamic feature selection, feature drift, feature evolution, unsupervised feature selection}, pages = {127128--127140}, }
@inproceedings{chawathe_condition_2019, title = {Condition {Monitoring} of {Hydraulic} {Systems} by {Classifying} {Sensor} {Data} {Streams}}, doi = {10.1109/CCWC.2019.8666564}, abstract = {Condition-based maintenance (CBM) of hydraulic systems requires methods for condition monitoring: Sensors installed in a hydraulic system for this purpose generate streams of real-time data that must be analyzed to accurately characterize the health of the system. Prior work has developed an experimental hydraulic system with such an installation and yielded a public dataset of sensor readings with associated values of condition variables that quantify the system's health. This paper presents classification-based methods for inferring these condition variables from the sensor data streams. These methods significantly improve on the classification accuracy reported in prior work on this data. Further, this accuracy is maintained even when the number of sensor-based attributes used as input is substantially reduced.}, booktitle = {2019 {IEEE} 9th {Annual} {Computing} and {Communication} {Workshop} and {Conference} ({CCWC})}, author = {Chawathe, Sudarshan S.}, month = jan, year = {2019}, keywords = {Bars, Condition monitoring, Feature extraction, Hydraulic systems, Monitoring, Training, classification, condition monitoring, condition-based maintenance, hydraulic systems, sensors}, pages = {0898--0904}, }
@article{peng_new_2019, title = {A {New} {Hierarchical} {Framework} for {Detection} and {Isolation} of {Multiple} {Faults} in {Complex} {Industrial} {Processes}}, volume = {7}, issn = {2169-3536}, doi = {10.1109/ACCESS.2019.2892487}, abstract = {In actual production practice, the occurrence probability of multiple faults is much higher than that of a single fault. Since the composition of multiple faults is uncertain, it is difficult to establish a single model for multifault diagnosis. In this paper, a new hierarchical framework is proposed for solving multifault detection and isolation problems. First, an adaptive dynamic kernel independent component analysis method is proposed for time-varying and unknown multifault detection. After that, a sparse local exponential discriminant analysis method is developed for the multimodal multifault isolation problem. Finally, the Tennessee Eastman process is used to validate the performance of the proposed methods, and the experimental results show that the proposed methods can efficiently detect and isolate multiple faults.}, journal = {IEEE Access}, author = {Peng, Kaixiang and Ren, Zhihao and Dong, Jie and Ma, Liang}, year = {2019}, note = {Conference Name: IEEE Access}, keywords = {Covariance matrices, Eigenvalues and eigenfunctions, Independent component analysis, Kernel, Monitoring, Multiple faults, Principal component analysis, Production, accurate isolation, complex industrial processes, hierarchical framework, real-time detection}, pages = {12006--12015}, }
@article{luo_discriminant_2019, title = {Discriminant autoencoder for feature extraction in fault diagnosis}, volume = {192}, issn = {0169-7439}, url = {https://www.sciencedirect.com/science/article/pii/S0169743918306257}, doi = {10.1016/j.chemolab.2019.103814}, abstract = {Nowadays, some traditional autoencoders and their extensions have been widely applied in data-driven fault diagnosis for feature extraction. However, because of the fact that traditional autoencoders could not make use of label information, the representations extracted by these traditional autoencoders may show disappointing results when handling ultimate discriminative task. In this paper, we propose a novel semi-supervised autoencoder, which is named as Discriminant Autoencoder. The training of proposed Discriminant Autoencoder includes a supervised process and an unsupervised process. And a distance penalty is added into the loss function, which enables the proposed Discriminant Autoencoder to extract more suitable representations from industrial data samples. In order to explain the effectiveness of this semi-supervised autoencoder, we carry out some experiments and give out a mathematical derivation. Here we use an industrial batch process dataset as the criterion dataset to test the performance of proposed Discriminant Autoencoder and other conventional autoencoders.}, language = {en}, urldate = {2022-05-02}, journal = {Chemometrics and Intelligent Laboratory Systems}, author = {Luo, Xiaoyi and Li, Xianmin and Wang, Ziyang and Liang, Jun}, month = sep, year = {2019}, keywords = {Autoencoder, Fault diagnosis, Feature extraction, Semi-supervised autoencoder}, pages = {103814}, }
@article{kolesnikov_revisiting_2019, title = {Revisiting {Self}-{Supervised} {Visual} {Representation} {Learning}}, url = {http://arxiv.org/abs/1901.09005}, abstract = {Unsupervised visual representation learning remains a largely unsolved problem in computer vision research. Among a big body of recently proposed approaches for unsupervised learning of visual representations, a class of self-supervised techniques achieves superior performance on many challenging benchmarks. A large number of the pretext tasks for self-supervised learning have been studied, but other important aspects, such as the choice of convolutional neural networks (CNN), has not received equal attention. Therefore, we revisit numerous previously proposed self-supervised models, conduct a thorough large scale study and, as a result, uncover multiple crucial insights. We challenge a number of common practices in selfsupervised visual representation learning and observe that standard recipes for CNN design do not always translate to self-supervised representation learning. As part of our study, we drastically boost the performance of previously proposed techniques and outperform previously published state-of-the-art results by a large margin.}, urldate = {2022-04-25}, journal = {arXiv:1901.09005 [cs]}, author = {Kolesnikov, Alexander and Zhai, Xiaohua and Beyer, Lucas}, month = jan, year = {2019}, note = {arXiv: 1901.09005}, keywords = {Computer Science - Computer Vision and Pattern Recognition}, }
@article{van_de_ven_three_2019, title = {Three scenarios for continual learning}, url = {http://arxiv.org/abs/1904.07734}, abstract = {Standard artificial neural networks suffer from the well-known issue of catastrophic forgetting, making continual or lifelong learning difficult for machine learning. In recent years, numerous methods have been proposed for continual learning, but due to differences in evaluation protocols it is difficult to directly compare their performance. To enable more structured comparisons, we describe three continual learning scenarios based on whether at test time task identity is provided and--in case it is not--whether it must be inferred. Any sequence of well-defined tasks can be performed according to each scenario. Using the split and permuted MNIST task protocols, for each scenario we carry out an extensive comparison of recently proposed continual learning methods. We demonstrate substantial differences between the three scenarios in terms of difficulty and in terms of how efficient different methods are. In particular, when task identity must be inferred (i.e., class incremental learning), we find that regularization-based approaches (e.g., elastic weight consolidation) fail and that replaying representations of previous experiences seems required for solving this scenario.}, urldate = {2022-03-19}, journal = {arXiv:1904.07734 [cs, stat]}, author = {van de Ven, Gido M. and Tolias, Andreas S.}, month = apr, year = {2019}, note = {arXiv: 1904.07734}, keywords = {Computer Science - Artificial Intelligence, Computer Science - Computer Vision and Pattern Recognition, Computer Science - Machine Learning, Statistics - Machine Learning}, }
@article{wang_deep_2019, series = {Deep {Learning} for {Pattern} {Recognition}}, title = {Deep learning for sensor-based activity recognition: {A} survey}, volume = {119}, issn = {0167-8655}, shorttitle = {Deep learning for sensor-based activity recognition}, url = {https://www.sciencedirect.com/science/article/pii/S016786551830045X}, doi = {10.1016/j.patrec.2018.02.010}, abstract = {Sensor-based activity recognition seeks the profound high-level knowledge about human activities from multitudes of low-level sensor readings. Conventional pattern recognition approaches have made tremendous progress in the past years. However, those methods often heavily rely on heuristic hand-crafted feature extraction, which could hinder their generalization performance. Additionally, existing methods are undermined for unsupervised and incremental learning tasks. Recently, the recent advancement of deep learning makes it possible to perform automatic high-level feature extraction thus achieves promising performance in many areas. Since then, deep learning based methods have been widely adopted for the sensor-based activity recognition tasks. This paper surveys the recent advance of deep learning based sensor-based activity recognition. We summarize existing literature from three aspects: sensor modality, deep model, and application. We also present detailed insights on existing work and propose grand challenges for future research.}, language = {en}, urldate = {2022-03-17}, journal = {Pattern Recognition Letters}, author = {Wang, Jindong and Chen, Yiqiang and Hao, Shuji and Peng, Xiaohui and Hu, Lisha}, month = mar, year = {2019}, keywords = {Activity recognition, Deep learning, Pattern recognition, Pervasive computing}, pages = {3--11}, }
@inproceedings{costa_junior_novelty_2019, title = {Novelty {Detection} for {Multi}-{Label} {Stream} {Classification}}, doi = {10.1109/BRACIS.2019.00034}, abstract = {In Multi-Label Stream Classification (MLSC) examples arriving in a stream can be simultaneously classified into multiple classes. This is a very challenging task, especially considering that new classes can emerge during the stream (Concept Evolution), and known classes can change over time (Concept Drift). In real situations, these characteristics come together with a scenario with Infinitely Delayed Labels, where we can never access the true class labels of the examples to update classifiers. In order to overcome these issues, this paper proposes a new method called MultI-label learNing Algorithm for Data Streams with Binary Relevance transformation (MINAS-BR). Our proposal uses a new Novelty Detection (ND) procedure to detect concept evolution and concept drift, being updated in an unsupervised fashion. We also propose a new methodology to evaluate MLSC methods in scenarios with Infinitely Delayed Labels. Experiments over synthetic data sets attested the potential of MINAS-BR, which was able to adapt to different concept drift and concept evolution scenarios, obtaining superior or competitive performances in comparison to literature baselines.}, booktitle = {2019 8th {Brazilian} {Conference} on {Intelligent} {Systems} ({BRACIS})}, author = {Costa Júnior, Joel D. and Faria, Elaine R. and Silva, Jonathan A. and Gama, João and Cerri, Ricardo}, month = oct, year = {2019}, note = {ISSN: 2643-6264}, keywords = {Adaptation models, Computational complexity, Computational modeling, Concept Evolution, Data models, Infinitely Delayed Labels, Multi-label Stream Classification, Novelty Detection, Task analysis, Training, Training data}, pages = {144--149}, }
@article{carnein_optimizing_2019, title = {Optimizing {Data} {Stream} {Representation}: {An} {Extensive} {Survey} on {Stream} {Clustering} {Algorithms}}, volume = {61}, issn = {1867-0202}, shorttitle = {Optimizing {Data} {Stream} {Representation}}, url = {https://doi.org/10.1007/s12599-019-00576-5}, doi = {10.1007/s12599-019-00576-5}, abstract = {Analyzing data streams has received considerable attention over the past decades due to the widespread usage of sensors, social media and other streaming data sources. A core research area in this field is stream clustering which aims to recognize patterns in an unordered, infinite and evolving stream of observations. Clustering can be a crucial support in decision making, since it aims for an optimized aggregated representation of a continuous data stream over time and allows to identify patterns in large and high-dimensional data. A multitude of algorithms and approaches has been developed that are able to find and maintain clusters over time in the challenging streaming scenario. This survey explores, summarizes and categorizes a total of 51 stream clustering algorithms and identifies core research threads over the past decades. In particular, it identifies categories of algorithms based on distance thresholds, density grids and statistical models as well as algorithms for high dimensional data. Furthermore, it discusses applications scenarios, available software and how to configure stream clustering algorithms. This survey is considerably more extensive than comparable studies, more up-to-date and highlights how concepts are interrelated and have been developed over time.}, language = {en}, number = {3}, urldate = {2022-03-15}, journal = {Business \& Information Systems Engineering}, author = {Carnein, Matthias and Trautmann, Heike}, month = jun, year = {2019}, pages = {277--297}, }
@inproceedings{krishnamurthy_application_2019, title = {Application of {Machine} {Learning} and {Spatial} {Bootstrapping} to {Image} {Processing} for {Predictive} {Maintenance}}, doi = {10.1109/BigData47090.2019.9006439}, abstract = {Image processing and machine learning have become valuable tools for predictive maintenance applications for a wide variety of industrial and commercial components. We present a novel light transmission image processing methodology utilizing statistical distance algorithms (Wasserstein distance (WD), Kolmogorov-Smirnov statistic (K-S)) for physical attribute correlation combined with Bayesian linear regression to estimate wear level and lifetime prediction for air filters. Robustness of this machine learning algorithm was evaluated using spatial block bootstrapping to generate synthetic training data to estimate the 95\% prediction interval for air filter lifetime. Validation of this lifetime prediction was performed using imaging measurements on a test air filter, which showed good agreement with the machine learning model. The proposed machine learning based image analytics framework effectively enables robust predictions of component wear for predictive maintenance.}, booktitle = {2019 {IEEE} {International} {Conference} on {Big} {Data} ({Big} {Data})}, author = {Krishnamurthy, Vikram and Nezafati, Kusha and Singh, Vikrant}, month = dec, year = {2019}, keywords = {Image Processing, Imaging, Kolmogorov-Smirnov statistic, Machine Learning, Machine learning, Measurement, Predictive Maintenance, Predictive maintenance, Sensors, Spatial Bootstrapping, Training, Training data, Wasserstein Distance}, pages = {4395--4401}, }
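As a rough illustration of the statistical-distance ingredients named in the abstract above, the snippet below compares two synthetic 1-D intensity samples with the Wasserstein distance and the Kolmogorov-Smirnov statistic via SciPy. It is a generic sketch, not the authors' light-transmission pipeline or their Bayesian regression and spatial bootstrapping steps.

# Compare two intensity distributions with the Wasserstein distance and the K-S statistic (SciPy).
import numpy as np
from scipy.stats import wasserstein_distance, ks_2samp

rng = np.random.default_rng(0)
clean_filter = rng.normal(loc=200, scale=10, size=5000)   # hypothetical light-transmission intensities
worn_filter = rng.normal(loc=160, scale=25, size=5000)    # hypothetical worn-filter intensities

wd = wasserstein_distance(clean_filter, worn_filter)
ks_stat, p_value = ks_2samp(clean_filter, worn_filter)
print(f"Wasserstein distance: {wd:.2f}, K-S statistic: {ks_stat:.3f} (p={p_value:.2e})")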
@article{zang_methods_2019, title = {Methods for fault diagnosis of high-speed railways: {A} review}, volume = {233}, issn = {1748-006X}, shorttitle = {Methods for fault diagnosis of high-speed railways}, url = {https://doi.org/10.1177/1748006X18823932}, doi = {10.1177/1748006X18823932}, abstract = {High-speed railways have a high demand for safety, but they are complex systems when it comes to fault diagnosis. The failure propagation path is difficult to trace which makes it hard to detect and identify a fault in the traditional way like signal-based methods. In recent years, artificial intelligence methods have been successfully applied in system health diagnosis and prognosis. Fault diagnosis methods based on artificial intelligence methods provide a new inspiration for fault diagnosis in the high-speed railway systems. In this article, the current research status of fault diagnosis was introduced, and the practical application of fault diagnosis methods in high-speed railways was summarized. Then taking the train control system as an example, fault diagnosis based on the artificial intelligence methods was discussed using several case studies; the results proved that the fusion of different methods has the potential to improve the diagnostic accuracy. Finally, the future research direction of fault diagnosis for high-speed railways was proposed.}, language = {en}, number = {5}, urldate = {2022-03-05}, journal = {Proceedings of the Institution of Mechanical Engineers, Part O: Journal of Risk and Reliability}, author = {Zang, Yu and Shangguan, Wei and Cai, Baigen and Wang, Huashen and Pecht, Michael G}, month = oct, year = {2019}, note = {Publisher: SAGE Publications}, keywords = {High-speed railways, artificial intelligence, case study, development trend, fault diagnosis}, pages = {908--922}, }
@article{ding_intelligent_2019, title = {Intelligent fault diagnosis for rotating machinery using deep {Q}-network based health state classification: {A} deep reinforcement learning approach}, volume = {42}, issn = {1474-0346}, shorttitle = {Intelligent fault diagnosis for rotating machinery using deep {Q}-network based health state classification}, url = {https://www.sciencedirect.com/science/article/pii/S1474034619305506}, doi = {10.1016/j.aei.2019.100977}, abstract = {Fault diagnosis methods for rotating machinery have always been a hot research topic, and artificial intelligence-based approaches have attracted increasing attention from both researchers and engineers. Among those related studies and methods, artificial neural networks, especially deep learning-based methods, are widely used to extract fault features or classify fault features obtained by other signal processing techniques. Although such methods could solve the fault diagnosis problems of rotating machinery, there are still two deficiencies. (1) Unable to establish direct linear or non-linear mapping between raw data and the corresponding fault modes, the performance of such fault diagnosis methods highly depends on the quality of the extracted features. (2) The optimization of neural network architecture and parameters, especially for deep neural networks, requires considerable manual modification and expert experience, which limits the applicability and generalization of such methods. As a remarkable breakthrough in artificial intelligence, AlphaGo, a representative achievement of deep reinforcement learning, provides inspiration and direction for the aforementioned shortcomings. Combining the advantages of deep learning and reinforcement learning, deep reinforcement learning is able to build an end-to-end fault diagnosis architecture that can directly map raw fault data to the corresponding fault modes. Thus, based on deep reinforcement learning, a novel intelligent diagnosis method is proposed that is able to overcome the shortcomings of the aforementioned diagnosis methods. Validation tests of the proposed method are carried out using datasets of two types of rotating machinery, rolling bearings and hydraulic pumps, which contain a large number of measured raw vibration signals under different health states and working conditions. The diagnosis results show that the proposed method is able to obtain intelligent fault diagnosis agents that can mine the relationships between the raw vibration signals and fault modes autonomously and effectively. Considering that the learning process of the proposed method depends only on the replayed memories of the agent and the overall rewards, which represent much weaker feedback than that obtained by the supervised learning-based method, the proposed method is promising in establishing a general fault diagnosis architecture for rotating machinery.}, language = {en}, urldate = {2022-03-03}, journal = {Advanced Engineering Informatics}, author = {Ding, Yu and Ma, Liang and Ma, Jian and Suo, Mingliang and Tao, Laifa and Cheng, Yujie and Lu, Chen}, month = oct, year = {2019}, keywords = {Deep Q-network, Deep reinforcement learning, Fault diagnosis, Rotating machinery}, pages = {100977}, }
@inproceedings{zhang_equipment_2019, address = {Cham}, series = {Lecture {Notes} in {Computer} {Science}}, title = {Equipment {Health} {Indicator} {Learning} {Using} {Deep} {Reinforcement} {Learning}}, isbn = {978-3-030-10997-4}, doi = {10.1007/978-3-030-10997-4_30}, abstract = {Predictive Maintenance (PdM) is gaining popularity in industrial operations as it leverages the power of Machine Learning and Internet of Things (IoT) to predict the future health status of equipment. Health Indicator Learning (HIL) plays an important role in PdM as it learns a health curve representing the health conditions of equipment over time, so that health degradation is visually monitored and optimal planning can be performed accordingly to minimize the equipment downtime. However, HIL is a hard problem due to the fact that there is usually no way to access the actual health of the equipment during most of its operation. Traditionally, HIL is addressed by hand-crafting domain-specific performance indicators or through physical modeling, which is expensive and inapplicable for some industries. In this paper, we propose a purely data-driven approach for solving the HIL problem based on Deep Reinforcement Learning (DRL). Our key insight is that the HIL problem can be mapped to a credit assignment problem. Then DRL learns from failures by naturally backpropagating the credit of failures into intermediate states. In particular, given the observed time series of sensor, operating and event (failure) data, we learn a sequence of health indicators that represent the underlying health conditions of physical equipment. We demonstrate that the proposed methods significantly outperform the state-of-the-art methods for HIL and provide explainable insights about the equipment health. In addition, we propose the use of the learned health indicators to predict when the equipment is going to reach its end-of-life, and demonstrate how an explainable health curve is way more useful for a decision maker than a single-number prediction by a black-box model. The proposed approach has a great potential in a broader range of systems (e.g., economical and biological) as a general framework for the automatic learning of the underlying performance of complex systems.}, language = {en}, booktitle = {Machine {Learning} and {Knowledge} {Discovery} in {Databases}}, publisher = {Springer International Publishing}, author = {Zhang, Chi and Gupta, Chetan and Farahat, Ahmed and Ristovski, Kosta and Ghosh, Dipanjan}, editor = {Brefeld, Ulf and Curry, Edward and Daly, Elizabeth and MacNamee, Brian and Marascu, Alice and Pinelli, Fabio and Berlingerio, Michele and Hurley, Neil}, year = {2019}, keywords = {Deep Reinforcement Learning, Health indicator learning, Predictive Maintenance}, pages = {488--504}, }
@inproceedings{tian_concept_2019, address = {New York, NY, USA}, series = {{CIKM} '19}, title = {Concept {Drift} {Adaption} for {Online} {Anomaly} {Detection} in {Structural} {Health} {Monitoring}}, isbn = {978-1-4503-6976-3}, url = {https://doi.org/10.1145/3357384.3357816}, doi = {10.1145/3357384.3357816}, abstract = {Despite its success for anomaly detection in the scenario where only data representing normal behavior are available, one-class support vector machine (OCSVM) still has challenge in dealing with non-stationary data stream, where the underlying distributions of data are time-varying. Existing OCSVM-based online learning methods incrementally update the model to address the challenge, however, they solely rely on the location relationship between a test sample and error support vectors. To better accommodate normal behavior evolution, online anomaly detection in non-stationary data stream is formulated as a concept drift adaptation problem in this paper. It is proposed that OCSVM-based incremental learning is only performed in the case of a normal drift. For an incoming sample, its relative relationship with three sets of vectors in OCSVM, namely margin support vectors, error support vectors, and reserve vectors is fully utilized to estimate whether a normal drift is emerging. Extensive experiments in the field of structural health monitoring have been conducted and the results have shown that the proposed simple approach outperforms the existing OCSVM-based online learning algorithms for anomaly detection.}, urldate = {2021-03-26}, booktitle = {Proceedings of the 28th {ACM} {International} {Conference} on {Information} and {Knowledge} {Management}}, publisher = {Association for Computing Machinery}, author = {Tian, Hongda and Khoa, Nguyen Lu Dang and Anaissi, Ali and Wang, Yang and Chen, Fang}, month = nov, year = {2019}, keywords = {anomaly detection, concept drift, data stream, ecml, incremental/online learning, one-class support vector machine}, pages = {2813--2821}, }
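For orientation, the snippet below shows the naive baseline that the entry above improves on: scikit-learn's One-Class SVM periodically refit on a sliding window of recent samples judged normal. The paper's actual contribution (estimating drift from margin, error and reserve support vectors and updating incrementally) is not reproduced here; the drifting stream is synthetic.

# Windowed One-Class SVM for streaming anomaly detection (naive refit, not the paper's incremental scheme).
import numpy as np
from sklearn.svm import OneClassSVM

rng = np.random.default_rng(1)
window = list(rng.normal(0, 1, size=(500, 2)))            # initial "normal" behaviour
model = OneClassSVM(nu=0.05, gamma="scale").fit(np.array(window))

for t in range(2000):
    drift = 0.002 * t                                     # slowly drifting normal behaviour
    x = rng.normal(drift, 1, size=(1, 2))
    is_anomaly = model.predict(x)[0] == -1
    if not is_anomaly:                                    # adapt only on samples judged normal
        window.append(x[0]); window = window[-500:]
    if t % 250 == 0:                                      # periodic refit on the recent window
        model = OneClassSVM(nu=0.05, gamma="scale").fit(np.array(window))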
@article{cheng_machine_2019, title = {Machine {Health} {Monitoring} {Using} {Adaptive} {Kernel} {Spectral} {Clustering} and {Deep} {Long} {Short}-{Term} {Memory} {Recurrent} {Neural} {Networks}}, volume = {15}, issn = {1941-0050}, doi = {10.1109/TII.2018.2866549}, abstract = {Machine health monitoring is of great importance in industrial informatics field. Recently, deep learning methods applied to machine health monitoring have been proven effective. However, the existing methods face enormous difficulties in extracting heterogeneous features indicating the variation until failure and revealing the inherent high-dimensional features of massive signals, which affect the accuracy and efficiency of machine health monitoring. In this paper, a novel data-driven machine health monitoring method is proposed using adaptive kernel spectral clustering (AKSC) and deep long short-term memory recurrent neural networks (LSTM-RNN). This method include three steps: First, features in the time domain, frequency domain, and time-frequency domain are, respectively, extracted from massive measured signals. And, an Euclidean distance based algorithm is designed to select degradation features. Second, the AKSC algorithm is introduced to adaptively identify machine anomaly behaviors from multiple degradation features. Third, a new deep learning model (LSTM-RNN) is constructed to update and predict the failure time of the machine. The effectiveness of the proposed method is validated using a set of test-to-failure experimental data. The results show that the performance of the proposed method is competitive with other existing methods.}, number = {2}, journal = {IEEE Transactions on Industrial Informatics}, author = {Cheng, Yiwei and Zhu, Haiping and Wu, Jun and Shao, Xinyu}, month = feb, year = {2019}, note = {Conference Name: IEEE Transactions on Industrial Informatics}, keywords = {Adaptive kernel spectral clustering (AKSC), Degradation, Feature extraction, Frequency-domain analysis, Machine learning, Monitoring, Time-domain analysis, anomaly detection, deep long short-term memory recurrent neural networks (LSTM-RNN), ecml, failure prognostics, machine health monitoring}, pages = {987--997}, }
@article{tsui_big_2019, title = {Big {Data} {Opportunities}: {System} {Health} {Monitoring} and {Management}}, volume = {7}, issn = {2169-3536}, shorttitle = {Big {Data} {Opportunities}}, doi = {10.1109/ACCESS.2019.2917891}, abstract = {The concept of a system, generally defined as an organized set of detailed methods, procedures, and routines that are created to carry out a specific activity or solve a specific problem, has been successfully applied to many domains, ranging from mechanical systems to public health. System health monitoring and management (SHMM) refers to the framework of continuous surveillance, analysis, and interpretation of relevant data for system maintenance, management, and strategic planning. This framework is essential to ensure that an entire system is stable and under control. A fundamental problem in SHMM is the optimal use of correlated active and passive data in tasks including prediction and forecasting, monitoring and surveillance, fault detection and diagnostics, engineering management, and supply chain management. In this paper, we provide a new perspective on SHMM in a big data environment, discuss its relationship with other disciplines, and present several of its applications to complex systems.}, journal = {IEEE Access}, author = {Tsui, Kwok Leung and Zhao, Yang and Wang, Dong}, year = {2019}, note = {Conference Name: IEEE Access}, keywords = {Active and passive data, Big Data, Complex systems, Data analysis, Data models, Informatics, Surveillance, big data, complex systems, ecml, system health monitoring and management}, pages = {68853--68867}, }
@article{feng_online_2019, title = {Online {State}-of-{Health} {Estimation} for {Li}-{Ion} {Battery} {Using} {Partial} {Charging} {Segment} {Based} on {Support} {Vector} {Machine}}, volume = {68}, issn = {1939-9359}, doi = {10.1109/TVT.2019.2927120}, abstract = {The online estimation of battery state-of-health (SOH) is an ever significant issue for the intelligent energy management of the autonomous electric vehicles. Machine-learning based approaches are promising for the online SOH estimation. This paper proposes a machine-learning based algorithm for the online SOH estimation of Li-ion battery. A predictive diagnosis model used in the algorithm is established based on support vector machine (SVM). The support vectors, which reflects the intrinsic characteristics of the Li-ion battery, are determined from the charging data of fresh cells. Furthermore, the coefficients of the SVMs for cells at different SOH are identified once the support vectors are determined. The algorithm functions by comparing partial charging curves with the stored SVMs. Similarity factor is defined after comparison to quantify the SOH of the data under evaluation. The operation of the algorithm only requires partial charging curves, e.g., 15 min charging curves, making fast on-board diagnosis of battery SOH into reality. The partial charging curves can be intercepted from a wide range of voltage section, thereby relieving the pain that there is little chance that the driver charges the battery pack from a predefined state-of-charge. Train, validation, and test are conducted for two commercial Li-ion batteries with Li(NiCoMn)1/3O2 cathode and graphite anode, indicating that the algorithm can estimate the battery SOH with less than 2\% error for 80\% of all the cases, and less than 3\% error for 95\% of all the cases.}, number = {9}, journal = {IEEE Transactions on Vehicular Technology}, author = {Feng, Xuning and Weng, Caihao and He, Xiangming and Han, Xuebing and Lu, Languang and Ren, Dongsheng and Ouyang, Minggao}, month = sep, year = {2019}, note = {Conference Name: IEEE Transactions on Vehicular Technology}, keywords = {Calibration, Electric vehicle, Estimation, Lithium-ion batteries, State of charge, Support vector machines, Voltage measurement, batteries, ecml, energy storage, state estimation, state-of-health}, pages = {8583--8592}, }
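The idea of mapping a partial charging segment to a state-of-health value can be caricatured with support-vector regression in scikit-learn on invented charging curves. The paper's actual algorithm stores SVMs per SOH level and compares partial curves through a similarity factor, which this sketch does not implement; the curve model and noise level below are assumptions.

# Toy support-vector regression from partial charging-curve features to state-of-health (synthetic data).
import numpy as np
from sklearn.svm import SVR
from sklearn.model_selection import train_test_split

rng = np.random.default_rng(2)
soh = rng.uniform(0.7, 1.0, size=300)                     # hypothetical SOH labels
t = np.linspace(0, 15, 30)                                # 15-minute partial charging segment
# Fake voltage segments: ageing lowers the plateau and slows the rise (invented curve model).
curves = 3.6 + 0.4 * soh[:, None] * (1 - np.exp(-t[None, :] / (5 * soh[:, None])))
curves += rng.normal(0, 0.005, curves.shape)

X_tr, X_te, y_tr, y_te = train_test_split(curves, soh, random_state=0)
reg = SVR(kernel="rbf", C=10.0).fit(X_tr, y_tr)
err = np.abs(reg.predict(X_te) - y_te)
print(f"mean absolute SOH error: {err.mean():.4f}")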
@article{zhao_deep_2019, title = {Deep learning and its applications to machine health monitoring}, volume = {115}, issn = {0888-3270}, url = {https://www.sciencedirect.com/science/article/pii/S0888327018303108}, doi = {10.1016/j.ymssp.2018.05.050}, abstract = {Since 2006, deep learning (DL) has become a rapidly growing research direction, redefining state-of-the-art performances in a wide range of areas such as object recognition, image segmentation, speech recognition and machine translation. In modern manufacturing systems, data-driven machine health monitoring is gaining in popularity due to the widespread deployment of low-cost sensors and their connection to the Internet. Meanwhile, deep learning provides useful tools for processing and analyzing these big machinery data. The main purpose of this paper is to review and summarize the emerging research work of deep learning on machine health monitoring. After the brief introduction of deep learning techniques, the applications of deep learning in machine health monitoring systems are reviewed mainly from the following aspects: Auto-encoder (AE) and its variants, Restricted Boltzmann Machines and its variants including Deep Belief Network (DBN) and Deep Boltzmann Machines (DBM), Convolutional Neural Networks (CNN) and Recurrent Neural Networks (RNN). In addition, an experimental study on the performances of these approaches has been conducted, in which the data and code have been online. Finally, some new trends of DL-based machine health monitoring methods are discussed.}, language = {en}, urldate = {2022-02-09}, journal = {Mechanical Systems and Signal Processing}, author = {Zhao, Rui and Yan, Ruqiang and Chen, Zhenghua and Mao, Kezhi and Wang, Peng and Gao, Robert X.}, month = jan, year = {2019}, keywords = {Big data, Deep learning, Machine health monitoring, ecml, health monitoring}, pages = {213--237}, }
@article{luo_development_2019, title = {Development of clustering-based sensor fault detection and diagnosis strategy for chilled water system}, volume = {186}, issn = {0378-7788}, url = {https://www.sciencedirect.com/science/article/pii/S0378778818329207}, doi = {10.1016/j.enbuild.2019.01.006}, abstract = {This paper presents a new clustering-based sensor fault detection and diagnosis (SFDD) strategy for chilled water system. For data clustering, k-means algorithm was used and the optimal quantity of clusters was determined by Davis-Bouldin value. With the cluster centroid dataset, the featuring centroid score (CS) was determined for the fault-free sensor reading dataset thus the threshold for fault detection could be set. The database for sensor fault detection was then formed. By characterizing the CS patterns of different types of sensor fault, the database for sensor fault diagnosis was generated accordingly. Various sensor fault types could be handled, including bias, drift, precision degradation and complete failure. In this study, the developed SFDD strategy was applied to the sensor of primary chilled water return temperature in a water-cooled chilled water system. With the databases of sensor fault detection and diagnosis, the real-time measured sensor readings can be examined. Once sensor fault is detected, the fault type can be confirmed within a day at soonest or 2 days at most. The smallest detectable absolute bias value, absolute drifting rate and precision degradation error could be down to 0.25 °C, 0.025 °C/h and 0.1 °C respectively, demonstrating robustness of the proposed SFDD strategy.}, language = {en}, urldate = {2022-01-14}, journal = {Energy and Buildings}, author = {Luo, X. J. and Fong, K. F. and Sun, Y. J. and Leung, M. K. H.}, month = mar, year = {2019}, keywords = {k-means clustering, Centroid score, Chilled water system, Data mining, Fault detection and diagnosis, Sensor}, pages = {17--36}, }
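A minimal sketch of the clustering ingredients named above, under simplifying assumptions: fit k-means on fault-free sensor readings, pick the cluster count with the Davies-Bouldin index, and use the distance to the nearest centroid as a detection statistic against a fault-free threshold. The fault typing (bias, drift, precision degradation, complete failure) and the chilled-water specifics of the paper are omitted; the data are synthetic.

# k-means fitted on fault-free sensor data, cluster count picked by the Davies-Bouldin index,
# then a centroid-distance score used as a simple fault-detection statistic.
import numpy as np
from sklearn.cluster import KMeans
from sklearn.metrics import davies_bouldin_score

rng = np.random.default_rng(3)
normal = rng.normal([7.0, 12.0], [0.2, 0.3], size=(1000, 2))    # hypothetical chilled-water temperatures

best_k, best_db, best_model = None, np.inf, None
for k in range(2, 8):
    km = KMeans(n_clusters=k, n_init=10, random_state=0).fit(normal)
    db = davies_bouldin_score(normal, km.labels_)
    if db < best_db:
        best_k, best_db, best_model = k, db, km

scores = np.min(best_model.transform(normal), axis=1)            # distance to nearest centroid
threshold = np.percentile(scores, 99)                            # crude fault-free threshold
faulty_reading = np.array([[8.5, 12.0]])                         # e.g. a +1.5 °C bias on the first sensor
print(best_k, np.min(best_model.transform(faulty_reading)) > threshold)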
@article{zhao_pyod_2019, title = {{PyOD}: {A} {Python} {Toolbox} for {Scalable} {Outlier} {Detection}}, shorttitle = {{PyOD}}, url = {http://arxiv.org/abs/1901.01588}, abstract = {PyOD is an open-source Python toolbox for performing scalable outlier detection on multivariate data. Uniquely, it provides access to a wide range of outlier detection algorithms, including established outlier ensembles and more recent neural network-based approaches, under a single, well-documented API designed for use by both practitioners and researchers. With robustness and scalability in mind, best practices such as unit testing, continuous integration, code coverage, maintainability checks, interactive examples and parallelization are emphasized as core components in the toolbox's development. PyOD is compatible with both Python 2 and 3 and can be installed through Python Package Index (PyPI) or https://github.com/yzhao062/pyod.}, urldate = {2022-01-10}, journal = {arXiv:1901.01588 [cs, stat]}, author = {Zhao, Yue and Nasrullah, Zain and Li, Zheng}, month = jun, year = {2019}, note = {arXiv: 1901.01588}, keywords = {Computer Science - Information Retrieval, Computer Science - Machine Learning, Statistics - Machine Learning}, }
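PyOD exposes a scikit-learn-like detector API. The quickstart below, written in the style of PyOD's documented examples and assuming the kNN detector, shows the usual fit / decision_scores_ / predict pattern on synthetic data.

# Typical PyOD usage pattern: fit a detector, read training outlier scores, score new data.
import numpy as np
from pyod.models.knn import KNN   # kNN-based outlier detector from PyOD

rng = np.random.default_rng(4)
X_train = rng.normal(0, 1, size=(500, 3))
X_test = np.vstack([rng.normal(0, 1, size=(95, 3)), rng.normal(6, 1, size=(5, 3))])

clf = KNN(contamination=0.01)
clf.fit(X_train)
train_scores = clf.decision_scores_       # raw outlier scores on the training data
test_labels = clf.predict(X_test)         # 0 = inlier, 1 = outlier
test_scores = clf.decision_function(X_test)
print(test_labels[-5:])                   # the injected outliers should be flagged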
@misc{chen_tennessee_2019, title = {Tennessee {Eastman} simulation dataset}, url = {https://dx.doi.org/10.21227/4519-z502}, doi = {10.21227/4519-z502}, abstract = {The Tennessee Eastman (TE) process simulates actual chemical processes and is widely used as a benchmark to test fault diagnosis and process control. The overall process consists of five operating units: reactor, condenser, vapor-liquid separator, recycle compressor and product stripper. It has standard training and test data sets for fault detection and diagnosis, classification, etc. Each data set is under different operating conditions.}, language = {en}, urldate = {2021-12-08}, author = {Chen, Xiaolu}, year = {2019}, note = {Publication Title: IEEE Dataport Publisher: IEEE}, }
@article{guan_particle_2019, title = {Particle swarm {Optimized} {Density}-based {Clustering} and {Classification}: {Supervised} and unsupervised learning approaches}, volume = {44}, issn = {2210-6502}, shorttitle = {Particle swarm {Optimized} {Density}-based {Clustering} and {Classification}}, url = {https://www.sciencedirect.com/science/article/pii/S2210650217302638}, doi = {10.1016/j.swevo.2018.09.008}, abstract = {Two pattern recognition technologies in the field of machine learning, clustering and classification, have been applied in many domains. Density-based clustering is an essential clustering algorithm. The best known density-based clustering method is Density-Based Spatial Clustering of Applications with Noise (DBSCAN), which can find arbitrary shaped clusters in datasets. DBSCAN has three drawbacks: firstly, the parameters for DBSCAN are hard to set; secondly, the number of clusters cannot be controlled by the users; and thirdly, DBSCAN cannot directly be used as a classifier. In this paper a novel Particle swarm Optimized Density-based Clustering and Classification (PODCC) is proposed, designed to offset the drawbacks of DBSCAN. Particle Swarm Optimization (PSO), a widely used Evolutionary and Swarm Algorithm (ESA), has been applied in optimization problems in different research domains including data analytics. In PODCC, a variant of PSO, SPSO-2011, is used to search the parameter space so as to identify the best parameters for density-based clustering and classification. PODCC can function in terms of both Supervised and Unsupervised Learnings by applying the appropriate fitness functions proposed in this paper. With the proposed fitness function, users can set the number of clusters as input for PODCC. The proposed method was evaluated by testing ten synthetic datasets and ten benchmarking datasets selected from various open sources. The experimental results indicate that the proposed PODCC can perform better than some established methods, especially with respect to imbalanced datasets.}, language = {en}, urldate = {2021-11-29}, journal = {Swarm and Evolutionary Computation}, author = {Guan, Chun and Yuen, Kevin Kam Fung and Coenen, Frans}, month = feb, year = {2019}, keywords = {Classification, Density-based clustering, Imbalanced dataset, Parameter tuning, Particle Swarm Optimization}, pages = {876--896}, }
@article{helske_mixture_2019, title = {Mixture {Hidden} {Markov} {Models} for {Sequence} {Data}: {The} {seqHMM} {Package} in {R}}, volume = {88}, copyright = {Copyright (c) 2019 Satu Helske, Jouni Helske}, issn = {1548-7660}, shorttitle = {Mixture {Hidden} {Markov} {Models} for {Sequence} {Data}}, url = {https://doi.org/10.18637/jss.v088.i03}, doi = {10.18637/jss.v088.i03}, abstract = {Sequence analysis is being more and more widely used for the analysis of social sequences and other multivariate categorical time series data. However, it is often complex to describe, visualize, and compare large sequence data, especially when there are multiple parallel sequences per subject. Hidden (latent) Markov models (HMMs) are able to detect underlying latent structures and they can be used in various longitudinal settings: to account for measurement error, to detect unobservable states, or to compress information across several types of observations. Extending to mixture hidden Markov models (MHMMs) allows clustering data into homogeneous subsets, with or without external covariates. The seqHMM package in R is designed for the efficient modeling of sequences and other categorical time series data containing one or multiple subjects with one or multiple interdependent sequences using HMMs and MHMMs. Also other restricted variants of the MHMM can be fitted, e.g., latent class models, Markov models, mixture Markov models, or even ordinary multinomial regression models with suitable parameterization of the HMM. Good graphical presentations of data and models are useful during the whole analysis process from the first glimpse at the data to model fitting and presentation of results. The package provides easy options for plotting parallel sequence data, and proposes visualizing HMMs as directed graphs.}, language = {en}, urldate = {2021-11-17}, journal = {Journal of Statistical Software}, author = {Helske, Satu and Helske, Jouni}, month = jan, year = {2019}, keywords = {R}, pages = {1--32}, }
@article{parpinelli_review_2019, title = {A review of techniques for online control of parameters in swarm intelligence and evolutionary computation algorithms}, copyright = {Copyright © 2018 Inderscience Enterprises Ltd.}, url = {https://www.inderscienceonline.com/doi/abs/10.1504/IJBIC.2019.097731}, abstract = {The two major groups representing biologically inspired algorithms are swarm intelligence (SI) and evolutionary computation (EC). Both SI and EC share common features such as the use of stochastic components during the optimisation process and various parameters for configuration. The setup of parameters in swarm and in evolutionary algorithms has an important role in defining their behaviour, guiding the search and biasing the quality of final solutions. In addition, an appropriate setting for the parameters may change during the optimisation process making this task even harder. The present work brings an up-to-date discussion focusing on online parameter control strategies applied in SI and EC. Also, this review analyses and points out the key techniques and algorithms used and suggests some directions for future research.}, language = {en}, urldate = {2021-11-11}, journal = {International Journal of Bio-Inspired Computation}, author = {Parpinelli, Rafael Stubs and Plichoski, Guilherme Felippe and Silva, Renan Samuel Da and Narloch, Pedro Henrique}, month = feb, year = {2019}, note = {Publisher: Inderscience Publishers (IEL)}, keywords = {online, swarm intelligence}, }
@inproceedings{shahin_input-output_2019, title = {Input-{Output} {Hidden} {Markov} {Model} for {Diagnosis} of {Complex} {System}}, abstract = {Prognosis system state of degradation and estimating its remaining useful life requires the system health assessment. For a correct prognostic, a good diagnostic as health assessment is required. Complex systems are difficult to manage for modeling reasons considering complexity, environmental and operational conditions. This paper deals with a stochastic model for generic modeling purposes and considers operating conditions in order to determine the system health. The proposed model is an Input-Output Hidden Markov Model that is able to model a degradation process of complex systems given operational conditions and allows assessing the system health. Well-known algorithms dedicated to HMM are adapted to IOHMM for multiple observation sequences and inputs.}, author = {Shahin, Kamrul Islam and Simon, Christophe and Weber, Philippe}, month = jun, year = {2019}, }
@article{zhao_condition-based_2019, title = {A condition-based opportunistic maintenance strategy for multi-component system}, volume = {18}, issn = {1475-9217}, url = {https://doi.org/10.1177/1475921717751871}, doi = {10.1177/1475921717751871}, abstract = {As a new dynamic maintenance strategy, the condition-based opportunistic maintenance strategy for multi-component system is presented in this work. In the strategy, the degeneration of each component is described by Weibull proportional hazards model or Weibull proportional intensity model, and the condition indicator is defined to characterize the operating state of each component. Then, when and how to maintain a component can be confirmed by comparing the value of the condition indicator with that of the maintenance threshold function. Condition-based maintenance will be implemented on a component if the value of its condition indicator exceeds that of its condition-based maintenance threshold function. Meanwhile, opportunistic maintenance will also be implemented on a component if the value of its condition indicator exceeds that of its opportunistic maintenance threshold function. The two maintenance threshold functions can be determined by minimizing maintenance cost. Finally, taking the wind turbine as an example of a multi-component system, simulation analyses are described to validate the feasibility and effectiveness of the condition-based opportunistic maintenance strategy.}, language = {en}, number = {1}, urldate = {2021-10-28}, journal = {Structural Health Monitoring}, author = {Zhao, Hongshan and Xu, Fanhao and Liang, Botong and Zhang, Jianping and Song, Peng}, month = jan, year = {2019}, note = {Publisher: SAGE Publications}, keywords = {Condition indicator, Multi-component, condition-based maintenance, condition-based opportunistic maintenance, maintenance threshold function, multicomponent, opportunistic maintenance, weibull}, pages = {270--283}, }
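The threshold comparison described above can be illustrated with a Weibull proportional-hazards style condition indicator in a few lines of NumPy. The shape, scale, covariate coefficient and both maintenance thresholds below are invented, and the paper's cost-based optimization of those thresholds is not reproduced.

# Weibull proportional-hazards style condition indicator vs. hypothetical maintenance thresholds.
import numpy as np

beta, eta = 2.5, 8000.0          # hypothetical Weibull shape and scale (hours)
gamma = 0.8                      # hypothetical covariate coefficient (e.g. vibration level)

def hazard(t, z):
    # h(t, z) = (beta / eta) * (t / eta)**(beta - 1) * exp(gamma * z)
    return (beta / eta) * (t / eta) ** (beta - 1) * np.exp(gamma * z)

t = np.arange(100, 10000, 100.0)
z = 0.5 + 0.0001 * t             # slowly worsening condition covariate
indicator = hazard(t, z)

cbm_threshold, om_threshold = 8e-4, 5e-4     # made-up condition-based and opportunistic thresholds
print("opportunistic maintenance from t =", t[indicator > om_threshold][0])
print("condition-based maintenance from t =", t[indicator > cbm_threshold][0])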
@article{nguyen_multi-label_2019, title = {Multi-label classification via incremental clustering on an evolving data stream}, volume = {95}, issn = {0031-3203}, url = {https://www.sciencedirect.com/science/article/pii/S0031320319302328}, doi = {10.1016/j.patcog.2019.06.001}, abstract = {With the advancement of storage and processing technology, an enormous amount of data is collected on a daily basis in many applications. Nowadays, advanced data analytics have been used to mine the collected data for useful information and make predictions, contributing to the competitive advantages of companies. The increasing data volume, however, has posed many problems to classical batch learning systems, such as the need to retrain the model completely with the newly arrived samples or the impracticality of storing and accessing a large volume of data. This has prompted interest on incremental learning that operates on data streams. In this study, we develop an incremental online multi-label classification (OMLC) method based on a weighted clustering model. The model is made to adapt to the change of data via the decay mechanism in which each sample's weight dwindles away over time. The clustering model therefore always focuses more on newly arrived samples. In the classification process, only clusters whose weights are greater than a threshold (called mature clusters) are employed to assign labels for the samples. In our method, not only is the clustering model incrementally maintained with the revealed ground truth labels of the arrived samples, the number of predicted labels in a sample are also adjusted based on the Hoeffding inequality and the label cardinality. The experimental results show that our method is competitive compared to several well-known benchmark algorithms on six performance measures in both the stationary and the concept drift settings.}, language = {en}, urldate = {2021-10-18}, journal = {Pattern Recognition}, author = {Nguyen, Tien Thanh and Dang, Manh Truong and Luong, Anh Vu and Liew, Alan Wee-Chung and Liang, Tiancai and McCall, John}, month = nov, year = {2019}, keywords = {Clustering, Concept drift, Data stream, Incremental learning, Multi-label classification, Online learning, cluster convergence, hoeffding, mature cluster}, pages = {96--113}, }
@article{zeni_fixing_2019, title = {Fixing {Mislabeling} by {Human} {Annotators} {Leveraging} {Conflict} {Resolution} and {Prior} {Knowledge}}, volume = {3}, url = {https://doi.org/10.1145/3314419}, doi = {10.1145/3314419}, abstract = {According to the "human in the loop" paradigm, machine learning algorithms can improve when leveraging on human intelligence, usually in the form of labels or annotation from domain experts. However, in the case of research areas such as ubiquitous computing or lifelong learning, where the annotator is not an expert and is continuously asked for feedback, humans can provide significant fractions of incorrect labels. We propose to address this issue in a series of experiments where students are asked to provide information about their behavior via a dedicated mobile application. Their trustworthiness is tested by employing an architecture where the machine uses all its available knowledge to check the correctness of its own and the user labeling to build a uniform confidence measure for both of them to be used when a contradiction arises. The overarching system runs through a series of modes with progressively higher confidence and features a conflict resolution component to settle the inconsistencies. The results are very promising and show the pervasiveness of annotation mistakes, the extreme diversity of the users' behaviors which provides evidence of the impracticality of a uniform fits-it-all solution, and the substantially improved performance of a skeptical supervised learning strategy.}, number = {1}, urldate = {2021-10-18}, journal = {Proceedings of the ACM on Interactive, Mobile, Wearable and Ubiquitous Technologies}, author = {Zeni, Mattia and Zhang, Wanyi and Bignotti, Enrico and Passerini, Andrea and Giunchiglia, Fausto}, month = mar, year = {2019}, keywords = {Annotation Errors, Collaborative and Social Computing, Ubiquitous and Mobile Devices}, pages = {32:1--32:23}, }
@article{yang_learning_2019, title = {Learning structured and non-redundant representations with deep neural networks}, volume = {86}, issn = {0031-3203}, url = {https://www.sciencedirect.com/science/article/pii/S0031320318303169}, doi = {10.1016/j.patcog.2018.08.017}, abstract = {This paper proposes a novel regularizer named Structured Decorrelation Constraint, to address both the generalization and optimization of deep neural networks, including multiple-layer perceptrons and convolutional neural networks. Our proposed regularizer reduces overfitting by breaking the co-adaptions between the neurons with an explicit penalty. As a result, the network is capable of learning non-redundant representations. Meanwhile, the proposed regularizer encourages the networks to learn structured high-level features to aid the networks’ optimization during training. To this end, neurons are constrained to behave obeying a group prior. Our regularizer applies to various types of layers, including fully connected layers, convolutional layers and normalization layers. The loss of our regularizer can be directly minimized along with the network’s classification loss by stochastic gradient descent. Experiments show that the proposed regularizer obviously relieves the overfitting problem of the existing deep networks. It yields much better performance on extensive datasets than the conventional regularizers like Dropout.}, language = {en}, urldate = {2021-10-18}, journal = {Pattern Recognition}, author = {Yang, Jihai and Xiong, Wei and Li, Shijun and Xu, Chang}, month = feb, year = {2019}, keywords = {Decorrelation, Deep networks, Overfitting}, pages = {224--235}, }
@article{li_incremental_2019, title = {Incremental semi-supervised learning on streaming data}, volume = {88}, issn = {0031-3203}, url = {https://www.sciencedirect.com/science/article/pii/S0031320318303923}, doi = {10.1016/j.patcog.2018.11.006}, abstract = {In streaming data classification, most of the existing methods assume that all arrived evolving data are completely labeled. One challenge is that some applications where only small amount of labeled examples are available for training. Incremental semi-supervised learning algorithms have been proposed for regularizing neural networks by incorporating various side information, such as pairwise constraints or user-provided labels. However, it is hard to put them into practice, especially for non-stationary environments due to the effectiveness and parameter sensitivity of such algorithms. In this paper, we propose a novel incremental semi-supervised learning framework on streaming data. Each layer of model is comprised of a generative network, a discriminant structure and the bridge. The generative network uses dynamic feature learning based on autoencoders to learn generative features from streaming data which has been demonstrated its potential in learning latent feature representations. In addition, the discriminant structure regularizes the network construction via building pairwise similarity and dissimilarity constraints. It is also used for facilitating the parameter learning of the generative network. The network and structure are integrated into a joint learning framework and bridged by enforcing the correlation of their parameters, which balances the flexible incorporation of supervision information and numerical tractability for non-stationary environments as well as explores the intrinsic data structure. Moreover, an efficient algorithm is designed to solve the proposed optimization problem and we also give an ensemble method. Particularly, when multiple layers of model are stacked, the performance is significantly boosted. Finally, to validate the effectiveness of the proposed method, extensive experiments are conducted on synthetic and real-life datasets. The experimental results demonstrate that the performance of the proposed algorithms is superior to some state-of-the-art approaches.}, language = {en}, urldate = {2021-10-18}, journal = {Pattern Recognition}, author = {Li, Yanchao and Wang, Yongli and Liu, Qi and Bi, Cheng and Jiang, Xiaohui and Sun, Shurong}, month = apr, year = {2019}, keywords = {Classification, Dynamic feature learning, Semi-supervised learning, Streaming data}, pages = {383--396}, }
@article{shao_synchronization-based_2019, title = {Synchronization-based clustering on evolving data stream}, volume = {501}, issn = {0020-0255}, url = {https://www.sciencedirect.com/science/article/pii/S0020025518307400}, doi = {10.1016/j.ins.2018.09.035}, abstract = {Clustering streams of data is of increasing importance in many applications. In this paper, we propose a new synchronization-based clustering approach for evolving data streams, called SyncTree, which maintains all micro-clusters at different levels of granularity depending upon the data recency. Instead of using a sliding window or decay function to focus on recent data, SyncTree summarizes all continuously-arriving objects as synchronized micro-clusters sequentially in a batch fashion. Owing to the powerful concept of synchronization, the derived micro-clusters truly reflect the intrinsic cluster structure rather than summarize statistics of data, and old micro-clusters can be intuitively summarized at a higher level by iterative clustering to fit memory constraints. Building upon the hierarchical micro-clusters, SyncTree allows investigating the cluster structure of the data stream between any two time stamps in the past, and also provides a principled way to analyze the cluster evolution. Empirical results demonstrate that our method has good performance compared to state-of-the-art algorithms.}, language = {en}, urldate = {2021-10-18}, journal = {Information Sciences}, author = {Shao, Junming and Tan, Yue and Gao, Lianli and Yang, Qinli and Plant, Claudia and Assent, Ira}, month = oct, year = {2019}, keywords = {Clustering, Data stream, Evolving analysis, Synchronization}, pages = {573--587}, }
@article{xu_density-based_2019, title = {A density-based competitive data stream clustering network with self-adaptive distance metric}, volume = {110}, issn = {0893-6080}, url = {https://www.sciencedirect.com/science/article/pii/S0893608018303307}, doi = {10.1016/j.neunet.2018.11.008}, abstract = {Data stream clustering is a branch of clustering where patterns are processed as an ordered sequence. In this paper, we propose an unsupervised learning neural network named Density Based Self Organizing Incremental Neural Network(DenSOINN) for data stream clustering tasks. DenSOINN is a self organizing competitive network that grows incrementally to learn suitable nodes to fit the distribution of learning data, combining online unsupervised learning and topology learning by means of competitive Hebbian learning rule. By adopting a density-based clustering mechanism, DenSOINN discovers arbitrarily shaped clusters and diminishes the negative effect of noise. In addition, we adopt a self-adaptive distance framework to obtain good performance for learning unnormalized input data. Experiments show that the DenSOINN can achieve high standard performance comparing to state-of-the-art methods.}, language = {en}, urldate = {2021-10-18}, journal = {Neural Networks}, author = {Xu, Baile and Shen, Furao and Zhao, Jinxi}, month = feb, year = {2019}, keywords = {Clustering methods, Competitive neural networks, Stream learning, Unsupervised learning}, pages = {141--158}, }
@article{breitenstein_approaching_2019, title = {Approaching neural net feature interpretation using stacked autoencoders: gene expression profiling of systemic lupus erythematosus patients}, volume = {2019}, issn = {2153-4063}, shorttitle = {Approaching neural net feature interpretation using stacked autoencoders}, url = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC6568105/}, abstract = {Systemic lupus erythematosus (SLE) is a rare, autoimmune disorder known to affect most organ sites. Complicating clinical management is a poorly differentiated, heterogenous SLE disease state. While some small molecule drugs and biologics are available for treatment, additional therapeutic options are needed. Parsing complex biological signatures using powerful, yet human interpretable approaches is critical to advancing our understanding of SLE etiology and identifying therapeutic repositioning opportunities. To approach this goal, we developed a semi-supervised deep neural network pipeline for gene expression profiling of SLE patients and subsequent characterization of individual gene features. Our pipeline performed exemplar multinomial classification of SLE patients in independent balanced validation (F1=0.956) and unbalanced, under-powered testing (F1=0.944) cohorts. A stacked autoencoder disambiguated individual feature representativeness by regenerating an input-like (A′) feature matrix. A to A′ comparisons suggest the top associated features to be key features in gene expression profiling using neural nets.}, urldate = {2021-10-15}, journal = {AMIA Summits on Translational Science Proceedings}, author = {Breitenstein, Matthew K. and Hu, Vincent JY and Bhatnagar, Roopal and Ratnagiri, Madhavi}, month = may, year = {2019}, pmid = {31258997}, pmcid = {PMC6568105}, pages = {435--442}, }
@article{yu_online_2019, title = {Online {Fault} {Diagnosis} for {Industrial} {Processes} {With} {Bayesian} {Network}-{Based} {Probabilistic} {Ensemble} {Learning} {Strategy}}, volume = {16}, issn = {1558-3783}, doi = {10.1109/TASE.2019.2915286}, abstract = {The efficient mitigation of the detrimental effects of a fault in complex systems requires online fault diagnosis techniques that are able to identify the cause of an observable anomaly. However, an individual diagnosis model can only acquire a limited diagnostic effect and may be insufficient for a particular application. In this paper, a Bayesian network-based probabilistic ensemble learning (PEL-BN) strategy is proposed to address the aforementioned issue. First, an ensemble index is proposed to evaluate the candidate diagnosis models in a probabilistic manner so that the diagnosis models with better diagnosis performance can be selected. Then, based on the selected classifiers, the architecture of the Bayesian network can be constructed using the proposed three types of basic topologies. Finally, the advantages of different diagnosis models are integrated using the developed Bayesian network, and thus, the fault causes of the observable anomaly can be accurately inferred. In addition, the proposed method can effectively capture the mixed fault characteristics of multifaults (MFs) by integrating decisions derived from different diagnosis models. Hence, this method can also provide a feasible solution for diagnosing MFs in real industrial processes. A simulation process and a real industrial process are adopted to verify the performance of the proposed method, and the experimental results illustrate that the proposed PEL-BN strategy improves the diagnosis performance of single faults and is a feasible solution for MF diagnosis. Note to Practitioners-The focus of this paper is to develop a probabilistic ensemble learning strategy based on the Bayesian network (PEL-BN) to diagnose different kinds of faults in industrial processes. The PEL-BN strategy can automatically select the base classifiers to establish the architecture of the Bayesian network. In this way, the conclusions of these base classifiers can be effectively integrated to provide better diagnosis performance. In addition, the proposed method is also a feasible technique for diagnosing MFs resulted from the joint effects of multiple faults.}, number = {4}, journal = {IEEE Transactions on Automation Science and Engineering}, author = {Yu, Wanke and Zhao, Chunhui}, month = oct, year = {2019}, note = {Conference Name: IEEE Transactions on Automation Science and Engineering}, keywords = {Base classifiers selection, Bayes methods, Bayesian network, Fault diagnosis, Network topology, Probabilistic logic, diagnosis, diagnostics, ensemble, fault diagnosis probabilistic ensemble learning}, pages = {1922--1932}, }
@inproceedings{chen_railway_2019, title = {Railway {Vehicle} {Door} {Fault} {Diagnosis} {Method} with {Bayesian} {Network}}, doi = {10.1109/ICCRE.2019.8724211}, abstract = {Recent years, more attention has been given to fault diagnosis of railway vehicle door system. In order to handle the uncertainties in fault diagnosis of the door system, a fault diagnosis method based on Bayesian Network was proposed. Fault data provided by a subway company was counted. A model based on Bayesian Network of railway vehicle door was built up and the prior probability of failure was calculated. Inputting fault evidence in the Bayesian model, the posterior probability of each fault would be obtained. Simulation experiment and engineering application show that Bayesian Network can reason through the fault of door system correctly and the result can provide reference and advice for fault diagnosis and maintenance of railway vehicle door.}, booktitle = {2019 4th {International} {Conference} on {Control} and {Robotics} {Engineering} ({ICCRE})}, author = {Chen, Ruwen and Zhu, Songqing and Hao, Fei and Zhu, Bin and Zhao, Zhendong and Xu, Youxiong}, month = apr, year = {2019}, keywords = {Bayes methods, Bayesian Network, Fault diagnosis, Fault trees, Logic gates, Maintenance engineering, Rails, Switches, door, fault diagnosis, posterior probability, prior probability, railway, railway vehicle door}, pages = {70--74}, }
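A worked numerical example of the prior-to-posterior update described above, with invented prior and likelihood values for a hypothetical door symptom (the paper's probabilities come from a subway company's fault records, not from these numbers):

# Bayes-rule update for a hypothetical door fault given an observed symptom ("door fails to close").
priors = {"motor_fault": 0.02, "sensor_fault": 0.05, "no_fault": 0.93}          # invented prior probabilities
likelihood = {"motor_fault": 0.90, "sensor_fault": 0.60, "no_fault": 0.01}      # invented P(symptom | cause)

evidence = sum(priors[c] * likelihood[c] for c in priors)                        # P(symptom)
posterior = {c: priors[c] * likelihood[c] / evidence for c in priors}
for cause, p in sorted(posterior.items(), key=lambda kv: -kv[1]):
    print(f"P({cause} | door fails to close) = {p:.3f}")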
@article{atoui_single_2019, title = {A single {Bayesian} network classifier for monitoring with unknown classes}, volume = {85}, issn = {0952-1976}, url = {https://www.sciencedirect.com/science/article/pii/S0952197619301800}, doi = {10.1016/j.engappai.2019.07.016}, abstract = {In this paper, the Conditional Gaussian Networks (CGNs), a form of Bayesian Networks (BN), are used as a statistical process monitoring approach to detect and diagnose faults. The proposed approach improves the structure of Bayesian networks and generalizes a few results regarding statistical tests and the use of an exclusion criterion. The proposed framework is evaluated using data from the benchmark Tennessee Eastman Process (TEP) with various scenarios.}, language = {en}, urldate = {2021-10-14}, journal = {Engineering Applications of Artificial Intelligence}, author = {Atoui, M. Amine and Cohen, Achraf and Verron, Sylvain and Kobi, Abdessamad}, month = oct, year = {2019}, keywords = {Bayesian networks, Classification, Exclusion criteria, Fault detection and diagnosis}, pages = {681--690}, }
@article{dineva_fault_2019, title = {Fault {Diagnosis} of {Rotating} {Electrical} {Machines} {Using} {Multi}-{Label} {Classification}}, volume = {9}, copyright = {http://creativecommons.org/licenses/by/3.0/}, url = {https://www.mdpi.com/2076-3417/9/23/5086}, doi = {10.3390/app9235086}, abstract = {Fault Detection and Diagnosis of electrical machine and drive systems are of utmost importance in modern industrial automation. The widespread use of Machine Learning techniques has made it possible to replace traditional motor fault detection techniques with more efficient solutions that are capable of early fault recognition by using large amounts of sensory data. However, the detection of concurrent failures is still a challenge in the presence of disturbing noises or when the multiple faults cause overlapping features. Multi-label classification has recently gained popularity in various application domains as an efficient method for fault detection and monitoring of systems with promising results. The contribution of this work is to propose a novel methodology for multi-label classification for simultaneously diagnosing multiple faults and evaluating the fault severity under noisy conditions. In this research, the Electrical Signature Analysis as well as traditional vibration data have been considered for modeling. Furthermore, the performance of various multi-label classification models is compared. Current and vibration signals are acquired under normal and fault conditions. The applicability of the proposed method is experimentally validated under diverse fault conditions such as unbalance and misalignment.}, language = {en}, number = {23}, urldate = {2021-10-11}, journal = {Applied Sciences}, author = {Dineva, Adrienn and Mosavi, Amir and Gyimesi, Mate and Vajda, Istvan and Nabipour, Narjes and Rabczuk, Timon}, month = jan, year = {2019}, note = {Number: 23 Publisher: Multidisciplinary Digital Publishing Institute}, keywords = {big data, data science, drive systems and power electronics, electric machine, energy conversion, fault classifiers, fault severity, machine learning, multi-label classification, multiple fault detection, rotating electrical machines, soft computing}, pages = {5086}, }
@inproceedings{sarma_dbscan_2019, title = {μ{DBSCAN}: {An} {Exact} {Scalable} {DBSCAN} {Algorithm} for {Big} {Data} {Exploiting} {Spatial} {Locality}}, shorttitle = {μ{DBSCAN}}, doi = {10.1109/CLUSTER.2019.8891020}, abstract = {DBSCAN is one of the most popular and effective clustering algorithms that is capable of identifying arbitrary-shaped clusters and noise efficiently. However, its super-linear complexity makes it infeasible for applications involving clustering of Big Data. A major portion of the computation time of DBSCAN is taken up by the neighborhood queries, which become a bottleneck to its performance. We address this issue in our proposed micro-cluster based DBSCAN algorithm, μDBSCAN, which identifies core-points even without performing neighbourhood queries and becomes instrumental in reducing the run-time of the algorithm. It also significantly reduces the computation time per neighbourhood query while producing exact DBSCAN clusters. Moreover, the micro-cluster based solution makes it scalable for high dimensional data. We also propose a highly scalable distributed implementation of μDBSCAN, μDBSCAN-D, to exploit a commodity cluster infrastructure. Experimental results demonstrate tremendous improvements in the performance of our proposed algorithms as compared to their respective state-of-the-art solutions for various standard datasets. μDBSCAN-D is an exact parallel solution for DBSCAN which is capable of processing massive amounts of data efficiently (1 billion data points in 41 minutes on a 32-node cluster), while producing a clustering that is the same as that of traditional DBSCAN.}, booktitle = {2019 {IEEE} {International} {Conference} on {Cluster} {Computing} ({CLUSTER})}, author = {Sarma, Aditya and Goyal, Poonam and Kumari, Sonal and Wani, Anand and Challa, Jagat Sesh and Islam, Saiyedul and Goyal, Navneet}, month = sep, year = {2019}, note = {ISSN: 2168-9253}, keywords = {Approximation algorithms, Big Data, Clustering algorithms, Density-based Clustering, Distributed Computing, Distributed databases, Exact Clustering Algorithm, Optimized Neighborhood Query, Parallel algorithms, Partitioning algorithms, Spatial Locality, Time complexity}, pages = {1--11}, }
@inproceedings{aliperti_fuzzy_2019, title = {A {Fuzzy} {Density}-based {Clustering} {Algorithm} for {Streaming} {Data}}, doi = {10.1109/FUZZ-IEEE.2019.8858909}, abstract = {The exploitation of data streams, nowadays provided nonstop by a myriad of diverse applications, asks for specific analysis methods. In this paper, we propose SF-DBSCAN, a fuzzy version of the DBSCAN algorithm, aimed to perform unsupervised analysis of streaming data. Fuzziness is introduced by fuzzy borders of density-based clusters. We describe and discuss the proposed algorithm, which evolves the clusters at each occurrence of a new object. Three synthetic datasets are used to show the ability of SF-DBSCAN to successfully track changes of data distribution, thus properly addressing concept drift. SF-DBSCAN is compared with a basic, crisp streaming version of DBSCAN with regard to modelling effectiveness.}, booktitle = {2019 {IEEE} {International} {Conference} on {Fuzzy} {Systems} ({FUZZ}-{IEEE})}, author = {Aliperti, Andrea and Bechini, Alessio and Marcelloni, Francesco and Renda, Alessandro}, month = jun, year = {2019}, note = {ISSN: 1558-4739}, keywords = {Clustering algorithms, Data structures, Memory management, Partitioning algorithms, Proposals, Sensitivity, Shape}, pages = {1--6}, }
@article{zaremoodi_concept-evolution_2019, title = {Concept-evolution detection in non-stationary data streams: a fuzzy clustering approach}, volume = {60}, issn = {0219-3116}, shorttitle = {Concept-evolution detection in non-stationary data streams}, url = {https://doi.org/10.1007/s10115-018-1266-y}, doi = {10.1007/s10115-018-1266-y}, abstract = {We have entered the era of networked communications where concepts such as big data and social networks are emerging. The explosion and profusion of available data in a broad range of application domains cause data streams to become an inevitable part of the most real-world applications. In the classification of data streams, there are four major challenges: infinite length, concept drift, recurring and evolving concepts. This paper proposes a novel method to address the mentioned challenges with a focus on the last one. Unlike the existing methods for detection of evolving concepts, we cast joint classification and detection of evolving concepts into optimizing an objective function by extending a fuzzy agglomerative clustering method. Moreover, rather than keeping instances or hyper-sphere summaries of previously seen classes, we just maintain boundaries in the kernel space and generate instances of each class on demand. This approach enhances the accuracy and reduces the memory usage of the proposed method. We empirically evaluated and showed the effectiveness of the proposed approach on several synthetic and real datasets. Experimental results on synthetic and real datasets show the superiority of the proposed method over the related state-of-the-art methods in this area.}, language = {en}, number = {3}, urldate = {2021-10-01}, journal = {Knowledge and Information Systems}, author = {ZareMoodi, Poorya and Kamali Siahroudi, Sajjad and Beigy, Hamid}, month = sep, year = {2019}, pages = {1329--1352}, }
@article{li_deep_2019, title = {Deep learning-based remaining useful life estimation of bearings using multi-scale feature extraction}, volume = {182}, issn = {0951-8320}, url = {https://www.sciencedirect.com/science/article/pii/S0951832018308299}, doi = {10.1016/j.ress.2018.11.011}, abstract = {Accurate evaluation of machine degradation during long-time operation is of great importance. With the rapid development of modern industries, physical model is becoming less capable of describing sophisticated systems, and data-driven approaches have been widely developed. This paper proposes a novel intelligent remaining useful life (RUL) prediction method based on deep learning. The time-frequency domain information is explored for prognostics, and multi-scale feature extraction is implemented using convolutional neural networks. Experiments on a popular rolling bearing dataset prepared from the PRONOSTIA platform are carried out to show the effectiveness of the proposed method, and its superiority is demonstrated by the comparisons with other approaches. In general, high accuracy on the RUL prediction is achieved, and the proposed method is promising for industrial applications.}, language = {en}, urldate = {2021-09-28}, journal = {Reliability Engineering \& System Safety}, author = {Li, Xiang and Zhang, Wei and Ding, Qian}, month = feb, year = {2019}, keywords = {Deep learning, Multi-scale feature extraction, Prognostics and health management, Remaining useful life, Rolling bearing}, pages = {208--218}, }
@article{al-dulaimi_multimodal_2019, title = {A multimodal and hybrid deep neural network model for {Remaining} {Useful} {Life} estimation}, volume = {108}, issn = {0166-3615}, url = {https://www.sciencedirect.com/science/article/pii/S0166361518304925}, doi = {10.1016/j.compind.2019.02.004}, abstract = {Aging critical infrastructure and valuable machinery, together with recent catastrophic incidents such as the collapse of the Morandi bridge, call for an urgent quest to design advanced and innovative data-driven solutions and efficiently incorporate multi-sensor streaming data sources for condition-based maintenance. Remaining Useful Life (RUL) is a crucial measure used in this regard within manufacturing and industrial systems, and its accurate estimation enables improved decision-making for operations and maintenance. Capitalizing on the recent success of multiple-model (also referred to as hybrid or mixture of experts) deep learning techniques, the paper proposes a hybrid deep neural network framework for RUL estimation, referred to as the Hybrid Deep Neural Network Model (HDNN). The proposed HDNN framework is the first hybrid deep neural network model designed for RUL estimation that integrates two deep learning models simultaneously and in a parallel fashion. More specifically, contrary to the majority of existing data-driven prognostic approaches for RUL estimation, which are developed based on a single deep model and can hardly maintain good generalization performance across various prognostic scenarios, the proposed HDNN framework consists of two parallel paths (one LSTM and one CNN) followed by a fully connected multilayer fusion neural network which acts as the fusion centre combining the output of the two paths to form the target RUL. The HDNN uses the LSTM path to extract temporal features while simultaneously the CNN is utilized to extract spatial features. The proposed HDNN framework is tested on the NASA commercial modular aero-propulsion system simulation (C-MAPSS) dataset. Our comprehensive experiments and comparisons with several recently proposed RUL estimation methodologies developed based on the same data-sets show that the proposed HDNN framework significantly outperforms all its counterparts in the complicated prognostic scenarios with an increased number of operating conditions and fault modes.}, language = {en}, urldate = {2021-09-28}, journal = {Computers in Industry}, author = {Al-Dulaimi, Ali and Zabihi, Soheil and Asif, Amir and Mohammadi, Arash}, month = jun, year = {2019}, keywords = {Convolutional Neural Networks (CNN), Deep learning, Hybrid models, Long Short-Term Memory Neural Network (LSTM), Machine Health Monitoring, Prognostic Health Management, Remaining Useful Life (RUL)}, pages = {186--196}, }
@article{yu_remaining_2019, title = {Remaining useful life estimation using a bidirectional recurrent neural network based autoencoder scheme}, volume = {129}, issn = {0888-3270}, url = {https://www.sciencedirect.com/science/article/pii/S0888327019303061}, doi = {10.1016/j.ymssp.2019.05.005}, abstract = {System remaining useful life (RUL) estimation is one of the major prognostic activities in industrial applications. In this paper, we propose a sensor-based data-driven scheme using a deep learning tool and the similarity-based curve matching technique to estimate the RUL of a system. The whole procedure consists of two steps: in the first step, a bidirectional recurrent neural network based autoencoder is trained in an unsupervised way to convert the multi-sensor (high-dimensional) readings collected from historical run-to-failure instances (i.e. multiple units of the same system) to low-dimensional embeddings, which are used to construct the one-dimensional health index (HI) values to reflect various health degradation patterns of the instances. In the second step, the test HI curve obtained from sensor readings collected from an on-line instance is compared with the degradation patterns built in the offline phase using the similarity-based curve matching technique, from which the RUL of the test unit can be estimated at an early stage. The proposed scheme was tested on two publicly available run-to-failure datasets: the turbofan engine datasets (simulation datasets) and the milling datasets (experimental datasets). The prognostic performance of the proposed procedure was directly compared with the existing state-of-the-art prognostic models in terms of various prognostic metrics on the two datasets respectively. The comparison results demonstrate the competitiveness of the proposed method used for RUL estimation of systems.}, language = {en}, urldate = {2021-09-28}, journal = {Mechanical Systems and Signal Processing}, author = {Yu, Wennian and Kim, Il Yong and Mechefske, Chris}, month = aug, year = {2019}, keywords = {Autoencoder, Bidirectional recurrent neural network, Health index, Remaining useful life}, pages = {764--780}, }
@article{nguyen_new_2019, title = {A new dynamic predictive maintenance framework using deep learning for failure prognostics}, volume = {188}, issn = {0951-8320}, url = {https://www.sciencedirect.com/science/article/pii/S0951832018311050}, doi = {10.1016/j.ress.2019.03.018}, abstract = {In Prognostic Health and Management (PHM) literature, the predictive maintenance studies can be classified into two groups. The first group focuses on the prognostics step but does not consider the maintenance decisions. The second group addresses the maintenance optimization question based on the assumptions that the prognostics information or the degradation models of the system are already known. However, neither of the two groups provides a complete framework (from data-driven prognostics to maintenance decisions) investigating the impact of the imperfect prognostics on maintenance decisions. Therefore, this paper aims to fill this gap in the literature. It presents a novel dynamic predictive maintenance framework based on sensor measurements. In this framework, the prognostics step, based on the Long Short-Term Memory network, is oriented towards the requirements of operation planners. It provides the probabilities that the system can fail in different time horizons to decide the moment for preparing and performing maintenance activities. The proposed framework is validated on a real application case study. Its performance is highlighted when compared with two benchmark maintenance policies: classical periodic and ideal predicted maintenance. In addition, the impact of the imperfect prognostics information on maintenance decisions is discussed in this paper.}, language = {en}, urldate = {2021-09-28}, journal = {Reliability Engineering \& System Safety}, author = {Nguyen, Khanh T. P. and Medjaher, Kamal}, month = aug, year = {2019}, keywords = {Deep learning, Inventory management, PHM, Predictive maintenance, Prognostics information, Residual life prediction}, pages = {251--262}, }
@article{cao_smart_2019, title = {Smart {Condition} {Monitoring} for {Industry} 4.0 {Manufacturing} {Processes}: {An} {Ontology}-{Based} {Approach}}, volume = {50}, shorttitle = {Smart {Condition} {Monitoring} for {Industry} 4.0 {Manufacturing} {Processes}}, doi = {10.1080/01969722.2019.1565118}, abstract = {Following the trend of Industry 4.0, automation in different manufacturing processes has triggered the use of intelligent condition monitoring systems, which are crucial for improving productivity and availability of production systems. To develop such an intelligent system, semantic technologies are of paramount importance. This paper introduces an ontology that will be used to develop an intelligent condition monitoring system. The proposed ontology formalizes domain knowledge related to condition monitoring tasks of manufacturing processes. After introducing the ontology in detail, we evaluate the proposed ontology by instantiating it with a case study: a conditional maintenance task of bearings in rotating machinery.}, journal = {Cybernetics and Systems}, author = {Cao, Qiushi and Giustozzi, Franco and Zanni-Merk, Cecilia and De Bertrand de Beuvron, François and Reich, Christoph}, month = feb, year = {2019}, pages = {1--15}, }
@incollection{nalmpantis_signal2vec_2019, title = {{Signal2Vec}: {Time} {Series} {Embedding} {Representation}}, isbn = {978-3-642-54671-6}, shorttitle = {{Signal2Vec}}, abstract = {The rise of the Internet-of-Things (IoT) and the exponential increase of devices using sensors have led to an increasing interest in data mining of time series. In this context, several representation methods have been proposed. Signal2vec is a novel framework, which can represent any time-series in a vector space. It is unsupervised, computationally efficient, scalable and generic. The framework is evaluated via a theoretical analysis and real world applications, with a focus on energy data. The experimental results are compared against a baseline using raw data and two other popular representations, SAX and PAA. Signal2vec is superior not only in terms of performance, but also in efficiency, due to dimensionality reduction.}, author = {Nalmpantis, Christoforos and Vrakas, Dimitris}, month = may, year = {2019}, doi = {10.1007/978-3-030-20257-6_7}, pages = {80--90}, }
@inproceedings{turgis_industrialization_2019, title = {Industrialization of {Condition} {Based} {Maintenance} for {Complex} {Systems} in a {Complex} {Maintenance} {Environment}, {Example} of {NAT}}, abstract = {The NAT train is composed of several complex systems such as passenger access systems and batteries. In mass transit rolling stock, these are considered critical systems, as they are directly related to reliability and are among the top contributors to train maintenance costs. These systems are complex by definition: they are composed of various subsystems, themselves composed of various technologies supporting numerous functionalities in a stressful environment. To maintain such systems across a large fleet (more than 250 trains and 4000 passenger access systems to cover) under the operational constraints of mass transit, the maintenance plan cannot be fully covered by condition-based maintenance. In order to reduce downtime, lower maintenance costs and increase reliability, a mixed maintenance solution was proposed to optimize the dependencies between systematic, corrective and condition-based maintenance.}, author = {Turgis, Fabien and Audier, Pierre and Coutadeur, Quentin and Verdun, Cyril}, month = dec, year = {2019}, }
@article{ismail_fawaz_deep_2019, title = {Deep learning for time series classification: a review}, volume = {33}, issn = {1573-756X}, shorttitle = {Deep learning for time series classification}, url = {https://doi.org/10.1007/s10618-019-00619-1}, doi = {10.1007/s10618-019-00619-1}, abstract = {Time Series Classification (TSC) is an important and challenging problem in data mining. With the increase of time series data availability, hundreds of TSC algorithms have been proposed. Among these methods, only a few have considered Deep Neural Networks (DNNs) to perform this task. This is surprising as deep learning has seen very successful applications in the last years. DNNs have indeed revolutionized the field of computer vision especially with the advent of novel deeper architectures such as Residual and Convolutional Neural Networks. Apart from images, sequential data such as text and audio can also be processed with DNNs to reach state-of-the-art performance for document classification and speech recognition. In this article, we study the current state-of-the-art performance of deep learning algorithms for TSC by presenting an empirical study of the most recent DNN architectures for TSC. We give an overview of the most successful deep learning applications in various time series domains under a unified taxonomy of DNNs for TSC. We also provide an open source deep learning framework to the TSC community where we implemented each of the compared approaches and evaluated them on a univariate TSC benchmark (the UCR/UEA archive) and 12 multivariate time series datasets. By training 8730 deep learning models on 97 time series datasets, we propose the most exhaustive study of DNNs for TSC to date.}, language = {en}, number = {4}, urldate = {2020-12-12}, journal = {Data Mining and Knowledge Discovery}, author = {Ismail Fawaz, Hassan and Forestier, Germain and Weber, Jonathan and Idoumghar, Lhassane and Muller, Pierre-Alain}, month = jul, year = {2019}, pages = {917--963}, }
@inproceedings{korycki_active_2019, title = {Active {Learning} with {Abstaining} {Classifiers} for {Imbalanced} {Drifting} {Data} {Streams}}, doi = {10.1109/BigData47090.2019.9006453}, abstract = {Learning from data streams is one of the most promising and challenging domains in modern machine learning. Proliferating online data sources provide us access to real-time knowledge we have never had before. At the same time, new obstacles emerge and we have to overcome them in order to fully and effectively utilize the potential of the data. Prohibitive time and memory constraints or non-stationary distributions are only some of the problems. When dealing with classification tasks, one has to remember that effective adaptation has to be achieved on weak foundations of partially labeled and often imbalanced data. In our work, we propose an online framework for binary classification, that aims to handle the complex problem of working with dynamic, sparsely labeled and imbalanced streams. The main part of it is a novel active learning strategy (MD-OAL) that is able to prioritize labeling of minority instances and, as a result, improve the balance of the learning process. We combine the strategy with a dynamic ensemble of base learners that can abstain from making decisions, if they are very uncertain. We adjust the abstaining mechanism in favor of minority instances, providing an effective method for handling remaining imbalance and a concept drift simultaneously. The conducted evaluation shows that in the challenging and realistic scenarios our framework outperforms state-of-the-art algorithms, providing higher resilience to the combined effect of limited labeling and imbalance.}, booktitle = {2019 {IEEE} {International} {Conference} on {Big} {Data} ({Big} {Data})}, author = {Korycki, Ł and Cano, A. and Krawczyk, B.}, month = dec, year = {2019}, keywords = {Computer science, Data mining, Data models, Heuristic algorithms, Labeling, Machine learning, Uncertainty, abstaining classifiers, active learning, active learning strategy, binary classification, classification tasks, data stream mining, ensemble learning., imbalanced data, imbalanced drifting data streams, learning (artificial intelligence), machine learning, online data sources, pattern classification, real-time knowledge}, pages = {2334--2343}, }
@article{hu_novel_2019, title = {A {Novel} {Segmentation} and {Representation} {Approach} for {Streaming} {Time} {Series}}, volume = {7}, issn = {2169-3536}, doi = {10.1109/ACCESS.2018.2828320}, abstract = {Along with the coming of the Internet of Everything era, massive numbers of pervasive connected devices in various fields are continuously producing oceans of time series stream data. In order to carry out different kinds of data mining tasks (similarity search, classification, clustering, and prediction) based on streaming time series efficiently and effectively, segmentation and representation, which segment a streaming time series into several subsequences and provide an approximative representation of the raw data, should be done as the first step. With the virtue of solid theoretical foundations, piecewise linear representation (PLR) has gained success in yielding more compact representations and fewer segments. However, the current state-of-the-art PLR methods have their own flaws: for one thing, most current PLR methods focus on the guaranteed error bound instead of the holistic approximation error, which may lead to excessive fitting errors of segments and loss of factual research significance. For another, most current PLR methods process streaming time series with some fixed criteria, which cannot provide a more flexible way to represent streaming time series. Motivated by the above analysis, we propose a novel continuous segmentation and multi-resolution representation approach based on turning points, which subdivides the streaming time series by a set of temporal feature points and represents the time series flexibly. Our method can not only generate more accurate approximations than state-of-the-art PLR algorithms, but also represent the streaming time series in a more flexible way to meet different needs of users. Extensive experiments on different kinds of typical time series datasets have been conducted to demonstrate the superiority of our method.}, journal = {IEEE Access}, author = {Hu, Yupeng and Guan, Peiyuan and Zhan, Peng and Ding, Yiming and Li, Xueqing}, year = {2019}, note = {Conference Name: IEEE Access}, keywords = {Approximation algorithms, Data mining, Indexes, Internet of Everything era, Internet of Things, Microsoft Windows, Task analysis, Time series analysis, Turning, continuous segmentation approach, data mining, data structures, multi-resolution representation, multiresolution representation approach, online segmentation, piecewise linear representation, piecewise linear techniques, streaming time series, time series, time series stream data, typical time series datasets}, pages = {184423--184437}, }
@article{mabrok_pattern_2019, title = {Pattern detection for time series trajectories in human in the loop applications}, volume = {37}, issn = {1064-1246}, url = {https://content.iospress.com/articles/journal-of-intelligent-and-fuzzy-systems/ifs179070}, doi = {10.3233/JIFS-179070}, abstract = {Extracting repeated unknown maneuvers or patterns performed by a human operator in cyber-physical systems can lead to a better understanding of the behavior of the human operator who is controlling or sharing tasks with dynamical systems.}, language = {en}, number = {1}, urldate = {2020-10-01}, journal = {Journal of Intelligent \& Fuzzy Systems}, author = {Mabrok, Mohamed A. and Abdel-Aty, Abdel-Haleem}, month = jan, year = {2019}, note = {Publisher: IOS Press}, pages = {115--123}, }
@article{gan_survey_2019, title = {A {Survey} of {Parallel} {Sequential} {Pattern} {Mining}}, url = {http://arxiv.org/abs/1805.10515}, doi = {10.1145/3314107}, abstract = {With the growing popularity of shared resources, large volumes of complex data of different types are collected automatically. Traditional data mining algorithms generally have problems and challenges including huge memory cost, low processing speed, and inadequate hard disk space. As a fundamental task of data mining, sequential pattern mining (SPM) is used in a wide variety of real-life applications. However, it is more complex and challenging than other pattern mining tasks, i.e., frequent itemset mining and association rule mining, and also suffers from the above challenges when handling the large-scale data. To solve these problems, mining sequential patterns in a parallel or distributed computing environment has emerged as an important issue with many applications. In this paper, an in-depth survey of the current status of parallel sequential pattern mining (PSPM) is investigated and provided, including detailed categorization of traditional serial SPM approaches, and state of the art parallel SPM. We review the related work of parallel sequential pattern mining in detail, including partition-based algorithms for PSPM, Apriori-based PSPM, pattern growth based PSPM, and hybrid algorithms for PSPM, and provide deep description (i.e., characteristics, advantages, disadvantages and summarization) of these parallel approaches of PSPM. Some advanced topics for PSPM, including parallel quantitative / weighted / utility sequential pattern mining, PSPM from uncertain data and stream data, hardware acceleration for PSPM, are further reviewed in details. Besides, we review and provide some well-known open-source software of PSPM. Finally, we summarize some challenges and opportunities of PSPM in the big data era.}, urldate = {2020-10-01}, journal = {arXiv:1805.10515 [cs]}, author = {Gan, Wensheng and Lin, Jerry Chun-Wei and Fournier-Viger, Philippe and Chao, Han-Chieh and Yu, Philip S.}, month = apr, year = {2019}, note = {arXiv: 1805.10515}, keywords = {Computer Science - Databases}, }
@book{li_sample_2019, title = {Sample {Adaptive} {Multiple} {Kernel} {Learning} for {Failure} {Prediction} of {Railway} {Points}}, author = {Li, Zhibin and Zhang, Jian and Wu, Qiang and Gong, Yongshun and Yi, Jinfeng and Kirsch, Christina}, year = {2019}, note = {arXiv: 1907.01162}, }
@article{gomes_machine_2019, title = {Machine learning for streaming data: state of the art, challenges, and opportunities}, volume = {21}, issn = {1931-0145}, shorttitle = {Machine learning for streaming data}, url = {https://doi.org/10.1145/3373464.3373470}, doi = {10.1145/3373464.3373470}, abstract = {Incremental learning, online learning, and data stream learning are terms commonly associated with learning algorithms that update their models given a continuous influx of data without performing multiple passes over data. Several works have been devoted to this area, either directly or indirectly as characteristics of big data processing, i.e., Velocity and Volume. Given the current industry needs, there are many challenges to be addressed before existing methods can be efficiently applied to real-world problems. In this work, we focus on elucidating the connections among the current state-of-the-art in related fields and clarifying open challenges in both academia and industry. We treat with special care topics that were not thoroughly investigated in past position and survey papers. This work aims to evoke discussion and elucidate the current research opportunities, highlighting the relationship of different subareas and suggesting courses of action when possible.}, number = {2}, urldate = {2020-03-17}, journal = {ACM SIGKDD Explorations Newsletter}, author = {Gomes, Heitor Murilo and Read, Jesse and Bifet, Albert and Barddal, Jean Paul and Gama, João}, month = nov, year = {2019}, pages = {6--22}, }
@article{carvalho_systematic_2019, title = {A systematic literature review of machine learning methods applied to predictive maintenance}, volume = {137}, issn = {0360-8352}, url = {http://www.sciencedirect.com/science/article/pii/S0360835219304838}, doi = {10.1016/j.cie.2019.106024}, journal = {Computers \& Industrial Engineering}, author = {Carvalho, Thyago P. and Soares, Fabrízzio A. A. M. N. and Vita, Roberto and Francisco, Roberto da P. and Basto, João P. and Alcalá, Symone G. S.}, year = {2019}, keywords = {Artificial intelligence, Machine learning, PdM, Predictive maintenance, Systematic literature review}, pages = {106024}, }
@article{noman_overview_2019, title = {Overview of predictive condition based maintenance research using bibliometric indicators}, volume = {31}, issn = {1018-3639}, url = {http://www.sciencedirect.com/science/article/pii/S1018363917303720}, doi = {10.1016/j.jksues.2018.02.003}, number = {4}, journal = {Journal of King Saud University - Engineering Sciences}, author = {Noman, Mohammed A. and Nasr, Emad S. Abouel and Al-Shayea, Adel and Kaid, Husam}, year = {2019}, pages = {355--367}, }
@article{fernandes_data_2019, title = {Data analysis and feature selection for predictive maintenance: {A} case-study in the metallurgic industry}, volume = {46}, issn = {0268-4012}, url = {http://www.sciencedirect.com/science/article/pii/S0268401218304699}, doi = {10.1016/j.ijinfomgt.2018.10.006}, abstract = {Proactive Maintenance practices are becoming more standard in industrial environments, with a direct and profound impact on the competitiveness within the sector. These practices demand the continuous monitoring of industrial equipment, which generates extensive amounts of data. This information can be processed into useful knowledge with the use of machine learning algorithms. However, before the algorithms can effectively be applied, the data must go through an exploratory phase: assessing the meaning of the features and to which degree they are redundant. In this paper, we present the findings of the analysis conducted on a real-world dataset from a metallurgic company. A number of data analysis and feature selection methods are employed, uncovering several relationships, which are systematized in a rule-based model, and reducing the feature space from an initial 47-feature dataset to a 32-feature dataset.}, journal = {International Journal of Information Management}, author = {Fernandes, Marta and Canito, Alda and Bolón-Canedo, Verónica and Conceição, Luís and Praça, Isabel and Marreiros, Goreti}, year = {2019}, keywords = {Data analysis, Feature selection, Predictive maintenance, Rule-based model}, pages = {252--262}, }
@article{allah_bukhsh_predictive_2019, title = {Predictive maintenance using tree-based classification techniques: {A} case of railway switches}, volume = {101}, issn = {0968-090X}, shorttitle = {Predictive maintenance using tree-based classification techniques}, url = {http://www.sciencedirect.com/science/article/pii/S0968090X18309057}, doi = {10.1016/j.trc.2019.02.001}, abstract = {With growing service demands, rapid deterioration due to extensive usage, and limited maintenance due to budget cuts, the railway infrastructure is in a critical state and requires continuous maintenance. The infrastructure managers have to come up with smart maintenance decisions in order to improve the assets’ condition, spend optimal cost and keep the network available. Currently, the infrastructure managers lack the tools and decision support models that could assist them in taking (un)planned maintenance decisions effectively and efficiently. Recently, many literature studies have proposed to employ machine learning techniques to estimate the performance state of an asset, predict the maintenance need, possible failure modes, and such similar aspects in advance. Most of these studies have utilised additional data collection measures to record the assets' behaviour. Though useful for experimentation, it is expensive and impractical to mount monitoring devices on multiple assets across the network. Therefore, the objective of this study is to develop predictive models that utilise existing data from a railway agency and yield interpretable results. We propose to leverage the tree-based classification techniques of machine learning in order to predict maintenance need, activity type and trigger's status of railway switches. Using the data from an in-use business process, predictive models based on the decision tree, random forest, and gradient boosted trees are developed. Moreover, to facilitate model interpretability, we provide a detailed explanation of the models' predictions through feature importance analysis and instance-level details. Our solution approach of predictive model development and results explanation has wider applicability and can be used for other asset types and different (maintenance) planning scenarios.}, language = {en}, urldate = {2020-09-19}, journal = {Transportation Research Part C: Emerging Technologies}, author = {Allah Bukhsh, Zaharah and Saeed, Aaqib and Stipanovic, Irina and Doree, Andre G.}, month = apr, year = {2019}, keywords = {Classification, Data-driven, Decision support, LIME, Machine learning, Predictive maintenance, Railway infrastructure, Switches and crossings}, pages = {35--54}, }
@inproceedings{ismail_fawaz_transfer_2018, title = {Transfer learning for time series classification}, url = {https://ieeexplore.ieee.org/document/8621990}, doi = {10.1109/BigData.2018.8621990}, abstract = {Transfer learning for deep neural networks is the process of first training a base network on a source dataset, and then transferring the learned features (the network’s weights) to a second network to be trained on a target dataset. This idea has been shown to improve deep neural network’s generalization capabilities in many computer vision tasks such as image recognition and object localization. Apart from these applications, deep Convolutional Neural Networks (CNNs) have also recently gained popularity in the Time Series Classification (TSC) community. However, unlike for image recognition problems, transfer learning techniques have not yet been investigated thoroughly for the TSC task. This is surprising as the accuracy of deep learning models for TSC could potentially be improved if the model is fine-tuned from a pre-trained neural network instead of training it from scratch. In this paper, we fill this gap by investigating how to transfer deep CNNs for the TSC task. To evaluate the potential of transfer learning, we performed extensive experiments using the UCR archive which is the largest publicly available TSC benchmark containing 85 datasets. For each dataset in the archive, we pre-trained a model and then fine-tuned it on the other datasets resulting in 7140 different deep neural networks. These experiments revealed that transfer learning can improve or degrade the models predictions depending on the dataset used for transfer. Therefore, in an effort to predict the best source dataset for a given target dataset, we propose a new method relying on Dynamic Time Warping to measure inter-datasets similarities. We describe how our method can guide the transfer to choose the best source dataset leading to an improvement in accuracy on 71 out of 85 datasets.}, urldate = {2023-10-17}, booktitle = {2018 {IEEE} {International} {Conference} on {Big} {Data} ({Big} {Data})}, author = {Ismail Fawaz, Hassan and Forestier, Germain and Weber, Jonathan and Idoumghar, Lhassane and Muller, Pierre-Alain}, month = dec, year = {2018}, pages = {1367--1376}, }
@misc{nichol_first-order_2018, title = {On {First}-{Order} {Meta}-{Learning} {Algorithms}}, url = {http://arxiv.org/abs/1803.02999}, doi = {10.48550/arXiv.1803.02999}, abstract = {This paper considers meta-learning problems, where there is a distribution of tasks, and we would like to obtain an agent that performs well (i.e., learns quickly) when presented with a previously unseen task sampled from this distribution. We analyze a family of algorithms for learning a parameter initialization that can be fine-tuned quickly on a new task, using only first-order derivatives for the meta-learning updates. This family includes and generalizes first-order MAML, an approximation to MAML obtained by ignoring second-order derivatives. It also includes Reptile, a new algorithm that we introduce here, which works by repeatedly sampling a task, training on it, and moving the initialization towards the trained weights on that task. We expand on the results from Finn et al. showing that first-order meta-learning algorithms perform well on some well-established benchmarks for few-shot classification, and we provide theoretical analysis aimed at understanding why these algorithms work.}, urldate = {2023-10-03}, publisher = {arXiv}, author = {Nichol, Alex and Achiam, Joshua and Schulman, John}, month = oct, year = {2018}, note = {arXiv:1803.02999 [cs]}, keywords = {Computer Science - Machine Learning}, }
@article{losing_incremental_2018, title = {Incremental on-line learning: {A} review and comparison of state of the art algorithms}, volume = {275}, issn = {0925-2312}, shorttitle = {Incremental on-line learning}, url = {http://www.sciencedirect.com/science/article/pii/S0925231217315928}, doi = {10.1016/j.neucom.2017.06.084}, abstract = {Recently, incremental and on-line learning gained more attention especially in the context of big data and learning from data streams, conflicting with the traditional assumption of complete data availability. Even though a variety of different methods are available, it often remains unclear which of them is suitable for a specific task and how they perform in comparison to each other. We analyze the key properties of eight popular incremental methods representing different algorithm classes. Thereby, we evaluate them with regards to their on-line classification error as well as to their behavior in the limit. Further, we discuss the often neglected issue of hyperparameter optimization specifically for each method and test how robustly it can be done based on a small set of examples. Our extensive evaluation on data sets with different characteristics gives an overview of the performance with respect to accuracy, convergence speed as well as model complexity, facilitating the choice of the best method for a given application.}, language = {en}, urldate = {2020-03-17}, journal = {Neurocomputing}, author = {Losing, Viktor and Hammer, Barbara and Wersing, Heiko}, month = jan, year = {2018}, keywords = {Data streams, Hyperparameter optimization, Incremental learning, Model selection, On-line learning}, pages = {1261--1274}, }
@inproceedings{dosilovic_explainable_2018, title = {Explainable artificial intelligence: {A} survey}, shorttitle = {Explainable artificial intelligence}, doi = {10.23919/MIPRO.2018.8400040}, abstract = {In the last decade, with the availability of large datasets and more computing power, machine learning systems have achieved (super)human performance in a wide variety of tasks. Examples of this rapid development can be seen in image recognition, speech analysis, strategic game planning and many more. The problem with many state-of-the-art models is a lack of transparency and interpretability. The lack thereof is a major drawback in many applications, e.g. healthcare and finance, where a rationale for the model's decisions is a requirement for trust. In the light of these issues, explainable artificial intelligence (XAI) has become an area of interest in the research community. This paper summarizes recent developments in XAI in supervised learning, starts a discussion on its connection with artificial general intelligence, and gives proposals for further research directions.}, booktitle = {2018 41st {International} {Convention} on {Information} and {Communication} {Technology}, {Electronics} and {Microelectronics} ({MIPRO})}, author = {Došilović, Filip Karlo and Brčić, Mario and Hlupić, Nikica}, month = may, year = {2018}, keywords = {Decision trees, Machine learning, Optimization, Predictive models, Supervised learning, Support vector machines, comprehensibility, explainability, explainable artificial intelligence, interpretability}, pages = {0210--0215}, }
@misc{noauthor_bs_2018, title = {{BS} {EN} 13306 ({Maintenance} - {Maintenance} terminology)}, publisher = {BSI Standards Limited}, year = {2018}, }
@inproceedings{montiel_learning_2018, title = {Learning {Fast} and {Slow}: {A} {Unified} {Batch}/{Stream} {Framework}}, shorttitle = {Learning {Fast} and {Slow}}, doi = {10.1109/BigData.2018.8622222}, abstract = {Data ubiquity highlights the need of efficient and adaptable data-driven solutions. In this paper, we present FAST AND SLOW LEARNING (FSL), a novel unified framework that sheds light on the symbiosis between batch and stream learning. FSL works by employing Fast (stream) and Slow (batch) Learners, emulating the mechanisms used by humans to make decisions. We showcase the applicability of FSL on the task of classification by introducing the FAST AND SLOW CLASSIFIER (FSC). A Fast Learner provides predictions on the spot, continuously updating its model and adapting to changes in the data. On the other hand, the Slow Learner provides predictions considering a wider spectrum of seen data, requiring more time and data to create complex models. Once that enough data has been collected, FSC trains the Slow Learner and starts tracking the performance of both learners. A drift detection mechanism triggers the creation of new Slow models when the current Slow model becomes obsolete. FSC selects between Fast and Slow Learners according to their performance on new incoming data. Test results on real and synthetic data show that FSC effectively drives the positive interaction of stream and batch models for learning from evolving data streams.}, booktitle = {2018 {IEEE} {International} {Conference} on {Big} {Data} ({Big} {Data})}, author = {Montiel, Jacob and Bifet, Albert and Losing, Viktor and Read, Jesse and Abdessalem, Talel}, month = dec, year = {2018}, keywords = {Adaptation models, Batch Learning, Classification, Concept Drift, Data models, Machine Learning, Machine learning, Power capacitors, Predictive models, Stream Learning, Task analysis, Training}, pages = {1065--1072}, }
@article{chanial_connectionlens_2018, title = {Connectionlens: finding connections across heterogeneous data sources}, volume = {11}, issn = {2150-8097}, shorttitle = {Connectionlens}, url = {https://doi.org/10.14778/3229863.3236252}, doi = {10.14778/3229863.3236252}, abstract = {Nowadays, journalism is facilitated by the existence of large amounts of publicly available digital data sources. In particular, journalists can do investigative work, which typically consists of keyword-based searches over many heterogeneous, independently produced and dynamic data sources, to obtain useful, interconnecting and traceable information. We propose to demonstrate ConnectionLens, a system based on a novel algorithm for keyword search across heterogeneous data sources. Our demonstration scenarios are based on use cases suggested by journalists from the French newspaper Le Monde, with whom we collaborate.}, number = {12}, urldate = {2023-02-03}, journal = {Proceedings of the VLDB Endowment}, author = {Chanial, Camille and Dziri, Rédouane and Galhardas, Helena and Leblay, Julien and Nguyen, Minh-Huong Le and Manolescu, Ioana}, month = aug, year = {2018}, pages = {2030--2033}, }
@article{hu_survey_2018, title = {A survey on online feature selection with streaming features}, volume = {12}, issn = {2095-2236}, url = {https://doi.org/10.1007/s11704-016-5489-3}, doi = {10.1007/s11704-016-5489-3}, abstract = {In the era of big data, the dimensionality of data is increasing dramatically in many domains. To deal with high dimensionality, online feature selection becomes critical in big data mining. Recently, online selection of dynamic features has received much attention. In situations where features arrive sequentially over time, we need to perform online feature selection upon feature arrivals. Meanwhile, considering grouped features, it is necessary to deal with features arriving by groups. To handle these challenges, some state-of-the-art methods for online feature selection have been proposed. In this paper, we first give a brief review of traditional feature selection approaches. Then we discuss specific problems of online feature selection with feature streams in detail. A comprehensive review of existing online feature selection methods is presented by comparing with each other. Finally, we discuss several open issues in online feature selection.}, language = {en}, number = {3}, urldate = {2022-07-29}, journal = {Frontiers of Computer Science}, author = {Hu, Xuegang and Zhou, Peng and Li, Peipei and Wang, Jing and Wu, Xindong}, month = jun, year = {2018}, keywords = {big data, feature selection, feature stream, online feature selection}, pages = {479--493}, }
@article{abdullatif_clustering_2018, title = {Clustering of nonstationary data streams: {A} survey of fuzzy partitional methods}, volume = {8}, issn = {1942-4795}, shorttitle = {Clustering of nonstationary data streams}, url = {https://onlinelibrary.wiley.com/doi/abs/10.1002/widm.1258}, doi = {10.1002/widm.1258}, abstract = {Data streams have arisen as a relevant research topic during the past decade. They are real-time, incremental in nature, temporally ordered, massive, contain outliers, and the objects in a data stream may evolve over time (concept drift). Clustering is often one of the earliest and most important steps in the streaming data analysis workflow. A comprehensive literature is available about stream data clustering; however, less attention is devoted to the fuzzy clustering approach, even though the nonstationary nature of many data streams makes it especially appealing. This survey discusses relevant data stream clustering algorithms focusing mainly on fuzzy methods, including their treatment of outliers and concept drift and shift.}, language = {en}, number = {4}, urldate = {2022-07-29}, journal = {WIREs Data Mining and Knowledge Discovery}, author = {Abdullatif, Amr and Masulli, Francesco and Rovetta, Stefano}, year = {2018}, note = {\_eprint: https://onlinelibrary.wiley.com/doi/pdf/10.1002/widm.1258}, keywords = {data streams, fuzzy clustering, nonstationary data, survey}, pages = {e1258}, }
@article{von_birgelen_self-organizing_2018, series = {51st {CIRP} {Conference} on {Manufacturing} {Systems}}, title = {Self-{Organizing} {Maps} for {Anomaly} {Localization} and {Predictive} {Maintenance} in {Cyber}-{Physical} {Production} {Systems}}, volume = {72}, issn = {2212-8271}, url = {https://www.sciencedirect.com/science/article/pii/S221282711830307X}, doi = {10.1016/j.procir.2018.03.150}, abstract = {Modern Cyber-Physical Production Systems provide large amounts of data such as sensor and control signals or configuration parameters. The available data enables unsupervised, data-driven solutions for model-based anomaly detection, anomaly localization and predictive maintenance: models which represent the normal behaviour of the system are learned from data. Then, live data from the system can be compared to the predictions of the model to detect faults, perform fault diagnosis and derive the overall condition of a system or its components. In this paper we use self-organizing maps for the aforementioned tasks and evaluate the presented methods on several real-world systems.}, language = {en}, urldate = {2022-05-16}, journal = {Procedia CIRP}, author = {von Birgelen, Alexander and Buratti, Davide and Mager, Jens and Niggemann, Oliver}, month = jan, year = {2018}, keywords = {CPPS, SOM, anomaly detection, anomaly localization, cyber-physical production system, data-driven, diagnosis, predictive maintenance, self-organizing map}, pages = {480--485}, }
@article{tidriri_new_2018, series = {10th {IFAC} {Symposium} on {Fault} {Detection}, {Supervision} and {Safety} for {Technical} {Processes} {SAFEPROCESS} 2018}, title = {A {New} {Multi}-{Objective} {Decision}-{Making} {Approach} {Applied} to the {Tennessee} {Eastman} {Process}}, volume = {51}, issn = {2405-8963}, url = {https://www.sciencedirect.com/science/article/pii/S2405896318324169}, doi = {10.1016/j.ifacol.2018.09.697}, abstract = {In this paper, a generic framework and a new methodology aiming at the decision fusion of various Fault Detection and Diagnosis (FDD) methods are proposed. The framework consists of a discrete Bayesian Network (BN) and can handle all FDD methods, regardless of their a priori knowledge or requirements. The methodology expresses the FDD objectives to achieve the desired performance and results in a theoretical learning of the BN parameters. The development leads to a multi-objective problem under constraints, resolved with a lexicographic method. The effectiveness of the proposed Multi-Objective Decision-Making (MODM) approach is validated through the Tennessee Eastman Process (TEP), as a challenging industrial benchmark problem. The application shows the significant improvement in FDD performances that can be ensured by the proposed methodology, in terms of a high fault detection rate and a small false alarm rate.}, language = {en}, number = {24}, urldate = {2022-05-02}, journal = {IFAC-PapersOnLine}, author = {Tidriri, Khaoula and Tiplica, Teodor and Chatti, Nizar and Verron, Sylvain}, month = jan, year = {2018}, keywords = {Complex systems, Decision fusion, Fault detection, Fault isolation, Generic framework}, pages = {1212--1219}, }
@inproceedings{ren_intelligent_2018, title = {An {Intelligent} {Fault} {Detection} {Method} {Based} on {Sparse} {Auto}-{Encoder} for {Industrial} {Process} {Systems}: {A} {Case} {Study} on {Tennessee} {Eastman} {Process} {Chemical} {System}}, volume = {01}, shorttitle = {An {Intelligent} {Fault} {Detection} {Method} {Based} on {Sparse} {Auto}-{Encoder} for {Industrial} {Process} {Systems}}, doi = {10.1109/IHMSC.2018.00051}, abstract = {This paper introduces a deep learning approach to fault detection based on signal analysis and processing; it is built on a sparse auto-encoder and can be employed for unsupervised learning to automatically extract features of complex data-sets for fault detection. This sparse auto-encoder can be employed to extract features from the unrecognized signals to achieve intelligent identification. The hidden layer of the auto-encoder can be considered an over-complete dictionary, which can be employed to reconstruct the input signals to extract data-set features without supervision. Furthermore, the sparse auto-encoder can be considered a method to build up a specific architecture describing the industrial process system, not only avoiding the requirement of a large amount of data in the training step, but also handling the problem of small training samples. Finally, the application of this method to the Tennessee Eastman process chemical system demonstrates the effectiveness and reliability of the proposed method, and the results show its excellent performance on fault detection for industrial process systems.}, booktitle = {2018 10th {International} {Conference} on {Intelligent} {Human}-{Machine} {Systems} and {Cybernetics} ({IHMSC})}, author = {Ren, Hao and Chai, Yi and Qu, Jianfeng and Zhang, Ke and Tang, Qiu}, month = aug, year = {2018}, keywords = {Chemicals, Dictionaries, Fault Detection, Fault detection, Feature extraction, Process control, Soft-max Classifier, Sparse Auto-Encoder, Tennessee Eastman Process System, Training}, pages = {190--193}, }
@article{fontes_evaluation_2018, title = {Evaluation of a {Hybrid} {Clustering} {Approach} for a {Benchmark} {Industrial} {System}}, volume = {57}, issn = {0888-5885}, url = {https://doi.org/10.1021/acs.iecr.8b00429}, doi = {10.1021/acs.iecr.8b00429}, abstract = {The paper discusses a novel algorithm for classifying data represented through multivariate time series based on similarity metrics. To improve on the performance of existing classification methods based on a single similarity, the method used in this study is based on a combination of the principal component analysis similarity factor and the average-based Euclidean distance within a fuzzy clustering approach. Additionally, an approach is proposed to cope with the changes of these metrics over the time window, improving the similarity analysis between the objects. The method is applied to the Tennessee Eastman process, a well-known benchmark industrial system used to compare various fault detection and diagnosis approaches. The results were compared with standard multivariate techniques, showing the efficiency and flexibility of the proposed method in fault detection and classification problems, when considering different types of failures, process variables, and changes in operating conditions.}, number = {32}, urldate = {2022-05-02}, journal = {Industrial \& Engineering Chemistry Research}, author = {Fontes, Cristiano Hora and Budman, Hector}, month = aug, year = {2018}, note = {Publisher: American Chemical Society}, pages = {11039--11049}, }
@article{thomas_data_2018, series = {Big {Data}: {Data} {Science} for {Process} {Control} and {Operations}}, title = {Data mining and clustering in chemical process databases for monitoring and knowledge discovery}, volume = {67}, issn = {0959-1524}, url = {https://www.sciencedirect.com/science/article/pii/S095915241730032X}, doi = {10.1016/j.jprocont.2017.02.006}, abstract = {Modern chemical plants maintain large historical databases recording past sensor measurements which advanced process monitoring techniques analyze to help plant operators and engineers interpret the meaning of live trends in databases. However, many of the best process monitoring methods require data organized into groups before training is possible. In practice, such organization rarely exists and the time required to create classified training data is an obstacle to the use of advanced process monitoring strategies. Data mining and knowledge discovery techniques drawn from computer science literature can help engineers find fault states in historical databases and group them together with little detailed knowledge of the process. This study evaluates how several data clustering and feature extraction techniques work together to reveal useful trends in industrial chemical process data. Two studies on an industrial scale separation tower and the Tennessee Eastman process simulation demonstrate data clustering and feature extraction effectively revealing significant process trends from high dimensional, multivariate data. Process knowledge and supervised clustering metrics compare the cluster results against true labels in the data to compare performance of different combinations of dimensionality reduction and data clustering approaches.}, language = {en}, urldate = {2022-05-02}, journal = {Journal of Process Control}, author = {Thomas, Michael C. and Zhu, Wenbo and Romagnoli, Jose A.}, month = jul, year = {2018}, keywords = {Data clustering, Data mining, Dimensionality reduction, Knowledge discovery}, pages = {160--175}, }
@article{tidriri_model-based_2018, title = {Model-based fault detection and diagnosis of complex chemical processes: {A} case study of the {Tennessee} {Eastman} process}, volume = {232}, issn = {0959-6518}, shorttitle = {Model-based fault detection and diagnosis of complex chemical processes}, url = {https://doi.org/10.1177/0959651818764510}, doi = {10.1177/0959651818764510}, abstract = {Fault detection and diagnosis for industrial systems has been an important field of research during the past years. Among these systems, the Tennessee Eastman process is extensively used as a realistic benchmark to test and compare different fault detection and diagnosis strategies. In this context, data-driven approach has been widely applied for fault detection and diagnosis of the Tennessee Eastman process, by exploiting the massive amount of available measurement data. However, only few published works had attempted to deal with the dynamic behavior of the whole system including the mixing zone, circulating pumps, the reactor, the separator, the stripper, and so on, because of the difficulty of modeling physical phenomena that may occur in such complex system. In this article, an accurate model of the Tennessee Eastman process, properly tailored for fault detection and diagnosis purposes, is provided. This model shows better fault detection and diagnosis performances than all the others proposed in the literature and gives better or comparable results with the data-driven approaches. This work uses the bond graph methodology to systematically develop computational and graphical model. This methodology provides a physical understanding of the system and a description of its dynamic behavior. The bond graph model is then used for monitoring purposes by generating formal fault indicators, called residuals, and algorithms for fault detection and diagnosis. Hence, abnormal situations are detected by supervising the residuals’ evolution and faults are isolated using the nature of the violated residuals. Therefore, the dynamic model of the Tennessee Eastman process can now be used as a basis to achieve accurately different analysis through the causal and structural features of the bond graph tool.}, language = {en}, number = {6}, urldate = {2022-05-02}, journal = {Proceedings of the Institution of Mechanical Engineers, Part I: Journal of Systems and Control Engineering}, author = {Tidriri, Khaoula and Chatti, Nizar and Verron, Sylvain and Tiplica, Teodor}, month = jul, year = {2018}, note = {Publisher: IMECHE}, keywords = {Fault detection, Tennessee Eastman process, bond graph, fault diagnosis, model-based methods}, pages = {742--760}, }
@article{zou_fault_2018, title = {Fault {Diagnosis} of {Tennessee}-{Eastman} {Process} {Using} {Orthogonal} {Incremental} {Extreme} {Learning} {Machine} {Based} on {Driving} {Amount}}, volume = {48}, issn = {2168-2275}, doi = {10.1109/TCYB.2018.2830338}, abstract = {Fault diagnosis is important to the industrial process. This paper proposes an orthogonal incremental extreme learning machine based on driving amount (DAOI-ELM) for recognizing the faults of the Tennessee-Eastman process (TEP). The basic idea of DAOI-ELM is to incorporate the Gram-Schmidt orthogonalization method and driving amount into an incremental extreme learning machine (I-ELM). The case study for the 2-D nonlinear function and regression problems from the UCI dataset results show that DAOI-ELM can obtain better generalization ability and a more compact structure of ELM than I-ELM, convex I-ELM (CI-ELM), orthogonal I-ELM (OI-ELM), and bidirectional ELM. The experimental training and testing data are derived from the simulations of TEP. The performance of DAOI-ELM is evaluated and compared with that of the back propagation neural network, support vector machine, I-ELM, CI-ELM, and OI-ELM. The simulation results show that DAOI-ELM diagnoses the TEP faults better than other methods.}, number = {12}, journal = {IEEE Transactions on Cybernetics}, author = {Zou, Weidong and Xia, Yuanqing and Li, Huifang}, month = dec, year = {2018}, note = {Conference Name: IEEE Transactions on Cybernetics}, keywords = {Artificial neural networks, Driving amount, Fault diagnosis, Gram–Schmidt orthogonalization method, Network architecture, Predictive maintenance, Prognostics and health management, Tennessee-Eastman process (TEP), fault diagnosis}, pages = {3403--3410}, }
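For readers unfamiliar with the extreme learning machine family referenced in zou_fault_2018 above, the following is a minimal sketch of a basic (non-incremental) ELM regressor in NumPy: a random, fixed hidden layer followed by a least-squares solve for the output weights. It is not the DAOI-ELM of the paper (which adds Gram-Schmidt orthogonalization and a driving amount to an incremental ELM); the function names and toy data here are illustrative assumptions only.

```python
import numpy as np

def elm_train(X, y, n_hidden=100, seed=None):
    """Basic single-hidden-layer ELM: random input weights, least-squares output weights."""
    rng = np.random.default_rng(seed)
    W = rng.normal(size=(X.shape[1], n_hidden))   # random input weights (kept fixed)
    b = rng.normal(size=n_hidden)                 # random hidden biases (kept fixed)
    H = np.tanh(X @ W + b)                        # hidden-layer activations
    beta, *_ = np.linalg.lstsq(H, y, rcond=None)  # output weights via least squares
    return W, b, beta

def elm_predict(X, W, b, beta):
    return np.tanh(X @ W + b) @ beta

# Toy regression example (not related to the TEP data of the paper).
rng = np.random.default_rng(0)
X = rng.uniform(-1, 1, size=(500, 2))
y = np.sin(3 * X[:, 0]) + 0.5 * X[:, 1] ** 2
W, b, beta = elm_train(X, y, n_hidden=200, seed=0)
print("train MSE:", np.mean((elm_predict(X, W, b, beta) - y) ** 2))
```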
@inproceedings{fazai_fault_2018, title = {Fault {Detection} of the {Tennessee} {Eastman} {Process} using {Online} {Reduced} {Kernel} {PCA}}, doi = {10.23919/ECC.2018.8550213}, abstract = {In this paper, we propose an online reduced kernel principal component analysis (KPCA) method for process monitoring. The developed method consists in updating the KPCA model depending on the dictionary which contains linearly independent kernel functions and then using this new reduced KPCA model for process monitoring. The process monitoring performances are studied using Tennessee Eastman Process (TEP). The results demonstrate the effectiveness of the developed online KPCA technique compared to the classical online KPCA method.}, booktitle = {2018 {European} {Control} {Conference} ({ECC})}, author = {Fazai, Radhia and Mansouri, Majdi and Taouali, Okba and Harkat, Mohamed-Faouzi and Bouguila, Nassreddine and Nounou, Mohamed}, month = jun, year = {2018}, keywords = {Dictionaries, Erbium, Indexes, Inductors, Kernel, Monitoring, Principal component analysis}, pages = {2697--2702}, }
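As a rough illustration of kernel-PCA-based process monitoring of the kind discussed in fazai_fault_2018 above, the sketch below fits an offline scikit-learn KernelPCA model on nominal data and flags new samples by reconstruction error against an empirical control limit. The online reduced-dictionary update of the paper is not reproduced; the synthetic data, RBF kernel settings, and 99th-percentile threshold are assumptions for illustration.

```python
import numpy as np
from sklearn.decomposition import KernelPCA

rng = np.random.default_rng(0)
X_normal = rng.normal(size=(500, 10))                    # "normal operation" training data
X_new = np.vstack([rng.normal(size=(50, 10)),            # new normal samples
                   rng.normal(loc=3.0, size=(5, 10))])   # shifted, fault-like samples

kpca = KernelPCA(n_components=5, kernel="rbf", gamma=0.1,
                 fit_inverse_transform=True).fit(X_normal)

def spe(model, X):
    """Squared prediction (reconstruction) error per sample."""
    X_hat = model.inverse_transform(model.transform(X))
    return np.sum((X - X_hat) ** 2, axis=1)

threshold = np.percentile(spe(kpca, X_normal), 99)       # simple empirical control limit
alarms = spe(kpca, X_new) > threshold
print("flagged sample indices:", np.where(alarms)[0])
```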
@article{pratama_autonomous_2018, title = {Autonomous {Deep} {Learning}: {Incremental} {Learning} of {Denoising} {Autoencoder} for {Evolving} {Data} {Streams}}, shorttitle = {Autonomous {Deep} {Learning}}, url = {http://arxiv.org/abs/1809.09081}, abstract = {The generative learning phase of Autoencoder (AE) and its successor Denoising Autoencoder (DAE) enhances the flexibility of data stream method in exploiting unlabelled samples. Nonetheless, the feasibility of DAE for data stream analytic deserves in-depth study because it characterizes a fixed network capacity which cannot adapt to rapidly changing environments. An automated construction of a denoising autoencoder, namely deep evolving denoising autoencoder (DEVDAN), is proposed in this paper. DEVDAN features an open structure both in the generative phase and in the discriminative phase where input features can be automatically added and discarded on the fly. A network significance (NS) method is formulated in this paper and is derived from the bias-variance concept. This method is capable of estimating the statistical contribution of the network structure and its hidden units which precursors an ideal state to add or prune input features. Furthermore, DEVDAN is free of the problem-specific threshold and works fully in the single-pass learning fashion. The efficacy of DEVDAN is numerically validated using nine non-stationary data stream problems simulated under the prequential test-then-train protocol where DEVDAN is capable of delivering an improvement of classification accuracy to recently published online learning works while having flexibility in the automatic extraction of robust input features and in adapting to rapidly changing environments.}, urldate = {2022-03-19}, journal = {arXiv:1809.09081 [cs, stat]}, author = {Pratama, Mahardhika and Ashfahani, Andri and Ong, Yew Soon and Ramasamy, Savitha and Lughofer, Edwin}, month = sep, year = {2018}, note = {arXiv: 1809.09081}, keywords = {Computer Science - Artificial Intelligence, Computer Science - Machine Learning, Statistics - Machine Learning}, }
@inproceedings{sahoo_online_2018, title = {Online {Deep} {Learning}: {Learning} {Deep} {Neural} {Networks} on the {Fly}}, shorttitle = {Online {Deep} {Learning}}, url = {https://www.ijcai.org/proceedings/2018/369}, urldate = {2022-03-19}, booktitle = {Proceedings of the {Twenty}-{Seventh} {International} {Joint} {Conference} on {Artificial} {Intelligence} ({IJCAI}-18)}, author = {Sahoo, Doyen and Pham, Quang and Lu, Jing and Hoi, Steven C. H.}, year = {2018}, pages = {2660--2666}, }
@article{montiel_scikit-multiflow_2018, title = {Scikit-multiflow: a multi-output streaming framework}, volume = {19}, issn = {1532-4435}, shorttitle = {Scikit-multiflow}, abstract = {scikit-multiflow is a framework for learning from data streams and multi-output learning in Python. Conceived to serve as a platform to encourage the democratization of stream learning research, it provides multiple state-of-the-art learning methods, data generators and evaluators for different stream learning problems, including single-output, multi-output and multi-label. scikit-multiflow builds upon popular open source frameworks including scikit-learn, MOA and MEKA. Development follows the FOSS principles. Quality is enforced by complying with PEP8 guidelines, using continuous integration and functional testing. The source code is available at https://github.com/scikit-multiflow/scikit-multiflow.}, number = {1}, journal = {The Journal of Machine Learning Research}, author = {Montiel, Jacob and Read, Jesse and Bifet, Albert and Abdessalem, Talel}, month = jan, year = {2018}, keywords = {drift detection, machine learning, multi-output, python, stream data}, pages = {2915--2924}, }
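A minimal prequential (test-then-train) loop with scikit-multiflow, the framework described in montiel_scikit-multiflow_2018 above, might look like the sketch below. It assumes scikit-multiflow >= 0.5, where the Hoeffding tree is exposed as HoeffdingTreeClassifier (older releases used different class names); the stream generator and sample budget are arbitrary choices for illustration.

```python
from skmultiflow.data import SEAGenerator
from skmultiflow.trees import HoeffdingTreeClassifier

stream = SEAGenerator(random_state=1)       # synthetic data stream
clf = HoeffdingTreeClassifier()

n_correct = n_seen = 0
while n_seen < 2000 and stream.has_more_samples():
    X, y = stream.next_sample()             # one sample at a time
    if n_seen > 0:                          # test first ...
        n_correct += int(clf.predict(X)[0] == y[0])
    clf.partial_fit(X, y, classes=stream.target_values)   # ... then train
    n_seen += 1

print(f"prequential accuracy: {n_correct / (n_seen - 1):.3f}")
```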
@article{abdallah_activity_2018, title = {Activity {Recognition} with {Evolving} {Data} {Streams}: {A} {Review}}, volume = {51}, issn = {0360-0300}, shorttitle = {Activity {Recognition} with {Evolving} {Data} {Streams}}, url = {https://doi.org/10.1145/3158645}, doi = {10.1145/3158645}, abstract = {Activity recognition aims to provide accurate and opportune information on people’s activities by leveraging sensory data available in today’s sensory rich environments. Nowadays, activity recognition has become an emerging field in the areas of pervasive and ubiquitous computing. A typical activity recognition technique processes data streams that evolve from sensing platforms such as mobile sensors, on body sensors, and/or ambient sensors. This article surveys the two overlapped areas of research of activity recognition and data stream mining. The perspective of this article is to review the adaptation capabilities of activity recognition techniques in streaming environment. Categories of techniques are identified based on different features in both data streams and activity recognition. The pros and cons of the algorithms in each category are analysed, and the possible directions of future research are indicated.}, number = {4}, urldate = {2022-03-15}, journal = {ACM Computing Surveys}, author = {Abdallah, Zahraa S. and Gaber, Mohamed Medhat and Srinivasan, Bala and Krishnaswamy, Shonali}, month = jul, year = {2018}, keywords = {Activity recognition, adaptive learning, stream mining, transfer learning}, pages = {71:1--71:36}, }
@inproceedings{wu_design_2018, title = {Design on {Fault} {Diagnosis} {Expert} {System} for {Railway} {Signal} {Equipment}}, isbn = {978-94-6252-531-3}, url = {https://www.atlantis-press.com/proceedings/icmmct-18/25898536}, doi = {10.2991/icmmct-18.2018.7}, abstract = {In view of the fact that the fault of railway signal equipment is difficult to diagnose, this paper designs an expert system to simulate human experts to make decisions and improve the science of fault diagnosis. The design includes four aspects: First, a system structure composed of a database, a knowledge base, an inference engine, and an interpretation...}, language = {en}, urldate = {2022-03-05}, publisher = {Atlantis Press}, author = {Wu, Guangrong}, month = jun, year = {2018}, note = {ISSN: 2352-5401}, pages = {36--41}, }
@book{sutton_reinforcement_2018, title = {Reinforcement {Learning}, second edition: {An} {Introduction}}, isbn = {978-0-262-35270-3}, shorttitle = {Reinforcement {Learning}, second edition}, abstract = {The significantly expanded and updated new edition of a widely used text on reinforcement learning, one of the most active research areas in artificial intelligence.Reinforcement learning, one of the most active research areas in artificial intelligence, is a computational approach to learning whereby an agent tries to maximize the total amount of reward it receives while interacting with a complex, uncertain environment. In Reinforcement Learning, Richard Sutton and Andrew Barto provide a clear and simple account of the field's key ideas and algorithms. This second edition has been significantly expanded and updated, presenting new topics and updating coverage of other topics.Like the first edition, this second edition focuses on core online learning algorithms, with the more mathematical material set off in shaded boxes. Part I covers as much of reinforcement learning as possible without going beyond the tabular case for which exact solutions can be found. Many algorithms presented in this part are new to the second edition, including UCB, Expected Sarsa, and Double Learning. Part II extends these ideas to function approximation, with new sections on such topics as artificial neural networks and the Fourier basis, and offers expanded treatment of off-policy learning and policy-gradient methods. Part III has new chapters on reinforcement learning's relationships to psychology and neuroscience, as well as an updated case-studies chapter including AlphaGo and AlphaGo Zero, Atari game playing, and IBM Watson's wagering strategy. The final chapter discusses the future societal impacts of reinforcement learning.}, language = {en}, publisher = {MIT Press}, author = {Sutton, Richard S. and Barto, Andrew G.}, month = nov, year = {2018}, note = {Google-Books-ID: uWV0DwAAQBAJ}, keywords = {Computers / Artificial Intelligence / General}, }
@inproceedings{korvesis_predictive_2018, title = {Predictive {Maintenance} in {Aviation}: {Failure} {Prediction} from {Post}-{Flight} {Reports}}, shorttitle = {Predictive {Maintenance} in {Aviation}}, doi = {10.1109/ICDE.2018.00160}, abstract = {In this paper we present an approach to tackle the problem of event prediction for the purpose of performing predictive maintenance in aviation. Given a collection of recorded events that correspond to equipment failures, our method predicts the next occurrence of one or more events of interest (target events or critical failures). Our objective is to develop an alerting system that would notify aviation engineers well in advance for upcoming aircraft failures, providing enough time to prepare the corresponding maintenance actions. We formulate a regression problem in order to approximate the risk of occurrence of a target event, given the past occurrences of other events. In order to achieve the best results we employed a multiple instance learning scheme (multiple instance regression) along with extensive data preprocessing. We applied our method on data coming from a fleet of aircraft and our predictions involve failures of components onboard, specifically components that are related to the landing gear. The event logs correspond to post flight reports retrieved from multiple aircraft during several years of operation. To the best of our knowledge, this paper is the first attempt on aircraft failure prediction using post flight report data and finally, our findings show high potential impact on the aviation industry.}, booktitle = {2018 {IEEE} 34th {International} {Conference} on {Data} {Engineering} ({ICDE})}, author = {Korvesis, Panagiotis and Besseau, Stephane and Vazirgiannis, Michalis}, month = apr, year = {2018}, note = {ISSN: 2375-026X}, keywords = {Aircraft, Electronic mail, Feature extraction, Hardware, Monitoring, Predictive maintenance, aviation, failure prediction, predictive maintenance}, pages = {1414--1422}, }
@article{pan_novel_2018, title = {Novel battery state-of-health online estimation method using multiple health indicators and an extreme learning machine}, volume = {160}, issn = {0360-5442}, url = {https://www.sciencedirect.com/science/article/pii/S0360544218312854}, doi = {10.1016/j.energy.2018.06.220}, abstract = {Battery health monitoring and management is critically important for electric vehicle performance and economy. This paper presents a multiple health indicators-based and machine learning-enabled state-of-health estimator for prognostics and health management. The multiple online health indicators without the influence of different loading profiles are used as effective signatures of the health estimator for effective quantification of capacity degradation. An extreme learning machine is introduced to capture the underlying correlation between the extracted health indicators and capacity degradation to improve the speed and accuracy of machine learning for online estimation. The proposed estimator is also compared to the traditional BP neural network. The associated results indicate that the maximum estimation error of the proposed health management strategy is less than 2.5\%, and it has better performance and faster speed than the BP neural network.}, language = {en}, urldate = {2022-02-09}, journal = {Energy}, author = {Pan, Haihong and Lü, Zhiqiang and Wang, Huimin and Wei, Haiyan and Chen, Lin}, month = oct, year = {2018}, keywords = {Extreme learning machine, Health indicator, Li-ion battery, State-of-Health, ecml}, pages = {466--477}, }
@article{zhao_machine_2018, title = {Machine {Health} {Monitoring} {Using} {Local} {Feature}-{Based} {Gated} {Recurrent} {Unit} {Networks}}, volume = {65}, issn = {1557-9948}, doi = {10.1109/TIE.2017.2733438}, abstract = {In modern industries, machine health monitoring systems (MHMS) have been applied widely with the goal of realizing predictive maintenance including failures tracking, downtime reduction, and assets preservation. In the era of big machinery data, data-driven MHMS have achieved remarkable results in the detection of faults after the occurrence of certain failures (diagnosis) and prediction of the future working conditions and the remaining useful life (prognosis). The numerical representation for raw sensory data is the keystone for various successful MHMS. Conventional methods are labor-intensive as they usually depend on handcrafted features, which require expert knowledge. Inspired by the success of deep learning methods that redefine representation learning from raw data, we propose local feature-based gated recurrent unit (LFGRU) networks. It is a hybrid approach that combines handcrafted feature design with automatic feature learning for machine health monitoring. First, features from windows of input time series are extracted. Then, an enhanced bidirectional GRU network is designed and applied on the generated sequence of local features to learn the representation. A supervised learning layer is finally trained to predict machine condition. Experiments on three machine health monitoring tasks: tool wear prediction, gearbox fault diagnosis, and incipient bearing fault detection verify the effectiveness and generalization of the proposed LFGRU.}, number = {2}, journal = {IEEE Transactions on Industrial Electronics}, author = {Zhao, Rui and Wang, Dongzhe and Yan, Ruqiang and Mao, Kezhi and Shen, Fei and Wang, Jinjiang}, month = feb, year = {2018}, note = {Conference Name: IEEE Transactions on Industrial Electronics}, keywords = {Computational modeling, Data mining, Fault diagnosis, Feature extraction, Logic gates, Monitoring, Sensors, ecml, feature engineering, feature extraction, feature learning, gated recurrent unit (GRU), machine health monitoring (MHM), tool wear prediction}, pages = {1539--1548}, }
@article{ben_ali_online_2018, title = {Online automatic diagnosis of wind turbine bearings progressive degradations under real experimental conditions based on unsupervised machine learning}, volume = {132}, issn = {0003-682X}, url = {https://www.sciencedirect.com/science/article/pii/S0003682X1730333X}, doi = {10.1016/j.apacoust.2017.11.021}, abstract = {As a critical component, failures of high-speed shaft bearing in wind turbines cause the unplanned stoppage of electrical energy production. Investigations related to naturally progressed defects of high-speed shaft bearings are relatively scarce and the online assessment in damage severities is rarely available in the literature. In this sense, this paper presents a new online vibration-based diagnosis method for wind turbine high-speed bearing monitoring. The adaptive resonance theory 2 (ART2) is proposed for an unsupervised classification of the extracted features. The Randall model is adapted considering the geometry of the tested bearing to train the ART2 in the offline step. In fact, the time domain, the frequency domain, and the time-frequency domain are investigated for a better bearing fault characterization. Indeed, the use of real measured data from a wind turbine drivetrain proves that the proposed data-driven approach is suitable for wind turbine bearings online condition monitoring even under real experimental conditions. This method reveals a better generalization capability compared to previous works even with noisy measurements.}, language = {en}, urldate = {2022-02-09}, journal = {Applied Acoustics}, author = {Ben Ali, Jaouher and Saidi, Lotfi and Harrath, Salma and Bechhoefer, Eric and Benbouzid, Mohamed}, month = mar, year = {2018}, keywords = {ART2, Fault diagnosis, Feature extraction, High speed shaft bearing, Wind turbines, ecml}, pages = {167--181}, }
@inproceedings{rieth_issues_2018, address = {Cham}, series = {Advances in {Intelligent} {Systems} and {Computing}}, title = {Issues and {Advances} in {Anomaly} {Detection} {Evaluation} for {Joint} {Human}-{Automated} {Systems}}, isbn = {978-3-319-60384-1}, doi = {10.1007/978-3-319-60384-1_6}, abstract = {As human-managed systems become more complex, automated anomaly detection can provide assistance—but only if it is effective. Rigorous evaluation of automated detection is vital for determining its effectiveness before implementation into systems. We identified recurring issues in evaluation practices limiting the conclusions that can be applied from published studies to broader application. In this paper, we demonstrate the implications of these issues and illustrate solutions. We show how receiver operating characteristic curves can reveal performance tradeoffs masked by reporting of single metric results and how using multiple simulation data examples can prevent biases that result from evaluation using single training and testing examples. We also provide methods for incorporating detection latency into tradeoff analyses. Application of these methods will help to provide researchers, engineers, and decision makers with a more objective basis for anomaly detection performance evaluation, resulting in greater utility, better performance, and cost savings in systems engineering.}, language = {en}, booktitle = {Advances in {Human} {Factors} in {Robots} and {Unmanned} {Systems}}, publisher = {Springer International Publishing}, author = {Rieth, Cory A. and Amsel, Ben D. and Tran, Randy and Cook, Maia B.}, editor = {Chen, Jessie}, year = {2018}, keywords = {Anomaly detection, Automation evaluation, Receiver operating characteristic, Tennessee Eastman process simulation}, pages = {52--63}, }
@inproceedings{gulenko_detecting_2018, title = {Detecting {Anomalous} {Behavior} of {Black}-{Box} {Services} {Modeled} with {Distance}-{Based} {Online} {Clustering}}, doi = {10.1109/CLOUD.2018.00134}, abstract = {Reliable deployment of services is especially challenging in virtualized infrastructures, where the deep technological stack and the multitude of components necessitate automatic anomaly detection and remediation mechanisms. Traditional monitoring solutions observe the system and generate alarms when the collected metrics exceed predefined thresholds. The fixed thresholds rely on expert knowledge and can lead to numerous false alarms, while abnormal behavior that spans over multiple metrics, components, or system layers, may not be detected. We propose to use an unsupervised online clustering algorithm to create a model of the normal behavior of each monitored component with minimal human interaction and no impact on the monitored system. When an anomaly is detected, a human administrator or automatic remediation system can subsequently revert the component into a normal state. An experimental evaluation resulted in a high accuracy of our approach, indicating that it is suitable for anomaly detection in productive systems.}, booktitle = {2018 {IEEE} 11th {International} {Conference} on {Cloud} {Computing} ({CLOUD})}, author = {Gulenko, Anton and Schmidt, Florian and Acker, Alexander and Wallschläger, Marcel and Kao, Odej and Liu, Feng}, month = jul, year = {2018}, note = {ISSN: 2159-6190}, keywords = {Anomaly detection, Cloud computing, Data collection, Data models, Measurement, Monitoring, Virtual machine monitors, anomaly detection, cloud computing, machine learning, service virtualization}, pages = {912--915}, }
@article{rodriguez-ramos_approach_2018, title = {An approach to fault diagnosis with online detection of novel faults using fuzzy clustering tools}, volume = {113}, issn = {0957-4174}, url = {https://www.sciencedirect.com/science/article/pii/S0957417418304135}, doi = {10.1016/j.eswa.2018.06.055}, abstract = {This paper presents an approach to fault diagnosis with online detection of novel faults and automatic learning using fuzzy clustering techniques. In the off-line learning stage, the classifier is trained to diagnose the known faults and the normal operation state using the Density Oriented Fuzzy C-Means and the Kernel Fuzzy C-Means algorithms. In this stage, the historical data previously selected by experts, are firstly pre-processed to eliminate outliers and reduce the confusion in the classification process by using the Density Oriented Fuzzy C-Means algorithm. Later on, the Kernel Fuzzy C-Means algorithm is used for achieving greater separability among the classes and reducing the classification errors. Finally, the optimization of the two parameters used by these algorithms in the training stage is developed by using a bio-inspired optimization algorithm, namely the differential evolution. After the training, the classifier is used online (online diagnosis stage) in order to classify the new observations that are collected from the process. In this stage, the detection of novel faults based on density by using the DOFCM algorithm is applied. The algorithm analyzes the observations belonging to a window of time which were not classified into the known classes and it is determined if they are a new class or outliers. If a new class is identified, a procedure is developed to incorporate it to the known classes set. The proposed approach was validated using the Development and Application of Methods for Actuator Diagnosis in Industrial Control Systems (DAMADICS) benchmark. The results obtained indicate the feasibility of the proposed method.}, language = {en}, urldate = {2022-01-14}, journal = {Expert Systems with Applications}, author = {Rodríguez-Ramos, Adrián and da Silva Neto, Antônio José and Llanes-Santiago, Orestes}, month = dec, year = {2018}, keywords = {Automatic learning, Fuzzy clustering tools, Novel faults, On-line detection, Optimal parameters, Robust fault diagnosis}, pages = {200--212}, }
@article{wang_particle_2018, title = {Particle swarm optimization algorithm: an overview}, volume = {22}, issn = {1433-7479}, shorttitle = {Particle swarm optimization algorithm}, url = {https://doi.org/10.1007/s00500-016-2474-6}, doi = {10.1007/s00500-016-2474-6}, abstract = {Particle swarm optimization (PSO) is a population-based stochastic optimization algorithm motivated by intelligent collective behavior of some animals such as flocks of birds or schools of fish. Since presented in 1995, it has experienced a multitude of enhancements. As researchers have learned about the technique, they derived new versions aiming to different demands, developed new applications in a host of areas, published theoretical studies of the effects of the various parameters and proposed many variants of the algorithm. This paper introduces its origin and background and carries out the theory analysis of the PSO. Then, we analyze its present situation of research and application in algorithm structure, parameter selection, topology structure, discrete PSO algorithm and parallel PSO algorithm, multi-objective optimization PSO and its engineering applications. Finally, the existing problems are analyzed and future research directions are presented.}, language = {en}, number = {2}, urldate = {2021-11-29}, journal = {Soft Computing}, author = {Wang, Dongshu and Tan, Dapei and Liu, Lei}, month = jan, year = {2018}, pages = {387--408}, }
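To make the particle swarm optimization template surveyed in wang_particle_2018 above concrete, here is a minimal global-best PSO in NumPy. The inertia and acceleration coefficients are common textbook defaults, not values prescribed by the survey, and the sphere function is only a stand-in objective.

```python
import numpy as np

def pso(f, dim, n_particles=30, iters=100, lb=-5.0, ub=5.0,
        w=0.7, c1=1.5, c2=1.5, seed=None):
    """Minimal global-best PSO minimizing f over the box [lb, ub]^dim."""
    rng = np.random.default_rng(seed)
    x = rng.uniform(lb, ub, size=(n_particles, dim))        # positions
    v = np.zeros_like(x)                                     # velocities
    pbest = x.copy()                                         # personal bests
    pbest_val = np.apply_along_axis(f, 1, x)
    g = pbest[np.argmin(pbest_val)].copy()                   # global best
    for _ in range(iters):
        r1, r2 = rng.random(x.shape), rng.random(x.shape)
        v = w * v + c1 * r1 * (pbest - x) + c2 * r2 * (g - x)
        x = np.clip(x + v, lb, ub)
        val = np.apply_along_axis(f, 1, x)
        improved = val < pbest_val
        pbest[improved], pbest_val[improved] = x[improved], val[improved]
        g = pbest[np.argmin(pbest_val)].copy()
    return g, float(f(g))

# Usage: minimize the sphere function in 5 dimensions.
best, best_val = pso(lambda z: float(np.sum(z ** 2)), dim=5, seed=0)
print("best value:", best_val)
```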
@inproceedings{wadhwa_modified_2018, title = {Modified {DBSCAN} {Using} {Particle} {Swarm} {Optimization} for {Spatial} {Hotspot} {Identification}}, doi = {10.1109/IC3.2018.8530558}, abstract = {Spatial hotspots of irregular shape occur naturally in fields like epidemiology and earth science. Classical techniques for identifying hotspots are either based on scan statistics or clustering algorithms. These techniques result in hotspots of fixed shapes like circle, ellipse or straight line. Density based spatial clustering of applications with noise (DBSCAN) is one of the often used algorithms for finding non-geometric shaped clusters. It is highly sensitive to the values of its input variables (MinPoints and Epsilon) which are to be provided by the users. In this paper, we propose a Particle Swarm Optimization (PSO) based approach which automatically computes the values of MinPoints and Epsilon for given input data and finds the spatial hotspots. The modified DBSCAN approach is applied to six artificial datasets and purity of the resultant clustering is calculated. Achieved values of the purity function indicate the accuracy of the proposed method. Proposed approach is also applied to find out the hotspots for earthquake zoning.}, booktitle = {2018 {Eleventh} {International} {Conference} on {Contemporary} {Computing} ({IC3})}, author = {Wadhwa, Ankita and Thakur, Manish K.}, month = aug, year = {2018}, note = {ISSN: 2572-6129}, keywords = {Clustering, Clustering algorithms, Conferences, DBSCAN, Earthquake Zoning, Earthquakes, Hotspots, Microsoft Windows, Particle Swarm optimization, Particle swarm optimization, Scan statistics, Shape, Spatial databases}, pages = {1--3}, }
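wadhwa_modified_2018 above tunes DBSCAN's MinPoints and Epsilon with PSO; the sketch below only illustrates the underlying parameter sensitivity with a plain grid search over scikit-learn's DBSCAN, scoring each candidate pair by silhouette on the non-noise points. The search strategy, scoring function, and blob data are stand-ins, not the paper's method.

```python
import numpy as np
from sklearn.cluster import DBSCAN
from sklearn.datasets import make_blobs
from sklearn.metrics import silhouette_score

# Synthetic data with clusters of different densities.
X, _ = make_blobs(n_samples=600, centers=3, cluster_std=[0.4, 1.0, 2.0],
                  random_state=0)

best = None
for eps in np.linspace(0.2, 2.0, 10):
    for min_samples in (3, 5, 10):
        labels = DBSCAN(eps=eps, min_samples=min_samples).fit_predict(X)
        mask = labels != -1                   # ignore noise points when scoring
        if mask.sum() < 10 or len(set(labels[mask])) < 2:
            continue
        score = silhouette_score(X[mask], labels[mask])
        if best is None or score > best[0]:
            best = (score, eps, min_samples)

print("best (silhouette, eps, min_samples):", best)
```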
@inproceedings{zheng_dags_2018, title = {{DAGs} with {NO} {TEARS}: {Continuous} {Optimization} for {Structure} {Learning}}, volume = {31}, url = {https://proceedings.neurips.cc/paper/2018/file/e347c51419ffb23ca3fd5050202f9c3d-Paper.pdf}, booktitle = {Advances in {Neural} {Information} {Processing} {Systems}}, publisher = {Curran Associates, Inc.}, author = {Zheng, Xun and Aragam, Bryon and Ravikumar, Pradeep K and Xing, Eric P}, editor = {Bengio, S. and Wallach, H. and Larochelle, H. and Grauman, K. and Cesa-Bianchi, N. and Garnett, R.}, year = {2018}, }
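The key device in zheng_dags_2018 above is a smooth acyclicity constraint, h(W) = tr(exp(W ∘ W)) − d, which is zero exactly when the weighted adjacency matrix W encodes a DAG. The snippet below evaluates only that function on a small example, leaving out the continuous optimization the paper builds around it.

```python
import numpy as np
from scipy.linalg import expm

def notears_acyclicity(W: np.ndarray) -> float:
    """h(W) = tr(exp(W ∘ W)) - d; equals 0 iff the weighted graph W is acyclic."""
    d = W.shape[0]
    return float(np.trace(expm(W * W)) - d)   # W * W is the element-wise square

# An upper-triangular (hence acyclic) weight matrix vs. one with a 2-cycle.
W_dag = np.array([[0.0, 1.5,  0.0],
                  [0.0, 0.0, -2.0],
                  [0.0, 0.0,  0.0]])
W_cyc = W_dag.copy()
W_cyc[1, 0] = 0.8                              # adds the cycle 0 -> 1 -> 0
print(notears_acyclicity(W_dag))               # ~0.0
print(notears_acyclicity(W_cyc))               # > 0
```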
@article{rebello_integrated_2018, title = {An integrated approach for system functional reliability assessment using {Dynamic} {Bayesian} {Network} and {Hidden} {Markov} {Model}}, volume = {180}, issn = {0951-8320}, url = {https://www.sciencedirect.com/science/article/pii/S0951832018300346}, doi = {10.1016/j.ress.2018.07.002}, abstract = {This paper presents a novel methodology to estimate and predict the functional reliability of a system using system functional indicators and condition indicators of components. Instead of ‘system reliability’, the paper uses the terminology ‘system functional reliability’ because the functional indicators used in the methodology principally represent the system performance level or system functionality. The proposed model relates the degradation state of components to the system functional state. The model allows the use of system functional indicators and condition data of components in continuous time domain. The proposed methodology uses both Hidden Markov Model and Dynamic Bayesian Network for estimating and predicting system functional reliability. HMM helps in mapping the continuous data into hidden state probabilities while the system DBN helps in finding the posterior system state probability by considering the component dependencies within a system. The study is also extended to show how the external covariates can be incorporated into the proposed model. Since the external covariates accelerate the degradation of a component, the component state transition probability in the second model is adjusted to vary with respect to the covariates. A case study based on Tennessee Eastman Chemical Process is conducted to demonstrate the proposed methodology for system functional reliability estimation and prediction. Another simulation based case study is presented to describe how the external covariates are included in the presented methodology.}, language = {en}, urldate = {2021-11-17}, journal = {Reliability Engineering \& System Safety}, author = {Rebello, Sinda and Yu, Hongyang and Ma, Lin}, month = dec, year = {2018}, keywords = {Condition monitoring, Covariates, Dynamic Bayesian network, Functional indicators, Hidden Markov Model, Process data, System functional reliability, bayesian network, bn, dbn, dependent components, hmm, reliability assessment}, pages = {124--135}, }
@inproceedings{khan_adbscan_2018, title = {{ADBSCAN}: {Adaptive} {Density}-{Based} {Spatial} {Clustering} of {Applications} with {Noise} for {Identifying} {Clusters} with {Varying} {Densities}}, shorttitle = {{ADBSCAN}}, doi = {10.1109/CEEICT.2018.8628138}, abstract = {Density-based spatial clustering of applications with noise (DBSCAN) is a data clustering algorithm which has the high-performance rate for dataset where clusters have the constant density of data points. One of the significant attributes of this algorithm is noise cancellation. However, DBSCAN demonstrates reduced performances for clusters with different densities. Therefore, in this paper, an adaptive DBSCAN is proposed which can work significantly well for identifying clusters with varying densities.}, booktitle = {2018 4th {International} {Conference} on {Electrical} {Engineering} and {Information} {Communication} {Technology} ({iCEEiCT})}, author = {Khan, Mohammad Mahmudur Rahman and Siddique, Md. Abu Bakr and Arif, Rezoana Bente and Oishe, Mahjabin Rahman}, month = sep, year = {2018}, keywords = {Adaptive DBSCAN, Artificial intelligence, Clustering algorithms, Computer science, Data mining, Data models, Databases, Flowcharts, border point, clustering algorithms, core point, data mining, density connected, density-based methods, eps, eps-neighborhood, minPts, spatial clustering}, pages = {107--111}, }
@article{wu_k-pdm_2018, title = {K-{PdM}: {KPI}-{Oriented} {Machinery} {Deterioration} {Estimation} {Framework} for {Predictive} {Maintenance} {Using} {Cluster}-{Based} {Hidden} {Markov} {Model}}, volume = {6}, issn = {2169-3536}, shorttitle = {K-{PdM}}, doi = {10.1109/ACCESS.2018.2859922}, abstract = {Explosive increase of industrial data collected from sensors has brought increasing attractions to the data-driven predictive maintenance for industrial machines in cyber-physical systems (CPSs). Since machinery faults are always caused by performance deterioration of components, learning the deteriorating mode from observed sensor data facilitates the prognostics of impending faults and predicting the remaining useful life (RUL). In modern CPSs, several key performance indicators (KPIs) are monitored to detect the corresponding fine-grained deteriorating modes of industrial machines. However, the overall deterioration estimation and RUL prediction based on these KPIs with various patterns have been a great challenge, especially without labels of deteriorating index or uninterpretable of root causes. In this paper, we proposed K-PdM, a cluster-based hidden Markov model for the machinery deterioration estimation and RUL prediction based on multiple KPIs. The method uncovers the fine-grained deteriorating modes of machines through each unlabeled KPI data and learns a mapping between each deteriorating KPI index and RULs. Accordingly, an overall deterioration estimation and RUL prediction of machine are able to be achieved based on the combination of each KPI's deterioration estimation. Moreover, a set of interpretable semantic rules are set up to analyze the root cause of performance deterioration among KPIs. An experimental application is proposed to demonstrate its applicability based on the PHM08 data sets. The obtained results show their effectiveness to predict the RULs of machines.}, journal = {IEEE Access}, author = {Wu, Zhenyu and Luo, Hao and Yang, Yunong and Lv, Peng and Zhu, Xinning and Ji, Yang and Wu, Bian}, year = {2018}, note = {Conference Name: IEEE Access}, keywords = {Degradation, Estimation, Hidden Markov models, Indexes, Machinery, Prognostics and health management, Temperature measurement, Time series analysis, hidden Markov models (HMMs), remaining life assessment, time series analysis}, pages = {41676--41687}, }
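As a loose analogue of the per-KPI hidden Markov modelling in wu_k-pdm_2018 above, the sketch below fits a three-state Gaussian HMM to a synthetic drifting KPI with hmmlearn and reads off the most likely state sequence as coarse deterioration stages. The cluster-based state construction and RUL mapping of the paper are not reproduced; the signal, state count, and library choice are assumptions for illustration.

```python
import numpy as np
from hmmlearn.hmm import GaussianHMM

# Synthetic degradation-like signal: slow drift plus noise (stand-in for one KPI).
rng = np.random.default_rng(0)
kpi = np.cumsum(rng.normal(0.01, 0.1, size=1000)) + rng.normal(0, 0.05, size=1000)
X = kpi.reshape(-1, 1)

# Fit a 3-state Gaussian HMM and interpret states as coarse deterioration stages.
hmm = GaussianHMM(n_components=3, covariance_type="full", n_iter=100,
                  random_state=0).fit(X)
states = hmm.predict(X)
print("samples per hidden state:", np.bincount(states))
```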
@article{liu_artificial_2018, title = {Artificial intelligence for fault diagnosis of rotating machinery: {A} review}, volume = {108}, issn = {0888-3270}, shorttitle = {Artificial intelligence for fault diagnosis of rotating machinery}, url = {https://www.sciencedirect.com/science/article/pii/S0888327018300748}, doi = {10.1016/j.ymssp.2018.02.016}, abstract = {Fault diagnosis of rotating machinery plays a significant role for the reliability and safety of modern industrial systems. As an emerging field in industrial applications and an effective solution for fault recognition, artificial intelligence (AI) techniques have been receiving increasing attention from academia and industry. However, great challenges are met by the AI methods under the different real operating conditions. This paper attempts to present a comprehensive review of AI algorithms in rotating machinery fault diagnosis, from both the views of theory background and industrial applications. A brief introduction of different AI algorithms is presented first, including the following methods: k-nearest neighbour, naive Bayes, support vector machine, artificial neural network and deep learning. Then, a broad literature survey of these AI algorithms in industrial applications is given. Finally, the advantages, limitations, practical implications of different AI algorithms, as well as some new research trends, are discussed.}, language = {en}, urldate = {2021-11-02}, journal = {Mechanical Systems and Signal Processing}, author = {Liu, Ruonan and Yang, Boyuan and Zio, Enrico and Chen, Xuefeng}, month = aug, year = {2018}, keywords = {k-Nearest neighbour, Artificial intelligence, Artificial neural network, Deep learning, Fault diagnosis, Naive Bayes, Rotating machinery, Support vector machine}, pages = {33--47}, }
@article{silva_history_2018, title = {On the history of {Discrete} {Event} {Systems}}, volume = {45}, issn = {1367-5788}, url = {https://www.sciencedirect.com/science/article/pii/S1367578818300300}, doi = {10.1016/j.arcontrol.2018.03.004}, abstract = {The purpose of this article is to provide a viewpoint of the development of the field of Discrete Event Systems (DES). Necessarily incomplete, because of the breadth of topics and richness of research results, this paper is mainly presented from a System Theory-Automatic Control (AC) perspective. Written with a certain emphasis at the dawn of the discipline, the following five articles of this special section of the journal provide essential complements on its evolution along the four last decades. Starting with the identification of three basic threads along which many developments took place, the multidisciplinary and dynamic character of DES and the diversity of formalisms and techniques that are used are stressed.}, language = {en}, urldate = {2021-10-25}, journal = {Annual Reviews in Control}, author = {Silva, Manuel}, month = jan, year = {2018}, keywords = {Automata, Diagnosis, Discrete Event Systems, Discrete event simulation, History, Max-plus algebra, Perturbation analysis, Petri nets, Supervisory control}, pages = {213--222}, }
@inproceedings{siblini_craftml_2018, title = {{CRAFTML}, an {Efficient} {Clustering}-based {Random} {Forest} for {Extreme} {Multi}-label {Learning}}, url = {https://proceedings.mlr.press/v80/siblini18a.html}, abstract = {Extreme Multi-label Learning (XML) considers large sets of items described by a number of labels that can exceed one million. Tree-based methods, which hierarchically partition the problem into small scale sub-problems, are particularly promising in this context to reduce the learning/prediction complexity and to open the way to parallelization. However, the current best approaches do not exploit tree randomization which has shown its efficiency in random forests and they resort to complex partitioning strategies. To overcome these limits, we here introduce a new random forest based algorithm with a very fast partitioning approach called CRAFTML. Experimental comparisons on nine datasets from the XML literature show that it outperforms the other tree-based approaches. Moreover with a parallelized implementation reduced to five cores, it is competitive with the best state-of-the-art methods which run on one hundred-core machines.}, language = {en}, urldate = {2021-10-18}, booktitle = {Proceedings of the 35th {International} {Conference} on {Machine} {Learning}}, publisher = {PMLR}, author = {Siblini, Wissam and Kuntz, Pascale and Meyer, Frank}, month = jul, year = {2018}, note = {ISSN: 2640-3498}, pages = {4664--4673}, }
@article{gao_unsupervised_2018, title = {Unsupervised {Locality}-{Preserving} {Robust} {Latent} {Low}-{Rank} {Recovery}-{Based} {Subspace} {Clustering} for {Fault} {Diagnosis}}, volume = {6}, issn = {2169-3536}, doi = {10.1109/ACCESS.2018.2869923}, abstract = {With the increasing demand for unsupervised learning for fault diagnosis, the subspace clustering has been considered as a promising technique enabling unsupervised fault diagnosis. Although various subspace clustering methods have been developed to deal with high-dimensional and non-linear data, analyzing the intrinsic structure from the data is still challenging. To address this issue, a new subspace clustering method based on locality-preserving robust latent low-rank recovery (L2PLRR) was developed. Unlike conventional subspace clustering methods, the developed method maps the high-dimensional and non-linear data into a low-dimensional latent space by preserving local similarities of the data with the goal of resolving the difficulty in analyzing the high-dimensional data. Likewise, in the developed L2PLRR method, learned features correspond to low-rank coefficients of the data in the latent space, which will be further used for fault diagnosis (e.g., identification of health states of an object system). The efficacy of the developed L2PLRR method was verified with a bearing fault diagnosis application by comparing with conventional and state-of-the-art subspace clustering methods in terms of diagnostic performance.}, journal = {IEEE Access}, author = {Gao, Jie and Kang, Myeongsu and Tian, Jing and Wu, Lifeng and Pecht, Michael}, year = {2018}, note = {Conference Name: IEEE Access}, keywords = {Clustering methods, Data mining, Dictionaries, Distributed databases, Fault diagnosis, Machine learning, Robustness, locality-preserving robust latent low-rank recovery, subspace clustering, unsupervised feature learning}, pages = {52345--52354}, }
@inproceedings{albuquerque_learning_2018, title = {Learning to {Rank} with {Deep} {Autoencoder} {Features}}, doi = {10.1109/IJCNN.2018.8489646}, abstract = {Learning to rank in Information Retrieval is the problem of learning the full order of a set of documents from their partially observed order. Datasets used by learning to rank algorithms are growing enormously in terms of number of features, but it remains costly and laborious to reliably label large datasets. This paper is about learning feature transformations using inexpensive unlabeled data and available labeled data, that is, building alternate features so that it becomes easier for existing learning to rank algorithms to find better ranking models from labeled datasets that are limited in size and quality. Deep autoencoders have proven powerful as nonlinear feature extractors, and thus we exploit deep autoencoder features for semi-supervised learning to rank. Typical approaches for learning autoencoder features are based on updating model parameters using either unlabeled data only, or unlabeled data first and then labeled data. We propose a novel approach which updates model parameters using unlabeled and labeled data simultaneously, enabling label propagation from labeled to unlabeled data. We present a comprehensive study on how deep autoencoder features improve the ranking performance of representative learning to rank algorithms, revealing the importance of building an effective feature set to describe the input data.}, booktitle = {2018 {International} {Joint} {Conference} on {Neural} {Networks} ({IJCNN})}, author = {Albuquerque, Alberto and Amador, Tiago and Ferreira, Renato and Veloso, Adriano and Ziviani, Nivio}, month = jul, year = {2018}, note = {ISSN: 2161-4407}, keywords = {Data models, Decoding, Deep Autoencoders, Electronic mail, Feature extraction, Information retrieval, Learning to Rank, Prediction algorithms, Training}, pages = {1--8}, }
@inproceedings{baglietto_bayesian_2018, title = {A {Bayesian} {Network} approach for the reliability analysis of complex railway systems}, doi = {10.1109/ICIRT.2018.8641655}, abstract = {Railway system is a typical large-scale complex system with interconnected sub-systems, each containing several components. In this framework, cost-effective asset management and innovative smart maintenance strategies require an accurate estimation of the reliability at different levels, according to the system configuration. Moreover, in order to apply risk-based maintenance approaches, techniques for the evaluation of assets criticality, that take into account the causal-effect relation between system components, are necessary. This paper presents a Bayesian Network modeling approach for the reliability evaluation of a complex rail system, which is applied to a real world case study consisting of a railway signaling system, with the aim of showing the usefulness of the approach in achieving a good understanding of the behavior of such a complex system.}, booktitle = {2018 {International} {Conference} on {Intelligent} {Rail} {Transportation} ({ICIRT})}, author = {Baglietto, Emanuela and Consilvio, Alice and Febbraro, Angela Di and Papa, Federico and Sacco, Nicola}, month = dec, year = {2018}, keywords = {Bayes methods, Bayesian Network, Communication system signaling, Complex systems, Maintenance engineering, Rail transportation, Rails, Railway systems, Reliability, asset criticality, complex system, fault analysis, railway, reliability analysis, signalling}, pages = {1--6}, }
@article{gao_novel_2018, title = {A {Novel} {Multiplex} {Network}-{Based} {Sensor} {Information} {Fusion} {Model} and {Its} {Application} to {Industrial} {Multiphase} {Flow} {System}}, volume = {14}, issn = {1941-0050}, doi = {10.1109/TII.2017.2785384}, abstract = {Increasingly advanced technology allows the monitoring of complex systems from a wide variety of perspectives. But the exploration of such systems from a multichannel sensor information viewpoint remains a complicated challenge of ongoing interest. In this paper, first, based on a well-designed double-layer distributed-sector conductance (DLDSC) sensor, systematic oil-water and gas-liquid two-phase flow experiments are carried out to capture abundant spatiotemporal flow information. Second, well flow parameter measurement performance of the DLDSC sensor is effectively validated from the perspective of normalized conductance. Third, a novel multiplex network-based model is presented to implement data mining and characterize the evolution of flow dynamics. The results demonstrate that the model is powerful for the exploration of the spatial flow behaviors from heterogeneity to randomness in the studied two-phase flows.}, number = {9}, journal = {IEEE Transactions on Industrial Informatics}, author = {Gao, Zhongke and Dang, Weidong and Mu, Chaoxu and Yang, Yuxuan and Li, Shan and Grebogi, Celso}, month = sep, year = {2018}, note = {Conference Name: IEEE Transactions on Industrial Informatics}, keywords = {Complex networks, Complex systems, Industrial multiphase flow, Multiplexing, Voltage measurement, information fusion, multiplex network, signal analysis}, pages = {3982--3988}, }
@article{dang_novel_2018, title = {A novel time-frequency multilayer network for multivariate time series analysis}, volume = {20}, issn = {1367-2630}, url = {https://doi.org/10.1088/1367-2630/aaf51c}, doi = {10.1088/1367-2630/aaf51c}, abstract = {Unveiling complex dynamics of natural systems from a multivariate time series represents a research hotspot in a broad variety of areas. We develop a novel multilayer network analysis framework, i.e. multivariate time-frequency multilayer network (MTFM network), to peer into the complex system dynamics. Through mapping the system features into different frequency-based layers and inferring interactions (edges) among different channels (nodes), the MTFM network allows efficiently integrating time, frequency and spatial information hidden in a multivariate time series. We employ two dynamic systems to illustrate the effectiveness of the MTFM network. We first apply the MTFM network to analyze the 48-channel measurements from industrial oil–water flows and reveal the complex dynamics ruling the transition of different flow patterns. The MTFM network is then utilized to analyze 30-channel fatigue driving electroencephalogram signals. The results demonstrate that MTFM network enables to quantitatively characterize brain behavior associated with fatigue driving. Our MTFM network enriches the multivariate time series analysis theory and helps to better understand the complicated dynamical behaviors underlying complex systems.}, language = {en}, number = {12}, urldate = {2021-10-13}, journal = {New Journal of Physics}, author = {Dang, Weidong and Gao, Zhongke and Lv, Dongmei and Liu, Mingxu and Cai, Qing and Hong, Xiaolin}, month = dec, year = {2018}, note = {Publisher: IOP Publishing}, pages = {125005}, }
@article{ienco_fuzzy_2018, title = {Fuzzy extensions of the {DBScan} clustering algorithm}, volume = {22}, issn = {1433-7479}, url = {https://doi.org/10.1007/s00500-016-2435-0}, doi = {10.1007/s00500-016-2435-0}, abstract = {The DBSCAN algorithm is a well-known density-based clustering approach particularly useful in spatial data mining for its ability to find objects’ groups with heterogeneous shapes and homogeneous local density distributions in the feature space. Furthermore, it can be suitable as scaling down approach to deal with big data for its ability to remove noise. Nevertheless, it suffers for some limitations, mainly the inability to identify clusters with variable density distributions and partially overlapping borders, which is often a characteristics of both scientific data and real-world data. To this end, in this work, we propose three fuzzy extensions of the DBSCAN algorithm to generate clusters with distinct fuzzy density characteristics. The original version of DBSCAN requires two precise parameters (minPts and epsilon) to define locally dense areas which serve as seeds of the clusters. Nevertheless, precise values of both parameters may be not appropriate in all regions of the dataset. In the proposed extensions of DBSCAN, we define soft constraints to model approximate values of the input parameters. The first extension, named Fuzzy Core DBSCAN, relaxes the constraint on the neighbourhood’s density to generate clusters with fuzzy core points, i.e. cores with distinct density; the second extension, named Fuzzy Border DBSCAN, relaxes epsilon to allow the generation of clusters with overlapping borders. Finally, the third extension, named Fuzzy DBSCAN, subsumes the previous ones, thus allowing to generate clusters with both fuzzy cores and fuzzy overlapping borders. Our proposals are compared w.r.t. state of the art fuzzy clustering methods over real-world datasets.}, language = {en}, number = {5}, urldate = {2021-10-03}, journal = {Soft Computing}, author = {Ienco, Dino and Bordogna, Gloria}, month = mar, year = {2018}, pages = {1719--1730}, }
@article{wan_icgt_2018, title = {{ICGT}: {A} novel incremental clustering approach based on {GMM} tree}, volume = {117}, issn = {0169-023X}, shorttitle = {{ICGT}}, url = {https://www.sciencedirect.com/science/article/pii/S0169023X16303962}, doi = {10.1016/j.datak.2018.07.003}, abstract = {Streaming data presents new challenges to data mining algorithms. To conduct data clustering on the streaming data, this paper proposes a novel incremental clustering approach utilizing Gaussian Mixture Model (GMM), termed as ICGT (Incremental Construction of GMM Tree). The ICGT creates and dynamically adjusts a GMM tree consistent to the sequentially presented data. Each leaf node in the tree corresponds to a dense Gaussian distribution and each non-leaf node to a GMM. To update the GMM tree for insertion of the newly arrived data points, we introduce the definitions of node connectivity and connected subsets, and present the tree update algorithm. We further develop a clustering evaluation criterion and search strategy to determine the final partition of the data set based on the constructed GMM tree. We evaluated the proposed approach on synthetic and real-world data sets and compared ICGT with other incremental and static clustering methods. The experimental results confirm that our approach is effective and promising.}, language = {en}, urldate = {2021-10-01}, journal = {Data \& Knowledge Engineering}, author = {Wan, Yuchai and Liu, Xiabi and Wu, Yi and Guo, Lunhao and Chen, Qiming and Wang, Murong}, month = sep, year = {2018}, keywords = {Gaussian mixture model (GMM), Incremental data clustering, Streaming data, Tree structure}, pages = {71--86}, }
@article{wu_integrated_2018, title = {An {Integrated} {Ensemble} {Learning} {Model} for {Imbalanced} {Fault} {Diagnostics} and {Prognostics}}, volume = {6}, issn = {2169-3536}, doi = {10.1109/ACCESS.2018.2807121}, abstract = {With the development of artificial intelligence technology, data-driven fault diagnostics and prognostics in industrial systems have been a hot research area since the large volume of industrial data is being collected from the industrial process. However, imbalanced distributions exist pervasively between faulty and normal samples, which leads to imprecise fault diagnostics and prognostics. In this paper, an effective imbalance learning algorithm Easy-SMT is proposed. Easy-SMT is an integrated ensemble-based method, which comprises synthetic minority oversampling technique (SMOTE)-based oversampling policy to augment minority faulty classes and EasyEnsemble to transfer an imbalanced class learning problem into an ensemble-based balanced learning subproblem. We validate the feasibility and effectiveness of the proposed method in a real wind turbine failure forecast challenge, and our solution has won the third place among hundreds of teams. Moreover, we also evaluate the method on prognostics and health management 2015 challenge datasets, and the results show that the model could also achieve good performance on multiclass imbalance learning task compared with baseline classifiers.}, journal = {IEEE Access}, author = {Wu, Zhenyu and Lin, Wenfang and Ji, Yang}, year = {2018}, note = {Conference Name: IEEE Access}, keywords = {Data models, Feature extraction, Industrial prognostics, Predictive models, Sampling methods, Task analysis, Training, Wind turbines, class-imbalance learning, ensemble learning, machine learning}, pages = {8394--8402}, }
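The Easy-SMT idea in wu_integrated_2018 above combines SMOTE oversampling with EasyEnsemble; a generic approximation with imbalanced-learn could look like the following. The synthetic dataset, split, and estimator settings are placeholders rather than the paper's setup, and the combination shown is a plain pipeline, not the paper's exact integration.

```python
from collections import Counter
from imblearn.over_sampling import SMOTE
from imblearn.ensemble import EasyEnsembleClassifier
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

# Heavily imbalanced synthetic classification task (3% minority class).
X, y = make_classification(n_samples=3000, n_features=20, weights=[0.97, 0.03],
                           random_state=0)
X_tr, X_te, y_tr, y_te = train_test_split(X, y, stratify=y, random_state=0)

# Oversample the minority class on the training split only.
X_res, y_res = SMOTE(random_state=0).fit_resample(X_tr, y_tr)
print(Counter(y_tr), "->", Counter(y_res))

# Ensemble of balanced learners on the resampled data.
clf = EasyEnsembleClassifier(n_estimators=10, random_state=0).fit(X_res, y_res)
print(classification_report(y_te, clf.predict(X_te), digits=3))
```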
@article{yoo_novel_2018, title = {A {Novel} {Image} {Feature} for the {Remaining} {Useful} {Lifetime} {Prediction} of {Bearings} {Based} on {Continuous} {Wavelet} {Transform} and {Convolutional} {Neural} {Network}}, volume = {8}, copyright = {http://creativecommons.org/licenses/by/3.0/}, url = {https://www.mdpi.com/2076-3417/8/7/1102}, doi = {10.3390/app8071102}, abstract = {In data-driven methods for prognostics, the remaining useful lifetime (RUL) is predicted based on the health indicator (HI). The HI detects the condition of equipment or components by monitoring sensor data such as vibration signals. To construct the HI, multiple features are extracted from signals using time domain, frequency domain, and time–frequency domain analyses, and which are then fused. However, the process of selecting and fusing features for the HI is very complex and labor-intensive. We propose a novel time–frequency image feature to construct HI and predict the RUL. To convert the one-dimensional vibration signals to a two-dimensional (2-D) image, the continuous wavelet transform (CWT) extracts the time–frequency image features, i.e., the wavelet power spectrum. Then, the obtained image features are fed into a 2-D convolutional neural network (CNN) to construct the HI. The estimated HI from the proposed model is used for the RUL prediction. The accuracy of the RUL prediction is improved by using the image features. The proposed method compresses the complex process including feature extraction, selection, and fusion into a single algorithm by adopting a deep learning approach. The proposed method is validated using a bearing dataset provided by PRONOSTIA. The results demonstrate that the proposed method is superior to related studies using the same dataset.}, language = {en}, number = {7}, urldate = {2021-09-30}, journal = {Applied Sciences}, author = {Yoo, Youngji and Baek, Jun-Geol}, month = jul, year = {2018}, note = {Number: 7 Publisher: Multidisciplinary Digital Publishing Institute}, keywords = {bearings, continuous wavelet transform, convolutional neural network, health indicator, prognostics and health management, remaining useful lifetime}, pages = {1102}, }
@article{guo_machinery_2018, title = {Machinery health indicator construction based on convolutional neural networks considering trend burr}, volume = {292}, issn = {0925-2312}, url = {https://www.sciencedirect.com/science/article/pii/S0925231218302583}, doi = {10.1016/j.neucom.2018.02.083}, abstract = {In the study of data-driven prognostic methods of machinery, much attention has been paid to constructing health indicators (HIs). Most of the existing HIs, however, are manually constructed for a specific degradation process and need the prior knowledge of experts. Additionally, for the existing HIs, there are usually some outlier regions deviating to an expected degradation trend and reducing the performance of HIs. We refer to this phenomenon as trend burr. To deal with these problems, this paper proposes a convolutional neural network based HI construction method considering trend burr. The proposed method first learns features through convolution and pooling operations, and then these learned features are constructed into a HI through a nonlinear mapping operation. Furthermore, an outlier region correction technique is applied to detect and remove outlier regions existing in the HIs. Unlike traditional methods in which HIs are manually constructed, the proposed method aims to automatically construct HIs. Moreover, the outlier region correction technique enables the constructed HIs to be more effective. The effectiveness of the proposed method is verified using a bearing dataset. Through comparing with commonly used HI construction methods, it is demonstrated that the proposed method achieves better results in terms of trendability, monotonicity and scale similarity.}, language = {en}, urldate = {2021-09-30}, journal = {Neurocomputing}, author = {Guo, Liang and Lei, Yaguo and Li, Naipeng and Yan, Tao and Li, Ningbo}, month = may, year = {2018}, keywords = {Convolutional neural network, Deep learning, Machinery health indicator, Outlier region correction, Trend burr}, pages = {142--150}, }
@article{zhang_long_2018, series = {Special {Issue} on {Smart} {Manufacturing}}, title = {Long short-term memory for machine remaining life prediction}, volume = {48}, issn = {0278-6125}, url = {https://www.sciencedirect.com/science/article/pii/S0278612518300803}, doi = {10.1016/j.jmsy.2018.05.011}, abstract = {Reliable tracking of performance degradation in dynamical systems such as manufacturing machines or aircraft engines and consequently, prediction of the remaining useful life (RUL) are one of the major challenges in realizing smart manufacturing. Traditional machine learning algorithms are often constrained in adapting to the complex and non-linear characteristics of manufacturing systems and processes. With the rapid development of modern computational hardware, Deep Learning has emerged as a promising computational technique for dynamical system prediction due to its enhanced capability to characterize the system complexity, overcoming the shortcomings of those traditional methods. In this paper, a new approach based on the Long Short-Term Memory (LSTM) network, an architecture that is specialized in discovering the underlying patterns embedded in time series, is proposed to track the system degradation and consequently, to predict the RUL. The objectives of this paper are: 1) translating the raw sensor data to an interpretable health index with the aim of better describing the system health condition; and 2) tracking the historical system degradation for accurate prediction of its future health condition. Evaluation using NASA’s C-MAPSS dataset verifies the effectiveness of the proposed method. Compared with other machine learning techniques, LSTM turns out to be more powerful and accurate in revealing degradation patterns, enabled by its time-dependent structure in nature.}, language = {en}, urldate = {2021-09-28}, journal = {Journal of Manufacturing Systems}, author = {Zhang, Jianjing and Wang, Peng and Yan, Ruqiang and Gao, Robert X.}, month = jul, year = {2018}, keywords = {Deep learning, Health index, Prognostics, Remaining useful life}, pages = {78--86}, }
@article{wu_remaining_2018, title = {Remaining useful life estimation of engineered systems using vanilla {LSTM} neural networks}, volume = {275}, issn = {0925-2312}, url = {https://www.sciencedirect.com/science/article/pii/S0925231217309505}, doi = {10.1016/j.neucom.2017.05.063}, abstract = {Long Short-Term Memory (LSTM) networks are a significant branch of Recurrent Neural Networks (RNN), capable of learning long-term dependencies. In recent years, vanilla LSTM (a variation of original LSTM above) has become the state-of-the-art model for a variety of machine learning problems, especially Natural Language Processing (NLP). However, in industry, this powerful Deep Neural Network (DNN) has not aroused wide concern. In research focusing on Prognostics and Health Management (PHM) technology for complex engineered systems, Remaining Useful Life (RUL) estimation is one of the most challenging problems, which can lead to appropriate maintenance actions to be scheduled proactively to avoid catastrophic failures and minimize economic losses of the systems. Following that, this paper aims to propose utilizing vanilla LSTM neural networks to get good RUL prediction accuracy which makes the most of long short-term memory ability, in the cases of complicated operations, working conditions, model degradations and strong noises. In addition, to promote cognition ability about model degradation processes, a dynamic differential technology was proposed to extract inter-frame information. The whole proposition is illustrated and discussed by performing tests on a case of the health monitoring of aircraft turbofan engines which have four different issues. Performances of vanilla LSTM are benchmarked with standard RNN and Gated Recurrent Unit (GRU) LSTM. Results show the significance of performance improvement achieved by vanilla LSTM.}, language = {en}, urldate = {2021-09-28}, journal = {Neurocomputing}, author = {Wu, Yuting and Yuan, Mei and Dong, Shaopeng and Lin, Li and Liu, Yingqi}, month = jan, year = {2018}, keywords = {Dynamic differential feature, Long short-term memory neural network, Prognostics and health management, Remaining useful life estimation}, pages = {167--179}, }
@article{xu_ontology-based_2018, title = {Ontology-{Based} {Method} for {Fault} {Diagnosis} of {Loaders}}, volume = {18}, doi = {10.3390/s18030729}, abstract = {This paper proposes an ontology-based fault diagnosis method which overcomes the difficulty of understanding complex fault diagnosis knowledge of loaders and offers a universal approach for fault diagnosis of all loaders. This method contains the following components: (1) An ontology-based fault diagnosis model is proposed to achieve the integrating, sharing and reusing of fault diagnosis knowledge for loaders; (2) combined with ontology, CBR (case-based reasoning) is introduced to realize effective and accurate fault diagnoses following four steps (feature selection, case-retrieval, case-matching and case-updating); and (3) in order to cover the shortages of the CBR method due to the lack of concerned cases, ontology based RBR (rule-based reasoning) is put forward through building SWRL (Semantic Web Rule Language) rules. An application program is also developed to implement the above methods to assist in finding the fault causes, fault locations and maintenance measures of loaders. In addition, the program is validated through analyzing a case study.}, journal = {Sensors}, author = {Xu, Feixiang and Liu, Xinhui and Chen, Wei and Zhou, Chen and Cao, Bingwei}, month = feb, year = {2018}, pages = {729}, }
@article{nunez_ontoprog_2018, title = {{OntoProg}: {An} ontology-based model for implementing {Prognostics} {Health} {Management} in mechanical machines}, volume = {38}, issn = {1474-0346}, shorttitle = {{OntoProg}}, url = {https://www.sciencedirect.com/science/article/pii/S1474034617306080}, doi = {10.1016/j.aei.2018.10.006}, abstract = {Trends in Prognostics Health Management (PHM) have been introduced into mechanical items of manufacturing systems to predict Remaining Useful Life (RUL). PHM as an estimate of the RUL allows Condition-based Maintenance (CBM) before a functional failure occurs, avoiding corrective maintenance that generates unnecessary costs on production lines. An important factor for the implementation of PHM is the correct data collection for monitoring a machine’s health, in order to evaluate its reliability. Data collection, besides providing information about the state of degradation of the machine, also assists in the analysis of failures for intelligent interventions. Thus, the present work proposes the construction of an ontological model for future applications such as expert system in the support in the correct decision-making, besides assisting in the implementation of the PHM in several manufacturing scenarios, to be used in the future by web semantics tools focused on intelligent manufacturing, standardizing its concepts, terms, and the form of collection and processing of data. The methodological approach Design Science Research (DSR) is used to guide the development of this study. The model construction is achieved using the ontology development 101 procedure. The main result is the creation of the ontological model called OntoProg, which presents: a generic ontology addressing by international standards, capable of being used in several types of mechanical machines, of different types of manufacturing, the possibility of storing the knowledge contained in events of real activities that allow through consultations in SPARQL for decision-making which enable timely interventions of maintenance in the equipment of a real industry. The limitation of the work is that said model can be implemented only by specialists who have knowledge in ontology.}, language = {en}, urldate = {2021-09-28}, journal = {Advanced Engineering Informatics}, author = {Nuñez, David Lira and Borsato, Milton}, month = oct, year = {2018}, keywords = {Failure analysis, Ontology engineering, Prognostics Health Management}, pages = {746--759}, }
@article{cai_feature_2018, title = {Feature selection in machine learning: {A} new perspective}, volume = {300}, issn = {0925-2312}, shorttitle = {Feature selection in machine learning}, url = {https://www.sciencedirect.com/science/article/pii/S0925231218302911}, doi = {10.1016/j.neucom.2017.11.077}, abstract = {High-dimensional data analysis is a challenge for researchers and engineers in the fields of machine learning and data mining. Feature selection provides an effective way to solve this problem by removing irrelevant and redundant data, which can reduce computation time, improve learning accuracy, and facilitate a better understanding for the learning model or data. In this study, we discuss several frequently-used evaluation measures for feature selection, and then survey supervised, unsupervised, and semi-supervised feature selection methods, which are widely applied in machine learning problems, such as classification and clustering. Lastly, future challenges about feature selection are discussed.}, language = {en}, urldate = {2021-08-19}, journal = {Neurocomputing}, author = {Cai, Jie and Luo, Jiawei and Wang, Shulin and Yang, Sheng}, month = jul, year = {2018}, keywords = {Data mining, Dimensionality reduction, Feature selection, Machine learning}, pages = {70--79}, }
@article{khan_review_2018, title = {A review on the application of deep learning in system health management}, volume = {107}, issn = {0888-3270}, url = {https://www.sciencedirect.com/science/article/pii/S0888327017306064}, doi = {10.1016/j.ymssp.2017.11.024}, abstract = {Given the advancements in modern technological capabilities, having an integrated health management and diagnostic strategy becomes an important part of a system’s operational life-cycle. This is because it can be used to detect anomalies, analyse failures and predict the future state based on up-to-date information. By utilising condition data and on-site feedback, data models can be trained using machine learning and statistical concepts. Once trained, the logic for data processing can be embedded on on-board controllers whilst enabling real-time health assessment and analysis. However, this integration inevitably faces several difficulties and challenges for the community; indicating the need for novel approaches to address this vexing issue. Deep learning has gained increasing attention due to its potential advantages with data classification and feature extraction problems. It is an evolving research area with diverse application domains and hence its use for system health management applications must been researched if it can be used to increase overall system resilience or potential cost benefits for maintenance, repair, and overhaul activities. This article presents a systematic review of artificial intelligence based system health management with an emphasis on recent trends of deep learning within the field. Various architectures and related theories are discussed to clarify its potential. Based on the reviewed work, deep learning demonstrates plausible benefits for fault diagnosis and prognostics. However, there are a number of limitations that hinder its widespread adoption and require further development. Attention is paid to overcoming these challenges, with future opportunities being enumerated.}, language = {en}, urldate = {2021-02-06}, journal = {Mechanical Systems and Signal Processing}, author = {Khan, Samir and Yairi, Takehisa}, month = jul, year = {2018}, keywords = {Artificial intelligence, Deep learning, Fault analysis, Maintenance, Real-time processing, System health management}, pages = {241--265}, }
@article{kanawaday_machine_2018, title = {Machine learning for predictive maintenance of industrial machines using {IoT} sensor data}, volume = {2017-November}, issn = {2327-0594}, doi = {10.1109/ICSESS.2017.8342870}, language = {en}, urldate = {2020-12-15}, journal = {Proceedings of the IEEE International Conference on Software Engineering and Service Sciences, ICSESS}, author = {Kanawaday, Ameeth and Sane, Aditya}, year = {2018}, pages = {87--90}, }
@article{schmidt_cloud-enhanced_2018, title = {Cloud-enhanced predictive maintenance}, volume = {99}, issn = {1433-3015}, doi = {10.1007/s00170-016-8983-8}, language = {en}, number = {1-4}, urldate = {2020-12-15}, journal = {International Journal of Advanced Manufacturing Technology}, author = {Schmidt, Bernard and Wang, Lihui}, year = {2018}, note = {Number: 1-4}, }
@inproceedings{krawczyk_combining_2018, title = {Combining active learning with concept drift detection for data stream mining}, doi = {10.1109/BigData.2018.8622549}, abstract = {Most of data stream classifier learning methods assume that a true class of an incoming object is available right after the instance has been processed and new and labeled instance may be used to update a classifier's model, drift detection or capturing novel concepts. However, assumption that we have an unlimited and infinite access to class labels is very naive and usually would require a very high labeling cost. Therefore the applicability of many supervised techniques is limited in real-life stream analytics scenarios. Active learning emerges as a potential solution to this problem, concentrating on selecting only the most valuable instances and learning an accurate predictive model with as few labeling queries as possible. However learning from data streams differ from online learning as distribution of examples may change over time. Therefore, an active learning strategy must be able to handle concept drift and quickly adapt to evolving nature of data. In this paper we present novel active learning strategies that are designed for effective tackling of such changes. We assume that most labeling effort is required when concept drift occurs, as we need a representative sample of new concept to retrain properly the predictive model. Therefore, we propose active learning strategies that are guided by drift detection module to save budget for difficult and evolving instances. Three proposed strategies are based on learner uncertainty, dynamic allocation of budget over time and search space randomization. Experimental evaluation of the proposed methods prove their usefulness for reducing labeling effort in learning from drifting data streams.}, booktitle = {2018 {IEEE} {International} {Conference} on {Big} {Data} ({Big} {Data})}, author = {Krawczyk, B. and Pfahringer, B. and Woźniak, M.}, month = dec, year = {2018}, keywords = {Big Data, Data mining, Detectors, Dynamic scheduling, Labeling, Predictive models, Resource management, accurate predictive model, active learning, active learning strategy, class labels, concept drift, concept drift detection, data handling, data mining, data stream classifier learning methods, data stream mining, data streams, drift detection, drift detection module, evolving instances, high labeling cost, labeled instance, labeling effort, labeling queries, learning (artificial intelligence), machine learning, online learning, pattern classification, query processing, valuable instances}, pages = {2239--2244}, }
@article{krawczyk_online_2018, title = {Online ensemble learning with abstaining classifiers for drifting and noisy data streams}, volume = {68}, issn = {1568-4946}, url = {http://www.sciencedirect.com/science/article/pii/S1568494617307238}, doi = {10.1016/j.asoc.2017.12.008}, abstract = {Mining data streams is among most vital contemporary topics in machine learning. Such scenario requires adaptive algorithms that are able to process constantly arriving instances, adapt to potential changes in data, use limited computational resources, as well as be robust to any atypical events that may appear. Ensemble learning has proven itself to be an effective solution, as combining learners leads to an improved predictive power, more flexible drift handling, as well as ease of being implemented in high-performance computing environments. In this paper, we propose an enhancement of popular online ensembles by augmenting them with abstaining option. Instead of relying on a traditional voting, classifiers are allowed to abstain from contributing to the final decision. Their confidence level is being monitored for each incoming instance and only learners that exceed certain threshold are selected. We introduce a dynamic and self-adapting threshold that is able to adapt to changes in the data stream, by monitoring outputs of the ensemble and allowing to exploit underlying diversity in order to efficiently anticipate drifts. Additionally, we show that forcing uncertain classifiers to abstain from making a prediction is especially useful for noisy data streams. Our proposal is a lightweight enhancement that can be applied to any online ensemble method, improving its robustness to drifts and noise. Thorough experimental analysis validated through statistical tests proves the usefulness of the proposed approach.}, language = {en}, urldate = {2020-12-12}, journal = {Applied Soft Computing}, author = {Krawczyk, Bartosz and Cano, Alberto}, month = jul, year = {2018}, keywords = {Abstaining classifier, Concept drift, Data stream mining, Diversity, Ensemble learning, Machine learning}, pages = {677--692}, }
@inproceedings{paolanti_machine_2018, title = {Machine {Learning} approach for {Predictive} {Maintenance} in {Industry} 4.0}, doi = {10.1109/MESA.2018.8449150}, abstract = {Condition monitoring together with predictive maintenance of electric motors and other equipment used by the industry avoids severe economic losses resulting from unexpected motor failures and greatly improves the system reliability. This paper describes a Machine Learning architecture for Predictive Maintenance, based on Random Forest approach. The system was tested on a real industry example, by developing the data collection and data system analysis, applying the Machine Learning approach and comparing it to the simulation tool analysis. Data has been collected by various sensors, machine PLCs and communication protocols and made available to Data Analysis Tool on the Azure Cloud architecture. Preliminary results show a proper behavior of the approach on predicting different machine states with high accuracy.}, booktitle = {2018 14th {IEEE}/{ASME} {International} {Conference} on {Mechatronic} and {Embedded} {Systems} and {Applications} ({MESA})}, author = {Paolanti, M. and Romeo, L. and Felicetti, A. and Mancini, A. and Frontoni, E. and Loncarski, J.}, month = jul, year = {2018}, keywords = {Azure cloud architecture, Current measurement, Forecasting, Industries, Industry 4.0, Machine learning, Predictive maintenance, Time measurement, cloud computing, communication protocols, condition monitoring, data analysis, data collection, data system analysis, economic loss, electric motors, failure analysis, learning (artificial intelligence), machine PLCs, machine learning approach, maintenance engineering, motor failures, predictive maintenance, production engineering computing, programmable controllers, protocols, random forest approach, reliability, sensors, simulation tool analysis, system reliability}, pages = {1--6}, }
@article{bengtsson_importance_2018, title = {On the importance of combining "the new" with "the old" - {One} important prerequisite for maintenance in {Industry} 4.0}, volume = {25}, issn = {2351-9789}, doi = {10.1016/j.promfg.2018.06.065}, language = {en}, urldate = {2020-11-30}, journal = {Procedia Manufacturing}, author = {Bengtsson, Marcus and Lundström, Gunnar}, year = {2018}, pages = {118--125}, }
@article{sezer_industry_2018, title = {An {Industry} 4.0-{Enabled} {Low} {Cost} {Predictive} {Maintenance} {Approach} for {SMEs}}, doi = {10.1109/ICE.2018.8436307}, language = {en}, urldate = {2020-11-30}, journal = {2018 IEEE International Conference on Engineering, Technology and Innovation, ICE/ITMC 2018 - Proceedings}, author = {Sezer, Erim and Romero, David and Guedea, Federico and Macchi, Marco and Emmanouilidis, Christos}, year = {2018}, }
@article{lee_time_2018, title = {Time {Series} {Segmentation} through {Automatic} {Feature} {Learning}}, url = {http://arxiv.org/abs/1801.05394}, abstract = {Internet of things (IoT) applications have become increasingly popular in recent years, with applications ranging from building energy monitoring to personal health tracking and activity recognition. In order to leverage these data, automatic knowledge extraction - whereby we map from observations to interpretable states and transitions - must be done at scale. As such, we have seen many recent IoT data sets include annotations with a human expert specifying states, recorded as a set of boundaries and associated labels in a data sequence. These data can be used to build automatic labeling algorithms that produce labels as an expert would. Here, we refer to human-specified boundaries as breakpoints. Traditional changepoint detection methods only look for statistically-detectable boundaries that are defined as abrupt variations in the generative parameters of a data sequence. However, we observe that breakpoints occur on more subtle boundaries that are non-trivial to detect with these statistical methods. In this work, we propose a new unsupervised approach, based on deep learning, that outperforms existing techniques and learns the more subtle, breakpoint boundaries with a high accuracy. Through extensive experiments on various real-world data sets - including human-activity sensing data, speech signals, and electroencephalogram (EEG) activity traces - we demonstrate the effectiveness of our algorithm for practical applications. Furthermore, we show that our approach achieves significantly better performance than previous methods.}, urldate = {2020-11-23}, journal = {arXiv:1801.05394 [cs, stat]}, author = {Lee, Wei-Han and Ortiz, Jorge and Ko, Bongjun and Lee, Ruby}, month = jan, year = {2018}, note = {arXiv: 1801.05394}, keywords = {Computer Science - Artificial Intelligence, Computer Science - Machine Learning, Statistics - Machine Learning}, }
@article{salehi_survey_2018, title = {A {Survey} on {Anomaly} detection in {Evolving} {Data}: [with {Application} to {Forest} {Fire} {Risk} {Prediction}]}, volume = {20}, issn = {1931-0145}, shorttitle = {A {Survey} on {Anomaly} detection in {Evolving} {Data}}, url = {https://doi.org/10.1145/3229329.3229332}, doi = {10.1145/3229329.3229332}, abstract = {Traditionally most of the anomaly detection algorithms have been designed for 'static' datasets, in which all the observations are available at one time. In non-stationary environments on the other hand, the same algorithms cannot be applied as the underlying data distributions change constantly and the same models are not valid. Hence, we need to devise adaptive models that take into account the dynamically changing characteristics of environments and detect anomalies in 'evolving' data. Over the last two decades, many algorithms have been proposed to detect anomalies in evolving data. Some of them consider scenarios where a sequence of objects (called data streams) with one or multiple features evolves over time. Whereas the others concentrate on more complex scenarios, where streaming objects with one or multiple features have causal/non-causal relationships with each other. The latter can be represented as evolving graphs. In this paper, we categorize existing strategies for detecting anomalies in both scenarios including the state-of-the-art techniques. Since label information is mostly unavailable in real-world applications when data evolves, we review the unsupervised approaches in this paper. We then present an interesting application example, i.e., forest fire risk prediction, and conclude the paper with future research directions in this field for researchers and industry.}, number = {1}, urldate = {2020-11-12}, journal = {ACM SIGKDD Explorations Newsletter}, author = {Salehi, Mahsa and Rashidi, Lida}, month = may, year = {2018}, pages = {13--23}, }
@article{truong_ruptures_2018, title = {ruptures: change point detection in {Python}}, shorttitle = {ruptures}, url = {http://arxiv.org/abs/1801.00826}, abstract = {ruptures is a Python library for offline change point detection. This package provides methods for the analysis and segmentation of non-stationary signals. Implemented algorithms include exact and approximate detection for various parametric and non-parametric models. ruptures focuses on ease of use by providing a well-documented and consistent interface. In addition, thanks to its modular structure, different algorithms and models can be connected and extended within this package.}, urldate = {2020-10-02}, journal = {arXiv:1801.00826 [cs, stat]}, author = {Truong, Charles and Oudre, Laurent and Vayatis, Nicolas}, month = jan, year = {2018}, note = {arXiv: 1801.00826}, keywords = {Computer Science - Mathematical Software, Statistics - Computation}, }
@article{noauthor_novel_2018, title = {A {Novel} {Multi}-resolution {Representation} for {Streaming} {Time} {Series}}, volume = {129}, issn = {1877-0509}, url = {https://www.sciencedirect.com/science/article/pii/S187705091830293X}, doi = {10.1016/j.procs.2018.03.069}, language = {en}, urldate = {2020-10-01}, journal = {Procedia Computer Science}, month = jan, year = {2018}, note = {Publisher: Elsevier}, pages = {178--184}, }
@article{fan_unsupervised_2018, title = {Unsupervised data analytics in mining big building operational data for energy efficiency enhancement: {A} review}, volume = {159}, issn = {0378-7788}, shorttitle = {Unsupervised data analytics in mining big building operational data for energy efficiency enhancement}, url = {http://www.sciencedirect.com/science/article/pii/S0378778817326671}, doi = {10.1016/j.enbuild.2017.11.008}, abstract = {Building operations account for the largest proportion of energy use throughout the building life cycle. The energy saving potential is considerable taking into account the existence of a wide variety of building operation deficiencies. The advancement in information technologies has made modern buildings to be not only energy-intensive, but also information-intensive. Massive amounts of building operational data, which are in essence the reflection of actual building operating conditions, are available for knowledge discovery. It is very promising to extract potentially useful insights from big building operational data, based on which actionable measures for energy efficiency enhancement are devised. Data mining is an advanced technology for analyzing big data. It consists of two main types of data analytics, i.e., supervised and unsupervised analytics. Despite of the power of supervised analytics in predictive modeling, unsupervised analytics are more practical and promising in discovering novel knowledge given limited prior knowledge. This paper provides a comprehensive review on the current utilization of unsupervised data analytics in mining massive building operational data. The commonly used unsupervised analytics are summarized according to their knowledge representations and applications. The challenges and opportunities are elaborated as guidance for future research in this multi-disciplinary field.}, language = {en}, urldate = {2020-10-01}, journal = {Energy and Buildings}, author = {Fan, Cheng and Xiao, Fu and Li, Zhengdao and Wang, Jiayuan}, month = jan, year = {2018}, keywords = {Big data, Building energy efficiency, Building energy management, Building operational performance, Unsupervised data mining}, pages = {296--308}, }
@inproceedings{gomes_adaptive_2018, title = {Adaptive random forests for data stream regression}, booktitle = {European {Symposium} on {Artificial} {Neural} {Networks}, {Computational} {Intelligence} and {Machine} {Learning} ({ESANN})}, author = {Gomes, Heitor Murilo and Barddal, Jean Paul and Boiko Ferreira, Luis and Bifet, Albert}, year = {2018}, }
@inproceedings{manzoor_xstream_2018, address = {London, United Kingdom}, series = {{KDD} '18}, title = {{xStream}: {Outlier} {Detection} in {Feature}-{Evolving} {Data} {Streams}}, isbn = {978-1-4503-5552-0}, shorttitle = {{xStream}}, url = {https://doi.org/10.1145/3219819.3220107}, doi = {10.1145/3219819.3220107}, urldate = {2020-05-03}, booktitle = {Proceedings of the 24th {ACM} {SIGKDD} {International} {Conference} on {Knowledge} {Discovery} \& {Data} {Mining}}, publisher = {Association for Computing Machinery}, author = {Manzoor, Emaad and Lamba, Hemank and Akoglu, Leman}, month = jul, year = {2018}, keywords = {anomaly detection, data streams, evolving feature spaces, outlier detection}, pages = {1963--1972}, }
@article{liang_markovian_2018, title = {A {Markovian} model for power transformer maintenance}, volume = {99}, issn = {0142-0615}, url = {http://www.sciencedirect.com/science/article/pii/S0142061517321312}, doi = {10.1016/j.ijepes.2017.12.024}, language = {en}, urldate = {2020-05-05}, journal = {International Journal of Electrical Power \& Energy Systems}, author = {Liang, Zhenglin and Parlikad, Ajith}, month = jul, year = {2018}, keywords = {Condition-based maintenance, Reliability engineering and power transformer, Value of information}, pages = {175--182}, }
@article{su_real-time_2018, title = {Real-time big data analytics for hard disk drive predictive maintenance}, volume = {71}, issn = {0045-7906}, url = {http://www.sciencedirect.com/science/article/pii/S0045790617328409}, doi = {10.1016/j.compeleceng.2018.07.025}, language = {en}, urldate = {2020-05-04}, journal = {Computers \& Electrical Engineering}, author = {Su, Chuan-Jun and Huang, Shi-Feng}, month = oct, year = {2018}, keywords = {Apache Spark, Big data, Hard Disk Drive (HDD), Machine learning, Predictive maintenance}, pages = {93--101}, }
@article{baptista_forecasting_2018, title = {Forecasting fault events for predictive maintenance using data-driven techniques and {ARMA} modeling}, volume = {115}, issn = {0360-8352}, url = {http://www.sciencedirect.com/science/article/pii/S036083521730520X}, doi = {10.1016/j.cie.2017.10.033}, abstract = {Presently, time-based airline maintenance scheduling does not take fault predictions into account, but happens at fixed time-intervals. This may result in unnecessary maintenance interventions and also in situations where components are not taken out of service despite exceeding their designed risk of failure. To address this issue we propose a framework that can predict when a component/system will be at risk of failure in the future, and therefore, advise when maintenance actions should be taken. In order to facilitate such prediction, we employ an auto-regressive moving average (ARMA) model along with data-driven techniques, and compare the performance of multiple data-driven techniques. The ARMA model adds a new feature that is used within the data-driven model to give the final prediction. The novelty of our work is the integration of the ARMA methodology with data-driven techniques to predict fault events. This study reports on a real industrial case of unscheduled removals of a critical valve of the aircraft engine. Our results suggest that the support vector regression model can outperform the life usage model on the evaluation measures of sample standard deviation, median error, median absolute error, and percentage error. The generalized linear model provides an effective approach for predictive maintenance with comparable results to the baseline. The remaining data-driven models have a lower overall performance.}, journal = {Computers \& Industrial Engineering}, author = {Baptista, Marcia and Sankararaman, Shankar and Medeiros, Ivo P. de and Nascimento, Cairo and Prendinger, Helmut and Henriques, Elsa M. P.}, year = {2018}, keywords = {ARMA modeling, Aircraft prognostics, Data-driven techniques, Life usage modeling, Predictive maintenance, Real case study}, pages = {41--53}, }
@inproceedings{cachada_maintenance_2018, title = {Maintenance 4.0: {Intelligent} and {Predictive} {Maintenance} {System} {Architecture}}, volume = {1}, doi = {10.1109/ETFA.2018.8502489}, booktitle = {2018 {IEEE} 23rd {International} {Conference} on {Emerging} {Technologies} and {Factory} {Automation} ({ETFA})}, author = {Cachada, A. and Barbosa, J. and Leitão, P. and Geraldes, C. A. S. and Deusdado, L. and Costa, J. and Teixeira, C. and Teixeira, J. and Moreira, A. H. J. and Moreira, P. M. and Romero, L.}, year = {2018}, note = {ISSN: 1946-0740}, pages = {139--146}, }
@inproceedings{kaur_towards_2018, series = {{IOT} '18}, title = {Towards an {Open}-standards {Based} {Framework} for {Achieving} {Condition}-based {Predictive} {Maintenance}}, isbn = {978-1-4503-6564-2}, url = {http://doi.acm.org/10.1145/3277593.3277608}, doi = {10.1145/3277593.3277608}, booktitle = {Proceedings of the 8th {International} {Conference} on the {Internet} of {Things}}, publisher = {ACM}, author = {Kaur, Karamjit and Selway, Matt and Grossmann, Georg and Stumptner, Markus and Johnston, Alan}, year = {2018}, keywords = {IIoT, OIIE framework, OSA-CBM, OSA-EAI, condition-based predictive maintenance, industrie 4.0, maintenance 4.0}, pages = {16:1--16:8}, }
@inproceedings{amruthnath_fault_2018, title = {Fault class prediction in unsupervised learning using model-based clustering approach}, doi = {10.1109/INFOCT.2018.8356831}, booktitle = {2018 {International} {Conference} on {Information} and {Computer} {Technologies} ({ICICT})}, author = {Amruthnath, N. and Gupta, T.}, year = {2018}, pages = {5--12}, }
@book{kleppmann_designing_2018, edition = {1}, title = {Designing {Data}-{Intensive} {Applications}}, volume = {1}, isbn = {978-1-4493-7332-0}, publisher = {O'Reilly Media Inc.}, author = {Kleppmann, Martin}, year = {2018}, }
@article{alsina_use_2018, title = {On the use of machine learning methods to predict component reliability from data-driven industrial case studies}, volume = {94}, issn = {1433-3015}, url = {https://doi.org/10.1007/s00170-017-1039-x}, doi = {10.1007/s00170-017-1039-x}, number = {5}, journal = {The International Journal of Advanced Manufacturing Technology}, author = {Alsina, Emanuel F. and Chica, Manuel and Trawiński, Krzysztof and Regattieri, Alberto}, year = {2018}, pages = {2419--2433}, }
@inproceedings{snell_prototypical_2017, title = {Prototypical {Networks} for {Few}-shot {Learning}}, volume = {30}, url = {https://proceedings.neurips.cc/paper_files/paper/2017/hash/cb8da6767461f2812ae4290eac7cbc42-Abstract.html}, abstract = {We propose Prototypical Networks for the problem of few-shot classification, where a classifier must generalize to new classes not seen in the training set, given only a small number of examples of each new class. Prototypical Networks learn a metric space in which classification can be performed by computing distances to prototype representations of each class. Compared to recent approaches for few-shot learning, they reflect a simpler inductive bias that is beneficial in this limited-data regime, and achieve excellent results. We provide an analysis showing that some simple design decisions can yield substantial improvements over recent approaches involving complicated architectural choices and meta-learning. We further extend Prototypical Networks to zero-shot learning and achieve state-of-the-art results on the CU-Birds dataset.}, urldate = {2023-10-03}, booktitle = {Advances in {Neural} {Information} {Processing} {Systems}}, publisher = {Curran Associates, Inc.}, author = {Snell, Jake and Swersky, Kevin and Zemel, Richard}, year = {2017}, }
@article{bodyanskiy_evolving_2017, series = {Online {Real}-{Time} {Learning} {Strategies} for {Data} {Streams}}, title = {An evolving connectionist system for data stream fuzzy clustering and its online learning}, volume = {262}, issn = {0925-2312}, url = {https://www.sciencedirect.com/science/article/pii/S0925231217309785}, doi = {10.1016/j.neucom.2017.03.081}, abstract = {An evolving cascade neuro-fuzzy system and its online learning procedure are considered in this paper. The system is based on conventional Kohonen neurons. The proposed system solves a clustering task of non-stationary data streams under uncertainty conditions when data come in the form of a sequential stream in an online mode. A quality estimation process is defined by finding an optimal value of the used cluster validity index.}, language = {en}, urldate = {2021-10-01}, journal = {Neurocomputing}, author = {Bodyanskiy, Yevgeniy V. and Tyshchenko, Oleksii K. and Kopaliani, Daria S.}, month = nov, year = {2017}, keywords = {Data stream, Evolving connectionist system, Fuzzy clustering, Neuro-fuzzy network}, pages = {41--56}, }
@misc{kingma_adam_2017, title = {Adam: {A} {Method} for {Stochastic} {Optimization}}, shorttitle = {Adam}, url = {http://arxiv.org/abs/1412.6980}, doi = {10.48550/arXiv.1412.6980}, abstract = {We introduce Adam, an algorithm for first-order gradient-based optimization of stochastic objective functions, based on adaptive estimates of lower-order moments. The method is straightforward to implement, is computationally efficient, has little memory requirements, is invariant to diagonal rescaling of the gradients, and is well suited for problems that are large in terms of data and/or parameters. The method is also appropriate for non-stationary objectives and problems with very noisy and/or sparse gradients. The hyper-parameters have intuitive interpretations and typically require little tuning. Some connections to related algorithms, on which Adam was inspired, are discussed. We also analyze the theoretical convergence properties of the algorithm and provide a regret bound on the convergence rate that is comparable to the best known results under the online convex optimization framework. Empirical results demonstrate that Adam works well in practice and compares favorably to other stochastic optimization methods. Finally, we discuss AdaMax, a variant of Adam based on the infinity norm.}, urldate = {2023-05-20}, publisher = {arXiv}, author = {Kingma, Diederik P. and Ba, Jimmy}, month = jan, year = {2017}, note = {arXiv:1412.6980 [cs]}, keywords = {Computer Science - Machine Learning}, }
@incollection{aggarwal_introduction_2017, address = {Cham}, title = {An {Introduction} to {Outlier} {Analysis}}, isbn = {978-3-319-47578-3}, url = {https://doi.org/10.1007/978-3-319-47578-3_1}, abstract = {Outliers are also referred to as abnormalities, discordants, deviants, or anomalies in the data mining and statistics literature. In most applications, the data is created by one or more generating processes, which could either reflect activity in the system or observations collected about entities. When the generating process behaves unusually, it results in the creation of outliers. Therefore, an outlier often contains useful information about abnormal characteristics of the systems and entities that impact the data generation process. The recognition of such unusual characteristics provides useful application-specific insights.}, language = {en}, urldate = {2023-02-13}, booktitle = {Outlier {Analysis}}, publisher = {Springer International Publishing}, author = {Aggarwal, Charu C.}, editor = {Aggarwal, Charu C.}, year = {2017}, doi = {10.1007/978-3-319-47578-3_1}, keywords = {Anomaly Detection, Nonnegative Matrix Factorization, Outlier Analysis, Outlier Detection, Receiver Operating Characteristic Curve}, pages = {1--34}, }
@misc{theis_lossy_2017, title = {Lossy {Image} {Compression} with {Compressive} {Autoencoders}}, url = {http://arxiv.org/abs/1703.00395}, doi = {10.48550/arXiv.1703.00395}, abstract = {We propose a new approach to the problem of optimizing autoencoders for lossy image compression. New media formats, changing hardware technology, as well as diverse requirements and content types create a need for compression algorithms which are more flexible than existing codecs. Autoencoders have the potential to address this need, but are difficult to optimize directly due to the inherent non-differentiabilty of the compression loss. We here show that minimal changes to the loss are sufficient to train deep autoencoders competitive with JPEG 2000 and outperforming recently proposed approaches based on RNNs. Our network is furthermore computationally efficient thanks to a sub-pixel architecture, which makes it suitable for high-resolution images. This is in contrast to previous work on autoencoders for compression using coarser approximations, shallower architectures, computationally expensive methods, or focusing on small images.}, urldate = {2022-12-07}, publisher = {arXiv}, author = {Theis, Lucas and Shi, Wenzhe and Cunningham, Andrew and Huszár, Ferenc}, month = mar, year = {2017}, note = {arXiv:1703.00395 [cs, stat]}, keywords = {Computer Science - Computer Vision and Pattern Recognition, Statistics - Machine Learning}, }
@article{shao_novel_2017, title = {A novel deep autoencoder feature learning method for rotating machinery fault diagnosis}, volume = {95}, issn = {0888-3270}, url = {https://www.sciencedirect.com/science/article/pii/S0888327017301607}, doi = {10.1016/j.ymssp.2017.03.034}, abstract = {The operation conditions of the rotating machinery are always complex and variable, which makes it difficult to automatically and effectively capture the useful fault features from the measured vibration signals, and it is a great challenge for rotating machinery fault diagnosis. In this paper, a novel deep autoencoder feature learning method is developed to diagnose rotating machinery fault. Firstly, the maximum correntropy is adopted to design the new deep autoencoder loss function for the enhancement of feature learning from the measured vibration signals. Secondly, artificial fish swarm algorithm is used to optimize the key parameters of the deep autoencoder to adapt to the signal features. The proposed method is applied to the fault diagnosis of gearbox and electrical locomotive roller bearing. The results confirm that the proposed method is more effective and robust than other methods.}, language = {en}, urldate = {2022-05-02}, journal = {Mechanical Systems and Signal Processing}, author = {Shao, Haidong and Jiang, Hongkai and Zhao, Huiwei and Wang, Fuan}, month = oct, year = {2017}, keywords = {Artificial fish swarm algorithm, Deep autoencoder, Fault diagnosis, Feature learning, Maximum correntropy}, pages = {187--204}, }
@inproceedings{ragab_fault_2017, title = {Fault detection and diagnosis in the {Tennessee} {Eastman} {Process} using interpretable knowledge discovery}, doi = {10.1109/RAM.2017.7889650}, abstract = {This paper proposes an interpretable knowledge discovery approach to detect and diagnose faults in chemical processes. The approach is demonstrated using simulated data from the Tennessee Eastman Process (TEP), as a challenging benchmark problem. The TEP is a plant-wide industrial process that is commonly used to study and evaluate a variety of topics, including the design of process monitoring and control techniques. The proposed approach is called Logical Analysis of Data (LAD). LAD is a machine learning approach that is used to discover the hidden knowledge in historical data. The discovered knowledge in the form of extracted patterns is employed to construct a classification rule that is capable of characterizing the physical phenomena in the TEP, wherein one can detect and identify a fault and relate it to the causes that contribute to its occurrence. To evaluate our approach, the LAD is trained on a set of observations collected from different faults, and tested against an independent set of observations. The results in this paper show that the LAD approach achieves the highest accuracy compared to two common machine learning classification techniques; Artificial Neural Networks and Support Vector Machines.}, booktitle = {2017 {Annual} {Reliability} and {Maintainability} {Symposium} ({RAMS})}, author = {Ragab, Ahmed and El-koujok, Mohamed and Amazouz, Mouloud and Yacout, Soumaya}, month = jan, year = {2017}, keywords = {Chemical Processes, Fault Detection and Diagnosis, Fault detection, Fault diagnosis, Feeds, Inductors, Knowledge Discovery, Knowledge discovery, Logical Analysis of Data, Particle separators, Pattern Recognition, Tennessee Eastman Process, Valves}, pages = {1--7}, }
@article{hou_data-driven_2017, title = {A {Data}-{Driven} {Clustering} {Approach} for {Fault} {Diagnosis}}, volume = {5}, issn = {2169-3536}, doi = {10.1109/ACCESS.2017.2771365}, abstract = {Clustering is an important approach in fault diagnosis. The dominant sets algorithm is a graph-based clustering algorithm, which defines the dominant set as a concept of a cluster. In this paper, we make an in-depth investigation of the dominant sets algorithm. As a result, we find that this algorithm is dependent on the similarity parameter in constructing the pairwise similarity matrix, and has the tendency to generate spherical clusters only. Based on the merits and drawbacks of this algorithm, we apply the histogram equalization transformation to the similarity matrices for the purpose of removing the influence of similarity parameters, and then use a density-based cluster expansion process to improve the clustering results. In experimental validation of the proposed algorithm, we use two criterions to evaluate the clustering results in order to arrive at convincing conclusions. Data clustering experiments on ten data sets and fault detection experiments on the Tennessee Eastman process demonstrate the effectiveness of the proposed algorithm.}, journal = {IEEE Access}, author = {Hou, Jian and Xiao, Bing}, year = {2017}, note = {Conference Name: IEEE Access}, keywords = {Algorithm design and analysis, Clustering, Clustering algorithms, Data mining, Fault diagnosis, Partitioning algorithms, Shape, cluster expansion, dominant set, fault diagnosis}, pages = {26512--26520}, }
@inproceedings{chadha_comparison_2017, title = {Comparison of deep neural network architectures for fault detection in {Tennessee} {Eastman} process}, doi = {10.1109/ETFA.2017.8247619}, abstract = {Process monitoring and fault diagnosis methods are used to detect abnormal events in industrial processes. Process breakdowns hinder the overall productivity of the system which makes the early detection of faults very critical. Due to the highly non-linear nature of modern industrial processes, deep neural networks with several layers of non-linear complex representations fit aptly for contemporary fault diagnosis. Although deep neural networks have found wide array of application areas such as image recognition and speech recognition, their effectiveness in fault detection has not been tested substantially. In this study, a comparison between two deep neural network architectures, namely Deep Stacking Networks and Sparse Stacked Autoencoders for fault detection from process data is presented. The Tennessee Eastman benchmark process is considered to test the effectiveness of these deep architectures. A detailed comparison between the two architectures is illustrated with different hyperparameters. The experiment results show that the Sparse Stacked Autoencoders model has superior average fault detection capability and is also more stable as it has less variation in fault detection rate.}, booktitle = {2017 22nd {IEEE} {International} {Conference} on {Emerging} {Technologies} and {Factory} {Automation} ({ETFA})}, author = {Chadha, Gavneet Singh and Schwung, Andreas}, month = sep, year = {2017}, note = {ISSN: 1946-0759}, keywords = {Deep Neural Networks, Deep stacking network, Fault detection, Fault diagnosis, Mathematical model, Monitoring, Neural networks, Process monitoring, Stacked Autoencoders, Stacking, Tennessee Eastman Process, Training}, pages = {1--8}, }
@inproceedings{souiden_survey_2017, address = {Cham}, title = {A {Survey} on {Outlier} {Detection} in the {Context} of {Stream} {Mining}: {Review} of {Existing} {Approaches} and {Recommadations}}, isbn = {978-3-319-53480-0}, shorttitle = {A {Survey} on {Outlier} {Detection} in the {Context} of {Stream} {Mining}}, doi = {10.1007/978-3-319-53480-0_37}, abstract = {Generally, extracting only expected knowledge from data is not sufficient since unexpected ones can hide useful information concerning the data behavior. These information can be further used to optimize the current state. This has lead to the outlier detection. It refers to the data mining task that aims to find abnormal points or sequence of data hidden in the dataset. In fact, due to the emergence of new technologies, applications often generate and consume data in form of streams. This data differs from the static one. Therefore, traditional techniques cannot be used. Hence, convenient ones suitable to the data stream nature must be applied. In this paper, we will review different techniques of outlier detection in the data streams. In addition, we shall describe different approaches based on these techniques in order to establish a comparative study based on different criterion. This study aims to help users and facilitates the choice of the appropriate algorithm for a certain context.}, language = {en}, booktitle = {Intelligent {Systems} {Design} and {Applications}}, publisher = {Springer International Publishing}, author = {Souiden, Imen and Brahmi, Zaki and Toumi, Hajer}, editor = {Madureira, Ana Maria and Abraham, Ajith and Gamboa, Dorabela and Novais, Paulo}, year = {2017}, pages = {372--383}, }
@inproceedings{carnein_empirical_2017, address = {New York, NY, USA}, series = {{CF}'17}, title = {An {Empirical} {Comparison} of {Stream} {Clustering} {Algorithms}}, isbn = {978-1-4503-4487-6}, url = {https://doi.org/10.1145/3075564.3078887}, doi = {10.1145/3075564.3078887}, abstract = {Analysing streaming data has received considerable attention over the recent years. A key research area in this field is stream clustering which aims to recognize patterns in a possibly unbounded data stream of varying speed and structure. Over the past decades a multitude of new stream clustering algorithms have been proposed. However, to the best of our knowledge, no rigorous analysis and comparison of the different approaches has been performed. Our paper fills this gap and provides extensive experiments for a total of ten popular algorithms. We utilize a number of standard data sets of both, real and synthetic data and identify key weaknesses and strengths of the existing algorithms.}, urldate = {2022-03-25}, booktitle = {Proceedings of the {Computing} {Frontiers} {Conference}}, publisher = {Association for Computing Machinery}, author = {Carnein, Matthias and Assenmacher, Dennis and Trautmann, Heike}, month = may, year = {2017}, keywords = {Data Streams, Online Clustering, Stream Clustering}, pages = {361--366}, }
@inproceedings{besedin_evolutive_2017, title = {Evolutive deep models for online learning on data streams with no storage}, url = {https://hal-cea.archives-ouvertes.fr/cea-01832986}, abstract = {In recent years Deep Learning based methods gained a growing recognition in many applications and became the state-of-the-art approach in various fields of Machine Learning, such as Object Recognition, Scene Understanding, Natural Language processing and others. Nevertheless, most of the applications of Deep Learning use static datasets which do not change over time. This scenario does not respond well to a big number of recent applications (such as tendency analysis on social networks, video surveillance, sensor monitoring, etc.), especially when working with data streams which require real-time adaptation to the content of the data. In this paper, we propose a model that is able to perform online data classification and can adapt to data classes, never seen by the model before, while preserving previously learned information. Our approach does not need to store and reuse previous observations, which is a big advantage for data-streams applications, since the dataset one wants to work with can potentially be of very large size. To make up for the absence of previous data, the proposed model uses a recently developed Generative Adversarial Network to drive a Deep Convolutional Network for the main classification task. More specifically, we propagate generative models instead of the data itself, to be able to regenerate the historical training data that we didn't keep. We test our proposition on the well known MNIST benchmark database, where our method achieves results close to the state of the art convolutional networks trained by using the full dataset. We also study the impact of dataset re-generation with GANs on the learning process.}, language = {en}, urldate = {2022-03-19}, author = {Besedin, Andrey and Blanchart, Pierre and Crucianu, Michel and Ferecatu, Marin}, month = sep, year = {2017}, }
@inproceedings{bifet_extremely_2017, address = {New York, NY, USA}, series = {{KDD} '17}, title = {Extremely {Fast} {Decision} {Tree} {Mining} for {Evolving} {Data} {Streams}}, isbn = {978-1-4503-4887-4}, url = {https://doi.org/10.1145/3097983.3098139}, doi = {10.1145/3097983.3098139}, abstract = {Nowadays real-time industrial applications are generating a huge amount of data continuously every day. To process these large data streams, we need fast and efficient methodologies and systems. A useful feature desired for data scientists and analysts is to have easy to visualize and understand machine learning models. Decision trees are preferred in many real-time applications for this reason, and also, because combined in an ensemble, they are one of the most powerful methods in machine learning. In this paper, we present a new system called STREAMDM-C++, that implements decision trees for data streams in C++, and that has been used extensively at Huawei. Streaming decision trees adapt to changes on streams, a huge advantage since standard decision trees are built using a snapshot of data, and can not evolve over time. STREAMDM-C++ is easy to extend, and contains more powerful ensemble methods, and a more efficient and easy to use adaptive decision trees. We compare our new implementation with VFML, the current state of the art implementation in C, and show how our new system outperforms VFML in speed using less resources.}, urldate = {2022-03-16}, booktitle = {Proceedings of the 23rd {ACM} {SIGKDD} {International} {Conference} on {Knowledge} {Discovery} and {Data} {Mining}}, publisher = {Association for Computing Machinery}, author = {Bifet, Albert and Zhang, Jiajin and Fan, Wei and He, Cheng and Zhang, Jianfeng and Qian, Jianfeng and Holmes, Geoff and Pfahringer, Bernhard}, month = aug, year = {2017}, keywords = {classification, data streams, decision trees, online learning}, pages = {1733--1742}, }
@phdthesis{korvesis_machine_2017, type = {Doctoral thesis}, title = {Machine {Learning} for {Predictive} {Maintenance} in {Aviation}}, copyright = {Licence Etalab}, url = {https://www.theses.fr/2017SACLX093}, abstract = {The growing amount of data available in almost every domain raises the need for algorithms for automated data analysis. This need is particularly evident in predictive maintenance, where the goal is to predict system failures by continuously observing their condition so that maintenance actions can be planned in advance. These observations are generated by monitoring systems, usually in the form of time series and event logs, and cover the lifetime of the corresponding components. The main challenge of predictive maintenance is the analysis of this observation history in order to develop predictive models. In this respect, machine learning has become ubiquitous, since it provides the means to extract knowledge from a wide variety of data sources with minimal human intervention. The objective of this thesis is to study and solve problems in aviation related to predicting failures of on-board components. The amount of data related to aircraft operations is enormous and, consequently, scalability is an essential requirement of every proposed approach. This thesis is divided into three parts corresponding to the different data sources encountered during our work. In the first part, we targeted the problem of predicting system failures given the history of Post Flight Reports. We proposed a regression-based statistical approach preceded by a meticulous problem formulation and data preprocessing/transformation. Our method estimates the risk of failure with a scalable solution, deployed in a cluster environment for both training and deployment. To our knowledge, no method was available to solve this problem at the time this thesis was written. The second part consists of analysing the logbook data, which consist of text describing aircraft problems and the corresponding maintenance actions. The logbook contains information that is not present in the Post Flight Reports, although it is essential in several applications, such as failure prediction. However, since the logbook contains text written by humans, it contains a lot of noise that must be removed in order to extract the useful information. We addressed this problem by proposing an approach based on vector representations of words. Our approach exploits semantic similarities, learned by the neural networks that generated the vector representations, in order to identify and correct spelling mistakes and abbreviations. Finally, important keywords are extracted using Part of Speech Tagging. In the third part, we addressed the problem of assessing the health of on-board components using sensor measurements. In the cases considered, the health of a component is assessed by the magnitude of the sensor fluctuations and a monotonically increasing trend. In our approach, we formulated a time series decomposition problem in order to separate the fluctuations from the trend by solving a convex problem. To quantify the health of the component, we compute, using Gaussian Mixture Models, a risk function that measures the deviation of the sensor from its normal behaviour.}, urldate = {2022-03-11}, school = {Université Paris-Saclay (ComUE)}, author = {Korvesis, Panagiotis}, collaborator = {Vazirgiannis, Michalis}, month = nov, year = {2017}, keywords = {Aeronautics, Aviation, Condition-based maintenance, Machine learning, Predictive maintenance}, }
@article{loghman_creep_2017, title = {Creep damage and life assessment of thick-walled spherical reactor using {Larson}–{Miller} parameter}, volume = {151}, issn = {0308-0161}, url = {https://www.sciencedirect.com/science/article/pii/S0308016117300571}, doi = {10.1016/j.ijpvp.2017.02.003}, abstract = {Creep damage and remnant life assessment of a thick-walled spherical reactor made of 316LN austenitic stainless steel (316LN SS) have been investigated. The Robinson's linear damage accumulation rule has been used to obtain damage and remnant life assessment in which time to rupture is determined by Larson-Miller Parameter (LMP). Due to high temperature, creep is the most significant damage mechanism exhausting the lifetime of the reactor. The material properties, except Poisson's ratio, are assumed to depend on the temperature. An analytical solution employed to calculate the stress rates followed by an iterative method using initial thermoelastic stresses at zero time to obtain effective stress histories and then using LMP to calculate the damage and remnant life assessment. It is concluded that the temperature gradient has a significant effect on the effective stress histories so that effective stresses are decreasing with time in a uniform temperature field while they are increasing in the presence of a thermal gradient.}, language = {en}, urldate = {2022-03-05}, journal = {International Journal of Pressure Vessels and Piping}, author = {Loghman, Abbas and Moradi, Mehdi}, month = mar, year = {2017}, keywords = {Austenitic stainless steel, Creep damage mechanics, Larson–Miller parameter, Life assessment, Thick-walled spherical reactor}, pages = {11--19}, }
@article{zhao_learning_2017, title = {Learning to {Monitor} {Machine} {Health} with {Convolutional} {Bi}-{Directional} {LSTM} {Networks}}, volume = {17}, copyright = {http://creativecommons.org/licenses/by/3.0/}, issn = {1424-8220}, url = {https://www.mdpi.com/1424-8220/17/2/273}, doi = {10.3390/s17020273}, abstract = {In modern manufacturing systems and industries, more and more research efforts have been made in developing effective machine health monitoring systems. Among various machine health monitoring approaches, data-driven methods are gaining in popularity due to the development of advanced sensing and data analytic techniques. However, considering the noise, varying length and irregular sampling behind sensory data, this kind of sequential data cannot be fed into classification and regression models directly. Therefore, previous work focuses on feature extraction/fusion methods requiring expensive human labor and high quality expert knowledge. With the development of deep learning methods in the last few years, which redefine representation learning from raw data, a deep neural network structure named Convolutional Bi-directional Long Short-Term Memory networks (CBLSTM) has been designed here to address raw sensory data. CBLSTM firstly uses CNN to extract local features that are robust and informative from the sequential input. Then, bi-directional LSTM is introduced to encode temporal information. Long Short-Term Memory networks(LSTMs) are able to capture long-term dependencies and model sequential data, and the bi-directional structure enables the capture of past and future contexts. Stacked, fully-connected layers and the linear regression layer are built on top of bi-directional LSTMs to predict the target value. Here, a real-life tool wear test is introduced, and our proposed CBLSTM is able to predict the actual tool wear based on raw sensory data. The experimental results have shown that our model is able to outperform several state-of-the-art baseline methods.}, language = {en}, number = {2}, urldate = {2022-02-09}, journal = {Sensors}, author = {Zhao, Rui and Yan, Ruqiang and Wang, Jinjiang and Mao, Kezhi}, month = feb, year = {2017}, note = {Number: 2 Publisher: Multidisciplinary Digital Publishing Institute}, keywords = {bi-directional long-short term memory network, convolutional neural network, ecml, machine health monitoring, recurrent neural network, tool wear prediction}, pages = {273}, }
@article{chalouli_intelligent_2017, title = {Intelligent {Health} {Monitoring} of {Machine} {Bearings} {Based} on {Feature} {Extraction}}, volume = {17}, issn = {1864-1245}, url = {https://doi.org/10.1007/s11668-017-0343-y}, doi = {10.1007/s11668-017-0343-y}, abstract = {Finding reliable condition monitoring solutions for large-scale complex systems is currently a major challenge in industrial research. Since fault diagnosis is directly related to the features of a system, there have been many research studies aimed to develop methods for the selection of the relevant features. Moreover, there are no universal features for a particular application domain such as machine diagnosis. For example, in machine bearing fault diagnosis, these features are often selected by an expert or based on previous experience. Thus, for each bearing machine type, the relevant features must be selected. This paper attempts to solve the problem of relevant features identification by building an automatic fault diagnosis process based on relevant feature selection using a data-driven approach. The proposed approach starts with the extraction of the time-domain features from the input signals. Then, a feature reduction algorithm based on cross-correlation filter is applied to reduce the time and cost of the processing. Unsupervised learning mechanism using K-means++ selects the relevant fault features based on the squared Euclidian distance between different health states. Finally, the selected features are used as inputs to a self-organizing map producing our health indicator. The proposed method is tested on roller bearing benchmark datasets.}, language = {en}, number = {5}, urldate = {2022-02-09}, journal = {Journal of Failure Analysis and Prevention}, author = {Chalouli, Mohammed and Berrached, Nasr-eddine and Denai, Mouloud}, month = oct, year = {2017}, keywords = {feature extraction, feature learning}, pages = {1053--1066}, }
@article{jun_bayesian_2017, title = {A {Bayesian} network-based approach for fault analysis}, volume = {81}, issn = {0957-4174}, url = {https://www.sciencedirect.com/science/article/pii/S0957417417302191}, doi = {10.1016/j.eswa.2017.03.056}, abstract = {For high-value assets such as certain types of plant equipment, the total amount of resources devoted to Operation and Maintenance may substantially exceed the resources expended in acquisition and installation of the asset, because high-value assets have long useful lifetimes. Any asset failure during this useful lifetime risks large losses in income and goodwill, and decreased safety. With the continual development of information, communication, and sensor technologies, Condition-Based Maintenance (CBM) policies have gained popularity in industries. A successfully implemented CBM reduces the losses due to equipment failure by intelligently maintaining the equipment before catastrophic failures occur. However, effective CBM requires an effective fault analysis method based on gathered sensor data. In this vein, this paper proposes a Bayesian network-based fault analysis method, from which novel fault identification, inference, and sensitivity analysis methods are developed. As a case study, the fault analysis method was analyzed in a centrifugal compressor utilized in a plant.}, language = {en}, urldate = {2021-10-14}, journal = {Expert Systems with Applications}, author = {Jun, Hong-Bae and Kim, David}, month = sep, year = {2017}, keywords = {Bayesian network, Condition-based maintenance, Fault identification, Fault inference, Sensitivity analysis, bn, fault analysis, fault diagnosis, fault diagnostics}, pages = {332--348}, }
@article{yu_adaptive_2017, title = {Adaptive hidden {Markov} model-based online learning framework for bearing faulty detection and performance degradation monitoring}, volume = {83}, issn = {0888-3270}, url = {https://www.sciencedirect.com/science/article/pii/S0888327016301674}, doi = {10.1016/j.ymssp.2016.06.004}, abstract = {This study proposes an adaptive-learning-based method for machine faulty detection and health degradation monitoring. The kernel of the proposed method is an “evolving” model that uses an unsupervised online learning scheme, in which an adaptive hidden Markov model (AHMM) is used for online learning the dynamic health changes of machines in their full life. A statistical index is developed for recognizing the new health states in the machines. Those new health states are then described online by adding of new hidden states in AHMM. Furthermore, the health degradations in machines are quantified online by an AHMM-based health index (HI) that measures the similarity between two density distributions that describe the historic and current health states, respectively. When necessary, the proposed method characterizes the distinct operating modes of the machine and can learn online both abrupt as well as gradual health changes. Our method overcomes some drawbacks of the HIs (e.g., relatively low comprehensibility and applicability) based on fixed monitoring models constructed in the offline phase. Results from its application in a bearing life test reveal that the proposed method is effective in online detection and adaptive assessment of machine health degradation. This study provides a useful guide for developing a condition-based maintenance (CBM) system that uses an online learning method without considerable human intervention.}, language = {en}, urldate = {2021-11-15}, journal = {Mechanical Systems and Signal Processing}, author = {Yu, Jianbo}, month = jan, year = {2017}, keywords = {Adaptive learning, Health degradation monitoring, Hidden Markov model, Machine faulty detection}, pages = {149--162}, }
@incollection{chakraborty_swarm_2017, address = {Cham}, series = {Modeling and {Optimization} in {Science} and {Technologies}}, title = {Swarm {Intelligence}: {A} {Review} of {Algorithms}}, isbn = {978-3-319-50920-4}, shorttitle = {Swarm {Intelligence}}, url = {https://doi.org/10.1007/978-3-319-50920-4_19}, abstract = {Swarm intelligence (SI), an integral part in the field of artificial intelligence, is gradually gaining prominence, as more and more high complexity problems require solutions which may be sub-optimal but yet achievable within a reasonable period of time. Mostly inspired by biological systems, swarm intelligence adopts the collective behaviour of an organized group of animals, as they strive to survive. This study aims to discuss the governing idea, identify the potential application areas and present a detailed survey of eight SI algorithms. The newly developed algorithms discussed in the study are the insect-based algorithms and animal-based algorithms in minute detail. More specifically, we focus on the algorithms inspired by ants, bees, fireflies, glow-worms, bats, monkeys, lions and wolves. The inspiration analyses on these algorithms highlight the way these algorithms operate. Variants of these algorithms have been introduced after the inspiration analysis. Specific areas for the application of such algorithms have also been highlighted for researchers interested in the domain. The study attempts to provide an initial understanding for the exploration of the technical aspects of the algorithms and their future scope by the academia and practice.}, language = {en}, urldate = {2021-11-11}, booktitle = {Nature-{Inspired} {Computing} and {Optimization}: {Theory} and {Applications}}, publisher = {Springer International Publishing}, author = {Chakraborty, Amrita and Kar, Arpan Kumar}, editor = {Patnaik, Srikanta and Yang, Xin-She and Nakamatsu, Kazumi}, year = {2017}, doi = {10.1007/978-3-319-50920-4_19}, keywords = {Bio-inspired algorithms, Intelligent algorithms, Literature review, Machine learning, Nature-inspired computing, Swarm intelligence}, pages = {475--494}, }
@article{li_fault_2017, title = {Fault diagnosis and prediction of complex system based on {Hidden} {Markov} model}, volume = {33}, issn = {1064-1246}, url = {https://content.iospress.com/articles/journal-of-intelligent-and-fuzzy-systems/ifs169344}, doi = {10.3233/JIFS-169344}, abstract = {To guarantee the performance and security of the complex system, in this paper, we focus on the problem of fault diagnosis and fault prediction method for the complex system. The proposed fault diagnosis and prediction system is made up of three part}, language = {en}, number = {5}, urldate = {2021-11-07}, journal = {Journal of Intelligent \& Fuzzy Systems}, author = {Li, Chen and Wei, Fajie and Wang, Cheng and Zhou, Shenghan}, month = jan, year = {2017}, note = {Publisher: IOS Press}, keywords = {complex system, diagnosis, diagnostics, fault diagnosis, fault prediction, hmm, prognosis, prognostics}, pages = {2937--2944}, }
@article{lin_bayesian_2017, title = {A {Bayesian} {Approach} to {Fault} {Identification} in the {Presence} of {Multi}-component {Degradation}}, volume = {8}, copyright = {Copyright (c) 2017 Yufei Lin, Skaf Zakwan, Ian Jennions}, issn = {2153-2648}, url = {http://papers.phmsociety.org/index.php/ijphm/article/view/2530}, doi = {10.36001/ijphm.2017.v8i1.2530}, abstract = {Fault diagnosis typically consists of fault detection, isolation and identification. Fault detection and isolation determine the presence of a fault in a system and the location of the fault. Fault identification then aims at determining the severity level of the fault. In a practical sense, a fault is a conditional interruption of the system ability to achieve a required function under specified operating condition; degradation is the deviation of one or more characteristic parameters of the component from acceptable conditions and is often a main cause for fault generation. A fault occurs when the degradation exceeds an allowable threshold. From the point a new aircraft takes off for the first time all of its components start to degrade, and yet in almost all studies it is presumed that we can identify a single fault in isolation, i.e. without considering multi-component degradation in the system. This paper proposes a probabilistic framework to identify a single fault in an aircraft fuel system with consideration of multi-component degradation. Based on the conditional probabilities of sensor readings for a specific fault, a Bayesian method is presented to integrate distributed sensory information and calculate the likelihood of all possible fault severity levels. The proposed framework is implemented on an experimental aircraft fuel rig which illustrates the applicability of the proposed method.}, language = {en}, number = {1}, urldate = {2021-10-28}, journal = {International Journal of Prognostics and Health Management}, author = {Lin, Yufei and Zakwan, Skaf and Jennions, Ian}, year = {2017}, note = {Number: 1}, keywords = {Aircraft fuel rig, bayesian, diagnostics, multicomponent}, }
@article{krawczyk_active_2017, title = {Active and adaptive ensemble learning for online activity recognition from data streams}, volume = {138}, issn = {0950-7051}, url = {https://www.sciencedirect.com/science/article/pii/S0950705117304513}, doi = {10.1016/j.knosys.2017.09.032}, abstract = {Activity recognition is one of the emerging trends in the domain of mining ubiquitous environments. It assumes that we can recognize the current action undertaken by the monitored subject on the basis of outputs of a set of associated sensors. Often different combinations of smart devices are being used, thus creating an Internet of Things. Such data will arrive continuously during the operation time of sensors and require an online processing in order to keep a real-time track of the current activity being undertaken. This forms a natural data stream problem with the potential presence of changes in the arriving data. Therefore, we require an efficient online machine learning system that can offer high recognition rates and adapt to drifts and shifts in the stream. In this paper we propose an efficient and lightweight adaptive ensemble learning system for real-time activity recognition. We use a weighted modification of Naïve Bayes classifier that can swiftly adapt itself to the current state of the stream without a need for an external concept drift detector. To tackle the multi-class nature of activity recognition problem we propose to use an one-vs-one decomposition to form a committee of simpler and diverse learners. We introduce a novel weighted combination for one-vs-one decomposition that can adapt itself over time. Additionally, to limit the cost of supervision we propose to enhance our classification system with active learning paradigm to select only the most important objects for labeling and work under constrained budget. Experiments carried out on six data streams gathered from ubiquitous environments show that the proposed active and adaptive ensemble offer excellent classification accuracy with low requirement for access to true class labels.}, language = {en}, urldate = {2021-10-18}, journal = {Knowledge-Based Systems}, author = {Krawczyk, Bartosz}, month = dec, year = {2017}, keywords = {Active learning, Activity recognition, Concept drift, Data streams, Ensemble learning, One-vs-One}, pages = {69--78}, }
@article{lughofer_-line_2017, title = {On-line active learning: {A} new paradigm to improve practical useability of data stream modeling methods}, volume = {415-416}, issn = {0020-0255}, shorttitle = {On-line active learning}, url = {https://www.sciencedirect.com/science/article/pii/S0020025517308083}, doi = {10.1016/j.ins.2017.06.038}, abstract = {The central purpose of this survey is to provide readers an insight into the recent advances and challenges in on-line active learning. Active learning has attracted the data mining and machine learning community since around 20 years. This is because it served for important purposes to increase practical applicability of machine learning techniques, such as (i) to reduce annotation and measurement costs for operators and measurement equipments, (ii) to reduce manual labeling effort for experts and (iii) to reduce computation time for model training. Almost all of the current techniques focus on the classical pool-based approach, which is off-line by nature as iterating over a pool of (unlabeled) reference samples a multiple times to choose the most promising ones for improving the performance of the classifiers. This is achieved by (time-intensive) re-training cycles on all labeled samples available so far. For the on-line, stream mining case, the challenge is that the sample selection strategy has to operate in a fast, ideally single-pass manner. Some first approaches have been proposed during the last decade (starting from around 2005) with the usage of machine learning (ML) oriented incremental classifiers, which are able to update their parameters based on selected samples, but not their structures. Since 2012, on-line active learning concepts have been proposed in connection with the paradigm of evolving models, which are able to expand their knowledge into feature space regions so far unexplored. This opened the possibility to address a particular type of uncertainty, namely that one which stems from a significant novelty content in streams, as, e.g., caused by drifts, new operation modes, changing system behaviors or non-stationary environments. We will provide an overview about the concepts and techniques for sample selection and active learning within these two principal major research lines (incremental ML models versus evolving systems), a comparison of their essential characteristics and properties (raising some advantages and disadvantages), and a study on possible evaluation techniques for them. We conclude with an overview of real-world application examples where various on-line AL approaches have been already successfully applied in order to significantly reduce user’s interaction efforts and costs for model updates.}, language = {en}, urldate = {2021-10-18}, journal = {Information Sciences}, author = {Lughofer, Edwin}, month = nov, year = {2017}, keywords = {Data stream mining, Evolving models, Incremental ML and DM methods, Interaction effort and cost reduction, On-line active learning, Single-pass sample selection, Uncertainty and novelty in streams}, pages = {356--376}, }
@article{svalastog_concepts_2017, title = {Concepts and definitions of health and health-related values in the knowledge landscapes of the digital society}, volume = {58}, issn = {0353-9504}, url = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5778676/}, doi = {10.3325/cmj.2017.58.431}, number = {6}, urldate = {2021-10-15}, journal = {Croatian Medical Journal}, author = {Svalastog, Anna Lydia and Donev, Doncho and Jahren Kristoffersen, Nina and Gajović, Srećko}, month = dec, year = {2017}, pmid = {29308835}, pmcid = {PMC5778676}, pages = {431--435}, }
@article{klimek_visual_2017, title = {Visual analogue scales ({VAS}): {Measuring} instruments for the documentation of symptoms and therapy monitoring in cases of allergic rhinitis in everyday health care}, volume = {26}, issn = {2197-0378}, shorttitle = {Visual analogue scales ({VAS})}, url = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5288410/}, doi = {10.1007/s40629-016-0006-7}, abstract = {Backround Visual analogue scales (VAS) are psychometric measuring instruments designed to document the characteristics of disease-related symptom severity in individual patients and use this to achieve a rapid (statistically measurable and reproducible) classification of symptom severity and disease control. VAS can also be used in routine patient history taking and to monitor the course of a chronic disease such as allergic rhinitis (AR). More specifically, the VAS has been used to assess effectiveness of AR therapy in real life, both in intermittent and persistent disease. Methods This position paper takes a detailed look at the historical development of VAS and its method-specific principles. Particular focus is put on aspects of practical application in daily routine and on a critical discussion of the advantages and disadvantages of the individual methods. Results VAS are well validated for the measurement of AR symptoms and correlate well with the ARIA (allergic rhinitis and its impact on asthma) severity classification and also correlated well with rTNSS and RQLQ. Moreover, several treatment studies on AR have used VAS as an evaluation parameter. Thanks to the use of new (real-life and real-time) communication technologies, such as smartphone apps, Discussion: VAS can be used relatively simply and highly effectively to assess disease control. The VAS lends itself very well to digitization and has now been incorporated into a smartphone app (called Allergy Diary) to assess AR control and direct treatment decisions as part of an AR clinical decision support system (CDSS). MASK Rhinitis has developed this app, which is currently available in 15 different languages.}, number = {1}, urldate = {2021-10-15}, journal = {Allergo Journal International}, author = {Klimek, Ludger and Bergmann, Karl-Christian and Biedermann, Tilo and Bousquet, Jean and Hellings, Peter and Jung, Kirsten and Merk, Hans and Olze, Heidi and Schlenter, Wolfgang and Stock, Philippe and Ring, Johannes and Wagenmann, Martin and Wehrmann, Wolfgang and Mösges, Ralph and Pfaar, Oliver}, year = {2017}, pmid = {28217433}, pmcid = {PMC5288410}, pages = {16--24}, }
@inproceedings{panahi_clustering_2017, title = {Clustering by {Sum} of {Norms}: {Stochastic} {Incremental} {Algorithm}, {Convergence} and {Cluster} {Recovery}}, shorttitle = {Clustering by {Sum} of {Norms}}, url = {https://proceedings.mlr.press/v70/panahi17a.html}, abstract = {Standard clustering methods such as K-means, Gaussian mixture models, and hierarchical clustering are beset by local minima, which are sometimes drastically suboptimal. Moreover the number of clusters K must be known in advance. The recently introduced the sum-of-norms (SON) or Clusterpath convex relaxation of k-means and hierarchical clustering shrinks cluster centroids toward one another and ensure a unique global minimizer. We give a scalable stochastic incremental algorithm based on proximal iterations to solve the SON problem with convergence guarantees. We also show that the algorithm recovers clusters under quite general conditions which have a similar form to the unifying proximity condition introduced in the approximation algorithms community (that covers paradigm cases such as Gaussian mixtures and planted partition models). We give experimental results to confirm that our algorithm scales much better than previous methods while producing clusters of comparable quality.}, language = {en}, urldate = {2021-10-01}, booktitle = {Proceedings of the 34th {International} {Conference} on {Machine} {Learning}}, publisher = {PMLR}, author = {Panahi, Ashkan and Dubhashi, Devdatt and Johansson, Fredrik D. and Bhattacharyya, Chiranjib}, month = jul, year = {2017}, note = {ISSN: 2640-3498}, pages = {2769--2777}, }
@article{guo_recurrent_2017, title = {A recurrent neural network based health indicator for remaining useful life prediction of bearings}, volume = {240}, issn = {0925-2312}, url = {https://www.sciencedirect.com/science/article/pii/S0925231217303363}, doi = {10.1016/j.neucom.2017.02.045}, abstract = {In data-driven prognostic methods, prediction accuracy of bearing remaining useful life (RUL) mainly depends on the performance of bearing health indicators, which are usually fused from some statistical features extracted from vibration signals. However, many existing bearing health indicators have the following two shortcomings: (1) many statistical features do not have equal contribution to construction of health indicators since the ranges of these statistical features are different; (2) it is difficult to determine a failure threshold since health indicators of different machines are generally different at a failure time. To overcome these drawbacks, a recurrent neural network based health indicator (RNN-HI) for RUL prediction of bearings is proposed in this paper. Firstly, six related-similarity features are proposed to be combined with eight classical time-frequency features so as to form an original feature set. Then, with monotonicity and correlation metrics, the most sensitive features are selected from the original feature set. Finally, these selected features are fed into a recurrent neural network to construct the RNN-HI. The performance of the RNN-HI is verified by two bearing data sets collected from experiments and an industrial field. The results show that the RNN-HI obtains fairly high monotonicity and correlation values and it is beneficial to bearing RUL prediction. In addition, it is experimentally demonstrated that the proposed RNN-HI is able to achieve better performance than a self organization map based method.}, language = {en}, urldate = {2021-09-30}, journal = {Neurocomputing}, author = {Guo, Liang and Li, Naipeng and Jia, Feng and Lei, Yaguo and Lin, Jing}, month = may, year = {2017}, keywords = {Bearing health indicator, Recurrent neural network, Related-similarity feature}, pages = {98--109}, }
@article{alaswad_review_2017, title = {A review on condition-based maintenance optimization models for stochastically deteriorating system}, volume = {157}, issn = {0951-8320}, url = {https://www.sciencedirect.com/science/article/pii/S0951832016303714}, doi = {10.1016/j.ress.2016.08.009}, abstract = {Condition-based maintenance (CBM) is a maintenance strategy that collects and assesses real-time information, and recommends maintenance decisions based on the current condition of the system. In recent decades, research on CBM has been rapidly growing due to the rapid development of computer-based monitoring technologies. Research studies have proven that CBM, if planned properly, can be effective in improving equipment reliability at reduced costs. This paper presents a review of CBM literature with emphasis on mathematical modeling and optimization approaches. We focus this review on important aspects of the CBM, such as optimization criteria, inspection frequency, maintenance degree, solution methodology, etc. Since the modeling choice for the stochastic deterioration process greatly influences CBM strategy decisions, this review classifies the literature on CBM models based on the underlying deterioration processes, namely discrete- and continuous-state deterioration, and proportional hazard model. CBM models for multi-unit systems are also reviewed in this paper. This paper provides useful references for CBM management professionals and researchers working on CBM modeling and optimization.}, language = {en}, urldate = {2021-09-29}, journal = {Reliability Engineering \& System Safety}, author = {Alaswad, Suzan and Xiang, Yisha}, month = jan, year = {2017}, keywords = {Condition-based maintenance, Inspection, Maintenance optimization, Preventive maintenance, Stochastic deterioration}, pages = {54--63}, }
@article{olde_keizer_condition-based_2017, title = {Condition-based maintenance policies for systems with multiple dependent components: {A} review}, volume = {261}, issn = {0377-2217}, shorttitle = {Condition-based maintenance policies for systems with multiple dependent components}, url = {https://www.sciencedirect.com/science/article/pii/S0377221717301881}, doi = {10.1016/j.ejor.2017.02.044}, abstract = {Condition-based maintenance (CBM) has received increasing attention in the literature over the past years. The application of CBM in practice, however, is lagging behind. This is, at least in part, explained by the complexity of real-life systems as opposed to the stylized ones studied most often. To overcome this issue, research is focusing more and more on complex systems, with multiple components subject to various dependencies. Existing classifications of these dependencies in the literature are no longer sufficient. Therefore, we provide an extended classification scheme. Besides the types of dependencies identified in the past (economic, structural, and stochastic), we add resource dependence, where multiple components are connected through, e.g., shared spares, tools, or maintenance workers. Furthermore, we extend the existing notion of structural dependence by distinguishing between structural dependence from a technical point of view and structural dependence from a performance point of view (e.g., through a series or parallel setting). We review the advances made with respect to CBM. Our main focus is on the implications of dependencies on the structure of the optimal CBM policy. We link our review to practice by providing real-life examples, thereby stressing current gaps in the literature.}, language = {en}, number = {2}, urldate = {2021-09-28}, journal = {European Journal of Operational Research}, author = {Olde Keizer, Minou C. A. and Flapper, Simme Douwe P. and Teunter, Ruud H.}, month = sep, year = {2017}, keywords = {Condition-based maintenance, Dependencies, Maintenance, Multi-component, Review}, pages = {405--420}, }
@inproceedings{aminikhanghahi_using_2017, title = {Using change point detection to automate daily activity segmentation}, doi = {10.1109/PERCOMW.2017.7917569}, abstract = {Real time detection of transitions between activities based on sensor data is a valuable but somewhat untapped challenge. Detecting these transitions is useful for activity segmentation, for timing notifications or interventions, and for analyzing human behavior. In this work, we design and evaluate real time machine learning-based methods for automatic segmentation and recognition of continuous human daily activity. We detect activity transitions and integrate the change point detection algorithm with smart home activity recognition to segment human daily activities into separate actions and correctly identify each action. Experiments on real-world smart home datasets suggest that using transition-aware activity recognition algorithms leads to the best performance for detecting activity boundaries and streaming activity segmentation.}, booktitle = {2017 {IEEE} {International} {Conference} on {Pervasive} {Computing} and {Communications} {Workshops} ({PerCom} {Workshops})}, author = {Aminikhanghahi, Samaneh and Cook, Diane J.}, month = mar, year = {2017}, keywords = {Activity Recognition, Activity recognition, Change point detection, Context, Feature extraction, Hidden Markov models, Real-time systems, Segmentation, Smart home, Smart homes}, pages = {262--267}, }
@article{torkamani_survey_2017, title = {Survey on time series motif discovery}, volume = {7}, copyright = {© 2017 John Wiley \& Sons, Ltd}, issn = {1942-4795}, url = {https://onlinelibrary.wiley.com/doi/abs/10.1002/widm.1199}, doi = {10.1002/widm.1199}, abstract = {Last decades witness a huge growth in medical applications, genetic analysis, and in performance of manufacturing technologies and automatised production systems. A challenging task is to identify and diagnose the behavior of such systems, which aim to produce a product with desired quality. In order to control the state of the systems, various information is gathered from different types of sensors (optical, acoustic, chemical, electric, and thermal). Time series data are a set of real-valued variables obtained chronologically. Data mining and machine learning help derive meaningful knowledge from time series. Such tasks include clustering, classification, anomaly detection and motif discovery. Motif discovery attempts to find meaningful, new, and unknown knowledge from data. Detection of motifs in a time series is beneficial for, e.g., discovery of rules or specific events in a signal. Motifs provide useful information for the user in order to model or analyze the data. Motif discovery is applied to various areas as telecommunication, medicine, web, motion-capture, and sensor networks. This contribution provides a review of the existing publications in time series motif discovery along with advantages and disadvantages of existing approaches. Moreover, the research issues and missing points in this field are highlighted. The main objective of this focus article is to serve as a glossary for researchers in this field.}, language = {en}, number = {2}, urldate = {2021-03-25}, journal = {WIREs Data Mining and Knowledge Discovery}, author = {Torkamani, Sahar and Lohweg, Volker}, year = {2017}, note = {\_eprint: https://onlinelibrary.wiley.com/doi/pdf/10.1002/widm.1199}, pages = {e1199}, }
@article{ahmad_unsupervised_2017, series = {Online {Real}-{Time} {Learning} {Strategies} for {Data} {Streams}}, title = {Unsupervised real-time anomaly detection for streaming data}, volume = {262}, issn = {0925-2312}, url = {http://www.sciencedirect.com/science/article/pii/S0925231217309864}, doi = {10.1016/j.neucom.2017.04.070}, abstract = {We are seeing an enormous increase in the availability of streaming, time-series data. Largely driven by the rise of connected real-time data sources, this data presents technical challenges and opportunities. One fundamental capability for streaming analytics is to model each stream in an unsupervised fashion and detect unusual, anomalous behaviors in real-time. Early anomaly detection is valuable, yet it can be difficult to execute reliably in practice. Application constraints require systems to process data in real-time, not batches. Streaming data inherently exhibits concept drift, favoring algorithms that learn continuously. Furthermore, the massive number of independent streams in practice requires that anomaly detectors be fully automated. In this paper we propose a novel anomaly detection algorithm that meets these constraints. The technique is based on an online sequence memory algorithm called Hierarchical Temporal Memory (HTM). We also present results using the Numenta Anomaly Benchmark (NAB), a benchmark containing real-world data streams with labeled anomalies. The benchmark, the first of its kind, provides a controlled open-source environment for testing anomaly detection algorithms on streaming data. We present results and analysis for a wide range of algorithms on this benchmark, and discuss future challenges for the emerging field of streaming analytics.}, language = {en}, urldate = {2020-12-21}, journal = {Neurocomputing}, author = {Ahmad, Subutai and Lavin, Alexander and Purdy, Scott and Agha, Zuha}, month = nov, year = {2017}, keywords = {Anomaly detection, Benchmark dataset, Concept drift, Hierarchical Temporal Memory, Streaming data, Unsupervised learning}, pages = {134--147}, }
@article{yan_industrial_2017, title = {Industrial {Big} {Data} in an {Industry} 4.0 {Environment}: {Challenges}, {Schemes}, and {Applications} for {Predictive} {Maintenance}}, volume = {5}, issn = {2169-3536}, shorttitle = {Industrial {Big} {Data} in an {Industry} 4.0 {Environment}}, url = {https://doi.org/10.1109/ACCESS.2017.2765544}, doi = {10.1109/ACCESS.2017.2765544}, language = {en}, urldate = {2020-12-15}, journal = {IEEE Access}, author = {Yan, Jihong and Meng, Yue and Lu, Lei and Li, Lin}, year = {2017}, pages = {23484--23491}, }
@article{ramirez-gallego_survey_2017, title = {A survey on data preprocessing for data stream mining: {Current} status and future directions}, volume = {239}, issn = {0925-2312}, shorttitle = {A survey on data preprocessing for data stream mining}, url = {http://www.sciencedirect.com/science/article/pii/S0925231217302631}, doi = {10.1016/j.neucom.2017.01.078}, abstract = {Data preprocessing and reduction have become essential techniques in current knowledge discovery scenarios, dominated by increasingly large datasets. These methods aim at reducing the complexity inherent to real-world datasets, so that they can be easily processed by current data mining solutions. Advantages of such approaches include, among others, a faster and more precise learning process, and more understandable structure of raw data. However, in the context of data preprocessing techniques for data streams have a long road ahead of them, despite online learning is growing in importance thanks to the development of Internet and technologies for massive data collection. Throughout this survey, we summarize, categorize and analyze those contributions on data preprocessing that cope with streaming data. This work also takes into account the existing relationships between the different families of methods (feature and instance selection, and discretization). To enrich our study, we conduct thorough experiments using the most relevant contributions and present an analysis of their predictive performance, reduction rates, computational time, and memory usage. Finally, we offer general advices about existing data stream preprocessing algorithms, as well as discuss emerging future challenges to be faced in the domain of data stream preprocessing.}, language = {en}, urldate = {2020-12-12}, journal = {Neurocomputing}, author = {Ramírez-Gallego, Sergio and Krawczyk, Bartosz and García, Salvador and Woźniak, Michał and Herrera, Francisco}, month = may, year = {2017}, keywords = {Concept drift, Data discretization, Data mining, Data preprocessing, Data reduction, Data stream, Feature selection, Instance selection, Online learning}, pages = {39--57}, }
@inproceedings{leodolter_semi-supervised_2017, title = {Semi-supervised segmentation of accelerometer time series for transport mode classification}, doi = {10.1109/MTITS.2017.8005596}, abstract = {Collecting ground truth data with smart phone applications is as difficult as important for training classification models predicting transport modes of people. Errors of respondent input with respect to trip length and transport mode segmenting introduce a systematic bias in the classification model. We propose a semi-supervised framework adjusting user-given input to process user-collected accelerometer time series data. Our contributions are (1) an evaluation of the impact of segmentation bias, (2) a novel algorithm to find more homogeneous segments and (3) a robust incrementally trained classifier model based on clustering employing Dynamic Time Warping as similarity measure. We apply the proposed method on synthetic and real-world accelerometer trip data of 800 labeled trips consisting of 2000 user-given segments and 400 hours travel time and test it against a baseline classifier relying completely on user-feedback. The results prove that our method learns clusters revised from noise and increases the classifier's accuracy for real-world and synthetic data by up to 17\%.}, booktitle = {2017 5th {IEEE} {International} {Conference} on {Models} and {Technologies} for {Intelligent} {Transportation} {Systems} ({MT}-{ITS})}, author = {Leodolter, M. and Widhalm, P. and Plant, C. and Brandle, N.}, month = jun, year = {2017}, keywords = {Accelerometer, Accelerometers, Clustering, Clustering algorithms, Dynamic Time Warping, Feature extraction, Heuristic algorithms, Legged locomotion, Robustness, Segmentation, Time series analysis, Transport Mode Detection, accelerometers, dynamic time warping, ground truth data, homogeneous segments, labeled trips, learning (artificial intelligence), pattern classification, pattern clustering, real-world accelerometer trip data, robust incrementally trained classifier model, segmentation bias, semisupervised accelerometer time series segmentation, similarity measure, smart phone, smart phones, synthetic accelerometer trip data, time series, traffic information systems, transport mode classification, trip length, user-given segments}, pages = {663--668}, }
@inproceedings{cline_predictive_2017, title = {Predictive maintenance applications for machine learning}, doi = {10.1109/RAM.2017.7889679}, abstract = {Machine Learning provides a complementary approach to maintenance planning by analyzing significant data sets of individual machine performance and environment variables, identifying failure signatures and profiles, and providing an actionable prediction of failure for individual parts.}, booktitle = {2017 {Annual} {Reliability} and {Maintainability} {Symposium} ({RAMS})}, author = {Cline, B. and Niculescu, R. S. and Huffman, D. and Deckel, B.}, month = jan, year = {2017}, keywords = {Analytical models, Connectors, Data models, Inspection, Machine Learning, Predicted Failure Analysis, Predictive Maintenance, Predictive maintenance, Predictive models, environmental variable, failure analysis, failure signature, learning (artificial intelligence), machine learning, machine performance, maintenance engineering, maintenance planning, planning, predictive maintenance, production engineering computing, reliability}, pages = {1--7}, }
@article{gugulothu_predicting_2017, title = {Predicting {Remaining} {Useful} {Life} using {Time} {Series} {Embeddings} based on {Recurrent} {Neural} {Networks}}, url = {http://arxiv.org/abs/1709.01073}, abstract = {We consider the problem of estimating the remaining useful life (RUL) of a system or a machine from sensor data. Many approaches for RUL estimation based on sensor data make assumptions about how machines degrade. Additionally, sensor data from machines is noisy and often suffers from missing values in many practical settings. We propose Embed-RUL: a novel approach for RUL estimation from sensor data that does not rely on any degradation-trend assumptions, is robust to noise, and handles missing values. Embed-RUL utilizes a sequence-to-sequence model based on Recurrent Neural Networks (RNNs) to generate embeddings for multivariate time series subsequences. The embeddings for normal and degraded machines tend to be different, and are therefore found to be useful for RUL estimation. We show that the embeddings capture the overall pattern in the time series while filtering out the noise, so that the embeddings of two machines with similar operational behavior are close to each other, even when their sensor readings have significant and varying levels of noise content. We perform experiments on publicly available turbofan engine dataset and a proprietary real-world dataset, and demonstrate that Embed-RUL outperforms the previously reported state-of-the-art on several metrics.}, urldate = {2020-10-05}, journal = {arXiv:1709.01073 [cs]}, author = {Gugulothu, Narendhar and TV, Vishnu and Malhotra, Pankaj and Vig, Lovekesh and Agarwal, Puneet and Shroff, Gautam}, month = oct, year = {2017}, note = {arXiv: 1709.01073}, keywords = {Computer Science - Machine Learning}, }
@inproceedings{zhang_segmentation_2017, title = {A segmentation technology for multivariate contextual time series}, doi = {10.1109/ISCMI.2017.8279600}, abstract = {A time series is a series of data points indexed in time order, mining multivariate contextual time series (MCTS) should pay more attention to time order. This paper proposes a new method for splitting the MCTS into a number of segments, uses the concept of scenarios and themes to represent MCTS instead of data points and extracts important contextual features to carry out the multidimensional fitting for MCTS.}, booktitle = {2017 {IEEE} 4th {International} {Conference} on {Soft} {Computing} {Machine} {Intelligence} ({ISCMI})}, author = {Zhang, Hui-Juan and Huang, Jia-Cheng}, month = nov, year = {2017}, keywords = {Conferences, Data mining, Feature extraction, Fitting, Knowledge discovery, Legged locomotion, MCTS, Time series analysis, contextual, contextual features, data analysis, data mining, data points, multidimensional fitting, multivariate contextual time series mining, segmentation, segmentation technology, time order, time series}, pages = {71--74}, }
@article{aminikhanghahi_survey_2017, title = {A survey of methods for time series change point detection}, volume = {51}, issn = {0219-1377}, url = {https://doi.org/10.1007/s10115-016-0987-z}, doi = {10.1007/s10115-016-0987-z}, abstract = {Change points are abrupt variations in time series data. Such abrupt changes may represent transitions that occur between states. Detection of change points is useful in modelling and prediction of time series and is found in application areas such as medical condition monitoring, climate change detection, speech and image analysis, and human activity analysis. This survey article enumerates, categorizes, and compares many of the methods that have been proposed to detect change points in time series. The methods examined include both supervised and unsupervised algorithms that have been introduced and evaluated. We introduce several criteria to compare the algorithms. Finally, we present some grand challenges for the community to consider.}, number = {2}, urldate = {2020-09-30}, journal = {Knowledge and Information Systems}, author = {Aminikhanghahi, Samaneh and Cook, Diane J.}, month = may, year = {2017}, keywords = {Change point detection, Data mining, Machine learning, Segmentation, Time series data}, pages = {339--367}, }
@techreport{noauthor_maintenance_2017, type = {Standard}, title = {Maintenance. {Maintenance} terminology}, institution = {The British Standards Institution}, year = {2017}, }
@article{noauthor_ieee_2017, title = {{IEEE} {Standard} {Framework} for {Prognostics} and {Health} {Management} of {Electronic} {Systems}}, journal = {IEEE Std 1856-2017}, year = {2017}, pages = {1--31}, }
@inproceedings{zhao_advanced_2017, title = {Advanced correlation-based anomaly detection method for predictive maintenance}, doi = {10.1109/ICPHM.2017.7998309}, abstract = {Variations in sensor data collected from equipment have been widely analyzed by using anomaly detection methods for predictive maintenance. Our experience shows that correlations between sensors effectively predict failures because the correlations usually reflect the status of equipment with higher sensitivity. In this paper, we present a method that exploits correlations between sensors for pre-processing and enables anomalies to be detected using both sensor data and correlations. The method was evaluated by applying it to compact electric generators, and the results showed it detected anomalies more accurately than when only sensor data were used. This method is expected to predict failures earlier and reduce the cost of downtime and maintenance.}, booktitle = {2017 {IEEE} {International} {Conference} on {Prognostics} and {Health} {Management} ({ICPHM})}, author = {Zhao, Pushe and Kurihara, Masaru and Tanaka, Junichi and Noda, Tojiro and Chikuma, Shigeyoshi and Suzuki, Tadashi}, month = jun, year = {2017}, keywords = {Correlation, Correlation coefficient, Data models, Electric generators, Maintenance engineering, Time series analysis, advanced correlation, anomaly detection, anomaly detection methods, compact electric generators, correlation coefficient, correlation methods, electric generator, electric generators, failure analysis, failure prediction, maintenance engineering, multivairate time series, predictive maintenance}, pages = {78--83}, }
@article{wang_new_2017, title = {A new paradigm of cloud-based predictive maintenance for intelligent manufacturing}, volume = {28}, issn = {1572-8145}, url = {https://doi.org/10.1007/s10845-015-1066-0}, doi = {10.1007/s10845-015-1066-0}, number = {5}, journal = {Journal of Intelligent Manufacturing}, author = {Wang, Jinjiang and Zhang, Laibin and Duan, Lixiang and Gao, Robert X.}, year = {2017}, pages = {1125--1137}, }
@incollection{lee_big_2017, title = {Big {Data} {Analytics} for {Predictive} {Maintenance} {Strategies}}, author = {Lee, C. and Cao, Yi and Ng, Kam K.H.}, year = {2017}, doi = {10.4018/978-1-5225-0956-1.ch004}, }
@article{accorsi_data_2017, title = {Data {Mining} and {Machine} {Learning} for {Condition}-based {Maintenance}}, volume = {11}, issn = {2351-9789}, url = {http://www.sciencedirect.com/science/article/pii/S235197891730447X}, doi = {10.1016/j.promfg.2017.07.239}, journal = {Procedia Manufacturing}, author = {Accorsi, Riccardo and Manzini, Riccardo and Pascarella, Pietro and Patella, Marco and Sassi, Simone}, year = {2017}, keywords = {condition-based maintenance, data analytics, data mining, failure event, machine learning}, pages = {1153--1161}, }
@inproceedings{canizo_real-time_2017, title = {Real-time predictive maintenance for wind turbines using {Big} {Data} frameworks}, doi = {10.1109/ICPHM.2017.7998308}, booktitle = {2017 {IEEE} {International} {Conference} on {Prognostics} and {Health} {Management} ({ICPHM})}, author = {Canizo, M. and Onieva, E. and Conde, A. and Charramendieta, S. and Trujillo, S.}, year = {2017}, keywords = {Apache Kafka, Apache Mesos, Apache Spark, Big Data, Big Data architectures, Big Data environment, Cloud computing, Companies, HDFS, Industry 4.0, Machine learning, O and M cost reduction, Predictive maintenance, Predictive models, Wind power, Wind turbines, centralized access point, cloud computing, data process speed, data-driven solution, failure analysis, fault-tolerant functionality, learning (artificial intelligence), maintenance engineering, monitoring agent, power engineering computing, predictive model generator, random forest algorithm, real-time predictive maintenance, wind turbines}, pages = {70--77}, }
@inproceedings{hasani_robust_2017, title = {Robust anomaly detection algorithms for real-time big data: {Comparison} of algorithms}, shorttitle = {Robust anomaly detection algorithms for real-time big data}, doi = {10.1109/MECO.2017.7977130}, abstract = {Most of the today's world data are streaming, time-series data, where anomalies detection gives significant information of possible critical situations. Yet, detecting anomalies in big streaming data is a difficult task, requiring detectors to acquire and process data in a real-time, as they occur, even before they are stored and instantly alarm on potential threats. Suitable to the need for real-time alarm and unsupervised procedures for massive streaming data anomaly detection, algorithms have to be robust, with low processing time, eventually at the cost of the accuracy. In this work we explore several such fast algorithms like MAD, RunMAD, Boxplot, Twitter ADVec, DBSCAN, Moving Range Technique, Statistical Control Chart Techniques, ARIMA and Moving Average. The algorithms are tested and results are visualized in the system R, on the three Numenta datasets, with known anomalies and own e-dnevnik dataset with unknown anomalies. Evaluation is done by comparing achieved results (the algorithm execution time, CPU usage and the number of anomalies found) with Numenta HTM algorithm that detects all the anomalies in their datasets. Our interest is monitoring of the streaming log data that are generating in the national educational network (e-dnevnk) that acquires a massive number of online queries and to detect anomalies in order to scale up performance, prevent network downs, alarm on possible attacks and similar.}, booktitle = {2017 6th {Mediterranean} {Conference} on {Embedded} {Computing} ({MECO})}, author = {Hasani, Zirije}, month = jun, year = {2017}, keywords = {ARIMA, Autoregressive processes, Big Data, Boxplot, CPU usage, Classification algorithms, Clustering algorithms, Control charts, DBSCAN, HTM, Log data, MAD, Moving Average, Moving Range Technique, NuPIC, Numenta dataset, R, R system, Real-time systems, RunMAD, Statistical Control Chart Techniques, Time series analysis, Twitter ADVec, algorithm execution time, anomaly detection, autoregressive moving average processes, component, control charts, data processing, e-dnevnik dataset, educational administrative data processing, known-anomalies, moving average technique, moving range technique, national educational network, network down prevention, online queries, outlier detection, real-time Big Data, real-time alarm procedure, real-time big data, robust anomaly detection algorithm, social networking (online), statistical control chart technique, unknown anomalies, unsupervised learning, unsupervised procedure, visualization}, pages = {1--6}, }
@article{fefferman_testing_2016, title = {Testing the manifold hypothesis}, volume = {29}, issn = {0894-0347, 1088-6834}, url = {https://www.ams.org/jams/2016-29-04/S0894-0347-2016-00852-4/}, doi = {10.1090/jams/852}, language = {en}, number = {4}, urldate = {2022-12-15}, journal = {Journal of the American Mathematical Society}, author = {Fefferman, Charles and Mitter, Sanjoy and Narayanan, Hariharan}, month = oct, year = {2016}, pages = {983--1049}, }
@inproceedings{yeh_matrix_2016, title = {Matrix {Profile} {I}: {All} {Pairs} {Similarity} {Joins} for {Time} {Series}: {A} {Unifying} {View} {That} {Includes} {Motifs}, {Discords} and {Shapelets}}, shorttitle = {Matrix {Profile} {I}}, doi = {10.1109/ICDM.2016.0179}, abstract = {The all-pairs-similarity-search (or similarity join) problem has been extensively studied for text and a handful of other datatypes. However, surprisingly little progress has been made on similarity joins for time series subsequences. The lack of progress probably stems from the daunting nature of the problem. For even modest sized datasets the obvious nested-loop algorithm can take months, and the typical speed-up techniques in this domain (i.e., indexing, lower-bounding, triangular-inequality pruning and early abandoning) at best produce one or two orders of magnitude speedup. In this work we introduce a novel scalable algorithm for time series subsequence all-pairs-similarity-search. For exceptionally large datasets, the algorithm can be trivially cast as an anytime algorithm and produce high-quality approximate solutions in reasonable time. The exact similarity join algorithm computes the answer to the time series motif and time series discord problem as a side-effect, and our algorithm incidentally provides the fastest known algorithm for both these extensively-studied problems. We demonstrate the utility of our ideas for two time series data mining problems, including motif discovery and novelty discovery.}, booktitle = {2016 {IEEE} 16th {International} {Conference} on {Data} {Mining} ({ICDM})}, author = {Yeh, Chin-Chia Michael and Zhu, Yan and Ulanova, Liudmila and Begum, Nurjahan and Ding, Yifei and Dau, Hoang Anh and Silva, Diego Furtado and Mueen, Abdullah and Keogh, Eamonn}, month = dec, year = {2016}, note = {ISSN: 2374-8486}, keywords = {Approximation algorithms, Clustering algorithms, Data mining, Euclidean distance, Indexes, Motif Discovery, Similarity Joins, Text processing, Time Series, Time series analysis}, pages = {1317--1322}, }
@inproceedings{chen_xgboost_2016, address = {New York, NY, USA}, series = {{KDD} '16}, title = {{XGBoost}: {A} {Scalable} {Tree} {Boosting} {System}}, isbn = {978-1-4503-4232-2}, shorttitle = {{XGBoost}}, url = {https://doi.org/10.1145/2939672.2939785}, doi = {10.1145/2939672.2939785}, abstract = {Tree boosting is a highly effective and widely used machine learning method. In this paper, we describe a scalable end-to-end tree boosting system called XGBoost, which is used widely by data scientists to achieve state-of-the-art results on many machine learning challenges. We propose a novel sparsity-aware algorithm for sparse data and weighted quantile sketch for approximate tree learning. More importantly, we provide insights on cache access patterns, data compression and sharding to build a scalable tree boosting system. By combining these insights, XGBoost scales beyond billions of examples using far fewer resources than existing systems.}, urldate = {2022-08-22}, booktitle = {Proceedings of the 22nd {ACM} {SIGKDD} {International} {Conference} on {Knowledge} {Discovery} and {Data} {Mining}}, publisher = {Association for Computing Machinery}, author = {Chen, Tianqi and Guestrin, Carlos}, month = aug, year = {2016}, keywords = {large-scale machine learning}, pages = {785--794}, }
@article{zhang_data_2016, series = {Signal {Processing} for {Heterogeneous} {Sensor} {Networks}}, title = {Data stream clustering based on {Fuzzy} {C}-{Mean} algorithm and entropy theory}, volume = {126}, issn = {0165-1684}, url = {https://www.sciencedirect.com/science/article/pii/S0165168415003576}, doi = {10.1016/j.sigpro.2015.10.014}, abstract = {In data stream clustering studies, majority of methods are traditional hard clustering, the literatures of fuzzy clustering in clustering are few. In this paper, the fuzzy clustering algorithm is used to research data stream clustering, and the clustering results can truly reflect the actual relationship between objects and classes. It overcomes the either-or shortcoming of hard clustering. This paper presents a new method to detect concept drift. The membership degree of fuzzy clustering is used to calculate the information entropy of data, and according to the entropy to detect concept drift. The experimental results show that the detection of concept drift based on the entropy theory is effective and sensitive.}, language = {en}, urldate = {2022-07-29}, journal = {Signal Processing}, author = {Zhang, Baoju and Qin, Shan and Wang, Wei and Wang, Dan and Xue, Lei}, month = sep, year = {2016}, keywords = {Clustering, Concept drift detection, Entropy theory, Fuzzy C-Means}, pages = {111--116}, }
@inproceedings{mangal_using_2016, title = {Using big data to enhance the bosch production line performance: {A} {Kaggle} challenge}, shorttitle = {Using big data to enhance the bosch production line performance}, doi = {10.1109/BigData.2016.7840826}, abstract = {This paper describes our approach to the Bosch production line performance challenge run by Kaggle.com. Maximizing the production yield is at the heart of the manufacturing industry. At the Bosch assembly line, data is recorded for products as they progress through each stage. Data science methods are applied to this huge data repository consisting records of tests and measurements made for each component along the assembly line to predict internal failures. We found that it is possible to train a model that predicts which parts are most likely to fail. Thus a smarter failure detection system can be built and the parts tagged likely to fail can be salvaged to decrease operating costs and increase the profit margins.}, booktitle = {2016 {IEEE} {International} {Conference} on {Big} {Data} ({Big} {Data})}, author = {Mangal, Ankita and Kumar, Nishant}, month = dec, year = {2016}, keywords = {Big data, Error analysis, Machine learning algorithms, Manufacturing, Manufacturing automation, Numerical models, Predictive models, Production, data science, failure analysis, predictive models}, pages = {2029--2035}, }
@inproceedings{andres-merino_streamleader_2016, address = {Cham}, series = {Lecture {Notes} in {Computer} {Science}}, title = {{StreamLeader}: {A} {New} {Stream} {Clustering} {Algorithm} not {Based} in {Conventional} {Clustering}}, isbn = {978-3-319-44781-0}, shorttitle = {{StreamLeader}}, doi = {10.1007/978-3-319-44781-0_25}, abstract = {Stream clustering algorithms normally require two phases: an online first step that statistically summarizes the stream while forming special structures – such as micro-clusters– and a second, offline phase, that uses a conventional clustering algorithm taking the micro-clusters as pseudo-points to deliver the final clustering. This procedure tends to produce oversized or overlapping clusters in medium-to-high dimensional spaces, and typically degrades seriously in noisy data environments. In this paper we introduce StreamLeader, a novel stream clustering algorithm suitable to massive data that does not resort to a conventional clustering phase, being based on the notion of Leader Cluster and on an aggressive noise reduction process. We report an extensive systematic testing in which the new algorithm is shown to consistently outperform its contenders both in terms of quality and scalability.}, language = {en}, booktitle = {Artificial {Neural} {Networks} and {Machine} {Learning} – {ICANN} 2016}, publisher = {Springer International Publishing}, author = {Andrés-Merino, Jaime and Belanche, Lluís A.}, editor = {Villa, Alessandro E.P. and Masulli, Paolo and Pons Rivero, Antonio Javier}, year = {2016}, keywords = {Big Data, Clustering, Stream algorithms}, pages = {208--215}, }
@article{dangelo_new_2016, title = {A new fault classification approach applied to {Tennessee} {Eastman} benchmark process}, volume = {49}, issn = {1568-4946}, url = {https://www.sciencedirect.com/science/article/pii/S1568494616304343}, doi = {10.1016/j.asoc.2016.08.040}, abstract = {This study presents a data-based methodology for fault detection and isolation in dynamic systems based on fuzzy/Bayesian approach for change point detection associated with a hybrid immune/neural formulation for pattern classification applied to the Tennessee Eastman benchmark process. The fault is detected when a change occurs in the signals from the sensors and classified into one of the classes by the immune/neural formulation. The change point detection system is based on fuzzy set theory associated with the Metropolis–Hastings algorithm and the classification system, the main contribution of this paper is based on a representation which combines the ClonALG algorithm with the Kohonen neural network.}, language = {en}, urldate = {2022-05-02}, journal = {Applied Soft Computing}, author = {D’Angelo, Marcos F. S. V. and Palhares, Reinaldo M. and Camargos Filho, Murilo C. O. and Maia, Renato D. and Mendes, João B. and Ekel, Petr Ya.}, month = dec, year = {2016}, keywords = {Fault detection and isolation, Fuzzy/Bayesian approach, Immune/neural formulation, Tennessee Eastman benchmark process}, pages = {676--686}, }
@article{li_data-driven_2016, title = {Data-driven root cause diagnosis of faults in process industries}, volume = {159}, issn = {0169-7439}, url = {https://www.sciencedirect.com/science/article/pii/S0169743916303203}, doi = {10.1016/j.chemolab.2016.09.006}, abstract = {Data driven fault detection and diagnosis methods become more and more attractive in modern industries especially process industries. They can not only guarantee safe operation but also greatly improve product quality. For example, dynamic principal component analysis models and reconstruction based contribution are widely applicable in many occasions. However, there is one issue which does not receive enough attention, namely locating the root cause of a fault when it occurs. In this paper, a framework of root cause location is proposed to address this issue, including both stationary faults and nonstationary faults. A case study on Tennessee Eastman process is used to demonstrate the usage and effectiveness of these approaches. Results show the proposed framework is valid.}, language = {en}, urldate = {2022-05-02}, journal = {Chemometrics and Intelligent Laboratory Systems}, author = {Li, Gang and Qin, S. Joe and Yuan, Tao}, month = dec, year = {2016}, keywords = {Dynamic principal component analysis, Dynamic time warping, Granger causality analysis, Reconstruction based contribution, Root cause diagnosis}, pages = {1--11}, }
@incollection{thomas_extracting_2016, series = {26 {European} {Symposium} on {Computer} {Aided} {Process} {Engineering}}, title = {Extracting knowledge from historical databases for process monitoring using feature extraction and data clustering}, volume = {38}, url = {https://www.sciencedirect.com/science/article/pii/B978044463428350148X}, abstract = {For most chemical plants, a major obstacle inhibiting the application of cutting edge fault detection and diagnosis is that many of the best methods require data organized into groups before training is possible. Data clustering and non-linear dimensionality reduction are underutilized tools for this task and this study evaluates how they can work in tandem to extract knowledge from chemical process data sets. Two non-linear dimensionality reduction techniques and principal component analysis as well as two clustering techniques are studied on industrial case studies and a simulation}, language = {en}, urldate = {2022-05-02}, booktitle = {Computer {Aided} {Chemical} {Engineering}}, publisher = {Elsevier}, author = {Thomas, Michael C. and Romagnoli, Jose}, editor = {Kravanja, Zdravko and Bogataj, Miloš}, month = jan, year = {2016}, doi = {10.1016/B978-0-444-63428-3.50148-X}, keywords = {data clustering, data mining, dimensionality reduction, feature extraction}, pages = {859--864}, }
@article{al-dahidi_remaining_2016, title = {Remaining useful life estimation in heterogeneous fleets working under variable operating conditions}, volume = {156}, issn = {0951-8320}, url = {https://www.sciencedirect.com/science/article/pii/S0951832016302927}, doi = {10.1016/j.ress.2016.07.019}, abstract = {The availability of condition monitoring data for large fleets of similar equipment motivates the development of data-driven prognostic approaches that capitalize on the information contained in such data to estimate equipment Remaining Useful Life (RUL). A main difficulty is that the fleet of equipment typically experiences different operating conditions, which influence both the condition monitoring data and the degradation processes that physically determine the RUL. We propose an approach for RUL estimation from heterogeneous fleet data based on three phases: firstly, the degradation levels (states) of an homogeneous discrete-time finite-state semi-markov model are identified by resorting to an unsupervised ensemble clustering approach. Then, the parameters of the discrete Weibull distributions describing the transitions among the states and their uncertainties are inferred by resorting to the Maximum Likelihood Estimation (MLE) method and to the Fisher Information Matrix (FIM), respectively. Finally, the inferred degradation model is used to estimate the RUL of fleet equipment by direct Monte Carlo (MC) simulation. The proposed approach is applied to two case studies regarding heterogeneous fleets of aluminium electrolytic capacitors and turbofan engines. Results show the effectiveness of the proposed approach in predicting the RUL and its superiority compared to a fuzzy similarity-based approach of literature.}, language = {en}, urldate = {2022-05-02}, journal = {Reliability Engineering \& System Safety}, author = {Al-Dahidi, Sameer and Di Maio, Francesco and Baraldi, Piero and Zio, Enrico}, month = dec, year = {2016}, keywords = {Aluminium electrolytic capacitors, Failure prognostics, Heterogeneous fleet, Homogeneous discrete-time finite-state semi-markov model, Remaining Useful Life (RUL), Turbofan engines}, pages = {109--124}, }
@article{chebel-morello_feature_2016, title = {Feature selection for fault detection systems: application to the {Tennessee} {Eastman} process}, volume = {44}, issn = {1573-7497}, shorttitle = {Feature selection for fault detection systems}, url = {https://doi.org/10.1007/s10489-015-0694-6}, doi = {10.1007/s10489-015-0694-6}, abstract = {In fault detection systems, a massive amount of data gathered from the life-cycle of equipment is often used to learn models or classifiers that aims at diagnosing different kinds of errors or failures. Among this huge quantity of information, some features (or sets of features) are more correlated with a kind of failure than another. The presence of irrelevant features might affect the performance of the classifier. To improve the performance of a detection system, feature selection is hence a key step. We propose in this paper an algorithm named STRASS, which aims at detecting relevant features for classification purposes. In certain cases, when there exists a strong correlation between some features and the associated class, conventional feature selection algorithms fail at selecting the most relevant features. In order to cope with this problem, STRASS algorithm uses k-way correlation between features and the class to select relevant features. To assess the performance of STRASS, we apply it on simulated data collected from the Tennessee Eastman chemical plant simulator. The Tennessee Eastman process (TEP) has been used in many fault detection studies and three specific faults are not well discriminated with conventional algorithms. The results obtained by STRASS are compared to those obtained with reference feature selection algorithms. We show that the features selected by STRASS always improve the performance of a classifier compared to the whole set of original features and that the obtained classification is better than with most of the other feature selection algorithms.}, language = {en}, number = {1}, urldate = {2022-05-02}, journal = {Applied Intelligence}, author = {Chebel-Morello, Brigitte and Malinowski, Simon and Senoussi, Hafida}, month = jan, year = {2016}, keywords = {Contextual measure, Fault detection, Feature selection, Wrapper method}, pages = {111--122}, }
@article{weiss_survey_2016, title = {A survey of transfer learning}, volume = {3}, issn = {2196-1115}, url = {https://doi.org/10.1186/s40537-016-0043-6}, doi = {10.1186/s40537-016-0043-6}, abstract = {Machine learning and data mining techniques have been used in numerous real-world applications. An assumption of traditional machine learning methodologies is the training data and testing data are taken from the same domain, such that the input feature space and data distribution characteristics are the same. However, in some real-world machine learning scenarios, this assumption does not hold. There are cases where training data is expensive or difficult to collect. Therefore, there is a need to create high-performance learners trained with more easily obtained data from different domains. This methodology is referred to as transfer learning. This survey paper formally defines transfer learning, presents information on current solutions, and reviews applications applied to transfer learning. Lastly, there is information listed on software downloads for various transfer learning solutions and a discussion of possible future research work. The transfer learning solutions surveyed are independent of data size and can be applied to big data environments.}, number = {1}, urldate = {2022-04-25}, journal = {Journal of Big Data}, author = {Weiss, Karl and Khoshgoftaar, Taghi M. and Wang, DingDing}, month = may, year = {2016}, keywords = {Data mining, Domain adaptation, Machine learning, Survey, Transfer learning}, pages = {9}, }
@inproceedings{haque_efficient_2016, title = {Efficient handling of concept drift and concept evolution over {Stream} {Data}}, doi = {10.1109/ICDE.2016.7498264}, abstract = {To decide if an update to a data stream classifier is necessary, existing sliding window based techniques monitor classifier performance on recent instances. If there is a significant change in classifier performance, these approaches determine a chunk boundary, and update the classifier. However, monitoring classifier performance is costly due to scarcity of labeled data. In our previous work, we presented a semi-supervised framework SAND, which uses change detection on classifier confidence to detect a concept drift. Unlike most approaches, it requires only a limited amount of labeled data to detect chunk boundaries and to update the classifier. However, SAND is expensive in terms of execution time due to exhaustive invocation of the change detection module. In this paper, we present an efficient framework, which is based on the same principle as SAND, but exploits dynamic programming and executes the change detection module selectively. Moreover, we provide theoretical justification of the confidence calculation, and show effect of a concept drift on subsequent confidence scores. Experiment results show efficiency of the proposed framework in terms of both accuracy and execution time.}, booktitle = {2016 {IEEE} 32nd {International} {Conference} on {Data} {Engineering} ({ICDE})}, author = {Haque, Ahsanul and Khan, Latifur and Baron, Michael and Thuraisingham, Bhavani and Aggarwal, Charu}, month = may, year = {2016}, keywords = {Classifier Confidence, Concept Drift, Data mining, Data models, Dynamic Chunk, Dynamic programming, Electronic mail, Error analysis, Labeling, Training data}, pages = {481--492}, }
@inproceedings{haque_sand_2016, address = {Phoenix, Arizona}, series = {{AAAI}'16}, title = {{SAND}: semi-supervised adaptive novel class detection and classification over data stream}, shorttitle = {{SAND}}, abstract = {Most approaches to classifying data streams either divide the stream into fixed-size chunks or use gradual forgetting. Due to evolving nature of data streams, finding a proper size or choosing a forgetting rate without prior knowledge about time-scale of change is not a trivial task. These approaches hence suffer from a trade-off between performance and sensitivity. Existing dynamic sliding window based approaches address this problem by tracking changes in classifier error rate, but are supervised in nature. We propose an efficient semi-supervised framework in this paper which uses change detection on classifier confidence to detect concept drifts, and to determine chunk boundaries dynamically. It also addresses concept evolution problem by detecting outliers having strong cohesion among themselves. Experiment results on benchmark and synthetic data sets show effectiveness of the proposed approach.}, urldate = {2022-03-31}, booktitle = {Proceedings of the {Thirtieth} {AAAI} {Conference} on {Artificial} {Intelligence}}, publisher = {AAAI Press}, author = {Haque, Ahsanul and Khan, Latifur and Baron, Michael}, month = feb, year = {2016}, pages = {1652--1658}, }
@article{ghesmoune_state---art_2016, title = {State-of-the-art on clustering data streams}, volume = {1}, issn = {2058-6345}, url = {https://doi.org/10.1186/s41044-016-0011-3}, doi = {10.1186/s41044-016-0011-3}, abstract = {Clustering is a key data mining task. This is the problem of partitioning a set of observations into clusters such that the intra-cluster observations are similar and the inter-cluster observations are dissimilar. The traditional set-up where a static dataset is available in its entirety for random access is not applicable as we do not have the entire dataset at the launch of the learning, the data continue to arrive at a rapid rate, we can not access the data randomly, and we can make only one or at most a small number of passes on the data in order to generate the clustering results. These types of data are referred to as data streams. The data stream clustering problem requires a process capable of partitioning observations continuously while taking into account restrictions of memory and time. In the literature of data stream clustering methods, a large number of algorithms use a two-phase scheme which consists of an online component that processes data stream points and produces summary statistics, and an offline component that uses the summary data to generate the clusters. An alternative class is capable of generating the final clusters without the need of an offline phase. This paper presents a comprehensive survey of the data stream clustering methods and an overview of the most well-known streaming platforms which implement clustering.}, number = {1}, urldate = {2022-03-25}, journal = {Big Data Analytics}, author = {Ghesmoune, Mohammed and Lebbah, Mustapha and Azzag, Hanene}, month = dec, year = {2016}, keywords = {Data stream clustering, State-of-the-art, Streaming platforms}, pages = {13}, }
@article{ribeiro_sequential_2016, title = {Sequential anomalies: a study in the {Railway} {Industry}}, volume = {105}, issn = {1573-0565}, shorttitle = {Sequential anomalies}, url = {https://doi.org/10.1007/s10994-016-5584-6}, doi = {10.1007/s10994-016-5584-6}, abstract = {Concerned with predicting equipment failures, predictive maintenance has a high impact both at a technical and at a financial level. Most modern equipments have logging systems that allow us to collect a diversity of data regarding their operation and health. Using data mining models for anomaly and novelty detection enables us to explore those datasets, building predictive systems that can detect and issue an alert when a failure starts evolving, avoiding the unknown development up to breakdown. In the present case, we use a failure detection system to predict train door breakdowns before they happen using data from their logging system. We use sensor data from pneumatic valves that control the open and close cycles of a door. Still, the failure of a cycle does not necessarily indicates a breakdown. A cycle might fail due to user interaction. The goal of this study is to detect structural failures in the automatic train door system, not when there is a cycle failure, but when there are sequences of cycle failures. We study three methods for such structural failure detection: outlier detection, anomaly detection and novelty detection, using different windowing strategies. We propose a two-stage approach, where the output of a point-anomaly algorithm is post-processed by a low-pass filter to obtain a subsequence-anomaly detection. The main result of the two-level architecture is a strong impact in the false alarm rate.}, language = {en}, number = {1}, urldate = {2022-02-23}, journal = {Machine Learning}, author = {Ribeiro, Rita P. and Pereira, Pedro and Gama, João}, month = oct, year = {2016}, pages = {127--153}, }
@inproceedings{wang_self-regulated_2016, title = {Self-regulated incremental clustering with focused preferences}, doi = {10.1109/IJCNN.2016.7727347}, abstract = {Due to their online learning nature, incremental clustering techniques can handle a continuous stream of data. In particular, various incremental clustering techniques based on Adaptive Resonance Theory (ART) have been shown to have low computational complexity in adaptive learning and are less sensitive to noisy information. However, parameter regularization in existing ART clustering techniques is applied either on different features or on different clusters exclusively. In this paper, we introduce Interest-Focused Clustering based on Adaptive Resonance Theory (IFC-ART), which self-regulates the vigilance parameter associated with each feature and each cluster. As such, we can incorporate the domain knowledge of the data set into IFC-ART to focus on certain preferences during the self-regulated clustering process. For performance evaluation, we use a real-world data set, named American Time Use Survey (ATUS), which records nearly 160,000 telephone interviews conducted with U.S. residents from 2003 to 2014. Specifically, we conduct case studies to explore three types of interesting relationship, focusing on the wage, age, and provision of elderly care, respectively. Experimental results show that the performance of IFC-ART is highly competitive and stable when compared with two well-established clustering techniques and three ART models. In addition, we highlight the important and unexpected findings observed from the clusters discovered.}, booktitle = {2016 {International} {Joint} {Conference} on {Neural} {Networks} ({IJCNN})}, author = {Wang, Di and Tan, Ah-Hwee}, month = jul, year = {2016}, note = {ISSN: 2161-4407}, keywords = {Education, Interviews, Modulation, Noise measurement, Performance evaluation, Senior citizens, Subspace constraints}, pages = {1297--1304}, }
@article{yu_incremental_2016, title = {Incremental {Semi}-{Supervised} {Clustering} {Ensemble} for {High} {Dimensional} {Data} {Clustering}}, volume = {28}, issn = {1558-2191}, doi = {10.1109/TKDE.2015.2499200}, abstract = {Traditional cluster ensemble approaches have three limitations: (1) They do not make use of prior knowledge of the datasets given by experts. (2) Most of the conventional cluster ensemble methods cannot obtain satisfactory results when handling high dimensional data. (3) All the ensemble members are considered, even the ones without positive contributions. In order to address the limitations of conventional cluster ensemble approaches, we first propose an incremental semi-supervised clustering ensemble framework (ISSCE) which makes use of the advantage of the random subspace technique, the constraint propagation approach, the proposed incremental ensemble member selection process, and the normalized cut algorithm to perform high dimensional data clustering. The random subspace technique is effective for handling high dimensional data, while the constraint propagation approach is useful for incorporating prior knowledge. The incremental ensemble member selection process is newly designed to judiciously remove redundant ensemble members based on a newly proposed local cost function and a global cost function, and the normalized cut algorithm is adopted to serve as the consensus function for providing more stable, robust, and accurate results. Then, a measure is proposed to quantify the similarity between two sets of attributes, and is used for computing the local cost function in ISSCE. Next, we analyze the time complexity of ISSCE theoretically. Finally, a set of nonparametric tests are adopted to compare multiple semisupervised clustering ensemble approaches over different datasets. The experiments on 18 real-world datasets, which include six UCI datasets and 12 cancer gene expression profiles, confirm that ISSCE works well on datasets with very high dimensionality, and outperforms the state-of-the-art semi-supervised clustering ensemble approaches.}, number = {3}, journal = {IEEE Transactions on Knowledge and Data Engineering}, author = {Yu, Zhiwen and Luo, Peinan and You, Jane and Wong, Hau-San and Leung, Hareton and Wu, Si and Zhang, Jun and Han, Guoqiang}, month = mar, year = {2016}, note = {Conference Name: IEEE Transactions on Knowledge and Data Engineering}, keywords = {Algorithm design and analysis, Cancer, Cluster ensemble, Clustering algorithms, Cost function, Gene expression, Linear programming, Search problems, cancer gene expression profile, clustering analysis, random subspace, semi-supervised clustering}, pages = {701--714}, }
@article{diez_clustering_2016, title = {A clustering approach for structural health monitoring on bridges}, volume = {6}, issn = {2190-5479}, url = {https://doi.org/10.1007/s13349-016-0160-0}, doi = {10.1007/s13349-016-0160-0}, abstract = {Structural health monitoring is a process for identifying damage in civil infrastructures using sensing system. It has been increasingly employed due to advances in sensing technologies and data analytic using machine learning. A common problem within this scenario is that limited data of real structural faults are available. Therefore, unsupervised and novelty detection machine learning methods must be employed. This work presents a clustering based approach to group substructures or joints with similar behaviour on bridge and then detect abnormal or damaged ones, as part of efforts in applying structural health monitoring to the Sydney Harbour Bridge, one of iconic structures in Australia. The approach is a combination of feature extraction, a nearest neighbor based outlier removal, followed by a clustering approach over both vibration events and joints representatives. Vibration signals caused by passing vehicles from different joints are then classified and damaged joints can be detected and located. The validity of the approach was demonstrated using real data collected from the Sydney Harbour Bridge. The clustering results showed correlations among similarly located joints in different bridge zones. Moreover, it also helped to detect a damaged joint and a joint with a faulty instrumented sensor, and thus demonstrated the feasibility of the proposed clustering based approach to complement existing damage detection strategies.}, language = {en}, number = {3}, urldate = {2022-02-09}, journal = {Journal of Civil Structural Health Monitoring}, author = {Diez, Alberto and Khoa, Nguyen Lu Dang and Makki Alamdari, Mehrisadat and Wang, Yang and Chen, Fang and Runcie, Peter}, month = jul, year = {2016}, keywords = {clustering, ecml, health monitoring}, pages = {429--445}, }
@inproceedings{zhao_machine_2016, title = {Machine health monitoring with {LSTM} networks}, doi = {10.1109/ICSensT.2016.7796266}, abstract = {Effective machine health monitoring systems are critical to modern manufacturing systems and industries. Among various machine health monitoring approaches, data-driven methods are gaining in popularity due to the development of advanced sensing and data analytic techniques. However, sensory data that is a kind of sequential data can not serve as direct meaningful representations for machine conditions due to its noise, varying length and irregular sampling. A majority of previous models focus on feature extraction/fusion methods that involve expensive human labor and high quality expert knowledge. With the development of deep learning methods in the last few years, representation learning from raw data has been redefined. Among deep learning models, Long Short-Term Memory networks (LSTMs) are able to capture long-term dependencies and model sequential data. Therefore, LSTMs is able to work on the sensory data of machine condition. Here, the first study about a empirical evaluation of LSTMs-based machine health monitoring systems is presented. A real life tool wear test is introduced. Basic and deep LSTMs are designed to predict the actual tool wear based on raw sensory data. The experimental results have shown that our models, especially deep LSTMs, are able to outperform several state-of-arts baseline methods.}, booktitle = {2016 10th {International} {Conference} on {Sensing} {Technology} ({ICST})}, author = {Zhao, Rui and Wang, Jinjiang and Yan, Ruqiang and Mao, Kezhi}, month = nov, year = {2016}, note = {ISSN: 2156-8073}, keywords = {Data models, Feature extraction, LSTMs, Machine Health Monitoring, Mathematical model, Monitoring, Neural networks, RNN, Sensors, Tool Wear Prediction, Training, ecml, feature extraction, feature learning}, pages = {1--6}, }
@article{yan_sensor_2016, title = {A sensor fault detection strategy for air handling units using cluster analysis}, volume = {70}, issn = {0926-5805}, url = {https://www.sciencedirect.com/science/article/pii/S0926580516301261}, doi = {10.1016/j.autcon.2016.06.005}, abstract = {Sensors are an essential component in the control systems of air handling units (AHUs). A biased sensor reading could result in inappropriate control and thereby increased energy consumption or unsatisfied indoor thermal comfort. This paper presents an unsupervised learning based strategy using cluster analysis for AHU sensor fault detection. The historical data recorded from sensors is first pre-processed to reduce the dimensions using principal component analysis (PCA). The clustering algorithm Ordering Points to Identify the Clustering Structure (OPTICS) is then employed to identify the spatial separated data groups (i.e. clusters), which possibly indicate the occurrence of sensor faults. The data points in different clusters are then checked for temporal separation in order to confirm the occurrence of sensor faults. The proposed sensor fault detection strategy is tested and evaluated with the data collected from a simulation system. The results showed that this strategy can detect single and non-simultaneously occurred multiple sensor faults in AHUs. The fault detection results were not strongly affected by the selection of the user defined input parameters required in OPTICS.}, language = {en}, urldate = {2022-01-14}, journal = {Automation in Construction}, author = {Yan, Rui and Ma, Zhenjun and Kokogiannakis, Georgios and Zhao, Yang}, month = oct, year = {2016}, keywords = {Air handling units, Cluster analysis, OPTICS, Principal component analysis, Sensor fault detection}, pages = {77--88}, }
@article{yang_health_2016, title = {Health {Index}-{Based} {Prognostics} for {Remaining} {Useful} {Life} {Predictions} in {Electrical} {Machines}}, volume = {63}, issn = {1557-9948}, doi = {10.1109/TIE.2016.2515054}, abstract = {Many industries have a growing awareness in utilizing new technologies to improve the reliability and availability of their systems. Prognostics, a subject concerned with the prediction of the remaining useful life (RUL), has been increasingly studied and applied to practical systems, such as electrical systems, over the past few years. Here, with the adoption of a data-driven prognostics framework, this paper proposed a health index (HI)-based prognostics method to predict the RUL of electrical machines. By assuming a linearly degrading HI over time, the proposed method predicts the RUL in two steps: 1) from input signals to HI; and then 2) mapping HI to RUL. The novelty of this method lies in the proposed dynamic HI smoothing approach where three characteristics of HI, namely monotonicity, gradualness, and consistency, are incorporated to smooth the current HI values with the previously predicted ones. Real data collected from eight electrical motors, subjected to accelerated thermal aging process, were used in the experimental studies, with the results showing the superiority of the proposed HI-based RUL prediction over the traditional direct RUL prediction (i.e., without HI).}, number = {4}, journal = {IEEE Transactions on Industrial Electronics}, author = {Yang, Feng and Habibullah, Mohamed Salahuddin and Zhang, Tianyou and Xu, Zhao and Lim, Pin and Nadarajan, Sivakumar}, month = apr, year = {2016}, note = {Conference Name: IEEE Transactions on Industrial Electronics}, keywords = {Aging, Data collection, Data models, Electrical Machine, Electrical machine, Feature extraction, Health Index, Hidden Markov models, Induction motors, Predictive models, Prognostics, Remaining Useful Life, health index (HI), prognostics, remaining useful life (RUL), sigkdd-rw}, pages = {2633--2644}, }
@article{cai_real-time_2016, title = {A real-time fault diagnosis methodology of complex systems using object-oriented {Bayesian} networks}, volume = {80}, issn = {0888-3270}, url = {https://www.sciencedirect.com/science/article/pii/S0888327016300607}, doi = {10.1016/j.ymssp.2016.04.019}, abstract = {Bayesian network (BN) is a commonly used tool in probabilistic reasoning of uncertainty in industrial processes, but it requires modeling of large and complex systems, in situations such as fault diagnosis and reliability evaluation. Motivated by reduction of the overall complexities of BNs for fault diagnosis, and the reporting of faults that immediately occur, a real-time fault diagnosis methodology of complex systems with repetitive structures is proposed using object-oriented Bayesian networks (OOBNs). The modeling methodology consists of two main phases: an off-line OOBN construction phase and an on-line fault diagnosis phase. In the off-line phase, sensor historical data and expert knowledge are collected and processed to determine the faults and symptoms, and OOBN-based fault diagnosis models are developed subsequently. In the on-line phase, operator experience and sensor real-time data are placed in the OOBNs to perform the fault diagnosis. According to engineering experience, the judgment rules are defined to obtain the fault diagnosis results.}, language = {en}, urldate = {2021-11-19}, journal = {Mechanical Systems and Signal Processing}, author = {Cai, Baoping and Liu, Hanlin and Xie, Min}, month = dec, year = {2016}, keywords = {Complex systems, Fault diagnosis, Object-oriented Bayesian networks, Real-time, bayesian network, bn}, pages = {31--44}, }
@article{nguyen_fault_2016, title = {Fault diagnosis for the complex manufacturing system}, volume = {230}, issn = {1748-006X}, url = {https://doi.org/10.1177/1748006X15623089}, doi = {10.1177/1748006X15623089}, abstract = {Present manufacturing systems are equipped with sensors that provide a basis for real-time monitoring and diagnosis; however, placement of these sensors is constrained by the the functions that they perform and the system’s structure. Moreover, sensors cannot be placed across all components in the equipment due to significant data challenges. This results in non-observable components, which limit our ability to support effective real-time monitoring and fault diagnosis initiatives. Consequently, product quality drifts found during inspection often result in unscheduled breakdown of all equipment involved in respective production operation. This situation becomes more complex for automated manufacturing lines, where success depends on our ability to capitalize maximum production capacities. This paper proposes a methodology that exploits historical data over unobserved equipment components to reduce the search space of potential faulty components, followed by a more accurate diagnosis of failures and causes. The purpose is to improve the effectiveness and efficiency of both the real-time monitoring of potential faulty components and the diagnosis of causes. In the proposed approach, we use a logical diagnosis approach to reduce the search space of suspected equipment in the production flow, which is then formulated as a Bayesian network. The proposed approach computes the risk priority for suspected equipment with corresponding factors (such as human factor and recipe), using joint and conditional probabilities. The objective is to quickly and accurately localize the possible fault origins in real-time and support effective corrective maintenance decisions. The key advantages offered by this approach are: (i) reduced unscheduled equipment breakdown duration, and (ii) stable production capacities, required for success in highly competitive and automated manufacturing systems. Moreover, this is a generic methodology and can be deployed on fully or semi-automated manufacturing systems.}, language = {en}, number = {2}, urldate = {2021-10-29}, journal = {Proceedings of the Institution of Mechanical Engineers, Part O: Journal of Risk and Reliability}, author = {Nguyen, Dang Trinh and Duong, Quoc Bao and Zamai, Eric and Shahzad, Muhammad Kashif}, month = apr, year = {2016}, note = {Publisher: SAGE Publications}, keywords = {Bayesian network, Fault diagnosis, automated manufacturing systems, logical diagnosis, non observable components, unobserved}, pages = {178--194}, }
@article{campo_new_2016, title = {A new index for clustering validation with overlapped clusters}, volume = {64}, issn = {0957-4174}, url = {https://www.sciencedirect.com/science/article/pii/S0957417416304158}, doi = {10.1016/j.eswa.2016.08.021}, abstract = {External validation indexes allow similarities between two clustering solutions to be quantified. With classical external indexes, it is possible to quantify how similar two disjoint clustering solutions are, where each object can only belong to a single cluster. However, in practical applications, it is common for an object to have more than one label, thereby belonging to overlapped clusters; for example, subjects that belong to multiple communities in social networks. In this study, we propose a new index based on an intuitive probabilistic approach that is applicable to overlapped clusters. Given that recently there has been a remarkable increase in the analysis of data with naturally overlapped clusters, this new index allows to comparing clustering algorithms correctly. After presenting the new index, experiments with artificial and real datasets are shown and analyzed. Results over a real social network are also presented and discussed. The results indicate that the new index can correctly measure the similarity between two partitions of the dataset when there are different levels of overlap in the analyzed clusters.}, language = {en}, urldate = {2021-10-21}, journal = {Expert Systems with Applications}, author = {Campo, D. N. and Stegmayer, G. and Milone, D. H.}, month = dec, year = {2016}, keywords = {Cluster perturbation, External validation, Overlapped clusters, Validation index}, pages = {549--556}, }
@inproceedings{yan_active_2016, title = {Active {Learning} from {Imperfect} {Labelers}}, volume = {29}, url = {https://proceedings.neurips.cc/paper/2016/hash/dd77279f7d325eec933f05b1672f6a1f-Abstract.html}, urldate = {2021-10-18}, booktitle = {Advances in {Neural} {Information} {Processing} {Systems}}, publisher = {Curran Associates, Inc.}, author = {Yan, Songbai and Chaudhuri, Kamalika and Javidi, Tara}, year = {2016}, }
@article{lulli_ng-dbscan_2016, title = {{NG}-{DBSCAN}: scalable density-based clustering for arbitrary data}, volume = {10}, issn = {2150-8097}, shorttitle = {{NG}-{DBSCAN}}, url = {https://doi.org/10.14778/3021924.3021932}, doi = {10.14778/3021924.3021932}, abstract = {We present NG-DBSCAN, an approximate density-based clustering algorithm that operates on arbitrary data and any symmetric distance measure. The distributed design of our algorithm makes it scalable to very large datasets; its approximate nature makes it fast, yet capable of producing high quality clustering results. We provide a detailed overview of the steps of NG-DBSCAN, together with their analysis. Our results, obtained through an extensive experimental campaign with real and synthetic data, substantiate our claims about NG-DBSCAN's performance and scalability.}, number = {3}, urldate = {2021-10-03}, journal = {Proceedings of the VLDB Endowment}, author = {Lulli, Alessandro and Dell'Amico, Matteo and Michiardi, Pietro and Ricci, Laura}, month = nov, year = {2016}, pages = {157--168}, }
@inproceedings{han_novel_2016, title = {A {Novel} {Scalable} {DBSCAN} {Algorithm} with {Spark}}, doi = {10.1109/IPDPSW.2016.57}, abstract = {DBSCAN is a well-known clustering algorithm which is based on density and is able to identify arbitrary shaped clusters and eliminate noise data. However, parallelization of DBSCAN is a challenging work because based on MPI or OpenMP environments, there exist the issues of lack of fault-tolerance and there is no guarantee that workload is balanced. Moreover, programming with MPI requires data scientists to have an advanced experience to handle communication between nodes which is a big challenge. We present a new parallel DBSCAN algorithm using the new big data framework Spark. In order to reduce search time, we apply kd-tree in our algorithm. More specifically, we propose a novel approach to avoid communication between executors so that we can locally obtain partial clusters more efficiently. Based on Java API, we select appropriate data structures carefully: Using Queue to contain neighbors of the data point, and using Hashtable when checking the status of and processing the data points. In addition, we use other advanced features from Spark to make our implementation more effective. We implement the algorithm in Java and evaluate its scalability by using different number of processing cores. Our experiments demonstrate that the algorithm we propose scales up very well. Using data sets containing up to 1 million high-dimensional points, we show that our proposed algorithm achieves speedups up to 6 using 8 cores (10k), 10 using 32 cores (100k), and 137 using 512 cores (1m). Another experiment using 10k data points is conducted and the result shows that the algorithm with MapReduce achieves speedups to 1.3 using 2 cores, 2.0 using 4 cores, and 3.2 using 8 cores.}, booktitle = {2016 {IEEE} {International} {Parallel} and {Distributed} {Processing} {Symposium} {Workshops} ({IPDPSW})}, author = {Han, Dianwei and Agrawal, Ankit and Liao, Wei-Keng and Choudhary, Alok}, month = may, year = {2016}, keywords = {Algorithm design and analysis, Big data, Clustering algorithms, DBSCAN, Distributed databases, Programming, Spark framework, Sparks, bigdata, clustering}, pages = {1393--1402}, }
@article{li_study_2016, title = {A study of large-scale data clustering based on fuzzy clustering}, volume = {20}, issn = {1433-7479}, url = {https://doi.org/10.1007/s00500-015-1698-1}, doi = {10.1007/s00500-015-1698-1}, abstract = {Large-scale data are any data that cannot be loaded into the main memory of the ordinary. This is not the objective definition of large-scale data, but it is easy to understand what the large-scale data is. We first introduce some present algorithms to clustering large-scale data, some data stream clustering algorithms based on FCM algorithms are also introduced. In this paper, we propose a new structure to cluster large-scale data and two new data stream clustering algorithms based on the structure are propose in Sects. 3 and 4. In our method, we load the objects in the dataset one by one. We set a threshold of the membership, if the membership of one object and a cluster center is bigger than the threshold, the object is assigned to the cluster and the location of nearest cluster center will be updated, else the object is put into the temporary matrix; we call it pool. When the pool is full, we cluster the data in the pool and update the location of cluster centers. The two algorithms are based on the data stream structure. The difference of the two algorithms is the how the objects in the data are weighed. We test our algorithms on handwritten digits images dataset and several large-scale UCI datasets and make a comparison with some presented algorithms. The experiments proved that our algorithm is more suitable to cluster large-scale datasets.}, language = {en}, number = {8}, urldate = {2021-10-01}, journal = {Soft Computing}, author = {Li, Yangyang and Yang, Guoli and He, Haiyang and Jiao, Licheng and Shang, Ronghua}, month = aug, year = {2016}, pages = {3231--3242}, }
@article{gosain_performance_2016, series = {Proceedings of {International} {Conference} on {Communication}, {Computing} and {Virtualization} ({ICCCV}) 2016}, title = {Performance {Analysis} of {Various} {Fuzzy} {Clustering} {Algorithms}: {A} {Review}}, volume = {79}, issn = {1877-0509}, shorttitle = {Performance {Analysis} of {Various} {Fuzzy} {Clustering} {Algorithms}}, url = {https://www.sciencedirect.com/science/article/pii/S1877050916001459}, doi = {10.1016/j.procs.2016.03.014}, abstract = {Fuzzy clustering is useful clustering technique which partitions the data set in fuzzy partitions and this technique is applicable in many technical applications like crime hot spot detection, tissue differentiation in medical images, software quality prediction etc. In this review paper, we have done a comprehensive study and experimental analysis of the performance of all major fuzzy clustering algorithms named: FCM, PCM, PFCM, FCM-σ, T2FCM, KT2FCM, IFCM, KIFCM, IFCM-σ, KIFCM-σ, NC, CFCM, DOFCM. To better analysis their performance we experimented with standard data points in the presents of noise and outlier. This paper will act as a catalyst in the initial study for all those researchers who directly or indirectly deal with fuzzy clustering in their research work and ease them to pick a specific method as per the suitability to their working environment.}, language = {en}, urldate = {2021-10-01}, journal = {Procedia Computer Science}, author = {Gosain, Anjana and Dahiya, Sonika}, month = jan, year = {2016}, keywords = {CFCM, DOFCM, FCM, FCM-σ, Fuzzy Clustering, IFCM, IFCM-σ, Intuitionisitic, KFCM-σ, NC, PCM, Type 2}, pages = {100--111}, }
@inproceedings{li_fuzzy_2016, title = {Fuzzy {Clustering} {Algorithms} — {Review} of the {Applications}}, doi = {10.1109/SmartCloud.2016.14}, abstract = {Fuzzy clustering is an alternative method to conventional or hard clustering algorithms, which makes partitions of data containing similar subjects. The tendency of adopting machine learning, big data science, cloud computation in various industries depends on unsupervised learning on data structures to tell the story about consumers' behavior, fraud detection, and market segmentation. Fuzzy clustering contrasts with hard clustering by its nonlinear nature and discipline of flexibility in grouping massive data. It provides more accurate and close-to-nature solutions for partitions and herein implies more possibility of solutions for decision-making. In the specific matter of computation, fuzzy clustering has its roots in fuzzy logic and indicates the likelihood or degrees of one data point belonging to more than one group. This paper focuses on the study of models of fuzzy clustering in various cases. Uniquely designed algorithms enhance the accuracy of outcomes and are worth studying to assist future work. In some case scenarios, modeling processes are data-driven and place emphasis on the distances between points and new centers of clusters. In some other cases, which aim at market segmentation or evaluation of patients by healthcare records, membership degree is a key element in the algorithm. This paper surveys a wide-range of research that has well-designed mathematic models for fuzzy clustering, some of which include genetic algorithms and neural networks. The last section introduces open sources of Python and displays sample results from hands-on practice with these packages.}, booktitle = {2016 {IEEE} {International} {Conference} on {Smart} {Cloud} ({SmartCloud})}, author = {Li, Jiamin and Lewis, Harold W.}, month = nov, year = {2016}, keywords = {Clustering algorithms, Data structures, Euclidean distance, Genetic algorithms, Histograms, Indexes, Mathematical model, fuzzy c-mean clustering, genetic algorithm, neural network, pattern recognition, validity index}, pages = {282--288}, }
@article{fahiman_fuzzy_2016, title = {Fuzzy c-{Shape}: {A} new algorithm for clustering finite time series waveforms}, shorttitle = {Fuzzy c-{Shape}}, url = {http://arxiv.org/abs/1608.01072}, abstract = {The existence of large volumes of time series data in many applications has motivated data miners to investigate specialized methods for mining time series data. Clustering is a popular data mining method due to its powerful exploratory nature and its usefulness as a preprocessing step for other data mining techniques. This article develops two novel clustering algorithms for time series data that are extensions of a crisp c-shapes algorithm. The two new algorithms are heuristic derivatives of fuzzy c-means (FCM). Fuzzy c-Shapes plus (FCS+) replaces the inner product norm in the FCM model with a shape-based distance function. Fuzzy c-Shapes double plus (FCS++) uses the shape-based distance, and also replaces the FCM cluster centers with shape-extracted prototypes. Numerical experiments on 48 real time series data sets show that the two new algorithms outperform state-of-the-art shape-based clustering algorithms in terms of accuracy and efficiency. Four external cluster validity indices (the Rand index, Adjusted Rand Index, Variation of Information, and Normalized Mutual Information) are used to match candidate partitions generated by each of the studied algorithms. All four indices agree that for these finite waveform data sets, FCS++ gives a small improvement over FCS+, and in turn, FCS+ is better than the original crisp c-shapes method. Finally, we apply two tests of statistical significance to the three algorithms. The Wilcoxon and Friedman statistics both rank the three algorithms in exactly the same way as the four cluster validity indices.}, urldate = {2021-09-30}, journal = {arXiv:1608.01072 [cs]}, author = {Fahiman, Fateme and Bezdek, James C. and Erfani, Sarah M. and Leckie, Christopher and Palaniswami, Marimuthu}, month = aug, year = {2016}, note = {arXiv: 1608.01072}, keywords = {Computer Science - Machine Learning}, }
@article{pevny_loda_2016, title = {Loda: {Lightweight} on-line detector of anomalies}, volume = {102}, issn = {1573-0565}, shorttitle = {Loda}, url = {https://doi.org/10.1007/s10994-015-5521-0}, doi = {10.1007/s10994-015-5521-0}, abstract = {In supervised learning it has been shown that a collection of weak classifiers can result in a strong classifier with error rates similar to those of more sophisticated methods. In unsupervised learning, namely in anomaly detection such a paradigm has not yet been demonstrated despite the fact that many methods have been devised as counterparts to supervised binary classifiers. This work partially fills the gap by showing that an ensemble of very weak detectors can lead to a strong anomaly detector with a performance equal to or better than state of the art methods. The simplicity of the proposed ensemble system (to be called Loda) is particularly useful in domains where a large number of samples need to be processed in real-time or in domains where the data stream is subject to concept drift and the detector needs to be updated on-line. Besides being fast and accurate, Loda is also able to operate and update itself on data with missing variables. Loda is thus practical in domains with sensor outages. Moreover, Loda can identify features in which the scrutinized sample deviates from the majority. This capability is useful when the goal is to find out what has caused the anomaly. It should be noted that none of these favorable properties increase Loda’s low time and space complexity. We compare Loda to several state of the art anomaly detectors in two settings: batch training and on-line training on data streams. The results on 36 datasets from UCI repository illustrate the strengths of the proposed system, but also provide more insight into the more general questions regarding batch-vs-on-line anomaly detection.}, language = {en}, number = {2}, urldate = {2021-02-16}, journal = {Machine Learning}, author = {Pevný, Tomáš}, month = feb, year = {2016}, pages = {275--304}, }
@article{zhong_overview_2016, title = {An overview on data representation learning: {From} traditional feature learning to recent deep learning}, volume = {2}, issn = {2405-9188}, shorttitle = {An overview on data representation learning}, url = {http://www.sciencedirect.com/science/article/pii/S2405918816300459}, doi = {10.1016/j.jfds.2017.05.001}, abstract = {Since about 100 years ago, to learn the intrinsic structure of data, many representation learning approaches have been proposed, either linear or nonlinear, either supervised or unsupervised, either “shallow” or “deep”. Particularly, deep architectures are widely applied for representation learning in recent years, and have delivered top results in many tasks, such as image classification, object detection and speech recognition. In this paper, we review the development of data representation learning methods. Specifically, we investigate both traditional feature learning algorithms and state-of-the-art deep learning models. The history of data representation learning is introduced, while available online resources (e.g., courses, tutorials and books) and toolboxes are provided. At the end, we give a few remarks on the development of data representation learning and suggest some interesting research directions in this area.}, language = {en}, number = {4}, urldate = {2020-09-21}, journal = {The Journal of Finance and Data Science}, author = {Zhong, Guoqiang and Wang, Li-Na and Ling, Xiao and Dong, Junyu}, month = dec, year = {2016}, keywords = {Deep learning, Feature learning, Representation learning}, pages = {265--278}, }
@article{hahsler_clustering_2016, title = {Clustering {Data} {Streams} {Based} on {Shared} {Density} between {Micro}-{Clusters}}, volume = {28}, number = {6}, journal = {IEEE Transactions on Knowledge and Data Engineering}, author = {Hahsler, M. and Bolaños, M.}, year = {2016}, pages = {1449--1461}, }
@article{cubillo_review_2016, title = {A review of physics-based models in prognostics: {Application} to gears and bearings of rotating machinery}, volume = {8}, url = {https://doi.org/10.1177/1687814016664660}, doi = {10.1177/1687814016664660}, abstract = {Health condition monitoring for rotating machinery has been developed for many years due to its potential to reduce the cost of the maintenance operations and increase availability. Covering aspects include sensors, signal processing, health assessment and decision-making. This article focuses on prognostics based on physics-based models. While the majority of the research in health condition monitoring focuses on data-driven techniques, physics-based techniques are particularly important if accuracy is a critical factor and testing is restricted. Moreover, the benefits of both approaches can be combined when data-driven and physics-based techniques are integrated. This article reviews the concept of physics-based models for prognostics. An overview of common failure modes of rotating machinery is provided along with the most relevant degradation mechanisms. The models available to represent these degradation mechanisms and their application for prognostics are discussed. Models that have not been applied to health condition monitoring, for example, wear due to metal–metal contact in hydrodynamic bearings, are also included due to its potential for health condition monitoring. The main contribution of this article is the identification of potential physics-based models for prognostics in rotating machinery.}, number = {8}, journal = {Advances in Mechanical Engineering}, author = {Cubillo, Adrian and Perinpanayagam, Suresh and Esperon-Miguez, Manuel}, year = {2016}, pages = {1687814016664660}, }
@article{shi_edge_2016, title = {Edge {Computing}: {Vision} and {Challenges}}, volume = {3}, issn = {2372-2541}, doi = {10.1109/JIOT.2016.2579198}, number = {5}, journal = {IEEE Internet of Things Journal}, author = {Shi, W. and Cao, J. and Zhang, Q. and Li, Y. and Xu, L.}, year = {2016}, keywords = {Bandwidth, Cloud computing, Data privacy, Edge computing, Internet of Things, Internet of Things (IoT), Internet of things, Mobile handsets, Smart homes, Time factors, bandwidth cost saving, battery life constraint, cloud computing, cloud offloading, cloud services, data privacy, data processing, data safety, edge computing, response time requirement, smart cities, smart city, smart home, smart home and city}, pages = {637--646}, }
@book{ignazio_mesos_2016, address = {USA}, edition = {1st}, title = {Mesos in {Action}}, isbn = {1-61729-292-3}, publisher = {Manning Publications Co.}, author = {Ignazio, Roger}, year = {2016}, }
@inproceedings{kans_maintenance_2016, address = {Cham}, title = {Maintenance 4.0 in {Railway} {Transportation} {Industry}}, isbn = {978-3-319-27064-7}, booktitle = {Proceedings of the 10th {World} {Congress} on {Engineering} {Asset} {Management} ({WCEAM} 2015)}, publisher = {Springer International Publishing}, author = {Kans, Mirka and Galar, Diego and Thaduri, Adithya}, editor = {Koskinen, Kari T. and Kortelainen, Helena and Aaltonen, Jussi and Uusitalo, Teuvo and Komonen, Kari and Mathew, Joseph and Laitinen, Jouko}, year = {2016}, pages = {317--331}, }
@inproceedings{hermann_design_2016, title = {Design {Principles} for {Industrie} 4.0 {Scenarios}}, doi = {10.1109/HICSS.2016.488}, booktitle = {2016 49th {Hawaii} {International} {Conference} on {System} {Sciences} ({HICSS})}, author = {Hermann, M. and Pentek, T. and Otto, B.}, year = {2016}, note = {ISSN: 1530-1605}, keywords = {Case Study, Companies, Design Principles, Industrie 4.0, Industrie 4.0 scenarios, Industries, Industry 4.0, Internet, Internet of Everything, Internet of Things, Internet-of-Everything, Production facilities, Smart Factory, Text analysis, design principles, industrial value chain, production engineering computing, qualitative literature review, quantitative text analysis}, pages = {3928--3937}, }
@article{wang_towards_2016, title = {Towards smart factory for industry 4.0: a self-organized multi-agent system with big data based feedback and coordination}, volume = {101}, issn = {1389-1286}, url = {http://www.sciencedirect.com/science/article/pii/S1389128615005046}, doi = {10.1016/j.comnet.2015.12.017}, journal = {Computer Networks}, author = {Wang, Shiyong and Wan, Jiafu and Zhang, Daqiang and Li, Di and Zhang, Chunhua}, year = {2016}, pages = {158--168}, }
@book{lyonnet_fiabilite_2016, address = {Paris, France}, title = {Fiabilité industrielle: {La} boîte à outils des processus de fiabilité et maintenance}, isbn = {978-2-12-465533-5}, language = {fre}, publisher = {AFNOR}, author = {Lyonnet, Patrick}, year = {2016}, keywords = {Fiabilité, Gestion Du Risque, Maintenabilité, Qualité}, }
@article{feuz_automated_2015, title = {Automated {Detection} of {Activity} {Transitions} for {Prompting}}, volume = {45}, issn = {2168-2305}, doi = {10.1109/THMS.2014.2362529}, abstract = {Individuals with cognitive impairment can benefit from intervention strategies like recording important information in a memory notebook. However, training individuals to use the notebook on a regular basis requires a constant delivery of reminders. In this study, we design and evaluate machine-learning-based methods for providing automated reminders using a digital memory notebook interface. Specifically, we identify transition periods between activities as times to issue prompts. We consider the problem of detecting activity transitions using supervised and unsupervised machine-learning techniques and find that both techniques show promising results for detecting transition periods. We test the techniques in a scripted setting with 15 individuals. Motion sensors data are recorded and annotated as participants perform a fixed set of activities. We also test the techniques in an unscripted setting with eight individuals. Motion sensor data are recorded as participants go about their normal daily routine. In both the scripted and unscripted settings, a true positive rate of greater than 80\% can be achieved while maintaining a false positive rate of less than 15\%. On average, this leads to transitions being detected within 1 min of a true transition for the scripted data and within 2 min of a true transition on the unscripted data.}, number = {5}, journal = {IEEE Transactions on Human-Machine Systems}, author = {Feuz, Kyle D. and Cook, Diane J. and Rosasco, Cody and Robertson, Kayela and Schmitter-Edgecombe, Maureen}, month = oct, year = {2015}, note = {Conference Name: IEEE Transactions on Human-Machine Systems}, keywords = {Activity recognition, Intelligent sensors, Sensor phenomena and characterization, Smart homes, Supervised learning, TV, Temperature sensors, change-point detection, machine learning, prompting systems, smart environments}, pages = {575--585}, }
@article{bolon-canedo_recent_2015, title = {Recent advances and emerging challenges of feature selection in the context of big data}, volume = {86}, issn = {0950-7051}, url = {https://www.sciencedirect.com/science/article/pii/S0950705115002002}, doi = {10.1016/j.knosys.2015.05.014}, abstract = {In an era of growing data complexity and volume and the advent of big data, feature selection has a key role to play in helping reduce high-dimensionality in machine learning problems. We discuss the origins and importance of feature selection and outline recent contributions in a range of applications, from DNA microarray analysis to face recognition. Recent years have witnessed the creation of vast datasets and it seems clear that these will only continue to grow in size and number. This new big data scenario offers both opportunities and challenges to feature selection researchers, as there is a growing need for scalable yet efficient feature selection methods, given that existing methods are likely to prove inadequate.}, language = {en}, urldate = {2022-07-29}, journal = {Knowledge-Based Systems}, author = {Bolón-Canedo, V. and Sánchez-Maroño, N. and Alonso-Betanzos, A.}, month = sep, year = {2015}, keywords = {Big data, Feature selection, High dimensionality}, pages = {33--45}, }
@article{onderwater_outlier_2015, title = {Outlier {Preservation} by {Dimensionality} {Reduction} {Techniques}}, volume = {7}, doi = {10.1504/IJDATS.2015.071365}, abstract = {Sensors are increasingly part of our daily lives: motion detection, lighting control, and energy consumption all rely on sensors. Combining this information into, for instance, simple and comprehensive graphs can be quite challenging. Dimensionality reduction is often used to address this problem, by decreasing the number of variables in the data and looking for shorter representations. However, dimensionality reduction is often aimed at normal daily data, and applying it to events deviating from this daily data (so-called outliers) can affect such events negatively. In particular, outliers might go unnoticed. In this paper, we show that dimensionality reduction can indeed have a large impact on outliers. To that end we apply three dimensionality reduction techniques to three real-world datasets, and inspect how well they preserve outliers. We use several performance measures to show how well these techniques are capable of preserving outliers, and we discuss the results.}, journal = {International Journal of Data Analysis Techniques and Strategies}, author = {Onderwater, Martijn}, month = jan, year = {2015}, pages = {231--252}, }
@article{bathelt_revision_2015, series = {9th {IFAC} {Symposium} on {Advanced} {Control} of {Chemical} {Processes} {ADCHEM} 2015}, title = {Revision of the {Tennessee} {Eastman} {Process} {Model}}, volume = {48}, issn = {2405-8963}, url = {https://www.sciencedirect.com/science/article/pii/S2405896315010666}, doi = {10.1016/j.ifacol.2015.08.199}, abstract = {In this paper, a closer look at the underlying computer code of the well-known Tennessee Eastman Process model is taken. Since its introduction in the 1990s typical simulation software, e.g. MATLAB, which is used to simulate the process model, has changed. Thus the original program flow devised by Downs \& Vogel no longer holds. This results in problems regarding the repeatability of simulation results. This problem and its cause are considered in the following and a solution in terms of a modified code is presented. Furthermore, some additional changes are discussed, widening the useability of the simulation model (e.g. lower simulation runtime, additional process measurements).}, language = {en}, number = {8}, urldate = {2022-05-12}, journal = {IFAC-PapersOnLine}, author = {Bathelt, Andreas and Ricker, N. Lawrence and Jelali, Mohieddine}, month = jan, year = {2015}, keywords = {Benchmark example, Chemical industry, Process Control, Process models, Process simulators}, pages = {309--314}, }
@inproceedings{helwig_condition_2015, title = {Condition monitoring of a complex hydraulic system using multivariate statistics}, doi = {10.1109/I2MTC.2015.7151267}, abstract = {In this paper, a systematic approach for the automated training of condition monitoring systems for complex hydraulic systems is developed and evaluated. We analyzed different fault scenarios using a test rig that allows simulating a reversible degradation of component's conditions. By analyzing the correlation of features extracted from raw sensor data and the known fault characteristics of experimental obtained data, the most significant features specific to a fault case can be identified. These feature values are transferred to a lower-dimensional discriminant space using linear discriminant analysis (LDA) which allows the classification of fault condition and grade of severity. We successfully implemented and tested the system for a fixed working cycle of the hydraulic system. Furthermore, the classification rate for random load cycles was enhanced by a distribution analysis of feature trends.}, booktitle = {2015 {IEEE} {International} {Instrumentation} and {Measurement} {Technology} {Conference} ({I2MTC}) {Proceedings}}, author = {Helwig, Nikolai and Pignanelli, Eliseo and Schütze, Andreas}, month = may, year = {2015}, note = {ISSN: 1091-5281}, keywords = {Condition monitoring, Cooling, Correlation, Correlation coefficient, Feature extraction, Valves, condition monitoring, hydraulic system, linear discriminant analysis, multivariate statistics}, pages = {210--215}, }
@article{chiang_diagnosis_2015, title = {Diagnosis of multiple and unknown faults using the causal map and multivariate statistics}, volume = {28}, issn = {0959-1524}, url = {https://www.sciencedirect.com/science/article/pii/S0959152415000311}, doi = {10.1016/j.jprocont.2015.02.004}, abstract = {Feature extraction is crucial for fault diagnosis and the use of complementary features allows for improved diagnostic performance. Most of the existing fault diagnosis methods only utilize data-driven and causal connectivity-based features of faults, whereas the important complementary feature of the propagation paths of faults is not incorporated. The propagation path-based feature is important to represent the intrinsic properties of faults and plays a significant role in fault diagnosis, particularly for the diagnosis of multiple and unknown faults. In this article, a three-step framework based on the modified distance (DI) and modified causal dependency (CD) is proposed to integrate the data-driven and causal connectivity-based features with the propagation path-based feature for diagnosing known, unknown, and multiple faults. The effectiveness of the proposed approach is demonstrated on the Tennessee Eastman process.}, language = {en}, urldate = {2022-05-02}, journal = {Journal of Process Control}, author = {Chiang, Leo H. and Jiang, Benben and Zhu, Xiaoxiang and Huang, Dexian and Braatz, Richard D.}, month = apr, year = {2015}, keywords = {Causal map, Chemometrics, Fault diagnosis, Feature extraction, Feature representation, Multiple faults, Multivariate statistics, Process monitoring, Unknown faults}, pages = {27--39}, }
@inproceedings{kiss_clustering-based_2015, title = {A clustering-based approach to detect cyber attacks in process control systems}, doi = {10.1109/INDIN.2015.7281725}, abstract = {Modern Process Control Systems (PCS) exhibit an increasing trend towards the pervasive adoption of commodity, off-the-shelf Information and Communication Technologies (ICT). This has brought significant economical and operational benefits, but it also shifted the architecture of PCS from a completely isolated environment to an open, “system of systems” integration with traditional ICT systems, susceptible to traditional computer attacks. In this paper we present a novel approach to detect cyber attacks targeting measurements sent to control hardware, i.e., typically to Programmable Logical Controllers (PLC). The approach builds on the Gaussian mixture model to cluster sensor measurement values and a cluster assessment technique known as silhouette. We experimentally demonstrate that in this particular problem the Gaussian mixture clustering outperforms the k-means clustering algorithm. The effectiveness of the proposed technique is tested in a scenario involving the simulated Tennessee-Eastman chemical process and three different cyber attacks.}, booktitle = {2015 {IEEE} 13th {International} {Conference} on {Industrial} {Informatics} ({INDIN})}, author = {Kiss, István and Genge, Béla and Haller, Piroska}, month = jul, year = {2015}, note = {ISSN: 2378-363X}, keywords = {Clustering algorithms, Computer crime, Engines, Gaussian mixture model, Mathematical model, Process control}, pages = {142--148}, }
@article{singh_classification_2015, title = {Classification and {Novel} {Class} {Detection} in {Data} {Streams} {Using} {Strings}}, volume = {2}, copyright = {http://creativecommons.org/licenses/by/4.0/}, url = {http://www.scirp.org/Journal/Paperabs.aspx?paperid=68406}, doi = {10.4236/oalib.1101507}, abstract = {Data streams are continuous and always keep evolving in nature. Because of these reasons it becomes difficult to handle such data with simple and static strategies. Data stream poses four main challenges to researchers. These are infinite length, concept-evolution, concept-drift and feature evolution. Infinite-length is because of the amount of data having no bounds. Concept-drift is due to slow changes in the concept of stream. Concept-evolution occurs due to presence of unknown classes in data. Feature-evolution is because of new features continuously keeping appearing in the stream and older ones start disappearing. For performing any analysis on such data we first need to convert it into some knowledgeable form and also need to handle the above mentioned challenges. Various strategies have been proposed to tackle these difficulties. But most of them focus on handling the problem of infinite-length and concept-drift. In this paper, we make efforts to propose a string based strategy to handle infinite-length, concept-evolution and concept-drift.}, language = {en}, number = {5}, urldate = {2022-03-31}, journal = {Open Access Library Journal}, author = {Singh, Rimjhim and Chandak, Manoj B.}, month = may, year = {2015}, note = {Number: 5 Publisher: Scientific Research Publishing}, pages = {1--8}, }
@inproceedings{bifet_streamdm_2015, title = {{StreamDM}: {Advanced} {Data} {Mining} in {Spark} {Streaming}}, shorttitle = {{StreamDM}}, doi = {10.1109/ICDMW.2015.140}, abstract = {Real-time analytics are becoming increasingly important due to the large amount of data that is being created continuously. Drawing from our experiences at Huawei Noah's Ark Lab, we present and demonstrate here StreamDM, a new open source data mining and machine learning library, designed on top of Spark Streaming, an extension of the core Spark API that enables scalable stream processing of data streams. StreamDM is designed to be easily extended and used, either practitioners, developers, or researchers, and is the first library to contain advanced stream mining algorithms for Spark Streaming.}, booktitle = {2015 {IEEE} {International} {Conference} on {Data} {Mining} {Workshop} ({ICDMW})}, author = {Bifet, Albert and Maniu, Silviu and Qian, Jianfeng and Tian, Guangjian and He, Cheng and Fan, Wei}, month = nov, year = {2015}, note = {ISSN: 2375-9259}, keywords = {Algorithm design and analysis, Data mining, Data structures, Libraries, Machine learning algorithms, Spark Streaming, Sparks, Training, open-source, software, stream mining}, pages = {1608--1611}, }
@article{galar_context_2015, title = {Context awareness for maintenance decision making: {A} diagnosis and prognosis approach}, volume = {67}, issn = {0263-2241}, shorttitle = {Context awareness for maintenance decision making}, url = {https://www.sciencedirect.com/science/article/pii/S0263224115000408}, doi = {10.1016/j.measurement.2015.01.015}, abstract = {All assets necessarily suffer wear and tear during operation. Prognostics can assess the current health of a system and predict its remaining life based on features capturing the gradual degradation of its operational capabilities. Prognostics are critical to improve safety, plan successful work, schedule maintenance, and reduce maintenance costs and down time. Prognosis is a relatively new area but has become an important part of Condition-based Maintenance (CBM) of systems. As there are many prognostic techniques, usage must be acceptable to particular applications. Broadly stated, prognostic methods are either data-driven, rule based, or model-based. Each approach has advantages and disadvantages; consequently, they are often combined in hybrid applications. A hybrid model can combine some or all model types; thus, more complete information can be gathered, leading to more accurate recognition of the fault state. In this context, it is also important to evaluate the consistency and the reliability of the measurement data obtained during laboratory testing activity and the prognostic/diagnostic monitoring of the system under examination. This approach is especially relevant in systems where the maintainer and operator know some of the failure mechanisms with sufficient amount of data, but the sheer complexity of the assets precludes the development of a complete model-based approach. This paper addresses the process of data aggregation into a contextual awareness hybrid model to get Residual Useful Life (RUL) values within logical confidence intervals so that the life cycle of assets can be managed and optimised.}, language = {en}, urldate = {2022-03-05}, journal = {Measurement}, author = {Galar, Diego and Thaduri, Adithya and Catelani, Marcantonio and Ciani, Lorenzo}, month = may, year = {2015}, keywords = {Condition based maintenance, Condition monitoring, Context-driven, Diagnosis, Prognosis, eMaintenance}, pages = {137--150}, }
@article{liu_clustering_2015, title = {Clustering by growing incremental self-organizing neural network}, volume = {42}, issn = {0957-4174}, url = {https://www.sciencedirect.com/science/article/pii/S0957417415001050}, doi = {10.1016/j.eswa.2015.02.006}, abstract = {This paper presents a new clustering algorithm that detects clusters by learning data distribution of each cluster. Different from most existing clustering techniques, the proposed method is able to generate a dynamic two-dimensional topological graph which is used to explore both partitional information and detailed data relationship in each cluster. In addition, the proposed method is also able to work incrementally and detect arbitrary-shaped clusters without requiring the number of clusters as a prerequisite. The experimental data sets including five artificial data sets with various data distributions and an original hand-gesture data set are used to evaluate the proposed method. The comparable experimental results demonstrate the superior performance of the proposed algorithm in learning robustness, efficiency, working with outliers, and visualizing data relationships.}, language = {en}, number = {11}, urldate = {2022-02-20}, journal = {Expert Systems with Applications}, author = {Liu, Hao and Ban, Xiao-juan}, month = jul, year = {2015}, keywords = {Clustering, Data visualization, Incremental learning, Self-organizing neural networks, Unsupervised learning}, pages = {4965--4981}, }
@article{kwak_incremental_2015, title = {An {Incremental} {Clustering}-{Based} {Fault} {Detection} {Algorithm} for {Class}-{Imbalanced} {Process} {Data}}, volume = {28}, issn = {1558-2345}, doi = {10.1109/TSM.2015.2445380}, abstract = {Training fault detection model requires advanced data-mining algorithms when the growth rate of the process data is notably high and normal-class data overwhelm fault-class data in number. Most standard classification algorithms, such as support vector machines (SVMs), can handle moderate sizes of training data and assume balanced class distributions. When the class sizes are highly imbalanced, the standard algorithms tend to strongly favor the majority class and provide a notably low detection of the minority class as a result. In this paper, we propose an online fault detection algorithm based on incremental clustering. The algorithm accurately finds wafer faults even in severe class distribution skews and efficiently processes massive sensor data in terms of reductions in the required storage. We tested our algorithm on illustrative examples and an industrial example. The algorithm performed well with the illustrative examples that included imbalanced class distributions of Gaussian and non-Gaussian types and process drifts. In the industrial example, which simulated real data from a plasma etcher, we verified that the performance of the algorithm was better than that of the standard SVM, one-class SVM and three instance-based fault detection algorithms that are typically used in the literature.}, number = {3}, journal = {IEEE Transactions on Semiconductor Manufacturing}, author = {Kwak, Jueun and Lee, Taehyung and Kim, Chang Ouk}, month = aug, year = {2015}, note = {Conference Name: IEEE Transactions on Semiconductor Manufacturing}, keywords = {Algorithm design and analysis, Class Imbalance Data, Classification algorithms, Clustering algorithms, Covariance matrices, Data Mining, Fault Detection, Fault detection, Incremental Clustering, Process Drift, Standards, Support vector machines, class imbalance data, data mining, incremental clustering, process drift}, pages = {318--328}, }
@inproceedings{ji_review_2015, address = {Cham}, series = {Lecture {Notes} in {Computer} {Science}}, title = {A {Review} of {Parameter} {Learning} {Methods} in {Bayesian} {Network}}, isbn = {978-3-319-22053-6}, doi = {10.1007/978-3-319-22053-6_1}, abstract = {Bayesian network (BN) is one of the most classical probabilistic graphical models. It has been widely used in many areas, such as artificial intelligence, pattern recognition, and image processing. Parameter learning in Bayesian network is a very important topic. In this study, six typical parameter learning algorithms were investigated. For the completeness of dataset, there are mainly two categories of methods for parameter estimation in BN: one is suitable to deal with the complete data, and another is for incomplete data. We mainly focused on two algorithms in the first category: maximum likelihood estimate, and Bayesian method; Expectation - Maximization algorithm, Robust Bayesian estimate, Monte - Carlo method, and Gaussian approximation method were discussed for the second category. In the experiment, all these algorithms were applied on a classic example to implement the inference of parameters. The simulating results reveal the inherent differences of these six methods and the effects of the inferred parameters of network on further probability calculation. This study provides insight into the parameter inference strategies of Bayesian network and their applications in different kinds of situations.}, language = {en}, booktitle = {Advanced {Intelligent} {Computing} {Theories} and {Applications}}, publisher = {Springer International Publishing}, author = {Ji, Zhiwei and Xia, Qibiao and Meng, Guanmin}, editor = {Huang, De-Shuang and Han, Kyungsook}, year = {2015}, keywords = {Bayesian network, Conditional probability table, Parameter learning}, pages = {3--12}, }
@inproceedings{epaillard_hybrid_2015, title = {Hybrid hidden {Markov} model for mixed continuous/continuous and discrete/continuous data modeling}, doi = {10.1109/MMSP.2015.7340853}, abstract = {We propose a hybrid hidden Markov model (HMM) for mixed continuous/continuous and discrete/continuous outcomes. The Expectation-Maximization (EM) procedure used for the parameters estimation relies on a combination of local and global quantities that relate to outcomes of a specific type and to all outcomes, respectively. The new model is implemented for discrete outcomes and continuous outcomes that follow Gaussian, Dirichlet, and Gamma distributions. Experiments led with synthetic data show same or better modeling accuracy compared to a fully Gaussian HMM, with less parameters and thus a shorter convergence time for high dimensional data. Finally, the approach is validated with real data in a change detection scenario between a pair of images, one of which has been captured by an optic sensor and the other by a synthesis aperture radar (SAR) sensor. Based on the properties of the noises corrupting these images, a hybrid Gamma-Gaussian HMM is trained and the likelihood of the data with respect to the model is used to detect changes. The obtained results are in line with the most recent approaches for this task with the advantage of providing a very compact representation of the data.}, booktitle = {2015 {IEEE} 17th {International} {Workshop} on {Multimedia} {Signal} {Processing} ({MMSP})}, author = {Epaillard, Elise and Bouguila, Nizar}, month = oct, year = {2015}, keywords = {Data models, Estimation, Hidden Markov models, Mathematical model, Optical imaging, Parameter estimation}, pages = {1--6}, }
@article{chis_adapting_2015, series = {Twenty-ninth and thirtieth {Annual} {UK} {Performance} {Engineering} {Workshops} ({UKPEW})}, title = {Adapting {Hidden} {Markov} {Models} for {Online} {Learning}}, volume = {318}, issn = {1571-0661}, url = {https://www.sciencedirect.com/science/article/pii/S1571066115000638}, doi = {10.1016/j.entcs.2015.10.022}, abstract = {In modern computer systems, the intermittent behaviour of infrequent, additional loads affects performance. Often, representative traces of storage disks or remote servers can be scarce and obtaining real data is sometimes expensive. Therefore, stochastic models, through simulation and profiling, provide cheaper, effective solutions, where input model parameters are obtained. A typical example is the Markov-modulated Poisson process (MMPP), which can have its time index discretised to form a hidden Markov model (HMM). These models have been successful in capturing bursty behaviour and cyclic patterns of I/O operations and Internet traffic, using underlying properties of the discrete (or continuous) Markov chain. However, learning on such models can be cumbersome in terms of complexity through re-training on data sets. Thus, we provide an online learning HMM (OnlineHMM), which is composed of two existing variations of HMMs: first, a sliding HMM using a moving average technique to update its parameters “on-the-fly” and, secondly, a multi-input HMM capable of training on multiple discrete traces simultaneously. The OnlineHMM reduces data processing times significantly and thence synthetic workloads become computationally more cost effective. We measure the accuracy of reproducing representative traces through comparisons of moments and autocorrelation on original data points and HMM-generated synthetic traces. We present, analytically, the training steps saved through the OnlineHMM's adapted Baum-Welch algorithm and obtain, through simulation, mean waiting times of a queueing model. Finally, we conclude our work and offer model extensions for the future.}, language = {en}, urldate = {2021-11-15}, journal = {Electronic Notes in Theoretical Computer Science}, author = {Chis, Tiberiu and Harrison, Peter G.}, month = nov, year = {2015}, keywords = {HMM, MMPP, adapted Baum-Welch, autocorrelation, online hmm, online learning}, pages = {109--127}, }
@article{ge_online_2015, title = {Online diagnosis of accidental faults for real-time embedded systems using a hidden {Markov} model}, volume = {91}, issn = {0037-5497}, url = {https://doi.org/10.1177/0037549715590598}, doi = {10.1177/0037549715590598}, abstract = {This article proposes an approach for the online analysis of accidental faults for real-time embedded systems using hidden Markov models (HMMs). By introducing reasonable and appropriate abstraction of complex systems, HMMs are used to describe the healthy or faulty states of system’s hardware components. They are parametrized to statistically simulate the real system’s behavior. As it is not easy to obtain rich accidental fault data from a system, the Baum–Welch algorithm cannot be employed here to train the parameters in HMMs. Inspired by the principles of fault tree analysis and the maximum entropy in Bayesian probability theory, we propose to compute the failure propagation distribution to estimate the parameters in HMMs and to adapt the parameters using a backward algorithm. The parameterized HMMs are then used to online diagnose accidental faults using a vote algorithm integrated with a low-pass filter. We design a specific test bed to analyze the sensitivity, specificity, precision, accuracy and F1-score measures by generating a large amount of test cases. The test results show that the proposed approach is robust, efficient and accurate.}, language = {en}, number = {10}, urldate = {2021-11-07}, journal = {SIMULATION}, author = {Ge, Ning and Nakajima, Shin and Pantel, Marc}, month = oct, year = {2015}, note = {Publisher: SAGE Publications Ltd STM}, keywords = {Real-time embedded system, accidental fault, complex system, hidden Markov model, hmm, online, online diagnosis, real-time, simulation, stream}, pages = {851--868}, }
@article{nguyen_multi-level_2015, title = {Multi-level predictive maintenance for multi-component systems}, volume = {144}, issn = {0951-8320}, url = {https://www.sciencedirect.com/science/article/pii/S0951832015002215}, doi = {10.1016/j.ress.2015.07.017}, abstract = {In this paper, a novel predictive maintenance policy with multi-level decision-making is proposed for multi-component system with complex structure. The main idea is to propose a decision-making process considered on two levels: system level and component one. The goal of the decision rules at the system level is to address if preventive maintenance actions are needed regarding the predictive reliability of the system. At component level the decision rules aim at identifying optimally a group of several components to be preventively maintained when preventive maintenance is trigged due to the system level decision. Selecting optimal components is based on a cost-based group improvement factor taking into account the predictive reliability of the components, the economic dependencies as well as the location of the components in the system. Moreover, a cost model is developed to find the optimal maintenance decision variables. A 14-component system is finally introduced to illustrate the use and the performance of the proposed predictive maintenance policy. Different sensitivity analysis are also investigated and discussed. Indeed, the proposed policy provides more flexibility in maintenance decision-making for complex structure systems, hence leading to significant profits in terms of maintenance cost when compared with existing policies.}, language = {en}, urldate = {2021-10-28}, journal = {Reliability Engineering \& System Safety}, author = {Nguyen, Kim-Anh and Do, Phuc and Grall, Antoine}, month = dec, year = {2015}, keywords = {Condition-based maintenance, Economic dependencies, Importance measure, Multi-component system, Opportunistic, Prognostic, multicomponent, preventive}, pages = {83--94}, }
@article{traore_supervision_2015, title = {Supervision and prognosis architecture based on dynamical classification method for the predictive maintenance of dynamical evolving systems}, volume = {136}, issn = {0951-8320}, url = {https://www.sciencedirect.com/science/article/pii/S0951832014003123}, doi = {10.1016/j.ress.2014.12.005}, abstract = {In this paper, we are concerned by the improvement of the safety, availability and reliability of dynamical systems’ components subjected to slow degradations (slow drifts). We propose an architecture for efficient Predictive Maintenance (PM) according to the real time estimate of the future state of the components. The architecture is built on supervision and prognosis tools. The prognosis method is based on an appropriated supervision technique that consists in drift tracking of the dynamical systems using AUDyC (AUto-adaptive and Dynamical Clustering), that is an auto-adaptive dynamical classifier. Thus, due to the complexity and the dynamical of the considered systems, the Failure Mode Effect and Criticity Analysis (FMECA) is used to identify the key components of the systems. A component is defined as an element of the system that can be impacted by only one failure. A failure of a key component causes a long downtime of the system. From the FMECA, a Fault Tree Analysis (FTA) of the system are built to determine the propagation laws of a failure on the system by using a deductive method. The proposed architecture is implemented for the PM of a thermoregulator. The application on this real system highlights the interests and the performances of the proposed architecture.}, language = {en}, urldate = {2021-10-26}, journal = {Reliability Engineering \& System Safety}, author = {Traore, M. and Chammas, A. and Duviella, E.}, month = apr, year = {2015}, keywords = {Dynamical classification, Evolving systems, Non-stationary environment, Predictive maintenance, Prognosis, Supervision}, pages = {120--131}, }
@article{wang_diagnosis_2015, title = {Diagnosis of {Time} {Petri} {Nets} {Using} {Fault} {Diagnosis} {Graph}}, volume = {60}, issn = {1558-2523}, doi = {10.1109/TAC.2015.2405293}, abstract = {This paper proposes an online approach for fault diagnosis of timed discrete event systems modeled by Time Petri Net (TPN). The set of transitions is partitioned into two subsets containing observable and unobservable transitions, respectively. Faults correspond to a subset of unobservable transitions. In accordance with most of the literature on discrete event systems, we define three diagnosis states, namely normal, faulty and uncertain states, respectively. The proposed approach uses a fault diagnosis graph, which is incrementally computed using the state class graph of the unobservable TPN. After each observation, if the part of FDG necessary to compute the diagnosis states is not available, the state class graph of the unobservable TPN is computed starting from the consistent states. This graph is then optimized and added to the partial FDG keeping only the necessary information for computation of the diagnosis states. We provide algorithms to compute the FDG and the diagnosis states. The method is implemented as a software package and simulation results are included.}, number = {9}, journal = {IEEE Transactions on Automatic Control}, author = {Wang, Xu and Mahulea, Cristian and Silva, Manuel}, month = sep, year = {2015}, note = {Conference Name: IEEE Transactions on Automatic Control}, keywords = {Automata, Computational modeling, Delays, Discrete event system, Discrete event system (DES), Discrete-event systems, Fault diagnosis, Petri net, State estimation, Timed systems, Vectors, diagnostics, fault diagnosis, timed systems}, pages = {2321--2335}, }
@article{zhu_machine_2015, title = {Machine {Teaching}: {An} {Inverse} {Problem} to {Machine} {Learning} and an {Approach} {Toward} {Optimal} {Education}}, volume = {29}, issn = {2374-3468}, shorttitle = {Machine {Teaching}}, url = {https://ojs.aaai.org/index.php/AAAI/article/view/9761}, abstract = {I draw the reader's attention to machine teaching, the problem of finding an optimal training set given a machine learning algorithm and a target model. In addition to generating fascinating mathematical questions for computer scientists to ponder, machine teaching holds the promise of enhancing education and personnel training. The Socratic dialogue style aims to stimulate critical thinking.}, language = {en}, number = {1}, urldate = {2021-10-18}, journal = {Proceedings of the AAAI Conference on Artificial Intelligence}, author = {Zhu, Xiaojin}, month = mar, year = {2015}, note = {Number: 1}, }
@article{lee_towards_2015, title = {Towards real-world complexity: an introduction to multiplex networks}, volume = {88}, issn = {1434-6036}, shorttitle = {Towards real-world complexity}, url = {https://doi.org/10.1140/epjb/e2015-50742-1}, doi = {10.1140/epjb/e2015-50742-1}, abstract = {Many real-world complex systems are best modeled by multiplex networks of interacting network layers. The multiplex network study is one of the newest and hottest themes in the statistical physics of complex networks. Pioneering studies have proven that the multiplexity has broad impact on the system’s structure and function. In this Colloquium paper, we present an organized review of the growing body of current literature on multiplex networks by categorizing existing studies broadly according to the type of layer coupling in the problem. Major recent advances in the field are surveyed and some outstanding open challenges and future perspectives will be proposed.}, language = {en}, number = {2}, urldate = {2021-10-13}, journal = {The European Physical Journal B}, author = {Lee, Kyu-Min and Min, Byungjoon and Goh, Kwang-Il}, month = feb, year = {2015}, pages = {48}, }
@article{xu_comprehensive_2015, title = {A {Comprehensive} {Survey} of {Clustering} {Algorithms}}, volume = {2}, issn = {2198-5812}, url = {https://doi.org/10.1007/s40745-015-0040-1}, doi = {10.1007/s40745-015-0040-1}, abstract = {Data analysis is used as a common method in modern science research, which is across communication science, computer science and biology science. Clustering, as the basic composition of data analysis, plays a significant role. On one hand, many tools for cluster analysis have been created, along with the information increase and subject intersection. On the other hand, each clustering algorithm has its own strengths and weaknesses, due to the complexity of information. In this review paper, we begin at the definition of clustering, take the basic elements involved in the clustering process, such as the distance or similarity measurement and evaluation indicators, into consideration, and analyze the clustering algorithms from two perspectives, the traditional ones and the modern ones. All the discussed clustering algorithms will be compared in detail and comprehensively shown in Appendix Table 22.}, language = {en}, number = {2}, urldate = {2021-10-01}, journal = {Annals of Data Science}, author = {Xu, Dongkuan and Tian, Yingjie}, month = jun, year = {2015}, pages = {165--193}, }
@article{phillips_classifying_2015, title = {Classifying machinery condition using oil samples and binary logistic regression}, volume = {60-61}, issn = {0888-3270}, url = {https://www.sciencedirect.com/science/article/pii/S0888327014005093}, doi = {10.1016/j.ymssp.2014.12.020}, abstract = {The era of big data has resulted in an explosion of condition monitoring information. The result is an increasing motivation to automate the costly and time consuming human elements involved in the classification of machine health. When working with industry it is important to build an understanding and hence some trust in the classification scheme for those who use the analysis to initiate maintenance tasks. Typically “black box” approaches such as artificial neural networks (ANN) and support vector machines (SVM) can be difficult to provide ease of interpretability. In contrast, this paper argues that logistic regression offers easy interpretability to industry experts, providing insight to the drivers of the human classification process and to the ramifications of potential misclassification. Of course, accuracy is of foremost importance in any automated classification scheme, so we also provide a comparative study based on predictive performance of logistic regression, ANN and SVM. A real world oil analysis data set from engines on mining trucks is presented and using cross-validation we demonstrate that logistic regression out-performs the ANN and SVM approaches in terms of prediction for healthy/not healthy engines.}, language = {en}, urldate = {2021-09-30}, journal = {Mechanical Systems and Signal Processing}, author = {Phillips, J. and Cripps, E. and Lau, John W. and Hodkiewicz, M. R.}, month = aug, year = {2015}, keywords = {Classification, Logistic regression, Machine health, Mining trucks, Neural networks, Oil analysis, Receiver operating characteristic curve, Support vector machine}, pages = {316--325}, }
@article{fumeo_condition_2015, series = {{INNS} {Conference} on {Big} {Data} 2015 {Program} {San} {Francisco}, {CA}, {USA} 8-10 {August} 2015}, title = {Condition {Based} {Maintenance} in {Railway} {Transportation} {Systems} {Based} on {Big} {Data} {Streaming} {Analysis}}, volume = {53}, issn = {1877-0509}, url = {https://www.sciencedirect.com/science/article/pii/S1877050915018244}, doi = {10.1016/j.procs.2015.07.321}, abstract = {Streaming Data Analysis (SDA) of Big Data Streams (BDS) for Condition Based Maintenance (CBM) in the context of Rail Transportation Systems (RTS) is a state-of-the-art field of research. SDA of BDS is the problem of analyzing, modeling and extracting information from huge amounts of data that continuously come from several sources in real time through computational aware solutions. Among others, CBM for Rail Transportation is one of the most challenging SDA problems, consisting of the implementation of a predictive maintenance system for evaluating the future status of the monitored assets in order to reduce risks related to failures and to avoid service disruptions. The challenge is to collect and analyze all the data streams that come from the numerous on-board sensors monitoring the assets. This paper deals with the problem of CBM applied to the condition monitoring and predictive maintenance of train axle bearings based on sensors data collection, with the purpose of maximizing their Remaining Useful Life (RUL). In particular we propose a novel algorithm for CBM based on SDA that takes advantage of the Online Support Vector Regression (OL-SVR) for predicting the RUL. The novelty of our proposal is the heuristic approach for optimizing the trade-off between the accuracy of the OL-SVR models and the computational time and resources needed in order to build them. Results from tests on a real-world dataset show the actual benefits brought by the proposed methodology.}, language = {en}, urldate = {2021-03-21}, journal = {Procedia Computer Science}, author = {Fumeo, Emanuele and Oneto, Luca and Anguita, Davide}, month = jan, year = {2015}, keywords = {Big Data Streams, Condition Based Maintenance, Data Analytics, Intelligent Transportation Systems, Model Selection, Online Learning}, pages = {437--446}, }
@article{chen_data_2015, title = {Data {Mining} for the {Internet} of {Things}: {Literature} {Review} and {Challenges}}, volume = {11}, issn = {1550-1477}, shorttitle = {Data {Mining} for the {Internet} of {Things}}, url = {https://doi.org/10.1155/2015/431047}, doi = {10.1155/2015/431047}, abstract = {The massive data generated by the Internet of Things (IoT) are considered of high business value, and data mining algorithms can be applied to IoT to extract hidden information from data. In this paper, we give a systematic way to review data mining in knowledge view, technique view, and application view, including classification, clustering, association analysis, time series analysis and outlier analysis. And the latest application cases are also surveyed. As more and more devices connected to IoT, large volume of data should be analyzed, the latest algorithms should be modified to apply to big data. We reviewed these algorithms and discussed challenges and open research issues. At last a suggested big data mining system is proposed.}, language = {en}, number = {8}, urldate = {2020-10-01}, journal = {International Journal of Distributed Sensor Networks}, author = {Chen, Feng and Deng, Pan and Wan, Jiafu and Zhang, Daqiang and Vasilakos, Athanasios V. and Rong, Xiaohui}, month = aug, year = {2015}, note = {Publisher: SAGE Publications}, pages = {431047}, }
@inproceedings{wollschlaeger_reference_2015, title = {A reference architecture for condition monitoring}, doi = {10.1109/WFCS.2015.7160555}, abstract = {Condition monitoring and diagnostic systems support the operators of machines and plants. By these means, machine operators can run their plants efficiently, perform effective preventive maintenance and supply superordinate information systems with valid machine data. This paper describes the VDMA reference architecture for condition monitoring systems. The components of the architecture are described and their application across all levels of an automation system is shown, combining the single functions to a complete condition monitoring system.}, booktitle = {2015 {IEEE} {World} {Conference} on {Factory} {Communication} {Systems} ({WFCS})}, author = {Wollschlaeger, Martin and Theurich, Stefan and Winter, Albrecht and Lubnau, Frank and Paulitsch, Christoph}, month = may, year = {2015}, keywords = {Automation, Condition monitoring, Guidelines, Industrial communication, Industries, Monitoring, VDMA reference architecture, automation system, condition diagnostic system, condition monitoring, condition monitoring system, function block, information systems, maintenance, mechanical engineering computing, preventive maintenance, software architecture, superordinate information systems, valid machine data, vertical integration}, pages = {1--8}, }
@techreport{noauthor_condition_2015-1, type = {Standard}, title = {Condition monitoring and diagnostics of machine systems — {Data} processing, communication and presentation — {Part} 4: {Presentation}}, institution = {International Organization for Standardization}, year = {2015}, }
@techreport{noauthor_condition_2015-2, type = {Standard}, title = {Condition monitoring and diagnostics of machines — {Data} interpretation and diagnostics techniques — {Part} 2: {Data}-driven applications}, institution = {International Organization for Standardization}, year = {2015}, }
@techreport{noauthor_condition_2015-3, type = {Standard}, title = {Condition monitoring and diagnostics of machines — {Prognostics} — {Part} 1: {General} guidelines}, institution = {International Organization for Standardization}, year = {2015}, }
@article{an_practical_2015, title = {Practical options for selecting data-driven or physics-based prognostics algorithms with reviews}, volume = {133}, issn = {0951-8320}, url = {http://www.sciencedirect.com/science/article/pii/S0951832014002245}, doi = {10.1016/j.ress.2014.09.014}, journal = {Reliability Engineering \& System Safety}, author = {An, Dawn and Kim, Nam H. and Choi, Joo-Ho}, year = {2015}, pages = {223--236}, }
@article{shin_condition_2015, title = {On condition based maintenance policy}, volume = {2}, issn = {2288-4300}, doi = {10.1016/j.jcde.2014.12.006}, number = {2}, journal = {Journal of Computational Design and Engineering}, author = {Shin, Jong-Ho and Jun, Hong-Bae}, month = apr, year = {2015}, keywords = {Condition-based maintenance, Predictive maintenance, Prognostic and health management}, pages = {119--127}, }
@article{susto_machine_2015, title = {Machine {Learning} for {Predictive} {Maintenance}: {A} {Multiple} {Classifier} {Approach}}, volume = {11}, doi = {10.1109/TII.2014.2349359}, number = {3}, journal = {IEEE Transactions on Industrial Informatics}, author = {Susto, G. A. and Schirru, A. and Pampuri, S. and McLoone, S. and Beghi, A.}, year = {2015}, keywords = {Availability, Classification algorithms, Informatics, Manufacturing, PdM, Predictive maintenance, Production, Training, censored data problem, data mining, dynamical decision rules, health factors, high-dimensional problem, ion implantation, learning (artificial intelligence), machine learning (ML), maintenance management, multiple classifier machine learning methodology, operating cost-based maintenance decision system, pattern classification, predictive maintenance, predictive maintenance (PdM), production engineering computing, quantitative indicators, semiconductor device manufacture, semiconductor manufacturing maintenance problem}, pages = {812--820}, }
@inproceedings{rei_online_2015, address = {Lisbon, Portugal}, title = {Online {Representation} {Learning} in {Recurrent} {Neural} {Language} {Models}}, url = {https://www.aclweb.org/anthology/D15-1026}, doi = {10.18653/v1/D15-1026}, urldate = {2020-07-21}, booktitle = {Proceedings of the 2015 {Conference} on {Empirical} {Methods} in {Natural} {Language} {Processing}}, publisher = {Association for Computational Linguistics}, author = {Rei, Marek}, month = sep, year = {2015}, pages = {238--243}, }
@article{amini_fast_2014, title = {A {Fast} {Density}-{Based} {Clustering} {Algorithm} for {Real}-{Time} {Internet} of {Things} {Stream}}, volume = {2014}, doi = {10.1155/2014/926020}, abstract = {Data streams are continuously generated over time from Internet of Things (IoT) devices. The faster all of this data is analyzed, its hidden trends and patterns discovered, and new strategies created, the faster action can be taken, creating greater value for organizations. Density-based method is a prominent class in clustering data streams. It has the ability to detect arbitrary shape clusters, to handle outlier, and it does not need the number of clusters in advance. Therefore, density-based clustering algorithm is a proper choice for clustering IoT streams. Recently, several density-based algorithms have been proposed for clustering data streams. However, density-based clustering in limited time is still a challenging issue. In this paper, we propose a density-based clustering algorithm for IoT streams. The method has fast processing time to be applicable in real-time application of IoT devices. Experimental results show that the proposed approach obtains high quality results with low computation time on real and synthetic datasets.}, journal = {TheScientificWorldJournal}, author = {Amini, Amineh and Saboohi, Hadi and Wah, Teh and Herawan, Tutut}, month = jun, year = {2014}, pages = {926020}, }
@article{chen_cognitive_2014, title = {Cognitive fault diagnosis in {Tennessee} {Eastman} {Process} using learning in the model space}, volume = {67}, issn = {0098-1354}, url = {https://www.sciencedirect.com/science/article/pii/S0098135414000969}, doi = {10.1016/j.compchemeng.2014.03.015}, abstract = {This paper focuses on the Tennessee Eastman (TE) process and for the first time investigates it in a cognitive way. The cognitive fault diagnosis does not assume prior knowledge of the fault numbers and signatures. This approach firstly employs deterministic reservoir models to fit the multiple-input and multiple-output signals in the TE process, which map the signal space to the (reservoir) model space. Then we investigate incremental learning algorithms in this reservoir model space based on the “function distance” between these models. The main contribution of this paper is to provide a cognitive solution to this popular benchmark problem. Our approach is not only applicable to fault detection, but also to fault isolation without knowing the prior information about the fault signature. Experimental comparisons with other state-of-the-art approaches confirmed the benefits of our approach. Our algorithm is efficient and can run in real-time for practical applications.}, language = {en}, urldate = {2022-05-02}, journal = {Computers \& Chemical Engineering}, author = {Chen, Huanhuan and Tiňo, Peter and Yao, Xin}, month = aug, year = {2014}, keywords = {Cognitive fault diagnosis, Fault detection, Learning in the model space, One class learning, Reservoir computing, Tennessee Eastman Process}, pages = {33--42}, }
@article{gupta_outlier_2014, title = {Outlier {Detection} for {Temporal} {Data}: {A} {Survey}}, volume = {26}, issn = {1558-2191}, shorttitle = {Outlier {Detection} for {Temporal} {Data}}, doi = {10.1109/TKDE.2013.184}, abstract = {In the statistics community, outlier detection for time series data has been studied for decades. Recently, with advances in hardware and software technology, there has been a large body of work on temporal outlier detection from a computational perspective within the computer science community. In particular, advances in hardware technology have enabled the availability of various forms of temporal data collection mechanisms, and advances in software technology have enabled a variety of data management mechanisms. This has fueled the growth of different kinds of data sets such as data streams, spatio-temporal data, distributed streams, temporal networks, and time series data, generated by a multitude of applications. There arises a need for an organized and detailed study of the work done in the area of outlier detection with respect to such temporal datasets. In this survey, we provide a comprehensive and structured overview of a large set of interesting outlier definitions for various forms of temporal data, novel techniques, and application scenarios in which specific definitions and techniques have been widely used.}, number = {9}, journal = {IEEE Transactions on Knowledge and Data Engineering}, author = {Gupta, Manish and Gao, Jing and Aggarwal, Charu C. and Han, Jiawei}, month = sep, year = {2014}, note = {Conference Name: IEEE Transactions on Knowledge and Data Engineering}, keywords = {Computational modeling, Data mining, Distributed databases, Hidden Markov models, Mining methods and algorithms, Pattern matching, Predictive models, Temporal outlier detection, Time series analysis, applications of temporal outlier detection, data streams, distributed data streams, network outliers, spatio-temporal outliers, temporal networks, time series data}, pages = {2250--2267}, }
@article{amini_density-based_2014, title = {On {Density}-{Based} {Data} {Streams} {Clustering} {Algorithms}: {A} {Survey}}, volume = {29}, issn = {1860-4749}, shorttitle = {On {Density}-{Based} {Data} {Streams} {Clustering} {Algorithms}}, url = {https://doi.org/10.1007/s11390-014-1416-y}, doi = {10.1007/s11390-014-1416-y}, abstract = {Clustering data streams has drawn lots of attention in the last few years due to their ever-growing presence. Data streams put additional challenges on clustering such as limited time and memory and one pass clustering. Furthermore, discovering clusters with arbitrary shapes is very important in data stream applications. Data streams are infinite and evolving over time, and we do not have any knowledge about the number of clusters. In a data stream environment due to various factors, some noise appears occasionally. Density-based method is a remarkable class in clustering data streams, which has the ability to discover arbitrary shape clusters and to detect noise. Furthermore, it does not need the number of clusters in advance. Due to data stream characteristics, the traditional density-based clustering is not applicable. Recently, a lot of density-based clustering algorithms are extended for data streams. The main idea in these algorithms is using density-based methods in the clustering process and at the same time overcoming the constraints, which are put out by data stream’s nature. The purpose of this paper is to shed light on some algorithms in the literature on density-based clustering over data streams. We not only summarize the main density-based clustering algorithms on data streams, discuss their uniqueness and limitations, but also explain how they address the challenges in clustering data streams. Moreover, we investigate the evaluation metrics used in validating cluster quality and measuring algorithms’ performance. It is hoped that this survey will serve as a steppingstone for researchers studying data streams clustering, particularly density-based algorithms.}, language = {en}, number = {1}, urldate = {2022-03-25}, journal = {Journal of Computer Science and Technology}, author = {Amini, Amineh and Wah, Teh Ying and Saboohi, Hadi}, month = jan, year = {2014}, pages = {116--141}, }
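The density-based stream clustering algorithms surveyed above (e.g., DenStream) maintain one-pass micro-cluster summaries. The sketch below is a deliberate simplification that only re-runs DBSCAN over a bounded sliding window; it illustrates the density-based idea (arbitrary cluster shapes, an explicit noise label) but not the incremental machinery of the surveyed methods.

```python
# Simplified illustration of density-based clustering over a stream via a
# sliding window. Real stream algorithms (e.g., DenStream) summarize the stream
# with micro-clusters instead of re-clustering raw points.
import numpy as np
from collections import deque
from sklearn.cluster import DBSCAN

window = deque(maxlen=500)                 # bounded memory over the stream
rng = np.random.default_rng(1)

def stream():
    while True:
        center = rng.choice([-3.0, 0.0, 3.0])
        yield center + rng.normal(scale=0.3, size=2)

for i, point in zip(range(2000), stream()):
    window.append(point)
    if i % 500 == 499:                     # periodically re-cluster the window
        labels = DBSCAN(eps=0.5, min_samples=10).fit_predict(np.array(window))
        n_clusters = len(set(labels)) - (1 if -1 in labels else 0)
        print(f"t={i}: {n_clusters} clusters, {np.sum(labels == -1)} noise points")
```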
@article{moghaddass_integrated_2014, title = {An integrated framework for online diagnostic and prognostic health monitoring using a multistate deterioration process}, volume = {124}, issn = {0951-8320}, url = {https://www.sciencedirect.com/science/article/pii/S0951832013003037}, doi = {10.1016/j.ress.2013.11.006}, abstract = {Efficient asset management is of paramount importance, particularly for systems with costly downtime and failure. As in energy and capital-intensive industries, the economic loss of downtime and failure is huge, the need for a low-cost and integrated health monitoring system has increased significantly over the years. Timely detection of faults and failures through an efficient prognostics and health management (PHM) framework can lead to appropriate maintenance actions to be scheduled proactively to avoid catastrophic failures and minimize the overall maintenance cost of the systems. This paper aims at practical challenges of online diagnostics and prognostics of mechanical systems under unobservable degradation. First, the elements of a multistate degradation structure are reviewed and then a model selection framework is introduced. Important dynamic performance measures are introduced, which can be used for online diagnostics and prognostics. The effectiveness of the result of this paper is demonstrated with a case study on the health monitoring of turbofan engines.}, language = {en}, urldate = {2022-02-09}, journal = {Reliability Engineering \& System Safety}, author = {Moghaddass, Ramin and Zuo, Ming J.}, month = apr, year = {2014}, keywords = {Condition monitoring, Model selection, Multistate degradation process, Online diagnostics and prognostics, Reliability analysis, ecml}, pages = {92--104}, }
@misc{noauthor_senseye_2014, title = {Senseye}, url = {https://www.senseye.io/}, year = {2014}, }
@article{le_hidden_2014, title = {Hidden {Markov} {Models} for diagnostics and prognostics of systems under multiple deterioration modes}, isbn = {978-1-138-02681-0}, doi = {10.1201/b17399-166}, author = {Le, Thanh and Chatelain, Florent and Berenguer, Christophe}, month = sep, year = {2014}, }
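Hidden-Markov-model diagnostics, as in the entry above, amounts to filtering a hidden degradation state from noisy condition indicators. The following is a minimal forward-filter sketch with a three-state left-to-right chain; all transition and emission parameters are invented for illustration, and the paper's multi-mode model is considerably richer.

```python
# Minimal HMM forward filter for a hidden degradation state
# (healthy -> worn -> failed). Parameters are invented for illustration.
import numpy as np
from scipy.stats import norm

A = np.array([[0.95, 0.05, 0.00],          # left-to-right degradation chain
              [0.00, 0.90, 0.10],
              [0.00, 0.00, 1.00]])
pi = np.array([1.0, 0.0, 0.0])             # start in the healthy state
emission_means, emission_std = np.array([0.0, 1.5, 3.0]), 0.5

def filter_states(observations):
    """Return P(state_t | obs_1..t) for each t (normalized forward variables)."""
    belief = pi.copy()
    history = []
    for y in observations:
        likelihood = norm.pdf(y, loc=emission_means, scale=emission_std)
        belief = likelihood * (belief @ A)  # predict one step, then update
        belief /= belief.sum()
        history.append(belief.copy())
    return np.array(history)

obs = np.concatenate([np.random.normal(0.0, 0.5, 30),
                      np.random.normal(1.5, 0.5, 20),
                      np.random.normal(3.0, 0.5, 10)])
print(filter_states(obs)[-1])              # should concentrate on the failed state
```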
@inproceedings{casanova_diagnosing_2014, address = {New York, NY, USA}, series = {{SEAMS} 2014}, title = {Diagnosing unobserved components in self-adaptive systems}, isbn = {978-1-4503-2864-7}, url = {https://doi.org/10.1145/2593929.2593946}, doi = {10.1145/2593929.2593946}, abstract = {Availability is an increasingly important quality for today's software-based systems and it has been successfully addressed by the use of closed-loop control systems in self-adaptive systems. Probes are inserted into a running system to obtain information and the information is fed to a controller that, through provided interfaces, acts on the system to alter its behavior. When a failure is detected, pinpointing the source of the failure is a critical step for a repair action. However, information obtained from a running system is commonly incomplete due to probing costs or unavailability of probes. In this paper we address the problem of fault localization in the presence of incomplete system monitoring. We may not be able to directly observe a component but we may be able to infer its health state. We provide formal criteria to determine when health states of unobservable components can be inferred and establish formal theoretical bounds for accuracy when using any spectrum-based fault localization algorithm.}, urldate = {2021-10-29}, booktitle = {Proceedings of the 9th {International} {Symposium} on {Software} {Engineering} for {Adaptive} and {Self}-{Managing} {Systems}}, publisher = {Association for Computing Machinery}, author = {Casanova, Paulo and Garlan, David and Schmerl, Bradley and Abreu, Rui}, month = jun, year = {2014}, keywords = {Diagnostics, Monitoring, Self-adaptive systems, unobserved}, pages = {75--84}, }
@article{nguyen_condition-based_2014, title = {Condition-based maintenance for multi-component systems using importance measure and predictive information}, volume = {1}, issn = {2330-2674}, url = {https://doi.org/10.1080/23302674.2014.983582}, doi = {10.1080/23302674.2014.983582}, abstract = {This paper presents a predictive condition-based maintenance strategy for multi-component systems whose structure may impact components deterioration process. To select components for preventive maintenance actions, a decision rule relying on both structural importance measure of components and their predictive reliability that can be estimated at inspection times is proposed. For corrective maintenance actions, an adaptive opportunistic maintenance decision rule taking into account both the criticality level of components and logistic support constraints is introduced. Moreover, both economic and structure dependencies between components are studied and integrated in maintenance model. A 12-component system is finally introduced to illustrate the use and the performance of the proposed predictive maintenance strategy. Indeed, the proposed strategy provides more flexibility in maintenance decision-making, hence leading to significant profits in terms of maintenance cost when compared to existing strategies.}, number = {4}, urldate = {2021-10-28}, journal = {International Journal of Systems Science: Operations \& Logistics}, author = {Nguyen, Kim-Anh and Do, Phuc and Grall, Antoine}, month = oct, year = {2014}, note = {Publisher: Taylor \& Francis \_eprint: https://doi.org/10.1080/23302674.2014.983582}, keywords = {condition-based maintenance, gamma process, importance measure, multi-component system, reliability prediction}, pages = {228--245}, }
@article{lefebvre_-line_2014, title = {On-{Line} {Fault} {Diagnosis} {With} {Partially} {Observed} {Petri} {Nets}}, volume = {59}, issn = {1558-2523}, doi = {10.1109/TAC.2013.2294617}, abstract = {This technical note concerns fault detection and diagnosis for discrete event systems modeled with partially observed Petri nets. The proposed method provides diagnosis decisions via the analysis of observation sequences that include some observable events and the partial measurement of the successive states visited by the system. To this end, the observation sequences are decomposed into elementary observation sequences, linear matrix inequalities are used to compute the firing sequences consistent with each elementary observation sequence and an algorithm of linear complexity with respect to the length of the observation sequences is proposed to provide on-line diagnosis decisions.}, number = {7}, journal = {IEEE Transactions on Automatic Control}, author = {Lefebvre, Dimitri}, month = jul, year = {2014}, note = {Conference Name: IEEE Transactions on Automatic Control}, keywords = {Complexity theory, Earth Observing System, Fault detection, Fault diagnosis, Firing, Petri nets, Vectors, fault diagnosis, partially observed Petri nets}, pages = {1919--1924}, }
@inproceedings{xuan_learning_2014, title = {Learning to {Combine} {Multiple} {Ranking} {Metrics} for {Fault} {Localization}}, doi = {10.1109/ICSME.2014.41}, abstract = {Fault localization is an inevitable step in software debugging. Spectrum-based fault localization consists in computing a ranking metric on execution traces to identify faulty source code. Existing empirical studies on fault localization show that there is no optimal ranking metric for all faults in practice. In this paper, we propose Multric, a learning-based approach to combining multiple ranking metrics for effective fault localization. In Multric, a suspiciousness score of a program entity is a combination of existing ranking metrics. Multric consists two major phases: learning and ranking. Based on training faults, Multric builds a ranking model by learning from pairs of faulty and non-faulty source code elements. When a new fault appears, Multric computes the final ranking with the learned model. Experiments are conducted on 5386 seeded faults in ten open-source Java programs. We empirically compare Multric against four widely-studied metrics and three recently-proposed one. Our experimental results show that Multric localizes faults more effectively than state-of-art metrics, such as Tarantula, Ochiai, and Ample.}, booktitle = {2014 {IEEE} {International} {Conference} on {Software} {Maintenance} and {Evolution}}, author = {Xuan, Jifeng and Monperrus, Martin}, month = sep, year = {2014}, note = {ISSN: 1063-6773}, keywords = {Computational modeling, Debugging, Fault localization, Java, Measurement, Object oriented modeling, Training, Training data, learning to rank, multiple ranking metrics}, pages = {191--200}, }
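Multric learns to combine classical spectrum-based suspiciousness metrics. Two of the baselines it is compared against, Tarantula and Ochiai, are simple enough to state directly; the sketch below computes them from per-element coverage counts (the coverage spectrum shown is hypothetical).

```python
# Two classical spectrum-based fault-localization metrics that Multric combines
# with others: suspiciousness from per-element coverage counts.
#   ef/ep = failing/passing tests that execute the element
#   nf/np = failing/passing tests that do not execute it
import math

def tarantula(ef, ep, nf, np_):
    fail_ratio = ef / (ef + nf) if (ef + nf) else 0.0
    pass_ratio = ep / (ep + np_) if (ep + np_) else 0.0
    denom = fail_ratio + pass_ratio
    return fail_ratio / denom if denom else 0.0

def ochiai(ef, ep, nf, np_):
    denom = math.sqrt((ef + nf) * (ef + ep))
    return ef / denom if denom else 0.0

# Hypothetical coverage spectrum: statement -> (ef, ep, nf, np)
spectrum = {"stmt_1": (5, 20, 0, 10), "stmt_2": (1, 25, 4, 5), "stmt_3": (5, 2, 0, 28)}
for name, counts in spectrum.items():
    print(name, round(tarantula(*counts), 3), round(ochiai(*counts), 3))
```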
@incollection{moulavi_density-based_2014, series = {Proceedings}, title = {Density-{Based} {Clustering} {Validation}}, url = {https://epubs.siam.org/doi/abs/10.1137/1.9781611973440.96}, abstract = {One of the most challenging aspects of clustering is validation, which is the objective and quantitative assessment of clustering results. A number of different relative validity criteria have been proposed for the validation of globular, clusters. Not all data, however, are composed of globular clusters. Density-based clustering algorithms seek partitions with high density areas of points (clusters, not necessarily globular) separated by low density areas, possibly containing noise objects. In these cases relative validity indices proposed for globular cluster validation may fail. In this paper we propose a relative validation index for density-based, arbitrarily shaped clusters. The index assesses clustering quality based on the relative density connection between pairs of objects. Our index is formulated on the basis of a new kernel density function, which is used to compute the density of objects and to evaluate the within- and between-cluster density connectedness of clustering results. Experiments on synthetic and real world data show the effectiveness of our approach for the evaluation and selection of clustering algorithms and their respective appropriate parameters.}, urldate = {2021-10-05}, booktitle = {Proceedings of the 2014 {SIAM} {International} {Conference} on {Data} {Mining} ({SDM})}, publisher = {Society for Industrial and Applied Mathematics}, author = {Moulavi, Davoud and Jaskowiak, Pablo A. and Campello, Ricardo J. G. B. and Zimek, Arthur and Sander, Jörg}, month = apr, year = {2014}, doi = {10.1137/1.9781611973440.96}, pages = {839--847}, }
@article{young_hierarchical_2014, series = {Partially {Supervised} {Learning} for {Pattern} {Recognition}}, title = {Hierarchical spatiotemporal feature extraction using recurrent online clustering}, volume = {37}, issn = {0167-8655}, url = {https://www.sciencedirect.com/science/article/pii/S0167865513002869}, doi = {10.1016/j.patrec.2013.07.013}, abstract = {Deep machine learning offers a comprehensive framework for extracting meaningful features from complex observations in an unsupervised manner. The majority of deep learning architectures described in the literature primarily focus on extracting spatial features. However, in real-world settings, capturing temporal dependencies in observations is critical for accurate inference. This paper introduces an enhancement to DeSTIN – a compositional deep learning architecture in which each layer consists of multiple instantiations of a common node – that learns to represent spatiotemporal patterns in data based on a novel recurrent clustering algorithm. Contrary to mainstream deep architectures, such as deep belief networks where layer-by-layer training is assumed, each of the nodes in the proposed architecture is trained independently and in parallel. Moreover, top-down and bottom-up information flows facilitate rich feature formation. A semi-supervised setting is demonstrated achieving state-of-the-art results on the MNIST classification benchmarks. A GPU implementation is discussed further accentuating the scalability properties of the proposed framework.}, language = {en}, urldate = {2021-10-01}, journal = {Pattern Recognition Letters}, author = {Young, S. R. and Davis, A. and Mishtal, A. and Arel, I.}, month = feb, year = {2014}, keywords = {Deep machine learning, Online clustering, Pattern recognition, Recurrent clustering, Spatiotemporal signals, Unsupervised feature extraction}, pages = {115--123}, }
@inproceedings{shirkhorshidi_big_2014, address = {Cham}, series = {Lecture {Notes} in {Computer} {Science}}, title = {Big {Data} {Clustering}: {A} {Review}}, isbn = {978-3-319-09156-3}, shorttitle = {Big {Data} {Clustering}}, doi = {10.1007/978-3-319-09156-3_49}, abstract = {Clustering is an essential data mining and tool for analyzing big data. There are difficulties for applying clustering techniques to big data duo to new challenges that are raised with big data. As Big Data is referring to terabytes and petabytes of data and clustering algorithms are come with high computational costs, the question is how to cope with this problem and how to deploy clustering techniques to big data and get the results in a reasonable time. This study is aimed to review the trend and progress of clustering algorithms to cope with big data challenges from very first proposed algorithms until today’s novel solutions. The algorithms and the targeted challenges for producing improved clustering algorithms are introduced and analyzed, and afterward the possible future path for more advanced algorithms is illuminated based on today’s available technologies and frameworks.}, language = {en}, booktitle = {Computational {Science} and {Its} {Applications} – {ICCSA} 2014}, publisher = {Springer International Publishing}, author = {Shirkhorshidi, Ali Seyed and Aghabozorgi, Saeed and Wah, Teh Ying and Herawan, Tutut}, editor = {Murgante, Beniamino and Misra, Sanjay and Rocha, Ana Maria A. C. and Torre, Carmelo and Rocha, Jorge Gustavo and Falcão, Maria Irene and Taniar, David and Apduhan, Bernady O. and Gervasi, Osvaldo}, year = {2014}, keywords = {Big Data, Clustering, MapReduce, Parallel Clustering}, pages = {707--720}, }
@article{medina-oliva_predictive_2014, series = {Enhancing {Experience} {Reuse} and {Learning}}, title = {Predictive diagnosis based on a fleet-wide ontology approach}, volume = {68}, issn = {0950-7051}, url = {https://www.sciencedirect.com/science/article/pii/S0950705113004000}, doi = {10.1016/j.knosys.2013.12.020}, abstract = {Diagnosis is a critical activity in the PHM domain (Prognostics and Health Management) due to its impact on the downtime and on the global performances of a system. This activity becomes complex when dealing with large systems such as power plants, ships, aircrafts, which are composed of multiple systems, sub-systems and components of different technologies, different usages, and different ages. In order to ease diagnosis activities, this paper proposes to use a fleet-wide approach based on ontologies in order to capitalize knowledge and data to help decision makers to identify the causes of abnormal operations. In that sense, taking advantage of a fleet dimension implies to provide managers and engineers more knowledge as well as relevant and synthetized information about the system behavior. In order to achieve PHM at a fleet level, it is thus necessary to manage relevant knowledge arising from both modeling and monitoring of the fleet. This paper presents a knowledge structuring scheme of fleets in the marine domain based on ontologies for diagnostic purposes. The semantic knowledge model formalized with an ontology allowed to retrieve data from a set of heterogeneous units through the identification of common and pertinent points of similarity. Hence, it allows to reuse past feedback experiences to build fleet-wide statistics and to search “deeper” causes producing an operation drift.}, language = {en}, urldate = {2021-09-28}, journal = {Knowledge-Based Systems}, author = {Medina-Oliva, Gabriela and Voisin, Alexandre and Monnin, Maxime and Leger, Jean-Baptiste}, month = sep, year = {2014}, keywords = {Diagnostic, Knowledge capitalization, Knowledge reuse, Maintenance, Ontologies}, pages = {40--57}, }
@article{lovric_algoritmic_2014, title = {Algoritmic methods for segmentation of time series: {An} overview}, volume = {1}, copyright = {http://www.econstor.eu/dspace/Nutzungsbedingungen}, issn = {1857-9108}, shorttitle = {Algoritmic methods for segmentation of time series}, url = {https://www.econstor.eu/handle/10419/147468}, abstract = {Adaptive and innovative application of classical data mining principles and techniques in time series analysis has resulted in development of a concept known as time series data mining. Since the time series are present in all areas of business and scientific research, attractiveness of mining of time series datasets should not be seen only in the context of the research challenges in the scientific community, but also in terms of usefulness of the research results, as a support to the process of business decision-making. A fundamental component in the mining process of time series data is time series segmentation. As a data mining research problem, segmentation is focused on the discovery of rules in movements of observed phenomena in a form of interpretable, novel, and useful temporal patterns. In this Paper, a comprehensive review of the conceptual determinations, including the elements of comparative analysis, of the most commonly used algorithms for segmentation of time series, is being considered.}, language = {eng}, number = {1}, urldate = {2020-10-01}, journal = {Journal of Contemporary Economic and Business Issues}, author = {Lovrić, Miodrag and Milanović, Marina and Stamenković, Milan}, year = {2014}, note = {Publisher: Skopje: Ss. Cyril and Methodius University in Skopje, Faculty of Economics}, pages = {31--53}, }
@article{zhao_novel_2014, title = {A {Novel} {Change}-{Point} {Detection} {Approach} for {Monitoring} {High}-{Dimensional} {Traffics} in {Distributed} {Systems}}, volume = {536-537}, issn = {1662-7482}, url = {https://www.scientific.net/AMM.536-537.499}, doi = {10.4028/www.scientific.net/AMM.536-537.499}, abstract = {Change-point detection is the problem of finding abrupt changes in time-series. However, the meaningful changes are usually difficult to identify from the original massive traffics, due to high dimension and strong periodicity. In this paper, we propose a novel change-point detection approach, which simultaneously detects change points from all dimensions of the traffics with three steps. We first reduce the dimensions by the classical Principal Component Analysis (PCA), then we apply an extended time-series segmentation method to detect the nontrivial change times, finally we identify the responsible applications for the changes by F-test. We demonstrate through experiments on datasets collected from four distributed systems with 44 applications that the proposed approach can effectively detect the nontrivial change points from the multivariate and periodical traffics. Our approach is more appropriate for mining the nontrivial changes in traffic data comparing with other clustering methods, such as center-based Kmeans and density-based DBSCAN.}, language = {en}, urldate = {2020-10-01}, journal = {Applied Mechanics and Materials}, publisher = {Trans Tech Publications Ltd}, author = {Zhao, Li and Liu, Qian and Du, Peng and Fu, Ge and Cao, Wei}, year = {2014}, note = {Conference Name: Advances in Mechatronics, Robotics and Automation II. ISBN: 9783038350781}, pages = {499--511}, }
@techreport{noauthor_fieldbus_2014, type = {Standard}, title = {Fieldbus neutral reference architecture for {Condition} {Monitoring} in production automation}, institution = {VDMA}, year = {2014}, }
@article{li_improving_2014, title = {Improving rail network velocity: {A} machine learning approach to predictive maintenance}, volume = {45}, issn = {0968-090X}, url = {http://www.sciencedirect.com/science/article/pii/S0968090X14001107}, doi = {10.1016/j.trc.2014.04.013}, abstract = {Rail network velocity is defined as system-wide average speed of line-haul movement between terminals. To accommodate increased service demand and load on rail networks, increase in network velocity, without compromising safety, is required. Among many determinants of overall network velocity, a key driver is service interruption, including lowered operating speed due to track/train condition and delays caused by derailments. Railroads have put significant infrastructure and inspection programs in place to avoid service interruptions. One of the key measures is an extensive network of wayside mechanical condition detectors (temperature, strain, vision, infrared, weight, impact, etc.) that monitor the rolling-stock as it passes by. The detectors are designed to alert for conditions that either violate regulations set by governmental rail safety agencies or deteriorating rolling-stock conditions as determined by the railroad. Using huge volumes of historical detector data, in combination with failure data, maintenance action data, inspection schedule data, train type data and weather data, we are exploring several analytical approaches including, correlation analysis, causal analysis, time series analysis and machine learning techniques to automatically learn rules and build failure prediction models. These models will be applied against both historical and real-time data to predict conditions leading to failure in the future, thus avoiding service interruptions and increasing network velocity. Additionally, the analytics and models can also be used for detecting root cause of several failure modes and wear rate of components, which, while do not directly address network velocity, can be proactively used by maintenance organizations to optimize trade-offs related to maintenance schedule, costs and shop capacity. As part of our effort, we explore several avenues to machine learning techniques including distributed learning and hierarchical analytical approaches.}, journal = {Transportation Research Part C: Emerging Technologies}, author = {Li, Hongfei and Parikh, Dhaivat and He, Qing and Qian, Buyue and Li, Zhiguo and Fang, Dongping and Hampapur, Arun}, year = {2014}, keywords = {Big data, Condition based maintenance, Information fusion, Multiple wayside detectors, Predictive modeling, Rail network velocity}, pages = {17--26}, }
@article{gama_survey_2014, title = {A survey on concept drift adaptation}, volume = {46}, issn = {0360-0300}, url = {https://doi.org/10.1145/2523813}, doi = {10.1145/2523813}, abstract = {Concept drift primarily refers to an online supervised learning scenario when the relation between the input data and the target variable changes over time. Assuming a general knowledge of supervised learning in this article, we characterize adaptive learning processes; categorize existing strategies for handling concept drift; overview the most representative, distinct, and popular techniques and algorithms; discuss evaluation methodology of adaptive algorithms; and present a set of illustrative applications. The survey covers the different facets of concept drift in an integrated way to reflect on the existing scattered state of the art. Thus, it aims at providing a comprehensive introduction to the concept drift adaptation for researchers, industry analysts, and practitioners.}, number = {4}, urldate = {2020-03-19}, journal = {ACM Computing Surveys}, author = {Gama, João and Žliobaitė, Indrė and Bifet, Albert and Pechenizkiy, Mykola and Bouchachia, Abdelhamid}, month = mar, year = {2014}, keywords = {Concept drift, adaptive learning, change detection, data streams}, pages = {44:1--44:37}, }
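Among the drift detectors covered by surveys such as the one above, the Page-Hinkley test is one of the simplest. The sketch below implements it from scratch; the sensitivity parameters (delta, lambda) and the synthetic stream are illustrative choices.

```python
# Minimal Page-Hinkley change detector (a classical detector from the concept
# drift literature); delta and lam are illustrative sensitivity parameters.
import random

class PageHinkley:
    def __init__(self, delta=0.005, lam=50.0):
        self.delta, self.lam = delta, lam
        self.mean, self.n = 0.0, 0
        self.cum, self.cum_min = 0.0, 0.0

    def update(self, x):
        """Feed one observation; return True if an upward change is signalled."""
        self.n += 1
        self.mean += (x - self.mean) / self.n          # running mean
        self.cum += x - self.mean - self.delta
        self.cum_min = min(self.cum_min, self.cum)
        return self.cum - self.cum_min > self.lam

# Hypothetical stream whose mean jumps at t = 1000.
random.seed(0)
detector = PageHinkley()
for t in range(2000):
    value = random.gauss(0.0, 1.0) + (3.0 if t >= 1000 else 0.0)
    if detector.update(value):
        print("drift signalled at t =", t)
        break
```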
@inproceedings{heinze_cloud-based_2014, series = {{DEBS} ’14}, title = {Cloud-{Based} {Data} {Stream} {Processing}}, isbn = {978-1-4503-2737-4}, url = {https://doi.org/10.1145/2611286.2611309}, doi = {10.1145/2611286.2611309}, booktitle = {Proceedings of the 8th {ACM} {International} {Conference} on {Distributed} {Event}-{Based} {Systems}}, publisher = {Association for Computing Machinery}, author = {Heinze, Thomas and Aniello, Leonardo and Querzoni, Leonardo and Jerzak, Zbigniew}, year = {2014}, keywords = {cloud-based data stream processing, fault tolerance, load balancing}, pages = {238--245}, }
@inproceedings{wang_theoretical_2013, title = {A {Theoretical} {Analysis} of {NDCG} {Type} {Ranking} {Measures}}, url = {https://proceedings.mlr.press/v30/Wang13.html}, abstract = {Ranking has been extensively studied in information retrieval, machine learning and statistics. A central problem in ranking is to design a ranking measure for evaluation of ranking functions. State of the art leaning to rank methods often train a ranking function by using a ranking measure as the objective to maximize. In this paper we study, from a theoretical perspective, the widely used NDCG type ranking measures. We analyze the behavior of these ranking measures as the number of objects to rank getting large. We first show that, whatever the ranking function is, the standard NDCG which adopts a logarithmic discount, converges to 1 as the number of items to rank goes to infinity. On the first sight, this result seems to imply that NDCG cannot distinguish good and bad ranking functions, contradicting to the empirical success of NDCG in many applications. Our next main result is a theorem which shows that although NDCG converge to the same limit for all ranking functions, it has distinguishability for ranking functions in a strong sense. We then investigate NDCG with other possible discount. Specifically we characterize the class of feasible discount functions for NDCG. We also compare the limiting behavior and the power of distinguishability of these feasible NDCG type measures to the standard NDCG. We next turn to the cut-off version of NDCG, i.e., NDCG@k. The most popular NDCG@k uses a combination of a slow logarithmic decay and a hard cut-off as its discount. So a natural question is why not simply use a smooth discount with fast decay? We show that if the decay is too fast, then the NDCG measure does not have strong power of distinguishability and even not converge. Finally, feasible NDCG@k are also discussed.}, language = {en}, urldate = {2023-05-23}, booktitle = {Proceedings of the 26th {Annual} {Conference} on {Learning} {Theory}}, publisher = {PMLR}, author = {Wang, Yining and Wang, Liwei and Li, Yuanzhi and He, Di and Liu, Tie-Yan}, month = jun, year = {2013}, note = {ISSN: 1938-7228}, pages = {25--54}, }
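The measure analysed above, NDCG with a logarithmic discount, is short enough to implement directly. The sketch below uses the common (2^rel - 1) gain convention; other gain conventions exist.

```python
# NDCG@k with the standard logarithmic discount analysed in the paper.
import numpy as np

def dcg_at_k(relevances, k):
    rel = np.asarray(relevances, dtype=float)[:k]
    discounts = np.log2(np.arange(2, rel.size + 2))   # log2(i + 1), i starting at 1
    return np.sum((2.0 ** rel - 1.0) / discounts)

def ndcg_at_k(relevances, k):
    ideal = dcg_at_k(sorted(relevances, reverse=True), k)
    return dcg_at_k(relevances, k) / ideal if ideal > 0 else 0.0

# Relevance grades in the order the ranker returned the items.
print(ndcg_at_k([3, 2, 3, 0, 1, 2], k=5))
```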
@article{liu_change-point_2013, title = {Change-point detection in time-series data by relative density-ratio estimation}, volume = {43}, issn = {0893-6080}, url = {https://www.sciencedirect.com/science/article/pii/S0893608013000270}, doi = {10.1016/j.neunet.2013.01.012}, abstract = {The objective of change-point detection is to discover abrupt property changes lying behind time-series data. In this paper, we present a novel statistical change-point detection algorithm based on non-parametric divergence estimation between time-series samples from two retrospective segments. Our method uses the relative Pearson divergence as a divergence measure, and it is accurately and efficiently estimated by a method of direct density-ratio estimation. Through experiments on artificial and real-world datasets including human-activity sensing, speech, and Twitter messages, we demonstrate the usefulness of the proposed method.}, language = {en}, urldate = {2022-08-25}, journal = {Neural Networks}, author = {Liu, Song and Yamada, Makoto and Collier, Nigel and Sugiyama, Masashi}, month = jul, year = {2013}, keywords = {Change-point detection, Distribution comparison, Kernel methods, Relative density-ratio estimation, Time-series data}, pages = {72--83}, }
@article{lau_fault_2013, title = {Fault diagnosis of {Tennessee} {Eastman} process with multi-scale {PCA} and {ANFIS}}, volume = {120}, issn = {0169-7439}, url = {https://www.sciencedirect.com/science/article/pii/S0169743912002080}, doi = {10.1016/j.chemolab.2012.10.005}, abstract = {Fault diagnosis in industrial processes are challenging tasks that demand effective and timely decision making procedures under the extreme conditions of noisy measurements, highly interrelated data, large number of inputs and complex interaction between the symptoms and faults. The purpose of this study is to develop an online fault diagnosis framework for a dynamical process incorporating multi-scale principal component analysis (MSPCA) for feature extraction and adaptive neuro-fuzzy inference system (ANFIS) for learning the fault-symptom correlation from the process historical data. The features extracted from raw measured data sets using MSPCA are partitioned into score space and residual space which are then fed into multiple ANFIS classifiers in order to diagnose different faults. This data-driven based method extracts fault-symptom correlation from the data eliminating the use of process model. The use of multiple ANFIS classifiers for fault diagnosis with each dedicated to one specific fault, reduces the computational load and provides an expandable framework to incorporate new fault identified in the process. Also, the use of MSPCA enables the detection of small changes occurring in the measured variables and the proficiency of the system is improved by monitoring the subspace which is most sensitive to the faults. The proposed MSPCA-ANFIS based framework is tested on the Tennessee Eastman (TE) process and results for the selected fault cases, particularly those which exhibit highly non-linear characteristics, show improvement over the conventional multivariate PCA as well as the conventional PCA-ANFIS based methods.}, language = {en}, urldate = {2022-05-02}, journal = {Chemometrics and Intelligent Laboratory Systems}, author = {Lau, C. K. and Ghosh, Kaushik and Hussain, M. A. and Che Hassan, C. R.}, month = jan, year = {2013}, keywords = {ANFIS, Feature extraction, MSPCA, Online fault diagnosis, Tennessee Eastman process}, pages = {1--14}, }
@article{grbic_adaptive_2013, title = {Adaptive soft sensor for online prediction and process monitoring based on a mixture of {Gaussian} process models}, volume = {58}, issn = {0098-1354}, url = {https://www.sciencedirect.com/science/article/pii/S0098135413002081}, doi = {10.1016/j.compchemeng.2013.06.014}, abstract = {Linear models can be inappropriate when dealing with nonlinear and multimode processes, leading to a soft sensor with poor performance. Due to time-varying process behaviour it is necessary to derive and implement some kind of adaptation mechanism in order to keep the soft sensor performance at a desired level. Therefore, an adaptation mechanism for a soft sensor based on a mixture of Gaussian process regression models is proposed in this paper. A procedure for input variable selection based on mutual information is also presented. This procedure selects the most important input variables for output variable prediction, thus simplifying model development and adaptation. Apart from online prediction of the difficult-to-measure variable, this soft sensor can be used for adaptive process monitoring. The efficiency of the proposed method is benchmarked with the commonly applied recursive PLS and recursive PCA method on the Tennessee Eastman process and two real industrial examples.}, language = {en}, urldate = {2022-05-02}, journal = {Computers \& Chemical Engineering}, author = {Grbić, Ratko and Slišković, Dražen and Kadlec, Petr}, month = nov, year = {2013}, keywords = {Adaptive soft sensor, Gaussian process regression, Mutual information, Online prediction, Process modelling, Process monitoring}, pages = {84--97}, }
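The soft sensor above relies on an adaptive mixture of Gaussian process models. The sketch below shows only the non-adaptive core, a single Gaussian process regressor as implemented in scikit-learn, trained on synthetic process data; it is not the paper's method.

```python
# Hedged sketch of a Gaussian-process soft sensor for a difficult-to-measure
# variable, using a single GP rather than the paper's adaptive mixture.
import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, WhiteKernel

rng = np.random.default_rng(0)
X = rng.uniform(0, 10, size=(150, 2))                   # easy-to-measure inputs
y = np.sin(X[:, 0]) + 0.3 * X[:, 1] + rng.normal(scale=0.1, size=150)

gp = GaussianProcessRegressor(kernel=RBF(length_scale=1.0) + WhiteKernel(),
                              normalize_y=True, random_state=0).fit(X, y)

X_new = np.array([[2.5, 4.0], [7.0, 1.0]])
mean, std = gp.predict(X_new, return_std=True)          # prediction + uncertainty
print(mean, std)
```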
@article{mikolov_efficient_2013, title = {Efficient {Estimation} of {Word} {Representations} in {Vector} {Space}}, url = {http://arxiv.org/abs/1301.3781}, abstract = {We propose two novel model architectures for computing continuous vector representations of words from very large data sets. The quality of these representations is measured in a word similarity task, and the results are compared to the previously best performing techniques based on different types of neural networks. We observe large improvements in accuracy at much lower computational cost, i.e. it takes less than a day to learn high quality word vectors from a 1.6 billion words data set. Furthermore, we show that these vectors provide state-of-the-art performance on our test set for measuring syntactic and semantic word similarities.}, urldate = {2022-04-25}, journal = {arXiv:1301.3781 [cs]}, author = {Mikolov, Tomas and Chen, Kai and Corrado, Greg and Dean, Jeffrey}, month = sep, year = {2013}, note = {arXiv: 1301.3781}, keywords = {Computer Science - Computation and Language}, }
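A hedged usage sketch of the skip-gram model introduced above, via the gensim library. The parameter names assume gensim >= 4.0 (where `size` became `vector_size`), and the corpus and hyperparameters are toy values.

```python
# Hedged gensim usage sketch of the skip-gram model (sg=1); parameter names
# follow gensim >= 4.0, and the toy corpus is invented.
from gensim.models import Word2Vec

corpus = [
    ["bearing", "vibration", "increased", "before", "failure"],
    ["pump", "temperature", "increased", "before", "failure"],
    ["bearing", "replaced", "after", "inspection"],
]

model = Word2Vec(sentences=corpus, vector_size=32, window=2,
                 min_count=1, sg=1, epochs=50, seed=0)

print(model.wv["bearing"].shape)            # 32-dimensional embedding
print(model.wv.most_similar("bearing", topn=2))
```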
@article{bengio_representation_2013, title = {Representation {Learning}: {A} {Review} and {New} {Perspectives}}, volume = {35}, issn = {1939-3539}, shorttitle = {Representation {Learning}}, doi = {10.1109/TPAMI.2013.50}, abstract = {The success of machine learning algorithms generally depends on data representation, and we hypothesize that this is because different representations can entangle and hide more or less the different explanatory factors of variation behind the data. Although specific domain knowledge can be used to help design representations, learning with generic priors can also be used, and the quest for AI is motivating the design of more powerful representation-learning algorithms implementing such priors. This paper reviews recent work in the area of unsupervised feature learning and deep learning, covering advances in probabilistic models, autoencoders, manifold learning, and deep networks. This motivates longer term unanswered questions about the appropriate objectives for learning good representations, for computing representations (i.e., inference), and the geometrical connections between representation learning, density estimation, and manifold learning.}, number = {8}, journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence}, author = {Bengio, Yoshua and Courville, Aaron and Vincent, Pascal}, month = aug, year = {2013}, note = {Conference Name: IEEE Transactions on Pattern Analysis and Machine Intelligence}, keywords = {Abstracts, Boltzmann machine, Deep learning, Feature extraction, Learning systems, Machine learning, Manifolds, Neural networks, Speech recognition, autoencoder, feature learning, neural nets, representation learning, unsupervised learning}, pages = {1798--1828}, }
@article{silva_data_2013, title = {Data stream clustering: {A} survey}, volume = {46}, issn = {0360-0300}, shorttitle = {Data stream clustering}, url = {https://doi.org/10.1145/2522968.2522981}, doi = {10.1145/2522968.2522981}, abstract = {Data stream mining is an active research area that has recently emerged to discover knowledge from large amounts of continuously generated data. In this context, several data stream clustering algorithms have been proposed to perform unsupervised learning. Nevertheless, data stream clustering imposes several challenges to be addressed, such as dealing with nonstationary, unbounded data that arrive in an online fashion. The intrinsic nature of stream data requires the development of algorithms capable of performing fast and incremental processing of data objects, suitably addressing time and memory limitations. In this article, we present a survey of data stream clustering algorithms, providing a thorough discussion of the main design components of state-of-the-art algorithms. In addition, this work addresses the temporal aspects involved in data stream clustering, and presents an overview of the usually employed experimental methodologies. A number of references are provided that describe applications of data stream clustering in different domains, such as network intrusion detection, sensor networks, and stock market analysis. Information regarding software packages and data repositories are also available for helping researchers and practitioners. Finally, some important issues and open questions that can be subject of future research are discussed.}, number = {1}, urldate = {2022-03-25}, journal = {ACM Computing Surveys}, author = {Silva, Jonathan A. and Faria, Elaine R. and Barros, Rodrigo C. and Hruschka, Eduardo R. and Carvalho, André C. P. L. F. de and Gama, João}, month = jul, year = {2013}, keywords = {Data stream clustering, online clustering}, pages = {13:1--13:31}, }
@inproceedings{bifet_cd-moa_2013, address = {Berlin, Heidelberg}, series = {{IDA} 2013}, title = {{CD}-{MOA}: {Change} {Detection} {Framework} for {Massive} {Online} {Analysis}}, isbn = {978-3-642-41397-1}, shorttitle = {{CD}-{MOA}}, url = {https://doi.org/10.1007/978-3-642-41398-8_9}, doi = {10.1007/978-3-642-41398-8_9}, abstract = {Analysis of data from networked digital information systems such as mobile devices, remote sensors, and streaming applications, needs to deal with two challenges: the size of data and the capacity to be adaptive to changes in concept in real-time. Many approaches meet the challenge by using an explicit change detector alongside a classification algorithm and then evaluate performance using classification accuracy. However, there is an unexpected connection between change detectors and classification methods that needs to be acknowledged. The phenomenon has been observed previously, connecting high classification performance with high false positive rates. The implication is that we need to be careful to evaluate systems against intended outcomes---high classification rates, low false alarm rates, compromises between the two and so forth. This paper proposes a new experimental framework for evaluating change detection methods against intended outcomes. The framework is general in the sense that it can be used with other data mining tasks such as frequent item and pattern mining, clustering etc. Included in the framework is a new measure of performance of a change detector that monitors the compromise between fast detection and false alarms. Using this new experimental framework we conduct an evaluation study on synthetic and real-world datasets to show that classification performance is indeed a poor proxy for change detection performance and provide further evidence that classification performance is correlated strongly with the use of change detectors that produce high false positive rates.}, urldate = {2022-03-17}, booktitle = {Proceedings of the 12th {International} {Symposium} on {Advances} in {Intelligent} {Data} {Analysis} {XII} - {Volume} 8207}, publisher = {Springer-Verlag}, author = {Bifet, Albert and Read, Jesse and Pfahringer, Bernhard and Holmes, Geoff and Žliobaitė, Indrė}, month = oct, year = {2013}, keywords = {data streams, dynamic, evolving, incremental, online}, pages = {92--103}, }
@inproceedings{casanova_diagnosing_2013, title = {Diagnosing architectural run-time failures}, doi = {10.1109/SEAMS.2013.6595497}, abstract = {Self-diagnosis is a fundamental capability of self-adaptive systems. In order to recover from faults, systems need to know which part is responsible for the incorrect behavior. In previous work we showed how to apply a design-time diagnosis technique at run time to identify faults at the architectural level of a system. Our contributions address three major shortcomings of our previous work: 1) we present an expressive, hierarchical language to describe system behavior that can be used to diagnose when a system is behaving different to expectation; the hierarchical language facilitates mapping low level system events to architecture level events; 2) we provide an automatic way to determine how much data to collect before an accurate diagnosis can be produced; and 3) we develop a technique that allows the detection of correlated faults between components. Our results are validated experimentally by injecting several failures in a system and accurately diagnosing them using our algorithm.}, booktitle = {2013 8th {International} {Symposium} on {Software} {Engineering} for {Adaptive} and {Self}-{Managing} {Systems} ({SEAMS})}, author = {Casanova, Paulo and Garlan, David and Schmerl, Bradley and Abreu, Rui}, month = may, year = {2013}, note = {ISSN: 2157-2321}, keywords = {Cognition, Computational modeling, Databases, Fault diagnosis, Monitoring, Probes, Web servers}, pages = {103--112}, }
@article{cabasino_diagnosis_2013, title = {Diagnosis {Using} {Labeled} {Petri} {Nets} {With} {Silent} or {Undistinguishable} {Fault} {Events}}, volume = {43}, issn = {2168-2232}, doi = {10.1109/TSMCA.2012.2199307}, abstract = {A commonplace assumption in the fault diagnosis of discrete event systems (DESs) is that of modeling faulty events with unobservable transitions, i.e., transitions whose occurrence does not produce any observable label. The diagnostic system must thus infer the occurrence of a fault from the observed behavior corresponding to the firing of nonfaulty transitions. The presence of nonfaulty unobservable transitions is a source of additional complexity in the diagnostic procedure. In this paper, we assume that fault events can also be modeled by observable transitions, i.e., transitions whose occurrence produces an observable label. This does not mean, however, that the occurrence of such a transition can be unambiguously detected: In fact, the same label may be shared with other fault transitions (e.g., belonging to different fault classes) or with other nonfaulty transitions. We generalize to this new setting our previous results on the diagnosis of DESs using Petri nets based on the notions of minimal explanations and basis markings. The presented procedure does not require the enumeration of the complete reachability set but only of the subset of basis markings, thus reducing the computational complexity of solving a diagnosis problem.}, number = {2}, journal = {IEEE Transactions on Systems, Man, and Cybernetics: Systems}, author = {Cabasino, Maria Paola and Giua, Alessandro and Seatzu, Carla}, month = mar, year = {2013}, note = {Conference Name: IEEE Transactions on Systems, Man, and Cybernetics: Systems}, keywords = {Artificial neural networks, Discrete event systems, Fault detection, Fault tolerance, Fault tolerant systems, Labeling, Petri nets, Slabs, Vectors, diagnostics, fault detection}, pages = {345--355}, }
@article{arbelaitz_extensive_2013, title = {An extensive comparative study of cluster validity indices}, volume = {46}, issn = {0031-3203}, url = {https://www.sciencedirect.com/science/article/pii/S003132031200338X}, doi = {10.1016/j.patcog.2012.07.021}, abstract = {The validation of the results obtained by clustering algorithms is a fundamental part of the clustering process. The most used approaches for cluster validation are based on internal cluster validity indices. Although many indices have been proposed, there is no recent extensive comparative study of their performance. In this paper we show the results of an experimental work that compares 30 cluster validity indices in many different environments with different characteristics. These results can serve as a guideline for selecting the most suitable index for each possible application and provide a deep insight into the performance differences between the currently available indices.}, language = {en}, number = {1}, urldate = {2021-10-21}, journal = {Pattern Recognition}, author = {Arbelaitz, Olatz and Gurrutxaga, Ibai and Muguerza, Javier and Pérez, Jesús M. and Perona, Iñigo}, month = jan, year = {2013}, keywords = {Cluster validity index, Comparative analysis, Crisp clustering, cluster validity, clustering}, pages = {243--256}, }
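Several internal validity indices of the kind compared in the study above ship with scikit-learn. The sketch below scores k-means partitions of a synthetic dataset with three of them; the paper itself compares thirty indices across many more settings.

```python
# Three internal cluster validity indices of the kind compared in this study,
# as implemented in scikit-learn, evaluated on synthetic blobs.
from sklearn.cluster import KMeans
from sklearn.datasets import make_blobs
from sklearn.metrics import (silhouette_score, davies_bouldin_score,
                             calinski_harabasz_score)

X, _ = make_blobs(n_samples=600, centers=4, cluster_std=0.8, random_state=0)

for k in (2, 3, 4, 5, 6):
    labels = KMeans(n_clusters=k, n_init=10, random_state=0).fit_predict(X)
    print(k,
          round(silhouette_score(X, labels), 3),          # higher is better
          round(davies_bouldin_score(X, labels), 3),      # lower is better
          round(calinski_harabasz_score(X, labels), 1))   # higher is better
```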
@article{brussow_what_2013, title = {What is health?}, volume = {6}, issn = {1751-7915}, url = {https://onlinelibrary.wiley.com/doi/abs/10.1111/1751-7915.12063}, doi = {10.1111/1751-7915.12063}, abstract = {Classical medical research is disease focused and still defines health as absence of disease. Languages, however, associate a positive concept of wholeness with health as does the WHO health definition. Newer medical health definitions emphasize the capacity to adapt to changing external and internal circumstances. The results of the 2010 Global Burden of Disease study provides keys for a quantifiable health metrics by developing statistical tools calculating healthy life expectancy. Of central social and economic importance is the question whether healthy ageing can be achieved. This concept hinges on theories on the biological basis of lifespan determination and whether negligible senescence and the compression of morbidity can be achieved in human societies. Since the health impact of the human gut microbiome is currently a topical research area, microbiologists should be aware of the problems in defining health.}, language = {en}, number = {4}, urldate = {2021-10-18}, journal = {Microbial Biotechnology}, author = {Brüssow, Harald}, year = {2013}, note = {\_eprint: https://onlinelibrary.wiley.com/doi/pdf/10.1111/1751-7915.12063}, pages = {341--348}, }
@article{rutkowski_decision_2013, title = {Decision {Trees} for {Mining} {Data} {Streams} {Based} on the {McDiarmid}'s {Bound}}, volume = {25}, issn = {1558-2191}, doi = {10.1109/TKDE.2012.66}, abstract = {In mining data streams the most popular tool is the Hoeffding tree algorithm. It uses the Hoeffding's bound to determine the smallest number of examples needed at a node to select a splitting attribute. In the literature the same Hoeffding's bound was used for any evaluation function (heuristic measure), e.g., information gain or Gini index. In this paper, it is shown that the Hoeffding's inequality is not appropriate to solve the underlying problem. We prove two theorems presenting the McDiarmid's bound for both the information gain, used in ID3 algorithm, and for Gini index, used in Classification and Regression Trees (CART) algorithm. The results of the paper guarantee that a decision tree learning system, applied to data streams and based on the McDiarmid's bound, has the property that its output is nearly identical to that of a conventional learner. The results of the paper have a great impact on the state of the art of mining data streams and various developed so far methods and algorithms should be reconsidered.}, number = {6}, journal = {IEEE Transactions on Knowledge and Data Engineering}, author = {Rutkowski, Leszek and Pietruczuk, Lena and Duda, Piotr and Jaworski, Maciej}, month = jun, year = {2013}, note = {Conference Name: IEEE Transactions on Knowledge and Data Engineering}, keywords = {Data mining, Data streams, Decision trees, Entropy, Gain measurement, Gini index, Hoeffding's bound, Indexes, Learning systems, McDiarmid's bound, Random variables, decision trees, information gain}, pages = {1272--1279}, }
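Hoeffding-tree learners split a node once the gap between the two best heuristic values exceeds the Hoeffding bound epsilon = sqrt(R^2 ln(1/delta) / (2n)); the entry above argues this bound is misapplied to heuristics such as information gain and Gini index and proposes McDiarmid's bound instead (a premise the Matuszyk et al. entry further below also revisits). The sketch below shows the standard split rule being questioned; the gain values are illustrative.

```python
# The split rule used by Hoeffding-tree learners, whose statistical premises
# this paper revisits. Observed gain values are illustrative.
import math

def hoeffding_bound(value_range, delta, n):
    """epsilon such that the true mean lies within epsilon of the observed mean
    with probability 1 - delta, after n independent observations."""
    return math.sqrt((value_range ** 2) * math.log(1.0 / delta) / (2.0 * n))

n_examples, n_classes, delta = 5000, 2, 1e-6
R = math.log2(n_classes)                    # range of information gain
eps = hoeffding_bound(R, delta, n_examples)

best_gain, second_gain = 0.42, 0.37         # observed gains of the two best attributes
if best_gain - second_gain > eps:
    print(f"split on the best attribute (gap {best_gain - second_gain:.3f} > eps {eps:.3f})")
else:
    print(f"keep collecting examples (gap {best_gain - second_gain:.3f} <= eps {eps:.3f})")
```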
@inproceedings{malinowski_1d-sax_2013, address = {Berlin, Heidelberg}, series = {Lecture {Notes} in {Computer} {Science}}, title = {1d-{SAX}: {A} {Novel} {Symbolic} {Representation} for {Time} {Series}}, isbn = {978-3-642-41398-8}, shorttitle = {1d-{SAX}}, doi = {10.1007/978-3-642-41398-8_24}, abstract = {SAX (Symbolic Aggregate approXimation) is one of the main symbolization techniques for time series. A well-known limitation of SAX is that trends are not taken into account in the symbolization. This paper proposes 1d-SAX a method to represent a time series as a sequence of symbols that each contain information about the average and the trend of the series on a segment. We compare the efficiency of SAX and 1d-SAX in terms of goodness-of-fit, retrieval and classification performance for querying a time series database with an asymmetric scheme. The results show that 1d-SAX improves performance using equal quantity of information, especially when the compression rate increases.}, language = {en}, booktitle = {Advances in {Intelligent} {Data} {Analysis} {XII}}, publisher = {Springer}, author = {Malinowski, Simon and Guyet, Thomas and Quiniou, René and Tavenard, Romain}, editor = {Tucker, Allan and Höppner, Frank and Siebes, Arno and Swift, Stephen}, year = {2013}, keywords = {Average Approximation Error, Dynamic Time Warping Distance, Original Time Series, Symbolic Representation, Time Series}, pages = {273--284}, }
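1d-SAX extends plain SAX with a per-segment slope symbol. The sketch below implements only the plain SAX baseline (z-normalization, piecewise aggregate approximation, equiprobable Gaussian breakpoints); the slope quantization that 1d-SAX adds is omitted.

```python
# Minimal SAX (the baseline that 1d-SAX extends with per-segment slopes):
# z-normalize, piecewise aggregate approximation (PAA), then quantize segment
# means with equiprobable Gaussian breakpoints.
import numpy as np
from scipy.stats import norm

def sax(series, n_segments, alphabet_size):
    x = np.asarray(series, dtype=float)
    x = (x - x.mean()) / (x.std() + 1e-12)                 # z-normalization
    paa = np.array([seg.mean() for seg in np.array_split(x, n_segments)])
    breakpoints = norm.ppf(np.arange(1, alphabet_size) / alphabet_size)
    symbols = np.digitize(paa, breakpoints)                # 0 .. alphabet_size-1
    return "".join(chr(ord("a") + s) for s in symbols)

t = np.linspace(0, 4 * np.pi, 256)
print(sax(np.sin(t), n_segments=8, alphabet_size=4))   # two sine periods -> alternating blocks like 'ddaaddaa'
```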
@inproceedings{matuszyk_correcting_2013, address = {Berlin, Heidelberg}, series = {Lecture {Notes} in {Computer} {Science}}, title = {Correcting the {Usage} of the {Hoeffding} {Inequality} in {Stream} {Mining}}, isbn = {978-3-642-41398-8}, doi = {10.1007/978-3-642-41398-8_26}, abstract = {Many stream classification algorithms use the Hoeffding Inequality to identify the best split attribute during tree induction.We show that the prerequisites of the Inequality are violated by these algorithms, and we propose corrective steps. The new stream classification core, correctedVFDT, satisfies the prerequisites of the Hoeffding Inequality and thus provides the expected performance guarantees.The goal of our work is not to improve accuracy, but to guarantee a reliable and interpretable error bound. Nonetheless, we show that our solution achieves lower error rates regarding split attributes and sooner split decisions while maintaining a similar level of accuracy.}, language = {en}, booktitle = {Advances in {Intelligent} {Data} {Analysis} {XII}}, publisher = {Springer}, author = {Matuszyk, Pawel and Krempl, Georg and Spiliopoulou, Myra}, editor = {Tucker, Allan and Höppner, Frank and Siebes, Arno and Swift, Stephen}, year = {2013}, keywords = {Concept Drift, Incorrect Decision, Information Gain, Split Attribute, Split Function}, pages = {298--309}, }
@article{van_horenbeek_dynamic_2013, title = {A dynamic predictive maintenance policy for complex multi-component systems}, volume = {120}, issn = {0951-8320}, url = {https://www.sciencedirect.com/science/article/pii/S0951832013000732}, doi = {10.1016/j.ress.2013.02.029}, abstract = {The use of prognostic methods in maintenance in order to predict remaining useful life is receiving more attention over the past years. The use of these techniques in maintenance decision making and optimization in multi-component systems is however a still underexplored area. The objective of this paper is to optimally plan maintenance for a multi-component system based on prognostic/predictive information while considering different component dependencies (i.e. economic, structural and stochastic dependence). Consequently, this paper presents a dynamic predictive maintenance policy for multi-component systems that minimizes the long-term mean maintenance cost per unit time. The proposed maintenance policy is a dynamic method as the maintenance schedule is updated when new information on the degradation and remaining useful life of components becomes available. The performance, regarding the objective of minimal long-term mean cost per unit time, of the developed dynamic predictive maintenance policy is compared to five other conventional maintenance policies, these are: block-based maintenance, age-based maintenance, age-based maintenance with grouping, inspection condition-based maintenance and continuous condition-based maintenance. The ability of the predictive maintenance policy to react to changing component deterioration and dependencies within a multi-component system is quantified and the results show significant cost savings.}, language = {en}, urldate = {2021-09-28}, journal = {Reliability Engineering \& System Safety}, author = {Van Horenbeek, Adriaan and Pintelon, Liliane}, month = dec, year = {2013}, keywords = {Maintenance grouping, Maintenance optimization, Multi-component system, Predictive maintenance policy}, pages = {39--50}, }
@inproceedings{cheng_nuactiv_2013, address = {New York, NY, USA}, series = {{MobiSys} '13}, title = {{NuActiv}: recognizing unseen new activities using semantic attribute-based learning}, isbn = {978-1-4503-1672-9}, shorttitle = {{NuActiv}}, url = {https://doi.org/10.1145/2462456.2464438}, doi = {10.1145/2462456.2464438}, abstract = {We study the problem of how to recognize a new human activity when we have never seen any training example of that activity before. Recognizing human activities is an essential element for user-centric and context-aware applications. Previous studies showed promising results using various machine learning algorithms. However, most existing methods can only recognize the activities that were previously seen in the training data. A previously unseen activity class cannot be recognized if there were no training samples in the dataset. Even if all of the activities can be enumerated in advance, labeled samples are often time consuming and expensive to get, as they require huge effort from human annotators or experts. In this paper, we present NuActiv, an activity recognition system that can recognize a human activity even when there are no training data for that activity class. Firstly, we designed a new representation of activities using semantic attributes, where each attribute is a human readable term that describes a basic element or an inherent characteristic of an activity. Secondly, based on this representation, a two-layer zero-shot learning algorithm is developed for activity recognition. Finally, to reinforce recognition accuracy using minimal user feedback, we developed an active learning algorithm for activity recognition. Our approach is evaluated on two datasets, including a 10-exercise-activity dataset we collected, and a public dataset of 34 daily life activities. Experimental results show that using semantic attribute-based learning, NuActiv can generalize knowledge to recognize unseen new activities. Our approach achieved up to 79\% accuracy in unseen activity recognition.}, urldate = {2021-09-10}, booktitle = {Proceeding of the 11th annual international conference on {Mobile} systems, applications, and services}, publisher = {Association for Computing Machinery}, author = {Cheng, Heng-Tze and Sun, Feng-Tso and Griss, Martin and Davis, Paul and Li, Jianguo and You, Di}, month = jun, year = {2013}, keywords = {active learning, activity recognition, context-aware computing, machine learning, mobile sensing, semantic attributes, wearable computing, zero-shot learning}, pages = {361--374}, }
@inproceedings{almeida_adaptive_2013, address = {Berlin, Heidelberg}, series = {Lecture {Notes} in {Computer} {Science}}, title = {Adaptive {Model} {Rules} from {Data} {Streams}}, isbn = {978-3-642-40988-2}, doi = {10.1007/978-3-642-40988-2_31}, language = {en}, booktitle = {Machine {Learning} and {Knowledge} {Discovery} in {Databases}}, publisher = {Springer}, author = {Almeida, Ezilda and Ferreira, Carlos and Gama, João}, editor = {Blockeel, Hendrik and Kersting, Kristian and Nijssen, Siegfried and Železný, Filip}, year = {2013}, keywords = {Data Streams, Regression, Rule Learning}, pages = {480--492}, }
@article{si_wiener-process-based_2013, title = {A {Wiener}-process-based degradation model with a recursive filter algorithm for remaining useful life estimation}, volume = {35}, issn = {0888-3270}, url = {http://www.sciencedirect.com/science/article/pii/S0888327012003226}, doi = {10.1016/j.ymssp.2012.08.016}, abstract = {Remaining useful life estimation (RUL) is an essential part in prognostics and health management. This paper addresses the problem of estimating the RUL from the observed degradation data. A Wiener-process-based degradation model with a recursive filter algorithm is developed to achieve the aim. A novel contribution made in this paper is the use of both a recursive filter to update the drift coefficient in the Wiener process and the expectation maximization (EM) algorithm to update all other parameters. Both updating are done at the time that a new piece of degradation data becomes available. This makes the model depend on the observed degradation data history, which the conventional Wiener-process-based models did not consider. Another contribution is to take into account the distribution in the drift coefficient when updating, rather than using a point estimate as an approximation. An exact RUL distribution considering the distribution of the drift coefficient is obtained based on the concept of the first hitting time. A practical case study for gyros in an inertial navigation system is provided to substantiate the superiority of the proposed model compared with competing models reported in the literature. The results show that our developed model can provide better RUL estimation accuracy.}, number = {1}, journal = {Mechanical Systems and Signal Processing}, author = {Si, Xiao-Sheng and Wang, Wenbin and Hu, Chang-Hua and Chen, Mao-Yin and Zhou, Dong-Hua}, year = {2013}, keywords = {Expectation maximization, Recursive filter, Reliability, Remaining useful life, Wiener process}, pages = {219--237}, }
@book{tinga_principles_2013, series = {Springer {Series} in {Reliability} {Engineering}}, title = {Principles of loads and failure mechanisms: {Applications} in maintenance, reliability and design}, isbn = {978-1-4471-4916-3}, language = {en}, publisher = {Springer}, author = {Tinga, Tiedo}, year = {2013}, doi = {10.1007/978-1-4471-4917-0}, }
@techreport{kagermann_securing_2013, title = {Securing the future of {German} manufacturing industry: {Recommendations} for implementing the strategic initiative {INDUSTRIE} 4.0 ({Final} report of the {Industrie} 4.0 {Working} {Group})}, institution = {acatech -- National Academy of Science and Engineering}, author = {Kagermann, H. and Wahlster, W. and Helbig, J.}, year = {2013}, }
@article{palem_condition-based_2013, title = {Condition-{Based} {Maintenance} using {Sensor} {Arrays} and {Telematics}}, volume = {3}, doi = {10.5121/ijmnct.2013.3303}, journal = {International Journal of Mobile Network Communications \& Telematics}, author = {Palem, Gopalakrishna}, year = {2013}, pages = {19--28}, }
@inproceedings{isaksson_sostream_2012, address = {Berlin, Heidelberg}, series = {Lecture {Notes} in {Computer} {Science}}, title = {{SOStream}: {Self} {Organizing} {Density}-{Based} {Clustering} over {Data} {Stream}}, isbn = {978-3-642-31537-4}, shorttitle = {{SOStream}}, doi = {10.1007/978-3-642-31537-4_21}, abstract = {In this paper we propose a data stream clustering algorithm, called Self Organizing density based clustering over data Stream (SOStream). This algorithm has several novel features. Instead of using a fixed, user defined similarity threshold or a static grid, SOStream detects structure within fast evolving data streams by automatically adapting the threshold for density-based clustering. It also employs a novel cluster updating strategy which is inspired by competitive learning techniques developed for Self Organizing Maps (SOMs). In addition, SOStream has built-in online functionality to support advanced stream clustering operations including merging and fading. This makes SOStream completely online with no separate offline components. Experiments performed on KDD Cup’99 and artificial datasets indicate that SOStream is an effective and superior algorithm in creating clusters of higher purity while having lower space and time requirements compared to previous stream clustering algorithms.}, language = {en}, booktitle = {Machine {Learning} and {Data} {Mining} in {Pattern} {Recognition}}, publisher = {Springer}, author = {Isaksson, Charlie and Dunham, Margaret H. and Hahsler, Michael}, editor = {Perner, Petra}, year = {2012}, keywords = {Adaptive Threshold, Data Stream Clustering, Density-Based Clustering, Self Organizing Maps}, pages = {264--278}, }
@inproceedings{hassani_density-based_2012, address = {Berlin, Heidelberg}, series = {Lecture {Notes} in {Computer} {Science}}, title = {Density-{Based} {Projected} {Clustering} of {Data} {Streams}}, isbn = {978-3-642-33362-0}, doi = {10.1007/978-3-642-33362-0_24}, abstract = {In this paper, we have proposed, developed and experimentally validated our novel subspace data stream clustering, termed PreDeConStream. The technique is based on the two phase mode of mining streaming data, in which the first phase represents the process of the online maintenance of a data structure, that is then passed to an offline phase of generating the final clustering model. The technique works on incrementally updating the output of the online phase stored in a micro-cluster structure, taking into consideration those micro-clusters that are fading out over time, speeding up the process of assigning new data points to existing clusters. A density based projected clustering model in developing PreDeConStream was used. With many important applications that can benefit from such technique, we have proved experimentally the superiority of the proposed methods over state-of-the-art techniques.}, language = {en}, booktitle = {Scalable {Uncertainty} {Management}}, publisher = {Springer}, author = {Hassani, Marwan and Spaus, Pascal and Gaber, Mohamed Medhat and Seidl, Thomas}, editor = {Hüllermeier, Eyke and Link, Sebastian and Fober, Thomas and Seeger, Bernhard}, year = {2012}, pages = {311--324}, }
@incollection{ntoutsi_density-based_2012, series = {Proceedings}, title = {Density-based {Projected} {Clustering} over {High} {Dimensional} {Data} {Streams}}, isbn = {978-1-61197-232-0}, url = {https://epubs.siam.org/doi/abs/10.1137/1.9781611972825.85}, abstract = {Clustering of high dimensional data streams is an important problem in many application domains, a prominent example being network monitoring. Several approaches have been lately proposed for solving independently the different aspects of the problem. There exist methods for clustering over full dimensional streams and methods for finding clusters in subspaces of high dimensional static data. Yet only a few approaches have been proposed so far which tackle both the stream and the high dimensionality aspects of the problem simultaneously. In this work, we propose a new density-based projected clustering algorithm, HDDSTREAM, for high dimensional data streams. Our algorithm summarizes both the data points and the dimensions where these points are grouped together and maintains these summaries online, as new points arrive over time and old points expire due to ageing. Our experimental results illustrate the effectiveness and the efficiency of HDDSTREAM and also demonstrate that it could serve as a trigger for detecting drastic changes in the underlying stream population, like bursts of network attacks.}, urldate = {2023-02-16}, booktitle = {Proceedings of the 2012 {SIAM} {International} {Conference} on {Data} {Mining} ({SDM})}, publisher = {Society for Industrial and Applied Mathematics}, author = {Ntoutsi, Irene and Zimek, Arthur and Palpanas, Themis and Kröger, Peer and Kriegel, Hans-Peter}, month = apr, year = {2012}, doi = {10.1137/1.9781611972825.85}, pages = {987--998}, }
@article{killick_optimal_2012, title = {Optimal {Detection} of {Changepoints} {With} a {Linear} {Computational} {Cost}}, volume = {107}, issn = {0162-1459}, url = {https://doi.org/10.1080/01621459.2012.737745}, doi = {10.1080/01621459.2012.737745}, abstract = {In this article, we consider the problem of detecting multiple changepoints in large datasets. Our focus is on applications where the number of changepoints will increase as we collect more data: for example, in genetics as we analyze larger regions of the genome, or in finance as we observe time series over longer periods. We consider the common approach of detecting changepoints through minimizing a cost function over possible numbers and locations of changepoints. This includes several established procedures for detecting changing points, such as penalized likelihood and minimum description length. We introduce a new method for finding the minimum of such cost functions and hence the optimal number and location of changepoints that has a computational cost, which, under mild conditions, is linear in the number of observations. This compares favorably with existing methods for the same problem whose computational cost can be quadratic or even cubic. In simulation studies, we show that our new method can be orders of magnitude faster than these alternative exact methods. We also compare with the binary segmentation algorithm for identifying changepoints, showing that the exactness of our approach can lead to substantial improvements in the accuracy of the inferred segmentation of the data. This article has supplementary materials available online.}, number = {500}, urldate = {2022-08-25}, journal = {Journal of the American Statistical Association}, author = {Killick, R. and Fearnhead, P. and Eckley, I. A.}, month = dec, year = {2012}, note = {Publisher: Taylor \& Francis \_eprint: https://doi.org/10.1080/01621459.2012.737745}, keywords = {Dynamic programming, PELT, Segmentation, Structural change}, pages = {1590--1598}, }
@article{liu_fault_2012, title = {Fault diagnosis using contribution plots without smearing effect on non-faulty variables}, volume = {22}, issn = {0959-1524}, url = {https://www.sciencedirect.com/science/article/pii/S0959152412001576}, doi = {10.1016/j.jprocont.2012.06.016}, abstract = {Isolating faulty variables to provide additional information about a process fault is a crucial step in the diagnosis of a process fault. There are two types of data-driven approaches for isolating faulty variables. One is the supervised method, which requires the datasets of known faults to define a fault subspace or an abnormal operating region for each faulty mode. This type of approach is not practical for an industrial process, since the known event lists might not exist for some industrial processes. The counterpart is to isolate faulty variables without a priori knowledge, using, for example, a contribution plot, which is a popular tool in the unsupervised category. However, it is well known that this approach suffers from the smearing effect, which may mislead the faulty variables of the detected faults. In the presented work, a contribution plot without the smearing effect on non-faulty variables was derived based on missing data analysis. Two benchmark examples, the continuous stirred tank reactor (CSTR) and the Tennessee Eastman (TE) process, were provided to compare the fault isolation performances of the alternatives using the missing data approach.}, language = {en}, number = {9}, urldate = {2022-05-02}, journal = {Journal of Process Control}, author = {Liu, Jialin}, month = oct, year = {2012}, keywords = {Contribution plots, Fault isolation, Missing data analysis, Principal component analysis}, pages = {1609--1623}, }
@inproceedings{bedoya_unsupervised_2012, address = {Berlin, Heidelberg}, series = {Lecture {Notes} in {Computer} {Science}}, title = {Unsupervised {Feature} {Selection} {Based} on {Fuzzy} {Clustering} for {Fault} {Detection} of the {Tennessee} {Eastman} {Process}}, isbn = {978-3-642-34654-5}, doi = {10.1007/978-3-642-34654-5_36}, abstract = {The large number of components involved in the operation of industrial processes increases its complexity, together with the likelihood of failure or unusual behaviors. In some cases, industrial processes solely depend on plant operator experience to prevent and identify failures. It has been shown that automatic identification of failures within functional states of the process brings support to the operator performance, reducing the incidence of accidents and defective products. However, increasing use of automatic measurement systems generates large amounts of information that hinders fault detection. Obtaining adequate fault identification systems requires the use of the most informative variables to cope with large amounts of data by intelligently removing redundant and irrelevant variables. In this paper, an unsupervised methodology based on fuzzy clustering is applied on fault identification of the Tennessee Eastman process. Results show that an optimal variable subset improves the classification percentages and avoid the use of unnecessary variables.}, language = {en}, booktitle = {Advances in {Artificial} {Intelligence} – {IBERAMIA} 2012}, publisher = {Springer}, author = {Bedoya, C. and Uribe, C. and Isaza, C.}, editor = {Pavón, Juan and Duque-Méndez, Néstor D. and Fuentes-Fernández, Rubén}, year = {2012}, keywords = {Tennessee Eastman process, fault detection, feature selection, fuzzy clustering}, pages = {350--360}, }
@article{yin_comparison_2012, title = {A comparison study of basic data-driven fault diagnosis and process monitoring methods on the benchmark {Tennessee} {Eastman} process}, volume = {22}, issn = {0959-1524}, url = {https://www.sciencedirect.com/science/article/pii/S0959152412001503}, doi = {10.1016/j.jprocont.2012.06.009}, abstract = {This paper provides a comparison study on the basic data-driven methods for process monitoring and fault diagnosis (PM–FD). Based on the review of these methods and their recent developments, the original ideas, implementation conditions, off-line design and on-line computation algorithms as well as computation complexity are discussed in detail. In order to further compare their performance from the application viewpoint, an industrial benchmark of Tennessee Eastman (TE) process is utilized to illustrate the efficiencies of all the discussed methods. The study results are dedicated to provide a reference for achieving successful PM–FD on large scale industrial processes. Some important remarks are finally concluded in this paper.}, language = {en}, number = {9}, urldate = {2022-05-02}, journal = {Journal of Process Control}, author = {Yin, Shen and Ding, Steven X. and Haghani, Adel and Hao, Haiyang and Zhang, Ping}, month = oct, year = {2012}, keywords = {Data-driven methods, Fault diagnosis, Process monitoring, Tennessee Eastman process}, pages = {1567--1581}, }
@article{masud_facing_2012, title = {Facing the reality of data stream classification: coping with scarcity of labeled data}, volume = {33}, issn = {0219-3116}, shorttitle = {Facing the reality of data stream classification}, url = {https://doi.org/10.1007/s10115-011-0447-8}, doi = {10.1007/s10115-011-0447-8}, abstract = {Recent approaches for classifying data streams are mostly based on supervised learning algorithms, which can only be trained with labeled data. Manual labeling of data is both costly and time consuming. Therefore, in a real streaming environment where large volumes of data appear at a high speed, only a small fraction of the data can be labeled. Thus, only a limited number of instances will be available for training and updating the classification models, leading to poorly trained classifiers. We apply a novel technique to overcome this problem by utilizing both unlabeled and labeled instances to train and update the classification model. Each classification model is built as a collection of micro-clusters using semi-supervised clustering, and an ensemble of these models is used to classify unlabeled data. Empirical evaluation of both synthetic and real data reveals that our approach outperforms state-of-the-art stream classification algorithms that use ten times more labeled data than our approach.}, language = {en}, number = {1}, urldate = {2022-03-28}, journal = {Knowledge and Information Systems}, author = {Masud, Mohammad M. and Woolam, Clay and Gao, Jing and Khan, Latifur and Han, Jiawei and Hamlen, Kevin W. and Oza, Nikunj C.}, month = oct, year = {2012}, pages = {213--244}, }
@article{ackermann_streamkm_2012, title = {{StreamKM}++: {A} clustering algorithm for data streams}, volume = {17}, issn = {1084-6654}, shorttitle = {{StreamKM}++}, url = {https://doi.org/10.1145/2133803.2184450}, doi = {10.1145/2133803.2184450}, abstract = {We develop a new \textit{k}-means clustering algorithm for data streams of points from a Euclidean space. We call this algorithm StreamKM++. Our algorithm computes a small weighted sample of the data stream and solves the problem on the sample using the \textit{k}-means++ algorithm of Arthur and Vassilvitskii (SODA '07). To compute the small sample, we propose two new techniques. First, we use an adaptive, nonuniform sampling approach similar to the \textit{k}-means++ seeding procedure to obtain small coresets from the data stream. This construction is rather easy to implement and, unlike other coreset constructions, its running time has only a small dependency on the dimensionality of the data. Second, we propose a new data structure, which we call coreset tree. The use of these coreset trees significantly speeds up the time necessary for the adaptive, nonuniform sampling during our coreset construction. We compare our algorithm experimentally with two well-known streaming implementations: BIRCH [Zhang et al. 1997] and StreamLS [Guha et al. 2003]. In terms of quality (sum of squared errors), our algorithm is comparable with StreamLS and significantly better than BIRCH (up to a factor of 2). Besides, BIRCH requires significant effort to tune its parameters. In terms of running time, our algorithm is slower than BIRCH. Comparing the running time with StreamLS, it turns out that our algorithm scales much better with increasing number of centers. We conclude that, if the first priority is the quality of the clustering, then our algorithm provides a good alternative to BIRCH and StreamLS, in particular, if the number of cluster centers is large. We also give a theoretical justification of our approach by proving that our sample set is a small coreset in low-dimensional spaces.}, urldate = {2022-03-25}, journal = {ACM Journal of Experimental Algorithmics}, author = {Ackermann, Marcel R. and Märtens, Marcus and Raupach, Christoph and Swierkot, Kamil and Lammersen, Christiane and Sohler, Christian}, month = may, year = {2012}, keywords = {Clustering, coresets, data stream, k-means}, pages = {2.4:2.1--2.4:2.30}, }
@article{shaker_iblstreams_2012, title = {{IBLStreams}: a system for instance-based classification and regression on data streams}, volume = {3}, issn = {1868-6486}, shorttitle = {{IBLStreams}}, url = {https://doi.org/10.1007/s12530-012-9059-0}, doi = {10.1007/s12530-012-9059-0}, abstract = {This paper presents an approach to learning on data streams called IBLStreams. More specifically, we introduce the main methodological concepts underlying this approach and discuss its implementation under the MOA software framework. IBLStreams is an instance-based algorithm that can be applied to classification and regression problems. In comparison to model-based methods for learning on data streams, it is conceptually simple. Moreover, as an algorithm for learning in dynamically evolving environments, it has a number of desirable properties that are not, at least not as a whole, shared by currently existing alternatives. Our experimental validation provides evidence for its flexibility and ability to adapt to changes of the environment quickly, a point of utmost importance in the data stream context. At the same time, IBLStreams turns out to be competitive to state-of-the-art methods in terms of prediction accuracy. Moreover, due to its robustness, it is applicable to streams with different characteristics.}, language = {en}, number = {4}, urldate = {2022-03-20}, journal = {Evolving Systems}, author = {Shaker, Ammar and Hüllermeier, Eyke}, month = dec, year = {2012}, pages = {235--249}, }
@incollection{bottou_stochastic_2012, address = {Berlin, Heidelberg}, series = {Lecture {Notes} in {Computer} {Science}}, title = {Stochastic {Gradient} {Descent} {Tricks}}, isbn = {978-3-642-35289-8}, url = {https://doi.org/10.1007/978-3-642-35289-8_25}, abstract = {Chapter 1 strongly advocates the stochastic back-propagation method to train neural networks. This is in fact an instance of a more general technique called stochastic gradient descent (SGD). This chapter provides background material, explains why SGD is a good learning algorithm when the training set is large, and provides useful recommendations.}, language = {en}, urldate = {2022-03-19}, booktitle = {Neural {Networks}: {Tricks} of the {Trade}: {Second} {Edition}}, publisher = {Springer}, author = {Bottou, Léon}, editor = {Montavon, Grégoire and Orr, Geneviève B. and Müller, Klaus-Robert}, year = {2012}, doi = {10.1007/978-3-642-35289-8_25}, keywords = {Conditional Random Field, Empirical Risk, Learning Rate, Stochastic Gradient, Support Vector Machine}, pages = {421--436}, }
@article{prajapati_condition_2012, title = {Condition based maintenance: a survey}, volume = {18}, issn = {1355-2511}, shorttitle = {Condition based maintenance}, url = {https://doi.org/10.1108/13552511211281552}, doi = {10.1108/13552511211281552}, abstract = {Purpose – The purpose of this paper is to provide a brief overview of condition based maintenance (CBM) with definitions of various terms, overview of some history, recent developments, applications, and research challenges in the CBM domain. Design/methodology/approach – The article presents the insight into various maintenance strategies and provides their respective merits and demerits in various aspects. It then provides the detailed discussion of CBM that includes applications of various methodologies and technologies that are being implemented in the field. Finally, it ends with open challenges in implementing condition based maintenance systems. Findings – This paper surveys research articles and describes how CBM can be used to optimize maintenance strategies and increase the feasibility and practicality of a CBM system. Practical implications – CBM systems are completely practical to implement and applicable to various domains including automotive, manufacturing, aviation, medical, etc. This paper presents a brief overview of literature on CBM and an insight into CBM as a maintenance strategy. CBM has wide applications in automotive, aviation, manufacturing, defense, and other industries. It involves various disciplines like data mining, artificial intelligence, and statistics to enable the systems to be maintenance intelligent. These disciplines help in predicting the future consequences based on the past and current system conditions. Based on the authors’ studies, implementation of such a system is easy and cost effective because it uses existing subsystems to collect statistical data. On top of that it requires building a software layer to process the data and to implement the prognosis techniques in the form of algorithms. Social implications – The design of CBM systems highly impact the society in terms of maintenance cost (i.e. reduces the maintenance cost of automobiles, safety by providing real time reporting of the fault using prognosis). Originality/value – To the best of the authors’ knowledge, this paper is first of its kind in the literature which presents several maintenance strategies and provides a number of possible research directions listed in open research challenges.}, number = {4}, urldate = {2022-03-05}, journal = {Journal of Quality in Maintenance Engineering}, author = {Prajapati, Ashok and Bechtel, James and Ganesan, Subramaniam}, month = jan, year = {2012}, note = {Publisher: Emerald Group Publishing Limited}, keywords = {Artificial intelligence, Condition based maintenance, Data mining, Maintenance, Sensors}, pages = {384--400}, }
@article{weber_overview_2012, series = {Special {Section}: {Dependable} {System} {Modelling} and {Analysis}}, title = {Overview on {Bayesian} networks applications for dependability, risk analysis and maintenance areas}, volume = {25}, issn = {0952-1976}, url = {https://www.sciencedirect.com/science/article/pii/S095219761000117X}, doi = {10.1016/j.engappai.2010.06.002}, abstract = {In this paper, a bibliographical review over the last decade is presented on the application of Bayesian networks to dependability, risk analysis and maintenance. It is shown an increasing trend of the literature related to these domains. This trend is due to the benefits that Bayesian networks provide in contrast with other classical methods of dependability analysis such as Markov Chains, Fault Trees and Petri Nets. Some of these benefits are the capability to model complex systems, to make predictions as well as diagnostics, to compute exactly the occurrence probability of an event, to update the calculations according to evidences, to represent multi-modal variables and to help modeling user-friendly by a graphical and compact approach. This review is based on an extraction of 200 specific references in dependability, risk analysis and maintenance applications among a database with 7000 Bayesian network references. The most representatives are presented, then discussed and some perspectives of work are provided.}, language = {en}, number = {4}, urldate = {2021-11-17}, journal = {Engineering Applications of Artificial Intelligence}, author = {Weber, P. and Medina-Oliva, G. and Simon, C. and Iung, B.}, month = jun, year = {2012}, keywords = {Bayesian networks, Dependability, Maintenance, Reliability, Risk analysis, Safety, bn}, pages = {671--682}, }
@article{chen_good_2012, title = {Good practice in {Bayesian} network modelling}, volume = {37}, issn = {1364-8152}, url = {https://www.sciencedirect.com/science/article/pii/S1364815212001041}, doi = {10.1016/j.envsoft.2012.03.012}, abstract = {Bayesian networks (BNs) are increasingly being used to model environmental systems, in order to: integrate multiple issues and system components; utilise information from different sources; and handle missing data and uncertainty. BNs also have a modular architecture that facilitates iterative model development. For a model to be of value in generating and sharing knowledge or providing decision support, it must be built using good modelling practice. This paper provides guidelines to developing and evaluating Bayesian network models of environmental systems, and presents a case study habitat suitability model for juvenile Astacopsis gouldi, the giant freshwater crayfish of Tasmania. The guidelines entail clearly defining the model objectives and scope, and using a conceptual model of the system to form the structure of the BN, which should be parsimonious yet capture all key components and processes. After the states and conditional probabilities of all variables are defined, the BN should be assessed by a suite of quantitative and qualitative forms of model evaluation. All the assumptions, uncertainties, descriptions and reasoning for each node and linkage, data and information sources, and evaluation results must be clearly documented. Following these standards will enable the modelling process and the model itself to be transparent, credible and robust, within its given limitations.}, language = {en}, urldate = {2021-11-17}, journal = {Environmental Modelling \& Software}, author = {Chen, Serena H. and Pollino, Carmel A.}, month = nov, year = {2012}, keywords = {Bayes network, Bayesian belief network, Ecological models, Good modelling practice, Integration, Model evaluation}, pages = {134--145}, }
@article{robles_methods_2012, title = {Methods to choose the best {Hidden} {Markov} {Model} topology for improving maintenance policy}, abstract = {Prediction of physical particular phenomenon is based on partial knowledge of this phenomenon. This knowledge helps us to conceptualize this phenomenon according to different models. Hidden Markov Models (HMM) can be used for modeling complex processes. We use this kind of models as tool for fault diagnosis systems. Nowadays, industrial robots living in stochastic environment need faults detection to prevent any breakdown. In this paper, we wish to find the best Hidden Markov Model topologies to be used in predictive maintenance system. To this end, we use a synthetic Hidden Markov Model in order to simulate a real industrial CMMS. In a stochastic way, we evaluate relevance of Hidden Markov Models parameters, without a priori knowledge. After a brief presentation of a Hidden Markov Model, we present the most used selection criteria of models in current literature. We support our study by an example of simulated industrial process by using our synthetic model. Therefore, we evaluate output parameters of the various tested models on this process: topologies, learning algorithms, observations distributions, epistemic uncertainties. Finally, we come up with the best model which will be used to improve maintenance policy and worker safety.}, author = {Robles, Bernard and Avila, Manuel and Duculty, Florent and Vrignat, Pascal and Kratz, Frédéric and Begot, Stephane}, month = jun, year = {2012}, }
@article{khreich_survey_2012, title = {A survey of techniques for incremental learning of {HMM} parameters}, volume = {197}, issn = {0020-0255}, url = {https://www.sciencedirect.com/science/article/pii/S002002551200120X}, doi = {10.1016/j.ins.2012.02.017}, abstract = {The performance of Hidden Markov Models (HMMs) targeted for complex real-world applications are often degraded because they are designed a priori using limited training data and prior knowledge, and because the classification environment changes during operations. Incremental learning of new data sequences allows to adapt HMM parameters as new data becomes available, without having to retrain from the start on all accumulated training data. This paper presents a survey of techniques found in literature that are suitable for incremental learning of HMM parameters. These techniques are classified according to the objective function, optimization technique and target application, involving block-wise and symbol-wise learning of parameters. Convergence properties of these techniques are presented along with an analysis of time and memory complexity. In addition, the challenges faced when these techniques are applied to incremental learning is assessed for scenarios in which the new training data is limited and abundant. While the convergence rate and resource requirements are critical factors when incremental learning is performed through one pass over abundant stream of data, effective stopping criteria and management of validation sets are important when learning is performed through several iterations over limited data. In both cases managing the learning rate to integrate pre-existing knowledge and new data is crucial for maintaining a high level of performance. Finally, this paper underscores the need for empirical benchmarking studies among techniques presented in literature, and proposes several evaluation criteria based on non-parametric statistical testing to facilitate the selection of techniques given a particular application domain.}, language = {en}, urldate = {2021-11-15}, journal = {Information Sciences}, author = {Khreich, Wael and Granger, Eric and Miri, Ali and Sabourin, Robert}, month = aug, year = {2012}, keywords = {Expectation–maximization, Hidden Markov model, Incremental learning, Limited training data, On-line learning, Recursive estimation}, pages = {105--130}, }
@article{tobon-mejia_data-driven_2012, title = {A {Data}-{Driven} {Failure} {Prognostics} {Method} {Based} on {Mixture} of {Gaussians} {Hidden} {Markov} {Models}}, volume = {61}, issn = {1558-1721}, doi = {10.1109/TR.2012.2194177}, abstract = {This paper addresses a data-driven prognostics method for the estimation of the Remaining Useful Life (RUL) and the associated confidence value of bearings. The proposed method is based on the utilization of the Wavelet Packet Decomposition (WPD) technique, and the Mixture of Gaussians Hidden Markov Models (MoG-HMM). The method relies on two phases: an off-line phase, and an on-line phase. During the first phase, the raw data provided by the sensors are first processed to extract features in the form of WPD coefficients. The extracted features are then fed to dedicated learning algorithms to estimate the parameters of a corresponding MoG-HMM, which best fits the degradation phenomenon. The generated model is exploited during the second phase to continuously assess the current health state of the physical component, and to estimate its RUL value with the associated confidence. The developed method is tested on benchmark data taken from the “NASA prognostics data repository” related to several experiments of failures on bearings done under different operating conditions. Furthermore, the method is compared to traditional time-feature prognostics and simulation results are given at the end of the paper. The results of the developed prognostics method, particularly the estimation of the RUL, can help improving the availability, reliability, and security while reducing the maintenance costs. Indeed, the RUL and associated confidence value are relevant information which can be used to take appropriate maintenance and exploitation decisions. In practice, this information may help the maintainers to prepare the necessary material and human resources before the occurrence of a failure. Thus, the traditional maintenance policies involving corrective and preventive maintenance can be replaced by condition based maintenance.}, number = {2}, journal = {IEEE Transactions on Reliability}, author = {Tobon-Mejia, Diego Alejandro and Medjaher, Kamal and Zerhouni, Noureddine and Tripot, Gerard}, month = jun, year = {2012}, note = {Conference Name: IEEE Transactions on Reliability}, keywords = {Analytical models, Condition monitoring, Data models, Degradation, Hidden Markov models, Maintenance engineering, Mathematical model, Monitoring, hidden Markov model, prognostics and health management, remaining useful life}, pages = {491--503}, }
@article{yu_health_2012, title = {Health {Condition} {Monitoring} of {Machines} {Based} on {Hidden} {Markov} {Model} and {Contribution} {Analysis}}, volume = {61}, issn = {1557-9662}, doi = {10.1109/TIM.2012.2184015}, abstract = {Degradation parameter from normal to failure condition of machine part or system is needed as an object of health monitoring in condition-based maintenance (CBM). This paper proposes a hidden Markov model (HMM) and contribution-analysis-based method to assess the machine health degradation. A dynamic principal component analysis (DPCA) is used to extract effective features from vibration signals, where inherent signal autocorrelation is considered. A novel machine health assessment indication, HMM-based Mahalanobis distance is proposed to provide a comprehensible indication for quantifying machine health states. A variable-replacing-based contribution analysis method is developed to discover the effective features that are responsible for the detection and assessment of machine health degradation in its whole life. The experimental results based on a bearing test bed show the plausibility and effectiveness of the proposed methods, which can be considered as the machine health degradation monitoring model.}, number = {8}, journal = {IEEE Transactions on Instrumentation and Measurement}, author = {Yu, Jianbo}, month = aug, year = {2012}, note = {Conference Name: IEEE Transactions on Instrumentation and Measurement}, keywords = {Bearing, Data models, Degradation, Feature extraction, Frequency domain analysis, Hidden Markov models, Monitoring, Vibrations, condition-based maintenance (CBM), contribution analysis, dynamic principal component analysis (DPCA), hidden Markov model (HMM)}, pages = {2200--2211}, }
@inproceedings{patwary_new_2012, title = {A new scalable parallel {DBSCAN} algorithm using the disjoint-set data structure}, doi = {10.1109/SC.2012.9}, abstract = {DBSCAN is a well-known density based clustering algorithm capable of discovering arbitrary shaped clusters and eliminating noise data. However, parallelization of DBSCAN is challenging as it exhibits an inherent sequential data access order. Moreover, existing parallel implementations adopt a master-slave strategy which can easily cause an unbalanced workload and hence result in low parallel efficiency. We present a new parallel DBSCAN algorithm (PDSDBSCAN) using graph algorithmic concepts. More specifically, we employ the disjoint-set data structure to break the access sequentiality of DBSCAN. In addition, we use a tree-based bottom-up approach to construct the clusters. This yields a better-balanced workload distribution. We implement the algorithm both for shared and for distributed memory. Using data sets containing up to several hundred million high-dimensional points, we show that PDSDBSCAN significantly outperforms the master-slave approach, achieving speedups up to 25.97 using 40 cores on shared memory architecture, and speedups up to 5,765 using 8,192 cores on distributed memory architecture.}, booktitle = {{SC} '12: {Proceedings} of the {International} {Conference} on {High} {Performance} {Computing}, {Networking}, {Storage} and {Analysis}}, author = {Patwary, Md. Mostofa Ali and Palsetia, Diana and Agrawal, Ankit and Liao, Wei-keng and Manne, Fredrik and Choudhary, Alok}, month = nov, year = {2012}, note = {ISSN: 2167-4337}, keywords = {Clustering algorithms, Data structures, Density based clustering, Disjoint-set data structure, Instruction sets, Merging, Noise, Partitioning algorithms, Union-Find algorithm, Vegetation}, pages = {1--11}, }
@inproceedings{jaworski_fuzzy_2012, address = {Berlin, Heidelberg}, series = {Lecture {Notes} in {Computer} {Science}}, title = {On {Fuzzy} {Clustering} of {Data} {Streams} with {Concept} {Drift}}, isbn = {978-3-642-29350-4}, doi = {10.1007/978-3-642-29350-4_10}, abstract = {In the paper the clustering algorithms based on fuzzy set theory are considered. Modifications of the Fuzzy C-Means and the Possibilistic C-Means algorithms are presented, which adjust them to deal with data streams. Since data stream is of infinite size, it has to be partitioned into chunks. Simulations show that this partitioning procedure does not affect the quality of clustering results significantly. Moreover, properly chosen weights can be assigned to each data element. This modification allows the presented algorithms to handle concept drift during simulations.}, language = {en}, booktitle = {Artificial {Intelligence} and {Soft} {Computing}}, publisher = {Springer}, author = {Jaworski, Maciej and Duda, Piotr and Pietruczuk, Lena}, editor = {Rutkowski, Leszek and Korytkowski, Marcin and Scherer, Rafał and Tadeusiewicz, Ryszard and Zadeh, Lotfi A. and Zurada, Jacek M.}, year = {2012}, keywords = {Cluster Center, Concept Drift, Data Chunk, Data Stream, Fuzzy Cluster}, pages = {82--91}, }
@article{cabal-yepez_fpga-based_2012, title = {{FPGA}-based entropy neural processor for online detection of multiple combined faults on induction motors}, volume = {30}, issn = {0888-3270}, url = {https://www.sciencedirect.com/science/article/pii/S0888327012000222}, doi = {10.1016/j.ymssp.2012.01.021}, abstract = {For industry, a faulty induction motor signifies production reduction and cost increase. Real-world induction motors can have one or more faults present at the same time that can mislead to a wrong decision about its operational condition. The detection of multiple combined faults is a demanding task, difficult to accomplish even with computing intensive techniques. This work introduces information entropy and artificial neural networks for detecting multiple combined faults by analyzing the 3-axis startup vibration signals of the rotating machine. A field programmable gate array implementation is developed for automatic online detection of single and combined faults in real time.}, language = {en}, urldate = {2021-09-30}, journal = {Mechanical Systems and Signal Processing}, author = {Cabal-Yepez, E. and Valtierra-Rodriguez, M. and Romero-Troncoso, R. J. and Garcia-Perez, A. and Osornio-Rios, R. A. and Miranda-Vidales, H. and Alvarez-Salas, R.}, month = jul, year = {2012}, keywords = {3-axis vibration signals, Artificial neural networks, Field programmable gate array, Induction motors, Information entropy, Multiple combined faults}, pages = {123--130}, }
@inproceedings{zhou_online_2012, title = {Online {Incremental} {Feature} {Learning} with {Denoising} {Autoencoders}}, url = {https://proceedings.mlr.press/v22/zhou12b.html}, language = {en}, urldate = {2021-09-01}, booktitle = {Artificial {Intelligence} and {Statistics}}, publisher = {PMLR}, author = {Zhou, Guanyu and Sohn, Kihyuk and Lee, Honglak}, month = mar, year = {2012}, note = {ISSN: 1938-7228}, pages = {1453--1461}, }
@incollection{polikar_ensemble_2012, address = {Boston, MA}, title = {Ensemble {Learning}}, isbn = {978-1-4419-9326-7}, url = {https://doi.org/10.1007/978-1-4419-9326-7_1}, abstract = {Over the last couple of decades, multiple classifier systems, also called ensemble systems have enjoyed growing attention within the computational intelligence and machine learning community. This attention has been well deserved, as ensemble systems have proven themselves to be very effective and extremely versatile in a broad spectrum of problem domains and real-world applications. Originally developed to reduce the variance—thereby improving the accuracy—of an automated decision-making system, ensemble systems have since been successfully used to address a variety of machine learning problems, such as feature selection, confidence estimation, missing feature, incremental learning, error correction, class-imbalanced data, learning concept drift from nonstationary distributions, among others. This chapter provides an overview of ensemble systems, their properties, and how they can be applied to such a wide spectrum of applications.}, language = {en}, urldate = {2021-03-25}, booktitle = {Ensemble {Machine} {Learning}: {Methods} and {Applications}}, publisher = {Springer US}, author = {Polikar, Robi}, editor = {Zhang, Cha and Ma, Yunqian}, year = {2012}, doi = {10.1007/978-1-4419-9326-7_1}, keywords = {Combination Rule, Concept Drift, Ensemble Member, Incremental Learning, Majority Vote}, pages = {1--34}, }
@article{kawahara_sequential_2012, title = {Sequential change-point detection based on direct density-ratio estimation}, volume = {5}, copyright = {Copyright © 2011 Wiley Periodicals, Inc.}, issn = {1932-1872}, url = {https://onlinelibrary.wiley.com/doi/abs/10.1002/sam.10124}, doi = {10.1002/sam.10124}, abstract = {Change-point detection is the problem of discovering time points at which properties of time-series data change. This covers a broad range of real-world problems and has been actively discussed in the community of statistics and data mining. In this paper, we present a novel nonparametric approach to detecting the change of probability distributions of sequence data. Our key idea is to estimate the ratio of probability densities, not the probability densities themselves. This formulation allows us to avoid nonparametric density estimation, which is known to be a difficult problem. We provide a change-point detection algorithm based on direct density-ratio estimation that can be computed very efficiently in an online manner. The usefulness of the proposed method is demonstrated through experiments using artificial and real-world datasets. © 2011 Wiley Periodicals, Inc. Statistical Analysis and Data Mining 2011}, language = {en}, number = {2}, urldate = {2020-10-04}, journal = {Statistical Analysis and Data Mining: The ASA Data Science Journal}, author = {Kawahara, Yoshinobu and Sugiyama, Masashi}, year = {2012}, note = {\_eprint: https://onlinelibrary.wiley.com/doi/pdf/10.1002/sam.10124}, keywords = {change-point detection, density-ratio estimation, time-series data}, pages = {114--127}, }
@article{chowdhury_bayesian_2012, title = {Bayesian on-line spectral change point detection: a soft computing approach for on-line {ASR}}, volume = {15}, issn = {1572-8110}, shorttitle = {Bayesian on-line spectral change point detection}, url = {https://doi.org/10.1007/s10772-011-9116-2}, doi = {10.1007/s10772-011-9116-2}, abstract = {Current automatic speech recognition (ASR) works in off-line mode and needs prior knowledge of the stationary or quasi-stationary test conditions for expected word recognition accuracy. These requirements limit the application of ASR for real-world applications where test conditions are highly non-stationary and are not known a priori. This paper presents an innovative frame dynamic rapid adaptation and noise compensation technique for tracking highly non-stationary noises and its application for on-line ASR. The proposed algorithm is based on a soft computing model using Bayesian on-line inference for spectral change point detection (BOSCPD) in unknown non-stationary noises. BOSCPD is tested with the MCRA noise tracking technique for on-line rapid environmental change learning in different non-stationary noise scenarios. The test results show that the proposed BOSCPD technique reduces the delay in spectral change point detection significantly compared to the baseline MCRA and its derivatives. The proposed BOSCPD soft computing model is tested for joint additive and channel distortions compensation (JAC)-based on-line ASR in unknown test conditions using non-stationary noisy speech samples from the Aurora 2 speech database. The simulation results for the on-line ASR show significant improvement in recognition accuracy compared to the baseline Aurora 2 distributed speech recognition (DSR) in batch-mode.}, language = {en}, number = {1}, urldate = {2020-10-01}, journal = {International Journal of Speech Technology}, author = {Chowdhury, M. F. R. and Selouani, S.-A. and O’Shaughnessy, D.}, month = mar, year = {2012}, pages = {5--23}, }
@techreport{noauthor_condition_2012, type = {Standard}, title = {Condition monitoring and diagnostics of machines — {Data} interpretation and diagnostics techniques — {Part} 1: {General} guidelines}, institution = {International Organization for Standardization}, year = {2012}, }
@techreport{noauthor_condition_2012-1, type = {Standard}, title = {Condition monitoring and diagnostics of machines — {Data} processing, communication and presentation — {Part} 3: {Communication}}, institution = {International Organization for Standardization}, year = {2012}, }
@techreport{noauthor_condition_2012-2, type = {Standard}, title = {Condition monitoring and diagnostics of machines — {Vocabulary}}, institution = {International Organization for Standardization}, year = {2012}, }
@article{baraldi_kalman_2012, title = {A {Kalman} {Filter}-{Based} {Ensemble} {Approach} {With} {Application} to {Turbine} {Creep} {Prognostics}}, volume = {61}, issn = {1558-1721}, doi = {10.1109/TR.2012.2221037}, number = {4}, journal = {IEEE Transactions on Reliability}, author = {Baraldi, P. and Mangili, F. and Zio, E.}, year = {2012}, keywords = {Computational modeling, Creep, Degradation, Kalman filter, Kalman filter-based ensemble approach, Kalman filters, Predictive models, Prognostics and health management, Turbines, ensemble, health management systems, nuclear power plants, nuclear power stations, prognostics and health management, remaining life assessment, remaining useful life, turbine blades, turbine creep prognostics, turbines}, pages = {966--977}, }
@article{ahmad_overview_2012, title = {An overview of time-based and condition-based maintenance in industrial application}, volume = {63}, issn = {0360-8352}, url = {http://www.sciencedirect.com/science/article/pii/S0360835212000484}, doi = {10.1016/j.cie.2012.02.002}, number = {1}, journal = {Computers \& Industrial Engineering}, author = {Ahmad, Rosmaini and Kamaruddin, Shahrul}, year = {2012}, keywords = {Condition-based maintenance, Industrial application, Maintenance decision making, Maintenance techniques, Preventive maintenance, Time-based maintenance}, pages = {135--149}, }
@article{si_remaining_2011, title = {Remaining useful life estimation – {A} review on the statistical data driven approaches}, volume = {213}, issn = {0377-2217}, url = {https://www.sciencedirect.com/science/article/pii/S0377221710007903}, doi = {10.1016/j.ejor.2010.11.018}, abstract = {Remaining useful life (RUL) is the useful life left on an asset at a particular time of operation. Its estimation is central to condition based maintenance and prognostics and health management. RUL is typically random and unknown, and as such it must be estimated from available sources of information such as the information obtained in condition and health monitoring. The research on how to best estimate the RUL has gained popularity recently due to the rapid advances in condition and health monitoring techniques. However, due to its complicated relationship with observable health information, there is no such best approach which can be used universally to achieve the best estimate. As such this paper reviews the recent modeling developments for estimating the RUL. The review is centred on statistical data driven approaches which rely only on available past observed data and statistical models. The approaches are classified into two broad types of models, that is, models that rely on directly observed state information of the asset, and those do not. We systematically review the models and approaches reported in the literature and finally highlight future research challenges.}, language = {en}, number = {1}, urldate = {2023-02-28}, journal = {European Journal of Operational Research}, author = {Si, Xiao-Sheng and Wang, Wenbin and Hu, Chang-Hua and Zhou, Dong-Hua}, month = aug, year = {2011}, keywords = {Brown motion, Maintenance, Markov, Proportional hazards model, Remaining useful life, Stochastic filtering}, pages = {1--14}, }
@article{byttner_consensus_2011, title = {Consensus self-organized models for fault detection ({COSMO})}, volume = {24}, issn = {0952-1976}, url = {https://www.sciencedirect.com/science/article/pii/S0952197611000467}, doi = {10.1016/j.engappai.2011.03.002}, abstract = {Methods for equipment monitoring are traditionally constructed from specific sensors and/or knowledge collected prior to implementation on the equipment. A different approach is presented here that builds up knowledge over time by exploratory search among the signals available on the internal field bus system and comparing the observed signal relationships among a group of equipment that perform similar tasks. The approach is developed for the purpose of increasing vehicle uptime, and is therefore demonstrated in the case of a city bus and a heavy duty truck. However, it also works fine for smaller mechatronic systems like computer hard-drives. The approach builds on an onboard self-organized search for models that capture relations among signal values on the vehicles' data buses, combined with a limited bandwidth telematics gateway and an off-line server application where the parameters of the self-organized models are compared. The presented approach represents a new look at error detection in commercial mechatronic systems, where the normal behavior of a system is actually found under real operating conditions, rather than the behavior observed in a number of laboratory tests or test-drives prior to production of the system. The approach has potential to be the basis for a self-discovering system for general purpose fault detection and diagnostics.}, language = {en}, number = {5}, urldate = {2023-02-13}, journal = {Engineering Applications of Artificial Intelligence}, author = {Byttner, S. and Rögnvaldsson, T. and Svensson, M.}, month = aug, year = {2011}, keywords = {Fault detection, Fleet management, Remote maintenance, Self-organizing systems, Telematics}, pages = {833--839}, }
@article{fu_review_2011, title = {A review on time series data mining}, volume = {24}, issn = {0952-1976}, url = {https://www.sciencedirect.com/science/article/pii/S0952197610001727}, doi = {10.1016/j.engappai.2010.09.007}, abstract = {Time series is an important class of temporal data objects and it can be easily obtained from scientific and financial applications. A time series is a collection of observations made chronologically. The nature of time series data includes: large in data size, high dimensionality and necessary to update continuously. Moreover time series data, which is characterized by its numerical and continuous nature, is always considered as a whole instead of individual numerical field. The increasing use of time series data has initiated a great deal of research and development attempts in the field of data mining. The abundant research on time series data mining in the last decade could hamper the entry of interested researchers, due to its complexity. In this paper, a comprehensive revision on the existing time series data mining research is given. They are generally categorized into representation and indexing, similarity measure, segmentation, visualization and mining. Moreover state-of-the-art research issues are also highlighted. The primary objective of this paper is to serve as a glossary for interested researchers to have an overall picture on the current time series data mining development and identify their potential research direction to further investigation.}, language = {en}, number = {1}, urldate = {2022-09-02}, journal = {Engineering Applications of Artificial Intelligence}, author = {Fu, Tak-chung}, month = feb, year = {2011}, keywords = {Representation, Segmentation, Similarity measure, Time series data mining, Visualization}, pages = {164--181}, }
@article{eslamloueyan_designing_2011, title = {Designing a hierarchical neural network based on fuzzy clustering for fault diagnosis of the {Tennessee}–{Eastman} process}, volume = {11}, issn = {1568-4946}, url = {https://www.sciencedirect.com/science/article/pii/S1568494610000888}, doi = {10.1016/j.asoc.2010.04.012}, abstract = {This paper proposes a hierarchical artificial neural network (HANN) for isolating the faults of the Tennessee–Eastman process (TEP). The TEP is the simulation of a chemical plant created by the Eastman Chemical Company to provide a realistic industrial process for evaluating process control and monitoring methods. The first step in designing the HANN is to divide the fault pattern space into a few sub-spaces by using the fuzzy C-means clustering algorithm. For each sub-space of fault patterns a special neural network has been trained in order to diagnose the faults of that sub-space. A supervisor network has been developed to decide which one of the special neural networks should be triggered. In this regard, each neural network in the proposed HANN has been given a specific duty, so the proposed procedure can be called Duty-Oriented HANN (DOHANN). The neuromorphic structure of the networks is based on multilayer perceptron (MLP) networks. The simulation of the Tennessee–Eastman (TE) process has been used to generate the required training and test data. The performance of the developed method has been evaluated and compared to that of a conventional single neural network (SNN) as well as the technique of dynamic principal component analysis (DPCA). The simulation results indicate that the DOHANN diagnoses the TEP faults considerably better than the SNN and DPCA methods. Training of each MLP network for the DOHANN model has required less computer time in comparison to the SNN model. This is because of the structurally simpler MLPs used by the developed DOHANN method.}, language = {en}, number = {1}, urldate = {2022-05-02}, journal = {Applied Soft Computing}, author = {Eslamloueyan, Reza}, month = jan, year = {2011}, keywords = {Fuzzy C-means clustering, Hierarchical neural network, Nonlinear process, Process fault diagnosis, Tennessee–Eastman process}, pages = {1407--1415}, }
@article{bahrampour_weighted_2011, title = {Weighted and constrained possibilistic {C}-means clustering for online fault detection and isolation}, volume = {35}, issn = {1573-7497}, url = {https://doi.org/10.1007/s10489-010-0219-2}, doi = {10.1007/s10489-010-0219-2}, abstract = {In this paper, a new weighted and constrained possibilistic C-means clustering algorithm is proposed for process fault detection and diagnosis (FDI) in offline and online modes for both already known and novel faults. A possibilistic clustering based approach is utilized here to address some of the deficiencies of the fuzzy C-means (FCM) algorithm leading to more consistent results in the context of the FDI tasks by relaxing the probabilistic condition in FCM cost function. The proposed algorithm clusters the historical data set into C different dense regions without having precise knowledge about the number of the faults in the data set. The algorithm incorporates simultaneously possibilistic algorithm and local attribute weighting for time-series segmentation. This allows different weights to be allocated to different features responsible for the distinguished process faults which is an essential characteristic of proper FDI operations. A set of comparative studies have been carried out on the large-scale Tennessee Eastman industrial challenge problem and the DAMADICS actuator benchmark to demonstrate the superiority of the proposed algorithm in process FDI applications with respect to some available alternative approaches.}, language = {en}, number = {2}, urldate = {2022-05-02}, journal = {Applied Intelligence}, author = {Bahrampour, Soheil and Moshiri, Behzad and Salahshoor, Karim}, month = oct, year = {2011}, keywords = {Fault detection and isolation, Feature weighting, Possibilistic clustering}, pages = {269--284}, }
@inproceedings{zliobaite_active_2011, address = {Berlin, Heidelberg}, title = {Active {Learning} with {Evolving} {Streaming} {Data}}, isbn = {978-3-642-23808-6}, doi = {10.1007/978-3-642-23808-6_39}, abstract = {In learning to classify streaming data, obtaining the true labels may require major effort and may incur excessive cost. Active learning focuses on learning an accurate model with as few labels as possible. Streaming data poses additional challenges for active learning, since the data distribution may change over time (concept drift) and classifiers need to adapt. Conventional active learning strategies concentrate on querying the most uncertain instances, which are typically concentrated around the decision boundary. If changes do not occur close to the boundary, they will be missed and classifiers will fail to adapt. In this paper we develop two active learning strategies for streaming data that explicitly handle concept drift. They are based on uncertainty, dynamic allocation of labeling efforts over time and randomization of the search space. We empirically demonstrate that these strategies react well to changes that can occur anywhere in the instance space and unexpectedly.}, language = {en}, booktitle = {Machine {Learning} and {Knowledge} {Discovery} in {Databases}}, publisher = {Springer}, author = {Žliobaitė, Indrė and Bifet, Albert and Pfahringer, Bernhard and Holmes, Geoff}, editor = {Gunopulos, Dimitrios and Hofmann, Thomas and Malerba, Donato and Vazirgiannis, Michalis}, year = {2011}, pages = {597--612}, }
@inproceedings{chu_unbiased_2011, address = {New York, NY, USA}, series = {{KDD} '11}, title = {Unbiased online active learning in data streams}, isbn = {978-1-4503-0813-7}, url = {https://doi.org/10.1145/2020408.2020444}, doi = {10.1145/2020408.2020444}, abstract = {Unlabeled samples can be intelligently selected for labeling to minimize classification error. In many real-world applications, a large number of unlabeled samples arrive in a streaming manner, making it impossible to maintain all the data in a candidate pool. In this work, we focus on binary classification problems and study selective labeling in data streams where a decision is required on each sample sequentially. We consider the unbiasedness property in the sampling process, and design optimal instrumental distributions to minimize the variance in the stochastic process. Meanwhile, Bayesian linear classifiers with weighted maximum likelihood are optimized online to estimate parameters. In empirical evaluation, we collect a data stream of user-generated comments on a commercial news portal in 30 consecutive days, and carry out offline evaluation to compare various sampling strategies, including unbiased active learning, biased variants, and random sampling. Experimental results verify the usefulness of online active learning, especially in the non-stationary situation with concept drift.}, urldate = {2022-03-28}, booktitle = {Proceedings of the 17th {ACM} {SIGKDD} international conference on {Knowledge} discovery and data mining}, publisher = {Association for Computing Machinery}, author = {Chu, Wei and Zinkevich, Martin and Li, Lihong and Thomas, Achint and Tseng, Belle}, month = aug, year = {2011}, keywords = {active learning, adaptive importance sampling, bayesian online learning, data streaming, unbiasedness}, pages = {195--203}, }
@article{omitaomu_online_2011, title = {Online {Support} {Vector} {Regression} {With} {Varying} {Parameters} for {Time}-{Dependent} {Data}}, volume = {41}, issn = {1558-2426}, doi = {10.1109/TSMCA.2010.2055156}, abstract = {Support vector regression (SVR) is a machine learning technique that continues to receive interest in several domains, including manufacturing, engineering, and medicine. In order to extend its application to problems in which data sets arrive constantly and in which batch processing of the data sets is infeasible or expensive, an accurate online SVR (AOSVR) technique was proposed. The AOSVR technique efficiently updates a trained SVR function whenever a sample is added to or removed from the training set without retraining the entire training data. However, the AOSVR technique assumes that the new samples and the training samples are of the same characteristics; hence, the same value of SVR parameters is used for training and prediction. This assumption is not applicable to data samples that are inherently noisy and nonstationary, such as sensor data. As a result, we propose AOSVR with varying parameters that uses varying SVR parameters rather than fixed SVR parameters and hence accounts for the variability that may exist in the samples. To accomplish this objective, we also propose a generalized weight function to automatically update the weights of SVR parameters in online monitoring applications. The proposed function allows for lower and upper bounds for SVR parameters. We tested our proposed approach and compared results with the conventional AOSVR approach using two benchmark time-series data and sensor data from a nuclear power plant. The results show that using varying SVR parameters is more applicable to time-dependent data.}, number = {1}, journal = {IEEE Transactions on Systems, Man, and Cybernetics - Part A: Systems and Humans}, author = {Omitaomu, Olufemi A. and Jeong, Myong K. and Badiru, Adedeji B.}, month = jan, year = {2011}, note = {Conference Name: IEEE Transactions on Systems, Man, and Cybernetics - Part A: Systems and Humans}, keywords = {Automobile manufacture, Condition monitoring, Data engineering, Machine learning, Manufacturing, Medical diagnostic imaging, Power generation, Sensor systems, Systems engineering and theory, Training data, inferential sensing, online prediction, support vector machine, system diagnosis}, pages = {191--197}, }
@inproceedings{xioufis_dealing_2011, address = {Barcelona, Catalonia, Spain}, series = {{IJCAI}'11}, title = {Dealing with concept drift and class imbalance in multi-label stream classification}, isbn = {978-1-57735-514-4}, abstract = {Streams of objects that are associated with one or more labels at the same time appear in many applications. However, stream classification of multi-label data is largely unexplored. Existing approaches try to tackle the problem by transferring traditional single-label stream classification practices to the multi-label domain. Nevertheless, they fail to consider some of the unique properties of the problem such as within and between class imbalance and multiple concept drift. To deal with these challenges, this paper proposes a novel multilabel stream classification approach that employs two windows for each label, one for positive and one for negative examples. Instance-sharing is exploited for space efficiency, while a time-efficient instantiation based on the k-Nearest Neighbor algorithm is also proposed. Finally, a batch-incremental thresholding technique is proposed to further deal with the class imbalance problem. Results of an empirical comparison against two other methods on three real world datasets are in favor of the proposed approach.}, urldate = {2022-03-17}, booktitle = {Proceedings of the {Twenty}-{Second} international joint conference on {Artificial} {Intelligence} - {Volume} {Two}}, publisher = {AAAI Press}, author = {Xioufis, Eleftherios Spyromitros and Spiliopoulou, Myra and Tsoumakas, Grigorios and Vlahavas, Ioannis}, month = jul, year = {2011}, pages = {1583--1588}, }
@article{newman_complex_2011, title = {Complex {Systems}: {A} {Survey}}, volume = {79}, issn = {0002-9505, 1943-2909}, shorttitle = {Complex {Systems}}, url = {http://arxiv.org/abs/1112.1440}, doi = {10.1119/1.3590372}, abstract = {A complex system is a system composed of many interacting parts, often called agents, which displays collective behavior that does not follow trivially from the behaviors of the individual parts. Examples include condensed matter systems, ecosystems, stock markets and economies, biological evolution, and indeed the whole of human society. Substantial progress has been made in the quantitative understanding of complex systems, particularly since the 1980s, using a combination of basic theory, much of it derived from physics, and computer simulation. The subject is a broad one, drawing on techniques and ideas from a wide range of areas. Here I give a survey of the main themes and methods of complex systems science and an annotated bibliography of resources, ranging from classic papers to recent books and reviews.}, number = {8}, urldate = {2022-02-28}, journal = {American Journal of Physics}, author = {Newman, M. E. J.}, month = aug, year = {2011}, note = {arXiv: 1112.1440}, keywords = {Condensed Matter - Statistical Mechanics, Nonlinear Sciences - Adaptation and Self-Organizing Systems, Physics - Physics and Society}, pages = {800--810}, }
@article{rabatel_anomaly_2011, title = {Anomaly detection in monitoring sensor data for preventive maintenance}, volume = {38}, issn = {0957-4174}, url = {https://www.sciencedirect.com/science/article/pii/S0957417410013771}, doi = {10.1016/j.eswa.2010.12.014}, abstract = {Today, many industrial companies must face problems raised by maintenance. In particular, the anomaly detection problem is probably one of the most challenging. In this paper we focus on the railway maintenance task and propose to automatically detect anomalies in order to predict in advance potential failures. We first address the problem of characterizing normal behavior. In order to extract interesting patterns, we have developed a method to take into account the contextual criteria associated to railway data (itinerary, weather conditions, etc.). We then measure the compliance of new data, according to extracted knowledge, and provide information about the seriousness and the exact localization of a detected anomaly.}, language = {en}, number = {6}, urldate = {2022-01-13}, journal = {Expert Systems with Applications}, author = {Rabatel, Julien and Bringay, Sandra and Poncelet, Pascal}, month = jun, year = {2011}, keywords = {Anomaly detection, Behavior characterization, Preventive maintenance, Sequential patterns}, pages = {7003--7015}, }
@article{li_short_2011, title = {A {Short} {Introduction} to {Learning} to {Rank}}, volume = {E94-D}, issn = {1745-1361, 0916-8532}, url = {https://search.ieice.org/bin/summary.php?id=e94-d_10_1854&category=D&year=2011&lang=E&abst=}, abstract = {Learning to rank refers to machine learning techniques for training the model in a ranking task. Learning to rank is useful for many applications in Information Retrieval, Natural Language Processing, and Data Mining. Intensive studies have been conducted on the problem and significant progress has been made [1],[2]. This short paper gives an introduction to learning to rank, and it specifically explains the fundamental problems, existing approaches, and future work of learning to rank. Several learning to rank methods using SVM techniques are described in detail.}, number = {10}, urldate = {2021-10-15}, journal = {IEICE Transactions on Information and Systems}, author = {Li, Hang}, month = oct, year = {2011}, note = {Publisher: The Institute of Electronics, Information and Communication Engineers}, pages = {1854--1862}, }
@inproceedings{tan_fast_2011, address = {Barcelona, Catalonia, Spain}, series = {{IJCAI}'11}, title = {Fast anomaly detection for streaming data}, isbn = {978-1-57735-514-4}, abstract = {This paper introduces Streaming Half-Space-Trees (HS-Trees), a fast one-class anomaly detector for evolving data streams. It requires only normal data for training and works well when anomalous data are rare. The model features an ensemble of random HS-Trees, and the tree structure is constructed without any data. This makes the method highly efficient because it requires no model restructuring when adapting to evolving data streams. Our analysis shows that Streaming HS-Trees has constant amortised time complexity and constant memory requirement. When compared with a state-of-the-art method, our method performs favourably in terms of detection accuracy and runtime performance. Our experimental results also show that the detection performance of Streaming HS-Trees is not sensitive to its parameter settings.}, urldate = {2021-10-15}, booktitle = {Proceedings of the {Twenty}-{Second} international joint conference on {Artificial} {Intelligence} - {Volume} {Two}}, publisher = {AAAI Press}, author = {Tan, Swee Chuan and Ting, Kai Ming and Liu, Tony Fei}, month = jul, year = {2011}, keywords = {anomaly detection, stream, tree}, pages = {1511--1516}, }
@article{hu_integrated_2011, title = {An integrated safety prognosis model for complex system based on dynamic {Bayesian} network and ant colony algorithm}, volume = {38}, issn = {0957-4174}, url = {https://www.sciencedirect.com/science/article/pii/S0957417410006780}, doi = {10.1016/j.eswa.2010.07.050}, abstract = {In complex industrial systems, most single faults have multiple propagation paths, so any local slight deviation is able to propagate, spread, accumulate and increase through system fault causal chains. It will finally result in unplanned outages and even catastrophic accidents, which lead to huge economic losses, environmental contamination, or human injuries. In order to ensure system intrinsic safety and increase operational performance and reliability over a long period, this study proposes an integrated safety prognosis model (ISPM) considering the randomness, complexity and uncertainty of fault propagation. ISPM is developed based on dynamic Bayesian networks to model the propagation of faults in a complex system, integrating the a priori knowledge of the interactions and dependencies among subsystems, components, and the environment of the system, as well as the relationships between fault causes and effects. So the current safety state and potential risk of the system can be assessed by locating potential hazard origins and deducing corresponding possible consequences. Furthermore, ISPM is also developed to predict the future degradation trend in terms of future reliability or performance of the system, and provide proper proactive maintenance plans. An ant colony algorithm is introduced in ISPM, comprehensively considering two factors, the probability and severity of faults, to perform quantitative risk estimation of the underlying system. The feasibility and benefits of ISPM are investigated with a field case study of a gas turbine compressor system. According to the outputs given by ISPM in the application, proactive maintenance, safety-related actions and contingency plans are further discussed and then made to keep the system at a high reliability and safety level in the long term.}, language = {en}, number = {3}, urldate = {2021-10-14}, journal = {Expert Systems with Applications}, author = {Hu, Jinqiu and Zhang, Laibin and Ma, Lin and Liang, Wei}, month = mar, year = {2011}, keywords = {Ant colony algorithm, Dynamic Bayesian networks, Fault propagation path, Proactive maintenance, Risk evaluation, Safety prognosis}, pages = {1431--1446}, }
@article{dehestani_online_2011, title = {Online {Support} {Vector} {Machine} {Application} for {Model} {Based} {Fault} {Detection} and {Isolation} of {HVAC} {System}}, issn = {2010-3700}, url = {http://www.ijmlc.org/show-7-17-1.html}, doi = {10.7763/IJMLC.2011.V1.10}, urldate = {2021-10-12}, journal = {International Journal of Machine Learning and Computing}, author = {Dehestani, Davood and Eftekhari, Fahimeh and Guo, Ying and Ling, Steven and Su, Steven and Nguyen, Hung}, year = {2011}, keywords = {hvac, online\_learning, stream\_learning, svm}, pages = {66--72}, }
@inproceedings{kremer_effective_2011, address = {New York, NY, USA}, series = {{KDD} '11}, title = {An effective evaluation measure for clustering on evolving data streams}, isbn = {978-1-4503-0813-7}, url = {https://doi.org/10.1145/2020408.2020555}, doi = {10.1145/2020408.2020555}, abstract = {Due to the ever growing presence of data streams, there has been a considerable amount of research on stream mining algorithms. While many algorithms have been introduced that tackle the problem of clustering on evolving data streams, hardly any attention has been paid to appropriate evaluation measures. Measures developed for static scenarios, namely structural measures and ground-truth-based measures, cannot correctly reflect errors attributable to emerging, splitting, or moving clusters. These situations are inherent to the streaming context due to the dynamic changes in the data distribution. In this paper we develop a novel evaluation measure for stream clustering called Cluster Mapping Measure (CMM). CMM effectively indicates different types of errors by taking the important properties of evolving data streams into account. We show in extensive experiments on real and synthetic data that CMM is a robust measure for stream clustering evaluation.}, urldate = {2021-10-07}, booktitle = {Proceedings of the 17th {ACM} {SIGKDD} international conference on {Knowledge} discovery and data mining}, publisher = {Association for Computing Machinery}, author = {Kremer, Hardy and Kranen, Philipp and Jansen, Timm and Seidl, Thomas and Bifet, Albert and Holmes, Geoff and Pfahringer, Bernhard}, month = aug, year = {2011}, keywords = {evaluation measure, stream clustering}, pages = {868--876}, }
@article{rashidi_discovering_2011, title = {Discovering {Activities} to {Recognize} and {Track} in a {Smart} {Environment}}, volume = {23}, issn = {1558-2191}, doi = {10.1109/TKDE.2010.148}, abstract = {The machine learning and pervasive sensing technologies found in smart homes offer unprecedented opportunities for providing health monitoring and assistance to individuals experiencing difficulties living independently at home. In order to monitor the functional health of smart home residents, we need to design technologies that recognize and track activities that people normally perform as part of their daily routines. Although approaches do exist for recognizing activities, the approaches are applied to activities that have been preselected and for which labeled training data are available. In contrast, we introduce an automated approach to activity tracking that identifies frequent activities that naturally occur in an individual's routine. With this capability, we can then track the occurrence of regular activities to monitor functional health and to detect changes in an individual's patterns and lifestyle. In this paper, we describe our activity mining and tracking approach, and validate our algorithms on data collected in physical smart environments.}, number = {4}, journal = {IEEE Transactions on Knowledge and Data Engineering}, author = {Rashidi, Parisa and Cook, Diane J. and Holder, Lawrence B. and Schmitter-Edgecombe, Maureen}, month = apr, year = {2011}, note = {Conference Name: IEEE Transactions on Knowledge and Data Engineering}, keywords = {Activity recognition, Clustering algorithms, Data mining, Hidden Markov models, Intelligent sensors, Monitoring, Smart homes, clustering, data mining, sequence mining, smart homes.}, pages = {527--539}, }
@book{kolowrocki_reliability_2011, address = {London}, series = {Springer {Series} in {Reliability} {Engineering}}, title = {Reliability and {Safety} of {Complex} {Technical} {Systems} and {Processes}: {Modeling} – {Identification} – {Prediction} - {Optimization}}, isbn = {978-0-85729-693-1}, shorttitle = {Reliability and {Safety} of {Complex} {Technical} {Systems} and {Processes}}, url = {https://www.springer.com/gp/book/9780857296931}, abstract = {Reliability and Safety of Complex Technical Systems and Processes offers a comprehensive approach to the analysis, identification, evaluation, prediction and optimization of complex technical systems operation, reliability and safety. Its main emphasis is on multistate systems with ageing components, changes to their structure, and their components reliability and safety parameters during the operation processes. Reliability and Safety of Complex Technical Systems and Processes presents integrated models for the reliability, availability and safety of complex non-repairable and repairable multistate technical systems, with reference to their operation processes and their practical applications to real industrial systems. The authors consider variables in different operation states, reliability and safety structures, and the reliability and safety parameters of components, as well as suggesting a cost analysis for complex technical systems. Researchers and industry practitioners will find information on a wide range of complex technical systems in Reliability and Safety of Complex Technical Systems and Processes. It may prove an easy-to-use guide to reliability and safety evaluations of real complex technical systems, both during their operation and at the design stages.}, language = {en}, urldate = {2021-07-31}, publisher = {Springer-Verlag}, author = {Kołowrocki, Krzysztof and Soszyńska-Budny, Joanna}, year = {2011}, doi = {10.1007/978-0-85729-694-8}, }
@article{ikonomovska_learning_2011, title = {Learning model trees from evolving data streams}, volume = {23}, issn = {1573-756X}, url = {https://doi.org/10.1007/s10618-010-0201-y}, doi = {10.1007/s10618-010-0201-y}, language = {en}, number = {1}, urldate = {2020-05-03}, journal = {Data Mining and Knowledge Discovery}, author = {Ikonomovska, Elena and Gama, João and Džeroski, Sašo}, year = {2011}, pages = {128--168}, }
@inproceedings{hindman_mesos_2011, series = {{NSDI}’11}, title = {Mesos: {A} {Platform} for {Fine}-{Grained} {Resource} {Sharing} in the {Data} {Center}}, booktitle = {Proceedings of the 8th {USENIX} {Conference} on {Networked} {Systems} {Design} and {Implementation}}, publisher = {USENIX Association}, author = {Hindman, Benjamin and Konwinski, Andy and Zaharia, Matei and Ghodsi, Ali and Joseph, Anthony D. and Katz, Randy and Shenker, Scott and Stoica, Ion}, year = {2011}, pages = {295--308}, }
@article{hashemian_state---art_2011, title = {State-of-the-{Art} {Predictive} {Maintenance} {Techniques}}, volume = {60}, number = {10}, journal = {IEEE Transactions on Instrumentation and Measurement}, author = {Hashemian, H. M. and Bean, W. C.}, year = {2011}, pages = {3480--3492}, }
@article{maestri_robust_2010, title = {A robust clustering method for detection of abnormal situations in a process with multiple steady-state operation modes}, volume = {34}, issn = {0098-1354}, url = {https://www.sciencedirect.com/science/article/pii/S0098135409001331}, doi = {10.1016/j.compchemeng.2009.05.012}, abstract = {Many classical multivariate statistical process monitoring (MSPM) techniques assume normal distribution of the data and independence of the samples. Very often, these assumptions do not hold for real industrial chemical processes, where multiple plant operating modes lead to multiple nominal operation regions. MSPM techniques that do not take account of this fact show increased false alarm and missing alarm rates. In this work, a simple fault detection tool based on a robust clustering technique is implemented to detect abnormal situations in an industrial installation with multiple operation modes. The tool is applied to three case studies: (i) a two-dimensional toy example, (ii) a realistic simulation usually used as a benchmark example, known as the Tennessee–Eastman Process, and (iii) real data from a methanol plant. The clustering technique on which the tool relies assumes that the observations come from multiple populations with a common covariance matrix (i.e., the same underlying physical relations). The clustering technique is also capable of coping with a certain percentage of outliers, thus avoiding the need of extensive preprocessing of the data. Moreover, improvements in detection capacity are found when comparing the results to those obtained with standard methodologies. Hence, the feasibility of implementing fault detection tools based on this technique in the field of chemical industrial processes is discussed.}, language = {en}, number = {2}, urldate = {2022-05-02}, journal = {Computers \& Chemical Engineering}, author = {Maestri, Mauricio and Farall, Andrés and Groisman, Pablo and Cassanello, Miryan and Horowitz, Gabriel}, month = feb, year = {2010}, keywords = {Fault detection, Multiple operating modes, Multivariate statistical process monitoring}, pages = {223--231}, }
@inproceedings{wu_fault_2010, title = {Fault {Diagnosis} {Based} on {K}-{Means} {Clustering} and {PNN}}, doi = {10.1109/ICINIS.2010.169}, abstract = {This paper presents the development of an algorithm based on K-Means clustering and probabilistic neural network (PNN) for classifying industrial system faults. The proposed technique consists of a preprocessing unit based on K-Means clustering and a probabilistic neural network (PNN). Given a set of data points, the K-Means algorithm is first used to obtain K temporary clusters, and then PNN is used to diagnose faults. To validate the performance and effectiveness of the proposed scheme, K-Means and PNN are applied to diagnose the faults in the TE process. Simulation studies show that the proposed algorithm not only provides an acceptable degree of accuracy in fault classification under different fault conditions but also yields reliable results.}, booktitle = {2010 {Third} {International} {Conference} on {Intelligent} {Networks} and {Intelligent} {Systems}}, author = {Wu, Dongsheng and Yang, Qing and Tian, Feng and Zhang, Dong Xu}, month = nov, year = {2010}, keywords = {Artificial neural networks, Clustering algorithms, Cooling, Fault diagnosis, Feeds, K-Means, Neurons, PNN, TE process, cluster}, pages = {173--176}, }
@inproceedings{bifet_moa_2010, title = {{MOA}: {Massive} {Online} {Analysis}, a {Framework} for {Stream} {Classification} and {Clustering}}, shorttitle = {{MOA}}, url = {https://proceedings.mlr.press/v11/bifet10a.html}, abstract = {Massive Online Analysis (MOA) is a software environment for implementing algorithms and running experiments for online learning from evolving data streams. MOA is designed to deal with the challenging problem of scaling up the implementation of state of the art algorithms to real world dataset sizes. It contains a collection of offline and online algorithms for both classification and clustering, as well as tools for evaluation. In particular, for classification it implements boosting, bagging, and Hoeffding Trees, all with and without Naive Bayes classifiers at the leaves. For clustering, it implements StreamKM++, CluStream, ClusTree, Den-Stream, D-Stream and CobWeb. Researchers benefit from MOA by getting insights into the workings and problems of different approaches, while practitioners can easily apply and compare several algorithms on real world data sets and settings. MOA supports bi-directional interaction with WEKA, the Waikato Environment for Knowledge Analysis, and is released under the GNU GPL license.}, language = {en}, urldate = {2022-03-15}, booktitle = {Proceedings of the {First} {Workshop} on {Applications} of {Pattern} {Analysis}}, publisher = {PMLR}, author = {Bifet, Albert and Holmes, Geoff and Pfahringer, Bernhard and Kranen, Philipp and Kremer, Hardy and Jansen, Timm and Seidl, Thomas}, month = sep, year = {2010}, note = {ISSN: 1938-7228}, pages = {44--50}, }
@inproceedings{shen_numerical_2010, title = {Numerical simulation of sliding wear based on {Archard} model}, doi = {10.1109/MACE.2010.5535855}, abstract = {A sliding wear simulation approach based on Archard's wear model was proposed, in which the ABAQUS scripting interface was used to simulate the progressive accumulation of wear between contact surfaces. Removal of material caused by wear was implemented by moving boundary nodes. In order to measure wear rates of the woven fabric liner, a pin-on-disk experiment was performed. The wear rate of 9.76 × 10⁻⁷ mm³/(N·m) was used as input of the simulation. Then a wear problem of a spherical plain bearing with a self-lubricating fabric liner was solved to validate the above wear prediction method. Compared with wear experiments of other bearings, close agreement between experiments and wear simulation was found. At the same time, proper re-meshing technology and time step selection would increase calculation efficiency and precision. Results show that the complex nonlinear wear process can be simulated with a series of discrete quasi-static models and the method proposed can be widely used to predict wear problems in engineering.}, booktitle = {2010 {International} {Conference} on {Mechanic} {Automation} and {Control} {Engineering}}, author = {Shen, Xuejin and Cao, Lei and Li, Ruyan}, month = jun, year = {2010}, keywords = {Archard Model, Automation, Computational modeling, Equations, Fabrics, Finite element method (FEM), Finite element methods, Numerical simulation, Performance evaluation, Prediction methods, Predictive models, Spherical Plain bearing, Tribology, Wear Rate, Wear Simulation}, pages = {325--329}, }
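For orientation, Archard's wear law referenced in the entry above can be written incrementally as V = k·F·s, with V the wear volume, F the normal load, s the sliding distance, and k a dimensional wear rate in mm³/(N·m). The following Python fragment is a minimal illustrative sketch of that relation only, not code from the cited study; the load, sliding distance and contact area values are hypothetical.

# Illustrative sketch of Archard's wear law (not code from the cited paper).
# Incremental form: V = k * F * s, with k a dimensional wear rate in mm^3/(N*m).

def archard_wear_depth(k, normal_load_n, sliding_distance_m, contact_area_mm2):
    """Return wear volume (mm^3) and mean wear depth (mm) for a sliding contact."""
    wear_volume = k * normal_load_n * sliding_distance_m   # mm^3
    wear_depth = wear_volume / contact_area_mm2            # mm, assuming uniform removal
    return wear_volume, wear_depth

# Example using the wear rate reported in the abstract (9.76e-7 mm^3/(N*m));
# load, sliding distance and contact area are hypothetical values.
volume, depth = archard_wear_depth(k=9.76e-7, normal_load_n=100.0,
                                   sliding_distance_m=5000.0, contact_area_mm2=50.0)
print(f"wear volume: {volume:.4f} mm^3, mean depth: {depth:.6f} mm")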
@article{kandare_larsonmiller_2010, title = {Larson–{Miller} {Failure} {Modeling} of {Aluminum} in {Fire}}, volume = {41}, issn = {1543-1940}, url = {https://doi.org/10.1007/s11661-010-0369-1}, doi = {10.1007/s11661-010-0369-1}, abstract = {This article presents a modeling approach based on the Larson–Miller parameter (LMP) for creep rupture to predict failure of aluminum in fire. The modified Larson–Miller model can predict time-dependent tensile rupture or compressive buckling of aluminum plate under combined loading and one-sided heating by fire. The model applies the LMP to determine the failure time and failure temperature of aluminum exposed to fire. Fire structural tests were performed on an aluminum alloy (5083-H116) subjected to different load levels and heat flux conditions (with maximum temperatures of 473 to 688 K (200 to 415 °C)) to validate the Larson–Miller modeling approach. The tests reveal that the Larson–Miller model can accurately predict tensile and compressive failure of aluminum plates (with and without surface insulation) in fire in terms of critical temperature and time.}, language = {en}, number = {12}, urldate = {2022-03-05}, journal = {Metallurgical and Materials Transactions A}, author = {Kandare, E. and Feih, S. and Lattimer, B.Y. and Mouritz, A.P.}, month = dec, year = {2010}, pages = {3091--3099}, }
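The Larson–Miller parameter used in the entry above is commonly written as LMP = T·(C + log10(t_r)), with T the absolute temperature in kelvin, t_r the time to rupture in hours, and C a material constant (a value near 20 is often assumed). The short Python sketch below only illustrates this textbook relation; it is not the calibrated model of the cited paper, and the temperatures, times and constant used are hypothetical.

# Illustrative sketch of the Larson-Miller parameter, not code from the cited paper.
import math

def larson_miller(temperature_k, rupture_time_h, c=20.0):
    """Larson-Miller parameter: LMP = T * (C + log10(t_r))."""
    return temperature_k * (c + math.log10(rupture_time_h))

def time_to_failure(lmp, temperature_k, c=20.0):
    """Invert the LMP relation to estimate rupture time (hours) at a given temperature."""
    return 10.0 ** (lmp / temperature_k - c)

# Hypothetical example: an LMP value taken from a creep-rupture test at 600 K
# is used to estimate the failure time at a higher fire temperature.
lmp = larson_miller(temperature_k=600.0, rupture_time_h=0.5)
print(time_to_failure(lmp, temperature_k=650.0))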
@article{ding_probabilistic_2010, title = {Probabilistic {Inferences} in {Bayesian} {Networks}}, url = {http://arxiv.org/abs/1011.0935}, abstract = {A Bayesian network is a complete model of the variables and their relationships; it can be used to answer probabilistic queries about them. A Bayesian network can thus be considered a mechanism for automatically applying Bayes' theorem to complex problems. In the application of Bayesian networks, most of the work is related to probabilistic inferences. Updating a variable in any node of a Bayesian network might result in evidence propagation across the network. This paper sums up various inference techniques in Bayesian networks and provides guidance for algorithm calculation in probabilistic inference in Bayesian networks.}, urldate = {2021-11-21}, journal = {arXiv:1011.0935 [cs]}, author = {Ding, Jianguo}, month = nov, year = {2010}, note = {arXiv: 1011.0935}, keywords = {Computer Science - Artificial Intelligence, Computer Science - Networking and Internet Architecture}, }
@book{korb_bayesian_2010, title = {Bayesian {Artificial} {Intelligence}}, isbn = {978-1-4398-1592-2}, abstract = {Updated and expanded, Bayesian Artificial Intelligence, Second Edition provides a practical and accessible introduction to the main concepts, foundation, and applications of Bayesian networks. It focuses on both the causal discovery of networks and Bayesian inference procedures. Adopting a causal interpretation of Bayesian networks, the authors dis}, language = {en}, publisher = {CRC Press}, author = {Korb, Kevin B. and Nicholson, Ann E.}, month = dec, year = {2010}, note = {Google-Books-ID: LxXOBQAAQBAJ}, keywords = {Business \& Economics / Statistics, Computers / General, Computers / Software Development \& Engineering / Systems Analysis \& Design, Mathematics / Probability \& Statistics / General}, }
@inproceedings{young_fast_2010, title = {A {Fast} and {Stable} {Incremental} {Clustering} {Algorithm}}, doi = {10.1109/ITNG.2010.148}, abstract = {Clustering is a pivotal building block in many data mining applications and in machine learning in general. Most clustering algorithms in the literature pertain to off-line (or batch) processing, in which the clustering process repeatedly sweeps through a set of data samples in an attempt to capture its underlying structure in a compact and efficient way. However, many recent applications require that the clustering algorithm be online, or incremental, in the sense that there is no a priori set of samples to process; rather, samples are provided one at a time. Accordingly, the clustering algorithm is expected to gradually improve its prototype (or centroid) constructs. Several problems emerge in this context, particularly relating to the stability of the process and its speed of convergence. In this paper, we present a fast and stable incremental clustering algorithm, which is computationally modest and imposes minimal memory requirements. Simulation results clearly demonstrate the advantages of the proposed framework in a variety of practical scenarios.}, booktitle = {2010 {Seventh} {International} {Conference} on {Information} {Technology}: {New} {Generations}}, author = {Young, Steven and Arel, Itamar and Karnowski, Thomas P. and Rose, Derek}, month = apr, year = {2010}, keywords = {Clustering algorithms, Information technology, cluster stability}, pages = {204--209}, }
@article{lee_online_2010, title = {Online {Degradation} {Assessment} and {Adaptive} {Fault} {Detection} {Using} {Modified} {Hidden} {Markov} {Model}}, volume = {132}, issn = {1087-1357}, url = {https://asmedigitalcollection.asme.org/manufacturingscience/article/132/2/021010/468512/Online-Degradation-Assessment-and-Adaptive-Fault}, doi = {10.1115/1.4001247}, language = {en}, number = {2}, urldate = {2021-11-04}, journal = {Journal of Manufacturing Science and Engineering}, author = {Lee, Seungchul and Li, Lin and Ni, Jun}, month = apr, year = {2010}, note = {Publisher: American Society of Mechanical Engineers Digital Collection}, }
@inproceedings{glorot_understanding_2010, title = {Understanding the difficulty of training deep feedforward neural networks}, url = {http://proceedings.mlr.press/v9/glorot10a.html}, language = {en}, urldate = {2021-08-18}, booktitle = {Proceedings of the {Thirteenth} {International} {Conference} on {Artificial} {Intelligence} and {Statistics}}, publisher = {JMLR Workshop and Conference Proceedings}, author = {Glorot, Xavier and Bengio, Yoshua}, month = mar, year = {2010}, note = {ISSN: 1938-7228}, pages = {249--256}, }
@inproceedings{amaya_simprebal_2010, title = {{SIMPREBAL}: {An} expert system for real-time fault diagnosis of hydrogenerators machinery}, shorttitle = {{SIMPREBAL}}, doi = {10.1109/ETFA.2010.5641302}, abstract = {This paper proposes an expert system to aid plant maintenance and operations personnel in troubleshooting hydroelectric equipment. The expert system was implemented in an intelligent maintenance system called SIMPREBAL (Predictive Maintenance System of Balbina). The SIMPREBAL knowledge base, the architecture and the inference machine are presented in detail. The knowledge base is based on experts' empirical knowledge, work orders, manuals, technical documents and operation procedures. The predictive maintenance system architecture is based on the OSA-CBM framework that has seven layers. The software application has been successfully implemented in a client-server computational framework. The data acquisition and intelligent processing tasks were developed on the server side and the user interface on the client side. The intelligent processing task is an expert system that uses the JESS inference machine. For two years, SIMPREBAL has been used for monitoring and diagnosing hydrogenerator machinery malfunctions. The industrial application of SIMPREBAL proved its high reliability and accuracy. Finally, satisfactory fault diagnostics have been verified using maintenance indicators before and after the SIMPREBAL installation in the hydroelectric power plant. These valuable results are being used in the decision support layer to pre-schedule maintenance work, reduce inventory costs for spare parts and minimize the risk of catastrophic failure.}, booktitle = {2010 {IEEE} 15th {Conference} on {Emerging} {Technologies} {Factory} {Automation} ({ETFA} 2010)}, author = {Amaya, Edgar J. and Alvares, Alberto J.}, month = sep, year = {2010}, keywords = {Databases, Expert systems, Maintenance engineering, Monitoring, Petroleum, SIMPREBAL, Servers, client-server computational framework, client-server systems, data acquisition, decision support layer, decision support systems, electrical maintenance, expert system, expert systems, fault diagnosis, hydroelectric generators, hydroelectric power plant, hydroelectric power stations, hydrogenerators machinery, inference machine, inference mechanisms, intelligent maintenance system, intelligent processing task, knowledge base, predictive maintenance system, real-time fault diagnosis, user interface, user interfaces}, pages = {1--8}, }
@article{niu_intelligent_2010, title = {Intelligent condition monitoring and prognostics system based on data-fusion strategy}, volume = {37}, issn = {0957-4174}, url = {http://www.sciencedirect.com/science/article/pii/S095741741000518X}, doi = {10.1016/j.eswa.2010.06.014}, abstract = {This paper proposes an intelligent condition monitoring and prognostics system in condition-based maintenance architecture based on data-fusion strategy. Firstly, vibration signals are collected and trend features are extracted. Then features are normalized and sent into neural network for feature-level fusion. Next, data de-noising is conducted containing smoothing and wavelet decomposition to reduce the fluctuation and pick out trend information. The processed information is used for autonomic health degradation monitoring and data-driven prognostics. When the degradation curve crosses through the specified threshold of alarm, prognostics module is triggered and time-series prediction is performed using multi-nonlinear regression models. Furthermore, the predicted point estimate and interval estimate are fused, respectively. Finally, remaining useful life of operating machine, with its uncertainty interval, are assessed. The proposed system is evaluated by an experiment of health degradation monitoring and prognostics for a methane compressor. The experiment results show that the enhanced maintenance performances can be obtained, which make it suitable for advanced industry maintenance.}, language = {en}, number = {12}, urldate = {2020-03-30}, journal = {Expert Systems with Applications}, author = {Niu, Gang and Yang, Bo-Suk}, month = dec, year = {2010}, keywords = {Alarm setting, Condition monitoring, Data fusion, Data-driven prognostics, Degradation assessment, Remaining useful life prediction}, pages = {8831--8840}, }
@techreport{noauthor_1232-2010_2010, type = {Standard}, title = {1232-2010 - {IEEE} {Standard} for {Artificial} {Intelligence} {Exchange} and {Service} {Tie} to {All} {Test} {Environments} ({AI}-{ESTATE})}, institution = {Institute of Electrical and Electronics Engineers}, year = {2010}, }
@book{noauthor_astm_2010, address = {West Conshohocken, PA}, title = {{ASTM} {G40}-10b: {Standard} {Terminology} {Relating} to {Wear} and {Erosion}}, publisher = {ASTM International}, year = {2010}, }
@book{gama_knowledge_2010, edition = {1st}, title = {Knowledge {Discovery} from {Data} {Streams}}, isbn = {978-1-4398-2611-9}, abstract = {Since the beginning of the Internet age and the increased use of ubiquitous computing devices, the large volume and continuous flow of distributed data have imposed new constraints on the design of learning algorithms. Exploring how to extract knowledge structures from evolving and time-changing data, Knowledge Discovery from Data Streams presents a coherent overview of state-of-the-art research in learning from data streams. The book covers the fundamentals that are imperative to understanding data streams and describes important applications, such as TCP/IP traffic, GPS data, sensor networks, and customer click streams. It also addresses several challenges of data mining in the future, when stream mining will be at the core of many applications. These challenges involve designing useful and efficient data mining solutions applicable to real-world problems. In the appendix, the author includes examples of publicly available software and online data sets. This practical, up-to-date book focuses on the new requirements of the next generation of data mining. Although the concepts presented in the text are mainly about data streams, they also are valid for different areas of machine learning and data mining.}, publisher = {Chapman \& Hall/CRC}, author = {Gama, Joao}, year = {2010}, }
@article{peng_current_2010, title = {Current status of machine prognostics in condition-based maintenance: a review}, volume = {50}, issn = {1433-3015}, url = {https://doi.org/10.1007/s00170-009-2482-0}, doi = {10.1007/s00170-009-2482-0}, number = {1}, journal = {The International Journal of Advanced Manufacturing Technology}, author = {Peng, Ying and Dong, Ming and Zuo, Ming Jian}, year = {2010}, pages = {297--313}, }
@article{niu_development_2010, title = {Development of an optimized condition-based maintenance system by data fusion and reliability-centered maintenance}, volume = {95}, issn = {0951-8320}, url = {http://www.sciencedirect.com/science/article/pii/S0951832010000591}, doi = {10.1016/j.ress.2010.02.016}, number = {7}, journal = {Reliability Engineering \& System Safety}, author = {Niu, Gang and Yang, Bo-Suk and Pecht, Michael}, year = {2010}, keywords = {Condition-based maintenance, Data fusion, Reliability-centered maintenance}, pages = {786--796}, }
@article{mitchell_complexity_2010, title = {Complexity: {A} {Guided} {Tour}}, volume = {63}, doi = {10.1063/1.3326990}, journal = {Physics Today}, author = {Mitchell, Melanie and Toroczkai, Zoltan}, year = {2010}, pages = {47}, }
@article{luhr_incremental_2009, title = {Incremental clustering of dynamic data streams using connectivity based representative points}, volume = {68}, doi = {10.1016/j.datak.2008.08.006}, abstract = {We present an incremental graph-based clustering algorithm whose design was motivated by a need to extract and retain meaningful information from data streams produced by applications such as large scale surveillance, network packet inspection and financial transaction monitoring. To this end, the method we propose utilises representative points to both incrementally cluster new data and to selectively retain important cluster information within a knowledge repository. The repository can then be subsequently used to assist in the processing of new data, the archival of critical features for off-line analysis, and in the identification of recurrent patterns.}, journal = {Data \& Knowledge Engineering}, author = {Lühr, Sebastian and Lazarescu, Mihai}, month = jan, year = {2009}, pages = {1--27}, }
@inproceedings{forestiero_flockstream_2009, title = {{FlockStream}: {A} {Bio}-{Inspired} {Algorithm} for {Clustering} {Evolving} {Data} {Streams}}, shorttitle = {{FlockStream}}, doi = {10.1109/ICTAI.2009.60}, abstract = {Existing density-based data stream clustering algorithms use a two-phase scheme approach consisting of an online phase, in which raw data is processed to gather summary statistics, and an offline phase that generates the clusters by using the summary data. In this paper we propose a data stream clustering method based on a multi-agent system that uses a decentralized bottom-up self-organizing strategy to group similar data points. Data points are associated with agents and deployed onto a 2D space, to work simultaneously by applying a heuristic strategy based on a bio-inspired model, known as flocking model. Agents move onto the space for a fixed time and, when they encounter other agents into a predefined visibility range, they can decide to form a flock if they are similar. Flocks can join to form swarms of similar groups. This strategy allows to merge the two phases of density-based approaches and thus to avoid the offline cluster computation, since a swarm represents a cluster. Experimental results show the capability of the bio-inspired approach to obtain very good results on real and synthetic data sets.}, booktitle = {2009 21st {IEEE} {International} {Conference} on {Tools} with {Artificial} {Intelligence}}, author = {Forestiero, Agostino and Pizzuti, Clara and Spezzano, Giandomenico}, month = nov, year = {2009}, note = {ISSN: 2375-0197}, keywords = {Artificial intelligence, Biosensors, Clustering algorithms, Clustering methods, Credit cards, Data mining, High performance computing, Multiagent systems, Statistics, Telephony}, pages = {1--8}, }
@inproceedings{ren_density-based_2009, title = {Density-{Based} {Data} {Streams} {Clustering} over {Sliding} {Windows}}, volume = {5}, doi = {10.1109/FSKD.2009.553}, abstract = {Data stream clustering is an important task in data stream mining. In this paper, we propose SDStream, a new method for performing density-based data streams clustering over sliding windows. SDStream adopts CluStream clustering framework. In the online component, the potential core-micro-cluster and outlier micro-cluster structures are introduced to maintain the potential clusters and outliers. They are stored in the form of exponential histogram of cluster feature (EHCF) in main memory and are maintained by the maintenance of EHCFs. Outdated micro-clusters which need to be deleted are found by the value of t in temporal cluster feature (TCF). In the offline component, the final clusters of arbitrary shape are generated according to all the potential core-micro-clusters maintained online by DBSCAN algorithm. Experimental results show that SDStream which can generate clusters of arbitrary shape has a much higher clustering quality than CluStream which generates spherical clusters.}, booktitle = {2009 {Sixth} {International} {Conference} on {Fuzzy} {Systems} and {Knowledge} {Discovery}}, author = {Ren, Jiadong and Ma, Ruiqing}, month = aug, year = {2009}, keywords = {Cities and towns, Clustering algorithms, Data mining, Data structures, Educational institutions, Electronic mail, Fuzzy systems, Histograms, Partitioning algorithms, Shape, data stream, density-based clustering, sliding windows}, pages = {248--252}, }
@inproceedings{liu_rdenstream_2009, title = {{rDenStream}, {A} {Clustering} {Algorithm} over an {Evolving} {Data} {Stream}}, doi = {10.1109/ICIECS.2009.5363379}, abstract = {For mining new patterns from evolving data streams, most algorithms are inherited from the DenStream framework, which is realized via a sliding window. So in the early stage when a pattern emerges, its knowledge points can easily be mistaken for outliers and dropped. In most cases, these points can be ignored, but in some special applications which need to quickly and precisely master the emergence rule of some patterns, these points must play their roles. Based on DenStream, this paper proposes a three-step clustering algorithm, rDenStream, which presents the concept of outlier retrospect. In rDenStream clustering, dropped micro-clusters are stored in outside memory temporarily, and will be given a new chance to attend clustering to improve the clustering accuracy. Experiments modeled the arrival of the data stream as a Poisson process, and the results over a standard data set showed its advantage over other methods in the early phase of new pattern discovery.}, booktitle = {2009 {International} {Conference} on {Information} {Engineering} and {Computer} {Science}}, author = {Liu, Li-xiong and Huang, Hai and Guo, Yun-fei and Chen, Fu-cai}, month = dec, year = {2009}, note = {ISSN: 2156-7387}, keywords = {Analytical models, Clustering algorithms, Computational modeling, Data engineering, Intrusion detection, Mathematics, Partitioning algorithms, Research and development, Switching systems, Systems engineering and theory}, pages = {1--4}, }
@inproceedings{ruiz_c-denstream_2009, address = {Berlin, Heidelberg}, series = {Lecture {Notes} in {Computer} {Science}}, title = {C-{DenStream}: {Using} {Domain} {Knowledge} on a {Data} {Stream}}, isbn = {978-3-642-04747-3}, shorttitle = {C-{DenStream}}, doi = {10.1007/978-3-642-04747-3_23}, abstract = {Stream clustering algorithms are traditionally designed to process streams efficiently and to adapt to the evolution of the underlying population. This is done without assuming any prior knowledge about the data. However, in many cases, a certain amount of domain or background knowledge is available, and instead of simply using it for the external validation of the clustering results, this knowledge can be used to guide the clustering process. In non-stream data, domain knowledge is exploited in the context of semi-supervised clustering.}, language = {en}, booktitle = {Discovery {Science}}, publisher = {Springer}, author = {Ruiz, Carlos and Menasalvas, Ernestina and Spiliopoulou, Myra}, editor = {Gama, João and Costa, Vítor Santos and Jorge, Alípio Mário and Brazdil, Pavel B.}, year = {2009}, keywords = {Data Stream, Domain Knowledge, Rand Index, Synthetic Dataset, Time Stamp}, pages = {287--301}, }
@inproceedings{woolam_lacking_2009, address = {Berlin, Heidelberg}, series = {{ISMIS} '09}, title = {Lacking {Labels} in the {Stream}: {Classifying} {Evolving} {Stream} {Data} with {Few} {Labels}}, isbn = {978-3-642-04124-2}, shorttitle = {Lacking {Labels} in the {Stream}}, url = {https://doi.org/10.1007/978-3-642-04125-9_58}, doi = {10.1007/978-3-642-04125-9_58}, abstract = {This paper outlines a data stream classification technique that addresses the problem of insufficient and biased labeled data. It is practical to assume that only a small fraction of instances in the stream are labeled. A more practical assumption would be that the labeled data may not be independently distributed among all training documents. How can we ensure that a good classification model would be built in these scenarios, considering that the data stream also has evolving nature? In our previous work we applied semi-supervised clustering to build classification models using limited amount of labeled training data. However, it assumed that the data to be labeled should be chosen randomly. In our current work, we relax this assumption, and propose a label propagation framework for data streams that can build good classification models even if the data are not labeled randomly. Comparison with state-of-the-art stream classification techniques on synthetic and benchmark real data proves the effectiveness of our approach.}, urldate = {2022-03-28}, booktitle = {Proceedings of the 18th {International} {Symposium} on {Foundations} of {Intelligent} {Systems}}, publisher = {Springer-Verlag}, author = {Woolam, Clay and Masud, Mohammad M. and Khan, Latifur}, month = aug, year = {2009}, pages = {552--562}, }
@inproceedings{bifet_adaptive_2009, address = {Berlin, Heidelberg}, title = {Adaptive {Learning} from {Evolving} {Data} {Streams}}, isbn = {978-3-642-03915-7}, doi = {10.1007/978-3-642-03915-7_22}, abstract = {We propose and illustrate a method for developing algorithms that can adaptively learn from data streams that drift over time. As an example, we take Hoeffding Tree, an incremental decision tree inducer for data streams, and use it as a basis to build two new methods that can deal with distribution and concept drift: a sliding window-based algorithm, Hoeffding Window Tree, and an adaptive method, Hoeffding Adaptive Tree. Our methods are based on using change detectors and estimator modules at the right places; we choose implementations with theoretical guarantees in order to extend such guarantees to the resulting adaptive learning algorithm. A main advantage of our methods is that they require no guess about how fast or how often the stream will drift; other methods typically have several user-defined parameters to this effect.}, language = {en}, booktitle = {Advances in {Intelligent} {Data} {Analysis} {VIII}}, publisher = {Springer}, author = {Bifet, Albert and Gavaldà, Ricard}, editor = {Adams, Niall M. and Robardet, Céline and Siebes, Arno and Boulicaut, Jean-François}, year = {2009}, pages = {249--260}, }
@inproceedings{gama_issues_2009, address = {New York, NY, USA}, series = {{KDD} '09}, title = {Issues in evaluation of stream learning algorithms}, isbn = {978-1-60558-495-9}, url = {https://doi.org/10.1145/1557019.1557060}, doi = {10.1145/1557019.1557060}, abstract = {Learning from data streams is a research area of increasing importance. Nowadays, several stream learning algorithms have been developed. Most of them learn decision models that continuously evolve over time, run in resource-aware environments, detect and react to changes in the environment generating data. One important issue, not yet conveniently addressed, is the design of experimental work to evaluate and compare decision models that evolve over time. There are no golden standards for assessing performance in non-stationary environments. This paper proposes a general framework for assessing predictive stream learning algorithms. We defend the use of Predictive Sequential methods for error estimate - the prequential error. The prequential error allows us to monitor the evolution of the performance of models that evolve over time. Nevertheless, it is known to be a pessimistic estimator in comparison to holdout estimates. To obtain more reliable estimators we need some forgetting mechanism. Two viable alternatives are: sliding windows and fading factors. We observe that the prequential error converges to an holdout estimator when estimated over a sliding window or using fading factors. We present illustrative examples of the use of prequential error estimators, using fading factors, for the tasks of: i) assessing performance of a learning algorithm; ii) comparing learning algorithms; iii) hypothesis testing using McNemar test; and iv) change detection using Page-Hinkley test. In these tasks, the prequential error estimated using fading factors provide reliable estimators. In comparison to sliding windows, fading factors are faster and memory-less, a requirement for streaming applications. This paper is a contribution to a discussion in the good-practices on performance assessment when learning dynamic models that evolve over time.}, urldate = {2022-03-15}, booktitle = {Proceedings of the 15th {ACM} {SIGKDD} international conference on {Knowledge} discovery and data mining}, publisher = {Association for Computing Machinery}, author = {Gama, João and Sebastião, Raquel and Rodrigues, Pedro Pereira}, month = jun, year = {2009}, keywords = {data streams, evaluation design}, pages = {329--338}, }
@inproceedings{saffari_-line_2009, title = {On-line {Random} {Forests}}, doi = {10.1109/ICCVW.2009.5457447}, abstract = {Random Forests (RFs) are frequently used in many computer vision and machine learning applications. Their popularity is mainly driven by their high computational efficiency during both training and evaluation while achieving state-of-the-art results. However, in most applications RFs are used off-line. This limits their usability for many practical problems, for instance, when training data arrives sequentially or the underlying distribution is continuously changing. In this paper, we propose a novel on-line random forest algorithm. We combine ideas from on-line bagging, extremely randomized forests and propose an on-line decision tree growing procedure. Additionally, we add a temporal weighting scheme for adaptively discarding some trees based on their out-of-bag-error in given time intervals and consequently growing of new trees. The experiments on common machine learning data sets show that our algorithm converges to the performance of the off-line RF. Additionally, we conduct experiments for visual tracking, where we demonstrate real-time state-of-the-art performance on well-known scenarios and show good performance in case of occlusions and appearance changes where we outperform trackers based on on-line boosting. Finally, we demonstrate the usability of on-line RFs on the task of interactive real-time segmentation.}, booktitle = {2009 {IEEE} 12th {International} {Conference} on {Computer} {Vision} {Workshops}, {ICCV} {Workshops}}, author = {Saffari, Amir and Leistner, Christian and Santner, Jakob and Godec, Martin and Bischof, Horst}, month = sep, year = {2009}, keywords = {Application software, Bagging, Computational efficiency, Computer vision, Decision trees, Machine learning, Machine learning algorithms, Radio frequency, Training data, Usability}, pages = {1393--1400}, }
@inproceedings{atamuradov_failure_2009, title = {Failure diagnostics for railway point machines using expert systems}, doi = {10.1109/DEMPED.2009.5292755}, abstract = {Maintenance is an inevitable reality in industry. Maintenance of a system usually involves maintenance of multiple components with multiple failure modes, each of which may require different maintenance policy (i.e., corrective (CM), preventive (PM), or condition based maintenance (CBM)). A maintenance policy may be best for one component and the worst for the other (CM may be best for a very cheap and non-critical component and the worst for a critical one). This paper presents an economical analysis method that identifies the best maintenance policy for a failure mode and/or component of a system.}, booktitle = {2009 {IEEE} {International} {Symposium} on {Diagnostics} for {Electric} {Machines}, {Power} {Electronics} and {Drives}}, author = {Atamuradov, V. and Camci, F. and Baskan, S. and Sevkli, M.}, month = aug, year = {2009}, keywords = {Clustering algorithms, Control systems, Diagnostic expert systems, Diagnostics, Expert Systems, Fault diagnosis, Phase measurement, Rail transportation, Railway Turnouts, Remote monitoring, Signal analysis, Smoothing methods, Time Series Analysis, Time series analysis}, pages = {1--5}, }
@inproceedings{kranen_self-adaptive_2009, title = {Self-{Adaptive} {Anytime} {Stream} {Clustering}}, doi = {10.1109/ICDM.2009.47}, abstract = {Clustering streaming data requires algorithms which are capable of updating clustering results for the incoming data. As data is constantly arriving, time for processing is limited. Clustering has to be performed in a single pass over the incoming data and within the possibly varying inter-arrival times of the stream. Likewise, memory is limited, making it impossible to store all data. For clustering, we are faced with the challenge of maintaining a current result that can be presented to the user at any given time. In this work, we propose a parameter free algorithm that automatically adapts to the speed of the data stream. It makes best use of the time available under the current constraints to provide a clustering of the objects seen up to that point. Our approach incorporates the age of the objects to reflect the greater importance of more recent data. Moreover, we are capable of detecting concept drift, novelty and outliers in the stream. For efficient and effective handling, we introduce the ClusTree, a compact and self-adaptive index structure for maintaining stream summaries. Our experiments show that our approach is capable of handling a multitude of different stream characteristics for accurate and scalable anytime stream clustering.}, booktitle = {2009 {Ninth} {IEEE} {International} {Conference} on {Data} {Mining}}, author = {Kranen, Philipp and Assent, Ira and Baldauf, Corinna and Seidl, Thomas}, month = dec, year = {2009}, note = {ISSN: 2374-8486}, keywords = {Adaptive algorithm, Algorithm design and analysis, Clustering algorithms, Consumer behavior, Data analysis, Data mining, Memory management, NOT SELF-ADAPTIVE PARAMS, Partitioning algorithms, Sensor phenomena and characterization, Time factors, anytime algorithms, clustree, self-adaptive algorithms, stream clustering}, pages = {249--258}, }
@article{ribot_vers_2009, title = {Vers l'intégration diagnostic/pronostic pour la maintenance des systèmes complexes}, abstract = {The effectiveness of maintenance for industrial systems is a major economic issue for their commercial operation. The main difficulties and sources of inefficiency lie in the choice of maintenance actions: a poor choice can lead to unsatisfactory maintenance and to extra costs due to system unavailability. This thesis proposes a generic supervision architecture to support maintenance decision-making for a complex system. The architecture integrates diagnosis and prognosis capabilities that provide knowledge of the current and future state of the system. The diagnosis function determines the faulty components at the origin of failures; the prognosis function computes the time remaining before the next system failure. We present a formal, generic modeling framework for a complex system that captures all the knowledge required by the diagnosis and prognosis functions and characterizes an original diagnosis/prognosis coupling. A generic, adaptive prognosis function is defined using a Weibull model to evaluate the remaining useful life of the system probabilistically. Performance criteria for the proposed supervision architecture, based on properties of the diagnosis and the prognosis, are characterized. A design-feedback methodology is proposed to guarantee the performance of the diagnosis function by ensuring the diagnosability of the system. This research is applied to aeronautical systems within the ARCHISTIC project, in collaboration with Airbus and ENIT.}, language = {fr}, author = {Ribot, Pauline}, month = dec, year = {2009}, }
@book{koller_probabilistic_2009, address = {Cambridge, MA, USA}, series = {Adaptive {Computation} and {Machine} {Learning} series}, title = {Probabilistic {Graphical} {Models}: {Principles} and {Techniques}}, isbn = {978-0-262-01319-2}, shorttitle = {Probabilistic {Graphical} {Models}}, abstract = {A general framework for constructing and using probabilistic models of complex systems that would enable a computer to use available information for making decisions.}, language = {en}, publisher = {MIT Press}, author = {Koller, Daphne and Friedman, Nir}, editor = {Bach, Francis}, month = jul, year = {2009}, }
@techreport{settles_active_2009, type = {Technical {Report}}, title = {Active {Learning} {Literature} {Survey}}, url = {https://minds.wisconsin.edu/handle/1793/60660}, abstract = {The key idea behind active learning is that a machine learning algorithm can achieve greater accuracy with fewer labeled training instances if it is allowed to choose the training data from which it learns. An active learner may ask queries in the form of unlabeled instances to be labeled by an oracle (e.g., a human annotator). Active learning is well-motivated in many modern machine learning problems, where unlabeled data may be abundant but labels are difficult, time-consuming, or expensive to obtain. This report provides a general introduction to active learning and a survey of the literature. This includes a discussion of the scenarios in which queries can be formulated, and an overview of the query strategy frameworks proposed in the literature to date. An analysis of the empirical and theoretical evidence for active learning, a summary of several problem setting variants, and a discussion of related topics in machine learning research are also presented.}, language = {en}, urldate = {2021-03-25}, institution = {University of Wisconsin-Madison Department of Computer Sciences}, author = {Settles, Burr}, year = {2009}, }
@article{deloux_predictive_2009, title = {Predictive maintenance policy for a gradually deteriorating system subject to stress}, volume = {94}, issn = {0951-8320}, url = {http://www.sciencedirect.com/science/article/pii/S0951832008001336}, doi = {10.1016/j.ress.2008.04.002}, number = {2}, journal = {Reliability Engineering \& System Safety}, author = {Deloux, E. and Castanier, B. and Bérenguer, C.}, year = {2009}, keywords = {Control chart, Economic performance, Predictive maintenance, Stochastic modeling}, pages = {418--431}, }
@article{heng_rotating_2009, title = {Rotating machinery prognostics: {State} of the art, challenges and opportunities}, volume = {23}, issn = {0888-3270}, url = {http://www.sciencedirect.com/science/article/pii/S0888327008001489}, doi = {10.1016/j.ymssp.2008.06.009}, abstract = {Machinery prognosis is the forecast of the remaining operational life, future condition, or probability of reliable operation of an equipment based on the acquired condition monitoring data. This approach to modern maintenance practice promises to reduce downtime, spares inventory, maintenance costs, and safety hazards. Given the significance of prognostics capabilities and the maturity of condition monitoring technology, there have been an increasing number of publications on rotating machinery prognostics in the past few years. These publications covered a wide spectrum of prognostics techniques. This review article first synthesises and places these individual pieces of information in context, while identifying their merits and weaknesses. It then discusses the identified challenges, and in doing so, alerts researchers to opportunities for conducting advanced research in the field. Current methods for predicting rotating machinery failures are summarised and classified as conventional reliability models, condition-based prognostics models and models integrating reliability and prognostics. Areas in need of development or improvement include the integration of condition monitoring and reliability, utilisation of incomplete trending data, consideration of effects from maintenance actions and variable operating conditions, derivation of the non-linear relationship between measured data and actual asset health, consideration of failure interactions, practicability of requirements and assumptions, as well as development of performance evaluation frameworks.}, number = {3}, journal = {Mechanical Systems and Signal Processing}, author = {Heng, Aiwina and Zhang, Sheng and Tan, Andy C. C. and Mathew, Joseph}, year = {2009}, keywords = {Condition monitoring, Condition-based maintenance, Prognostics, Reliability}, pages = {724--739}, }
@article{noortwijk_survey_2009, title = {A survey of the application of gamma processes in maintenance}, volume = {94}, issn = {0951-8320}, url = {http://www.sciencedirect.com/science/article/pii/S0951832007001111}, doi = {10.1016/j.ress.2007.03.019}, abstract = {This article surveys the application of gamma processes in maintenance. Since the introduction of the gamma process in the area of reliability in 1975, it has been increasingly used to model stochastic deterioration for optimising maintenance. Because gamma processes are well suited for modelling the temporal variability of deterioration, they have proven to be useful in determining optimal inspection and maintenance decisions. An overview is given of the rich theoretical aspects as well as the successful maintenance applications of gamma processes. The statistical properties of the gamma process as a probabilistic stress–strength model are given and put in a historic perspective. Furthermore, methods for estimation, approximation, and simulation of gamma processes are reviewed. Finally, an extensive catalogue of inspection and maintenance models under gamma-process deterioration is presented with the emphasis on engineering applications.}, number = {1}, journal = {Reliability Engineering \& System Safety}, author = {Noortwijk, J. M. van}, year = {2009}, keywords = {Brownian motion with drift, Compound Poisson process, Deterioration, Gamma process, Inspection, Maintenance, Markov process, Renewal theory, Risk}, pages = {2--21}, }
@article{salahshoor_online_2008, series = {17th {IFAC} {World} {Congress}}, title = {Online {Statistical} {Monitoring} and {Fault} {Classification} of the {Tennessee} {Eastman} {Challenge} {Process} {Based} on {Dynamic} {Independent} {Component} {Analysis} and {Support} {Vector} {Machine}}, volume = {41}, issn = {1474-6670}, url = {https://www.sciencedirect.com/science/article/pii/S1474667016401357}, doi = {10.3182/20080706-5-KR-1001.01252}, abstract = {This paper presents a new online statistical monitoring method based on dynamic independent component analysis (DICA) to detect faults in the Tennessee Eastman challenge process. The proposed method employs a dynamic feature extraction approach to capture most of the inherent dynamic fault information. This leads to efficient fault detection with superior performance compared to the independent component analysis (ICA) approach in both detection rate and number of false alarms. A new statistic measure has been introduced to enhance the monitoring capabilities of ICA and DICA. An approach based on cumulative percent variance (CPV) has been incorporated to mechanize the selection of the required number of independent components in both ICA and DICA online monitoring methods. To choose the best time-lag order for each fault dynamic model in the DICA augmented data matrix, a multivariate auto-regressive exogenous (ARX) model structure has been adopted by validating the minimum Akaike's information criterion (AIC) index. An online procedure based on a multi-class support vector machine (SVM) with a Gaussian kernel function, set by sub-optimal width parameters, is employed to classify and isolate each fault. The SVM uses the one-against-all (OAA) algorithm for fault classification and sequential minimal optimization (SMO) to solve the classification problem. Performances of the developed process monitoring methods (ICA-SVM, DICA-SVM) are evaluated on the Tennessee Eastman challenge process (TE).}, language = {en}, number = {2}, urldate = {2022-05-02}, journal = {IFAC Proceedings Volumes}, author = {Salahshoor, Karim and Kiasi, Fariborz}, month = jan, year = {2008}, keywords = {Tennessee Eastman process, dynamic independent component analysis (DICA), fault detection, independent component analysis (ICA), statistical monitoring, support vector machine (SVM)}, pages = {7405--7412}, }
@inproceedings{masud_practical_2008, title = {A {Practical} {Approach} to {Classify} {Evolving} {Data} {Streams}: {Training} with {Limited} {Amount} of {Labeled} {Data}}, shorttitle = {A {Practical} {Approach} to {Classify} {Evolving} {Data} {Streams}}, doi = {10.1109/ICDM.2008.152}, abstract = {Recent approaches in classifying evolving data streams are based on supervised learning algorithms, which can be trained with labeled data only. Manual labeling of data is both costly and time consuming. Therefore, in a real streaming environment, where huge volumes of data appear at a high speed, labeled data may be very scarce. Thus, only a limited amount of training data may be available for building the classification models, leading to poorly trained classifiers. We apply a novel technique to overcome this problem by building a classification model from a training set having both unlabeled and a small amount of labeled instances. This model is built as micro-clusters using a semi-supervised clustering technique, and classification is performed with the k-nearest neighbor algorithm. An ensemble of these models is used to classify the unlabeled data. Empirical evaluation on both synthetic data and real botnet traffic reveals that our approach, using only a small amount of labeled data for training, outperforms state-of-the-art stream classification algorithms that use twenty times more labeled data than our approach.}, booktitle = {2008 {Eighth} {IEEE} {International} {Conference} on {Data} {Mining}}, author = {Masud, Mohammad M. and Gao, Jing and Khan, Latifur and Han, Jiawei and Thuraisingham, Bhavani}, month = dec, year = {2008}, note = {ISSN: 2374-8486}, keywords = {Buffer storage, Classification algorithms, Clustering algorithms, Computer science, Data mining, Data stream, Labeling, Probability distribution, Supervised learning, Testing, Training data, ensemble classification, semi-supervised clustering}, pages = {929--934}, }
@article{zhou_tracking_2008, title = {Tracking clusters in evolving data streams over sliding windows}, volume = {15}, issn = {0219-3116}, url = {https://doi.org/10.1007/s10115-007-0070-x}, doi = {10.1007/s10115-007-0070-x}, abstract = {Mining data streams poses great challenges due to the limited memory availability and real-time query response requirement. Clustering an evolving data stream is especially interesting because it captures not only the changing distribution of clusters but also the evolving behaviors of individual clusters. In this paper, we present a novel method for tracking the evolution of clusters over sliding windows. In our SWClustering algorithm, we combine the exponential histogram with the temporal cluster features, propose a novel data structure, the Exponential Histogram of Cluster Features (EHCF). The exponential histogram is used to handle the in-cluster evolution, and the temporal cluster features represent the change of the cluster distribution. Our approach has several advantages over existing methods: (1) the quality of the clusters is improved because the EHCF captures the distribution of recent records precisely; (2) compared with previous methods, the mechanism employed to adaptively maintain the in-cluster synopsis can track the cluster evolution better, while consuming much less memory; (3) the EHCF provides a flexible framework for analyzing the cluster evolution and tracking a specific cluster efficiently without interfering with other clusters, thus reducing the consumption of computing resources for data stream clustering. Both the theoretical analysis and extensive experiments show the effectiveness and efficiency of the proposed method.}, language = {en}, number = {2}, urldate = {2022-03-27}, journal = {Knowledge and Information Systems}, author = {Zhou, Aoying and Cao, Feng and Qian, Weining and Jin, Cheqing}, month = may, year = {2008}, pages = {181--214}, }
@article{zhu_semi-supervised_2008, title = {Semi-{Supervised} {Learning} {Literature} {Survey}}, volume = {2}, abstract = {We review the literature on semi-supervised learning, which is an area in machine learning and more generally, artificial intelligence. There has been a whole spectrum of interesting ideas on how to learn from both labeled and unlabeled data, i.e. semi-supervised learning. This document is a chapter excerpt from the author’s doctoral thesis (Zhu, 2005). However the author plans to update the online version frequently to incorporate the latest development in the field. Please obtain the latest version at http://www.cs.wisc.edu/{\textasciitilde}jerryzhu/pub/ssl\_survey.pdf}, journal = {Comput Sci, University of Wisconsin-Madison}, author = {Zhu, Xiaojin}, month = jul, year = {2008}, }
@article{agarwal_kernel-based_2008, series = {Progress in {Modeling}, {Theory}, and {Application} of {Computational} {Intelligence}}, title = {Kernel-based online machine learning and support vector reduction}, volume = {71}, issn = {0925-2312}, url = {https://www.sciencedirect.com/science/article/pii/S0925231208000581}, doi = {10.1016/j.neucom.2007.11.023}, abstract = {We apply kernel-based machine learning methods to online learning situations, and look at the related requirement of reducing the complexity of the learnt classifier. Online methods are particularly useful in situations which involve streaming data, such as medical or financial applications. We show that the concept of span of support vectors can be used to build a classifier that performs reasonably well while satisfying given space and time constraints, thus making it potentially suitable for such online situations. The span-based heuristic is observed to be effective under stringent memory limits (that is when the number of support vectors a machine can hold is very small).}, language = {en}, number = {7}, urldate = {2022-03-19}, journal = {Neurocomputing}, author = {Agarwal, Sumeet and Vijaya Saradhi, V. and Karnick, Harish}, month = mar, year = {2008}, keywords = {Budget algorithm, Classifier complexity reduction, Online SVMs, Span of support vectors, Support vector machines}, pages = {1230--1237}, }
@article{da_silva_uncertainty_2008, title = {Uncertainty analysis on the wear coefficient of {Archard} model}, volume = {41}, issn = {0301-679X}, url = {https://www.sciencedirect.com/science/article/pii/S0301679X07001740}, doi = {10.1016/j.triboint.2007.10.007}, abstract = {This paper proposes a probabilistic model for the wear of surfaces in contact. An initial value problem (IVP) is formulated from the particularization of Archard model for a contact in line. Based on this problem, two mathematical formulations for this model are presented. In the former, the wear coefficient is modeled as a random variable, while in the latter this coefficient is assumed as a stochastic process. The Karhunen–Loeve series is employed to represent the wear coefficient stochastic process. The solution of the IVP is the worn height stochastic process (WHSP). From this result, the functions of expectation and covariance are obtained. The results of mathematical formulations are compared with the simulations made by Monte Carlo and Latin Hypercube methods. The stochastic process presented better results, regarding the expectation and covariance functions. In relation to the propagation of uncertainty of wear coefficient through Archard model it was observed that in both presented problems, the variance of WHSP increased as the sliding time increased.}, language = {en}, number = {6}, urldate = {2022-03-05}, journal = {Tribology International}, author = {da Silva, Cláudio R. Ávila and Pintaude, Giuseppe}, month = jun, year = {2008}, keywords = {Monte Carlo simulation, Probabilistic approach, Wear modeling}, pages = {473--481}, }
@incollection{heckerman_tutorial_2008, address = {Berlin, Heidelberg}, series = {Studies in {Computational} {Intelligence}}, title = {A {Tutorial} on {Learning} with {Bayesian} {Networks}}, isbn = {978-3-540-85066-3}, url = {https://doi.org/10.1007/978-3-540-85066-3_3}, abstract = {A Bayesian network is a graphical model that encodes probabilistic relationships among variables of interest. When used in conjunction with statistical techniques, the graphical model has several advantages for data analysis. One, because the model encodes dependencies among all variables, it readily handles situations where some data entries are missing. Two, a Bayesian network can be used to learn causal relationships, and hence can be used to gain understanding about a problem domain and to predict the consequences of intervention. Three, because the model has both a causal and probabilistic semantics, it is an ideal representation for combining prior knowledge (which often comes in causal form) and data. Four, Bayesian statistical methods in conjunction with Bayesian networks offer an efficient and principled approach for avoiding the overfitting of data. In this paper, we discuss methods for constructing Bayesian networks from prior knowledge and summarize Bayesian statistical methods for using data to improve these models. With regard to the latter task, we describe methods for learning both the parameters and structure of a Bayesian network, including techniques for learning with incomplete data. In addition, we relate Bayesian-network methods for learning to techniques for supervised and unsupervised learning. We illustrate the graphical-modeling approach using a real-world case study.}, language = {en}, urldate = {2021-11-17}, booktitle = {Innovations in {Bayesian} {Networks}: {Theory} and {Applications}}, publisher = {Springer}, author = {Heckerman, David}, editor = {Holmes, Dawn E. and Jain, Lakhmi C.}, year = {2008}, doi = {10.1007/978-3-540-85066-3_3}, keywords = {Bayesian Network, Hide Variable, Intelligence Quotient, Marginal Likelihood, Network Structure, bn, tutorial}, pages = {33--82}, }
@article{mongillo_online_2008, title = {Online {Learning} with {Hidden} {Markov} {Models}}, volume = {20}, issn = {0899-7667}, url = {https://doi.org/10.1162/neco.2008.10-06-351}, doi = {10.1162/neco.2008.10-06-351}, abstract = {We present an online version of the expectation-maximization (EM) algorithm for hidden Markov models (HMMs). The sufficient statistics required for parameters estimation is computed recursively with time, that is, in an online way instead of using the batch forward-backward procedure. This computational scheme is generalized to the case where the model parameters can change with time by introducing a discount factor into the recurrence relations. The resulting algorithm is equivalent to the batch EM algorithm, for appropriate discount factor and scheduling of parameters update. On the other hand, the online algorithm is able to deal with dynamic environments, i.e., when the statistics of the observed data is changing with time. The implications of the online algorithm for probabilistic modeling in neuroscience are briefly discussed.}, number = {7}, urldate = {2021-11-15}, journal = {Neural Computation}, author = {Mongillo, Gianluigi and Deneve, Sophie}, month = jul, year = {2008}, keywords = {hmm, online hmm, online learning}, pages = {1706--1716}, }
@book{kobbacy_complex_2008, edition = {1}, series = {Springer {Series} in {Reliability} {Engineering}}, title = {Complex {System} {Maintenance} {Handbook}}, publisher = {Springer}, author = {Kobbacy, Khairy Ahmed Helmy and Murthy, D. N. Prabhakar}, year = {2008}, keywords = {complex system, maintenance}, }
@inproceedings{donmez_proactive_2008, address = {New York, NY, USA}, series = {{CIKM} '08}, title = {Proactive learning: cost-sensitive active learning with multiple imperfect oracles}, isbn = {978-1-59593-991-3}, shorttitle = {Proactive learning}, url = {https://doi.org/10.1145/1458082.1458165}, doi = {10.1145/1458082.1458165}, abstract = {Proactive learning is a generalization of active learning designed to relax unrealistic assumptions and thereby reach practical applications. Active learning seeks to select the most informative unlabeled instances and ask an omniscient oracle for their labels, so as to retrain the learning algorithm maximizing accuracy. However, the oracle is assumed to be infallible (never wrong), indefatigable (always answers), individual (only one oracle), and insensitive to costs (always free or always charges the same). Proactive learning relaxes all four of these assumptions, relying on a decision-theoretic approach to jointly select the optimal oracle and instance, by casting the problem as a utility optimization problem subject to a budget constraint. Results on multi-oracle optimization over several data sets demonstrate the superiority of our approach over the single-imperfect-oracle baselines in most cases.}, urldate = {2021-10-18}, booktitle = {Proceedings of the 17th {ACM} conference on {Information} and knowledge management}, publisher = {Association for Computing Machinery}, author = {Donmez, Pinar and Carbonell, Jaime G.}, month = oct, year = {2008}, keywords = {cost-sensitive active learning, decision theory, multiple oracles}, pages = {619--628}, }
@inproceedings{duh_learning_2008, address = {New York, NY, USA}, series = {{SIGIR} '08}, title = {Learning to rank with partially-labeled data}, isbn = {978-1-60558-164-4}, url = {https://doi.org/10.1145/1390334.1390379}, doi = {10.1145/1390334.1390379}, abstract = {Ranking algorithms, whose goal is to appropriately order a set of objects/documents, are an important component of information retrieval systems. Previous work on ranking algorithms has focused on cases where only labeled data is available for training (i.e. supervised learning). In this paper, we consider the question whether unlabeled (test) data can be exploited to improve ranking performance. We present a framework for transductive learning of ranking functions and show that the answer is affirmative. Our framework is based on generating better features from the test data (via KernelPCA) and incorporating such features via Boosting, thus learning different ranking functions adapted to the individual test queries. We evaluate this method on the LETOR (TREC, OHSUMED) dataset and demonstrate significant improvements.}, urldate = {2021-10-15}, booktitle = {Proceedings of the 31st annual international {ACM} {SIGIR} conference on {Research} and development in information retrieval}, publisher = {Association for Computing Machinery}, author = {Duh, Kevin and Kirchhoff, Katrin}, month = jul, year = {2008}, keywords = {boosting, information retrieval, kernel principal components analysis, learning to rank, transductive learning}, pages = {251--258}, }
@article{li_agglomerative_2008, title = {Agglomerative {Fuzzy} {K}-{Means} {Clustering} {Algorithm} with {Selection} of {Number} of {Clusters}}, volume = {20}, issn = {1558-2191}, doi = {10.1109/TKDE.2008.88}, abstract = {In this paper, we present an agglomerative fuzzy K-means clustering algorithm for numerical data, an extension to the standard fuzzy K-means algorithm by introducing a penalty term to the objective function to make the clustering process not sensitive to the initial cluster centers. The new algorithm can produce more consistent clustering results from different sets of initial clusters centers. Combined with cluster validation techniques, the new algorithm can determine the number of clusters in a data set, which is a well known problem in \$k\$-means clustering. Experimental results on synthetic data sets (2 to 5 dimensions, 500 to 5000 objects and 3 to 7 clusters), the BIRCH two-dimensional data set of 20000 objects and 100 clusters, and the WINE data set of 178 objects, 17 dimensions and 3 clusters from UCI, have demonstrated the effectiveness of the new algorithm in producing consistent clustering results and determining the correct number of clusters in different data sets, some with overlapping inherent clusters.}, number = {11}, journal = {IEEE Transactions on Knowledge and Data Engineering}, author = {Li, Mark Junjie and Ng, Michael K. and Cheung, Yiu-ming and Huang, Joshua Zhexue}, month = nov, year = {2008}, note = {Conference Name: IEEE Transactions on Knowledge and Data Engineering}, keywords = {Algorithm design and analysis, Application software, Clustering, Clustering algorithms, Clustering methods, Computer vision, Data mining, Genetic algorithms, Minimization methods, Mining methods and algorithms, Optimization methods, Pattern recognition, Statistical analysis}, pages = {1519--1534}, }
@article{maaten_visualizing_2008, title = {Visualizing {Data} using t-{SNE}}, volume = {9}, issn = {1533-7928}, url = {http://jmlr.org/papers/v9/vandermaaten08a.html}, abstract = {We present a new technique called "t-SNE" that visualizes high-dimensional data by giving each datapoint a location in a two or three-dimensional map. The technique is a variation of Stochastic Neighbor Embedding (Hinton and Roweis, 2002) that is much easier to optimize, and produces significantly better visualizations by reducing the tendency to crowd points together in the center of the map. t-SNE is better than existing techniques at creating a single map that reveals structure at many different scales. This is particularly important for high-dimensional data that lie on several different, but related, low-dimensional manifolds, such as images of objects from multiple classes seen from multiple viewpoints. For visualizing the structure of very large data sets, we show how t-SNE can use random walks on neighborhood graphs to allow the implicit structure of all of the data to influence the way in which a subset of the data is displayed. We illustrate the performance of t-SNE on a wide variety of data sets and compare it with many other non-parametric visualization techniques, including Sammon mapping, Isomap, and Locally Linear Embedding. The visualizations produced by t-SNE are significantly better than those produced by the other techniques on almost all of the data sets.}, number = {86}, urldate = {2021-08-07}, journal = {Journal of Machine Learning Research}, author = {Maaten, Laurens van der and Hinton, Geoffrey}, year = {2008}, pages = {2579--2605}, }
@article{li_out--order_2008, title = {Out-of-order processing: a new architecture for high-performance stream systems}, volume = {1}, issn = {2150-8097}, shorttitle = {Out-of-order processing}, url = {https://doi.org/10.14778/1453856.1453890}, doi = {10.14778/1453856.1453890}, abstract = {Many stream-processing systems enforce an order on data streams during query evaluation to help unblock blocking operators and purge state from stateful operators. Such in-order processing (IOP) systems not only must enforce order on input streams, but also require that query operators preserve order. This order-preserving requirement constrains the implementation of stream systems and incurs significant performance penalties, particularly for memory consumption. Especially for high-performance, potentially distributed stream systems, the cost of enforcing order can be prohibitive. We introduce a new architecture for stream systems, out-of-order processing (OOP), that avoids ordering constraints. The OOP architecture frees stream systems from the burden of order maintenance by using explicit stream progress indicators, such as punctuation or heartbeats, to unblock and purge operators. We describe the implementation of OOP stream systems and discuss the benefits of this architecture in depth. For example, the OOP approach has proven useful for smoothing workload bursts caused by expensive end-of-window operations, which can overwhelm internal communication paths in IOP approaches. We have implemented OOP in two stream systems, Gigascope and NiagaraST. Our experimental study shows that the OOP approach can significantly outperform IOP in a number of aspects, including memory, throughput and latency.}, number = {1}, urldate = {2021-03-26}, journal = {Proceedings of the VLDB Endowment}, author = {Li, Jin and Tufte, Kristin and Shkapenyuk, Vladislav and Papadimos, Vassilis and Johnson, Theodore and Maier, David}, month = aug, year = {2008}, pages = {274--288}, }
@inproceedings{shieh_isax_2008, address = {New York, NY, USA}, series = {{KDD} '08}, title = {{iSAX}: indexing and mining terabyte sized time series}, isbn = {978-1-60558-193-4}, shorttitle = {\textit{i}{SAX}}, url = {https://doi.org/10.1145/1401890.1401966}, doi = {10.1145/1401890.1401966}, abstract = {Current research in indexing and mining time series data has produced many interesting algorithms and representations. However, the algorithms and the size of data considered have generally not been representative of the increasingly massive datasets encountered in science, engineering, and business domains. In this work, we show how a novel multi-resolution symbolic representation can be used to index datasets which are several orders of magnitude larger than anything else considered in the literature. Our approach allows both fast exact search and ultra fast approximate search. We show how to exploit the combination of both types of search as sub-routines in data mining algorithms, allowing for the exact mining of truly massive real world datasets, containing millions of time series.}, urldate = {2020-10-01}, booktitle = {Proceedings of the 14th {ACM} {SIGKDD} international conference on {Knowledge} discovery and data mining}, publisher = {Association for Computing Machinery}, author = {Shieh, Jin and Keogh, Eamonn}, month = aug, year = {2008}, keywords = {data mining, indexing, representations, time series}, pages = {623--631}, }
@article{tang_online_2008, title = {Online fault diagnosis and prevention expert system for dredgers}, volume = {34}, issn = {0957-4174}, url = {http://www.sciencedirect.com/science/article/pii/S0957417406003022}, doi = {10.1016/j.eswa.2006.09.032}, abstract = {Faults during the dredging process often cause serious damage to the dredging system. High maintenance costs and prolonged fault recovery often keep dredging production and profit low. An online fault diagnosis and prevention expert system, aimed at preventing fault occurrence and speeding up the recovery process, is introduced in this paper. Because of the complexity of the fault judgment and prevention process and the frequently varying dynamics of dredging operations, a hybrid structure and inference process are adopted in the expert system. ANNs are introduced to adapt to the varying system dynamics and to predict the system state. The expert application also features online fault prevention and recovery decisions. A preliminary test has been carried out in an actual engineering project, and results of a performance evaluation experiment are also reported.}, language = {en}, number = {1}, urldate = {2020-05-06}, journal = {Expert Systems with Applications}, author = {Tang, Jian-Zhong and Wang, Qing-Feng}, month = jan, year = {2008}, keywords = {Dredging, Expert system applications, Expert systems, Fault diagnosis, Fault prevention}, pages = {511--521}, }
@inproceedings{heimes_recurrent_2008, title = {Recurrent neural networks for remaining useful life estimation}, doi = {10.1109/PHM.2008.4711422}, booktitle = {2008 {International} {Conference} on {Prognostics} and {Health} {Management}}, author = {Heimes, F. O.}, year = {2008}, keywords = {Degradation, Kalman filters, Life estimation, Machine learning, Machine learning algorithms, Management training, Pollution measurement, Prognostics, Prognostics and health management, Recurrent neural networks, Remaining Useful Life, Statistics, Testing, back-propagation, evolutionary algorithms, evolutionary computation, extended Kalman Filter training method, learning (artificial intelligence), nonlinear filters, recurrent neural nets, remaining useful life estimation}, pages = {1--6}, }
@inproceedings{tasoulis_visualising_2007, address = {Berlin, Heidelberg}, series = {Lecture {Notes} in {Computer} {Science}}, title = {Visualising the {Cluster} {Structure} of {Data} {Streams}}, isbn = {978-3-540-74825-0}, doi = {10.1007/978-3-540-74825-0_8}, abstract = {The increasing availability of streaming data is a consequence of the continuing advancement of data acquisition technology. Such data provides new challenges to the various data analysis communities. Clustering has long been a fundamental procedure for acquiring knowledge from data, and new tools are emerging that allow the clustering of data streams. However the dynamic, temporal components of streaming data provide extra challenges to the development of stream clustering and associated visualisation techniques. In this work we combine a streaming clustering framework with an extension of a static cluster visualisation method, in order to construct a surface that graphically represents the clustering structure of the data stream. The proposed method, OpticsStream, provides intuitive representations of the clustering structure as well as the manner in which this structure changes through time.}, language = {en}, booktitle = {Advances in {Intelligent} {Data} {Analysis} {VII}}, publisher = {Springer}, author = {Tasoulis, Dimitris K. and Ross, Gordon and Adams, Niall M.}, editor = {R. Berthold, Michael and Shawe-Taylor, John and Lavrač, Nada}, year = {2007}, keywords = {Cluster Structure, Data Stream, Reachability Distance, Stream Cluster, Streaming Data}, pages = {81--92}, }
@inproceedings{zhu_active_2007, title = {Active {Learning} from {Data} {Streams}}, doi = {10.1109/ICDM.2007.101}, abstract = {In this paper, we address a new research problem on active learning from data streams where data volumes grow continuously and labeling all data is considered expensive and impractical. The objective is to label a small portion of stream data from which a model is derived to predict newly arrived instances as accurately as possible. In order to tackle the challenges raised by data streams' dynamic nature, we propose a classifier-ensembling-based active learning framework which selectively labels instances from data streams to build an accurate classifier. A minimal variance principle is introduced to guide instance labeling from data streams. In addition, a weight updating rule is derived to ensure that our instance labeling process can adaptively adjust to dynamic drifting concepts in the data. Experimental results on synthetic and real-world data demonstrate the performances of the proposed efforts in comparison with other simple approaches.}, booktitle = {Seventh {IEEE} {International} {Conference} on {Data} {Mining} ({ICDM} 2007)}, author = {Zhu, Xingquan and Zhang, Peng and Lin, Xiaodong and Shi, Yong}, month = oct, year = {2007}, note = {ISSN: 2374-8486}, keywords = {Accuracy, Association rules, Computer science, Data engineering, Data mining, Decision making, Labeling, Predictive models, USA Councils, Uncertainty}, pages = {757--762}, }
@inproceedings{beringer_efficient_2007, address = {Berlin, Heidelberg}, title = {An {Efficient} {Algorithm} for {Instance}-{Based} {Learning} on {Data} {Streams}}, isbn = {978-3-540-73435-2}, doi = {10.1007/978-3-540-73435-2_4}, abstract = {The processing of data streams in general and the mining of such streams in particular have recently attracted considerable attention in various research fields. A key problem in stream mining is to extend existing machine learning and data mining methods so as to meet the increased requirements imposed by the data stream scenario, including the ability to analyze incoming data in an online, incremental manner, to observe tight time and memory constraints, and to appropriately respond to changes of the data characteristics and underlying distributions, amongst others. This paper considers the problem of classification on data streams and develops an instance-based learning algorithm for that purpose. The experimental studies presented in the paper suggest that this algorithm has a number of desirable properties that are not, at least not as a whole, shared by currently existing alternatives. Notably, our method is very flexible and thus able to adapt to an evolving environment quickly, a point of utmost importance in the data stream context. At the same time, the algorithm is relatively robust and thus applicable to streams with different characteristics.}, language = {en}, booktitle = {Advances in {Data} {Mining}. {Theoretical} {Aspects} and {Applications}}, publisher = {Springer}, author = {Beringer, Jürgen and Hüllermeier, Eyke}, editor = {Perner, Petra}, year = {2007}, pages = {34--48}, }
@article{widodo_support_2007, title = {Support vector machine in machine condition monitoring and fault diagnosis}, volume = {21}, issn = {0888-3270}, url = {https://www.sciencedirect.com/science/article/pii/S0888327007000027}, doi = {10.1016/j.ymssp.2006.12.007}, abstract = {Recently, the issue of machine condition monitoring and fault diagnosis as a part of maintenance system became global due to the potential advantages to be gained from reduced maintenance costs, improved productivity and increased machine availability. This paper presents a survey of machine condition monitoring and fault diagnosis using support vector machine (SVM). It attempts to summarize and review the recent research and developments of SVM in machine condition monitoring and diagnosis. Numerous methods have been developed based on intelligent systems such as artificial neural network, fuzzy expert system, condition-based reasoning, random forest, etc. However, the use of SVM for machine condition monitoring and fault diagnosis is still rare. SVM has excellent performance in generalization so it can produce high accuracy in classification for machine condition monitoring and diagnosis. Until 2006, the use of SVM in machine condition monitoring and fault diagnosis is tending to develop towards expertise orientation and problem-oriented domain. Finally, the ability to continually change and obtain a novel idea for machine condition monitoring and fault diagnosis using SVM will be future works.}, language = {en}, number = {6}, urldate = {2021-09-30}, journal = {Mechanical Systems and Signal Processing}, author = {Widodo, Achmad and Yang, Bo-Suk}, month = aug, year = {2007}, keywords = {Fault diagnosis, Machine condition monitoring, Support vector machine}, pages = {2560--2574}, }
@incollection{muller_dynamic_2007, address = {Berlin, Heidelberg}, title = {Dynamic {Time} {Warping}}, isbn = {978-3-540-74048-3}, url = {https://doi.org/10.1007/978-3-540-74048-3_4}, abstract = {Dynamic time warping (DTW) is a well-known technique to find an optimal alignment between two given (time-dependent) sequences under certain restrictions (Fig. 4.1). Intuitively, the sequences are warped in a nonlinear fashion to match each other. Originally, DTW has been used to compare different speech patterns in automatic speech recognition, see [170]. In fields such as data mining and information retrieval, DTW has been successfully applied to automatically cope with time deformations and different speeds associated with time-dependent data. In this chapter, we introduce and discuss the main ideas of classical DTW (Sect. 4.1) and summarize several modifications concerning local as well as global parameters (Sect. 4.2). To speed up classical DTW, we describe in Sect. 4.3 a general multiscale DTW approach. In Sect. 4.4, we show how DTW can be employed to identify all subsequences within a long data stream that are similar to a given query sequence. A discussion of related alignment techniques and references to the literature can be found in Sect. 4.5.}, language = {en}, urldate = {2021-03-26}, booktitle = {Information {Retrieval} for {Music} and {Motion}}, publisher = {Springer}, editor = {Müller, Meinard}, year = {2007}, doi = {10.1007/978-3-540-74048-3_4}, keywords = {Automatic Speech Recognition, Constraint Region, Cost Matrix, Dynamic Time Warping, Edit Distance}, pages = {69--84}, }
@article{adams_bayesian_2007, title = {Bayesian {Online} {Changepoint} {Detection}}, url = {http://arxiv.org/abs/0710.3742}, abstract = {Changepoints are abrupt variations in the generative parameters of a data sequence. Online detection of changepoints is useful in modelling and prediction of time series in application areas such as finance, biometrics, and robotics. While frequentist methods have yielded online filtering and prediction techniques, most Bayesian papers have focused on the retrospective segmentation problem. Here we examine the case where the model parameters before and after the changepoint are independent and we derive an online algorithm for exact inference of the most recent changepoint. We compute the probability distribution of the length of the current ``run,'' or time since the last changepoint, using a simple message-passing algorithm. Our implementation is highly modular so that the algorithm may be applied to a variety of types of data. We illustrate this modularity by demonstrating the algorithm on three different real-world data sets.}, urldate = {2020-10-02}, journal = {arXiv:0710.3742 [stat]}, author = {Adams, Ryan Prescott and MacKay, David J. C.}, month = oct, year = {2007}, note = {arXiv: 0710.3742}, keywords = {Statistics - Machine Learning}, }
@inproceedings{chen_density-based_2007, address = {San Jose, California, USA}, series = {{KDD} '07}, title = {Density-based clustering for real-time stream data}, isbn = {978-1-59593-609-7}, url = {https://doi.org/10.1145/1281192.1281210}, doi = {10.1145/1281192.1281210}, urldate = {2020-03-25}, booktitle = {Proceedings of the 13th {ACM} {SIGKDD} international conference on {Knowledge} discovery and data mining}, publisher = {Association for Computing Machinery}, author = {Chen, Yixin and Tu, Li}, year = {2007}, keywords = {d-stream, density-based clustering, sporadic grids, stream data mining}, pages = {133--142}, }
@inproceedings{swearingen_open_2007, title = {An {Open} {System} {Architecture} for {Condition} {Based} {Maintenance} {Overview}}, doi = {10.1109/AERO.2007.352921}, abstract = {The Boeing Company is developing an open integrated vehicle health management (IVHM) architecture; Smiths Aerospace is extending and applying it. The charter of Boeing Phantom Works is to provide tools and assets that enable next-generation vehicles to be more reliable, efficient, capable, and autonomous. In support of those goals, IVHM is receiving increased attention. The Open System Architecture for Condition Based Maintenance (OSACBM) is a standard for building IVHM applications that meet those goals. Boeing has created a software framework for developing generic tools based on OSACBM that support scalable, efficient modules which simplify IVHM integration in two ways: first, the integration improves the IVHM software models, software algorithms, data, communications, and embedded processors; second, integration facilitates the use of IVHM with command, control, communication, mission, flight, maintenance, and other major vehicle systems.}, booktitle = {2007 {IEEE} {Aerospace} {Conference}}, author = {Swearingen, Kevin and Majkowski, Wayne and Bruggeman, Brian and Gilbertson, Dan and Dunsdon, Jon and Sykes, Ben}, month = mar, year = {2007}, keywords = {Application software, Boeing Company, Buildings, Computer architecture, Embedded software, IVHM integration, Imaging phantoms, Maintenance, Mobile robots, OSACBM, Open systems, Remotely operated vehicles, Smiths Aerospace, Software tools, aerospace computing, aerospace engineering, condition based maintenance overview, integrated vehicle health management, maintenance engineering, open system architecture, open systems, software development tool chain, software engineering, software framework}, pages = {1--8}, }
@techreport{noauthor_condition_2007, type = {Standard}, title = {Condition monitoring and diagnostics of machines — {Data} processing, communication and presentation — {Part} 2: {Data} processing}, institution = {International Organization for Standardization}, year = {2007}, }
@inproceedings{bifet_learning_2007, title = {Learning from {Time}-{Changing} {Data} with {Adaptive} {Windowing}}, volume = {7}, doi = {10.1137/1.9781611972771.42}, booktitle = {Proceedings of the 7th {SIAM} {International} {Conference} on {Data} {Mining}}, author = {Bifet, Albert and Gavaldà, Ricard}, year = {2007}, }
@article{pau_survey_2007, title = {Survey of expert systems for fault detection, test generation and maintenance}, volume = {3}, doi = {10.1111/j.1468-0394.1986.tb00199.x}, journal = {Expert Systems}, author = {Pau, Louis-Francois}, year = {1986}, pages = {100--110}, }
@inproceedings{groba_architecture_2007, title = {Architecture of a {Predictive} {Maintenance} {Framework}}, doi = {10.1109/CISIM.2007.14}, booktitle = {6th {International} {Conference} on {Computer} {Information} {Systems} and {Industrial} {Management} {Applications} ({CISIM}'07)}, author = {Groba, C. and Cech, S. and Rosenthal, F. and Gossling, A.}, month = jun, year = {2007}, keywords = {Condition monitoring, Costs, Decision making, Electric breakdown, Isolation technology, Job shop scheduling, Predictive maintenance, Predictive models, Production, Yield estimation, enterprise systems, equipment failure prediction, predictive maintenance, preventive maintenance, system recovery}, pages = {59--64}, }
@inproceedings{gu_improved_2006, title = {An {Improved} {Manifold} {Learning} {Algorithm} for {Data} {Visualization}}, doi = {10.1109/ICMLC.2006.258599}, abstract = {Recently, a series of methods called manifold learning have been developed to visualize the convex but intrinsically flat manifolds such as Swiss roll. Isomap is a representative of them, which can easily discover low dimensional manifolds from high dimensional data but its computation complexity is quadratic. To speed up Isomap, L-Isomap was proposed to reduce the complexity by using landmark points. But how to select landmarks is an open problem. In this paper, we present an extension of Isomap focusing on the suitable selection of landmarks even the number of landmarks is quite small. In our method, each data point is assigned a weight according to the distance between it and its neighbors and point with a higher weight has a larger probability to be selected as a landmark point. The selection of landmarks falls into two phases. In 1st phase, n' candidate landmarks are selected only by the weights of data points. And in 2nd phase, n landmarks are refined from the candidates by maximizing the sum of distances between all pairwise landmarks. Experimental results showed that our method was more stable than L-Isomap and outperformed L-Isomap especially when the number of landmark points is quite small}, booktitle = {2006 {International} {Conference} on {Machine} {Learning} and {Cybernetics}}, author = {Gu, Rui-jun and Xu, Wen-bo}, month = aug, year = {2006}, note = {ISSN: 2160-1348}, keywords = {Data analysis, Data mining, Data visualization, Euclidean distance, Information technology, Isomap, Laplace equations, Lighting control, Linear approximation, Manifolds, Principal component analysis, dimensionality reduction, manifold learning}, pages = {1170--1173}, }
@book{yang_efficient_2006, title = {An {Efficient} {Algorithm} for {Local} {Distance} {Metric} {Learning}}, abstract = {Learning application-specific distance metrics from labeled data is critical for both statistical classification and information retrieval. Most of the earlier work in this area has focused on finding metrics that simultaneously optimize compactness and separability in a global sense. Specifically, such distance metrics attempt to keep all of the data points in each class close together while ensuring that data points from different classes are separated. However, particularly when classes exhibit multimodal data distributions, these goals conflict and thus cannot be simultaneously satisfied. This paper proposes a Local Distance Metric (LDM) that aims to optimize local compactness and local separability. We present an efficient algorithm that employs eigenvector analysis and bound optimization to learn the LDM from training data in a probabilistic framework. We demonstrate that LDM achieves significant improvements in both classification and retrieval accuracy compared to global distance learning and kernel-based KNN.}, author = {Yang, Liu and Jin, Rong and Sukthankar, Rahul and Liu, Yi}, month = jan, year = {2006}, }
@inproceedings{verron_fault_2006, title = {Fault {Diagnosis} with {Bayesian} {Networks}: {Application} to the {Tennessee} {Eastman} {Process}}, shorttitle = {Fault {Diagnosis} with {Bayesian} {Networks}}, doi = {10.1109/ICIT.2006.372301}, abstract = {The purpose of this article is to present and evaluate the performance of a new procedure for industrial process diagnosis. This method is based on the use of a Bayesian network as a classifier. But, as the classification performances are not very efficient in the space described by all variables of the process, an identification of important variables is made. This feature selection is made by computing the mutual information between each process variable and the class variable. The performances of this method are evaluated on the data of a benchmark problem: the Tennessee Eastman process. Three kinds of faults are taken into account on this complex process. The objective is to obtain the minimal recognition error rate for these 3 faults. Results are given and compared with results of other authors on the same data.}, booktitle = {2006 {IEEE} {International} {Conference} on {Industrial} {Technology}}, author = {Verron, Sylvain and Tiplica, Teodor and Kobi, Abdessamad}, month = dec, year = {2006}, keywords = {Aerospace industry, Bayesian methods, Computer networks, Error analysis, Fault detection, Fault diagnosis, Industrial control, Mutual information, Principal component analysis, Process control}, pages = {98--103}, }
@article{crammer_online_2006, title = {Online {Passive}-{Aggressive} {Algorithms}}, volume = {7}, issn = {1532-4435}, abstract = {We present a family of margin based online learning algorithms for various prediction tasks. In particular we derive and analyze algorithms for binary and multiclass categorization, regression, uniclass prediction and sequence prediction. The update steps of our different algorithms are all based on analytical solutions to simple constrained optimization problems. This unified view allows us to prove worst-case loss bounds for the different algorithms and for the various decision problems based on a single lemma. Our bounds on the cumulative loss of the algorithms are relative to the smallest loss that can be attained by any fixed hypothesis, and as such are applicable to both realizable and unrealizable settings. We demonstrate some of the merits of the proposed algorithms in a series of experiments with synthetic and real data sets.}, journal = {The Journal of Machine Learning Research}, author = {Crammer, Koby and Dekel, Ofer and Keshet, Joseph and Shalev-Shwartz, Shai and Singer, Yoram}, month = dec, year = {2006}, pages = {551--585}, }
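For readers skimming this entry, a minimal sketch of the binary-classification member of this family (the PA-I update with aggressiveness parameter C) is given below; it is not code from the paper, and the parameter values, synthetic data and names are purely illustrative.

import numpy as np

def pa1_train(X, y, C=1.0):
    """Online Passive-Aggressive (PA-I) binary classifier sketch.

    X: (n_samples, n_features) array; y: labels in {-1, +1}. Each round
    suffers the hinge loss and applies the closed-form constrained update
    tau = min(C, loss / ||x||^2), w += tau * y * x.
    """
    w = np.zeros(X.shape[1])
    for x, label in zip(X, y):
        margin = label * np.dot(w, x)
        loss = max(0.0, 1.0 - margin)                      # hinge loss
        if loss > 0.0:
            tau = min(C, loss / (np.dot(x, x) + 1e-12))    # PA-I step size
            w += tau * label * x
    return w

# illustrative usage with synthetic data (not from the paper)
rng = np.random.default_rng(0)
X = rng.normal(size=(200, 5))
y = np.sign(X[:, 0] + 0.5 * X[:, 1])
w = pa1_train(X, y)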
@inproceedings{pena_particle_2006, title = {Particle {Swarm} {Optimization} with {Discrete} {Recombination}: {An} {Online} {Optimizer} for {Evolvable} {Hardware}}, shorttitle = {Particle {Swarm} {Optimization} with {Discrete} {Recombination}}, doi = {10.1109/AHS.2006.56}, abstract = {Self-reconfigurable adaptive systems have the possibility of adapting their own hardware configuration. This feature provides enhanced performance and flexibility, reflected in computational cost reductions. Self-reconfigurable adaptation requires powerful optimization algorithms in order to search in a space of possible hardware configurations. If such algorithms are to be implemented on chip, they must also be as simple as possible, so the best performance can be achieved with the less cost in terms of logic resources, convergence speed, and power consumption. This paper presents hybrid bio-inspired optimization technique that introduces the concept of discrete recombination in a particle swarm optimizer, obtaining a simple and powerful algorithm, well suited for embedded applications. The proposed algorithm is validated using standard benchmark functions and used for training a neural network-based adaptive equalizer for communications systems}, booktitle = {First {NASA}/{ESA} {Conference} on {Adaptive} {Hardware} and {Systems} ({AHS}'06)}, author = {Pena, J. and Upegui, A. and Sanchez, E.}, month = jun, year = {2006}, keywords = {Adaptive systems, Communication standards, Computational efficiency, Convergence, Costs, Energy consumption, Hardware, Logic, Neural networks, Particle swarm optimization}, pages = {163--170}, }
@inproceedings{garg_pbirch_2006, title = {{PBIRCH}: {A} {Scalable} {Parallel} {Clustering} algorithm for {Incremental} {Data}}, shorttitle = {{PBIRCH}}, doi = {10.1109/IDEAS.2006.36}, abstract = {We present a parallel version of BIRCH with the objective of enhancing the scalability without compromising on the quality of clustering. The incoming data is distributed in a cyclic manner (or block cyclic manner if the data is bursty) to balance the load among processors. The algorithm is implemented on a message passing share-nothing model. Experiments show that for very large data sets the algorithm scales nearly linearly with the increasing number of processors. Experiments also show that clusters obtained by PBIRCH are comparable to those obtained using BIRCH}, booktitle = {2006 10th {International} {Database} {Engineering} and {Applications} {Symposium} ({IDEAS}'06)}, author = {Garg, Ashwani and Mangla, Ashish and Gupta, Neelima and Bhatnagar, Vasudha}, month = dec, year = {2006}, note = {ISSN: 1098-8068}, keywords = {Algorithm design and analysis, Broadcasting, Clustering algorithms, Computer science, Delay, Memory management, Message passing, Partitioning algorithms, Scalability, Time factors}, pages = {315--316}, }
@inproceedings{cao_density-based_2006, title = {Density-{Based} {Clustering} over an {Evolving} {Data} {Stream} with {Noise}}, volume = {2006}, doi = {10.1137/1.9781611972764.29}, booktitle = {Proceedings of the {Sixth} {SIAM} {International} {Conference} on {Data} {Mining}, {April} 20-22, 2006, {Bethesda}, {MD}, {USA}}, author = {Cao, Feng and Ester, Martin and Qian, Weining and Zhou, Aoying}, year = {2006}, }
@inproceedings{per_anders_akersten_maintenance_2006, address = {London}, title = {Maintenance {Related} {IEC} {Dependability} {Standards}}, isbn = {978-1-84628-814-2}, doi = {10.1007/978-1-84628-814-2_12}, language = {en}, booktitle = {Engineering {Asset} {Management}}, publisher = {Springer}, author = {Akersten, Per Anders}, editor = {Mathew, Joseph and Kennedy, Jim and Ma, Lin and Tan, Andy and Anderson, Deryk}, year = {2006}, keywords = {ILS, RCM, dependability, life cycle cost, logistic support, maintainability, maintenance, management, standardization}, pages = {115--119}, }
@inproceedings{mathew_review_2006, address = {London}, title = {A {Review} of the {MIMOSA} {OSA}-{EAI} {Database} for {Condition} {Monitoring} {Systems}}, isbn = {978-1-84628-814-2}, doi = {10.1007/978-1-84628-814-2_88}, abstract = {Condition monitoring systems are prevalent in industries that contain critical assets. Their use allows the detection of potential failures at an early stage in order to minimise downtime and maintenance costs of a system. Many condition monitoring systems are commercially available, however these systems are limited in their support of diagnostic and prognostic models developed by external parties. An attempt to develop an extensible condition monitoring software lead to the creation of an ISO 13374/OSA-CBM based system named BUDS, which focuses on vibration condition monitoring. Research into condition monitoring data management strategies led our team to implement a database to support the data-driven application. A database based on the MIMOSA OSA-EAI database specification was used to store asset and sensor location, measurement event, raw sensor and processed signal data. This paper investigates the use of the MIMOSA OSA-EAI database for condition monitoring systems, and presents several issues encountered during the development process.}, language = {en}, booktitle = {Engineering {Asset} {Management}}, publisher = {Springer}, author = {Mathew, Avin and Zhang, Liqun and Zhang, Sheng and Ma, Lin}, editor = {Mathew, Joseph and Kennedy, Jim and Ma, Lin and Tan, Andy and Anderson, Deryk}, year = {2006}, keywords = {BUDS, Condition Monitoring, Critical Assets, Development, Diagnostic, Downtime, MIMOSA, Prognostic, Sensor}, pages = {837--846}, }
@article{liao_maintenance_2006, title = {Maintenance of continuously monitored degrading systems}, volume = {175}, issn = {0377-2217}, url = {http://www.sciencedirect.com/science/article/pii/S0377221705005059}, doi = {10.1016/j.ejor.2005.05.017}, abstract = {This paper considers a condition-based maintenance model for continuously degrading systems under continuous monitoring. After maintenance, the states of the system are randomly distributed with residual damage. We investigate a realistic maintenance policy, referred to as condition-based availability limit policy, which achieves the maximum availability level of such a system. The optimum maintenance threshold is determined using a search algorithm. A numerical example for a degrading system modeled by a Gamma process is presented to demonstrate the use of this policy in practical applications.}, number = {2}, journal = {European Journal of Operational Research}, author = {Liao, Haitao and Elsayed, Elsayed A. and Chan, Ling-Yau}, year = {2006}, keywords = {Availability limit, Maintenance, Optimization, Replacement}, pages = {821--835}, }
@book{hopcroft_introduction_2006, address = {Boston, MA, USA}, title = {Introduction to {Automata} {Theory}, {Languages}, and {Computation} (3rd {Edition})}, isbn = {0-321-46225-4}, publisher = {Addison-Wesley Longman Publishing Co., Inc.}, author = {Hopcroft, John E. and Motwani, Rajeev and Ullman, Jeffrey D.}, year = {2006}, }
@article{jardine_review_2006, title = {A review on machinery diagnostics and prognostics implementing condition-based maintenance}, volume = {20}, issn = {0888-3270}, url = {http://www.sciencedirect.com/science/article/pii/S0888327005001512}, doi = {10.1016/j.ymssp.2005.09.012}, abstract = {Condition-based maintenance (CBM) is a maintenance program that recommends maintenance decisions based on the information collected through condition monitoring. It consists of three main steps: data acquisition, data processing and maintenance decision-making. Diagnostics and prognostics are two important aspects of a CBM program. Research in the CBM area grows rapidly. Hundreds of papers in this area, including theory and practical applications, appear every year in academic journals, conference proceedings and technical reports. This paper attempts to summarise and review the recent research and developments in diagnostics and prognostics of mechanical systems implementing CBM with emphasis on models, algorithms and technologies for data processing and maintenance decision-making. Realising the increasing trend of using multiple sensors in condition monitoring, the authors also discuss different techniques for multiple sensor data fusion. The paper concludes with a brief discussion on current practices and possible future trends of CBM.}, number = {7}, journal = {Mechanical Systems and Signal Processing}, author = {Jardine, Andrew K. S. and Lin, Daming and Banjevic, Dragan}, year = {2006}, keywords = {Condition monitoring, Condition-based maintenance, Diagnostics, Prognostics, Sensor data fusion, Signal processing}, pages = {1483--1510}, }
@inproceedings{gao_incremental_2005, address = {Berlin, Heidelberg}, series = {Lecture {Notes} in {Computer} {Science}}, title = {An {Incremental} {Data} {Stream} {Clustering} {Algorithm} {Based} on {Dense} {Units} {Detection}}, isbn = {978-3-540-31935-1}, doi = {10.1007/11430919_49}, abstract = {The data stream model of computation is often used for analyzing huge volumes of continuously arriving data. In this paper, we present a novel algorithm called DUCstream for clustering data streams. Our work is motivated by the needs to develop a single-pass algorithm that is capable of detecting evolving clusters, and yet requires little memory and computation time. To that end, we propose an incremental clustering method based on dense units detection. Evolving clusters are identified on the basis of the dense units, which contain relatively large number of points. For efficiency reasons, a bitwise dense unit representation is introduced. Our experimental results demonstrate DUCstream’s efficiency and efficacy.}, language = {en}, booktitle = {Advances in {Knowledge} {Discovery} and {Data} {Mining}}, publisher = {Springer}, author = {Gao, Jing and Li, Jianzhong and Zhang, Zhaogong and Tan, Pang-Ning}, editor = {Ho, Tu Bao and Cheung, David and Liu, Huan}, year = {2005}, pages = {420--425}, }
@article{desobry_online_2005, title = {An online kernel change detection algorithm}, volume = {53}, issn = {1941-0476}, doi = {10.1109/TSP.2005.851098}, abstract = {A number of abrupt change detection methods have been proposed in the past, among which are efficient model-based techniques such as the Generalized Likelihood Ratio (GLR) test. We consider the case where no accurate nor tractable model can be found, using a model-free approach, called Kernel change detection (KCD). KCD compares two sets of descriptors extracted online from the signal at each time instant: The immediate past set and the immediate future set. Based on the soft margin single-class Support Vector Machine (SVM), we build a dissimilarity measure in feature space between those sets, without estimating densities as an intermediary step. This dissimilarity measure is shown to be asymptotically equivalent to the Fisher ratio in the Gaussian case. Implementation issues are addressed; in particular, the dissimilarity measure can be computed online in input space. Simulation results on both synthetic signals and real music signals show the efficiency of KCD.}, number = {8}, journal = {IEEE Transactions on Signal Processing}, author = {Desobry, F. and Davy, M. and Doncarli, C.}, month = aug, year = {2005}, note = {Conference Name: IEEE Transactions on Signal Processing}, keywords = {Abrupt change detection, Density measurement, Detection algorithms, Extraterrestrial measurements, Kernel, Multiple signal classification, Object detection, Particle measurements, Signal processing, Support vector machines, Testing, kernel method, music segmentation, online, single-class SVM}, pages = {2961--2974}, }
@techreport{zhu_semi-supervised_2005, type = {Technical {Report}}, title = {Semi-{Supervised} {Learning} {Literature} {Survey}}, url = {https://minds.wisconsin.edu/handle/1793/60444}, abstract = {We review some of the literature on semi-supervised learning in this paper. Traditional classifiers need labeled data (feature / label pairs) to train. Labeled instances however are often difficult, expensive, or time consuming to obtain, as they require the efforts of experienced human annotators. Meanwhile unlabeled data may be relatively easy to collect, but there has been few ways to use them. Semi-supervised learning addresses this problem by using large amount of unlabeled data, together with the labeled data, to build better classifiers. Because semi-supervised learning requires less human effort and gives higher accuracy, it is of great interest both in theory and in practice.}, language = {en}, urldate = {2022-03-20}, institution = {University of Wisconsin-Madison Department of Computer Sciences}, author = {Zhu, Xiaojin (Jerry)}, year = {2005}, note = {Accepted: 2012-03-15T17:19:12Z}, }
@article{purushotham_multi-fault_2005, title = {Multi-fault diagnosis of rolling bearing elements using wavelet analysis and hidden {Markov} model based fault recognition}, volume = {38}, issn = {0963-8695}, url = {https://www.sciencedirect.com/science/article/pii/S0963869505000666}, doi = {10.1016/j.ndteint.2005.04.003}, abstract = {Due to the importance of rolling bearings as the most widely used machine elements, it is necessary to establish a suitable condition monitoring procedure to prevent malfunctions and breakages during operation. This paper presents a new method for detecting localized bearing defects based on wavelet transform. Bearing race faults have been detected by using discrete wavelet transform (DWT). Vibration signals from ball bearings having single and multiple point defects on inner race, outer race, ball fault and combination of these faults have been considered for analysis. Wavelet transform provides a variable resolution time–frequency distribution from which periodic structural ringing due to repetitive force impulses, generated upon the passing of each rolling element over the defect, are detected. It is found that the impulses appear periodically with a time period corresponding to characteristic defect frequencies. In this study, the diagnoses of ball bearing race faults have been investigated using wavelet transform. These results are compared with feature extraction data and results from spectrum analysis. It has been clearly shown that DWT can be used as an effective tool for detecting single and multiple faults in ball bearings. This paper also presents a new method of pattern recognition for bearing fault monitoring using hidden Markov Models (HMMs). Experimental results show that successful bearing fault detection rates as high as 99\% can be achieved by this approach.}, language = {en}, number = {8}, urldate = {2021-11-04}, journal = {NDT \& E International}, author = {Purushotham, V. and Narayanan, S. and Prasad, Suryanarayana A. N.}, month = dec, year = {2005}, keywords = {Bearing fault recognition, Discrete wavelet transform, Hidden Markov model, Impulses, Mel frequency complex cepstrum}, pages = {654--664}, }
@article{barke_structural_2005, title = {Structural {Health} {Monitoring} in the {Railway} {Industry}: {A} {Review}}, volume = {4}, issn = {1475-9217}, shorttitle = {Structural {Health} {Monitoring} in the {Railway} {Industry}}, url = {https://doi.org/10.1177/1475921705049764}, doi = {10.1177/1475921705049764}, abstract = {Wayside detection monitors critical parameters relating to the condition of in-service railway vehicles. Economic decisions about the maintenance of vehicles can be made, and servicing can occur when a particular vehicle is likely to cause even small amounts of damage to the track, to itself, or when the cost of damage is significant, such as in catastrophic failure. Vehicles with poorly performing axle bearings, out-of-round (skidded or spalled) wheels, vehicles which exhibit transient lateral motion (‘hunting’), and vehicles with poorly performing brakes are all likely to fall into the category of requiring maintenance, in order to save the track and the vehicle owner's money. In the present paper, the parameters that define vehicle condition and their measurable effects are stated. There are frequently a number of wayside detection methods of inspecting a vehicle for the same vehicle condition and each of these is described in detail. This investigation reveals the need for further research to enable rollingstock owners to make better decisions about the cost of operating their vehicles, based on the output from wayside detectors and the observed trends in wheel impact.}, language = {en}, number = {1}, urldate = {2021-10-13}, journal = {Structural Health Monitoring}, author = {Barke, D. and Chiu, W. K.}, month = mar, year = {2005}, note = {Publisher: SAGE Publications}, pages = {81--93}, }
@article{lin_neural_2005, title = {A neural network application for reliability modelling and condition-based predictive maintenance}, volume = {25}, issn = {1433-3015}, url = {https://doi.org/10.1007/s00170-003-1835-3}, doi = {10.1007/s00170-003-1835-3}, abstract = {Traditionally, decisions on the use of machinery are based on previous experience, historical data and common sense. However, carrying out an effective predictive maintenance plan, information about current machine conditions must be made known to the decision-maker. In this paper, a new method of obtaining maintenance information has been proposed. By integrating traditional reliability modelling techniques with a real-time, online performance estimation model, machine reliability information such as hazard rate and mean time between failures can be calculated. Essentially, this paper presents an innovative method to synthesise low level information (such as vibration signals) with high level information (like reliability statistics) to form a rigorous theoretical base for better machine maintenance.}, language = {en}, number = {1}, urldate = {2021-09-29}, journal = {The International Journal of Advanced Manufacturing Technology}, author = {Lin, Chang-Ching and Tseng, Hsien-Yu}, month = jan, year = {2005}, pages = {174--179}, }
@article{watson_dynamic_2005, title = {Dynamic {Modeling} and {Wear}-{Based} {Remaining} {Useful} {Life} {Prediction} of {High} {Power} {Clutch} {Systems}}, volume = {48}, doi = {10.1080/05698190590927451}, journal = {Tribology Transactions}, author = {Watson, Matthew and Byington, Carl and Edwards, Douglas and Amin, Sanket}, year = {2005}, pages = {208--217}, }
@article{stonebraker_8_2005, title = {The 8 {Requirements} of {Real}-{Time} {Stream} {Processing}}, volume = {34}, issn = {0163-5808}, url = {https://doi.org/10.1145/1107499.1107504}, doi = {10.1145/1107499.1107504}, number = {4}, journal = {SIGMOD Rec.}, author = {Stonebraker, Michael and Çetintemel, Uğur and Zdonik, Stan}, month = dec, year = {2005}, note = {Place: New York, NY, USA Publisher: Association for Computing Machinery}, pages = {42--47}, }
@article{bengtsson_technical_2004, title = {Technical design of condition based maintenance system—{A} case study using sound analysis and case-based reasoning}, abstract = {Productivity is a key weapon for manufacturing companies to stay competitive in a continuous growing global market. Increased productivity can be achieved through increased availability. This has directed focus on different maintenance types and maintenance strategies. Increased availability through efficient maintenance can be achieved through less corrective maintenance actions and more accurate preventive maintenance intervals. Condition Based Maintenance (CBM) is a technology that strives to identify incipient faults before they become critical which enables more accurate planning of the preventive maintenance. CBM can be achieved by utilizing complex technical systems or by humans manually monitoring the condition by using their experience, normally a mixture of both is used. Although CBM holds a lot of benefits compared to other maintenance types it is not yet commonly utilized in industry. One reason for this might be that the maturity level in complex technical CBM system is too low. This paper will acknowledge this possible reason, although not trying to resolve it, but focusing on system technology with component strategy and an open approach to condition parameters as the objective is fulfilled. This paper will theoretically discuss the technical components of a complete CBM system approach and by a case study illustrate how a CBM system for industrial robot fault detection/diagnosis can be designed using the Artificial Intelligence method Case-Based Reasoning and sound analysis.}, author = {Bengtsson, Marcus and Olsson, Ella and Funk, Peter and Jackson, Mats}, month = jan, year = {2004}, }
@article{lee_multiple-fault_2004, title = {Multiple-{Fault} {Diagnosis} of the {Tennessee} {Eastman} {Process} {Based} on {System} {Decomposition} and {Dynamic} {PLS}}, volume = {43}, issn = {0888-5885}, url = {https://doi.org/10.1021/ie049624u}, doi = {10.1021/ie049624u}, abstract = {The hybrid fault diagnosis method based on a combination of the signed digraph and partial least-squares (PLS) has the advantage of improving the diagnosis resolution, accuracy, and reliability, compared to those of previous qualitative methods, and of enhancing the ability to diagnose multiple fault [Ind. Eng. Chem. Res. 2003, 42, 6145−6154]. In this study, the method is applied for the multiple fault diagnosis of the Tennessee Eastman challenge process. The target process is decomposed using the local qualitative relationships of each measured variable. Linear and quadratic models based on dynamic PLS are built to estimate each measured variable, which is then compared with the estimated value in order to diagnose the fault. Through case studies, the proposed method demonstrated a good diagnosis capability compared with previous statistical methods.}, number = {25}, urldate = {2022-05-06}, journal = {Industrial \& Engineering Chemistry Research}, author = {Lee, Gibaek and Han, Chonghun and Yoon, En Sup}, month = dec, year = {2004}, note = {Publisher: American Chemical Society}, pages = {8037--8048}, }
@article{kivinen_online_2004, title = {Online learning with kernels}, volume = {52}, issn = {1941-0476}, doi = {10.1109/TSP.2004.830991}, abstract = {Kernel-based algorithms such as support vector machines have achieved considerable success in various problems in batch setting, where all of the training data is available in advance. Support vector machines combine the so-called kernel trick with the large margin idea. There has been little use of these methods in an online setting suitable for real-time applications. In this paper, we consider online learning in a reproducing kernel Hilbert space. By considering classical stochastic gradient descent within a feature space and the use of some straightforward tricks, we develop simple and computationally efficient algorithms for a wide range of problems such as classification, regression, and novelty detection. In addition to allowing the exploitation of the kernel trick in an online setting, we examine the value of large margins for classification in the online setting with a drifting target. We derive worst-case loss bounds, and moreover, we show the convergence of the hypothesis to the minimizer of the regularized risk functional. We present some experimental results that support the theory as well as illustrating the power of the new algorithms for online novelty detection.}, number = {8}, journal = {IEEE Transactions on Signal Processing}, author = {Kivinen, J. and Smola, A.J. and Williamson, R.C.}, month = aug, year = {2004}, note = {Conference Name: IEEE Transactions on Signal Processing}, keywords = {Australia, Condition monitoring, Convergence, Gaussian processes, Hilbert space, Kernel, Signal processing algorithms, Stochastic processes, Support vector machines, Training data}, pages = {2165--2176}, }
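A rough sketch of the kind of kernel-based stochastic gradient update analysed in this entry (often called NORMA), here for classification with the hinge loss; the RBF kernel, learning rate eta and regulariser lam are placeholder choices, and practical versions additionally truncate old coefficients to bound memory, which is omitted here.

import numpy as np

def rbf(x, z, gamma=0.5):
    # Gaussian kernel; gamma is an illustrative value, not from the paper
    return np.exp(-gamma * np.sum((x - z) ** 2))

def online_kernel_hinge(stream, eta=0.1, lam=0.01):
    """Online learning in an RKHS with hinge loss (NORMA-style sketch).

    The hypothesis is f(x) = sum_i alpha_i k(x_i, x); each step shrinks all
    coefficients by (1 - eta*lam) (regularisation) and, if the new example
    has margin below 1, appends a coefficient eta*y for it.
    """
    support, alphas = [], []
    for x, y in stream:                 # stream yields (feature vector, label in {-1,+1})
        f_x = sum(a * rbf(s, x) for s, a in zip(support, alphas))
        alphas = [(1.0 - eta * lam) * a for a in alphas]   # decay old terms
        if y * f_x < 1.0:                                  # hinge loss is active
            support.append(x)
            alphas.append(eta * y)
    return support, alphas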
@inproceedings{bickel_multi-view_2004, title = {Multi-view clustering}, doi = {10.1109/ICDM.2004.10095}, abstract = {We consider clustering problems in which the available attributes can be split into two independent subsets, such that either subset suffices for learning. Example applications of this multi-view setting include clustering of Web pages which have an intrinsic view (the pages themselves) and an extrinsic view (e.g., anchor texts of inbound hyperlinks); multi-view learning has so far been studied in the context of classification. We develop and study partitioning and agglomerative, hierarchical multi-view clustering algorithms for text data. We find empirically that the multi-view versions of k-means and EM greatly improve on their single-view counterparts. By contrast, we obtain negative results for agglomerative hierarchical multi-view clustering. Our analysis explains this surprising phenomenon.}, booktitle = {Fourth {IEEE} {International} {Conference} on {Data} {Mining} ({ICDM}'04)}, author = {Bickel, S. and Scheffer, T.}, month = nov, year = {2004}, keywords = {Data mining}, pages = {19--26}, }
@article{sun_fault_2004, title = {Fault diagnosis of electric power systems based on fuzzy {Petri} nets}, volume = {19}, issn = {1558-0679}, doi = {10.1109/TPWRS.2004.836256}, abstract = {In this paper, Fuzzy Petri Nets (FPN) is used as a modeling tool to build fault diagnosis models aimed to accurately diagnose faults when some incomplete and uncertain alarm information of protective relays and circuit breakers is detected. In order to understand the significance of fault diagnosis models more conveniently, the definition and structure of FPNs are necessarily introduced at first. Then, models of fault diagnosis based on FPN are built, and their corresponding logical testifications are carried out. Finally, the validity and feasibility of this method is illustrated by simulation examples. It is shown from seven cases that the faulted system elements can be diagnosed correctly by use of these models, and a satisfying result can also be achieved even in the situation with large amount of incomplete and uncertain alarm information.}, number = {4}, journal = {IEEE Transactions on Power Systems}, author = {Sun, Jing and Qin, Shi-Yin and Song, Yong-Hua}, month = nov, year = {2004}, note = {Conference Name: IEEE Transactions on Power Systems}, keywords = {Circuit breakers, Circuit faults, Electric power system (EPS), Electrical fault detection, Fault diagnosis, Fuzzy systems, Petri nets, Power system modeling, Power system protection, Power system relaying, Protective relaying, diagnostics, fault diagnosis, fuzzy Petri nets (FPN), petri net}, pages = {2053--2059}, }
@incollection{keogh_segmenting_2004, series = {Series in {Machine} {Perception} and {Artificial} {Intelligence}}, title = {Segmenting time series: a survey and novel approach}, volume = {57}, isbn = {978-981-238-290-0}, shorttitle = {Segmenting time series}, url = {https://www.worldscientific.com/doi/abs/10.1142/9789812565402_0001}, urldate = {2021-03-26}, booktitle = {Data {Mining} in {Time} {Series} {Databases}}, publisher = {World Scientific}, author = {Keogh, Eamonn and Chu, Selina and Hart, David and Pazzani, Michael}, month = jun, year = {2004}, doi = {10.1142/9789812565402_0001}, pages = {1--21}, }
@inproceedings{mouss_test_2004, title = {Test of {Page}-{Hinckley}, an approach for fault detection in an agro-alimentary production system}, volume = {2}, abstract = {The increasingly important automation of the manufacturing processes has made into evidence the needs in dependability for the installations. To ensure the industrial process dependability, the establishment of a monitoring system is primordial whose role is to recognize and to indicate in real time the behavior anomalies starting from information available on the system. Indeed the function of monitoring of a system is to detect, locate and diagnose the faults, which can affect its performances and its dependability. The objective of these communication consists of the study and the conception of a detection module based on the techniques of static analysis and modelling, the matter is of establishing the operations which starting from the data coming from the industrial system make it possible to detect the abnormal situations in order to prevent or to reduce the dysfunction risks. Thus, the study consists in developing a detection module of the dysfunction for the diagnosis tool system. Our study is interested particularly at the stage of fault detection, which precedes any stage of diagnosis, based on the application of the test of Page-Hinckley for a system of pasteurization of agro-alimentary production system.}, booktitle = {2004 5th {Asian} {Control} {Conference} ({IEEE} {Cat}. {No}.{04EX904})}, author = {Mouss, H. and Mouss, D. and Mouss, N. and Sefouhi, L.}, month = jul, year = {2004}, keywords = {Communication industry, Fault detection, Manufacturing automation, Manufacturing industries, Manufacturing processes, Monitoring, Page-Hinckley testing, Production systems, Real time systems, Risk analysis, System testing, agro-alimentary production system, alarm systems, diagnosis tool system, fault detection, fault diagnosis, manufacturing process, manufacturing processes, monitoring system, process monitoring}, pages = {815--818 Vol.2}, }
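The Page-Hinkley statistic applied in this entry can be summarised by the sketch below for detecting an upward shift in the mean of a monitored signal; the tolerance delta and alarm threshold lam are illustrative values, not those used in the paper.

def page_hinkley(stream, delta=0.005, lam=50.0):
    """Page-Hinkley test sketch for detecting an upward drift in the mean.

    Accumulates m_t = sum_i (x_i - mean_i - delta); an alarm is raised when
    m_t exceeds its running minimum by more than lam.
    """
    mean, m_t, m_min = 0.0, 0.0, 0.0
    alarms = []
    for t, x in enumerate(stream, start=1):
        mean += (x - mean) / t          # running mean of the signal
        m_t += x - mean - delta         # cumulative deviation
        m_min = min(m_min, m_t)
        if m_t - m_min > lam:           # possible fault / drift detected
            alarms.append(t)
    return alarms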
@inproceedings{aggarwal_framework_2004, address = {Toronto, Canada}, series = {{VLDB} '04}, title = {A framework for projected clustering of high dimensional data streams}, isbn = {978-0-12-088469-8}, urldate = {2020-03-25}, booktitle = {Proceedings of the {Thirtieth} international conference on {Very} large data bases - {Volume} 30}, publisher = {VLDB Endowment}, author = {Aggarwal, Charu C. and Han, Jiawei and Wang, Jianyong and Yu, Philip S.}, month = aug, year = {2004}, pages = {852--863}, }
@article{ma_accurate_2003, title = {Accurate {On}-line {Support} {Vector} {Regression}}, volume = {15}, issn = {0899-7667}, doi = {10.1162/089976603322385117}, abstract = {Batch implementations of support vector regression (SVR) are inefficient when used in an on-line setting because they must be retrained from scratch every time the training set is modified. Following an incremental support vector classification algorithm introduced by Cauwenberghs and Poggio (2001), we have developed an accurate on-line support vector regression (AOSVR) that efficiently updates a trained SVR function whenever a sample is added to or removed from the training set. The updated SVR function is identical to that produced by a batch algorithm. Applications of AOSVR in both on-line and cross-validation scenarios are presented. In both scenarios, numerical experiments indicate that AOSVR is faster than batch SVR algorithms with both cold and warm start.}, number = {11}, journal = {Neural Computation}, author = {Ma, Junshui and Theiler, James and Perkins, Simon}, month = nov, year = {2003}, note = {Conference Name: Neural Computation}, pages = {2683--2703}, }
@article{samanta_artificial_2003-1, title = {Artificial neural network based fault diagnostics of rolling element bearings using time-domain features}, volume = {17}, issn = {0888-3270}, url = {https://www.sciencedirect.com/science/article/pii/S0888327001914621}, doi = {10.1006/mssp.2001.1462}, abstract = {A procedure is presented for fault diagnosis of rolling element bearings through artificial neural network (ANN). The characteristic features of time-domain vibration signals of the rotating machinery with normal and defective bearings have been used as inputs to the ANN consisting of input, hidden and output layers. The features are obtained from direct processing of the signal segments using very simple preprocessing. The input layer consists of five nodes, one each for root mean square, variance, skewness, kurtosis and normalised sixth central moment of the time-domain vibration signals. The inputs are normalised in the range of 0.0 and 1.0 except for the skewness which is normalised between −1.0 and 1.0. The output layer consists of two binary nodes indicating the status of the machine—normal or defective bearings. Two hidden layers with different number of neurons have been used. The ANN is trained using backpropagation algorithm with a subset of the experimental data for known machine conditions. The ANN is tested using the remaining set of data. The effects of some preprocessing techniques like high-pass, band-pass filtration, envelope detection (demodulation) and wavelet transform of the vibration signals, prior to feature extraction, are also studied. The results show the effectiveness of the ANN in diagnosis of the machine condition. The proposed procedure requires only a few features extracted from the measured vibration data either directly or with simple preprocessing. The reduced number of inputs leads to faster training requiring far less iterations making the procedure suitable for on-line condition monitoring and diagnostics of machines.}, language = {en}, number = {2}, urldate = {2021-09-30}, journal = {Mechanical Systems and Signal Processing}, author = {Samanta, B. and Al-Balushi, K. R.}, month = mar, year = {2003}, pages = {317--328}, }
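The five time-domain inputs listed in the abstract above (RMS, variance, skewness, kurtosis and normalised sixth central moment) can be computed roughly as in the sketch below; windowing of the vibration signal, the normalisation to the stated ranges and the network itself are omitted, and the function name is illustrative.

import numpy as np

def time_domain_features(segment):
    """Feature vector in the spirit of this entry: RMS, variance, skewness,
    kurtosis and normalised sixth central moment of a vibration segment."""
    x = np.asarray(segment, dtype=float)
    mu = x.mean()
    sigma = x.std()                      # assumes a non-constant segment
    centred = x - mu
    rms = np.sqrt(np.mean(x ** 2))
    variance = np.mean(centred ** 2)
    skewness = np.mean(centred ** 3) / sigma ** 3
    kurtosis = np.mean(centred ** 4) / sigma ** 4
    sixth_moment = np.mean(centred ** 6) / sigma ** 6
    return np.array([rms, variance, skewness, kurtosis, sixth_moment])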
@article{samanta_artificial_2003, title = {Artificial neural networks and support vector machines with genetic algorithm for bearing fault detection}, volume = {16}, issn = {0952-1976}, url = {https://www.sciencedirect.com/science/article/pii/S0952197603001143}, doi = {10.1016/j.engappai.2003.09.006}, abstract = {A study is presented to compare the performance of bearing fault detection using two different classifiers, namely, artificial neural networks (ANNs) and support vector machines (SMVs). The time-domain vibration signals of a rotating machine with normal and defective bearings are processed for feature extraction. The extracted features from original and preprocessed signals are used as inputs to the classifiers for two-class (normal or fault) recognition. The classifier parameters, e.g., the number of nodes in the hidden layer in case of ANNs and the radial basis function kernel parameter (width) in case of SVMs along with the selection of input features are optimized using genetic algorithms. The classifiers are trained with a subset of the experimental data for known machine conditions and are tested using the remaining set of data. The procedure is illustrated using the experimental vibration data of a rotating machine. The roles of different vibration signals and signal preprocessing techniques are investigated. The results show the effectiveness of the features and the classifiers in detection of machine condition.}, language = {en}, number = {7}, urldate = {2021-09-30}, journal = {Engineering Applications of Artificial Intelligence}, author = {Samanta, B. and Al-Balushi, K. R. and Al-Araimi, S. A.}, month = oct, year = {2003}, keywords = {Bearing faults, Condition monitoring, Feature selection, Genetic algorithm, Neural network, Rotating machines, Signal processing, Support vector machines}, pages = {657--665}, }
@article{he_discovering_2003, title = {Discovering cluster-based local outliers}, volume = {24}, issn = {0167-8655}, url = {https://www.sciencedirect.com/science/article/pii/S0167865503000035}, doi = {10.1016/S0167-8655(03)00003-5}, abstract = {In this paper, we present a new definition for outlier: cluster-based local outlier, which is meaningful and provides importance to the local data behavior. A measure for identifying the physical significance of an outlier is designed, which is called cluster-based local outlier factor (CBLOF). We also propose the FindCBLOF algorithm for discovering outliers. The experimental results show that our approach outperformed the existing methods on identifying meaningful and interesting outliers.}, language = {en}, number = {9}, urldate = {2021-08-07}, journal = {Pattern Recognition Letters}, author = {He, Zengyou and Xu, Xiaofei and Deng, Shengchun}, month = jun, year = {2003}, keywords = {Clustering, Data mining, Outlier detection}, pages = {1641--1650}, }
@inproceedings{lin_symbolic_2003, address = {New York, NY, USA}, series = {{DMKD} '03}, title = {A symbolic representation of time series, with implications for streaming algorithms}, isbn = {978-1-4503-7422-4}, url = {https://doi.org/10.1145/882082.882086}, doi = {10.1145/882082.882086}, abstract = {The parallel explosions of interest in streaming data, and data mining of time series have had surprisingly little intersection. This is in spite of the fact that time series data are typically streaming data. The main reason for this apparent paradox is the fact that the vast majority of work on streaming data explicitly assumes that the data is discrete, whereas the vast majority of time series data is real valued. Many researchers have also considered transforming real valued time series into symbolic representations, noting that such representations would potentially allow researchers to avail of the wealth of data structures and algorithms from the text processing and bioinformatics communities, in addition to allowing formerly "batch-only" problems to be tackled by the streaming community. While many symbolic representations of time series have been introduced over the past decades, they all suffer from three fatal flaws. Firstly, the dimensionality of the symbolic representation is the same as the original data, and virtually all data mining algorithms scale poorly with dimensionality. Secondly, although distance measures can be defined on the symbolic approaches, these distance measures have little correlation with distance measures defined on the original time series. Finally, most of these symbolic approaches require one to have access to all the data, before creating the symbolic representation. This last feature explicitly thwarts efforts to use the representations with streaming algorithms. In this work we introduce a new symbolic representation of time series. Our representation is unique in that it allows dimensionality/numerosity reduction, and it also allows distance measures to be defined on the symbolic approach that lower bound corresponding distance measures defined on the original series. As we shall demonstrate, this latter feature is particularly exciting because it allows one to run certain data mining algorithms on the efficiently manipulated symbolic representation, while producing identical results to the algorithms that operate on the original data. Finally, our representation allows the real valued data to be converted in a streaming fashion, with only an infinitesimal time and space overhead. We will demonstrate the utility of our representation on the classic data mining tasks of clustering, classification, query by content and anomaly detection.}, urldate = {2020-10-01}, booktitle = {Proceedings of the 8th {ACM} {SIGMOD} workshop on {Research} issues in data mining and knowledge discovery}, publisher = {Association for Computing Machinery}, author = {Lin, Jessica and Keogh, Eamonn and Lonardi, Stefano and Chiu, Bill}, month = jun, year = {2003}, keywords = {data mining, data streams, discretize, symbolic, time series}, pages = {2--11}, }
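A compressed sketch of the symbolic discretisation idea described in this entry: z-normalise the series, average it over equal-width frames, then map each frame mean to a letter using breakpoints that cut the standard normal into equiprobable regions (shown here for a four-letter alphabet); segment handling and all parameters are simplified and illustrative.

import numpy as np

# standard normal quartiles: equiprobable breakpoints for an alphabet of size 4
BREAKPOINTS = [-0.6745, 0.0, 0.6745]
ALPHABET = "abcd"

def sax(series, n_segments=8):
    """Rough SAX-style symbolisation: z-normalise, reduce with piecewise
    aggregate approximation, then discretise each segment mean into a letter."""
    x = np.asarray(series, dtype=float)
    x = (x - x.mean()) / x.std()                 # z-normalise (assumes non-constant input)
    segments = np.array_split(x, n_segments)     # equal-width frames (PAA)
    means = [seg.mean() for seg in segments]
    return "".join(ALPHABET[np.searchsorted(BREAKPOINTS, m)] for m in means)

# illustrative usage
print(sax(np.sin(np.linspace(0, 4 * np.pi, 128))))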
@inproceedings{aggarwal_framework_2003, address = {Berlin, Germany}, series = {{VLDB} '03}, title = {A framework for clustering evolving data streams}, isbn = {978-0-12-722442-8}, urldate = {2020-03-25}, booktitle = {Proceedings of the 29th international conference on {Very} large data bases - {Volume} 29}, publisher = {VLDB Endowment}, author = {Aggarwal, Charu C. and Han, Jiawei and Wang, Jianyong and Yu, Philip S.}, month = sep, year = {2003}, pages = {81--92}, }
@techreport{byington_prognostic_2003, title = {Prognostic {Enhancements} to {Gas} {Turbine} {Diagnostic} {Systems}}, url = {https://apps.dtic.mil/docs/citations/ADA457841}, abstract = {The development of machinery health monitoring technologies has taken center stage within the DoD community in recent years. Existing health monitoring systems, such as the Integrated Condition Assessment System (ICAS) for NAVSEA, enable the diagnosis of mission critical problems using fault detection and diagnostic technologies. These technologies, however, have not specifically focused on the automated prediction of future condition (prognostics) of a machine based on the current diagnostic state of the machinery and its available operating and failure history data. Current efforts are focused on developing a generic architecture for the development of prognostic systems that will enable plug and play capabilities within existing systems. The designs utilize Open System Architecture (OSA) guidelines, such as OSA-CBM (Condition Based Maintenance), to provide these capabilities and enhance reusability of the software modules. One such implementation, which determines the optimal water wash interval to mitigate gas turbine compressor performance degradation due to salt deposit ingestion, is the focus of this paper. The module utilizes advanced probabilistic modeling and analysis technologies to forecast the future performance characteristics of the compressor and yield the optimal Time To Wash (TTW) from a cost/benefit standpoint. This paper describes the developed approach and architecture for developing prognostics using the gas turbine module.}, language = {en}, urldate = {2020-03-31}, institution = {IMPACT TECHNOLOGIES LLC STATE COLLEGE PA}, author = {Byington, Carl S. and Watson, Matthew and Roemer, Michael J. and Galie, Thomas R. and McGroarty, Jack J. and Savage, Christopher}, month = jan, year = {2003}, }
@techreport{noauthor_condition_2003, type = {Standard}, title = {Condition monitoring and diagnostics of machines — {Data} processing, communication and presentation — {Part} 1: {General} guidelines}, number = {ISO 13374-1:2003}, institution = {International Organization for Standardization}, year = {2003}, }
@article{klutke_critical_2003, title = {A critical look at the bathtub curve}, volume = {52}, doi = {10.1109/TR.2002.804492}, number = {1}, journal = {IEEE Transactions on Reliability}, author = {Klutke, G. A. and Kiessler, P. C. and Wortman, M. A.}, year = {2003}, pages = {125--129}, }
@inproceedings{ocallaghan_streaming-data_2002, title = {Streaming-data algorithms for high-quality clustering}, doi = {10.1109/ICDE.2002.994785}, abstract = {Streaming data analysis has recently attracted attention in numerous applications including telephone records, Web documents and click streams. For such analysis, single-pass algorithms that consume a small amount of memory are critical. We describe such a streaming algorithm that effectively clusters large data streams. We also provide empirical evidence of the algorithm's performance on synthetic and real data streams.}, booktitle = {Proceedings 18th {International} {Conference} on {Data} {Engineering}}, author = {O'Callaghan, L. and Mishra, N. and Meyerson, A. and Guha, S. and Motwani, R.}, month = feb, year = {2002}, note = {ISSN: 1063-6382}, keywords = {Algorithm design and analysis, Clustering algorithms, Computer science, Data analysis, Data engineering, Lab-on-a-chip, Laboratories, Partitioning algorithms, Telecommunications, Telephony}, pages = {685--694}, }
@article{jarvelin_cumulated_2002, title = {Cumulated gain-based evaluation of {IR} techniques}, volume = {20}, issn = {1046-8188}, url = {https://doi.org/10.1145/582415.582418}, doi = {10.1145/582415.582418}, abstract = {Modern large retrieval environments tend to overwhelm their users by their large output. Since all documents are not of equal relevance to their users, highly relevant documents should be identified and ranked first for presentation. In order to develop IR techniques in this direction, it is necessary to develop evaluation approaches and methods that credit IR methods for their ability to retrieve highly relevant documents. This can be done by extending traditional evaluation methods, that is, recall and precision based on binary relevance judgments, to graded relevance judgments. Alternatively, novel measures based on graded relevance judgments may be developed. This article proposes several novel measures that compute the cumulative gain the user obtains by examining the retrieval result up to a given ranked position. The first one accumulates the relevance scores of retrieved documents along the ranked result list. The second one is similar but applies a discount factor to the relevance scores in order to devaluate late-retrieved documents. The third one computes the relative-to-the-ideal performance of IR techniques, based on the cumulative gain they are able to yield. These novel measures are defined and discussed and their use is demonstrated in a case study using TREC data: sample system run results for 20 queries in TREC-7. As a relevance base we used novel graded relevance judgments on a four-point scale. The test results indicate that the proposed measures credit IR methods for their ability to retrieve highly relevant documents and allow testing of statistical significance of effectiveness differences. The graphs based on the measures also provide insight into the performance IR techniques and allow interpretation, for example, from the user point of view.}, number = {4}, urldate = {2022-04-16}, journal = {ACM Transactions on Information Systems}, author = {Järvelin, Kalervo and Kekäläinen, Jaana}, month = oct, year = {2002}, keywords = {Graded relevance judgments, cumulated gain}, pages = {422--446}, }
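The cumulated-gain measures proposed in this entry reduce to a few lines; the sketch below follows the original formulation, in which ranks below the log base are left undiscounted (later variants instead use log2(rank+1) or exponential gains), and the example relevance judgments are made up.

import math

def dcg(relevances, base=2):
    """Discounted cumulated gain, original formulation: the gain at ranks
    below the log base is undiscounted, later ranks are divided by log_base(rank)."""
    total = 0.0
    for rank, rel in enumerate(relevances, start=1):
        total += rel if rank < base else rel / math.log(rank, base)
    return total

def ndcg(relevances, base=2):
    """Normalised DCG: ratio of DCG to the DCG of the ideal (sorted) ranking."""
    ideal = dcg(sorted(relevances, reverse=True), base)
    return dcg(relevances, base) / ideal if ideal > 0 else 0.0

# illustrative graded judgments (0-3) for a ranked result list
print(ndcg([3, 2, 3, 0, 1, 2]))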
@inproceedings{lebold_framework_2002, title = {A framework for next generation machinery monitoring and diagnostics}, publisher = {Citeseer}, author = {Lebold, Mitchell and Reichard, Karl and Hejda, Petr and Bezdicek, Jan}, year = {2002}, }
@inproceedings{yamanishi_unifying_2002, address = {New York, NY, USA}, series = {{KDD} '02}, title = {A unifying framework for detecting outliers and change points from non-stationary time series data}, isbn = {978-1-58113-567-1}, url = {https://doi.org/10.1145/775047.775148}, doi = {10.1145/775047.775148}, abstract = {We are concerned with the issues of outlier detection and change point detection from a data stream. In the area of data mining, there have been increased interest in these issues since the former is related to fraud detection, rare event discovery, etc., while the latter is related to event/trend by change detection, activity monitoring, etc. Specifically, it is important to consider the situation where the data source is non-stationary, since the nature of data source may change over time in real applications. Although in most previous work outlier detection and change point detection have not been related explicitly, this paper presents a unifying framework for dealing with both of them on the basis of the theory of on-line learning of non-stationary time series. In this framework a probabilistic model of the data source is incrementally learned using an on-line discounting learning algorithm, which can track the changing data source adaptively by forgetting the effect of past data gradually. Then the score for any given data is calculated to measure its deviation from the learned model, with a higher score indicating a high possibility of being an outlier. Further change points in a data stream are detected by applying this scoring method into a time series of moving averaged losses for prediction using the learned model. Specifically we develop an efficient algorithms for on-line discounting learning of auto-regression models from time series data, and demonstrate the validity of our framework through simulation and experimental applications to stock market data analysis.}, urldate = {2020-10-04}, booktitle = {Proceedings of the eighth {ACM} {SIGKDD} international conference on {Knowledge} discovery and data mining}, publisher = {Association for Computing Machinery}, author = {Yamanishi, Kenji and Takeuchi, Jun-ichi}, month = jul, year = {2002}, pages = {676--681}, }
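As a much-simplified stand-in for the on-line discounting framework of this entry (the paper itself learns discounted auto-regression models), the sketch below keeps an exponentially forgotten Gaussian model, scores each point by its negative log-likelihood (outlier score), and smooths those scores with a moving average to obtain change-point scores; the discounting rate r and window size are arbitrary choices.

import math

def discounted_scores(stream, r=0.02, window=10):
    """Toy discounting-based outlier/change scoring for a scalar stream."""
    mean, var = 0.0, 1.0
    outlier_scores, change_scores, recent = [], [], []
    for x in stream:
        # score the point under the current model before updating it
        score = 0.5 * math.log(2 * math.pi * var) + (x - mean) ** 2 / (2 * var)
        outlier_scores.append(score)
        recent.append(score)
        if len(recent) > window:
            recent.pop(0)
        change_scores.append(sum(recent) / len(recent))    # smoothed change score
        # discounted (forgetting) update of the Gaussian model
        mean = (1 - r) * mean + r * x
        var = (1 - r) * var + r * (x - mean) ** 2
    return outlier_scores, change_scores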
@inproceedings{lebold_osa-cbm_2002, title = {{OSA}-{CBM} {Architecture} {Development} with {Emphasis} on {XML} {Implementations}}, author = {Lebold, Mitchell and Reichard, Karl}, year = {2002}, }
@article{qiu_damage_2002, title = {Damage mechanics approach for bearing lifetime prognostics}, volume = {16}, issn = {0888-3270}, url = {http://www.sciencedirect.com/science/article/pii/S0888327002914834}, doi = {10.1006/mssp.2002.1483}, abstract = {The ability to achieve accurate bearing prognostics is critical to the optimal maintenance of rotating machinery in the interest of cost and productivity. However, techniques to real time predict the lifetime of a bearing under practical operating conditions have not been well developed. In this paper, a stiffness-based prognostic model for bearing systems based on vibration response analysis and damage mechanics is discussed. As the bearing system is considered as a single-degree-of-freedom vibratory system, its natural frequency and its acceleration amplitude at the natural frequency can be related to the system stiffness. On the other hand, the relationship between failure lifetime, running time and stiffness variation can be established from the damage mechanics. Combining the above two, the natural frequency and the acceleration amplitude of a bearing system can be related to its running time and failure lifetime. Thus, the failure lifetime of a bearing system can be predicted on-line based on vibration measurement. Experiments have been performed on a tapered roller bearing life testing stand under various operation conditions to calibrate and to validate the proposed model. The comparison between model-calculated data and experimental results indicates that this model can be used to effectively predict the failure lifetime and the remaining life of a bearing system.}, number = {5}, journal = {Mechanical Systems and Signal Processing}, author = {Qiu, Jing and Seth, Brij B. and Liang, Steven Y. and Zhang, Cheng}, year = {2002}, pages = {817--829}, }
@inproceedings{oppenheimer_physically_2002, title = {Physically based diagnosis and prognosis of cracked rotor shafts}, volume = {4733}, url = {https://doi.org/10.1117/12.475502}, doi = {10.1117/12.475502}, booktitle = {Component and {Systems} {Diagnostics}, {Prognostics}, and {Health} {Management} {II}}, publisher = {SPIE}, author = {Oppenheimer, Charles H. and Loparo, Kenneth A.}, editor = {Willett, Peter K. and Kirubarajan, Thiagalingam}, year = {2002}, note = {Backup Publisher: International Society for Optics and Photonics}, pages = {122 -- 132}, }
@article{wang_survey_2002, title = {A survey of maintenance policies of deteriorating systems}, volume = {139}, issn = {0377-2217}, url = {http://www.sciencedirect.com/science/article/pii/S0377221701001977}, doi = {10.1016/S0377-2217(01)00197-7}, number = {3}, journal = {European Journal of Operational Research}, author = {Wang, Hongzhou}, year = {2002}, keywords = {Maintenance, Maintenance policy, Optimisation, Reliability, Replacement}, pages = {469--489}, }
@article{keogh_dimensionality_2001, title = {Dimensionality {Reduction} for {Fast} {Similarity} {Search} in {Large} {Time} {Series} {Databases}}, volume = {3}, issn = {0219-1377}, url = {https://doi.org/10.1007/PL00011669}, doi = {10.1007/PL00011669}, abstract = {The problem of similarity search in large time series databases has attracted much attention recently. It is a non-trivial problem because of the inherent high dimensionality of the data. The most promising solutions involve first performing dimensionality reduction on the data, and then indexing the reduced data with a spatial access method. Three major dimensionality reduction techniques have been proposed: Singular Value Decomposition (SVD), the Discrete Fourier transform (DFT), and more recently the Discrete Wavelet Transform (DWT). In this work we introduce a new dimensionality reduction technique which we call Piecewise Aggregate Approximation (PAA). We theoretically and empirically compare it to the other techniques and demonstrate its superiority. In addition to being competitive with or faster than the other methods, our approach has numerous other advantages. It is simple to understand and to implement, it allows more flexible distance measures, including weighted Euclidean queries, and the index can be built in linear time.}, language = {en}, number = {3}, urldate = {2022-09-02}, journal = {Knowledge and Information Systems}, author = {Keogh, Eamonn and Chakrabarti, Kaushik and Pazzani, Michael and Mehrotra, Sharad}, month = aug, year = {2001}, keywords = {Data mining, Dimensionality reduction, Indexing and retrieval, Time series}, pages = {263--286}, }
@inproceedings{hulten_mining_2001, address = {New York, NY, USA}, series = {{KDD} '01}, title = {Mining time-changing data streams}, isbn = {978-1-58113-391-2}, url = {https://doi.org/10.1145/502512.502529}, doi = {10.1145/502512.502529}, abstract = {Most statistical and machine-learning algorithms assume that the data is a random sample drawn from a stationary distribution. Unfortunately, most of the large databases available for mining today violate this assumption. They were gathered over months or years, and the underlying processes generating them changed during this time, sometimes radically. Although a number of algorithms have been proposed for learning time-changing concepts, they generally do not scale well to very large databases. In this paper we propose an efficient algorithm for mining decision trees from continuously-changing data streams, based on the ultra-fast VFDT decision tree learner. This algorithm, called CVFDT, stays current while making the most of old data by growing an alternative subtree whenever an old one becomes questionable, and replacing the old with the new when the new becomes more accurate. CVFDT learns a model which is similar in accuracy to the one that would be learned by reapplying VFDT to a moving window of examples every time a new example arrives, but with O(1) complexity per example, as opposed to O(w), where w is the size of the window. Experiments on a set of large time-changing data streams demonstrate the utility of this approach.}, urldate = {2022-03-16}, booktitle = {Proceedings of the seventh {ACM} {SIGKDD} international conference on {Knowledge} discovery and data mining}, publisher = {Association for Computing Machinery}, author = {Hulten, Geoff and Spencer, Laurie and Domingos, Pedro}, month = aug, year = {2001}, keywords = {Decision trees, Hoeffding bounds, concept drift, data streams, incremental learning, subsampling}, pages = {97--106}, }
@article{polikar_learn_2001, title = {Learn++: an incremental learning algorithm for supervised neural networks}, volume = {31}, issn = {1558-2442}, shorttitle = {Learn++}, doi = {10.1109/5326.983933}, abstract = {We introduce Learn++, an algorithm for incremental training of neural network (NN) pattern classifiers. The proposed algorithm enables supervised NN paradigms, such as the multilayer perceptron (MLP), to accommodate new data, including examples that correspond to previously unseen classes. Furthermore, the algorithm does not require access to previously used data during subsequent incremental learning sessions, yet at the same time, it does not forget previously acquired knowledge. Learn++ utilizes ensemble of classifiers by generating multiple hypotheses using training data sampled according to carefully tailored distributions. The outputs of the resulting classifiers are combined using a weighted majority voting procedure. We present simulation results on several benchmark datasets as well as a real-world classification task. Initial results indicate that the proposed algorithm works rather well in practice. A theoretical upper bound on the error of the classifiers constructed by Learn++ is also provided.}, number = {4}, journal = {IEEE Transactions on Systems, Man, and Cybernetics, Part C (Applications and Reviews)}, author = {Polikar, R. and Udpa, L. and Udpa, S. S. and Honavar, V.}, month = nov, year = {2001}, note = {Conference Name: IEEE Transactions on Systems, Man, and Cybernetics, Part C (Applications and Reviews)}, keywords = {Classification algorithms, Costs, Knowledge acquisition, Multilayer perceptrons, Neural networks, Pattern recognition, Stability, Training data, Upper bound, Voting}, pages = {497--508}, }
@article{lampinen_bayesian_2001, title = {Bayesian approach for neural networks—review and case studies}, volume = {14}, issn = {0893-6080}, url = {https://www.sciencedirect.com/science/article/pii/S0893608000000988}, doi = {10.1016/S0893-6080(00)00098-8}, abstract = {We give a short review on the Bayesian approach for neural network learning and demonstrate the advantages of the approach in three real applications. We discuss the Bayesian approach with emphasis on the role of prior knowledge in Bayesian models and in classical error minimization approaches. The generalization capability of a statistical model, classical or Bayesian, is ultimately based on the prior assumptions. The Bayesian approach permits propagation of uncertainty in quantities which are unknown to other assumptions in the model, which may be more generally valid or easier to guess in the problem. The case problem studied in this paper include a regression, a classification, and an inverse problem. In the most thoroughly analyzed regression problem, the best models were those with less restrictive priors. This emphasizes the major advantage of the Bayesian approach, that we are not forced to guess attributes that are unknown, such as the number of degrees of freedom in the model, non-linearity of the model with respect to each input variable, or the exact form for the distribution of the model residuals.}, language = {en}, number = {3}, urldate = {2021-11-20}, journal = {Neural Networks}, author = {Lampinen, Jouko and Vehtari, Aki}, month = apr, year = {2001}, keywords = {Bayesian data analysis, Comparison of models, Hierarchical models, Neural networks}, pages = {257--274}, }
@inproceedings{stenger_topology_2001, title = {Topology free hidden {Markov} models: application to background modeling}, volume = {1}, shorttitle = {Topology free hidden {Markov} models}, doi = {10.1109/ICCV.2001.937532}, abstract = {Hidden Markov models (HMMs) are increasingly being used in computer vision for applications such as: gesture analysis, action recognition from video, and illumination modeling. Their use involves an off-line learning step that is used as a basis for on-line decision making (i.e. a stationarity assumption on the model parameters). But, real-world applications are often non-stationary in nature. This leads to the need for a dynamic mechanism to learn and update the model topology as well as its parameters. This paper presents a new framework for HMM topology and parameter estimation in an online, dynamic fashion. The topology and parameter estimation is posed as a model selection problem with an MDL prior. Online modifications to the topology are made possible by incorporating a state splitting criterion. To demonstrate the potential of the algorithm, the background modeling problem is considered. Theoretical validation and real experiments are presented.}, booktitle = {Proceedings {Eighth} {IEEE} {International} {Conference} on {Computer} {Vision}. {ICCV} 2001}, author = {Stenger, B. and Ramesh, V. and Paragios, N. and Coetzee, F. and Buhmann, J.M.}, month = jul, year = {2001}, keywords = {Application software, Computer science, Computer vision, Hidden Markov models, Image analysis, Parameter estimation, Signal processing algorithms, State estimation, Topology, Visualization, hmm, unknown topology}, pages = {294--301 vol.1}, }
@book{noauthor_mimosa_2001, title = {{MIMOSA} {OSA}-{CBM}}, url = {http://www.mimosa.org/mimosa-osa-cbm/}, year = {2001}, }
@incollection{ben-hur_stability_2001, title = {A stability based method for discovering structure in clustered data}, isbn = {978-981-02-4777-5}, url = {https://www.worldscientific.com/doi/abs/10.1142/9789812799623_0002}, urldate = {2021-10-22}, booktitle = {Biocomputing 2002}, publisher = {World Scientific}, author = {Ben-Hur, Asa and Elisseeff, Andre and Guyon, Isabelle}, month = dec, year = {2001}, doi = {10.1142/9789812799623_0002}, keywords = {cluster validation, cluster validity, stability based, stability based cluster validation}, pages = {6--17}, }
@inproceedings{keogh_online_2001, title = {An online algorithm for segmenting time series}, doi = {10.1109/ICDM.2001.989531}, abstract = {In recent years, there has been an explosion of interest in mining time-series databases. As with most computer science problems, representation of the data is the key to efficient and effective solutions. One of the most commonly used representations is piecewise linear approximation. This representation has been used by various researchers to support clustering, classification, indexing and association rule mining of time-series data. A variety of algorithms have been proposed to obtain this representation, with several algorithms having been independently rediscovered several times. In this paper, we undertake the first extensive review and empirical comparison of all proposed techniques. We show that all these algorithms have fatal flaws from a data-mining perspective. We introduce a novel algorithm that we empirically show to be superior to all others in the literature.}, booktitle = {Proceedings 2001 {IEEE} {International} {Conference} on {Data} {Mining}}, author = {Keogh, E. and Chu, S. and Hart, D. and Pazzani, M.}, month = nov, year = {2001}, keywords = {Association rules, Change detection algorithms, Clustering algorithms, Computer science, Data mining, Databases, Explosions, Indexing, Piecewise linear approximation, Piecewise linear techniques, association rule mining, classification, clustering, data mining, data representation, empirical comparison, indexing, online algorithm, online operation, piecewise linear approximation, piecewise linear techniques, review, reviews, time series, time series segmentation, time-series database mining}, pages = {289--296}, }
@article{thurston_standards_2001, title = {Standards {Developments} for {Condition}-{Based} {Maintenance} {Systems}}, abstract = {An effort is underway to develop an Open System Architecture for Condition-Based Maintenance. The architecture development has focused on the definition of a distributed software architecture for CBM. The distributed software model was selected due to the recent emergence of enabling software technologies and the benefits of the approach. In particular, the availability of network connectivity provides a ready hardware backbone over which the software system may be distributed. The requirements for a general CBM architecture are defined, and the framework of the distributed architecture is provided.}, language = {en}, author = {Thurston, Michael and Lebold, Mitchell}, year = {2001}, pages = {12}, }
@article{yam_intelligent_2001, title = {Intelligent {Predictive} {Decision} {Support} {System} for {Condition}-{Based} {Maintenance}}, volume = {17}, issn = {1433-3015}, url = {https://doi.org/10.1007/s001700170173}, doi = {10.1007/s001700170173}, number = {5}, journal = {The International Journal of Advanced Manufacturing Technology}, author = {Yam, R. C. M. and Tse, P.W. and Li, L. and Tu, P.}, year = {2001}, pages = {383--391}, }
@book{moubray_reliability-centered_2001, title = {Reliability-centered {Maintenance}}, isbn = {978-0-8311-3146-3}, url = {https://books.google.fr/books?id=bNCVF0B7vpIC}, publisher = {Industrial Press}, author = {Moubray, J.}, year = {2001}, lccn = {97008176}, }
@article{roweis_nonlinear_2000, title = {Nonlinear {Dimensionality} {Reduction} by {Locally} {Linear} {Embedding}}, volume = {290}, url = {https://www.science.org/doi/10.1126/science.290.5500.2323}, doi = {10.1126/science.290.5500.2323}, abstract = {Many areas of science depend on exploratory data analysis and visualization. The need to analyze large amounts of multivariate data raises the fundamental problem of dimensionality reduction: how to discover compact representations of high-dimensional data. Here, we introduce locally linear embedding (LLE), an unsupervised learning algorithm that computes low-dimensional, neighborhood-preserving embeddings of high-dimensional inputs. Unlike clustering methods for local dimensionality reduction, LLE maps its inputs into a single global coordinate system of lower dimensionality, and its optimizations do not involve local minima. By exploiting the local symmetries of linear reconstructions, LLE is able to learn the global structure of nonlinear manifolds, such as those generated by images of faces or documents of text.}, number = {5500}, urldate = {2022-12-15}, journal = {Science}, author = {Roweis, Sam T. and Saul, Lawrence K.}, month = dec, year = {2000}, note = {Publisher: American Association for the Advancement of Science}, pages = {2323--2326}, }
@article{tenenbaum_global_2000, title = {A {Global} {Geometric} {Framework} for {Nonlinear} {Dimensionality} {Reduction}}, volume = {290}, url = {https://www.science.org/doi/10.1126/science.290.5500.2319}, doi = {10.1126/science.290.5500.2319}, abstract = {Scientists working with large volumes of high-dimensional data, such as global climate patterns, stellar spectra, or human gene distributions, regularly confront the problem of dimensionality reduction: finding meaningful low-dimensional structures hidden in their high-dimensional observations. The human brain confronts the same problem in everyday perception, extracting from its high-dimensional sensory inputs—30,000 auditory nerve fibers or 106 optic nerve fibers—a manageably small number of perceptually relevant features. Here we describe an approach to solving dimensionality reduction problems that uses easily measured local metric information to learn the underlying global geometry of a data set. Unlike classical techniques such as principal component analysis (PCA) and multidimensional scaling (MDS), our approach is capable of discovering the nonlinear degrees of freedom that underlie complex natural observations, such as human handwriting or images of a face under different viewing conditions. In contrast to previous algorithms for nonlinear dimensionality reduction, ours efficiently computes a globally optimal solution, and, for an important class of data manifolds, is guaranteed to converge asymptotically to the true structure.}, number = {5500}, urldate = {2022-12-15}, journal = {Science}, author = {Tenenbaum, Joshua B. and Silva, Vin de and Langford, John C.}, month = dec, year = {2000}, note = {Publisher: American Association for the Advancement of Science}, pages = {2319--2323}, }
@inproceedings{cauwenberghs_incremental_2000, address = {Cambridge, MA, USA}, series = {{NIPS}'00}, title = {Incremental and decremental support vector machine learning}, abstract = {An on-line recursive algorithm for training support vector machines, one vector at a time, is presented. Adiabatic increments retain the Kuhn-Tucker conditions on all previously seen training data, in a number of steps each computed analytically. The incremental procedure is reversible, and decremental "unlearning" offers an efficient method to exactly evaluate leave-one-out generalization performance. Interpretation of decremental unlearning in feature space sheds light on the relationship between generalization and geometry of the data.}, urldate = {2022-03-15}, booktitle = {Proceedings of the 13th {International} {Conference} on {Neural} {Information} {Processing} {Systems}}, publisher = {MIT Press}, author = {Cauwenberghs, Gert and Poggio, Tomaso}, month = jan, year = {2000}, pages = {388--394}, }
@incollection{russell_tennessee_2000, address = {London}, series = {Advances in {Industrial} {Control}}, title = {Tennessee {Eastman} {Process}}, isbn = {978-1-4471-0409-4}, url = {https://doi.org/10.1007/978-1-4471-0409-4_8}, abstract = {In Part IV the various data-driven process monitoring statistics are compared through application to a simulation of a chemical plant. The methods would ideally be illustrated on data collected during specific known faults from an actual chemical process, but this type of data is not publicly available for any large scale chemical plant. Instead, many academics in process monitoring perform studies based on data collected from computer simulations of a chemical process. The process monitoring methods in this book are tested on the data collected from the process simulation for the Tennessee Eastman process (TEP). The TEP has been widely used by the process monitoring community as a source of data for comparing various approaches [10, 24, 62, 63, 74, 77, 125, 133, 187, 189, 188].}, language = {en}, urldate = {2021-11-18}, booktitle = {Data-driven {Methods} for {Fault} {Detection} and {Diagnosis} in {Chemical} {Processes}}, publisher = {Springer}, author = {Russell, Evan L. and Chiang, Leo H. and Braatz, Richard D.}, editor = {Russell, Evan L. and Chiang, Leo H. and Braatz, Richard D.}, year = {2000}, doi = {10.1007/978-1-4471-0409-4_8}, keywords = {Homework Problem, Manipulate Variable, Process Monitoring, Reactor Cool Water, Water Inlet Temperature}, pages = {99--108}, }
@inproceedings{domingos_mining_2000, address = {Boston, Massachusetts, USA}, series = {{KDD} '00}, title = {Mining high-speed data streams}, isbn = {978-1-58113-233-5}, url = {https://doi.org/10.1145/347090.347107}, doi = {10.1145/347090.347107}, urldate = {2020-03-17}, booktitle = {Proceedings of the sixth {ACM} {SIGKDD} international conference on {Knowledge} discovery and data mining}, publisher = {Association for Computing Machinery}, author = {Domingos, Pedro and Hulten, Geoff}, month = aug, year = {2000}, keywords = {Hoeffding bounds, decision trees, disk-based algorithms, incremental learning, subsampling}, pages = {71--80}, }
@inproceedings{vitanyi_three_2000, title = {Three approaches to the quantitative definition of information in an individual pure quantum state}, doi = {10.1109/CCC.2000.856757}, booktitle = {Proceedings 15th {Annual} {IEEE} {Conference} on {Computational} {Complexity}}, author = {Vitanyi, P.}, month = jul, year = {2000}, note = {ISSN: 1093-0159}, keywords = {Computational modeling, Electrical capacitance tomography, Length measurement, Quantum computing, Quantum mechanics, Turing machines, algorithmic information, classical Kolmogorov complexity, computable real parameters, computational complexity, continuously many pure quantum states, individual pure quantum state, quantitative definition, quantum Kolmogorov complexity, shortest qubit program}, pages = {263--270}, }
@article{backlund_definition_2000, title = {The definition of system}, volume = {29}, doi = {10.1108/03684920010322055}, journal = {Kybernetes: The International Journal of Systems \& Cybernetics}, author = {Backlund, Alexander}, year = {2000}, }
@article{ankerst_optics_1999, title = {{OPTICS}: ordering points to identify the clustering structure}, volume = {28}, issn = {0163-5808}, shorttitle = {{OPTICS}}, url = {https://doi.org/10.1145/304181.304187}, doi = {10.1145/304181.304187}, abstract = {Cluster analysis is a primary method for database mining. It is either used as a stand-alone tool to get insight into the distribution of a data set, e.g. to focus further analysis and data processing, or as a preprocessing step for other algorithms operating on the detected clusters. Almost all of the well-known clustering algorithms require input parameters which are hard to determine but have a significant influence on the clustering result. Furthermore, for many real-data sets there does not even exist a global parameter setting for which the result of the clustering algorithm describes the intrinsic clustering structure accurately. We introduce a new algorithm for the purpose of cluster analysis which does not produce a clustering of a data set explicitly; but instead creates an augmented ordering of the database representing its density-based clustering structure. This cluster-ordering contains information which is equivalent to the density-based clusterings corresponding to a broad range of parameter settings. It is a versatile basis for both automatic and interactive cluster analysis. We show how to automatically and efficiently extract not only 'traditional' clustering information (e.g. representative points, arbitrary shaped clusters), but also the intrinsic clustering structure. For medium sized data sets, the cluster-ordering can be represented graphically and for very large data sets, we introduce an appropriate visualization technique. Both are suitable for interactive exploration of the intrinsic clustering structure offering additional insights into the distribution and correlation of the data.}, number = {2}, urldate = {2023-02-15}, journal = {ACM SIGMOD Record}, author = {Ankerst, Mihael and Breunig, Markus M. and Kriegel, Hans-Peter and Sander, Jörg}, month = jun, year = {1999}, keywords = {cluster analysis, database mining, visualization}, pages = {49--60}, }
@inproceedings{seymore_learning_1999, title = {Learning {Hidden} {Markov} {Model} {Structure} for {Information} {Extraction}}, url = {https://www.aaai.org/Library/Workshops/1999/ws99-11-007.php}, urldate = {2021-11-15}, author = {Seymore, Kristie and McCallum, Andrew and Rosenfeld, Ronald}, year = {1999}, }
@article{li_adaptive_1999, title = {Adaptive prognostics for rolling element bearing condition}, volume = {13}, issn = {0888-3270}, url = {http://www.sciencedirect.com/science/article/pii/S0888327098901832}, doi = {10.1006/mssp.1998.0183}, abstract = {Rolling element bearing failure is one of the foremost causes of breakdown in rotating machinery. This paper proposes a remaining life adaptation methodology based on mechanistic modeling and parameter tuning. Vibration measurement is used to estimate defect severity by monitoring the signals generated from rotating bearings. Through a defect propagation model and defect diagnostic model, an adaptive algorithm is developed to fine tune the parameters involved in the propagation model by comparing predicted and measured defect sizes. In this manner, the instantaneous rate of defect propagation can be captured despite defect growth behavior variation. Therefore, a precise estimation of the remaining life can be determined. Simulations and experimental results are presented to illustrate the implementation principles and to verify the applicability of the adaptive prognostic methodology.}, number = {1}, journal = {Mechanical Systems and Signal Processing}, author = {Li, Y. and Billington, S. and Zhang, C. and Kurfess, T. and Danyluk, S. and Liang, S.}, year = {1999}, pages = {103--113}, }
@article{harries_extracting_1998, title = {Extracting {Hidden} {Context}}, volume = {32}, issn = {1573-0565}, url = {https://doi.org/10.1023/A:1007420529897}, doi = {10.1023/A:1007420529897}, abstract = {Concept drift due to hidden changes in context complicates learning in many domains including financial prediction, medical diagnosis, and communication network performance. Existing machine learning approaches to this problem use an incremental learning, on-line paradigm. Batch, off-line learners tend to be ineffective in domains with hidden changes in context as they assume that the training set is homogeneous. An off-line, meta-learning approach for the identification of hidden context is presented. The new approach uses an existing batch learner and the process of contextual clustering to identify stable hidden contexts and the associated context specific, locally stable concepts. The approach is broadly applicable to the extraction of context reflected in time and spatial attributes. Several algorithms for the approach are presented and evaluated. A successful application of the approach to a complex flight simulator control task is also presented.}, language = {en}, number = {2}, urldate = {2022-04-26}, journal = {Machine Learning}, author = {Harries, Michael Bonnell and Sammut, Claude and Horn, Kim}, month = aug, year = {1998}, keywords = {batch learning, concept drift, context-sensitive learning, contextual clustering, hidden context}, pages = {101--126}, }
@inproceedings{blum_combining_1998, address = {New York, NY, USA}, series = {{COLT}' 98}, title = {Combining labeled and unlabeled data with co-training}, isbn = {978-1-58113-057-7}, url = {https://doi.org/10.1145/279943.279962}, doi = {10.1145/279943.279962}, urldate = {2022-03-20}, booktitle = {Proceedings of the eleventh annual conference on {Computational} learning theory}, publisher = {Association for Computing Machinery}, author = {Blum, Avrim and Mitchell, Tom}, month = jul, year = {1998}, pages = {92--100}, }
@book{noauthor_mimosa_1998, title = {{MIMOSA} {OSA}-{EAI}}, url = {http://www.mimosa.org/mimosa-osa-eai/}, year = {1998}, }
@inproceedings{ester_incremental_1998, address = {San Francisco, CA, USA}, series = {{VLDB} '98}, title = {Incremental {Clustering} for {Mining} in a {Data} {Warehousing} {Environment}}, isbn = {978-1-55860-566-4}, urldate = {2021-07-18}, booktitle = {Proceedings of the 24th {International} {Conference} on {Very} {Large} {Data} {Bases}}, publisher = {Morgan Kaufmann Publishers Inc.}, author = {Ester, Martin and Kriegel, Hans-Peter and Sander, Jörg and Wimmer, Michael and Xu, Xiaowei}, month = aug, year = {1998}, pages = {323--333}, }
@article{chande_expert-based_1998, title = {Expert-based maintenance: a study of its effectiveness}, volume = {47}, issn = {1558-1721}, shorttitle = {Expert-based maintenance}, doi = {10.1109/24.690904}, abstract = {Monitoring of computer-based systems by a supervisory computer is common for high-availability systems. Expert-based supervisory systems are being proposed which are able to use dynamic information of the system to operate them with increased reliability. This paper brings out the functional capabilities of expert-based maintenance, and presents an analytic model to evaluate the effectiveness of the expert system in maintenance. The abilities of the expert system to maintain the host are parameterized and their effects on the performance of the system are studied. The results show possible improvement in the performance of a host due to expert-based maintenance.}, number = {1}, journal = {IEEE Transactions on Reliability}, author = {Chande, P.K. and Tokekar, S.V.}, month = mar, year = {1998}, note = {Conference Name: IEEE Transactions on Reliability}, keywords = {Computerized monitoring, Condition monitoring, Control systems, Degradation, Expert systems, Hardware, Industrial economics, Knowledge based systems, Real time systems, Software maintenance, computer maintenance, computer-based systems monitoring, expert systems, expert-based maintenance, expert-based supervisory systems, functional capabilities, high-availability systems, performance modeling, reliability, supervisory computer}, pages = {53--58}, }
@inproceedings{koller_object-oriented_1997, address = {Providence, Rhode Island}, series = {{UAI}'97}, title = {Object-oriented {Bayesian} networks}, isbn = {978-1-55860-485-8}, abstract = {Bayesian networks provide a modeling language and associated inference algorithm for stochastic domains. They have been successfully applied in a variety of medium-scale applications. However, when faced with a large complex domain, the task of modeling using Bayesian networks begins to resemble the task of programming using logical circuits. In this paper, we describe an object-oriented Bayesian network (OOBN) language, which allows complex domains to be described in terms of inter-related objects. We use a Bayesian network fragment to describe the probabilistic relations between the attributes of an object. These attributes can themselves be objects, providing a natural framework for encoding part-of hierarchies, Classes are used to provide a reusable probabilistic model which can be applied to multiple similar objects. Classes also support inheritance of model fragments from a class to a subclass, allowing the common aspects of related classes to be defined only once. Our language has clear declarative semantics: an OOBN can be interpreted as a stochastic functional program, so that it uniquely specifies a probabilistic model. We provide an inference algorithm for OOBNs, and show that much of the structural information encoded by an OOBN--particularly the encapsulation of variables within an object and the reuse of model fragments in different contexts---can also be used to speed up the inference process.}, urldate = {2021-11-19}, booktitle = {Proceedings of the {Thirteenth} conference on {Uncertainty} in artificial intelligence}, author = {Koller, Daphne and Pfeffer, Avi}, month = aug, year = {1997}, pages = {302--313}, }
@article{hochreiter_long_1997, title = {Long {Short}-{Term} {Memory}}, volume = {9}, issn = {0899-7667}, url = {https://doi.org/10.1162/neco.1997.9.8.1735}, doi = {10.1162/neco.1997.9.8.1735}, abstract = {Learning to store information over extended time intervals by recurrent backpropagation takes a very long time, mostly because of insufficient, decaying error backflow. We briefly review Hochreiter's (1991) analysis of this problem, then address it by introducing a novel, efficient, gradient based method called long short-term memory (LSTM). Truncating the gradient where this does not do harm, LSTM can learn to bridge minimal time lags in excess of 1000 discrete-time steps by enforcing constant error flow through constant error carousels within special units. Multiplicative gate units learn to open and close access to the constant error flow. LSTM is local in space and time; its computational complexity per time step and weight is O(1). Our experiments with artificial data involve local, distributed, real-valued, and noisy pattern representations. In comparisons with real-time recurrent learning, back propagation through time, recurrent cascade correlation, Elman nets, and neural sequence chunking, LSTM leads to many more successful runs, and learns much faster. LSTM also solves complex, artificial long-time-lag tasks that have never been solved by previous recurrent network algorithms.}, number = {8}, urldate = {2021-09-02}, journal = {Neural Computation}, author = {Hochreiter, Sepp and Schmidhuber, Jürgen}, month = nov, year = {1997}, pages = {1735--1780}, }
@article{jozwiak_introduction_1997, title = {An introduction to the studies of reliability of systems using the {Weibull} proportional hazards model}, volume = {37}, issn = {0026-2714}, url = {http://www.sciencedirect.com/science/article/pii/S0026271496002855}, doi = {10.1016/S0026-2714(96)00285-5}, abstract = {The paper presents the way of solving problems concerning reliability with respect to concomitant variables. The Cox models are discussed and Weibull's proportional hazards model is defined. The paper shows how to estimate the model parameters and statistically verify the results.}, number = {6}, journal = {Microelectronics Reliability}, author = {Jóźwiak, Ireneusz J.}, year = {1997}, pages = {915--918}, }
@inproceedings{starr_structured_1997, title = {A structured approach to the selection of condition based maintenance}, doi = {10.1049/cp:19970134}, booktitle = {Fifth {International} {Conference} on {Factory} 2000 - {The} {Technology} {Exploitation} {Process}}, author = {Starr, A. G.}, year = {1997}, note = {ISSN: 0537-9989}, keywords = {Production control, automotive industry, building services sector, condition based maintenance, cost effectiveness, failure history, machine health, management, plant criticality, production, production control, selection based maintenance}, pages = {131--138}, }
@book{ebeling_introduction_1997, title = {An {Introduction} {To} {Reliability} and {Maintainability} {Engineering}}, isbn = {0-07-018852-1}, publisher = {McGraw-Hill}, author = {Ebeling, Charles E.}, year = {1997}, }
@article{zhang_birch_1996, title = {{BIRCH}: an efficient data clustering method for very large databases}, volume = {25}, issn = {0163-5808}, shorttitle = {{BIRCH}}, url = {https://doi.org/10.1145/235968.233324}, doi = {10.1145/235968.233324}, abstract = {Finding useful patterns in large datasets has attracted considerable interest recently, and one of the most widely studied problems in this area is the identification of clusters, or densely populated regions, in a multi-dimensional dataset. Prior work does not adequately address the problem of large datasets and minimization of I/O costs. This paper presents a data clustering method named BIRCH (Balanced Iterative Reducing and Clustering using Hierarchies), and demonstrates that it is especially suitable for very large databases. BIRCH incrementally and dynamically clusters incoming multi-dimensional metric data points to try to produce the best quality clustering with the available resources (i.e., available memory and time constraints). BIRCH can typically find a good clustering with a single scan of the data, and improve the quality further with a few additional scans. BIRCH is also the first clustering algorithm proposed in the database area to handle "noise" (data points that are not part of the underlying pattern) effectively. We evaluate BIRCH's time/space efficiency, data input order sensitivity, and clustering quality through several experiments. We also present a performance comparisons of BIRCH versus CLARANS, a clustering method proposed recently for large datasets, and show that BIRCH is consistently superior.}, number = {2}, urldate = {2022-03-27}, journal = {ACM SIGMOD Record}, author = {Zhang, Tian and Ramakrishnan, Raghu and Livny, Miron}, month = jun, year = {1996}, pages = {103--114}, }
@book{ruanaidh_numerical_1996, address = {New York}, series = {Statistics and {Computing}}, title = {Numerical {Bayesian} {Methods} {Applied} to {Signal} {Processing}}, isbn = {978-0-387-94629-0}, url = {https://www.springer.com/gp/book/9780387946290}, abstract = {This book is concerned with the processing of signals that have been sampled and digitized. The fundamental theory behind Digital Signal Processing has been in existence for decades and has extensive applications to the fields of speech and data communications, biomedical engineering, acoustics, sonar, radar, seismology, oil exploration, instrumentation and audio signal processing to name but a few [87]. The term "Digital Signal Processing", in its broadest sense, could apply to any operation carried out on a finite set of measurements for whatever purpose. A book on signal processing would usually contain detailed descriptions of the standard mathematical machinery often used to describe signals. It would also motivate an approach to real world problems based on concepts and results developed in linear systems theory, that make use of some rather interesting properties of the time and frequency domain representations of signals. While this book assumes some familiarity with traditional methods the emphasis is altogether quite different. The aim is to describe general methods for carrying out optimal signal processing.}, language = {en}, urldate = {2020-10-07}, publisher = {Springer-Verlag}, author = {Ruanaidh, Joseph J. K. O. and Fitzgerald, William J.}, year = {1996}, doi = {10.1007/978-1-4612-0717-7}, }
@inproceedings{ester_density-based_1996, address = {Portland, Oregon}, series = {{KDD}'96}, title = {A density-based algorithm for discovering clusters in large spatial databases with noise}, urldate = {2020-03-25}, booktitle = {Proceedings of the {Second} {International} {Conference} on {Knowledge} {Discovery} and {Data} {Mining}}, publisher = {AAAI Press}, author = {Ester, Martin and Kriegel, Hans-Peter and Sander, Jörg and Xu, Xiaowei}, month = aug, year = {1996}, keywords = {arbitrary shape of clusters, clustering algorithms, efficiency on large spatial databases, handling noise}, pages = {226--231}, }
@inproceedings{yarowsky_unsupervised_1995, address = {USA}, series = {{ACL} '95}, title = {Unsupervised word sense disambiguation rivaling supervised methods}, url = {https://doi.org/10.3115/981658.981684}, doi = {10.3115/981658.981684}, abstract = {This paper presents an unsupervised learning algorithm for sense disambiguation that, when trained on unannotated English text, rivals the performance of supervised techniques that require time-consuming hand annotations. The algorithm is based on two powerful constraints---that words tend to have one sense per discourse and one sense per collocation---exploited in an iterative bootstrapping procedure. Tested accuracy exceeds 96\%.}, urldate = {2022-03-20}, booktitle = {Proceedings of the 33rd annual meeting on {Association} for {Computational} {Linguistics}}, publisher = {Association for Computational Linguistics}, author = {Yarowsky, David}, month = jun, year = {1995}, pages = {189--196}, }
@inproceedings{john_estimating_1995, address = {San Francisco, CA, USA}, series = {{UAI}'95}, title = {Estimating continuous distributions in {Bayesian} classifiers}, isbn = {978-1-55860-385-1}, abstract = {When modeling a probability distribution with a Bayesian network, we are faced with the problem of how to handle continuous variables. Most previous work has either solved the problem by discretizing, or assumed that the data are generated by a single Gaussian. In this paper we abandon the normality assumption and instead use statistical methods for nonparametric density estimation. For a naive Bayesian classifier, we present experimental results on a variety of natural and artificial domains, comparing two methods of density estimation: assuming normality and modeling each conditional distribution with a single Gaussian; and using nonparametric kernel density estimation. We observe large reductions in error on several natural and artificial data sets, which suggests that kernel estimation is a useful tool for learning Bayesian models.}, urldate = {2022-03-16}, booktitle = {Proceedings of the {Eleventh} conference on {Uncertainty} in artificial intelligence}, publisher = {Morgan Kaufmann Publishers Inc.}, author = {John, George H. and Langley, Pat}, month = aug, year = {1995}, pages = {338--345}, }
@inproceedings{kennedy_particle_1995, title = {Particle swarm optimization}, volume = {4}, doi = {10.1109/ICNN.1995.488968}, abstract = {A concept for the optimization of nonlinear functions using particle swarm methodology is introduced. The evolution of several paradigms is outlined, and an implementation of one of the paradigms is discussed. Benchmark testing of the paradigm is described, and applications, including nonlinear function optimization and neural network training, are proposed. The relationships between particle swarm optimization and both artificial life and genetic algorithms are described.}, booktitle = {Proceedings of {ICNN}'95 - {International} {Conference} on {Neural} {Networks}}, author = {Kennedy, J. and Eberhart, R.}, month = nov, year = {1995}, keywords = {Artificial neural networks, Birds, Educational institutions, Genetic algorithms, Humans, Marine animals, Optimization methods, Particle swarm optimization, Performance evaluation, Testing}, pages = {1942--1948 vol.4}, }
@article{tsang_conditionbased_1995, title = {Condition‐based maintenance: tools and decision making}, volume = {1}, number = {3}, journal = {Journal of Quality in Maintenance Engineering}, author = {Tsang, Albert H. C.}, year = {1995}, pages = {3--17}, }
@book{benjamin_s_blanchard_maintainability_1995, title = {Maintainability: {A} {Key} to {Effective} {Serviceability} and {Maintenance} {Management}}, isbn = {0-471-59132-7}, publisher = {John Wiley \& Sons}, author = {Blanchard, Benjamin S. and Verma, Dinesh C. and Peterson, Elmer L.}, year = {1995}, }
@article{vasudevan_use_1994, title = {Use of the {Larson}-{Miller} parameter to study the influence of ageing on the hardness of cold-worked austenitic stainless steel}, volume = {211}, issn = {0022-3115}, url = {https://www.sciencedirect.com/science/article/pii/0022311594903557}, doi = {10.1016/0022-3115(94)90355-7}, abstract = {The Larson-Miller parameter (L-M parameter) was used for studying the influence of thermal ageing on hardness of cold-worked titanium-modified 15Cr-15Ni-2.2Mo austenitic stainless steel, which is commonly referred to as alloy D-9. The samples were given 15–22.5\% prior cold work and aged in the temperature range 873–1223 K for durations ranging from 0.25–4800 h. Using a non-linear regression analysis of hardness data as a function of cold-work level and ageing conditions, the optimum cold-work level for alloy D-9 has been determined to be 20\%.}, language = {en}, number = {3}, urldate = {2022-03-05}, journal = {Journal of Nuclear Materials}, author = {Vasudevan, M. and Venkadesan, S. and Sivaprasad, P. V. and Mannan, S. L.}, month = aug, year = {1994}, pages = {251--255}, }
@incollection{hoeffding_probability_1994, address = {New York, NY}, series = {Springer {Series} in {Statistics}}, title = {Probability {Inequalities} for {Sums} of {Bounded} {Random} {Variables}}, isbn = {978-1-4612-0865-5}, url = {https://doi.org/10.1007/978-1-4612-0865-5_26}, abstract = {Upper bounds are derived for the probability that the sum S of n independent random variables exceeds its mean ES by a positive number nt. It is assumed that the range of each summand of S is bounded or bounded above. The bounds for Pr(S - ES ≥ nt) depend only on the endpoints of the ranges of the summands and the mean, or the mean and the variance of S. These results are then used to obtain analogous inequalities for certain sums of dependent random variables such as U statistics and the sum of a random sample without replacement from a finite population.}, language = {en}, urldate = {2021-10-08}, booktitle = {The {Collected} {Works} of {Wassily} {Hoeffding}}, publisher = {Springer}, author = {Hoeffding, Wassily}, editor = {Fisher, N. I. and Sen, P. K.}, year = {1994}, doi = {10.1007/978-1-4612-0865-5_26}, pages = {409--426}, }
@article{downs_plant-wide_1993, series = {Industrial challenge problems in process control}, title = {A plant-wide industrial process control problem}, volume = {17}, issn = {0098-1354}, url = {https://www.sciencedirect.com/science/article/pii/009813549380018I}, doi = {10.1016/0098-1354(93)80018-I}, abstract = {This paper describes a model of an industrial chemical process for the purpose of developing, studying and evaluating process control technology. This process is well suited for a wide variety of studies including both plant-wide control and multivariable control problems. It consists of a reactor/ separator/recycle arrangement involving two simultaneous gas—liquid exothermic reactions of the following form: A(g) + C(g) + D(g) → G(liq), Product 1, A(g) + C(g) + E(g) → H(liq), Product 2. Two additional byproduct reactions also occur. The process has 12 valves available for manipulation and 41 measurements available for monitoring or control. The process equipment, operating objectives, process control objectives and process disturbances are described. A set of FORTRAN subroutines which simulate the process are available upon request. The chemical process model presented here is a challenging problem for a wide variety of process control technology studies. Even though this process has only a few unit operations, it is much more complex than it appears on first examination. We hope that this problem will be useful in the development of the process control field. We are also interested in hearing about applications of the problem.}, language = {en}, number = {3}, urldate = {2022-01-18}, journal = {Computers \& Chemical Engineering}, author = {Downs, J. J. and Vogel, E. F.}, month = mar, year = {1993}, pages = {245--255}, }
@article{srinivasan_fault_1993, title = {Fault detection/monitoring using time {Petri} nets}, volume = {23}, issn = {2168-2909}, doi = {10.1109/21.247896}, abstract = {While controlling manufacturing systems, real time data is collected through sensory devices or some other means and fed back to the controller for the purpose of monitoring that system. Monitoring refers to the analysis of data collected from the system. It involves fault detection and diagnostics. Here, we shall emphasize the fault detection aspects of monitoring. Modeling the control system by a time Petri net permits us to develop an analytical procedure for fault detection. By backfiring transitions in the time Petri net, we are able to determine if a given state is invalid. The back-firing approach also provides us with state space representation of time Petri nets and the maximum token holding times in different places of time Petri nets.}, number = {4}, journal = {IEEE Transactions on Systems, Man, and Cybernetics}, author = {Srinivasan, V.S. and Jafari, M.A.}, month = jul, year = {1993}, note = {Conference Name: IEEE Transactions on Systems, Man, and Cybernetics}, keywords = {Control system synthesis, Control systems, Data analysis, Fault detection, Formal specifications, Industrial engineering, Manufacturing systems, Monitoring, Petri nets, State-space methods}, pages = {1155--1162}, }
@article{yang_survey_1993, title = {A survey of fuzzy clustering}, volume = {18}, issn = {0895-7177}, url = {https://www.sciencedirect.com/science/article/pii/089571779390202A}, doi = {10.1016/0895-7177(93)90202-A}, abstract = {This paper is a survey of fuzzy set theory applied in cluster analysis. These fuzzy clustering algorithms have been widely studied and applied in a variety of substantive areas. They also become the major techniques in cluster analysis. In this paper, we give a survey of fuzzy clustering in three categories. The first category is the fuzzy clustering based on fuzzy relation. The second one is the fuzzy clustering based on objective function. Finally, we give an overview of a nonparametric classifier. That is the fuzzy generalized k-nearest neighbor rule.}, language = {en}, number = {11}, urldate = {2021-09-30}, journal = {Mathematical and Computer Modelling}, author = {Yang, M. -S.}, month = dec, year = {1993}, keywords = {Cluster analysis, Cluster validity, Fuzzy c-means, Fuzzy c-partitions, Fuzzy clustering, Fuzzy generalized k-nearest neighbor rule, Fuzzy relation}, pages = {1--16}, }
@article{di_marco_expert_1992, series = {{IFAC} {Workshop} on {Motion} {Control} for {Intelligent} {Automation}, {Perugia}, {Italy}, 27-29 {October} 1992}, title = {An {Expert} {System} for {On}-{Line} {Fault} {Diagnosis} and {Control} of a {Railway} {Locomotive}}, volume = {25}, issn = {1474-6670}, url = {https://www.sciencedirect.com/science/article/pii/S1474667017505698}, doi = {10.1016/S1474-6670(17)50569-8}, abstract = {In this paper the main features of an on-line expert system, still under development, for online diagnosis and control of a railway locomotive is described, with particular reference to the system architecture and the knowledge representation aspects. The system's behavior during operation is illustrated by an example.}, language = {en}, number = {29, Part 1}, urldate = {2022-03-05}, journal = {IFAC Proceedings Volumes}, author = {Di Marco, F. and Fortuna, L. and Gallo, A. and Nunnari, G.}, month = oct, year = {1992}, keywords = {control, diagnosis, on-line expert system, traction systems}, pages = {217--221}, }
@article{hoeprich_rolling_1992, title = {Rolling {Element} {Bearing} {Fatigue} {Damage} {Propagation}}, volume = {114}, issn = {0742-4787}, url = {https://doi.org/10.1115/1.2920891}, doi = {10.1115/1.2920891}, abstract = {Rolling element bearing fatigue spalls were propagated for several test conditions well beyond the laboratory criteria of 6.5 square millimeters used in the author’s laboratory. Usually more than one mode of fatigue spall propagation is involved when large spalls develop. The information provided in this paper can be used to gain an appreciation of the total useful life of a bearing and to allow a more accurate diagnosis of fatigue spall damage.}, number = {2}, journal = {Journal of Tribology}, author = {Hoeprich, M. R.}, year = {1992}, note = {\_eprint: https://asmedigitalcollection.asme.org/tribology/article-pdf/114/2/328/5583421/328\_1.pdf}, pages = {328--333}, }
@inproceedings{karp_-line_1992, address = {NLD}, title = {On-{Line} {Algorithms} {Versus} {Off}-{Line} {Algorithms}: {How} {Much} is it {Worth} to {Know} the {Future}?}, isbn = {978-0-444-89747-3}, shorttitle = {On-{Line} {Algorithms} {Versus} {Off}-{Line} {Algorithms}}, urldate = {2020-03-17}, booktitle = {Proceedings of the {IFIP} 12th {World} {Computer} {Congress} on {Algorithms}, {Software}, {Architecture} - {Information} {Processing} '92, {Volume} 1 - {Volume} {I}}, publisher = {North-Holland Publishing Co.}, author = {Karp, Richard M.}, month = sep, year = {1992}, pages = {416--429}, }
@article{xie_validity_1991, title = {A validity measure for fuzzy clustering}, volume = {13}, issn = {1939-3539}, doi = {10.1109/34.85677}, abstract = {The authors present a fuzzy validity criterion based on a validity function which identifies compact and separate fuzzy c-partitions without assumptions as to the number of substructures inherent in the data. This function depends on the data set, geometric distance measure, distance between cluster centroids and more importantly on the fuzzy partition generated by any fuzzy algorithm used. The function is mathematically justified via its relationship to a well-defined hard clustering validity function, the separation index for which the condition of uniqueness has already been established. The performance of this validity function compares favorably to that of several others. The application of this validity function to color image segmentation in a computer color vision system for recognition of IC wafer defects which are otherwise impossible to detect using gray-scale image processing is discussed.}, number = {8}, journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence}, author = {Xie, X.L. and Beni, G.}, month = aug, year = {1991}, note = {Conference Name: IEEE Transactions on Pattern Analysis and Machine Intelligence}, keywords = {Application software, Application specific integrated circuits, Clustering algorithms, Color, Computer vision, Fuzzy sets, Image recognition, Image segmentation, Machine vision, Partitioning algorithms}, pages = {841--847}, }
@book{noauthor_normal_1991, title = {The {Normal} and the {Pathological}}, isbn = {978-0-942299-59-5}, url = {https://press.princeton.edu/books/paperback/9780942299595/the-normal-and-the-pathological}, language = {en}, urldate = {2021-10-18}, author = {Canguilhem, Georges}, month = dec, year = {1991}, }
@article{rabiner_tutorial_1989, title = {A tutorial on hidden {Markov} models and selected applications in speech recognition}, volume = {77}, issn = {1558-2256}, doi = {10.1109/5.18626}, abstract = {This tutorial provides an overview of the basic theory of hidden Markov models (HMMs) as originated by L.E. Baum and T. Petrie (1966) and gives practical details on methods of implementation of the theory along with a description of selected applications of the theory to distinct problems in speech recognition. Results from a number of original sources are combined to provide a single source of acquiring the background required to pursue further this area of research. The author first reviews the theory of discrete Markov chains and shows how the concept of hidden states, where the observation is a probabilistic function of the state, can be used effectively. The theory is illustrated with two simple examples, namely coin-tossing, and the classic balls-in-urns system. Three fundamental problems of HMMs are noted and several practical techniques for solving these problems are given. The various types of HMMs that have been studied, including ergodic as well as left-right models, are described.}, number = {2}, journal = {Proceedings of the IEEE}, author = {Rabiner, L.R.}, month = feb, year = {1989}, note = {Conference Name: Proceedings of the IEEE}, keywords = {Hidden Markov models, Speech recognition, Tutorial}, pages = {257--286}, }
@article{murphy_measurement_1987, title = {Measurement of {Pain}: {A} {Comparison} of the {Visual} {Analogue} with a {Nonvisual} {Analogue} {Scale}}, volume = {3}, issn = {0749-8047}, shorttitle = {Measurement of {Pain}}, url = {https://journals.lww.com/clinicalpain/Abstract/1987/12000/Measurement_of_Pain__A_Comparison_of_the_Visual.3.aspx}, abstract = {Because of difficulties encountered with patient compliance using the standard visual analogue scale, a new nonvisual analogue scale has been devised for pain measurement. The new scale was found to give values for pain that correlated well with values given using the visual scale and was found to be more easily understood. The scale was considered to be simpler and more reliable than the visual scale heretofore in use.}, language = {en-US}, number = {4}, urldate = {2021-10-18}, journal = {The Clinical Journal of Pain}, author = {Murphy, Dermot F. and McDonald, A. and Power, C. and Unwin, A. and MacSullivan, R.}, year = {1987}, pages = {197--200}, }
@article{grassberger_toward_1986, title = {Toward a quantitative theory of self-generated complexity}, volume = {25}, issn = {1572-9575}, url = {https://doi.org/10.1007/BF00668821}, doi = {10.1007/BF00668821}, number = {9}, journal = {International Journal of Theoretical Physics}, author = {Grassberger, Peter}, month = sep, year = {1986}, pages = {907--938}, }
@article{bezdek_fcm_1984, title = {{FCM}: {The} fuzzy c-means clustering algorithm}, volume = {10}, issn = {0098-3004}, shorttitle = {{FCM}}, url = {https://www.sciencedirect.com/science/article/pii/0098300484900207}, doi = {10.1016/0098-3004(84)90020-7}, abstract = {This paper transmits a FORTRAN-IV coding of the fuzzy c-means (FCM) clustering program. The FCM program is applicable to a wide variety of geostatistical data analysis problems. This program generates fuzzy partitions and prototypes for any set of numerical data. These partitions are useful for corroborating known substructures or suggesting substructure in unexplored data. The clustering criterion used to aggregate subsets is a generalized least-squares objective function. Features of this program include a choice of three norms (Euclidean, Diagonal, or Mahalonobis), an adjustable weighting factor that essentially controls sensitivity to noise, acceptance of variable numbers of clusters, and outputs that include several measures of cluster validity.}, language = {en}, number = {2}, urldate = {2021-10-01}, journal = {Computers \& Geosciences}, author = {Bezdek, James C. and Ehrlich, Robert and Full, William}, month = jan, year = {1984}, keywords = {Cluster analysis, Cluster validity, Fuzzy QMODEL, Fuzzy clustering, Least-squared errors}, pages = {191--203}, }
@article{lloyd_least_1982, title = {Least squares quantization in {PCM}}, volume = {28}, issn = {1557-9654}, doi = {10.1109/TIT.1982.1056489}, abstract = {It has long been realized that in pulse-code modulation (PCM), with a given ensemble of signals to handle, the quantum values should be spaced more closely in the voltage regions where the signal amplitude is more likely to fall. It has been shown by Panter and Dite that, in the limit as the number of quanta becomes infinite, the asymptotic fractional density of quanta per unit voltage should vary as the one-third power of the probability density per unit voltage of signal amplitudes. In this paper the corresponding result for any finite number of quanta is derived; that is, necessary conditions are found that the quanta and associated quantization intervals of an optimum finite quantization scheme must satisfy. The optimization criterion used is that the average quantization noise power be a minimum. It is shown that the result obtained here goes over into the Panter and Dite result as the number of quanta become large. The optimum quantization schemes for $2^b$ quanta, $b = 1, 2, \cdots, 7$, are given numerically for Gaussian and for Laplacian distribution of signal amplitudes.}, number = {2}, journal = {IEEE Transactions on Information Theory}, author = {Lloyd, S.}, month = mar, year = {1982}, note = {Conference Name: IEEE Transactions on Information Theory}, pages = {129--137}, }
@article{davies_cluster_1979, title = {A {Cluster} {Separation} {Measure}}, volume = {PAMI-1}, issn = {1939-3539}, doi = {10.1109/TPAMI.1979.4766909}, abstract = {A measure is presented which indicates the similarity of clusters which are assumed to have a data density which is a decreasing function of distance from a vector characteristic of the cluster. The measure can be used to infer the appropriateness of data partitions and can therefore be used to compare relative appropriateness of various divisions of the data. The measure does not depend on either the number of clusters analyzed nor the method of partitioning of the data and can be used to guide a cluster seeking algorithm.}, number = {2}, journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence}, author = {Davies, David L. and Bouldin, Donald W.}, month = apr, year = {1979}, note = {Conference Name: IEEE Transactions on Pattern Analysis and Machine Intelligence}, keywords = {Algorithm design and analysis, Cluster, Clustering algorithms, Data analysis, Density measurement, Dispersion, Humans, Missiles, Multidimensional systems, Partitioning algorithms, Performance analysis, data partitions, multidimensional data analysis, parametric clustering, partitions, similarity measure}, pages = {224--227}, }
@article{abdel-hameed_gamma_1975, title = {A {Gamma} {Wear} {Process}}, volume = {R-24}, doi = {10.1109/TR.1975.5215123}, number = {2}, journal = {IEEE Transactions on Reliability}, author = {Abdel-Hameed, M.}, month = jun, year = {1975}, keywords = {Electric shock, Probability density function, Probability distribution, Random variables, Statistical distributions, Stochastic processes, Stochastic systems}, pages = {152--153}, }
@article{brook_cumulative_1969, title = {Cumulative {Damage} in {Fatigue}: {A} {Step} towards {Its} {Understanding}}, volume = {11}, url = {https://doi.org/10.1243/JMES_JOUR_1969_011_032_02}, doi = {10.1243/JMES_JOUR_1969_011_032_02}, abstract = {A method is described which uses changes of apparent dynamic modulus and damping during fatigue cycling, for estimating accurately the fatigue lives of stainless steel (Rex 535) specimens. This technique for estimating well in advance of failure the fatigue lives of individual specimens avoids the usual difficulties caused by scatter of fatigue results, and has enabled a more precise quantitative investigation to be made of cumulative fatigue damage (i.e. damage due to multi-level loading) than would have been possible using conventional experimental methods. By estimating the remaining life of a specimen at one stress amplitude before measuring the equivalent remaining life at a second stress amplitude by cycling to failure, it was possible to determine lines of equal damage on a plot of stress amplitude versus remaining fatigue life. These lines of equal damage were used to predict the fatigue lives of specimens subjected to programmes of multi-level loading, and the accuracy of these estimates, when compared with the subsequent experimental results, is much better than has been achieved hitherto. Depending on the stressing program chosen, Miner's linear damage rule is shown to be very good, rather pessimistic, or very dangerous.}, number = {3}, journal = {Journal of Mechanical Engineering Science}, author = {Brook, R. H. W. and Parry, J. S. C.}, year = {1969}, pages = {243--255}, }
@article{kolmogorov_three_1968, title = {Three approaches to the quantitative definition of information}, volume = {2}, url = {https://doi.org/10.1080/00207166808803030}, doi = {10.1080/00207166808803030}, number = {1-4}, journal = {International Journal of Computer Mathematics}, author = {Kolmogorov, A. N.}, year = {1968}, note = {Publisher: Taylor \& Francis \_eprint: https://doi.org/10.1080/00207166808803030}, pages = {157--168}, }
@article{hoeffding_probability_1963, title = {Probability {Inequalities} for {Sums} of {Bounded} {Random} {Variables}}, volume = {58}, issn = {0162-1459}, url = {https://www.jstor.org/stable/2282952}, doi = {10.2307/2282952}, abstract = {Upper bounds are derived for the probability that the sum S of n independent random variables exceeds its mean ES by a positive number nt. It is assumed that the range of each summand of S is bounded or bounded above. The bounds for $\Pr\{S - ES \geq nt\}$ depend only on the endpoints of the ranges of the summands and the mean, or the mean and the variance of S. These results are then used to obtain analogous inequalities for certain sums of dependent random variables such as U statistics and the sum of a random sample without replacement from a finite population.}, number = {301}, urldate = {2021-10-03}, journal = {Journal of the American Statistical Association}, author = {Hoeffding, Wassily}, year = {1963}, note = {Publisher: [American Statistical Association, Taylor \& Francis, Ltd.]}, pages = {13--30}, }
@article{paris_rational_1961, title = {A rational analytic theory of fatigue}, volume = {13}, journal = {The Trend in Engineering}, author = {Paris, Paul and Gomez, Mario and Anderson, William}, year = {1961}, pages = {9--14}, }
@article{cohen_coefficient_1960, title = {A {Coefficient} of {Agreement} for {Nominal} {Scales}}, volume = {20}, issn = {0013-1644}, url = {https://doi.org/10.1177/001316446002000104}, doi = {10.1177/001316446002000104}, language = {en}, number = {1}, urldate = {2021-03-24}, journal = {Educational and Psychological Measurement}, author = {Cohen, Jacob}, month = apr, year = {1960}, note = {Publisher: SAGE Publications Inc}, pages = {37--46}, }
@article{samuel_studies_1959, title = {Some {Studies} in {Machine} {Learning} {Using} the {Game} of {Checkers}}, volume = {3}, issn = {0018-8646}, doi = {10.1147/rd.33.0210}, abstract = {Two machine-learning procedures have been investigated in some detail using the game of checkers. Enough work has been done to verify the fact that a computer can be programmed so that it will learn to play a better game of checkers than can be played by the person who wrote the program. Furthermore, it can learn to do this in a remarkably short period of time (8 or 10 hours of machine-playing time) when given only the rules of the game, a sense of direction, and a redundant and incomplete list of parameters which are thought to have something to do with the game, but whose correct signs and relative weights are unknown and unspecified. The principles of machine learning verified by these experiments are, of course, applicable to many other situations.}, number = {3}, journal = {IBM Journal of Research and Development}, author = {Samuel, A. L.}, month = jul, year = {1959}, note = {Conference Name: IBM Journal of Research and Development}, pages = {210--229}, }
@article{rosenblatt_perceptron_1958, title = {The perceptron: a probabilistic model for information storage and organization in the brain}, volume = {65}, issn = {0033-295X}, shorttitle = {The perceptron}, doi = {10.1037/h0042519}, language = {eng}, number = {6}, journal = {Psychological Review}, author = {Rosenblatt, F.}, month = nov, year = {1958}, pmid = {13602029}, keywords = {Brain, Humans, Information Storage and Retrieval, Models, Statistical, Neural Networks, Computer, PERCEPTION, Perception}, pages = {386--408}, }
@techreport{ss_behavior_1954, title = {Behavior of materials under conditions of thermal stress}, institution = {National Advisory Committee for Aeronautics. Lewis Flight Propulsion Lab.; Cleveland, OH, United States}, author = {Manson, S. S.}, year = {1954}, }
@article{archard_contact_1953, title = {Contact and {Rubbing} of {Flat} {Surfaces}}, volume = {24}, url = {https://doi.org/10.1063/1.1721448}, doi = {10.1063/1.1721448}, number = {8}, journal = {Journal of Applied Physics}, author = {Archard, J. F.}, year = {1953}, note = {\_eprint: https://doi.org/10.1063/1.1721448}, pages = {981--988}, }
@article{shannon_mathematical_1948, title = {A mathematical theory of communication}, volume = {27}, issn = {0005-8580}, doi = {10.1002/j.1538-7305.1948.tb01338.x}, abstract = {The recent development of various methods of modulation such as PCM and PPM which exchange bandwidth for signal-to-noise ratio has intensified the interest in a general theory of communication. A basis for such a theory is contained in the important papers of Nyquist and Hartley on this subject. In the present paper we will extend the theory to include a number of new factors, in particular the effect of noise in the channel, and the savings possible due to the statistical structure of the original message and due to the nature of the final destination of the information.}, number = {3}, journal = {The Bell System Technical Journal}, author = {Shannon, C. E.}, month = jul, year = {1948}, note = {Conference Name: The Bell System Technical Journal}, pages = {379--423}, }
@article{hayes_experimental_1921, title = {Experimental development of the graphic rating method}, volume = {18}, journal = {Psychological Bulletin}, author = {Hayes, M. H. S. and Paterson, D. G.}, year = {1921}, pages = {8--9}, }
@article{oh_exponential_1910, title = {The exponential law of endurance tests}, volume = {10}, journal = {American Society for Testing Materials}, author = {Basquin, O. H.}, year = {1910}, pages = {625--630}, }
@misc{noauthor_predictive_nodate, title = {Predictive {Maintenance} for {Railway} {Domain}: {A} {Systematic} {Literature} {Review}}, shorttitle = {Predictive {Maintenance} for {Railway} {Domain}}, url = {https://ieeexplore.ieee.org/abstract/document/10082880/}, abstract = {Railways are considered to be an environmentally friendly and efficient means of transport for people and goods with increasing importance in the transport policies of many countries. However, the infrastructure and the substantial demand for maintenance create additional costs for railway operators. To overcome outdated maintenance modes, implementation of new solutions, optimization of maintenance activities, and resource utilization are required. Through a systematic literature review, this article evaluates new approaches toward implementing predictive maintenance in the railway domain. A comprehensive search, including the IEEE Xplore, Science Direct, and ACM Digital Library, has been conducted, focusing on papers related to predictive maintenance and railway systems, published in peer-reviewed journals since 2016. The selected papers were analyzed and grouped to allocate the research purposes as well as the considered assets, components, predicted defects, and maintenance conditions. Furthermore, the utilized predictive maintenance algorithms and their limitations are structured and evaluated. Analysis shows that a great variety of algorithms were used for either defect detection or the prediction of conditions of 20 different components, which are critical for the safety and availability of railway operations. The study shows that the proposed approaches were successfully tested and yielded great potential for predictive maintenance solutions. Researchers state to enhance proposed solutions within their future work, increasing accuracy and performance and widening the area of application in the railway domain.}, language = {en-US}, urldate = {2023-10-26}, }
@misc{noauthor_transfer_nodate, title = {Transfer learning for remaining useful life prediction based on consensus self-organizing models}, url = {https://www.sciencedirect.com/science/article/pii/S0951832020305998}, urldate = {2023-05-21}, }
@article{marcinkevics_interpretable_nodate, title = {Interpretable and explainable machine learning: {A} methods-centric overview with concrete examples}, volume = {n/a}, issn = {1942-4795}, shorttitle = {Interpretable and explainable machine learning}, url = {https://onlinelibrary.wiley.com/doi/abs/10.1002/widm.1493}, doi = {10.1002/widm.1493}, abstract = {Interpretability and explainability are crucial for machine learning (ML) and statistical applications in medicine, economics, law, and natural sciences and form an essential principle for ML model design and development. Although interpretability and explainability have escaped a precise and universal definition, many models and techniques motivated by these properties have been developed over the last 30 years, with the focus currently shifting toward deep learning. We will consider concrete examples of state-of-the-art, including specially tailored rule-based, sparse, and additive classification models, interpretable representation learning, and methods for explaining black-box models post hoc. The discussion will emphasize the need for and relevance of interpretability and explainability, the divide between them, and the inductive biases behind the presented “zoo” of interpretable models and explanation methods.}, language = {en}, number = {n/a}, urldate = {2023-03-08}, journal = {WIREs Data Mining and Knowledge Discovery}, author = {Marcinkevičs, Ričards and Vogt, Julia E.}, note = {\_eprint: https://onlinelibrary.wiley.com/doi/pdf/10.1002/widm.1493}, keywords = {explainability, interpretability, machine learning, neural networks}, pages = {e1493}, }
@misc{noauthor_understanding_nodate, title = {Understanding {LSTM} {Networks} -- colah's blog}, url = {https://colah.github.io/posts/2015-08-Understanding-LSTMs/}, urldate = {2022-09-16}, }
@misc{noauthor_du_nodate, title = {Du matériel roulant à l'infrastructure, l'importance de la maintenance prédictive pour la {RATP}}, url = {https://www.usine-digitale.fr/article/du-materiel-roulant-a-l-infrastructure-l-importance-de-la-maintenance-predictive-pour-la-ratp.N2022417}, abstract = {RATP has developed its own predictive maintenance tool for the RER A line. The software, named Serval, identifies a number of failures before they occur so that a technician can intervene ahead of time without disrupting traffic. RATP is also working to deploy such tools across its infrastructure. The sheer size of the network it operates and the strategic nature of the equipment make this type of solution all the more important.}, urldate = {2022-07-19}, journal = {usine-digitale.fr}, }
@misc{noauthor_comparative_nodate, title = {A comparative study on online machine learning techniques for network traffic streams analysis}, url = {https://doi.org/10.1016/j.comnet.2022.108836}, language = {en}, urldate = {2022-07-12}, doi = {10.1016/j.comnet.2022.108836}, }
@techreport{noauthor_french_nodate, title = {The {French} passenger rail transport market 2015-2016}, language = {en}, institution = {Autorité de régulation des activités ferroviaires et routières}, pages = {85}, }