A Fusion of Deep Convolutional Generative Adversarial Networks and Sequence to Sequence Autoencoders for Acoustic Scene Classification. Amiriparian, S., Freitag, M., Cummins, N., Gerczuk, M., Pugachevskiy, S., & Schuller, B. In 2018 26th European Signal Processing Conference (EUSIPCO), pages 977-981, Sep., 2018. Paper doi abstract bibtex Unsupervised representation learning shows high promise for generating robust features for acoustic scene analysis. In this regard, we propose and investigate a novel combination of features learnt using both a deep convolutional generative adversarial network (DCGAN) and a recurrent sequence to sequence autoencoder (S2SAE). Each of the representation learning algorithms is trained individually on spectral features extracted from audio instances. The learnt representations are: (i) the activations of the discriminator in case of the DCGAN, and (ii) the activations of a fully connected layer between the decoder and encoder units in case of the S2SAE. We then train two multilayer perceptron neural networks on the DCGAN and S2SAE feature vectors to predict the class labels. The individual predicted labels are combined in a weighted decision-level fusion to achieve the final prediction. The system is evaluated on the development partition of the acoustic scene classification data set of the IEEE AASP Challenge on Detection and Classification of Acoustic Scenes and Events (DCASE 2017). In comparison to the baseline, the accuracy is increased from 74.8 % to 86.4 % using only the DCGAN, to 88.5 % on the development set using only the S2SAE, and to 91.1 % after fusion of the individual predictions.
@inproceedings{8553225,
  author    = {Amiriparian, S. and Freitag, M. and Cummins, N. and Gerczuk, M. and Pugachevskiy, S. and Schuller, B.},
  booktitle = {2018 26th European Signal Processing Conference (EUSIPCO)},
  title     = {A Fusion of Deep Convolutional Generative Adversarial Networks and Sequence to Sequence Autoencoders for Acoustic Scene Classification},
  year      = {2018},
  pages     = {977--981},
  abstract  = {Unsupervised representation learning shows high promise for generating robust features for acoustic scene analysis. In this regard, we propose and investigate a novel combination of features learnt using both a deep convolutional generative adversarial network (DCGAN) and a recurrent sequence to sequence autoencoder (S2SAE). Each of the representation learning algorithms is trained individually on spectral features extracted from audio instances. The learnt representations are: (i) the activations of the discriminator in case of the DCGAN, and (ii) the activations of a fully connected layer between the decoder and encoder units in case of the S2SAE. We then train two multilayer perceptron neural networks on the DCGAN and S2SAE feature vectors to predict the class labels. The individual predicted labels are combined in a weighted decision-level fusion to achieve the final prediction. The system is evaluated on the development partition of the acoustic scene classification data set of the IEEE AASP Challenge on Detection and Classification of Acoustic Scenes and Events (DCASE 2017). In comparison to the baseline, the accuracy is increased from 74.8 % to 86.4 % using only the DCGAN, to 88.5 % on the development set using only the S2SAE, and to 91.1 % after fusion of the individual predictions.},
  keywords  = {feature extraction;image classification;image representation;learning (artificial intelligence);multilayer perceptrons;pattern classification;signal classification;S2SAE feature vectors;weighted decision-level fusion;acoustic scene classification data;DCGAN;deep convolutional generative adversarial network;sequence autoencoder;unsupervised representation learning;robust features;acoustic scene analysis;recurrent sequence;representation learning algorithms;spectral features;learnt representations;multilayer perceptron neural networks;Acoustics;Generators;Task analysis;Convolution;Training;Spectrogram;Feature extraction;unsupervised feature learning;generative adversarial networks;sequence to sequence autoencoders;acoustic scene classification},
  doi       = {10.23919/EUSIPCO.2018.8553225},
  issn      = {2076-1465},
  month     = sep,
  url       = {https://www.eurasip.org/proceedings/eusipco/eusipco2018/papers/1570438044.pdf},
}
Downloads: 0
{"_id":"oZhL6bBLe3H9nqkuZ","bibbaseid":"arniriparian-freitag-cummins-gerczuk-pugachevskiy-schuller-afusionofdeepconvolutionalgenerativeadversarialnetworksandsequencetosequenceautoencodersforacousticsceneclassification-2018","authorIDs":[],"author_short":["Arniriparian, S.","Freitag, M.","Cummins, N.","Gerczuk, M.","Pugachevskiy, S.","Schuller, B."],"bibdata":{"bibtype":"inproceedings","type":"inproceedings","author":[{"firstnames":["S."],"propositions":[],"lastnames":["Arniriparian"],"suffixes":[]},{"firstnames":["M."],"propositions":[],"lastnames":["Freitag"],"suffixes":[]},{"firstnames":["N."],"propositions":[],"lastnames":["Cummins"],"suffixes":[]},{"firstnames":["M."],"propositions":[],"lastnames":["Gerczuk"],"suffixes":[]},{"firstnames":["S."],"propositions":[],"lastnames":["Pugachevskiy"],"suffixes":[]},{"firstnames":["B."],"propositions":[],"lastnames":["Schuller"],"suffixes":[]}],"booktitle":"2018 26th European Signal Processing Conference (EUSIPCO)","title":"A Fusion of Deep Convolutional Generative Adversarial Networks and Sequence to Sequence Autoencoders for Acoustic Scene Classification","year":"2018","pages":"977-981","abstract":"Unsupervised representation learning shows high promise for generating robust features for acoustic scene analysis. In this regard, we propose and investigate a novel combination of features learnt using both a deep convolutional generative adversarial network (DCGAN) and a recurrent sequence to sequence autoencoder (S2SAE). Each of the representation learning algorithms is trained individually on spectral features extracted from audio instances. The learnt representations are: (i) the activations of the discriminator in case of the DCGAN, and (ii) the activations of a fully connected layer between the decoder and encoder units in case of the S2SAE. We then train two multilayer perceptron neural networks on the DCGAN and S2SAE feature vectors to predict the class labels. 
The individual predicted labels are combined in a weighted decision-level fusion to achieve the final prediction. The system is evaluated on the development partition of the acoustic scene classification data set of the IEEE AASP Challenge on Detection and Classification of Acoustic Scenes and Events (DCASE 2017). In comparison to the baseline, the accuracy is increased from 74.8 % to 86.4 % using only the DCGAN, to 88.5 % on the development set using only the S2SAE, and to 91.1 % after fusion of the individual predictions.","keywords":"feature extraction;image classification;image representation;learning (artificial intelligence);multilayer perceptrons;pattern classification;signal classification;S2SAE feature vectors;weighted decision-level fusion;acoustic scene classification data;DCGAN;deep convolutional generative adversarial network;sequence autoencoder;unsupervised representation learning;robust features;acoustic scene analysis;recurrent sequence;representation learning algorithms;spectral features;learnt representations;multilayer perceptron neural networks;Acoustics;Generators;Task analysis;Convolution;Training;Spectrogram;Feature extraction;unsupervised feature learning;generative adversarial networks;sequence to sequence autoencoders;acoustic scene classification","doi":"10.23919/EUSIPCO.2018.8553225","issn":"2076-1465","month":"Sep.","url":"https://www.eurasip.org/proceedings/eusipco/eusipco2018/papers/1570438044.pdf","bibtex":"@InProceedings{8553225,\n author = {S. Arniriparian and M. Freitag and N. Cummins and M. Gerczuk and S. Pugachevskiy and B. Schuller},\n booktitle = {2018 26th European Signal Processing Conference (EUSIPCO)},\n title = {A Fusion of Deep Convolutional Generative Adversarial Networks and Sequence to Sequence Autoencoders for Acoustic Scene Classification},\n year = {2018},\n pages = {977-981},\n abstract = {Unsupervised representation learning shows high promise for generating robust features for acoustic scene analysis. 
In this regard, we propose and investigate a novel combination of features learnt using both a deep convolutional generative adversarial network (DCGAN) and a recurrent sequence to sequence autoencoder (S2SAE). Each of the representation learning algorithms is trained individually on spectral features extracted from audio instances. The learnt representations are: (i) the activations of the discriminator in case of the DCGAN, and (ii) the activations of a fully connected layer between the decoder and encoder units in case of the S2SAE. We then train two multilayer perceptron neural networks on the DCGAN and S2SAE feature vectors to predict the class labels. The individual predicted labels are combined in a weighted decision-level fusion to achieve the final prediction. The system is evaluated on the development partition of the acoustic scene classification data set of the IEEE AASP Challenge on Detection and Classification of Acoustic Scenes and Events (DCASE 2017). In comparison to the baseline, the accuracy is increased from 74.8 % to 86.4 % using only the DCGAN, to 88.5 % on the development set using only the S2SAE, and to 91.1 % after fusion of the individual predictions.},\n keywords = {feature extraction;image classification;image representation;learning (artificial intelligence);multilayer perceptrons;pattern classification;signal classification;S2SAE feature vectors;weighted decision-level fusion;acoustic scene classification data;DCGAN;deep convolutional generative adversarial network;sequence autoencoder;unsupervised representation learning;robust features;acoustic scene analysis;recurrent sequence;representation learning algorithms;spectral features;learnt representations;multilayer perceptron neural networks;Acoustics;Generators;Task analysis;Convolution;Training;Spectrogram;Feature extraction;unsupervised feature learning;generative adversarial networks;sequence to sequence autoencoders;acoustic scene classification},\n doi = 
{10.23919/EUSIPCO.2018.8553225},\n issn = {2076-1465},\n month = {Sep.},\n url = {https://www.eurasip.org/proceedings/eusipco/eusipco2018/papers/1570438044.pdf},\n}\n\n","author_short":["Arniriparian, S.","Freitag, M.","Cummins, N.","Gerczuk, M.","Pugachevskiy, S.","Schuller, B."],"key":"8553225","id":"8553225","bibbaseid":"arniriparian-freitag-cummins-gerczuk-pugachevskiy-schuller-afusionofdeepconvolutionalgenerativeadversarialnetworksandsequencetosequenceautoencodersforacousticsceneclassification-2018","role":"author","urls":{"Paper":"https://www.eurasip.org/proceedings/eusipco/eusipco2018/papers/1570438044.pdf"},"keyword":["feature extraction;image classification;image representation;learning (artificial intelligence);multilayer perceptrons;pattern classification;signal classification;S2SAE feature vectors;weighted decision-level fusion;acoustic scene classification data;DCGAN;deep convolutional generative adversarial network;sequence autoencoder;unsupervised representation learning;robust features;acoustic scene analysis;recurrent sequence;representation learning algorithms;spectral features;learnt representations;multilayer perceptron neural networks;Acoustics;Generators;Task analysis;Convolution;Training;Spectrogram;Feature extraction;unsupervised feature learning;generative adversarial networks;sequence to sequence autoencoders;acoustic scene classification"],"metadata":{"authorlinks":{}},"downloads":0},"bibtype":"inproceedings","biburl":"https://raw.githubusercontent.com/Roznn/EUSIPCO/main/eusipco2018url.bib","creationDate":"2021-02-13T15:38:40.341Z","downloads":0,"keywords":["feature extraction;image classification;image representation;learning (artificial intelligence);multilayer perceptrons;pattern classification;signal classification;s2sae feature vectors;weighted decision-level fusion;acoustic scene classification data;dcgan;deep convolutional generative adversarial network;sequence autoencoder;unsupervised representation learning;robust 
features;acoustic scene analysis;recurrent sequence;representation learning algorithms;spectral features;learnt representations;multilayer perceptron neural networks;acoustics;generators;task analysis;convolution;training;spectrogram;feature extraction;unsupervised feature learning;generative adversarial networks;sequence to sequence autoencoders;acoustic scene classification"],"search_terms":["fusion","deep","convolutional","generative","adversarial","networks","sequence","sequence","autoencoders","acoustic","scene","classification","arniriparian","freitag","cummins","gerczuk","pugachevskiy","schuller"],"title":"A Fusion of Deep Convolutional Generative Adversarial Networks and Sequence to Sequence Autoencoders for Acoustic Scene Classification","year":2018,"dataSources":["yiZioZximP7hphDpY","iuBeKSmaES2fHcEE9"]}