Masked Siamese Networks for Label-Efficient Learning. Assran, M., Caron, M., Misra, I., Bojanowski, P., Bordes, F., Vincent, P., Joulin, A., Rabbat, M., & Ballas, N. Paper Website abstract bibtex We propose Masked Siamese Networks (MSN), a self-supervised learning framework for learning image representations. Our approach matches the representation of an image view containing randomly masked patches to the representation of the original unmasked image. This self-supervised pre-training strategy is particularly scalable when applied to Vision Transformers since only the unmasked patches are processed by the network. As a result, MSNs improve the scalability of joint-embedding architectures, while producing representations of a high semantic level that perform competitively on low-shot image classification. For instance, on ImageNet-1K, with only 5,000 annotated images, our base MSN model achieves 72.4% top-1 accuracy, and with 1% of ImageNet-1K labels, we achieve 75.7% top-1 accuracy, setting a new state-of-the-art for self-supervised learning on this benchmark. Our code is publicly available at https://github.com/facebookresearch/msn.
@article{assran2022masked,
 title = {Masked {Siamese} Networks for Label-Efficient Learning},
 type = {article},
 keywords = {Low-Shot Classification,Self-Supervised Representation Learning,Siamese Networks,Vision Transformers},
 websites = {https://github.com/facebookresearch/msn},
 year = {2022},
 id = {fca3f057-a3c9-37f9-a6bf-a6e4b71cbfef},
 created = {2022-11-01T11:50:24.706Z},
 accessed = {2022-11-01},
 file_attached = {true},
 profile_id = {235249c2-3ed4-314a-b309-b1ea0330f5d9},
 group_id = {5ec9cc91-a5d6-3de5-82f3-3ef3d98a89c1},
 last_modified = {2022-11-15T06:28:33.422Z},
 read = {false},
 starred = {false},
 authored = {false},
 confirmed = {false},
 hidden = {false},
 folder_uuids = {5a010301-acb6-4642-a6b2-8afaee1b741c,bd3c6f2e-3514-47cf-bc42-12db8b9abe45},
 private_publication = {false},
 abstract = {We propose Masked Siamese Networks (MSN), a self-supervised learning framework for learning image representations. Our approach matches the representation of an image view containing randomly masked patches to the representation of the original unmasked image. This self-supervised pre-training strategy is particularly scalable when applied to Vision Transformers since only the unmasked patches are processed by the network. As a result, MSNs improve the scalability of joint-embedding architectures, while producing representations of a high semantic level that perform competitively on low-shot image classification. For instance, on ImageNet-1K, with only 5,000 annotated images, our base MSN model achieves 72.4% top-1 accuracy, and with 1% of ImageNet-1K labels, we achieve 75.7% top-1 accuracy, setting a new state-of-the-art for self-supervised learning on this benchmark. Our code is publicly available at https://github.com/facebookresearch/msn.},
 bibtype = {article},
 internal-note = {NOTE(review): dropped trailing "author" {Ai, Meta} — the affiliation "Meta AI" mis-parsed as a person; confirm final author list against the paper. Year inferred from record timestamps; confirm.},
 author = {Assran, Mahmoud and Caron, Mathilde and Misra, Ishan and Bojanowski, Piotr and Bordes, Florian and Vincent, Pascal and Joulin, Armand and Rabbat, Michael and Ballas, Nicolas}
}
Downloads: 0
{"_id":"8T5YaoEdHFydGS7jE","bibbaseid":"assran-caron-misra-bojanowski-bordes-vincent-joulin-rabbat-etal-maskedsiamesenetworksforlabelecientlearning","author_short":["Assran, M.","Caron, M.","Misra, I.","Bojanowski, P.","Bordes, F.","Vincent, P.","Joulin, A.","Rabbat, M.","Ballas, N.","Ai, M."],"bibdata":{"title":"Masked Siamese Networks for Label-Ecient Learning","type":"article","keywords":"Low-Shot Classifi-cation,Self-Supervised Representation Learning,Siamese Networks,Vision Transformers","websites":"https://github.com/facebookresearch/msn.","id":"fca3f057-a3c9-37f9-a6bf-a6e4b71cbfef","created":"2022-11-01T11:50:24.706Z","accessed":"2022-11-01","file_attached":"true","profile_id":"235249c2-3ed4-314a-b309-b1ea0330f5d9","group_id":"5ec9cc91-a5d6-3de5-82f3-3ef3d98a89c1","last_modified":"2022-11-15T06:28:33.422Z","read":false,"starred":false,"authored":false,"confirmed":false,"hidden":false,"folder_uuids":"5a010301-acb6-4642-a6b2-8afaee1b741c,bd3c6f2e-3514-47cf-bc42-12db8b9abe45","private_publication":false,"abstract":"We propose Masked Siamese Networks (MSN), a self-supervised learning framework for learning image representations. Our approach matches the representation of an image view containing randomly masked patches to the representation of the original unmasked image. This self-supervised pre-training strategy is particularly scalable when applied to Vision Transformers since only the unmasked patches are processed by the network. As a result, MSNs improve the scalability of joint-embedding architectures, while producing representations of a high semantic level that perform competitively on low-shot image classification. For instance, on ImageNet-1K, with only 5,000 annotated images, our base MSN model achieves 72.4% top-1 accuracy, and with 1% of ImageNet-1K labels, we achieve 75.7% top-1 accuracy, setting a new state-of-the-art for self-supervised learning on this benchmark. 
Our code is publicly available at https://github.com/facebookresearch/msn.","bibtype":"article","author":"Assran, Mahmoud and Caron, Mathilde and Misra, Ishan and Bojanowski, Piotr and Bordes, Florian and Vincent, Pascal and Joulin, Armand and Rabbat, Michael and Ballas, Nicolas and Ai, Meta","bibtex":"@article{\n title = {Masked Siamese Networks for Label-Ecient Learning},\n type = {article},\n keywords = {Low-Shot Classifi-cation,Self-Supervised Representation Learning,Siamese Networks,Vision Transformers},\n websites = {https://github.com/facebookresearch/msn.},\n id = {fca3f057-a3c9-37f9-a6bf-a6e4b71cbfef},\n created = {2022-11-01T11:50:24.706Z},\n accessed = {2022-11-01},\n file_attached = {true},\n profile_id = {235249c2-3ed4-314a-b309-b1ea0330f5d9},\n group_id = {5ec9cc91-a5d6-3de5-82f3-3ef3d98a89c1},\n last_modified = {2022-11-15T06:28:33.422Z},\n read = {false},\n starred = {false},\n authored = {false},\n confirmed = {false},\n hidden = {false},\n folder_uuids = {5a010301-acb6-4642-a6b2-8afaee1b741c,bd3c6f2e-3514-47cf-bc42-12db8b9abe45},\n private_publication = {false},\n abstract = {We propose Masked Siamese Networks (MSN), a self-supervised learning framework for learning image representations. Our approach matches the representation of an image view containing randomly masked patches to the representation of the original unmasked image. This self-supervised pre-training strategy is particularly scalable when applied to Vision Transformers since only the unmasked patches are processed by the network. As a result, MSNs improve the scalability of joint-embedding architectures, while producing representations of a high semantic level that perform competitively on low-shot image classification. For instance, on ImageNet-1K, with only 5,000 annotated images, our base MSN model achieves 72.4% top-1 accuracy, and with 1% of ImageNet-1K labels, we achieve 75.7% top-1 accuracy, setting a new state-of-the-art for self-supervised learning on this benchmark. 
Our code is publicly available at https://github.com/facebookresearch/msn.},\n bibtype = {article},\n author = {Assran, Mahmoud and Caron, Mathilde and Misra, Ishan and Bojanowski, Piotr and Bordes, Florian and Vincent, Pascal and Joulin, Armand and Rabbat, Michael and Ballas, Nicolas and Ai, Meta}\n}","author_short":["Assran, M.","Caron, M.","Misra, I.","Bojanowski, P.","Bordes, F.","Vincent, P.","Joulin, A.","Rabbat, M.","Ballas, N.","Ai, M."],"urls":{"Paper":"https://bibbase.org/service/mendeley/bfbbf840-4c42-3914-a463-19024f50b30c/file/5282f3df-e0b4-340d-1eb7-c54718ab1bd6/full_text.pdf.pdf","Website":"https://github.com/facebookresearch/msn."},"biburl":"https://bibbase.org/service/mendeley/bfbbf840-4c42-3914-a463-19024f50b30c","bibbaseid":"assran-caron-misra-bojanowski-bordes-vincent-joulin-rabbat-etal-maskedsiamesenetworksforlabelecientlearning","role":"author","keyword":["Low-Shot Classifi-cation","Self-Supervised Representation Learning","Siamese Networks","Vision Transformers"],"metadata":{"authorlinks":{}},"downloads":0},"bibtype":"article","biburl":"https://bibbase.org/service/mendeley/bfbbf840-4c42-3914-a463-19024f50b30c","dataSources":["2252seNhipfTmjEBQ"],"keywords":["low-shot classifi-cation","self-supervised representation learning","siamese networks","vision transformers"],"search_terms":["masked","siamese","networks","label","ecient","learning","assran","caron","misra","bojanowski","bordes","vincent","joulin","rabbat","ballas","ai"],"title":"Masked Siamese Networks for Label-Ecient Learning","year":null}