Analyzing and Improving Representations with the Soft Nearest Neighbor Loss

Analyzing and Improving Representations with the Soft Nearest Neighbor Loss. Frosst, N., Papernot, N., & Hinton, G. 2019. arXiv:1902.01889.

Paper abstract bibtex

We explore and expand the Soft Nearest Neighbor Loss to measure the entanglement of class manifolds in representation space: i.e., how close pairs of points from the same class are relative to pairs of points from different classes. We demonstrate several use cases of the loss. As an analytical tool, it provides insights into the evolution of class similarity structures during learning. Surprisingly, we find that maximizing the entanglement of representations of different classes in the hidden layers is beneficial for discrimination in the final layer, possibly because it encourages representations to identify class-independent similarity structures. Maximizing the soft nearest neighbor loss in the hidden layers leads not only to improved generalization but also to better-calibrated estimates of uncertainty on outlier data. Data that is not from the training distribution can be recognized by observing that in the hidden layers, it has fewer than the normal number of neighbors from the predicted class.

@comment{arXiv preprint 1902.01889. The identifier is stored in the eprint
and archiveprefix fields rather than in a free-text note. The fields added-at,
biburl, description, interhash, intrahash, keywords and timestamp are
BibSonomy export metadata; they are unknown to standard styles and are
silently ignored when the bibliography is typeset.}
@misc{frosst2019analyzing,
  author        = {Frosst, Nicholas and Papernot, Nicolas and Hinton, Geoffrey},
  title         = {Analyzing and Improving Representations with the Soft Nearest Neighbor Loss},
  year          = {2019},
  eprint        = {1902.01889},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/1902.01889},
  abstract      = {We explore and expand the \emph{Soft Nearest Neighbor Loss} to
    measure the \emph{entanglement} of class manifolds in representation space:
    i.e., how close pairs of points from the same class are relative to pairs
    of points from different classes. We demonstrate several use cases of the
    loss. As an analytical tool, it provides insights into the evolution of
    class similarity structures during learning. Surprisingly, we find that
    \emph{maximizing} the entanglement of representations of different classes
    in the hidden layers is beneficial for discrimination in the final layer,
    possibly because it encourages representations to identify
    class-independent similarity structures. Maximizing the soft nearest
    neighbor loss in the hidden layers leads not only to improved
    generalization but also to better-calibrated estimates of uncertainty on
    outlier data. Data that is not from the training distribution can be
    recognized by observing that in the hidden layers, it has fewer than the
    normal number of neighbors from the predicted class.},
  keywords      = {distance-based-neighbor},
  description   = {Analyzing and Improving Representations with the Soft Nearest Neighbor Loss},
  added-at      = {2019-02-27T03:11:53.000+0100},
  timestamp     = {2019-02-27T03:11:53.000+0100},
  biburl        = {https://www.bibsonomy.org/bibtex/2ed553aa46c6f4edbe5320058b515f303/kirk86},
  interhash     = {6d84aeb2edfb9d1c14cbfdad3405901e},
  intrahash     = {ed553aa46c6f4edbe5320058b515f303},
}

Downloads: 0

{"_id":"HmKpi8HQCmSwjQ5SF","bibbaseid":"frosst-papernot-hinton-analyzingandimprovingrepresentationswiththesoftnearestneighborloss-2019","authorIDs":["gN5Lfqjgx8P4c7HJT"],"author_short":["Frosst, N.","Papernot, N.","Hinton, G."],"bibdata":{"bibtype":"article","type":"article","abstract":"We explore and expand the Soft Nearest Neighbor Loss to measure the entanglement of class manifolds in representation space: i.e., how close pairs of points from the same class are relative to pairs of points from different classes. We demonstrate several use cases of the loss. As an analytical tool, it provides insights into the evolution of class similarity structures during learning. Surprisingly, we find that maximizing the entanglement of representations of different classes in the hidden layers is beneficial for discrimination in the final layer, possibly because it encourages representations to identify class-independent similarity structures. Maximizing the soft nearest neighbor loss in the hidden layers leads not only to improved generalization but also to better-calibrated estimates of uncertainty on outlier data. 
Data that is not from the training distribution can be recognized by observing that in the hidden layers, it has fewer than the normal number of neighbors from the predicted class.","added-at":"2019-02-27T03:11:53.000+0100","author":[{"propositions":[],"lastnames":["Frosst"],"firstnames":["Nicholas"],"suffixes":[]},{"propositions":[],"lastnames":["Papernot"],"firstnames":["Nicolas"],"suffixes":[]},{"propositions":[],"lastnames":["Hinton"],"firstnames":["Geoffrey"],"suffixes":[]}],"biburl":"https://www.bibsonomy.org/bibtex/2ed553aa46c6f4edbe5320058b515f303/kirk86","description":"Analyzing and Improving Representations with the Soft Nearest Neighbor Loss","interhash":"6d84aeb2edfb9d1c14cbfdad3405901e","intrahash":"ed553aa46c6f4edbe5320058b515f303","keywords":"distance-based-neighbor","note":"cite arxiv:1902.01889","timestamp":"2019-02-27T03:11:53.000+0100","title":"Analyzing and Improving Representations with the Soft Nearest Neighbor Loss","url":"http://arxiv.org/abs/1902.01889","year":"2019","bibtex":"@article{frosst2019analyzing,\n abstract = {We explore and expand the $\\textit{Soft Nearest Neighbor Loss}$ to measure\r\nthe $\\textit{entanglement}$ of class manifolds in representation space: i.e.,\r\nhow close pairs of points from the same class are relative to pairs of points\r\nfrom different classes. We demonstrate several use cases of the loss. As an\r\nanalytical tool, it provides insights into the evolution of class similarity\r\nstructures during learning. Surprisingly, we find that $\\textit{maximizing}$\r\nthe entanglement of representations of different classes in the hidden layers\r\nis beneficial for discrimination in the final layer, possibly because it\r\nencourages representations to identify class-independent similarity structures.\r\nMaximizing the soft nearest neighbor loss in the hidden layers leads not only\r\nto improved generalization but also to better-calibrated estimates of\r\nuncertainty on outlier data. 
Data that is not from the training distribution\r\ncan be recognized by observing that in the hidden layers, it has fewer than the\r\nnormal number of neighbors from the predicted class.},\n added-at = {2019-02-27T03:11:53.000+0100},\n author = {Frosst, Nicholas and Papernot, Nicolas and Hinton, Geoffrey},\n biburl = {https://www.bibsonomy.org/bibtex/2ed553aa46c6f4edbe5320058b515f303/kirk86},\n description = {Analyzing and Improving Representations with the Soft Nearest Neighbor Loss},\n interhash = {6d84aeb2edfb9d1c14cbfdad3405901e},\n intrahash = {ed553aa46c6f4edbe5320058b515f303},\n keywords = {distance-based-neighbor},\n note = {cite arxiv:1902.01889},\n timestamp = {2019-02-27T03:11:53.000+0100},\n title = {Analyzing and Improving Representations with the Soft Nearest Neighbor\r\n Loss},\n url = {http://arxiv.org/abs/1902.01889},\n year = 2019\n}\n\n","author_short":["Frosst, N.","Papernot, N.","Hinton, G."],"key":"frosst2019analyzing","id":"frosst2019analyzing","bibbaseid":"frosst-papernot-hinton-analyzingandimprovingrepresentationswiththesoftnearestneighborloss-2019","role":"author","urls":{"Paper":"http://arxiv.org/abs/1902.01889"},"keyword":["distance-based-neighbor"],"downloads":0},"bibtype":"article","biburl":"http://www.bibsonomy.org/bib/author/Geoffrey Hinton?items=1000","creationDate":"2020-02-11T20:02:58.117Z","downloads":0,"keywords":["distance-based-neighbor"],"search_terms":["analyzing","improving","representations","soft","nearest","neighbor","loss","frosst","papernot","hinton"],"title":"Analyzing and Improving Representations with the Soft Nearest Neighbor Loss","year":2019,"dataSources":["9qjpnLCP4efAcKjDr"]}