Understanding and Enhancing Safety Mechanisms of LLMs via Safety-Specific Neuron. Zhao, Y., Zhang, W., Xie, Y., Goyal, A., Kawaguchi, K., & Shieh, M. In ICLR, 2025. OpenReview.net.
Link
Paper bibtex @inproceedings{conf/iclr/00060XGKS25,
added-at = {2025-05-15T00:00:00.000+0200},
author = {Zhao, Yiran and Zhang, Wenxuan and Xie, Yuxi and Goyal, Anirudh and Kawaguchi, Kenji and Shieh, Michael},
biburl = {https://www.bibsonomy.org/bibtex/2ed98a12bdb834a43e255800db61056f9/dblp},
booktitle = {ICLR},
crossref = {conf/iclr/2025},
ee = {https://openreview.net/forum?id=yR47RmND1m},
interhash = {06e7a13e129905936d2a53d9e93f0c7d},
intrahash = {ed98a12bdb834a43e255800db61056f9},
keywords = {dblp},
publisher = {OpenReview.net},
timestamp = {2025-05-19T07:11:53.000+0200},
title = {Understanding and Enhancing Safety Mechanisms of LLMs via Safety-Specific Neuron.},
url = {http://dblp.uni-trier.de/db/conf/iclr/iclr2025.html#00060XGKS25},
year = 2025
}
Downloads: 0
{"_id":"3m2GZQmYsMCoxcNcM","bibbaseid":"zhao-zhang-xie-goyal-kawaguchi-shieh-understandingandenhancingsafetymechanismsofllmsviasafetyspecificneuron-2025","author_short":["Zhao, Y.","Zhang, W.","Xie, Y.","Goyal, A.","Kawaguchi, K.","Shieh, M."],"bibdata":{"bibtype":"inproceedings","type":"inproceedings","added-at":"2025-05-15T00:00:00.000+0200","author":[{"propositions":[],"lastnames":["Zhao"],"firstnames":["Yiran"],"suffixes":[]},{"propositions":[],"lastnames":["Zhang"],"firstnames":["Wenxuan"],"suffixes":[]},{"propositions":[],"lastnames":["Xie"],"firstnames":["Yuxi"],"suffixes":[]},{"propositions":[],"lastnames":["Goyal"],"firstnames":["Anirudh"],"suffixes":[]},{"propositions":[],"lastnames":["Kawaguchi"],"firstnames":["Kenji"],"suffixes":[]},{"propositions":[],"lastnames":["Shieh"],"firstnames":["Michael"],"suffixes":[]}],"biburl":"https://www.bibsonomy.org/bibtex/2ed98a12bdb834a43e255800db61056f9/dblp","booktitle":"ICLR","crossref":"conf/iclr/2025","ee":"https://openreview.net/forum?id=yR47RmND1m","interhash":"06e7a13e129905936d2a53d9e93f0c7d","intrahash":"ed98a12bdb834a43e255800db61056f9","keywords":"dblp","publisher":"OpenReview.net","timestamp":"2025-05-19T07:11:53.000+0200","title":"Understanding and Enhancing Safety Mechanisms of LLMs via Safety-Specific Neuron.","url":"http://dblp.uni-trier.de/db/conf/iclr/iclr2025.html#00060XGKS25","year":"2025","bibtex":"@inproceedings{conf/iclr/00060XGKS25,\n added-at = {2025-05-15T00:00:00.000+0200},\n author = {Zhao, Yiran and Zhang, Wenxuan and Xie, Yuxi and Goyal, Anirudh and Kawaguchi, Kenji and Shieh, Michael},\n biburl = {https://www.bibsonomy.org/bibtex/2ed98a12bdb834a43e255800db61056f9/dblp},\n booktitle = {ICLR},\n crossref = {conf/iclr/2025},\n ee = {https://openreview.net/forum?id=yR47RmND1m},\n interhash = {06e7a13e129905936d2a53d9e93f0c7d},\n intrahash = {ed98a12bdb834a43e255800db61056f9},\n keywords = {dblp},\n publisher = {OpenReview.net},\n timestamp = {2025-05-19T07:11:53.000+0200},\n title = {Understanding and Enhancing Safety Mechanisms of LLMs via Safety-Specific Neuron.},\n url = {http://dblp.uni-trier.de/db/conf/iclr/iclr2025.html#00060XGKS25},\n year = 2025\n}\n\n","author_short":["Zhao, Y.","Zhang, W.","Xie, Y.","Goyal, A.","Kawaguchi, K.","Shieh, M."],"key":"conf/iclr/00060XGKS25","id":"conf/iclr/00060XGKS25","bibbaseid":"zhao-zhang-xie-goyal-kawaguchi-shieh-understandingandenhancingsafetymechanismsofllmsviasafetyspecificneuron-2025","role":"author","urls":{"Link":"https://openreview.net/forum?id=yR47RmND1m","Paper":"http://dblp.uni-trier.de/db/conf/iclr/iclr2025.html#00060XGKS25"},"keyword":["dblp"],"metadata":{"authorlinks":{}},"downloads":0,"html":""},"bibtype":"inproceedings","biburl":"http://www.bibsonomy.org/bib/author/Kawaguchi?items=1000","dataSources":["LMwAa6kDzduetX9sY"],"keywords":["dblp"],"search_terms":["understanding","enhancing","safety","mechanisms","llms","via","safety","specific","neuron","zhao","zhang","xie","goyal","kawaguchi","shieh"],"title":"Understanding and Enhancing Safety Mechanisms of LLMs via Safety-Specific Neuron.","year":2025}