LLMs Encode Harmfulness and Refusal Separately. Zhao, J., Huang, J., Wu, Z., Bau, D., & Shi, W. arXiv preprint arXiv:2507.11878, 2025. bibtex @article{zhao2025harmfulness,
title = {LLMs Encode Harmfulness and Refusal Separately},
author = {Zhao, Jiachen and Huang, Jing and Wu, Zhengxuan and Bau, David and Shi, Weiyan},
journal = {arXiv preprint arXiv:2507.11878},
year = {2025}
}
Downloads: 0
{"_id":"QRBASJciEHAHCgHCi","bibbaseid":"zhao-huang-wu-bau-shi-llmsencodeharmfulnessandrefusalseparately-2025","author_short":["Zhao, J.","Huang, J.","Wu, Z.","Bau, D.","Shi, W."],"bibdata":{"bibtype":"article","type":"article","title":"LLMs Encode Harmfulness and Refusal Separately","author":[{"propositions":[],"lastnames":["Zhao"],"firstnames":["Jiachen"],"suffixes":[]},{"propositions":[],"lastnames":["Huang"],"firstnames":["Jing"],"suffixes":[]},{"propositions":[],"lastnames":["Wu"],"firstnames":["Zhengxuan"],"suffixes":[]},{"propositions":[],"lastnames":["Bau"],"firstnames":["David"],"suffixes":[]},{"propositions":[],"lastnames":["Shi"],"firstnames":["Weiyan"],"suffixes":[]}],"journal":"arXiv preprint arXiv:2507.11878","year":"2025","bibtex":"@article{zhao2025harmfulness,\n title = {LLMs Encode Harmfulness and Refusal Separately},\n author = {Zhao, Jiachen and Huang, Jing and Wu, Zhengxuan and Bau, David and Shi, Weiyan},\n journal = {arXiv preprint arXiv:2507.11878},\n year = {2025}\n}\n\n","author_short":["Zhao, J.","Huang, J.","Wu, Z.","Bau, D.","Shi, W."],"key":"zhao2025harmfulness","id":"zhao2025harmfulness","bibbaseid":"zhao-huang-wu-bau-shi-llmsencodeharmfulnessandrefusalseparately-2025","role":"author","urls":{},"metadata":{"authorlinks":{}},"downloads":0,"html":""},"bibtype":"article","biburl":"https://bibbase.org/network/files/v9iLSAgPxqijWtNb2","dataSources":["HQzSNhug84wo985ng"],"keywords":[],"search_terms":["llms","encode","harmfulness","refusal","separately","zhao","huang","wu","bau","shi"],"title":"LLMs Encode Harmfulness and Refusal Separately","year":2025}