Towards Understanding Jailbreak Attacks in LLMs: A Representation Space Analysis. Lin, Y., He, P., Xu, H., Xing, Y., Yamada, M., Liu, H., & Tang, J. In EMNLP, 2024. bibtex @inproceedings{yuping-emnlp-2024-1,
author = {Yuping Lin and Pengfei He and Han Xu and Yue Xing and Makoto Yamada and Hui Liu and Jiliang Tang},
title = {Towards Understanding Jailbreak Attacks in LLMs: A Representation Space Analysis},
booktitle = {EMNLP},
year = {2024}
}
Downloads: 0
{"_id":"LKDHeRB3EWDjC2zEK","bibbaseid":"lin-he-xu-xing-yamada-liu-tang-towardsunderstandingjailbreakattacksinllmsarepresentationspaceanalysis-2024","author_short":["Lin, Y.","He, P.","Xu, H.","Xing, Y.","Yamada, M.","Liu, H.","Tang, J."],"bibdata":{"bibtype":"inproceedings","type":"inproceedings","author":[{"firstnames":["Yuping"],"propositions":[],"lastnames":["Lin"],"suffixes":[]},{"firstnames":["Pengfei"],"propositions":[],"lastnames":["He"],"suffixes":[]},{"firstnames":["Han"],"propositions":[],"lastnames":["Xu"],"suffixes":[]},{"firstnames":["Yue"],"propositions":[],"lastnames":["Xing"],"suffixes":[]},{"firstnames":["Makoto"],"propositions":[],"lastnames":["Yamada"],"suffixes":[]},{"firstnames":["Hui"],"propositions":[],"lastnames":["Liu"],"suffixes":[]},{"firstnames":["Jiliang"],"propositions":[],"lastnames":["Tang"],"suffixes":[]}],"title":"Towards Understanding Jailbreak Attacks in LLMs: A Representation Space Analysis","booktitle":"EMNLP","year":"2024","bibtex":"@inproceedings{yuping-emnlp-2024-1,\n author = {Yuping Lin and Pengfei He and Han Xu and Yue Xing and Makoto Yamada and Hui Liu and Jiliang Tang},\n title = {Towards Understanding Jailbreak Attacks in LLMs: A Representation Space Analysis},\n booktitle = {EMNLP},\n year = {2024}\n}\n\n\n","author_short":["Lin, Y.","He, P.","Xu, H.","Xing, Y.","Yamada, M.","Liu, H.","Tang, J."],"key":"yuping-emnlp-2024-1","id":"yuping-emnlp-2024-1","bibbaseid":"lin-he-xu-xing-yamada-liu-tang-towardsunderstandingjailbreakattacksinllmsarepresentationspaceanalysis-2024","role":"author","urls":{},"metadata":{"authorlinks":{}}},"bibtype":"inproceedings","biburl":"https://bibbase.org/network/files/a6tWMqLRtcFTz3zv2","dataSources":["WKfsR8PDSWzp46ZNs","NMy64xygxcefaFhFL"],"keywords":[],"search_terms":["towards","understanding","jailbreak","attacks","llms","representation","space","analysis","lin","he","xu","xing","yamada","liu","tang"],"title":"Towards Understanding Jailbreak Attacks in LLMs: A Representation Space Analysis","year":2024}