2025 (4)

HAVER: Instance-Dependent Error Bounds for Maximum Mean Estimation and Applications to Q-Learning and Monte Carlo Tree Search.
Tuan Nguyen; Jay Barrett; and Kwang-Sung Jun.
In International Conference on Artificial Intelligence and Statistics (AISTATS 2025), 2025.

Does Stylometry Really Survive Compilation? Binary Code Stylometry Revisited.
Muaz Ali; Tugay Bilgis; Beyza Bozdag; Saumya Debray; and Sazzadur Rahaman.
In Privacy Enhancing Technologies Symposium (PETS) 2025, Washington, DC and Online, 2025. Proceedings on Privacy Enhancing Technologies (PoPETs).

@inproceedings{Ali25PETS,
  author    = {Ali, Muaz and Bilgis, Tugay and Bozdag, Beyza and Debray, Saumya and Rahaman, Sazzadur},
  title     = {Does Stylometry Really Survive Compilation? Binary Code Stylometry Revisited},
  booktitle = {Privacy Enhancing Technologies Symposium (PETS) 2025},
  address   = {Washington, DC and Online},
  year      = {2025},
  publisher = {Proceedings on Privacy Enhancing Technologies (PoPETs)}
}

Minimum Empirical Divergence for Sub-Gaussian Linear Bandits.
Kapilan Balagopalan; and Kwang-Sung Jun.
In International Conference on Artificial Intelligence and Statistics (AISTATS 2025), 2025.

Comparative Analysis of 2D and 3D Visualization on Cognitive Workload and Performance in Surgical Peg Transfer Task Using Eye-Tracking Data.
Ruoshan Lan; Jimmy Tran; Iman Ghaderi; and Ehsan Azimi.
In IEEE/CVF Winter Conference on Applications of Computer Vision - GMCV Workshop, 2025.

@inproceedings{Lan20252DVS3D,
  title     = {Comparative Analysis of 2D and 3D Visualization on Cognitive Workload and Performance in Surgical Peg Transfer Task Using Eye-Tracking Data},
  author    = {Ruoshan Lan and Jimmy Tran and Iman Ghaderi and Ehsan Azimi},
  booktitle = {IEEE/CVF Winter Conference on Applications of Computer Vision - GMCV Workshop},
  year      = {2025}
}

2024 (19)

Impeding LLM-assisted Cheating in Introductory Programming Assignments via Adversarial Perturbation.
Saiful Islam Salim; Rubin Yuchan Yang; Alexander Cooper; Suryashree Ray; Saumya Debray; and Sazzadur Rahaman.
In Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing (EMNLP 2024), Miami, USA and virtual meeting, 2024. Association for Computational Linguistics.
Paper: https://arxiv.org/abs/2410.09318

@inproceedings{salim_2024_adversarial_perturbation,
  title     = {Impeding LLM-assisted Cheating in Introductory Programming Assignments via Adversarial Perturbation},
  author    = {Saiful Islam Salim and Rubin Yuchan Yang and Alexander Cooper and Suryashree Ray and Saumya Debray and Sazzadur Rahaman},
  year      = {2024},
  address   = {Miami, USA and virtual meeting},
  publisher = {Association for Computational Linguistics},
  booktitle = {Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing (EMNLP 2024)},
  url       = {https://arxiv.org/abs/2410.09318}
}

Abstract: While Large language model (LLM)-based programming assistants such as CoPilot and ChatGPT can help improve the productivity of professional software developers, they can also facilitate cheating in introductory computer programming courses. Assuming instructors have limited control over the industrial-strength models, this paper investigates the baseline performance of 5 widely used LLMs on a collection of introductory programming problems, examines adversarial perturbations to degrade their performance, and describes the results of a user study aimed at understanding the efficacy of such perturbations in hindering actual code generation for introductory programming assignments. The user study suggests that i) perturbations combinedly reduced the average correctness score by 77%, ii) the drop in correctness caused by these perturbations was affected based on their detectability.

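The reported 77% figure is an average relative drop in correctness across perturbations. A rough sketch of that kind of before/after measurement follows; it is not the paper's actual harness, and generate_solution, run_tests, and perturb are hypothetical stand-ins supplied by the caller.

from statistics import mean

def correctness_drop(problems, perturb, generate_solution, run_tests):
    # problems: iterable of (prompt, tests) pairs.
    # run_tests returns a correctness score in [0, 1] for generated code.
    # Returns the relative drop, e.g. 0.77 for the 77% reported above.
    base = [run_tests(generate_solution(p), t) for p, t in problems]
    pert = [run_tests(generate_solution(perturb(p)), t) for p, t in problems]
    return (mean(base) - mean(pert)) / mean(base)
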
On the Contents and Utility of IoT Cybersecurity Guidelines.
Jesse Chen; Dharun Anandayuvaraj; James C Davis; and Sazzadur Rahaman.
Proceedings of the ACM on Software Engineering, 1(FSE): 1400–1423. 2024.

@article{chen2024contents,
  title     = {On the Contents and Utility of IoT Cybersecurity Guidelines},
  author    = {Chen, Jesse and Anandayuvaraj, Dharun and Davis, James C and Rahaman, Sazzadur},
  journal   = {Proceedings of the ACM on Software Engineering},
  volume    = {1},
  number    = {FSE},
  pages     = {1400--1423},
  year      = {2024},
  publisher = {ACM New York, NY, USA}
}

Localized Evaluation for Constructing Discrete Vector Fields.
Tanner Finken; Julien Tierny; and Joshua A. Levine.
IEEE TVCG. 2024. doi: 10.1109/TVCG.2024.3456355

@article{finken2024OSA,
  author  = {Tanner Finken and Julien Tierny and Joshua A. Levine},
  title   = {Localized Evaluation for Constructing Discrete Vector Fields},
  journal = {IEEE TVCG},
  year    = {2024},
  doi     = {10.1109/TVCG.2024.3456355}
}

Learning to Generate Rules for Realistic Few-Shot Relation Classification: An Encoder-Decoder Approach.
Mayank Singh; and Eduardo Blanco.
In Findings of the Association for Computational Linguistics: EMNLP 2024, Miami, USA and virtual meeting, November 2024. Association for Computational Linguistics.

@inproceedings{singh-2024-learning,
  title     = {Learning to Generate Rules for Realistic Few-Shot Relation Classification: An Encoder-Decoder Approach},
  author    = {Singh, Mayank and Blanco, Eduardo},
  booktitle = {Findings of the Association for Computational Linguistics: EMNLP 2024},
  month     = nov,
  year      = {2024},
  address   = {Miami, USA and virtual meeting},
  publisher = {Association for Computational Linguistics}
}

Abstract: We propose a neuro-symbolic approach for realistic few-shot relation classification via rules. Instead of building neural models to predict relations, we design them to output straightforward rules that can be used to extract relations. The rules are generated using custom T5-style Encoder-Decoder Language Models. Crucially, our rules are fully interpretable and pliable (i.e., humans can easily modify them to boost performance). Through a combination of rules generated by these models along with a very effective, novel baseline, we demonstrate a few-shot relation-classification performance that is comparable to or stronger than the state of the art on the Few-Shot TACRED and NYT29 benchmarks while increasing interpretability and maintaining pliability.

Change Is the Only Constant: Dynamic LLM Slicing based on Layer Redundancy.
Razvan-Gabriel Dumitru; Paul Ioan Clotan; Vikas Yadav; Darius Peteleaza; and Mihai Surdeanu.
In Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing (EMNLP), September 2024.
Paper: https://openreview.net/forum?id=CHFf0AViDz

@inproceedings{dumitru2024slicing,
  title     = {Change Is the Only Constant: Dynamic LLM Slicing based on Layer Redundancy},
  author    = {Razvan-Gabriel Dumitru and Paul Ioan Clotan and Vikas Yadav and Darius Peteleaza and Mihai Surdeanu},
  booktitle = {Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing (EMNLP)},
  year      = {2024},
  month     = {September},
  url       = {https://openreview.net/forum?id=CHFf0AViDz}
}

Abstract: This paper introduces a novel model compression approach through dynamic layer-specific pruning in Large Language Models (LLMs), enhancing the traditional methodology established by SliceGPT. By transitioning from constant to dynamic slicing, our method leverages the newly proposed Layer Redundancy (LR) score, which assesses how much each layer changes its input by measuring the cosine similarity of the input to the output of the layer. The method prunes parts of individual layers based on redundancy, maintaining performance while reducing model size.

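The Layer Redundancy score is described concretely enough to illustrate: the cosine similarity between a layer's input and its output, where high similarity means the layer changes little and is a candidate for more aggressive slicing. A minimal, model-agnostic NumPy sketch, assuming the hidden states have already been collected elsewhere:

import numpy as np

def layer_redundancy(h_in, h_out):
    # h_in, h_out: (tokens, hidden) hidden states at a layer's input/output.
    # Mean cosine similarity; values near 1.0 mark a redundant layer that
    # dynamic slicing would prune more aggressively.
    num = (h_in * h_out).sum(axis=-1)
    den = np.linalg.norm(h_in, axis=-1) * np.linalg.norm(h_out, axis=-1)
    return float((num / den).mean())
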
Enhancing Transformer RNNs with Multiple Temporal Perspectives.
Razvan-Gabriel Dumitru; Darius Peteleaza; and Mihai Surdeanu.
In Next Generation of Sequence Modeling Architectures (at ICML) 2024, 2024.
Paper: https://arxiv.org/abs/2402.02625

@misc{dumitru2024enhancingtransformerrnnsmultiple,
  title        = {Enhancing Transformer RNNs with Multiple Temporal Perspectives},
  author       = {Razvan-Gabriel Dumitru and Darius Peteleaza and Mihai Surdeanu},
  year         = {2024},
  eprint       = {2402.02625},
  booktitle    = {Next Generation of Sequence Modeling Architectures (at ICML) 2024},
  primaryClass = {cs.LG},
  url          = {https://arxiv.org/abs/2402.02625}
}

Retrieval Augmented Generation of Subjective Explanations for Socioeconomic Scenarios.
Razvan-Gabriel Dumitru; Maria Alexeeva; Keith Alcock; Nargiza Ludgate; Cheonkam Jeong; Zara Fatima Abdurahaman; Prateek Puri; Brian Kirchhoff; Santadarshan Sadhu; and Mihai Surdeanu.
In Sixth Workshop on NLP and Computational Social Science (at NAACL) 2024, 2024.
Paper: http://clulab.org/papers/naacl-css2024-rag.pdf

@inproceedings{dumitru2024retrieval,
  title     = {Retrieval Augmented Generation of Subjective Explanations for Socioeconomic Scenarios},
  author    = {Dumitru, Razvan-Gabriel and Alexeeva, Maria and Alcock, Keith and Ludgate, Nargiza and Jeong, Cheonkam and Abdurahaman, Zara Fatima and Puri, Prateek and Kirchhoff, Brian and Sadhu, Santadarshan and Surdeanu, Mihai},
  booktitle = {Sixth Workshop on NLP and Computational Social Science (at NAACL) 2024},
  year      = {2024},
  url       = {http://clulab.org/papers/naacl-css2024-rag.pdf}
}

Time Travel in LLMs: Tracing Data Contamination in Large Language Models.
Shahriar Golchin; and Mihai Surdeanu.
In The Twelfth International Conference on Learning Representations (ICLR), 2024.
Paper: https://openreview.net/forum?id=2Rwq6c3tvr

@inproceedings{golchin2024time,
  title     = {Time Travel in {LLM}s: Tracing Data Contamination in Large Language Models},
  author    = {Shahriar Golchin and Mihai Surdeanu},
  booktitle = {The Twelfth International Conference on Learning Representations (ICLR)},
  year      = {2024},
  url       = {https://openreview.net/forum?id=2Rwq6c3tvr}
}

Building large-scale registries from unstructured clinical notes using a low-resource natural language processing pipeline.
Nazgol Tavabi; James Pruneski; Shahriar Golchin; Mallika Singh; Ryan Sanborn; Benton Heyworth; Assaf Landschaft; Amir Kimia; and Ata Kiapour.
Artificial Intelligence in Medicine, 151: 102847. 2024. doi: 10.1016/j.artmed.2024.102847

@article{TAVABI2024102847,
  title    = {Building large-scale registries from unstructured clinical notes using a low-resource natural language processing pipeline},
  journal  = {Artificial Intelligence in Medicine},
  volume   = {151},
  pages    = {102847},
  year     = {2024},
  issn     = {0933-3657},
  doi      = {10.1016/j.artmed.2024.102847},
  url      = {https://www.sciencedirect.com/science/article/pii/S0933365724000897},
  author   = {Nazgol Tavabi and James Pruneski and Shahriar Golchin and Mallika Singh and Ryan Sanborn and Benton Heyworth and Assaf Landschaft and Amir Kimia and Ata Kiapour},
  keywords = {Electronic health records, Natural language processing, Registry building, Clinical notes, ACL}
}

Abstract: Building clinical registries is an important step in clinical research and improvement of patient care quality. Natural Language Processing (NLP) methods have shown promising results in extracting valuable information from unstructured clinical notes. However, the structure and nature of clinical notes are very different from regular text that state-of-the-art NLP models are trained and tested on, and they have their own set of challenges. In this study, we propose Sentence Extractor with Keywords (SE-K), an efficient and interpretable classification approach for extracting information from clinical notes and show that it outperforms more computationally expensive methods in text classification. Following the Institutional Review Board (IRB) approval, we used SE-K and two embedding based NLP approaches (Sentence Extractor with Embeddings (SE-E) and Bidirectional Encoder Representations from Transformers (BERT)) to develop comprehensive registry of anterior cruciate ligament surgeries from 20 years of unstructured clinical data at a multi-site tertiary-care regional children's hospital. The low-resource approach (SE-K) had better performance (average AUROC of 0.94 ± 0.04) than the embedding-based approaches (SE-E: 0.93 ± 0.04 and BERT: 0.87 ± 0.09) for out of sample validation, in addition to minimum performance drop between test and out-of-sample validation. Moreover, the SE-K approach was at least six times faster (on CPU) than SE-E (on CPU) and BERT (on GPU) and provides interpretability. Our proposed approach, SE-K, can be effectively used to extract relevant variables from clinic notes to build large-scale registries, with consistently better performance compared to the more resource-intensive approaches (e.g., BERT). Such approaches can facilitate information extraction from unstructured notes for registry building, quality improvement and adverse event monitoring.

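The abstract only outlines SE-K (sentence extraction plus keywords), so the following is a schematic, hypothetical stand-in rather than the authors' model: flag a note for a registry variable when enough sentences mention a domain keyword.

def sek_flag(note, keywords, threshold=1):
    # Toy keyword-based sentence classifier in the low-resource spirit of
    # SE-K; keywords are assumed lowercase. The real method is more
    # involved than this sketch.
    sentences = [s.strip() for s in note.split(".") if s.strip()]
    hits = sum(any(k in s.lower() for k in keywords) for s in sentences)
    return hits >= threshold
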
Best of Both Worlds: A Pliable and Generalizable Neuro-Symbolic Approach for Relation Classification.
Robert Vacareanu; Fahmida Alam; Md Asiful Islam; Haris Riaz; and Mihai Surdeanu.
In Findings of the Association for Computational Linguistics: NAACL 2024, Mexico City, Mexico, June 2024. Association for Computational Linguistics.

@inproceedings{vacareanu2024softrules,
  title     = {Best of Both Worlds: A Pliable and Generalizable Neuro-Symbolic Approach for Relation Classification},
  author    = {Robert Vacareanu and Fahmida Alam and Md Asiful Islam and Haris Riaz and Mihai Surdeanu},
  booktitle = {Findings of the Association for Computational Linguistics: NAACL 2024},
  month     = jun,
  year      = {2024},
  address   = {Mexico City, Mexico},
  publisher = {Association for Computational Linguistics}
}

Abstract: This paper introduces a novel neuro-symbolic architecture for relation classification (RC) that combines rule-based methods with contemporary deep learning techniques. This approach capitalizes on the strengths of both paradigms: the adaptability of rule-based systems and the generalization power of neural networks. Our architecture consists of two components: a declarative rule-based model for transparent classification and a neural component to enhance rule generalizability through semantic text matching. Notably, our semantic matcher is trained in an unsupervised domain-agnostic way, solely with synthetic data. Further, these components are loosely coupled, allowing for rule modifications without retraining the semantic matcher. In our evaluation, we focused on two few-shot relation classification datasets: Few-Shot TACRED and a Few-Shot version of NYT29. We show that our proposed method outperforms previous state-of-the-art models in three out of four settings, despite not seeing any human-annotated training data. Further, we show that our approach remains modular and pliable, i.e., the corresponding rules can be locally modified to improve the overall model. Human interventions to the rules for the TACRED relation org:parents boost the performance on that relation by as much as 26% relative improvement, without negatively impacting the other relations, and without retraining the semantic matching component.

Active Learning Design Choices for NER with Transformers.
Robert Vacareanu; Enrique Noriega-Atala; Gus Hahn-Powell; Marco A. Valenzuela-Escarcega; and Mihai Surdeanu.
In Proceedings of the Joint International Conference on Computational Linguistics, Language Resources and Evaluation, Torino, Italy, May 2024. European Language Resources Association.

@inproceedings{vacareanu2024ActiveLearningNER,
  title     = {Active Learning Design Choices for NER with Transformers},
  author    = {Robert Vacareanu and Enrique Noriega-Atala and Gus Hahn-Powell and Marco A. Valenzuela-Escarcega and Mihai Surdeanu},
  booktitle = {Proceedings of the Joint International Conference on Computational Linguistics, Language Resources and Evaluation},
  month     = may,
  year      = {2024},
  address   = {Torino, Italy},
  publisher = {European Language Resources Association}
}

Abstract: We explore multiple important choices that have not been analyzed in conjunction regarding active learning for token classification using transformer networks. These choices are: (i) how to select what to annotate, (ii) decide whether to annotate entire sentences or smaller sentence fragments, (iii) how to train with incomplete annotations at token-level, and (iv) how to select the initial seed dataset. We explore whether annotating at sub-sentence level can translate to an improved downstream performance by considering two different sub-sentence annotation strategies: (i) entity-level, and (ii) token-level. These approaches result in some sentences being only partially annotated. To address this issue, we introduce and evaluate multiple strategies to deal with partially-annotated sentences during the training process. We show that annotating at the sub-sentence level achieves comparable or better performance than sentence-level annotations with a smaller number of annotated tokens. We then explore the extent to which the performance gap remains once accounting for the annotation time and found that both annotation schemes perform similarly.

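Choice (i), selecting what to annotate, is commonly implemented with least-confidence sampling; a minimal sketch of that strategy follows (one common acquisition function, not necessarily the paper's specific recipe).

def least_confidence_selection(token_confidences, budget):
    # token_confidences: {(sentence_id, token_id): max class probability}.
    # Returns the `budget` token positions the model is least sure about;
    # under entity- or token-level annotation this yields the partially
    # annotated sentences the abstract discusses.
    ranked = sorted(token_confidences.items(), key=lambda kv: kv[1])
    return [pos for pos, _ in ranked[:budget]]
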
A Weak Supervision Approach for Few-Shot Aspect Based Sentiment Analysis.
Robert Vacareanu; Siddharth Varia; Kishaloy Halder; Shuai Wang; Giovanni Paolini; Neha Anna John; Miguel Ballesteros; and Smaranda Muresan.
In Yvette Graham; and Matthew Purver, editors, Proceedings of the 18th Conference of the European Chapter of the Association for Computational Linguistics (Volume 1: Long Papers), pages 2734–2752, St. Julian's, Malta, March 2024. Association for Computational Linguistics.
Paper: https://aclanthology.org/2024.eacl-long.167

@inproceedings{vacareanu-etal-2024-weak,
  title     = {A Weak Supervision Approach for Few-Shot Aspect Based Sentiment Analysis},
  author    = {Vacareanu, Robert and Varia, Siddharth and Halder, Kishaloy and Wang, Shuai and Paolini, Giovanni and Anna John, Neha and Ballesteros, Miguel and Muresan, Smaranda},
  editor    = {Graham, Yvette and Purver, Matthew},
  booktitle = {Proceedings of the 18th Conference of the European Chapter of the Association for Computational Linguistics (Volume 1: Long Papers)},
  month     = mar,
  year      = {2024},
  address   = {St. Julian{'}s, Malta},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2024.eacl-long.167},
  pages     = {2734--2752}
}

Abstract: We explore how weak supervision on abundant unlabeled data can be leveraged to improve few-shot performance in aspect-based sentiment analysis (ABSA) tasks. We propose a pipeline approach to construct a noisy ABSA dataset, and we use it to adapt a pre-trained sequence-to-sequence model to the ABSA tasks. We test the resulting model on three widely used ABSA datasets, before and after fine-tuning. Our proposed method preserves the full fine-tuning performance while showing significant improvements (15.84 absolute F1) in the few-shot learning scenario for the harder tasks. In zero-shot (i.e., without fine-tuning), our method outperforms the previous state of the art on the aspect extraction sentiment classification (AESC) task and is, additionally, capable of performing the harder aspect sentiment triplet extraction (ASTE) task.

Interpreting Answers to Yes-No Questions in Dialogues from Multiple Domains.
Zijie Wang; Farzana Rashid; and Eduardo Blanco.
In Findings of the Association for Computational Linguistics: NAACL 2024, Mexico City, Mexico, June 2024. Association for Computational Linguistics.

@inproceedings{wang2024naaclfindings,
  title     = {Interpreting Answers to Yes-No Questions in Dialogues from Multiple Domains},
  author    = {Wang, Zijie and Rashid, Farzana and Blanco, Eduardo},
  booktitle = {Findings of the Association for Computational Linguistics: NAACL 2024},
  month     = jun,
  year      = {2024},
  address   = {Mexico City, Mexico},
  publisher = {Association for Computational Linguistics}
}

Abstract: People often answer yes-no questions without explicitly saying yes, no, or similar polar keywords. Figuring out the meaning of indirect answers is challenging, even for large language models. In this paper, we investigate this problem working with dialogues from multiple domains. We present new benchmarks in three diverse domains: movie scripts, tennis interviews, and airline customer service. We present an approach grounded on distant supervision and blended training to quickly adapt to a new dialogue domain. Experimental results show that our approach is never detrimental and yields F1 improvements as high as 11-34%.

ELLEN: Extremely Lightly Supervised Learning For Efficient Named Entity Recognition.
Haris Riaz; Razvan-Gabriel Dumitru; and Mihai Surdeanu.
In Proceedings of the Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING), Torino, Italy, May 2024. European Language Resources Association.

@inproceedings{riaz2024ellen,
  title     = {ELLEN: Extremely Lightly Supervised Learning For Efficient Named Entity Recognition},
  author    = {Haris Riaz and Razvan-Gabriel Dumitru and Mihai Surdeanu},
  booktitle = {Proceedings of the Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING)},
  month     = may,
  year      = {2024},
  address   = {Torino, Italy},
  publisher = {European Language Resources Association}
}

Abstract: In this work, we revisit the problem of semi-supervised named entity recognition (NER) focusing on extremely light supervision, consisting of a lexicon containing only 10 examples per class. We introduce ELLEN, a simple, fully modular, neuro-symbolic method that blends fine-tuned language models with linguistic rules. These rules include insights such as "One Sense Per Discourse", using a Masked Language Model as an unsupervised NER, leveraging part-of-speech tags to identify and eliminate unlabeled entities as false negatives, and other intuitions about classifier confidence scores in local and global context. ELLEN achieves very strong performance on the CoNLL-2003 dataset when using the minimal supervision from the lexicon above. It also outperforms most existing (and considerably more complex) semi-supervised NER methods under the same supervision settings commonly used in the literature (i.e., 5% of the training data). Further, we evaluate our CoNLL-2003 model in a zero-shot scenario on WNUT-17 where we find that it outperforms GPT-3.5 and achieves comparable performance to GPT-4. In a zero-shot setting, ELLEN also achieves over 75% of the performance of a strong, fully supervised model trained on gold data. Our code is available at: https://github.com/hriaz17/ELLEN

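One of the rules the abstract names, "One Sense Per Discourse", is simple enough to sketch: a label assigned to a word anywhere in a document is propagated to its other, unlabeled occurrences. A toy version follows; ELLEN itself combines several such rules with a fine-tuned language model (see the repository linked above).

def one_sense_per_discourse(tokens, labels):
    # labels: per-token entity labels, with None where unlabeled.
    # Propagate the first observed label for a word to its unlabeled twins.
    seen = {}
    for tok, lab in zip(tokens, labels):
        if lab is not None:
            seen.setdefault(tok.lower(), lab)
    return [lab if lab is not None else seen.get(tok.lower())
            for tok, lab in zip(tokens, labels)]
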
Widespread prevalence of a methylation-dependent switch to activate an essential DNA damage response in bacteria.
Aditya Kamat; Ngat T. Tran; Mohak Sharda; Neha Sontakke; Tung B. K. Le; and Anjana Badrinarayanan.
PLOS Biology, 22(3): 1-25. March 2024. doi: 10.1371/journal.pbio.3002540

@article{10.1371/journal.pbio.3002540,
  doi       = {10.1371/journal.pbio.3002540},
  author    = {Kamat, Aditya and Tran, Ngat T. and Sharda, Mohak and Sontakke, Neha and Le, Tung B. K. and Badrinarayanan, Anjana},
  journal   = {PLOS Biology},
  publisher = {Public Library of Science},
  title     = {Widespread prevalence of a methylation-dependent switch to activate an essential DNA damage response in bacteria},
  year      = {2024},
  month     = {03},
  volume    = {22},
  number    = {3},
  pages     = {1-25},
  url       = {https://doi.org/10.1371/journal.pbio.3002540}
}

Abstract: DNA methylation plays central roles in diverse cellular processes, ranging from error-correction during replication to regulation of bacterial defense mechanisms. Nevertheless, certain aberrant methylation modifications can have lethal consequences. The mechanisms by which bacteria detect and respond to such damage remain incompletely understood. Here, we discover a highly conserved but previously uncharacterized transcription factor (Cada2), which orchestrates a methylation-dependent adaptive response in Caulobacter. This response operates independently of the SOS response, governs the expression of genes crucial for direct repair, and is essential for surviving methylation-induced damage. Our molecular investigation of Cada2 reveals a cysteine methylation-dependent posttranslational modification (PTM) and mode of action distinct from its Escherichia coli counterpart, a trait conserved across all bacteria harboring a Cada2-like homolog instead. Extending across the bacterial kingdom, our findings support the notion of divergence and coevolution of adaptive response transcription factors and their corresponding sequence-specific DNA motifs. Despite this diversity, the ubiquitous prevalence of adaptive response regulators underscores the significance of a transcriptional switch, mediated by methylation PTM, in driving a specific and essential bacterial DNA damage response.

Towards Realistic Few-Shot Relation Extraction: A New Meta Dataset and Evaluation.
Fahmida Alam; Md Asiful Islam; Robert Vacareanu; and Mihai Surdeanu.
In Proceedings of the Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING), Torino, Italy, May 2024. European Language Resources Association.

@inproceedings{alam2024-meta-dataset,
  title     = {Towards Realistic Few-Shot Relation Extraction: A New Meta Dataset and Evaluation},
  author    = {Fahmida Alam and Md Asiful Islam and Robert Vacareanu and Mihai Surdeanu},
  booktitle = {Proceedings of the Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING)},
  month     = may,
  year      = {2024},
  address   = {Torino, Italy},
  publisher = {European Language Resources Association}
}

Abstract: We introduce a meta dataset for few-shot relation extraction, which includes two datasets derived from existing supervised relation extraction datasets – NYT29 (Takanobu et al., 2019; Nayak and Ng, 2020) and WIKIDATA (Sorokin and Gurevych, 2017) – as well as a few-shot form of the TACRED dataset (Sabo et al., 2021). Importantly, all these few-shot datasets were generated under realistic assumptions such as: the test relations are different from any relations a model might have seen before, limited training data, and a preponderance of candidate relation mentions that do not correspond to any of the relations of interest. Using this large resource, we conduct a comprehensive evaluation of six recent few-shot relation extraction methods, and observe that no method comes out as a clear winner. Further, the overall performance on this task is low, indicating substantial need for future research. We release all versions of the data, i.e., both supervised and few-shot, for future research.

ASSOCIATION OF MRI-DEFINED STRUCTURE FEATURES AT BASELINE WITH KNEE PAIN TRAJECTORIES.
S Liu; X Sun; Y Ge; TN Duong; and CK Kwoh.
Osteoarthritis Imaging, 4: 100187. 2024.

@article{liu2024association,
  title     = {ASSOCIATION OF MRI-DEFINED STRUCTURE FEATURES AT BASELINE WITH KNEE PAIN TRAJECTORIES},
  author    = {Liu, S and Sun, X and Ge, Y and Duong, TN and Kwoh, CK},
  journal   = {Osteoarthritis Imaging},
  volume    = {4},
  pages     = {100187},
  year      = {2024},
  publisher = {Elsevier}
}

Non-stationary Bandits and Meta-Learning with a Small Set of Optimal Arms.
Javad Azizi; Thang Duong; Yasin Abbasi-Yadkori; András György; Claire Vernade; and Mohammad Ghavamzadeh.
Reinforcement Learning Journal, 5: 2461–2491. 2024.

@article{azizi2024stationary,
  title   = {Non-stationary Bandits and Meta-Learning with a Small Set of Optimal Arms},
  author  = {Azizi, Javad and Duong, Thang and Abbasi-Yadkori, Yasin and Gy{\"{o}}rgy, Andr{\'{a}}s and Vernade, Claire and Ghavamzadeh, Mohammad},
  journal = {Reinforcement Learning Journal},
  volume  = {5},
  pages   = {2461--2491},
  year    = {2024}
}

Beyond task diversity: provable representation transfer for sequential multi-task linear bandits.
Thang Duong; Zhi Wang; and Chicheng Zhang.
In Advances in Neural Information Processing Systems 37 (NeurIPS 2024), 2024.

@inproceedings{duong2024beyond,
  title     = {Beyond task diversity: provable representation transfer for sequential multi-task linear bandits},
  author    = {Duong, Thang and Wang, Zhi and Zhang, Chicheng},
  booktitle = {Advances in Neural Information Processing Systems 37 (NeurIPS 2024)},
  year      = {2024},
  publisher = {NeurIPS}
}

2023 (15)

Efficient Variational Sequential Information Control.
Jianwei Shen; and Jason Pacheco.
In NeurIPS 2023 Workshop on Adaptive Experimental Design and Active Learning in the Real World, 2023.

@inproceedings{shen2023efficient,
  title     = {Efficient Variational Sequential Information Control},
  author    = {Shen, Jianwei and Pacheco, Jason},
  booktitle = {NeurIPS 2023 Workshop on Adaptive Experimental Design and Active Learning in the Real World},
  year      = {2023}
}

Synthetic Dataset for Evaluating Complex Compositional Knowledge for Natural Language Inference.
Sushma Anand Akoju; Robert Vacareanu; Eduardo Blanco; Haris Riaz; and Mihai Surdeanu.
In Bhavana Dalvi Mishra; Greg Durrett; Peter Jansen; Danilo Neves Ribeiro; and Jason Wei, editors, Proceedings of the 1st Workshop on Natural Language Reasoning and Structured Explanations (NLRSE), Toronto, Canada, June 2023. Association for Computational Linguistics.

It's not Sexually Suggestive; It's Educative | Separating Sex Education from Suggestive Content on TikTok videos.
Enfa George; and Mihai Surdeanu.
In Findings of the Association for Computational Linguistics: ACL 2023, pages 5904–5915, Toronto, Canada, July 2023. Association for Computational Linguistics.
Paper: https://aclanthology.org/2023.findings-acl.365

@inproceedings{george-surdeanu-2023-sexually,
  title     = {It{'}s not Sexually Suggestive; It{'}s Educative | Separating Sex Education from Suggestive Content on {T}ik{T}ok videos},
  author    = {George, Enfa and Surdeanu, Mihai},
  booktitle = {Findings of the Association for Computational Linguistics: ACL 2023},
  month     = jul,
  year      = {2023},
  address   = {Toronto, Canada},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2023.findings-acl.365},
  doi       = {10.18653/v1/2023.findings-acl.365},
  pages     = {5904--5915}
}

Abstract: We introduce SexTok, a multi-modal dataset composed of TikTok videos labeled as sexually suggestive (from the annotator's point of view), sex-educational content, or neither. Such a dataset is necessary to address the challenge of distinguishing between sexually suggestive content and virtual sex education videos on TikTok. Children's exposure to sexually suggestive videos has been shown to have adversarial effects on their development (Collins et al. 2017). Meanwhile, virtual sex education, especially on subjects that are more relevant to the LGBTQIA+ community, is very valuable (Mitchell et al. 2014). The platform's current system removes/punishes some of both types of videos, even though they serve different purposes. Our dataset contains video URLs, and it is also audio transcribed. To validate its importance, we explore two transformer-based models for classifying the videos. Our preliminary results suggest that the task of distinguishing between these types of videos is learnable but challenging. These experiments suggest that this dataset is meaningful and invites further study on the subject.

Interpreting Indirect Answers to Yes-No Questions in Multiple Languages.
Zijie Wang; Md Hossain; Shivam Mathur; Terry Melo; Kadir Ozler; Keun Park; Jacob Quintero; MohammadHossein Rezaei; Shreya Shakya; Md Uddin; and Eduardo Blanco.
In Houda Bouamor; Juan Pino; and Kalika Bali, editors, Findings of the Association for Computational Linguistics: EMNLP 2023, pages 2210–2227, Singapore, December 2023. Association for Computational Linguistics.
Paper: https://aclanthology.org/2023.findings-emnlp.146

@inproceedings{wang-etal-2023-interpreting,
  title     = {Interpreting Indirect Answers to Yes-No Questions in Multiple Languages},
  author    = {Wang, Zijie and Hossain, Md and Mathur, Shivam and Melo, Terry and Ozler, Kadir and Park, Keun and Quintero, Jacob and Rezaei, MohammadHossein and Shakya, Shreya and Uddin, Md and Blanco, Eduardo},
  editor    = {Bouamor, Houda and Pino, Juan and Bali, Kalika},
  booktitle = {Findings of the Association for Computational Linguistics: EMNLP 2023},
  month     = dec,
  year      = {2023},
  address   = {Singapore},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2023.findings-emnlp.146},
  doi       = {10.18653/v1/2023.findings-emnlp.146},
  pages     = {2210--2227}
}

Abstract: Yes-no questions expect a yes or no for an answer, but people often skip polar keywords. Instead, they answer with long explanations that must be interpreted. In this paper, we focus on this challenging problem and release new benchmarks in eight languages. We present a distant supervision approach to collect training data, and demonstrate that direct answers (i.e., with polar keywords) are useful to train models to interpret indirect answers (i.e., without polar keywords). We show that monolingual fine-tuning is beneficial if training data can be obtained via distant supervision for the language of interest (5 languages). Additionally, we show that cross-lingual fine-tuning is always beneficial (8 languages).

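The distant supervision step described above, where direct answers supervise the interpretation of indirect ones, can be sketched as a silver-labeling rule. The keyword lists below are illustrative stand-ins, not the paper's:

def silver_label(answer):
    # Answers containing a polar keyword get a silver label; keyword-free
    # (indirect) answers return None and are left for the trained model.
    words = set(answer.lower().split())
    if words & {"yes", "yeah", "definitely", "absolutely"}:
        return "yes"
    if words & {"no", "nope", "never"}:
        return "no"
    return None
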
Revisiting Simple Regret: Fast Rates for Returning a Good Arm.
Yao Zhao; Connor Stephens; Csaba Szepesvari; and Kwang-Sung Jun.
In International Conference on Machine Learning (ICML), 2023. PMLR.

@inproceedings{zhao2023revisiting,
  title        = {Revisiting Simple Regret: Fast Rates for Returning a Good Arm},
  author       = {Zhao, Yao and Stephens, Connor and Szepesvari, Csaba and Jun, Kwang-Sung},
  booktitle    = {International Conference on Machine Learning (ICML)},
  year         = {2023},
  organization = {PMLR}
}

Improving Zero-shot Relation Classification via Automatically-acquired Entailment Templates.
Mahdi Rahimi; and Mihai Surdeanu.
In Proceedings of the 8th Workshop on Representation Learning for NLP (RepL4NLP 2023), pages 187–195, 2023.

@inproceedings{rahimi2023improving,
  title     = {Improving Zero-shot Relation Classification via Automatically-acquired Entailment Templates},
  author    = {Rahimi, Mahdi and Surdeanu, Mihai},
  booktitle = {Proceedings of the 8th Workshop on Representation Learning for NLP (RepL4NLP 2023)},
  pages     = {187--195},
  year      = {2023}
}

\n \n\n \n \n \n \n \n Intermediate Domain Finetuning for Weakly Supervised Domain-adaptive Clinical NER.\n \n \n \n\n\n \n Shilpa Suresh; Nazgol Tavabi; Shahriar Golchin; Leah Gilreath; Rafael Garcia-Andujar; Alexander Kim; Joseph Murray; Blake Bacevich; and Ata Kiapour.\n\n\n \n\n\n\n In The 22nd Workshop on Biomedical Natural Language Processing and BioNLP Shared Tasks, pages 320–325, 2023. \n \n\n\n\n
@inproceedings{suresh2023intermediate,
  title={Intermediate Domain Finetuning for Weakly Supervised Domain-adaptive Clinical NER},
  author={Suresh, Shilpa and Tavabi, Nazgol and Golchin, Shahriar and Gilreath, Leah and Garcia-Andujar, Rafael and Kim, Alexander and Murray, Joseph and Bacevich, Blake and Kiapour, Ata},
  booktitle={The 22nd Workshop on Biomedical Natural Language Processing and BioNLP Shared Tasks},
  pages={320--325},
  year={2023}
}
Do not Mask Randomly: Effective Domain-adaptive Pre-training by Masking In-domain Keywords.
Shahriar Golchin; Mihai Surdeanu; Nazgol Tavabi; and Ata Kiapour.
In Proceedings of the 8th Workshop on Representation Learning for NLP (RepL4NLP 2023), pages 13–21, 2023.
@inproceedings{golchin2023not,
  title={Do not Mask Randomly: Effective Domain-adaptive Pre-training by Masking In-domain Keywords},
  author={Golchin, Shahriar and Surdeanu, Mihai and Tavabi, Nazgol and Kiapour, Ata},
  booktitle={Proceedings of the 8th Workshop on Representation Learning for NLP (RepL4NLP 2023)},
  pages={13--21},
  year={2023}
}
Disparities in cannabis use and documentation in electronic health records among children and young adults.
Nazgol Tavabi; Marium Raza; Mallika Singh; Shahriar Golchin; Harsev Singh; Grant D Hogue; and Ata M Kiapour.
NPJ digital medicine, 6(1): 138, 2023.
@article{tavabi2023disparities,
  title={Disparities in cannabis use and documentation in electronic health records among children and young adults},
  author={Tavabi, Nazgol and Raza, Marium and Singh, Mallika and Golchin, Shahriar and Singh, Harsev and Hogue, Grant D and Kiapour, Ata M},
  journal={NPJ digital medicine},
  volume={6},
  number={1},
  pages={138},
  year={2023},
  publisher={Nature Publishing Group UK London}
}
Automated Supervised Topic Modeling Framework for Hardware Weaknesses.
Rakibul Hassan; Charan Bandi; Meng-Tien Tsai; Shahriar Golchin; Sai Manoj PD; Setareh Rafatirad; and Soheil Salehi.
In 2023 24th International Symposium on Quality Electronic Design (ISQED), pages 1–8, 2023. IEEE.
@inproceedings{hassan2023automated,
  title={Automated Supervised Topic Modeling Framework for Hardware Weaknesses},
  author={Hassan, Rakibul and Bandi, Charan and Tsai, Meng-Tien and Golchin, Shahriar and PD, Sai Manoj and Rafatirad, Setareh and Salehi, Soheil},
  booktitle={2023 24th International Symposium on Quality Electronic Design (ISQED)},
  pages={1--8},
  year={2023},
  organization={IEEE}
}
A Tale of Reduction, Security and Correctness: Evaluating Program Debloating Paradigms and Their Compositions.
Muaz Ali; Muhammad Muzammil; Faraz Karim; Ayesha Naeem; Rukhshan Haroon; Muhammad Haris; Huzaifa Nadeem; Waseem Sabir; Fahad Shaon; Fareed Zaffar; Vinod Yegneswaran; Ashish Gehani; and Sazzadur Rahaman.
In Computer Security – ESORICS 2023, 2023. Springer International Publishing.
@inproceedings{Ali2023ESORICS,
  title={A Tale of Reduction, Security and Correctness: Evaluating Program Debloating Paradigms and Their Compositions},
  author={Muaz Ali and Muhammad Muzammil and Faraz Karim and Ayesha Naeem and Rukhshan Haroon and Muhammad Haris and Huzaifa Nadeem and Waseem Sabir and Fahad Shaon and Fareed Zaffar and Vinod Yegneswaran and Ashish Gehani and Sazzadur Rahaman},
  booktitle={Computer Security -- ESORICS 2023},
  year={2023},
  publisher={Springer International Publishing},
  abstract={Automated software debloating of program source or binary code has tremendous potential to improve both application performance and security. Unfortunately, measuring and comparing the effectiveness of various debloating methods is challenging due to the absence of a universal benchmarking platform that can accommodate diverse approaches. In this paper, first, we present ProdeBench, an extensible and sustainable benchmarking platform that enables comparison of different research techniques. Then, we perform a holistic comparison of the techniques and explore the various hidden and explicit tradeoffs in using them.}
}
Blade: Scalable Source Code Debloating Framework.
Muaz Ali; Rumaisa Habib; Ashish Gehani; Sazzadur Rahaman; and Zartash Uzmi.
In 2023 IEEE Secure Development Conference (SecDev), 2023.
@inproceedings{Ali2023SecDev,
  title={Blade: Scalable Source Code Debloating Framework},
  author={Ali, Muaz and Habib, Rumaisa and Gehani, Ashish and Rahaman, Sazzadur and Uzmi, Zartash},
  booktitle={2023 IEEE Secure Development Conference (SecDev)},
  year={2023}
}
The ToMCAT Dataset.
Adarsh Pyarelal; Eric Duong; Caleb Shibu; Paulo Soares; Savannah Boyd; Payal Khosla; Valeria A. Pfeifer; Diheng Zhang; Eric Andrews; Rick Champlin; Vincent Raymond; Meghavarshini Krishnaswamy; Clayton Morrison; Emily Butler; and Kobus Barnard.
In NeurIPS 2023.
@inproceedings{eduong2023tomcat,
  title={The ToMCAT Dataset},
  author={Pyarelal, Adarsh and Duong, Eric and Shibu, Caleb and Soares, Paulo and Boyd, Savannah and Khosla, Payal and Pfeifer, Valeria A. and Zhang, Diheng and Andrews, Eric and Champlin, Rick and Raymond, Vincent and Krishnaswamy, Meghavarshini and Morrison, Clayton and Butler, Emily and Barnard, Kobus},
  booktitle={NeurIPS 2023},
  year={2023},
  publisher={NeurIPS}
}
Machine learning based integrated scheduling and rescheduling for elective and emergency patients in the operating theatre.
Masoud Eshghali; Devika Kannan; Navid Salmanzadeh-Meydani; and Amir Mohammad Esmaieeli Sikaroudi.
Annals of Operations Research, pages 1–24, 2023.
@article{eshghali2023machine,
  title={Machine learning based integrated scheduling and rescheduling for elective and emergency patients in the operating theatre},
  author={Eshghali, Masoud and Kannan, Devika and Salmanzadeh-Meydani, Navid and Esmaieeli Sikaroudi, Amir Mohammad},
  journal={Annals of Operations Research},
  pages={1--24},
  year={2023},
  publisher={Springer}
}
Automatically Localizing Dynamic Code Generation Bugs in JIT Compiler Back-End.
HeuiChan Lim; and Saumya Debray.
In Proceedings of the 32nd ACM SIGPLAN International Conference on Compiler Construction (CC 2023), pages 145–155, New York, NY, USA, 2023. Association for Computing Machinery.
@inproceedings{10.1145/3578360.3580260,
  author = {Lim, HeuiChan and Debray, Saumya},
  title = {Automatically Localizing Dynamic Code Generation Bugs in JIT Compiler Back-End},
  year = {2023},
  isbn = {9798400700880},
  publisher = {Association for Computing Machinery},
  address = {New York, NY, USA},
  url = {https://doi.org/10.1145/3578360.3580260},
  doi = {10.1145/3578360.3580260},
  abstract = {Just-in-Time (JIT) compilers are ubiquitous in modern computing systems and are used in a wide variety of software. Dynamic code generation bugs, where the JIT compiler silently emits incorrect code, can result in exploitable vulnerabilities. They, therefore, pose serious security concerns and make quick mitigation essential. However, due to the size and complexity of JIT compilers, quickly locating and fixing bugs is often challenging. In addition, the unique characteristics of JIT compilers make existing bug localization approaches inapplicable. Therefore, this paper proposes a new approach to automatic bug localization, explicitly targeting the JIT compiler back-end. The approach is based on explicitly modeling architecture-independent back-end representation and architecture-specific code-generation. Experiments using a prototype implementation on a widely used JIT compiler (Turbofan) indicate that it can successfully localize dynamic code generation bugs in the back-end with high accuracy.},
  booktitle = {Proceedings of the 32nd ACM SIGPLAN International Conference on Compiler Construction},
  pages = {145--155},
  numpages = {11},
  keywords = {JIT Compiler, Dynamic Program Analysis, Back-End, Automatic Bug Localization, Dynamic Code Generation},
  location = {Montr\'{e}al, QC, Canada},
  series = {CC 2023}
}
2022 (9)
Neural-Guided Program Synthesis of Information Extraction Rules Using Self-Supervision.
Enrique Noriega-Atala; Robert Vacareanu; Gus Hahn-Powell; and Marco Antonio Valenzuela-Escarcega.
In PANDL, 2022.
@inproceedings{NoriegaAtala2022NeuralGuidedPS,
    title = {Neural-Guided Program Synthesis of Information Extraction Rules Using Self-Supervision},
    author = {Enrique Noriega-Atala and Robert Vacareanu and Gus Hahn-Powell and Marco Antonio Valenzuela-Escarcega},
    booktitle = {PANDL},
    abstract = {We propose a neural-based approach for rule synthesis designed to help bridge the gap between the interpretability, precision and maintainability exhibited by rule-based information extraction systems with the scalability and convenience of statistical information extraction systems. This is achieved by avoiding placing the burden of learning another specialized language on domain experts and instead asking them to provide a small set of examples in the form of highlighted spans of text. We introduce a transformer-based architecture that drives a rule synthesis system that leverages a self-supervised approach for pre-training a large-scale language model complemented by an analysis of different loss functions and aggregation mechanisms for variable length sequences of user-annotated spans of text. The results are encouraging and point to different desirable properties, such as speed and quality, depending on the choice of loss and aggregation method.},
    url = {https://aclanthology.org/2022.pandl-1.10.pdf},
    year = {2022}
}
PatternRank: Jointly Ranking Patterns and Extractions for Relation Extraction Using Graph-Based Algorithms.
Robert Vacareanu; Dane Bell; and Mihai Surdeanu.
In PANDL, 2022.
@inproceedings{Vacareanu2022PatternRankJR,
    title = {PatternRank: Jointly Ranking Patterns and Extractions for Relation Extraction Using Graph-Based Algorithms},
    author = {Robert Vacareanu and Dane Bell and Mihai Surdeanu},
    booktitle = {PANDL},
    abstract = {In this paper we revisit the direction of using lexico-syntactic patterns for relation extraction instead of today's ubiquitous neural classifiers. We propose a semi-supervised graph-based algorithm for pattern acquisition that scores patterns and the relations they extract jointly, using a variant of PageRank. We insert light supervision in the form of seed patterns or relations, and model it with several custom teleportation probabilities that bias random-walk scores of patterns/relations based on their proximity to correct information. We evaluate our approach on Few-Shot TACRED, and show that our method outperforms (or performs competitively with) more expensive and opaque deep neural networks. Lastly, we thoroughly compare our proposed approach with the seminal RlogF pattern acquisition algorithm of, showing that it outperforms it for all the hyper parameters tested, in all settings.},
    url = {https://aclanthology.org/2022.pandl-1.1.pdf},
    year = {2022}
}
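Since the abstract describes the algorithm only at a high level, here is a minimal, self-contained sketch of a biased (personalized) PageRank over a bipartite pattern/extraction graph in the spirit of the paper. The toy graph, seed set, damping factor, and iteration count are illustrative assumptions, not the paper's configuration.

# Minimal sketch of a biased PageRank over a bipartite graph of patterns
# and the tuples they extract. Seeds receive all teleportation mass, so
# random-walk scores are biased toward known-correct information. The toy
# edges, seeds, and hyperparameters below are illustrative assumptions.

def pattern_rank(edges, seeds, damping=0.85, iters=50):
    """edges: (pattern, extraction) pairs; seeds: trusted nodes."""
    nodes = {n for e in edges for n in e}
    neighbors = {n: [] for n in nodes}
    for p, x in edges:
        neighbors[p].append(x)   # bipartite: scores flow both ways
        neighbors[x].append(p)
    teleport = {n: (1.0 / len(seeds) if n in seeds else 0.0) for n in nodes}
    score = {n: 1.0 / len(nodes) for n in nodes}
    for _ in range(iters):
        new = {}
        for n in nodes:
            inflow = sum(score[m] / len(neighbors[m]) for m in neighbors[n])
            new[n] = (1 - damping) * teleport[n] + damping * inflow
        score = new
    return score

edges = [("born_in_X", ("Ada", "London")),
         ("lives_in_X", ("Ada", "London")),
         ("lives_in_X", ("Bob", "Mars"))]
ranked = pattern_rank(edges, seeds={("Ada", "London")})
print(sorted(ranked, key=ranked.get, reverse=True))  # patterns/tuples near the seed rank first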
Instruction Tuning for Few-Shot Aspect-Based Sentiment Analysis.
Siddharth Varia; Shuai Wang; Kishaloy Halder; Robert Vacareanu; Miguel Ballesteros; Yassine Benajiba; Neha Ann John; Rishita Anubhai; Smaranda Muresan; and Dan Roth.
arXiv, abs/2210.06629, 2022.
@article{Varia2022InstructionTF,
    title = {Instruction Tuning for Few-Shot Aspect-Based Sentiment Analysis},
    author = {Siddharth Varia and Shuai Wang and Kishaloy Halder and Robert Vacareanu and Miguel Ballesteros and Yassine Benajiba and Neha Ann John and Rishita Anubhai and Smaranda Muresan and Dan Roth},
    journal = {ArXiv},
    abstract = {Aspect-based Sentiment Analysis (ABSA) is a fine-grained sentiment analysis task which involves four elements from user-generated texts: aspect term, aspect category, opinion term, and sentiment polarity. Most computational approaches focus on some of the ABSA sub-tasks such as tuple (aspect term, sentiment polarity) or triplet (aspect term, opinion term, sentiment polarity) extraction using either pipeline or joint modeling approaches. Recently, generative approaches have been proposed to extract all four elements as (one or more) quadruplets from text as a single task. In this work, we take a step further and propose a unified framework for solving ABSA, and the associated sub-tasks to improve the performance in few-shot scenarios. To this end, we fine-tune a T5 model with instructional prompts in a multi-task learning fashion covering all the sub-tasks, as well as the entire quadruple prediction task. In experiments with multiple benchmark data sets, we show that the proposed multi-task prompting approach brings performance boost (by absolute 6.75 F1) in the few-shot learning setting.},
    year = {2022},
    url = {https://arxiv.org/pdf/2210.06629.pdf},
    volume = {abs/2210.06629}
}
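To make the instruction-tuning setup concrete, below is a minimal sketch of how ABSA sub-tasks can be cast as text-to-text prompts for a T5-style model. The template wording and the quadruple serialization are illustrative guesses, not the paper's actual prompts.

# Minimal sketch of instruction prompts covering ABSA sub-tasks plus the
# full quadruple task, per the abstract above. The instruction text and
# the target serialization below are illustrative assumptions.

def absa_prompt(review, task):
    instructions = {
        "aspect": "Extract the aspect terms from the review.",
        "pair": "Extract (aspect term, sentiment polarity) pairs from the review.",
        "quad": ("Extract (aspect term, aspect category, opinion term, "
                 "sentiment polarity) quadruples from the review."),
    }
    return f"{instructions[task]} Review: {review}"

review = "The pasta was great but the service was slow."
print(absa_prompt(review, "quad"))
# A model fine-tuned this way would emit a serialized target such as:
#   "(pasta, food, great, positive); (service, service, slow, negative)"
# Training all sub-tasks jointly is what the paper credits for the
# few-shot gains.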
A Human-machine Interface for Few-shot Rule Synthesis for Information Extraction.
Robert Vacareanu; George Caique Gouveia Barbosa; Enrique Noriega-Atala; Gus Hahn-Powell; Rebecca Sharp; Marco Antonio Valenzuela-Escarcega; and Mihai Surdeanu.
In Proceedings of the 2022 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies: System Demonstrations, 2022.
@inproceedings{Vacareanu2022AHI,
    title = {A Human-machine Interface for Few-shot Rule Synthesis for Information Extraction},
    author = {Robert Vacareanu and George Caique Gouveia Barbosa and Enrique Noriega-Atala and Gus Hahn-Powell and Rebecca Sharp and Marco Antonio Valenzuela-Escarcega and Mihai Surdeanu},
    booktitle = {Proceedings of the 2022 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies: System Demonstrations},
    abstract = {We propose a system that assists a user in constructing transparent information extraction models, consisting of patterns (or rules) written in a declarative language, through program synthesis. Users of our system can specify their requirements through the use of examples, which are collected with a search interface. The rule-synthesis system proposes rule candidates and the results of applying them on a textual corpus; the user has the option to accept the candidate, request another option, or adjust the examples provided to the system. Through an interactive evaluation, we show that our approach generates high-precision rules even in a 1-shot setting. On a second evaluation on a widely-used relation extraction dataset (TACRED), our method generates rules that considerably outperform manually written patterns. Our code, demo, and documentation is available at https://clulab.github.io/odinsynth.},
    url = {https://aclanthology.org/2022.naacl-demo.8.pdf},
    year = {2022}
}
From Examples to Rules: Neural Guided Rule Synthesis for Information Extraction.
Robert Vacareanu; Marco Antonio Valenzuela-Escarcega; George Caique Gouveia Barbosa; Rebecca Sharp; and Mihai Surdeanu.
In International Conference on Language Resources and Evaluation, 2022.
@inproceedings{Vacareanu2022FromET,
    title = {From Examples to Rules: Neural Guided Rule Synthesis for Information Extraction},
    author = {Robert Vacareanu and Marco Antonio Valenzuela-Escarcega and George Caique Gouveia Barbosa and Rebecca Sharp and Mihai Surdeanu},
    booktitle = {International Conference on Language Resources and Evaluation},
    abstract = {While deep learning approaches to information extraction have had many successes, they can be difficult to augment or maintain as needs shift. Rule-based methods, on the other hand, can be more easily modified. However, crafting rules requires expertise in linguistics and the domain of interest, making it infeasible for most users. Here we attempt to combine the advantages of these two directions while mitigating their drawbacks. We adapt recent advances from the adjacent field of program synthesis to information extraction, synthesizing rules from provided examples. We use a transformer-based architecture to guide an enumerative search, and show that this reduces the number of steps that need to be explored before a rule is found. Further, we show that without training the synthesis algorithm on the specific domain, our synthesized rules achieve state-of-the-art performance on the 1-shot scenario of a task that focuses on few-shot learning for relation classification, and competitive performance in the 5-shot scenario.},
    url = {https://aclanthology.org/2022.lrec-1.665.pdf},
    year = {2022}
}
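The abstract's key idea, a neural model guiding an enumerative search so fewer candidates are explored before a consistent rule is found, can be illustrated with a toy best-first search. The bag-of-words rule language, the scoring heuristic standing in for the transformer guide, and the examples below are all illustrative assumptions, not the paper's rule language or model.

# Minimal sketch of guided enumerative rule synthesis: candidates are
# expanded best-first under a scorer. Here a rule is a set of required
# words, and the frequency heuristic stands in for the learned guide;
# both are illustrative simplifications of the paper's approach.
import heapq

examples = ["acquired by Google", "acquired by Apple"]   # user-provided spans
vocabulary = ["acquired", "by", "Google", "the"]

def matches(rule, span):
    return all(word in span.split() for word in rule)

def score(rule):
    # Stand-in for the neural guide: rules covering more examples
    # score lower (= explored first); a length term prefers short rules.
    return -sum(matches(rule, ex) for ex in examples) + 0.1 * len(rule)

def synthesize(max_steps=100):
    frontier = [(score(()), ())]          # start from the empty rule
    steps = 0
    while frontier and steps < max_steps:
        _, rule = heapq.heappop(frontier)
        steps += 1
        if rule and all(matches(rule, ex) for ex in examples):
            return rule, steps            # first rule consistent with all examples
        for word in vocabulary:           # expand by one required word
            new_rule = rule + (word,)
            heapq.heappush(frontier, (score(new_rule), new_rule))
    return None, steps

print(synthesize())  # (('acquired',), 2): good guidance means few search steps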
It Takes Two Flints to Make a Fire: Multitask Learning of Neural Relation and Explanation Classifiers.
Zheng Tang; and Mihai Surdeanu.
Computational Linguistics, pages 1–40, September 2022.
@article{10.1162/coli_a_00463,
    author = {Tang, Zheng and Surdeanu, Mihai},
    title = {It Takes Two Flints to Make a Fire: Multitask Learning of Neural Relation and Explanation Classifiers},
    journal = {Computational Linguistics},
    pages = {1--40},
    year = {2022},
    month = {09},
    abstract = {We propose an explainable approach for relation extraction that mitigates the tension between generalization and explainability by jointly training for the two goals. Our approach uses a multi-task learning architecture, which jointly trains a classifier for relation extraction, and a sequence model that labels words in the context of the relation that explain the decisions of the relation classifier. We also convert the model outputs to rules to bring global explanations to this approach. This sequence model is trained using a hybrid strategy: supervised, when supervision from pre-existing patterns is available, and semi-supervised otherwise. In the latter situation, we treat the sequence model's labels as latent variables, and learn the best assignment that maximizes the performance of the relation classifier. We evaluate the proposed approach on the two datasets and show that the sequence model provides labels that serve as accurate explanations for the relation classifier's decisions, and, importantly, that the joint training generally improves the performance of the relation classifier. We also evaluate the performance of the generated rules and show that the new rules are great add-on to the manual rules and bring the rule-based system much closer to the neural models.},
    issn = {0891-2017},
    doi = {10.1162/coli_a_00463},
    url = {https://doi.org/10.1162/coli_a_00463},
    eprint = {https://direct.mit.edu/coli/article-pdf/doi/10.1162/coli_a_00463/2046371/coli_a_00463.pdf}
}
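As a minimal sketch of the joint objective described in the abstract, the snippet below sums a sentence-level relation loss and a token-level explanation loss over a shared encoder. The dimensions, the random stand-in inputs, and the module choices are illustrative, and the semi-supervised latent-label treatment from the abstract is omitted.

# Minimal sketch of the multitask setup from the abstract above: a shared
# encoder feeds (1) a relation classifier over the pooled sentence and
# (2) a per-token tagger marking words that explain the decision; the two
# cross-entropy losses are summed so joint training updates the shared
# encoder for both goals. All sizes and inputs here are illustrative.
import torch
import torch.nn as nn

hidden, n_relations, n_tags, seq_len = 64, 5, 2, 10
encoder = nn.GRU(input_size=32, hidden_size=hidden, batch_first=True)
relation_head = nn.Linear(hidden, n_relations)
explanation_head = nn.Linear(hidden, n_tags)  # explains-decision vs. not
ce = nn.CrossEntropyLoss()

tokens = torch.randn(1, seq_len, 32)               # stand-in for embeddings
states, _ = encoder(tokens)                        # (1, seq_len, hidden)
relation_logits = relation_head(states.mean(dim=1))  # pooled sentence
tag_logits = explanation_head(states)                # per-token logits

relation_gold = torch.tensor([3])
tag_gold = torch.randint(0, n_tags, (1, seq_len))  # latent when unsupervised

loss = ce(relation_logits, relation_gold) + \
       ce(tag_logits.view(-1, n_tags), tag_gold.view(-1))
loss.backward()
print(float(loss))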
Prediction of blast loading on protruded structures using machine learning methods.
Mona Zahedi; and Shahriar Golchin.
International Journal of Protective Structures, 20414196221144067, 2022.
@article{zahedi2022prediction,
  title={Prediction of blast loading on protruded structures using machine learning methods},
  author={Zahedi, Mona and Golchin, Shahriar},
  journal={International Journal of Protective Structures},
  pages={20414196221144067},
  year={2022},
  publisher={SAGE Publications Sage UK: London, England}
}
Revisiting Simple Regret: Fast Rates for Returning a Good Arm.
Yao Zhao; Connor Stephens; Csaba Szepesvári; and Kwang-Sung Jun.
arXiv preprint arXiv:2210.16913, 2022.
@article{zhao2022revisiting,
  title={Revisiting Simple Regret: Fast Rates for Returning a Good Arm},
  author={Zhao, Yao and Stephens, Connor and Szepesv{\'a}ri, Csaba and Jun, Kwang-Sung},
  journal={arXiv preprint arXiv:2210.16913},
  year={2022}
}
On Efficient Online Imitation Learning via Classification.
Yichen Li; and Chicheng Zhang.
Advances in Neural Information Processing Systems, 35: 32383–32397, 2022.
@article{li2022efficient,
  title={On Efficient Online Imitation Learning via Classification},
  author={Li, Yichen and Zhang, Chicheng},
  journal={Advances in Neural Information Processing Systems},
  volume={35},
  pages={32383--32397},
  year={2022}
}
2020 (2)
An Unsupervised Method for Learning Representations of Multi-word Expressions for Semantic Classification.
Robert Vacareanu; Marco Antonio Valenzuela-Escarcega; Rebecca Sharp; and Mihai Surdeanu.
In International Conference on Computational Linguistics, 2020.
@inproceedings{Vacareanu2020AnUM,
    title = {An Unsupervised Method for Learning Representations of Multi-word Expressions for Semantic Classification},
    author = {Robert Vacareanu and Marco Antonio Valenzuela-Escarcega and Rebecca Sharp and Mihai Surdeanu},
    booktitle = {International Conference on Computational Linguistics},
    abstract = {This paper explores an unsupervised approach to learning a compositional representation function for multi-word expressions (MWEs), and evaluates it on the Tratz dataset, which associates two-word expressions with the semantic relation between the compound constituents (e.g. the label employer is associated with the noun compound government agency) (Tratz, 2011). The composition function is based on recurrent neural networks, and is trained using the Skip-Gram objective to predict the words in the context of MWEs. Thus our approach can naturally leverage large unlabeled text sources. Further, our method can make use of provided MWEs when available, but can also function as a completely unsupervised algorithm, using MWE boundaries predicted by a single, domain-agnostic part-of-speech pattern. With pre-defined MWE boundaries, our method outperforms the previous state-of-the-art performance on the coarse-grained evaluation of the Tratz dataset (Tratz, 2011), with an F1 score of 50.4%. The unsupervised version of our method approaches the performance of the supervised one, and even outperforms it in some configurations.},
    url = {https://www.aclweb.org/anthology/2020.coling-main.297.pdf},
    year = {2020}
}
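A minimal sketch of the training signal described in the abstract: a recurrent composer turns an MWE's constituent embeddings into a single vector, which is trained Skip-Gram-style to predict the surrounding words. The vocabulary, dimensions, and the full softmax-over-vocabulary loss (rather than negative sampling) are illustrative choices, not the paper's setup.

# Minimal sketch of Skip-Gram training for a composed MWE vector, per the
# abstract above: a GRU composes "government agency" into one vector that
# must predict its context words. Vocabulary, sizes, and the softmax loss
# are illustrative assumptions.
import torch
import torch.nn as nn

vocab = {"the": 0, "government": 1, "agency": 2, "issued": 3, "rules": 4}
dim = 16
word_emb = nn.Embedding(len(vocab), dim)        # input embeddings
ctx_emb = nn.Linear(dim, len(vocab))            # output (context) embeddings
composer = nn.GRU(dim, dim, batch_first=True)   # composition function

mwe = torch.tensor([[vocab["government"], vocab["agency"]]])
context = torch.tensor([vocab["the"], vocab["issued"], vocab["rules"]])

_, h = composer(word_emb(mwe))                  # final hidden state: (1, 1, dim)
mwe_vector = h.squeeze(0)                       # composed MWE representation
logits = ctx_emb(mwe_vector)                    # score every vocabulary word
loss = nn.functional.cross_entropy(
    logits.expand(len(context), -1), context)   # predict each context word
loss.backward()                                 # trains composer and embeddings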
Parsing as Tagging.
Robert Vacareanu; George Caique Gouveia Barbosa; Marco Antonio Valenzuela-Escarcega; and Mihai Surdeanu.
In International Conference on Language Resources and Evaluation, 2020.
@inproceedings{Vacareanu2020ParsingAT,
    title = {Parsing as Tagging},
    author = {Robert Vacareanu and George Caique Gouveia Barbosa and Marco Antonio Valenzuela-Escarcega and Mihai Surdeanu},
    booktitle = {International Conference on Language Resources and Evaluation},
    abstract = {We propose a simple yet accurate method for dependency parsing that treats parsing as tagging (PaT). That is, our approach addresses the parsing of dependency trees with a sequence model implemented with a bidirectional LSTM over BERT embeddings, where the "tag" to be predicted at each token position is the relative position of the corresponding head. For example, for the sentence John eats cake, the tag to be predicted for the token cake is -1 because its head (eats) occurs one token to the left. Despite its simplicity, our approach performs well. For example, our approach outperforms the state-of-the-art method of (Fernández-González and Gómez-Rodríguez, 2019) on Universal Dependencies (UD) by 1.76% unlabeled attachment score (UAS) for English, 1.98% UAS for French, and 1.16% UAS for German. On average, on 12 UD languages, our method with minimal tuning performs comparably with this state-of-the-art approach: better by 0.11% UAS, and worse by 0.58% LAS.},
    url = {https://www.aclweb.org/anthology/2020.lrec-1.643.pdf},
    year = {2020}
}
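The tagging scheme in the abstract is simple enough to spell out. Below, relative-head-position tags are computed for the abstract's own example sentence; encoding the root as tag 0 is an assumption of this sketch, not necessarily the paper's convention.

# Minimal sketch of the "parsing as tagging" encoding from the abstract
# above: each token's tag is the relative position of its head, so a
# predicted tag sequence decodes directly into a dependency tree.

def heads_to_tags(heads):
    """heads[i] is the index of token i's head (-1 for the root)."""
    return [h - i if h != -1 else 0 for i, h in enumerate(heads)]

def tags_to_heads(tags):
    return [i + t if t != 0 else -1 for i, t in enumerate(tags)]

sentence = ["John", "eats", "cake"]
heads = [1, -1, 1]            # "eats" is the root; it heads "John" and "cake"
tags = heads_to_tags(heads)
print(list(zip(sentence, tags)))  # [('John', 1), ('eats', 0), ('cake', -1)]
assert tags_to_heads(tags) == heads
# In the paper, a BiLSTM over BERT embeddings predicts these tags; here
# only the encoding itself is shown. Tag 0 for the root is an assumption.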