<script src="https://bibbase.org/show?bib=https%3A%2F%2Fbibbase.org%2Ff%2Fvr5ooa48xeYes5KDD%2Failaw.bib&jsonp=1"></script>
<?php
$contents = file_get_contents("https://bibbase.org/show?bib=https%3A%2F%2Fbibbase.org%2Ff%2Fvr5ooa48xeYes5KDD%2Failaw.bib");
print_r($contents);
?>
<iframe src="https://bibbase.org/show?bib=https%3A%2F%2Fbibbase.org%2Ff%2Fvr5ooa48xeYes5KDD%2Failaw.bib"></iframe>
For more details see the documention.
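If you use the PHP include, the bibliography is re-fetched from bibbase.org on every page load. Below is a minimal cached variant of that snippet; the cache file location and one-hour refresh interval are illustrative assumptions, not BibBase defaults.

<?php
// Cached variant of the PHP include above: re-fetch at most once per hour.
$url   = "https://bibbase.org/show?bib=https%3A%2F%2Fbibbase.org%2Ff%2Fvr5ooa48xeYes5KDD%2Failaw.bib";
$cache = __DIR__ . "/ailaw-bibbase.html"; // illustrative cache location
$ttl   = 3600;                            // refresh interval in seconds

if (!file_exists($cache) || time() - filemtime($cache) > $ttl) {
    $contents = file_get_contents($url);      // fetch the rendered bibliography
    if ($contents !== false) {
        file_put_contents($cache, $contents); // update the cache only on success
    }
}
if (file_exists($cache)) {
    echo file_get_contents($cache);           // serve the cached copy
}
?>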
@misc{kapoorPromisesPitfallsArtificial2024, title = {Promises and pitfalls of artificial intelligence for legal applications}, url = {http://arxiv.org/abs/2402.01656}, doi = {10.48550/arXiv.2402.01656}, abstract = {Is AI set to redefine the legal profession? We argue that this claim is not supported by the current evidence. We dive into AI's increasingly prevalent roles in three types of legal tasks: information processing; tasks involving creativity, reasoning, or judgment; and predictions about the future. We find that the ease of evaluating legal applications varies greatly across legal tasks, based on the ease of identifying correct answers and the observability of information relevant to the task at hand. Tasks that would lead to the most significant changes to the legal profession are also the ones most prone to overoptimism about AI capabilities, as they are harder to evaluate. We make recommendations for better evaluation and deployment of AI in legal contexts.}, urldate = {2024-07-28}, publisher = {arXiv}, author = {Kapoor, Sayash and Henderson, Peter and Narayanan, Arvind}, month = jan, year = {2024}, note = {arXiv:2402.01656 [cs]}, keywords = {Computer Science - Artificial Intelligence, Computer Science - Computers and Society}, }
@misc{santoshSupportingLegalArgumentation2024, title = {Towards {Supporting} {Legal} {Argumentation} with {NLP}: {Is} {More} {Data} {Really} {All} {You} {Need}?}, shorttitle = {Towards {Supporting} {Legal} {Argumentation} with {NLP}}, url = {http://arxiv.org/abs/2406.10974}, doi = {10.48550/arXiv.2406.10974}, abstract = {Modeling legal reasoning and argumentation justifying decisions in cases has always been central to AI \& Law, yet contemporary developments in legal NLP have increasingly focused on statistically classifying legal conclusions from text. While conceptually simpler, these approaches often fall short in providing usable justifications connecting to appropriate legal concepts. This paper reviews both traditional symbolic works in AI \& Law and recent advances in legal NLP, and distills possibilities of integrating expert-informed knowledge to strike a balance between scalability and explanation in symbolic vs. data-driven approaches. We identify open challenges and discuss the potential of modern NLP models and methods that integrate}, urldate = {2024-07-28}, publisher = {arXiv}, author = {Santosh, T. Y. S. S. and Ashley, Kevin D. and Atkinson, Katie and Grabmair, Matthias}, month = jun, year = {2024}, note = {arXiv:2406.10974 [cs]}, keywords = {Computer Science - Artificial Intelligence, Computer Science - Computation and Language}, }
@misc{valvodaExplainabilityLegalOutcome2024, title = {Towards {Explainability} in {Legal} {Outcome} {Prediction} {Models}}, url = {http://arxiv.org/abs/2403.16852}, doi = {10.48550/arXiv.2403.16852}, abstract = {Current legal outcome prediction models - a staple of legal NLP - do not explain their reasoning. However, to employ these models in the real world, human legal actors need to be able to understand the model's decisions. In the case of common law, legal practitioners reason towards the outcome of a case by referring to past case law, known as precedent. We contend that precedent is, therefore, a natural way of facilitating explainability for legal NLP models. In this paper, we contribute a novel method for identifying the precedent employed by legal outcome prediction models. Furthermore, by developing a taxonomy of legal precedent, we are able to compare human judges and neural models with respect to the different types of precedent they rely on. We find that while the models learn to predict outcomes reasonably well, their use of precedent is unlike that of human judges.}, urldate = {2024-07-28}, publisher = {arXiv}, author = {Valvoda, Josef and Cotterell, Ryan}, month = apr, year = {2024}, note = {arXiv:2403.16852 [cs]}, keywords = {Computer Science - Artificial Intelligence, Computer Science - Computation and Language}, }
@misc{mageshHallucinationFreeAssessingReliability2024, title = {Hallucination-{Free}? {Assessing} the {Reliability} of {Leading} {AI} {Legal} {Research} {Tools}}, shorttitle = {Hallucination-{Free}?}, url = {http://arxiv.org/abs/2405.20362}, doi = {10.48550/arXiv.2405.20362}, abstract = {Legal practice has witnessed a sharp rise in products incorporating artificial intelligence (AI). Such tools are designed to assist with a wide range of core legal tasks, from search and summarization of caselaw to document drafting. But the large language models used in these tools are prone to "hallucinate," or make up false information, making their use risky in high-stakes domains. Recently, certain legal research providers have touted methods such as retrieval-augmented generation (RAG) as "eliminating" (Casetext, 2023) or "avoid[ing]" hallucinations (Thomson Reuters, 2023), or guaranteeing "hallucination-free" legal citations (LexisNexis, 2023). Because of the closed nature of these systems, systematically assessing these claims is challenging. In this article, we design and report on the first preregistered empirical evaluation of AI-driven legal research tools. We demonstrate that the providers' claims are overstated. While hallucinations are reduced relative to general-purpose chatbots (GPT-4), we find that the AI research tools made by LexisNexis (Lexis+ AI) and Thomson Reuters (Westlaw AI-Assisted Research and Ask Practical Law AI) each hallucinate between 17\% and 33\% of the time. We also document substantial differences between systems in responsiveness and accuracy. Our article makes four key contributions. It is the first to assess and report the performance of RAG-based proprietary legal AI tools. Second, it introduces a comprehensive, preregistered dataset for identifying and understanding vulnerabilities in these systems. Third, it proposes a clear typology for differentiating between hallucinations and accurate legal responses. Last, it provides evidence to inform the responsibilities of legal professionals in supervising and verifying AI outputs, which remains a central open question for the responsible integration of AI into law.}, urldate = {2024-07-28}, publisher = {arXiv}, author = {Magesh, Varun and Surani, Faiz and Dahl, Matthew and Suzgun, Mirac and Manning, Christopher D. and Ho, Daniel E.}, month = may, year = {2024}, note = {arXiv:2405.20362 [cs]}, keywords = {Computer Science - Computers and Society, Computer Science - Computation and Language}, }
@misc{hackerGenerativeDiscriminationWhat2024, title = {Generative {Discrimination}: {What} {Happens} {When} {Generative} {AI} {Exhibits} {Bias}, and {What} {Can} {Be} {Done} {About} {It}}, shorttitle = {Generative {Discrimination}}, url = {http://arxiv.org/abs/2407.10329}, doi = {10.48550/arXiv.2407.10329}, abstract = {As generative Artificial Intelligence (genAI) technologies proliferate across sectors, they offer significant benefits but also risk exacerbating discrimination. This chapter explores how genAI intersects with non-discrimination laws, identifying shortcomings and suggesting improvements. It highlights two main types of discriminatory outputs: (i) demeaning and abusive content and (ii) subtler biases due to inadequate representation of protected groups, which may not be overtly discriminatory in individual cases but have cumulative discriminatory effects. For example, genAI systems may predominantly depict white men when asked for images of people in important jobs. This chapter examines these issues, categorizing problematic outputs into three legal categories: discriminatory content; harassment; and legally hard cases like unbalanced content, harmful stereotypes or misclassification. It argues for holding genAI providers and deployers liable for discriminatory outputs and highlights the inadequacy of traditional legal frameworks to address genAI-specific issues. The chapter suggests updating EU laws, including the AI Act, to mitigate biases in training and input data, mandating testing and auditing, and evolving legislation to enforce standards for bias mitigation and inclusivity as technology advances.}, urldate = {2024-07-28}, publisher = {arXiv}, author = {Hacker, Philipp and Mittelstadt, Brent and Borgesius, Frederik Zuiderveen and Wachter, Sandra}, month = jun, year = {2024}, note = {arXiv:2407.10329 [cs]}, keywords = {Computer Science - Artificial Intelligence, Computer Science - Computers and Society}, annote = {Comment: forthcoming in: Philipp Hacker, Andreas Engel, Sarah Hammer and Brent Mittelstadt (eds.), Oxford Handbook on the Foundations and Regulation of Generative AI (Oxford University Press, 2024)}, }
@misc{deroyArtificialIntelligenceAI2024, title = {Artificial {Intelligence} ({AI}) in {Legal} {Data} {Mining}}, url = {http://arxiv.org/abs/2405.14707}, doi = {10.48550/arXiv.2405.14707}, abstract = {Despite the availability of vast amounts of data, legal data is often unstructured, making it difficult even for law practitioners to ingest and comprehend the same. It is important to organise the legal information in a way that is useful for practitioners and downstream automation tasks. The word ontology was used by Greek philosophers to discuss concepts of existence, being, becoming and reality. Today, scientists use this term to describe the relation between concepts, data, and entities. A great example of a working ontology was developed by Dhani and Bhatt. This ontology deals with Indian court cases on intellectual property rights (IPR). The future of legal ontologies is likely to be handled by computer experts and legal experts alike.}, urldate = {2024-07-28}, publisher = {arXiv}, author = {Deroy, Aniket and Bailung, Naksatra Kumar and Ghosh, Kripabandhu and Ghosh, Saptarshi and Chakraborty, Abhijnan}, month = may, year = {2024}, note = {arXiv:2405.14707 [cs]}, keywords = {Computer Science - Artificial Intelligence}, }
@misc{chenSurveyLargeLanguage2024, title = {A {Survey} on {Large} {Language} {Models} for {Critical} {Societal} {Domains}: {Finance}, {Healthcare}, and {Law}}, shorttitle = {A {Survey} on {Large} {Language} {Models} for {Critical} {Societal} {Domains}}, url = {http://arxiv.org/abs/2405.01769}, doi = {10.48550/arXiv.2405.01769}, abstract = {In the fast-evolving domain of artificial intelligence, large language models (LLMs) such as GPT-3 and GPT-4 are revolutionizing the landscapes of finance, healthcare, and law: domains characterized by their reliance on professional expertise, challenging data acquisition, high-stakes, and stringent regulatory compliance. This survey offers a detailed exploration of the methodologies, applications, challenges, and forward-looking opportunities of LLMs within these high-stakes sectors. We highlight the instrumental role of LLMs in enhancing diagnostic and treatment methodologies in healthcare, innovating financial analytics, and refining legal interpretation and compliance strategies. Moreover, we critically examine the ethics for LLM applications in these fields, pointing out the existing ethical concerns and the need for transparent, fair, and robust AI systems that respect regulatory norms. By presenting a thorough review of current literature and practical applications, we showcase the transformative impact of LLMs, and outline the imperative for interdisciplinary cooperation, methodological advancements, and ethical vigilance. Through this lens, we aim to spark dialogue and inspire future research dedicated to maximizing the benefits of LLMs while mitigating their risks in these precision-dependent sectors. To facilitate future research on LLMs in these critical societal domains, we also initiate a reading list that tracks the latest advancements under this topic, which will be continually updated: \url{https://github.com/czyssrs/LLM_X_papers}.}, urldate = {2024-07-28}, publisher = {arXiv}, author = {Chen, Zhiyu Zoey and Ma, Jing and Zhang, Xinlu and Hao, Nan and Yan, An and Nourbakhsh, Armineh and Yang, Xianjun and McAuley, Julian and Petzold, Linda and Wang, William Yang}, month = may, year = {2024}, note = {arXiv:2405.01769 [cs]}, keywords = {Computer Science - Computation and Language}, annote = {Comment: 35 pages, 6 figures}, }
@misc{hussainLargeLanguageModels2024, title = {Large {Language} {Models} for {Judicial} {Entity} {Extraction}: {A} {Comparative} {Study}}, shorttitle = {Large {Language} {Models} for {Judicial} {Entity} {Extraction}}, url = {http://arxiv.org/abs/2407.05786}, doi = {10.48550/arXiv.2407.05786}, abstract = {Domain-specific Entity Recognition holds significant importance in legal contexts, serving as a fundamental task that supports various applications such as question-answering systems, text summarization, machine translation, sentiment analysis, and information retrieval specifically within case law documents. Recent advancements have highlighted the efficacy of Large Language Models in natural language processing tasks, demonstrating their capability to accurately detect and classify domain-specific facts (entities) from specialized texts like clinical and financial documents. This research investigates the application of Large Language Models in identifying domain-specific entities (e.g., courts, petitioner, judge, lawyer, respondents, FIR nos.) within case law documents, with a specific focus on their aptitude for handling domain-specific language complexity and contextual variations. The study evaluates the performance of state-of-the-art Large Language Model architectures, including Large Language Model Meta AI 3, Mistral, and Gemma, in the context of extracting judicial facts tailored to Indian judicial texts. Mistral and Gemma emerged as the top-performing models, showcasing balanced precision and recall crucial for accurate entity identification. These findings confirm the value of Large Language Models in judicial documents and demonstrate how they can facilitate and quicken scientific research by producing precise, organised data outputs that are appropriate for in-depth examination.}, urldate = {2024-07-28}, publisher = {arXiv}, author = {Hussain, Atin Sakkeer and Thomas, Anu}, month = jul, year = {2024}, note = {arXiv:2407.05786 [cs]}, keywords = {Computer Science - Artificial Intelligence, Computer Science - Computation and Language, I.2.1}, }
@misc{ghoshHumanCenteredAI2024, title = {Human {Centered} {AI} for {Indian} {Legal} {Text} {Analytics}}, url = {http://arxiv.org/abs/2403.10944}, doi = {10.48550/arXiv.2403.10944}, abstract = {Legal research is a crucial task in the practice of law. It requires intense human effort and intellectual prudence to research a legal case and prepare arguments. Recent boom in generative AI has not translated to proportionate rise in impactful legal applications, because of low trustworthiness and the scarcity of specialized datasets for training Large Language Models (LLMs). This position paper explores the potential of LLMs within Legal Text Analytics (LTA), highlighting specific areas where the integration of human expertise can significantly enhance their performance to match that of experts. We introduce a novel dataset and describe a human centered, compound AI system that principally incorporates human inputs for performing LTA tasks with LLMs.}, urldate = {2024-07-28}, publisher = {arXiv}, author = {Ghosh, Sudipto and Verma, Devanshu and Ganesan, Balaji and Bindal, Purnima and Kumar, Vikas and Bhatnagar, Vasudha}, month = mar, year = {2024}, note = {arXiv:2403.10944 [cs]}, keywords = {Computer Science - Artificial Intelligence, Computer Science - Human-Computer Interaction}, }
@misc{zhangEvaluationEthicsLLMs2024, title = {Evaluation {Ethics} of {LLMs} in {Legal} {Domain}}, url = {http://arxiv.org/abs/2403.11152}, doi = {10.48550/arXiv.2403.11152}, abstract = {In recent years, the utilization of large language models for natural language dialogue has gained momentum, leading to their widespread adoption across various domains. However, their universal competence in addressing challenges specific to specialized fields such as law remains a subject of scrutiny. The incorporation of legal ethics into the model has been overlooked by researchers. We assert that rigorous ethics evaluation is essential to ensure the effective integration of large language models in legal domains, emphasizing the need to assess domain-specific proficiency and domain-specific ethics. To address this, we propose a novel evaluation methodology, utilizing authentic legal cases to evaluate the fundamental language abilities, specialized legal knowledge and legal robustness of large language models (LLMs). The findings from our comprehensive evaluation contribute significantly to the academic discourse surrounding the suitability and performance of large language models in legal domains.}, urldate = {2024-07-28}, publisher = {arXiv}, author = {Zhang, Ruizhe and Li, Haitao and Wu, Yueyue and Ai, Qingyao and Liu, Yiqun and Zhang, Min and Ma, Shaoping}, month = mar, year = {2024}, note = {arXiv:2403.11152 [cs]}, keywords = {Computer Science - Artificial Intelligence, Computer Science - Computation and Language}, }
@misc{deroyApplicabilityLargeLanguage2024, title = {Applicability of {Large} {Language} {Models} and {Generative} {Models} for {Legal} {Case} {Judgement} {Summarization}}, url = {http://arxiv.org/abs/2407.12848}, doi = {10.48550/arXiv.2407.12848}, abstract = {Automatic summarization of legal case judgements, which are known to be long and complex, has traditionally been tried via extractive summarization models. In recent years, generative models including abstractive summarization models and Large language models (LLMs) have gained huge popularity. In this paper, we explore the applicability of such models for legal case judgement summarization. We applied various domain specific abstractive summarization models and general domain LLMs as well as extractive summarization models over two sets of legal case judgements from the United Kingdom (UK) Supreme Court and the Indian (IN) Supreme Court and evaluated the quality of the generated summaries. We also perform experiments on a third dataset of legal documents of a different type, Government reports from the United States (US). Results show that abstractive summarization models and LLMs generally perform better than the extractive methods as per traditional metrics for evaluating summary quality. However, detailed investigation shows the presence of inconsistencies and hallucinations in the outputs of the generative models, and we explore ways to reduce the hallucinations and inconsistencies in the summaries. Overall, the investigation suggests that further improvements are needed to enhance the reliability of abstractive models and LLMs for legal case judgement summarization. At present, a human-in-the-loop technique is more suitable for performing manual checks to identify inconsistencies in the generated summaries.}, urldate = {2024-07-28}, publisher = {arXiv}, author = {Deroy, Aniket and Ghosh, Kripabandhu and Ghosh, Saptarshi}, month = jul, year = {2024}, note = {arXiv:2407.12848 [cs]}, keywords = {Computer Science - Artificial Intelligence, Computer Science - Computation and Language}, }
@misc{solow-niedermanCanAIStandards2024, address = {Rochester, NY}, type = {{SSRN} {Scholarly} {Paper}}, title = {Can {AI} {Standards} {Have} {Politics}?}, url = {https://papers.ssrn.com/abstract=4714812}, abstract = {How to govern a technology like artificial intelligence (AI)? When it comes to designing and deploying fair, ethical, and safe AI systems, standards are a tempting answer. By establishing the best way of doing something, standards might seem to provide plug-and-play guardrails for AI systems that avoid the costs of formal legal intervention. AI standards are all the more tantalizing because they seem to provide a neutral, objective way to proceed in a normatively contested space. But this vision of AI standards blinks a practical reality. Standards do not appear out of thin air. They are constructed. This Essay analyzes three concrete examples from the European Union, China, and the United States to underscore how standards are neither objective nor neutral. It thereby exposes an inconvenient truth for AI governance: Standards have politics, and yet recognizing that standards are crafted by actors who make normative choices in particular institutional contexts, subject to political and economic incentives and constraints, may undermine the functional utility of standards as soft law regulatory instruments that can set forth a single, best formula to disseminate across contexts.}, language = {en}, urldate = {2024-05-02}, author = {Solow-Niederman, Alicia}, month = feb, year = {2024}, keywords = {AI Ethics, AI Governance, AI Regulation, Soft Law, Technology and Law}, }
@misc{martinBetterCallGPT2024, title = {Better {Call} {GPT}, {Comparing} {Large} {Language} {Models} {Against} {Lawyers}}, url = {http://arxiv.org/abs/2401.16212}, doi = {10.48550/arXiv.2401.16212}, abstract = {This paper presents a groundbreaking comparison between Large Language Models and traditional legal contract reviewers, Junior Lawyers and Legal Process Outsourcers. We dissect whether LLMs can outperform humans in accuracy, speed, and cost efficiency during contract review. Our empirical analysis benchmarks LLMs against a ground truth set by Senior Lawyers, uncovering that advanced models match or exceed human accuracy in determining legal issues. In speed, LLMs complete reviews in mere seconds, eclipsing the hours required by their human counterparts. Cost wise, LLMs operate at a fraction of the price, offering a staggering 99.97 percent reduction in cost over traditional methods. These results are not just statistics, they signal a seismic shift in legal practice. LLMs stand poised to disrupt the legal industry, enhancing accessibility and efficiency of legal services. Our research asserts that the era of LLM dominance in legal contract review is upon us, challenging the status quo and calling for a reimagined future of legal workflows.}, urldate = {2024-03-28}, publisher = {arXiv}, author = {Martin, Lauren and Whitehouse, Nick and Yiu, Stephanie and Catterson, Lizzie and Perera, Rivindu}, month = jan, year = {2024}, note = {arXiv:2401.16212 [cs]}, keywords = {Computer Science - Computers and Society, Computer Science - Computation and Language}, }
@misc{tonmoyComprehensiveSurveyHallucination2024, title = {A {Comprehensive} {Survey} of {Hallucination} {Mitigation} {Techniques} in {Large} {Language} {Models}}, url = {http://arxiv.org/abs/2401.01313}, abstract = {As Large Language Models (LLMs) continue to advance in their ability to write human-like text, a key challenge remains around their tendency to “hallucinate” – generating content that appears factual but is ungrounded. This issue of hallucination is arguably the biggest hindrance to safely deploying these powerful LLMs into real-world production systems that impact people’s lives. The journey toward widespread adoption of LLMs in practical settings heavily relies on addressing and mitigating hallucinations. Unlike traditional AI systems focused on limited tasks, LLMs have been exposed to vast amounts of online text data during training. While this allows them to display impressive language fluency, it also means they are capable of extrapolating information from the biases in training data, misinterpreting ambiguous prompts, or modifying the information to align superficially with the input. This becomes hugely alarming when we rely on language generation capabilities for sensitive applications, such as summarizing medical records, customer support conversations, financial analysis reports, and providing erroneous legal advice. Small errors could lead to harm, revealing the LLMs’ lack of actual comprehension despite advances in self-learning. This paper presents a comprehensive survey of over thirty-two techniques developed to mitigate hallucination in LLMs. Notable among these are Retrieval-Augmented Generation (RAG) (Lewis et al., 2021), Knowledge Retrieval (Varshney et al., 2023), CoNLI (Lei et al., 2023), and CoVe (Dhuliawala et al., 2023). Furthermore, we introduce a detailed taxonomy categorizing these methods based on various parameters, such as dataset utilization, common tasks, feedback mechanisms, and retriever types. This classification helps distinguish the diverse approaches specifically designed to tackle hallucination issues in LLMs. Additionally, we analyze the challenges and limitations inherent in these techniques.}, language = {en}, urldate = {2024-01-10}, publisher = {arXiv}, author = {Tonmoy, S. M. Towhidul Islam and Zaman, S. M. Mehedi and Jain, Vinija and Rani, Anku and Rawte, Vipula and Chadha, Aman and Das, Amitava}, month = jan, year = {2024}, note = {arXiv:2401.01313 [cs]}, keywords = {Computer Science - Computation and Language}, }
@misc{cuiChatlawMultiAgentCollaborative2024, title = {Chatlaw: {A} {Multi}-{Agent} {Collaborative} {Legal} {Assistant} with {Knowledge} {Graph} {Enhanced} {Mixture}-of-{Experts} {Large} {Language} {Model}}, shorttitle = {Chatlaw}, url = {http://arxiv.org/abs/2306.16092}, doi = {10.48550/arXiv.2306.16092}, abstract = {AI legal assistants based on Large Language Models (LLMs) can provide accessible legal consulting services, but the hallucination problem poses potential legal risks. This paper presents Chatlaw, an innovative legal assistant utilizing a Mixture-of-Experts (MoE) model and a multi-agent system to enhance the reliability and accuracy of AI-driven legal services. By integrating knowledge graphs with artificial screening, we construct a high-quality legal dataset to train the MoE model. This model utilizes different experts to address various legal issues, optimizing the accuracy of legal responses. Additionally, Standardized Operating Procedures (SOP), modeled after real law firm workflows, significantly reduce errors and hallucinations in legal services. Our MoE model outperforms GPT-4 in the Lawbench and Unified Qualification Exam for Legal Professionals by 7.73\% in accuracy and 11 points, respectively, and also surpasses other models in multiple dimensions during real-case consultations, demonstrating our robust capability for legal consultation.}, urldate = {2024-07-29}, publisher = {arXiv}, author = {Cui, Jiaxi and Ning, Munan and Li, Zongjian and Chen, Bohua and Yan, Yang and Li, Hao and Ling, Bin and Tian, Yonghong and Yuan, Li}, month = may, year = {2024}, note = {arXiv:2306.16092 [cs]}, keywords = {Computer Science - Computation and Language}, }
@article{dahlLargeLegalFictions2024, title = {Large {Legal} {Fictions}: {Profiling} {Legal} {Hallucinations} in {Large} {Language} {Models}}, volume = {16}, issn = {2161-7201, 1946-5319}, shorttitle = {Large {Legal} {Fictions}}, url = {http://arxiv.org/abs/2401.01301}, doi = {10.1093/jla/laae003}, abstract = {Do large language models (LLMs) know the law? These models are increasingly being used to augment legal practice, education, and research, yet their revolutionary potential is threatened by the presence of hallucinations -- textual output that is not consistent with legal facts. We present the first systematic evidence of these hallucinations, documenting LLMs' varying performance across jurisdictions, courts, time periods, and cases. Our work makes four key contributions. First, we develop a typology of legal hallucinations, providing a conceptual framework for future research in this area. Second, we find that legal hallucinations are alarmingly prevalent, occurring between 58\% of the time with ChatGPT 4 and 88\% with Llama 2, when these models are asked specific, verifiable questions about random federal court cases. Third, we illustrate that LLMs often fail to correct a user's incorrect legal assumptions in a contra-factual question setup. Fourth, we provide evidence that LLMs cannot always predict, or do not always know, when they are producing legal hallucinations. Taken together, our findings caution against the rapid and unsupervised integration of popular LLMs into legal tasks. Even experienced lawyers must remain wary of legal hallucinations, and the risks are highest for those who stand to benefit from LLMs the most -- pro se litigants or those without access to traditional legal resources.}, number = {1}, urldate = {2024-07-29}, journal = {Journal of Legal Analysis}, author = {Dahl, Matthew and Magesh, Varun and Suzgun, Mirac and Ho, Daniel E.}, month = jan, year = {2024}, note = {arXiv:2401.01301 [cs]}, keywords = {Computer Science - Artificial Intelligence, Computer Science - Computers and Society, Computer Science - Computation and Language}, pages = {64--93}, }
@misc{singhRethinkingInterpretabilityEra2024, title = {Rethinking {Interpretability} in the {Era} of {Large} {Language} {Models}}, url = {http://arxiv.org/abs/2402.01761}, doi = {10.48550/arXiv.2402.01761}, abstract = {Interpretable machine learning has exploded as an area of interest over the last decade, sparked by the rise of increasingly large datasets and deep neural networks. Simultaneously, large language models (LLMs) have demonstrated remarkable capabilities across a wide array of tasks, offering a chance to rethink opportunities in interpretable machine learning. Notably, the capability to explain in natural language allows LLMs to expand the scale and complexity of patterns that can be given to a human. However, these new capabilities raise new challenges, such as hallucinated explanations and immense computational costs. In this position paper, we start by reviewing existing methods to evaluate the emerging field of LLM interpretation (both interpreting LLMs and using LLMs for explanation). We contend that, despite their limitations, LLMs hold the opportunity to redefine interpretability with a more ambitious scope across many applications, including in auditing LLMs themselves. We highlight two emerging research priorities for LLM interpretation: using LLMs to directly analyze new datasets and to generate interactive explanations.}, urldate = {2024-07-29}, publisher = {arXiv}, author = {Singh, Chandan and Inala, Jeevana Priya and Galley, Michel and Caruana, Rich and Gao, Jianfeng}, month = jan, year = {2024}, note = {arXiv:2402.01761 [cs]}, keywords = {Computer Science - Artificial Intelligence, Computer Science - Computation and Language, Computer Science - Machine Learning}, }
@misc{katzNaturalLanguageProcessing2023, title = {Natural {Language} {Processing} in the {Legal} {Domain}}, url = {http://arxiv.org/abs/2302.12039}, abstract = {In this paper, we summarize the current state of the field of NLP \& Law with a specific focus on recent technical and substantive developments. To support our analysis, we construct and analyze a nearly complete corpus of more than six hundred NLP \& Law related papers published over the past decade. Our analysis highlights several major trends. Namely, we document an increasing number of papers written, tasks undertaken, and languages covered over the course of the past decade. We observe an increase in the sophistication of the methods which researchers deployed in this applied context. Slowly but surely, Legal NLP is beginning to match not only the methodological sophistication of general NLP but also the professional standards of data availability and code reproducibility observed within the broader scientific community. We believe all of these trends bode well for the future of the field, but many questions in both the academic and commercial sphere still remain open.}, language = {en}, urldate = {2023-06-09}, publisher = {arXiv}, author = {Katz, Daniel Martin and Hartung, Dirk and Gerlach, Lauritz and Jana, Abhik and Bommarito II, Michael J.}, month = feb, year = {2023}, note = {arXiv:2302.12039 [cs]}, keywords = {Computer Science - Artificial Intelligence, Computer Science - Computation and Language}, }
@misc{chalkidisLeXFilesLegalLAMAFacilitating2023, title = {{LeXFiles} and {LegalLAMA}: {Facilitating} {English} {Multinational} {Legal} {Language} {Model} {Development}}, shorttitle = {{LeXFiles} and {LegalLAMA}}, url = {http://arxiv.org/abs/2305.07507}, abstract = {In this work, we conduct a detailed analysis on the performance of legal-oriented pre-trained language models (PLMs). We examine the interplay between their original objective, acquired knowledge, and legal language understanding capacities which we define as the upstream, probing, and downstream performance, respectively. We consider not only the models’ size but also the pre-training corpora used as important dimensions in our study. To this end, we release a multinational English legal corpus (LeXFiles) and a legal knowledge probing benchmark (LegalLAMA) to facilitate training and detailed analysis of legal-oriented PLMs. We release two new legal PLMs trained on LeXFiles and evaluate them alongside others on LegalLAMA and LexGLUE. We find that probing performance strongly correlates with upstream performance in related legal topics. On the other hand, downstream performance is mainly driven by the model’s size and prior legal knowledge which can be estimated by upstream and probing performance. Based on these findings, we can conclude that both dimensions are important for those seeking the development of domain-specific PLMs.}, language = {en}, urldate = {2023-06-09}, publisher = {arXiv}, author = {Chalkidis, Ilias and Garneau, Nicolas and Goanta, Catalina and Katz, Daniel Martin and Søgaard, Anders}, month = may, year = {2023}, note = {arXiv:2305.07507 [cs]}, keywords = {Computer Science - Computation and Language}, annote = {Comment: 9 pages, long paper at ACL 2023 proceedings}, }
@misc{katzGPT4PassesBar2023, address = {Rochester, NY}, type = {{SSRN} {Scholarly} {Paper}}, title = {{GPT}-4 {Passes} the {Bar} {Exam}}, url = {https://papers.ssrn.com/abstract=4389233}, doi = {10.2139/ssrn.4389233}, abstract = {In this paper, we experimentally evaluate the zero-shot performance of a preliminary version of GPT-4 against prior generations of GPT on the entire Uniform Bar Examination (UBE), including not only the multiple-choice Multistate Bar Examination (MBE), but also the open-ended Multistate Essay Exam (MEE) and Multistate Performance Test (MPT) components. On the MBE, GPT-4 significantly outperforms both human test-takers and prior models, demonstrating a 26\% increase over ChatGPT and beating humans in five of seven subject areas. On the MEE and MPT, which have not previously been evaluated by scholars, GPT-4 scores an average of 4.2/6.0 as compared to much lower scores for ChatGPT. Graded across the UBE components, in the manner in which a human test-taker would be, GPT-4 scores approximately 297 points, significantly in excess of the passing threshold for all UBE jurisdictions. These findings document not just the rapid and remarkable advance of large language model performance generally, but also the potential for such models to support the delivery of legal services in society.}, language = {en}, urldate = {2023-06-12}, author = {Katz, Daniel Martin and Bommarito, Michael James and Gao, Shang and Arredondo, Pablo}, month = mar, year = {2023}, keywords = {machine learning, ChatGPT, natural language processing, artificial intelligence, artificial intelligence and law, Bar Exam, evaluation, GPT, GPT-4, Legal Analytics, Legal Data, Legal NLP, Legal Tech, natural language understanding, Neural NLP, NLP}, }
@misc{chalkidisChatGPTMayPass2023, title = {{ChatGPT} may {Pass} the {Bar} {Exam} soon, but has a {Long} {Way} to {Go} for the {LexGLUE} benchmark}, url = {http://arxiv.org/abs/2304.12202}, doi = {10.48550/arXiv.2304.12202}, abstract = {Following the hype around OpenAI's ChatGPT conversational agent, the last straw in the recent development of Large Language Models (LLMs) that demonstrate emergent unprecedented zero-shot capabilities, we audit the latest OpenAI's GPT-3.5 model, `gpt-3.5-turbo', the first available ChatGPT model, in the LexGLUE benchmark in a zero-shot fashion providing examples in a templated instruction-following format. The results indicate that ChatGPT achieves an average micro-F1 score of 47.6\% across LexGLUE tasks, surpassing the baseline guessing rates. Notably, the model performs exceptionally well in some datasets, achieving micro-F1 scores of 62.8\% and 70.2\% in the ECtHR B and LEDGAR datasets, respectively. The code base and model predictions are available for review on https://github.com/coastalcph/zeroshot\_lexglue.}, urldate = {2023-06-12}, publisher = {arXiv}, author = {Chalkidis, Ilias}, month = mar, year = {2023}, note = {arXiv:2304.12202 [cs]}, keywords = {Computer Science - Computation and Language}, annote = {Comment: Working paper}, }
@inproceedings{fangSuperSCOTUSMultisourcedDataset2023, address = {Singapore}, title = {Super-{SCOTUS}: {A} multi-sourced dataset for the {Supreme} {Court} of the {US}}, shorttitle = {Super-{SCOTUS}}, url = {https://aclanthology.org/2023.nllp-1.20}, doi = {10.18653/v1/2023.nllp-1.20}, abstract = {Given the complexity of the judiciary in the US Supreme Court, various procedures, along with various resources, contribute to the court system. However, most research focuses on a limited set of resources, e.g., court opinions or oral arguments, for analyzing a specific perspective in court, e.g., partisanship or voting. To gain a fuller understanding of these perspectives in the legal system of the US Supreme Court, a more comprehensive dataset, connecting different sources in different phases of the court procedure, is needed. To address this gap, we present a multi-sourced dataset for the Supreme Court, comprising court resources from different procedural phases, connecting language documents with extensive metadata. We showcase its utility through a case study on how different court documents reveal the decision direction (conservative vs. liberal) of the cases. We analyze performance differences across three protected attributes, indicating that different court resources encode different biases, and reinforcing that considering various resources provides a fuller picture of the court procedures. We further discuss how our dataset can contribute to future research directions.}, urldate = {2024-07-29}, booktitle = {Proceedings of the {Natural} {Legal} {Language} {Processing} {Workshop} 2023}, publisher = {Association for Computational Linguistics}, author = {Fang, Biaoyan and Cohn, Trevor and Baldwin, Timothy and Frermann, Lea}, editor = {Preoțiuc-Pietro, Daniel and Goanta, Catalina and Chalkidis, Ilias and Barrett, Leslie and Spanakis, Gerasimos and Aletras, Nikolaos}, month = dec, year = {2023}, pages = {202--214}, }
@misc{sunShortSurveyViewing2023, title = {A {Short} {Survey} of {Viewing} {Large} {Language} {Models} in {Legal} {Aspect}}, url = {http://arxiv.org/abs/2303.09136}, doi = {10.48550/arXiv.2303.09136}, abstract = {Large language models (LLMs) have transformed many fields, including natural language processing, computer vision, and reinforcement learning. These models have also made a significant impact in the field of law, where they are being increasingly utilized to automate various legal tasks, such as legal judgement prediction, legal document analysis, and legal document writing. However, the integration of LLMs into the legal field has also raised several legal problems, including privacy concerns, bias, and explainability. In this survey, we explore the integration of LLMs into the field of law. We discuss the various applications of LLMs in legal tasks, examine the legal challenges that arise from their use, and explore the data resources that can be used to specialize LLMs in the legal domain. Finally, we discuss several promising directions and conclude this paper. By doing so, we hope to provide an overview of the current state of LLMs in law and highlight the potential benefits and challenges of their integration.}, urldate = {2024-07-29}, publisher = {arXiv}, author = {Sun, Zhongxiang}, month = mar, year = {2023}, note = {arXiv:2303.09136 [cs]}, keywords = {Computer Science - Computation and Language}, annote = {Comment: 8 pages}, }
@misc{louisInterpretableLongFormLegal2023, title = {Interpretable {Long}-{Form} {Legal} {Question} {Answering} with {Retrieval}-{Augmented} {Large} {Language} {Models}}, url = {http://arxiv.org/abs/2309.17050}, doi = {10.48550/arXiv.2309.17050}, abstract = {Many individuals are likely to face a legal dispute at some point in their lives, but their lack of understanding of how to navigate these complex issues often renders them vulnerable. The advancement of natural language processing opens new avenues for bridging this legal literacy gap through the development of automated legal aid systems. However, existing legal question answering (LQA) approaches often suffer from a narrow scope, being either confined to specific legal domains or limited to brief, uninformative responses. In this work, we propose an end-to-end methodology designed to generate long-form answers to any statutory law questions, utilizing a "retrieve-then-read" pipeline. To support this approach, we introduce and release the Long-form Legal Question Answering (LLeQA) dataset, comprising 1,868 expert-annotated legal questions in the French language, complete with detailed answers rooted in pertinent legal provisions. Our experimental results demonstrate promising performance on automatic evaluation metrics, but a qualitative analysis uncovers areas for refinement. As one of the only comprehensive, expert-annotated long-form LQA datasets, LLeQA has the potential to not only accelerate research towards resolving a significant real-world issue, but also act as a rigorous benchmark for evaluating NLP models in specialized domains. We publicly release our code, data, and models.}, urldate = {2024-07-29}, publisher = {arXiv}, author = {Louis, Antoine and van Dijck, Gijs and Spanakis, Gerasimos}, month = sep, year = {2023}, note = {arXiv:2309.17050 [cs]}, keywords = {Computer Science - Computation and Language}, }
@misc{zhuoRedTeamingChatGPT2023, title = {Red teaming {ChatGPT} via {Jailbreaking}: {Bias}, {Robustness}, {Reliability} and {Toxicity}}, shorttitle = {Red teaming {ChatGPT} via {Jailbreaking}}, url = {http://arxiv.org/abs/2301.12867}, doi = {10.48550/arXiv.2301.12867}, abstract = {Recent breakthroughs in natural language processing (NLP) have permitted the synthesis and comprehension of coherent text in an open-ended way, therefore translating the theoretical algorithms into practical applications. The large language models (LLMs) have significantly impacted businesses such as report summarization software and copywriters. Observations indicate, however, that LLMs may exhibit social prejudice and toxicity, posing ethical and societal dangers of consequences resulting from irresponsibility. Large-scale benchmarks for accountable LLMs should consequently be developed. Although several empirical investigations reveal the existence of a few ethical difficulties in advanced LLMs, there is little systematic examination and user study of the risks and harmful behaviors of current LLM usage. To further educate future efforts on constructing ethical LLMs responsibly, we perform a qualitative research method called ``red teaming'' on OpenAI's ChatGPT\footnote{In this paper, ChatGPT refers to the version released on Dec 15th.} to better understand the practical features of ethical dangers in recent LLMs. We analyze ChatGPT comprehensively from four perspectives: 1) \textit{Bias} 2) \textit{Reliability} 3) \textit{Robustness} 4) \textit{Toxicity}. In accordance with our stated viewpoints, we empirically benchmark ChatGPT on multiple sample datasets. We find that a significant number of ethical risks cannot be addressed by existing benchmarks, and hence illustrate them via additional case studies. In addition, we examine the implications of our findings on AI ethics and harmful behaviors of ChatGPT, as well as future problems and practical design considerations for responsible LLMs. We believe that our findings may give light on future efforts to determine and mitigate the ethical hazards posed by machines in LLM applications.}, urldate = {2024-07-29}, publisher = {arXiv}, author = {Zhuo, Terry Yue and Huang, Yujin and Chen, Chunyang and Xing, Zhenchang}, month = may, year = {2023}, note = {arXiv:2301.12867 [cs]}, keywords = {Computer Science - Computation and Language, Computer Science - Software Engineering}, }
@misc{choiHowUseLarge2023, address = {Rochester, NY}, type = {{SSRN} {Scholarly} {Paper}}, title = {How to {Use} {Large} {Language} {Models} for {Empirical} {Legal} {Research}}, url = {https://papers.ssrn.com/abstract=4536852}, abstract = {Legal scholars have long annotated cases by hand to summarize and learn about developments in jurisprudence. Dramatic recent improvements in the performance of large language models (LLMs) now provide a potential alternative. This Article demonstrates how to use LLMs to analyze legal documents. It evaluates best practices and suggests both the uses and potential limitations of LLMs in empirical legal research. In a simple classification task involving Supreme Court opinions, it finds that GPT-4 performs approximately as well as human coders and significantly better than a variety of prior-generation NLP classifiers, with no improvement from supervised training, fine-tuning, or specialized prompting.}, language = {en}, urldate = {2024-07-29}, author = {Choi, Jonathan H.}, month = aug, year = {2023}, keywords = {machine learning, AI, empirical legal studies, natural language processing, artificial intelligence, computational analysis of law, large language models, LLMs}, }
@misc{iuChatGPTOpenAIEnd2023, address = {Rochester, NY}, type = {{SSRN} {Scholarly} {Paper}}, title = {{ChatGPT} by {OpenAI}: {The} {End} of {Litigation} {Lawyers}?}, shorttitle = {{ChatGPT} by {OpenAI}}, url = {https://papers.ssrn.com/abstract=4339839}, doi = {10.2139/ssrn.4339839}, abstract = {ChatGPT, a revolutionary AI language model developed by OpenAI, can understand instructions with unprecedented efficiency. This study aims to evaluate the extent to which ChatGPT can potentially serve as a replacement for litigation lawyers through an examination of its drafting and research capabilities. The results indicate that ChatGPT has advanced legal drafting skills for various types of documents, including demand letters, without-prejudice letters, and pleadings. ChatGPT was able to elaborate and enhance the contents based on the simple facts inputted into the system and demonstrated the ability to understand simple facts and articulate the legal basis of the claim. Additionally, ChatGPT can identify legal strategies, draft a summary judgment, generate a skeleton argument, conduct cross-examination, and provide simple legal advice. The results also reveal that ChatGPT performed excellently in analysing a more complicated case. However, there were limitations in the data sources used in ChatGPT, which resulted in a weakness in identifying recent case law. At this stage, the paper suggests that ChatGPT should be viewed as a supplement, rather than a replacement, to litigation lawyers.}, language = {en}, urldate = {2024-07-29}, author = {Iu, Kwan Yuen and Wong, Vanessa Man-Yi}, month = jan, year = {2023}, keywords = {ChatGPT, Common Law, Law and Technology, Legal Education, Legal Technology, Litigation, OpenAI}, }
@misc{steenhuisWeavingPathwaysJustice2023, title = {Weaving {Pathways} for {Justice} with {GPT}: {LLM}-driven automated drafting of interactive legal applications}, shorttitle = {Weaving {Pathways} for {Justice} with {GPT}}, url = {http://arxiv.org/abs/2312.09198}, doi = {10.48550/arXiv.2312.09198}, abstract = {Can generative AI help us speed up the authoring of tools to help self-represented litigants? In this paper, we describe 3 approaches to automating the completion of court forms: a generative AI approach that uses GPT-3 to iteratively prompt the user to answer questions, a constrained template-driven approach that uses GPT-4-turbo to generate a draft of questions that are subject to human review, and a hybrid method. We use the open source Docassemble platform in all 3 experiments, together with a tool created at Suffolk University Law School called the Assembly Line Weaver. We conclude that the hybrid model of constrained automated drafting with human review is best suited to the task of authoring guided interviews.}, urldate = {2024-07-29}, publisher = {arXiv}, author = {Steenhuis, Quinten and Colarusso, David and Willey, Bryce}, month = dec, year = {2023}, note = {arXiv:2312.09198 [cs]}, keywords = {Computer Science - Artificial Intelligence, Computer Science - Computers and Society, Computer Science - Social and Information Networks, Computer Science - Human-Computer Interaction, Computer Science - Computer Vision and Pattern Recognition}, }
@inproceedings{thakurLanguageModelsGet2023, address = {Toronto, Canada}, title = {Language {Models} {Get} a {Gender} {Makeover}: {Mitigating} {Gender} {Bias} with {Few}-{Shot} {Data} {Interventions}}, shorttitle = {Language {Models} {Get} a {Gender} {Makeover}}, url = {https://aclanthology.org/2023.acl-short.30}, doi = {10.18653/v1/2023.acl-short.30}, abstract = {Societal biases present in pre-trained large language models are a critical issue as these models have been shown to propagate biases in countless downstream applications, rendering them unfair towards specific groups of people. Since large-scale retraining of these models from scratch is both time and compute-expensive, a variety of approaches have been previously proposed that de-bias a pre-trained model. While the majority of current state-of-the-art debiasing methods focus on changes to the training regime, in this paper, we propose data intervention strategies as a powerful yet simple technique to reduce gender bias in pre-trained models. Specifically, we empirically show that by fine-tuning a pre-trained model on only 10 debiased (intervened) training examples, the tendency to favor any gender is significantly reduced. Since our proposed method only needs a few training examples, we argue that our few-shot de-biasing approach is highly feasible and practical. Through extensive experimentation, we show that our de-biasing technique performs better than competitive state-of-the-art baselines with minimal loss in language modeling ability.}, urldate = {2024-07-29}, booktitle = {Proceedings of the 61st {Annual} {Meeting} of the {Association} for {Computational} {Linguistics} ({Volume} 2: {Short} {Papers})}, publisher = {Association for Computational Linguistics}, author = {Thakur, Himanshu and Jain, Atishay and Vaddamanu, Praneetha and Liang, Paul Pu and Morency, Louis-Philippe}, editor = {Rogers, Anna and Boyd-Graber, Jordan and Okazaki, Naoaki}, month = jul, year = {2023}, pages = {340--351}, }
@misc{chalkidisLexGLUEBenchmarkDataset2022, title = {{LexGLUE}: {A} {Benchmark} {Dataset} for {Legal} {Language} {Understanding} in {English}}, shorttitle = {{LexGLUE}}, url = {http://arxiv.org/abs/2110.00976}, abstract = {Laws and their interpretations, legal arguments and agreements are typically expressed in writing, leading to the production of vast corpora of legal text. Their analysis, which is at the center of legal practice, becomes increasingly elaborate as these collections grow in size. Natural language understanding (NLU) technologies can be a valuable tool to support legal practitioners in these endeavors. Their usefulness, however, largely depends on whether current state-of-the-art models can generalize across various tasks in the legal domain. To answer this currently open question, we introduce the Legal General Language Understanding Evaluation (LexGLUE) benchmark, a collection of datasets for evaluating model performance across a diverse set of legal NLU tasks in a standardized way. We also provide an evaluation and analysis of several generic and legal-oriented models demonstrating that the latter consistently offer performance improvements across multiple tasks.}, language = {en}, urldate = {2023-06-09}, publisher = {arXiv}, author = {Chalkidis, Ilias and Jana, Abhik and Hartung, Dirk and Bommarito, Michael and Androutsopoulos, Ion and Katz, Daniel Martin and Aletras, Nikolaos}, month = nov, year = {2022}, note = {arXiv:2110.00976 [cs]}, keywords = {Computer Science - Computation and Language}, }
@misc{bommaritoiiGPTTakesBar2022, title = {{GPT} {Takes} the {Bar} {Exam}}, url = {http://arxiv.org/abs/2212.14402}, doi = {10.48550/arXiv.2212.14402}, abstract = {Nearly all jurisdictions in the United States require a professional license exam, commonly referred to as “the Bar Exam,” as a precondition for law practice. To even sit for the exam, most jurisdictions require that an applicant completes at least seven years of post-secondary education, including three years at an accredited law school. In addition, most test-takers also undergo weeks to months of further, exam-specific preparation. Despite this significant investment of time and capital, approximately one in five test-takers still score under the rate required to pass the exam on their first try. In the face of a complex task that requires such depth of knowledge, what, then, should we expect of the state of the art in “AI?” In this research, we document our experimental evaluation of the performance of OpenAI’s text-davinci-003 model, often referred to as GPT-3.5, on the multistate multiple choice (MBE) section of the exam. While we find no benefit in fine-tuning over GPT-3.5’s zero-shot performance at the scale of our training data, we do find that hyperparameter optimization and prompt engineering positively impacted GPT-3.5’s zero-shot performance. For best prompt and parameters, GPT-3.5 achieves a headline correct rate of 50.3\% on a complete NCBE MBE practice exam, significantly in excess of the 25\% baseline guessing rate, and performs at a passing rate for both Evidence and Torts. GPT-3.5’s ranking of responses is also highly correlated with correctness; its top two and top three choices are correct 71\% and 88\% of the time, respectively, indicating very strong non-entailment performance. While our ability to interpret these results is limited by nascent scientific understanding of LLMs and the proprietary nature of GPT, we believe that these results strongly suggest that an LLM will pass the MBE component of the Bar Exam in the near future.}, language = {en}, urldate = {2023-06-09}, publisher = {arXiv}, author = {Bommarito II, Michael and Katz, Daniel Martin}, month = dec, year = {2022}, note = {arXiv:2212.14402 [cs]}, keywords = {Computer Science - Artificial Intelligence, Computer Science - Computation and Language, Computer Science - Machine Learning}, annote = {Comment: Additional material available online at https://github.com/mjbommar/gpt-takes-the-bar-exam}, }
@misc{aumillerEURLexSumMultiCrosslingual2022, title = {{EUR}-{Lex}-{Sum}: {A} {Multi}- and {Cross}-lingual {Dataset} for {Long}-form {Summarization} in the {Legal} {Domain}}, shorttitle = {{EUR}-{Lex}-{Sum}}, url = {http://arxiv.org/abs/2210.13448}, doi = {10.48550/arXiv.2210.13448}, abstract = {Existing summarization datasets come with two main drawbacks: (1) They tend to focus on overly exposed domains, such as news articles or wiki-like texts, and (2) are primarily monolingual, with few multilingual datasets. In this work, we propose a novel dataset, called EUR-Lex-Sum, based on manually curated document summaries of legal acts from the European Union law platform (EUR-Lex). Documents and their respective summaries exist as cross-lingual paragraph-aligned data in several of the 24 official European languages, enabling access to various cross-lingual and lower-resourced summarization setups. We obtain up to 1,500 document/summary pairs per language, including a subset of 375 cross-lingually aligned legal acts with texts available in all 24 languages. In this work, the data acquisition process is detailed and key characteristics of the resource are compared to existing summarization resources. In particular, we illustrate challenging sub-problems and open questions on the dataset that could help the facilitation of future research in the direction of domain-specific cross-lingual summarization. Limited by the extreme length and language diversity of samples, we further conduct experiments with suitable extractive monolingual and cross-lingual baselines for future work. Code for the extraction as well as access to our data and baselines is available online at: https://github.com/achouhan93/eur-lex-sum.}, urldate = {2023-06-12}, publisher = {arXiv}, author = {Aumiller, Dennis and Chouhan, Ashish and Gertz, Michael}, month = oct, year = {2022}, note = {arXiv:2210.13448 [cs]}, keywords = {Computer Science - Computation and Language}, }
@misc{cuiSurveyLegalJudgment2022, title = {A {Survey} on {Legal} {Judgment} {Prediction}: {Datasets}, {Metrics}, {Models} and {Challenges}}, shorttitle = {A {Survey} on {Legal} {Judgment} {Prediction}}, url = {http://arxiv.org/abs/2204.04859}, doi = {10.48550/arXiv.2204.04859}, abstract = {Legal judgment prediction (LJP) applies Natural Language Processing (NLP) techniques to predict judgment results based on fact descriptions automatically. Recently, large-scale public datasets and advances in NLP research have led to increasing interest in LJP. Despite a clear gap between machine and human performance, impressive results have been achieved in various benchmark datasets. In this paper, to address the current lack of a comprehensive survey of existing LJP tasks, datasets, models and evaluations, (1) we analyze 31 LJP datasets in 6 languages, present their construction process and define a classification method of LJP with 3 different attributes; (2) we summarize 14 evaluation metrics under four categories for different outputs of LJP tasks; (3) we review 12 legal-domain pretrained models in 3 languages and highlight 3 major research directions for LJP; (4) we show the state-of-the-art results for 8 representative datasets from different court cases and discuss the open challenges. This paper can provide up-to-date and comprehensive reviews to help readers understand the status of LJP. We hope to facilitate both NLP researchers and legal professionals in further joint efforts on this problem.}, urldate = {2024-07-29}, publisher = {arXiv}, author = {Cui, Junyun and Shen, Xiaoyu and Nie, Feiping and Wang, Zheng and Wang, Jinglong and Chen, Yulong}, month = apr, year = {2022}, note = {arXiv:2204.04859 [cs]}, keywords = {Computer Science - Computation and Language, Computer Science - Machine Learning}, }
@misc{diasStateArtArtificial2022, title = {State of the {Art} in {Artificial} {Intelligence} applied to the {Legal} {Domain}}, url = {http://arxiv.org/abs/2204.07047}, doi = {10.48550/arXiv.2204.07047}, abstract = {While Artificial Intelligence applied to the legal domain is a topic with origins in the last century, recent advances in Artificial Intelligence are poised to revolutionize it. This work presents an overview and contextualizes the main advances in the field of Natural Language Processing and how these advances have been used to further the state of the art in legal text analysis.}, urldate = {2024-07-29}, publisher = {arXiv}, author = {Dias, João and Santos, Pedro A. and Cordeiro, Nuno and Antunes, Ana and Martins, Bruno and Baptista, Jorge and Gonçalves, Carlos}, month = mar, year = {2022}, note = {arXiv:2204.07047 [cs]}, keywords = {Computer Science - Artificial Intelligence, Computer Science - Computation and Language}, }
@misc{yuLegalPromptingTeaching2022, title = {Legal {Prompting}: {Teaching} a {Language} {Model} to {Think} {Like} a {Lawyer}}, shorttitle = {Legal {Prompting}}, url = {http://arxiv.org/abs/2212.01326}, doi = {10.48550/arXiv.2212.01326}, abstract = {Large language models that are capable of zero or few-shot prompting approaches have given rise to the new research area of prompt engineering. Recent advances showed that, for example, Chain-of-Thought (CoT) prompts can improve arithmetic or common sense tasks significantly. We explore how such approaches fare with legal reasoning tasks and take the COLIEE entailment task based on the Japanese Bar exam for testing zero-shot/few-shot and fine-tuning approaches. Our findings show that while CoT prompting and fine-tuning with explanations approaches show improvements, the best results are produced by prompts that are derived from specific legal reasoning techniques such as IRAC (Issue, Rule, Application, Conclusion). Based on our experiments, we improve the 2021 best result from 0.7037 accuracy to 0.8148 accuracy and beat the 2022 best system of 0.6789 accuracy with an accuracy of 0.7431.}, urldate = {2024-07-29}, publisher = {arXiv}, author = {Yu, Fangyi and Quartey, Lee and Schilder, Frank}, month = dec, year = {2022}, note = {arXiv:2212.01326 [cs]}, keywords = {Computer Science - Artificial Intelligence, Computer Science - Computation and Language, I.2.7}, }
@inproceedings{stahlPreferChooseGenerating2022, address = {Abu Dhabi, UAE}, title = {To {Prefer} or to {Choose}? {Generating} {Agency} and {Power} {Counterfactuals} {Jointly} for {Gender} {Bias} {Mitigation}}, shorttitle = {To {Prefer} or to {Choose}?}, url = {https://aclanthology.org/2022.nlpcss-1.6}, doi = {10.18653/v1/2022.nlpcss-1.6}, abstract = {Gender bias may emerge from an unequal representation of agency and power, for example, by portraying women frequently as passive and powerless (“She accepted her future”) and men as proactive and powerful (“He chose his future”). When language models learn from respective texts, they may reproduce or even amplify the bias. An effective way to mitigate bias is to generate counterfactual sentences with opposite agency and power to the training. Recent work targeted agency-specific verbs from a lexicon to this end. We argue that this is insufficient, due to the interaction of agency and power and their dependence on context. In this paper, we thus develop a new rewriting model that identifies verbs with the desired agency and power in the context of the given sentence. The verbs' probability is then boosted to encourage the model to rewrite both connotations jointly. According to automatic metrics, our model effectively controls for power while being competitive in agency to the state of the art. In our main evaluation, human annotators favored its counterfactuals in terms of both connotations, also deeming its meaning preservation better.}, urldate = {2024-07-29}, booktitle = {Proceedings of the {Fifth} {Workshop} on {Natural} {Language} {Processing} and {Computational} {Social} {Science} ({NLP}+{CSS})}, publisher = {Association for Computational Linguistics}, author = {Stahl, Maja and Spliethöver, Maximilian and Wachsmuth, Henning}, editor = {Bamman, David and Hovy, Dirk and Jurgens, David and Keith, Katherine and O'Connor, Brendan and Volkova, Svitlana}, month = nov, year = {2022}, pages = {39--51}, }
@misc{coupetteLawSmellsDefining2021, title = {Law {Smells}: {Defining} and {Detecting} {Problematic} {Patterns} in {Legal} {Drafting}}, shorttitle = {Law {Smells}}, url = {http://arxiv.org/abs/2110.11984}, doi = {10.48550/arXiv.2110.11984}, abstract = {Building on the computer science concept of code smells, we initiate the study of law smells, i.e., patterns in legal texts that pose threats to the comprehensibility and maintainability of the law. With five intuitive law smells as running examples—namely, duplicated phrase, long element, large reference tree, ambiguous syntax, and natural language obsession—, we develop a comprehensive law smell taxonomy. This taxonomy classifies law smells by when they can be detected, which aspects of law they relate to, and how they can be discovered. We introduce text-based and graph-based methods to identify instances of law smells, confirming their utility in practice using the United States Code as a test case. Our work demonstrates how ideas from software engineering can be leveraged to assess and improve the quality of legal code, thus drawing attention to an understudied area in the intersection of law and computer science and highlighting the potential of computational legal drafting.}, language = {en}, urldate = {2023-06-09}, publisher = {arXiv}, author = {Coupette, Corinna and Hartung, Dirk and Beckedorf, Janis and Böther, Maximilian and Katz, Daniel Martin}, month = oct, year = {2021}, note = {arXiv:2110.11984 [cs]}, keywords = {Computer Science - Computers and Society, Computer Science - Social and Information Networks, Computer Science - Computation and Language, Computer Science - Information Retrieval, Computer Science - Software Engineering}, annote = {Comment: 36 pages, 11 figures}, }
@misc{yamaneArtificialIntelligenceLegal2020, title = {Artificial {Intelligence} in the {Legal} {Field} and the {Indispensable} {Human} {Element} {Legal} {Ethics} {Demands}}, url = {https://www.semanticscholar.org/paper/Artificial-Intelligence-in-the-Legal-Field-and-the-Yamane/bd94cfc31d39b37ca7a9cc8a9361dfd6faed71af}, abstract = {Many of these changes are positive, as the use of advanced AI programs has the potential to both improve the quality of legal services and increase individual access to justice. The use of AI in the legal field, however, also invokes many legal ethics concerns, because the Model Rules of Professional Conduct, which serve as ethics guidelines for legal practitioners, were written far before advanced AI programs existed.}, urldate = {2024-07-29}, author = {Yamane, Nicole}, year = {2020}, }
@inproceedings{wrightAILawAssessing2020, title = {{AI} in the {Law}: {Towards} {Assessing} {Ethical} {Risks}}, shorttitle = {{AI} in the {Law}}, url = {https://ieeexplore.ieee.org/document/9377950}, doi = {10.1109/BigData50022.2020.9377950}, abstract = {The exponential growth in data over the past decade has impacted the legal industry, both requiring automated solutions for the cost-effective and efficient management of the volume and variety of big (legal) data, and enabling artificial intelligence techniques based on machine learning for the analysis of that data. While many legal practitioners focus on specific service niches, the impact of AI in the law is much broader than individual niches. While AI systems and concerns for their ethical operation are not new, the scale of impact and adoption of AI systems in legal practice makes consideration of the ethics of these systems timely. While there has been recent progress in the development of ethical guidelines for AI systems, much of this is targeted at the developers of these systems in general, or at the actions of these AI systems as autonomous entities, rather than at the legal practice context. Much of the ethical guidance, whether for AI systems or legal professionals, is captured in high-level principles; within more narrowly defined domains, more specific guidance may be appropriate to identify and assess ethical risks. As adoption and operation of AI software in routine legal practice becomes more commonplace, more detailed guidance on assessing the scope and scale of ethical risks is needed.}, urldate = {2024-07-29}, booktitle = {2020 {IEEE} {International} {Conference} on {Big} {Data} ({Big} {Data})}, author = {Wright, Steven A.}, month = dec, year = {2020}, keywords = {Law, Artificial intelligence, AI, Assessment, Ethics, Guidelines, Industries, Software, Standards}, pages = {2160--2169}, }
@article{chalkidisDeepLearningLaw2019, title = {Deep learning in law: early adaptation and legal word embeddings trained on large corpora}, volume = {27}, issn = {0924-8463}, shorttitle = {Deep learning in law}, url = {https://doi.org/10.1007/s10506-018-9238-9}, doi = {10.1007/s10506-018-9238-9}, abstract = {Deep Learning has been widely used for tackling challenging natural language processing tasks over recent years. Similarly, the application of Deep Neural Networks in legal analytics has increased significantly. In this survey, we study the early adaptation of Deep Learning in legal analytics, focusing on three main fields: text classification, information extraction, and information retrieval. We focus on semantic feature representations, a key instrument for the successful application of deep learning in natural language processing. Additionally, we share pre-trained legal word embeddings using the word2vec model over large corpora comprising legislation from the UK, EU, Canada, Australia, USA, and Japan, among others.}, number = {2}, urldate = {2024-07-29}, journal = {Artificial Intelligence and Law}, author = {Chalkidis, Ilias and Kampas, Dimitrios}, month = jun, year = {2019}, pages = {171--198}, }
@inproceedings{chalkidisNeuralLegalJudgment2019, address = {Florence, Italy}, title = {Neural {Legal} {Judgment} {Prediction} in {English}}, url = {https://aclanthology.org/P19-1424}, doi = {10.18653/v1/P19-1424}, abstract = {Legal judgment prediction is the task of automatically predicting the outcome of a court case, given a text describing the case's facts. Previous work on using neural models for this task has focused on Chinese; only feature-based models (e.g., using bags of words and topics) have been considered in English. We release a new English legal judgment prediction dataset, containing cases from the European Court of Human Rights. We evaluate a broad variety of neural models on the new dataset, establishing strong baselines that surpass previous feature-based models in three tasks: (1) binary violation classification; (2) multi-label classification; (3) case importance prediction. We also explore if models are biased towards demographic information via data anonymization. As a side-product, we propose a hierarchical version of BERT, which bypasses BERT's length limitation.}, urldate = {2024-07-29}, booktitle = {Proceedings of the 57th {Annual} {Meeting} of the {Association} for {Computational} {Linguistics}}, publisher = {Association for Computational Linguistics}, author = {Chalkidis, Ilias and Androutsopoulos, Ion and Aletras, Nikolaos}, editor = {Korhonen, Anna and Traum, David and Màrquez, Lluís}, month = jul, year = {2019}, pages = {4317--4323}, }
@misc{bommaritoOpenEDGAROpenSource2018, address = {Rochester, NY}, type = {{SSRN} {Scholarly} {Paper}}, title = {{OpenEDGAR}: {Open} {Source} {Software} for {SEC} {EDGAR} {Analysis}}, shorttitle = {{OpenEDGAR}}, url = {https://papers.ssrn.com/abstract=3194754}, doi = {10.2139/ssrn.3194754}, abstract = {OpenEDGAR is an open source Python framework designed to rapidly construct research databases based on the Electronic Data Gathering, Analysis, and Retrieval (EDGAR) system operated by the US Securities and Exchange Commission (SEC). OpenEDGAR is built on the Django application framework, supports distributed compute across one or more servers, and includes functionality to (i) retrieve and parse index and filing data from EDGAR, (ii) build tables for key metadata like form type and filer, (iii) retrieve, parse, and update CIK to ticker and industry mappings, (iv) extract content and metadata from filing documents, and (v) search filing document contents. OpenEDGAR is designed for use in both academic research and industrial applications, and is distributed under MIT License.}, language = {en}, urldate = {2023-06-12}, author = {Bommarito, Michael James and Katz, Daniel Martin and Detterman, Eric}, month = jun, year = {2018}, keywords = {Natural Language Processing, Accounting, Corpora, Data, EDGAR, Finance, Legal, Machine Learning, Opensource, Python, Regulatory, SEC}, }
@misc{bommaritoiiLexNLPNaturalLanguage2018, title = {{LexNLP}: {Natural} language processing and information extraction for legal and regulatory texts}, shorttitle = {{LexNLP}}, url = {http://arxiv.org/abs/1806.03688}, doi = {10.48550/arXiv.1806.03688}, abstract = {LexNLP is an open source Python package focused on natural language processing and machine learning for legal and regulatory text. The package includes functionality to (i) segment documents, (ii) identify key text such as titles and section headings, (iii) extract over eighteen types of structured information like distances and dates, (iv) extract named entities such as companies and geopolitical entities, (v) transform text into features for model training, and (vi) build unsupervised and supervised models such as word embedding or tagging models. LexNLP includes pre-trained models based on thousands of unit tests drawn from real documents available from the SEC EDGAR database as well as various judicial and regulatory proceedings. LexNLP is designed for use in both academic research and industrial applications, and is distributed at https://github.com/LexPredict/lexpredict-lexnlp.}, language = {en}, urldate = {2023-06-09}, publisher = {arXiv}, author = {Bommarito II, Michael J. and Katz, Daniel Martin and Detterman, Eric M.}, month = jun, year = {2018}, note = {arXiv:1806.03688 [cs, stat]}, keywords = {Statistics - Machine Learning, Computer Science - Computation and Language, Computer Science - Information Retrieval, F.2.2, H.3.1, H.3.3, I.2.7, I.7}, }
@article{ruhlHarnessingLegalComplexity2017, title = {Harnessing legal complexity}, volume = {355}, issn = {0036-8075, 1095-9203}, url = {https://www.science.org/doi/10.1126/science.aag3013}, doi = {10.1126/science.aag3013}, abstract = {Bring tools of complexity science to bear on improving law. Complexity science has spread from its origins in the physical sciences into biological and social sciences (1). Increasingly, the social sciences frame policy problems from the financial system to the food system as complex adaptive systems (CAS) and urge policy-makers to design legal solutions with CAS properties in mind. What is often poorly recognized in these initiatives is that legal systems are also complex adaptive systems (2). Just as it seems unwise to pursue regulatory measures while ignoring known CAS properties of the systems targeted for regulation, so too might failure to appreciate CAS qualities of legal systems yield policies founded upon unrealistic assumptions. Despite a long empirical studies tradition in law, there has been little use of complexity science. With few robust empirical studies of legal systems as CAS, researchers are left to gesture at seemingly evident assertions, with limited scientific support. We outline a research agenda to help fill this knowledge gap and advance practical applications.}, language = {en}, number = {6332}, urldate = {2023-06-09}, journal = {Science}, author = {Ruhl, J. B. and Katz, Daniel Martin and Bommarito, Michael J.}, month = mar, year = {2017}, pages = {1377--1378}, }
@misc{bommaritoMeasuringModelingRegulatory2017, address = {Rochester, NY}, type = {{SSRN} {Scholarly} {Paper}}, title = {Measuring and {Modeling} the {U}.{S}. {Regulatory} {Ecosystem}}, url = {https://papers.ssrn.com/abstract=2891494}, doi = {10.2139/ssrn.2891494}, abstract = {Over the last 23 years, the U.S. Securities and Exchange Commission has required over 34,000 companies to file over 165,000 annual reports. These reports, the so-called "Form 10-Ks," contain a characterization of a company's financial performance and its risks, including the regulatory environment in which a company operates. In this paper, we analyze over 4.5 million references to U.S. Federal Acts and Agencies contained within these reports to measure the regulatory ecosystem, in which companies are organisms inhabiting a regulatory environment. While individuals across the political, economic, and academic world frequently refer to trends in this regulatory ecosystem, far less attention has been paid to supporting such claims with large-scale, longitudinal data. In this paper, in addition to positing a model of regulatory ecosystems, we document an increase in the regulatory energy per filing, i.e., a warming "temperature." We also find that the diversity of the regulatory ecosystem has been increasing over the past two decades. These findings support the claim that regulatory activity and complexity are increasing, and this framework contributes an important step towards improving academic and policy discussions around legal complexity and regulation.}, language = {en}, urldate = {2023-06-12}, author = {Bommarito, Michael James and Katz, Daniel Martin}, month = jun, year = {2017}, keywords = {regulation, complex systems, diversity, empirical legal studies, legal complexity, legal science, natural language processing, techno-social systems}, }
@misc{katzCrowdsourcingAccuratelyRobustly2017, address = {Rochester, NY}, type = {{SSRN} {Scholarly} {Paper}}, title = {Crowdsourcing {Accurately} and {Robustly} {Predicts} {Supreme} {Court} {Decisions}}, url = {https://papers.ssrn.com/abstract=3085710}, doi = {10.2139/ssrn.3085710}, abstract = {Scholars have increasingly investigated “crowdsourcing” as an alternative to expert-based judgment or purely data-driven approaches to predicting the future. Under certain conditions, scholars have found that crowd-sourcing can outperform these other approaches. However, despite interest in the topic and a series of successful use cases, relatively few studies have applied empirical model thinking to evaluate the accuracy and robustness of crowdsourcing in real-world contexts. In this paper, we offer three novel contributions. First, we explore a dataset of over 600,000 predictions from over 7,000 participants in a multi-year tournament to predict the decisions of the Supreme Court of the United States. Second, we develop a comprehensive crowd construction framework that allows for the formal description and application of crowdsourcing to real-world data. Third, we apply this framework to our data to construct more than 275,000 crowd models. We find that in out-of-sample historical simulations, crowdsourcing robustly outperforms the commonly-accepted null model, yielding the highest-known performance for this context at 80.8\% case level accuracy. To our knowledge, this dataset and analysis represent one of the largest explorations of recurring human prediction to date, and our results provide additional empirical support for the use of crowdsourcing as a prediction method.}, language = {en}, urldate = {2023-06-12}, author = {Katz, Daniel Martin and Bommarito, Michael James and Blackman, Josh}, month = dec, year = {2017}, keywords = {legal prediction, quantitative legal prediction, crowdsourcing, judicial crowdsourcing, judicial prediction, legal analytics, legal data, model thinking}, }
@article{nunezArtificialIntelligenceLegal2017, title = {Artificial {Intelligence} and {Legal} {Ethics}: {Whether} {AI} {Lawyers} {Can} {Make} {Ethical} {Decisions}}, volume = {20}, copyright = {Copyright (c) 2019 Tulane Journal of Technology \& Intellectual Property}, issn = {2169-4567}, shorttitle = {Artificial {Intelligence} and {Legal} {Ethics}}, url = {https://journals.tulane.edu/TIP/article/view/2682}, language = {en}, urldate = {2024-07-29}, journal = {Tulane Journal of Technology \& Intellectual Property}, author = {Nunez, Catherine}, year = {2017}, }
@misc{katzPredictingBehaviorSupreme2014, title = {Predicting the {Behavior} of the {Supreme} {Court} of the {United} {States}: {A} {General} {Approach}}, shorttitle = {Predicting the {Behavior} of the {Supreme} {Court} of the {United} {States}}, url = {http://arxiv.org/abs/1407.6333}, doi = {10.48550/arXiv.1407.6333}, abstract = {Building upon developments in theoretical and applied machine learning, as well as the efforts of various scholars including Guimerà and Sales-Pardo (2011), Ruger et al. (2004), and Martin et al. (2004), we construct a model designed to predict the voting behavior of the Supreme Court of the United States. Using the extremely randomized tree method first proposed in Geurts et al. (2006), a method similar to the random forest approach developed in Breiman (2001), as well as novel feature engineering, we predict more than sixty years of decisions by the Supreme Court of the United States (1953-2013). Using only data available prior to the date of decision, our model correctly identifies 69.7\% of the Court’s overall affirm / reverse decisions and correctly forecasts 70.9\% of the votes of individual justices across 7,700 cases and more than 68,000 justice votes. Our performance is consistent with the general level of prediction offered by prior scholars. However, our model is distinctive as it is the first robust, generalized, and fully predictive model of Supreme Court voting behavior offered to date. Our model predicts six decades of behavior of thirty Justices appointed by thirteen Presidents. With a more sound methodological foundation, our results represent a major advance for the science of quantitative legal prediction and portend a range of other potential applications, such as those described in Katz (2013).}, language = {en}, urldate = {2023-06-09}, publisher = {arXiv}, author = {Katz, Daniel Martin and Bommarito II, Michael J. and Blackman, Josh}, month = jul, year = {2014}, note = {arXiv:1407.6333 [physics]}, keywords = {Computer Science - Social and Information Networks, Physics - Physics and Society}, }
@misc{katzMeasuringComplexityLaw2013, address = {Rochester, NY}, type = {{SSRN} {Scholarly} {Paper}}, title = {Measuring the {Complexity} of the {Law}: {The} {United} {States} {Code}}, shorttitle = {Measuring the {Complexity} of the {Law}}, url = {https://papers.ssrn.com/abstract=2307352}, doi = {10.2139/ssrn.2307352}, abstract = {Einstein’s razor, a corollary of Ockham’s razor, is often paraphrased as follows: make everything as simple as possible, but not simpler. This rule of thumb describes the challenge that designers of a legal system face — to craft simple laws that produce desired ends, but not to pursue simplicity so far as to undermine those ends. Complexity, simplicity’s inverse, taxes cognition and increases the likelihood of suboptimal decisions. In addition, unnecessary legal complexity can drive a misallocation of human capital toward comprehending and complying with legal rules and away from other productive ends.}, language = {en}, urldate = {2023-06-12}, author = {Katz, Daniel Martin and Bommarito, Michael James}, month = aug, year = {2013}, keywords = {Complexity of Law, Legal Complexity, legal entropy, optimal regulation, United States Code}, }
@misc{katzQuantitativeLegalPrediction2012, address = {Rochester, NY}, type = {{SSRN} {Scholarly} {Paper}}, title = {Quantitative {Legal} {Prediction} – or – {How} {I} {Learned} to {Stop} {Worrying} and {Start} {Preparing} for the {Data} {Driven} {Future} of the {Legal} {Services} {Industry}}, url = {https://papers.ssrn.com/abstract=2187752}, abstract = {Do I have a case? What is our likely exposure? How much is this going to cost? What will happen if we leave this particular provision out of this contract? How can we best staff this particular legal matter? These are core questions asked by sophisticated clients such as general counsels as well as consumers at the retail level. Whether generated by a mental model or a sophisticated algorithm, prediction is a core component of the guidance that lawyers offer. Indeed, it is by generating informed answers to these types of questions that many lawyers earn their respective wage.}, language = {en}, urldate = {2023-06-12}, author = {Katz, Daniel Martin}, month = dec, year = {2012}, keywords = {algorithmic justice, big data, law, legal prediction, legal services, machine learning, prediction, quantitative legal prediction}, }
@article{bommaritoiiMathematicalApproachStudy2010, title = {A {Mathematical} {Approach} to the {Study} of the {United} {States} {Code}}, volume = {389}, issn = {03784371}, url = {http://arxiv.org/abs/1003.4146}, doi = {10.1016/j.physa.2010.05.057}, abstract = {The United States Code (Code) is a document containing over 22 million words that represents a large and important source of Federal statutory law. Scholars and policy advocates often discuss the direction and magnitude of changes in various aspects of the Code. However, few have mathematically formalized the notions behind these discussions or directly measured the resulting representations. This paper addresses the current state of the literature in two ways. First, we formalize a representation of the United States Code as the union of a hierarchical network and a citation network over vertices containing the language of the Code. This representation reflects the fact that the Code is a hierarchically organized document containing language and explicit citations between provisions. Second, we use this formalization to measure aspects of the Code as codified in October 2008, November 2009, and March 2010. These measurements allow for a characterization of the actual changes in the Code over time. Our findings indicate that in the recent past, the Code has grown in its amount of structure, interdependence, and language.}, language = {en}, number = {19}, urldate = {2023-06-09}, journal = {Physica A: Statistical Mechanics and its Applications}, author = {Bommarito II, Michael J. and Katz, Daniel Martin}, month = oct, year = {2010}, note = {arXiv:1003.4146 [physics]}, keywords = {Computer Science - Computers and Society, Physics - Physics and Society, Computer Science - Information Retrieval, Computer Science - Digital Libraries}, pages = {4195--4200}, }
@misc{katzReproductionHierarchySocial2009, address = {Rochester, NY}, type = {{SSRN} {Scholarly} {Paper}}, title = {Reproduction of {Hierarchy}? {A} {Social} {Network} {Analysis} of the {American} {Law} {Professoriate}}, shorttitle = {Reproduction of {Hierarchy}?}, url = {https://papers.ssrn.com/abstract=1352656}, abstract = {As its structure offers one causal mechanism for the emergence of and convergence upon a collective conception of what constitutes a sound legal rule, we believe the social structure of the American law professoriate is an important piece of a broader model of American common law development. Leveraging advances in network science and drawing from available information on the more than 7,200 tenure-track professors employed by ABA-accredited institutions, we explore the topology of the legal academy, including the relative distribution of authority among its institutions. Drawing from the social epidemiology literature, we provide a computational model for diffusion on our network. The model provides a parsimonious display of the trade-off between "idea infectiousness" and structural position. While our model is undoubtedly simple, our initial foray into computational legal studies should, at a minimum, motivate future scholarship.}, language = {en}, urldate = {2023-06-12}, author = {Katz, Daniel Martin and Gubler, Joshua R. and Zelner, Jon and Bommarito, Michael James and Provins, Eric A. and Ingall, Eitan M.}, month = mar, year = {2009}, keywords = {American Common Law, Complexity, Computational Legal Studies, Doctrinal Phase Transition, Law as a Complex System, Legal Academy, Organizational Studies, Peer Effects, Power Law, Public Law, Social Network Analysis, Sociology of Law}, }
@misc{bommaritoLawSeamlessWeb2009, address = {Rochester, NY}, type = {{SSRN} {Scholarly} {Paper}}, title = {Law as a {Seamless} {Web}? {Comparison} of {Various} {Network} {Representations} of the {United} {States} {Supreme} {Court} {Corpus} (1791-2005)}, shorttitle = {Law as a {Seamless} {Web}?}, url = {https://papers.ssrn.com/abstract=1419525}, doi = {10.2139/ssrn.1419525}, abstract = {Citation networks are a cornerstone of network research and have been important to the general development of network theory. Citation data have the advantage of constituting a well-defined set where the nature of nodes and edges is reasonably well specified. Much interesting and important work has been done in this vein, with respect to not only academic but also judicial citation networks. For example, previous scholarship focuses upon broad citation patterns, the evolution of precedent, and time-varying change in the likelihood that communities of cases will be cited. As research on judicial citation and semantic networks transitions from a strict focus on the structural characteristics of these networks to the evolutionary dynamics behind their growth, it becomes even more important to develop theoretically coherent and empirically grounded ideas about the nature of edges and nodes. In this paper, we move in this direction on several fronts. We compare several network representations of the corpus of United States Supreme Court decisions (1791-2005). This corpus is not only of seminal importance, but also represents a highly structured and largely self-contained body of case law. As constructed herein, nodes represent whole cases or individual 'opinion units' within cases. Edges represent either citations or semantic connections. As our broader goal is to better understand American common law development, we are particularly interested in the union, intersection, and complement of these various citation networks, as they offer potential insight into the long-standing question of whether 'law is a seamless web'. We believe the characterization of law’s interconnectedness is an empirical question well suited to the tools of computer science and applied graph theory. While much work still remains, the analysis provided herein is designed to advance the broader cause.}, language = {en}, urldate = {2023-06-12}, author = {Bommarito, Michael James and Katz, Daniel Martin and Zelner, Jon}, month = jun, year = {2009}, keywords = {computational legal studies, computational linguistics and law, computer programming and law, evolution of law, evolutionary graph theory, judicial citation networks, law as a complex system, network analysis, semantic analysis, supreme court citations}, }