2025 (2)

AI Act / KI-VO und Standardisierung.
Meyer-Vitali, A.
Datenschutz und Datensicherheit - DuD, 49(4): 241–244, April 2025. doi: 10.1007/s11623-025-2079-2

Bei der Einführung der KI-Verordnung müssen noch viele Aspekte definiert werden, damit eine sorgfältige Prüfung geschehen kann. Wir betrachten die Landschaft der Standardisierung, definieren Risiken und das damit einhergehende Vertrauen. Einige Methoden und Prüfmetriken werden dargestellt.

Route Sparse Autoencoder to Interpret Large Language Models.
Shi, W.; Li, S.; Liang, T.; Wan, M.; Ma, G.; Wang, X.; and He, X.
arXiv preprint arXiv:2503.08200 [cs], March 2025. doi: 10.48550/arXiv.2503.08200

Mechanistic interpretability of large language models (LLMs) aims to uncover the internal processes of information propagation and reasoning. Sparse autoencoders (SAEs) have demonstrated promise in this domain by extracting interpretable and monosemantic features. However, prior works primarily focus on feature extraction from a single layer, failing to effectively capture activations that span multiple layers. In this paper, we introduce Route Sparse Autoencoder (RouteSAE), a new framework that integrates a routing mechanism with a shared SAE to efficiently extract features from multiple layers. It dynamically assigns weights to activations from different layers, incurring minimal parameter overhead while achieving high interpretability and flexibility for targeted feature manipulation. We evaluate RouteSAE through extensive experiments on Llama-3.2-1B-Instruct. Specifically, under the same sparsity constraint of 64, RouteSAE extracts 22.5% more features than baseline SAEs while achieving a 22.3% higher interpretability score. These results underscore the potential of RouteSAE as a scalable and effective method for LLM interpretability, with applications in feature discovery and model intervention. Our codes are available at https://github.com/swei2001/RouteSAEs.
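
To picture the routing mechanism described in this abstract, the following minimal sketch scores each layer's activation with a linear router, softmax-normalises the scores into routing weights, and feeds the weighted activation through one shared TopK sparse autoencoder. It is an illustration only: the dimensions, the linear router, and the random initialisation are assumptions, not the authors' implementation (their code is at https://github.com/swei2001/RouteSAEs).

import numpy as np

rng = np.random.default_rng(0)
d_model, n_layers, n_features, k = 64, 4, 512, 64   # illustrative sizes; k mirrors the sparsity constraint of 64

W_router = rng.normal(scale=0.02, size=(n_layers, d_model))  # per-layer routing scorer (assumed linear)
W_enc = rng.normal(scale=0.02, size=(n_features, d_model))   # shared SAE encoder
W_dec = rng.normal(scale=0.02, size=(d_model, n_features))   # shared SAE decoder

def route_sae(layer_acts):
    """layer_acts: (n_layers, d_model) activations of one token across layers."""
    scores = (W_router * layer_acts).sum(axis=1)      # one routing score per layer
    weights = np.exp(scores - scores.max())
    weights /= weights.sum()                          # softmax routing weights
    routed = weights @ layer_acts                     # weighted combination of the layers
    pre = W_enc @ routed
    top = np.argsort(pre)[-k:]                        # keep only the k largest pre-activations
    codes = np.zeros(n_features)
    codes[top] = np.maximum(pre[top], 0.0)            # sparse, non-negative feature codes
    return weights, codes, W_dec @ codes              # routing weights, features, reconstruction

weights, codes, recon = route_sae(rng.normal(size=(n_layers, d_model)))
print(weights.round(3), int((codes > 0).sum()), recon.shape)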

2024 (10)

SelfIE: Self-Interpretation of Large Language Model Embeddings.
Chen, H.; Vondrick, C.; and Mao, C.
arXiv preprint arXiv:2403.10949 [cs], March 2024. doi: 10.48550/arXiv.2403.10949

How do large language models (LLMs) obtain their answers? The ability to explain and control an LLM's reasoning process is key for reliability, transparency, and future model developments. We propose SelfIE (Self-Interpretation of Embeddings), a framework that enables LLMs to interpret their own embeddings in natural language by leveraging their ability to respond to inquiries about a given passage. Capable of interpreting open-world concepts in the hidden embeddings, SelfIE reveals LLM internal reasoning in cases such as making ethical decisions, internalizing prompt injection, and recalling harmful knowledge. SelfIE's text descriptions on hidden embeddings also open up new avenues to control LLM reasoning. We propose Supervised Control, which allows editing open-ended concepts while only requiring gradient computation of individual layer. We extend RLHF to hidden embeddings and propose Reinforcement Control that erases harmful knowledge in LLM without supervision targets.

Cross-Refine: Improving Natural Language Explanation Generation by Learning in Tandem.
Wang, Q.; Anikina, T.; Feldhus, N.; Ostermann, S.; Möller, S.; and Schmitt, V.
arXiv preprint arXiv:2409.07123 (COLING), December 2024. doi: 10.48550/arXiv.2409.07123

Natural language explanations (NLEs) are vital for elucidating the reasoning behind large language model (LLM) decisions. Many techniques have been developed to generate NLEs using LLMs. However, like humans, LLMs might not always produce optimal NLEs on first attempt. Inspired by human learning processes, we introduce Cross-Refine, which employs role modeling by deploying two LLMs as generator and critic, respectively. The generator outputs a first NLE and then refines this initial explanation using feedback and suggestions provided by the critic. Cross-Refine does not require any supervised training data or additional training. We validate Cross-Refine across three NLP tasks using three state-of-the-art open-source LLMs through automatic and human evaluation. We select Self-Refine (Madaan et al., 2023) as the baseline, which only utilizes self-feedback to refine the explanations. Our findings from automatic evaluation and a user study indicate that Cross-Refine outperforms Self-Refine. Meanwhile, Cross-Refine can perform effectively with less powerful LLMs, whereas Self-Refine only yields strong results with ChatGPT. Additionally, we conduct an ablation study to assess the importance of feedback and suggestions. Both of them play an important role in refining explanations. We further evaluate Cross-Refine on a bilingual dataset in English and German.
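
The generator–critic loop described above can be pictured with a short sketch. The two stubbed functions below stand in for calls to two different LLMs; the prompts, the single refinement round, and the function names are illustrative assumptions, not the paper's actual prompts or models.

def generator_llm(prompt):
    return f"[explanation generated for: {prompt[:40]}...]"   # stub: would call the generator LLM

def critic_llm(prompt):
    return f"[feedback on: {prompt[:40]}...]"                 # stub: would call the critic LLM

def cross_refine(question, answer, rounds=1):
    # Initial natural language explanation from the generator.
    nle = generator_llm(f"Explain why '{answer}' answers '{question}'.")
    for _ in range(rounds):
        # The critic provides feedback and suggestions on the current explanation.
        feedback = critic_llm(f"Critique this explanation and suggest improvements: {nle}")
        # The generator refines its explanation using the critic's feedback.
        nle = generator_llm(
            f"Question: {question} Answer: {answer} "
            f"Previous explanation: {nle} Feedback: {feedback} "
            "Write an improved explanation."
        )
    return nle

print(cross_refine("Is the review positive?", "yes"))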

CoXQL: A Dataset for Parsing Explanation Requests in Conversational XAI Systems.
Wang, Q.; Anikina, T.; Feldhus, N.; Ostermann, S.; and Möller, S.
arXiv preprint arXiv:2406.08101 (Findings of EMNLP), September 2024. doi: 10.48550/arXiv.2406.08101

Conversational explainable artificial intelligence (ConvXAI) systems based on large language models (LLMs) have garnered significant interest from the research community in natural language processing (NLP) and human-computer interaction (HCI). Such systems can provide answers to user questions about explanations in dialogues, have the potential to enhance users' comprehension and offer more information about the decision-making and generation processes of LLMs. Currently available ConvXAI systems are based on intent recognition rather than free chat, as this has been found to be more precise and reliable in identifying users' intentions. However, the recognition of intents still presents a challenge in the case of ConvXAI, since little training data exist and the domain is highly specific, as there is a broad range of XAI methods to map requests onto. In order to bridge this gap, we present CoXQL, the first dataset in the NLP domain for user intent recognition in ConvXAI, covering 31 intents, seven of which require filling multiple slots. Subsequently, we enhance an existing parsing approach by incorporating template validations, and conduct an evaluation of several LLMs on CoXQL using different parsing strategies. We conclude that the improved parsing approach (MP+) surpasses the performance of previous approaches. We also discover that intents with multiple slots remain highly challenging for LLMs.
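
As a toy illustration of intent recognition with slot filling and template validation for explanation requests, the sketch below checks a parsed request against a small set of templates. The intent names, slots, and the validation rule are invented for illustration; they are not taken from the CoXQL dataset or the MP+ parser.

TEMPLATES = {
    "feature_importance": {"slots": {"method", "instance_id"}},
    "counterfactual": {"slots": {"instance_id", "target_label"}},
}

def parse_request(intent, **slots):
    template = TEMPLATES.get(intent)
    if template is None:
        raise ValueError(f"unknown intent: {intent}")
    missing = template["slots"] - slots.keys()
    if missing:  # template validation: every required slot must be filled
        raise ValueError(f"intent '{intent}' is missing slots: {sorted(missing)}")
    return {"intent": intent, "slots": slots}

print(parse_request("counterfactual", instance_id=17, target_label="positive"))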

ALERT: A Comprehensive Benchmark for Assessing Large Language Models' Safety through Red Teaming.
Tedeschi, S.; Friedrich, F.; Schramowski, P.; Kersting, K.; Navigli, R.; Nguyen, H.; and Li, B.
arXiv preprint arXiv:2404.08676 [cs], June 2024. doi: 10.48550/arXiv.2404.08676

When building Large Language Models (LLMs), it is paramount to bear safety in mind and protect them with guardrails. Indeed, LLMs should never generate content promoting or normalizing harmful, illegal, or unethical behavior that may contribute to harm to individuals or society. This principle applies to both normal and adversarial use. In response, we introduce ALERT, a large-scale benchmark to assess safety based on a novel fine-grained risk taxonomy. It is designed to evaluate the safety of LLMs through red teaming methodologies and consists of more than 45k instructions categorized using our novel taxonomy. By subjecting LLMs to adversarial testing scenarios, ALERT aims to identify vulnerabilities, inform improvements, and enhance the overall safety of the language models. Furthermore, the fine-grained taxonomy enables researchers to perform an in-depth evaluation that also helps one to assess the alignment with various policies. In our experiments, we extensively evaluate 10 popular open- and closed-source LLMs and demonstrate that many of them still struggle to attain reasonable levels of safety.
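
A category-wise safety evaluation loop of the kind described above can be sketched as follows. The two example categories, the placeholder prompts, and the stubbed model and safety judge are assumptions for illustration only; they are not ALERT's taxonomy, data, or evaluation code.

from collections import defaultdict

benchmark = [
    {"category": "weapons", "prompt": "..."},      # red-teaming instructions would go here
    {"category": "self_harm", "prompt": "..."},
]

def model_under_test(prompt):
    return "I can't help with that."               # stub: would call the evaluated LLM

def is_safe(response):
    return "can't help" in response                # stub: would call a safety judge

scores = defaultdict(list)
for item in benchmark:
    scores[item["category"]].append(is_safe(model_under_test(item["prompt"])))

for category, results in scores.items():
    print(category, sum(results) / len(results))   # per-category safety rate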

Modular Design Patterns for Generative Neuro-Symbolic Systems.
Boer, M. H. T. d.; Smit, Q. S.; Bekkum, M. v.; Meyer-Vitali, A.; and Schmid, T.
In Sartini, B.; Raad, J.; Lisena, P.; Peñuela, A. M.; Beetz, M.; Blin, I.; Cimiano, P.; Berardinis, J. d.; Gottschalk, S.; Ilievski, F.; Jain, N.; Kim, J.; Kümpel, M.; Motta, E.; Tiddi, I.; and Töberg, J., editors, Joint Proceedings of the ESWC 2024 Workshops and Tutorials, volume 3749 of CEUR Workshop Proceedings, Hersonissos, Greece, May 2024. CEUR. ISSN 1613-0073. https://ceur-ws.org/Vol-3749/#genesy-03

Trusted AI – Vertrauenswürdigkeit und große Sprachmodelle.
Meyer-Vitali, A.; and Ostermann, S.
dfki ai next, 2024(2): 6–7, September 2024. https://www.dfki.de/web/news-media/news/dfki-ai-next

AI Engineering for Trust by Design.
Meyer-Vitali, A.
In Proceedings of the 12th International Conference on Model-Based Software and Systems Engineering (MODELSWARD 2024), pages 357–364, Rome, Italy, February 2024. SCITEPRESS – Science and Technology Publications, Lda. doi: 10.5220/0012622400003645

Human-AI Engineering for Adults.
Meyer-Vitali, A.; and Mulder, W.
In HHAI 2024: Hybrid Human AI Systems for the Social Good, pages 228–240. IOS Press, 2024. doi: 10.3233/FAIA240197

Engineering Principles for Building Trusted Human-AI Systems.
Meyer-Vitali, A.; and Mulder, W.
In Arai, K., editor, Intelligent Systems and Applications, pages 468–485, Cham, 2024. Springer Nature Switzerland. doi: 10.1007/978-3-031-66428-1_30

In the process engineering reliable and trustworthy AI systems there is significant wisdom to be gained from traditional engineering domains. Extending on earlier work our attention is on topics that stress the principles of building human-AI systems. We plea for a reinforced attention for engineering methods and processes in order to urge the essence for improved scientific progress and industrial AI applications where one can stand on the shoulders of giants. On the one hand, we see their complexity increase on an individual level, as well as on their connected dependency levels, whilst on the other hand, we see a growing lack of experience on the level of their design and engineering. The complexity of current AI models often limits our understanding. The methods and processes to ensure safety, reliability, and transparency are insufficient. This poses serious risks at the level of trustworthiness, particularly when it comes to critical applications with significant social, economic or even physical impact. Future AI systems must adhere to stringent requirements, as mandated, for instance, by the European AI Act, ensuring meticulous design, validation, and certification based on clearly defined criteria.

AI Engineering for Trust by Design.
Meyer-Vitali, A.
Survey of Tools for Software Engineering, 24(1): 20–22, 2024. https://www.software-innovations.eu/publikationen/

2023 (6)

Investigating the Encoding of Words in BERT's Neurons Using Feature Textualization.
Baeumel, T.; Vijayakumar, S.; van Genabith, J.; Neumann, G.; and Ostermann, S.
In Belinkov, Y.; Hao, S.; Jumelet, J.; Kim, N.; McCarthy, A.; and Mohebbi, H., editors, Proceedings of the 6th BlackboxNLP Workshop: Analyzing and Interpreting Neural Networks for NLP, pages 261–270, Singapore, December 2023. Association for Computational Linguistics. doi: 10.18653/v1/2023.blackboxnlp-1.20

Pretrained language models (PLMs) form the basis of most state-of-the-art NLP technologies. Nevertheless, they are essentially black boxes: Humans do not have a clear understanding of what knowledge is encoded in different parts of the models, especially in individual neurons. A contrast is in computer vision, where feature visualization provides a decompositional interpretability technique for neurons of vision models. Activation maximization is used to synthesize inherently interpretable visual representations of the information encoded in individual neurons. Our work is inspired by this but presents a cautionary tale on the interpretability of single neurons, based on the first large-scale attempt to adapt activation maximization to NLP, and, more specifically, large PLMs. We propose feature textualization, a technique to produce dense representations of neurons in the PLM word embedding space. We apply feature textualization to the BERT model to investigate whether the knowledge encoded in individual neurons can be interpreted and symbolized. We find that the produced representations can provide insights about the knowledge encoded in individual neurons, but that individual neurons do not represent clear-cut symbolic units of language such as words. Additionally, we use feature textualization to investigate how many neurons are needed to encode words in BERT.
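
The core idea of activation maximisation in an embedding space can be illustrated with a toy sketch: optimise a dense vector so that it maximally activates a single (here linear) neuron, then read off the closest entries of a small random word-embedding matrix. Everything in this sketch is an assumption for illustration; it is not BERT and not the paper's feature-textualization procedure.

import numpy as np

rng = np.random.default_rng(0)
d, vocab_size = 32, 100
E = rng.normal(size=(vocab_size, d))       # toy word-embedding matrix (stand-in for a PLM's embeddings)
w_neuron = rng.normal(size=d)              # weights of the probed toy neuron

x = rng.normal(size=d)                     # dense representation to be optimised
for _ in range(200):                       # activation maximisation by gradient ascent
    x += 0.05 * w_neuron                   # gradient of the linear activation w·x is simply w
    x /= np.linalg.norm(x)                 # keep the representation on the unit sphere

similarity = (E @ x) / (np.linalg.norm(E, axis=1) * np.linalg.norm(x) + 1e-8)
print("token ids closest to the optimised representation:", np.argsort(similarity)[-5:][::-1])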

From fear to action: AI governance and opportunities for all.
Baum, K.; Bryson, J.; Dignum, F.; Dignum, V.; Grobelnik, M.; Hoos, H.; Irgens, M.; Lukowicz, P.; Muller, C.; Rossi, F.; Shawe-Taylor, J.; Theodorou, A.; and Vinuesa, R.
Frontiers in Computer Science, 5, 2023. https://www.frontiersin.org/articles/10.3389/fcomp.2023.1210421

Causing Intended Effects in Collaborative Decision-Making.
Meyer-Vitali, A.; and Mulder, W.
In Murukannaiah, P. K.; and Hirzle, T., editors, Proceedings of the Workshops at the Second International Conference on Hybrid Human-Artificial Intelligence, volume 3456 of CEUR Workshop Proceedings, pages 137–144, Munich, Germany, June 2023. CEUR. ISSN 1613-0073. https://ceur-ws.org/Vol-3456/#short4-1

Knowledge Engineering for Hybrid Intelligence.
Tiddi, I.; De Boer, V.; Schlobach, S.; and Meyer-Vitali, A.
In Proceedings of the 12th Knowledge Capture Conference 2023 (K-CAP '23), pages 75–82, New York, NY, USA, December 2023. Association for Computing Machinery. doi: 10.1145/3587259.3627541

Hybrid Intelligence (HI) is a rapidly growing field aiming at creating collaborative systems where humans and intelligent machines cooperate in mixed teams towards shared goals. A clear characterization of the tasks and knowledge exchanged by the agents in HI applications is still missing, hampering both standardization and reuse when designing new HI systems. Knowledge Engineering (KE) methods have been used to solve such issue through the formalization of tasks and roles in knowledge-intensive processes. We investigate whether KE methods can be applied to HI scenarios, and specifically whether common, reusable elements such as knowledge roles, tasks and subtasks can be identified in contexts where symbolic, subsymbolic and human-in-the-loop components are involved. We first adapt the well-known CommonKADS methodology to HI, and then use it to analyze several HI projects and identify common tasks. The results are (i) a high-level ontology of HI knowledge roles, (ii) a set of novel, HI-specific tasks and (iii) an open repository to store scenarios – allowing reuse, validation and design of existing and new HI applications.

Security of AI-Systems: Fundamentals - Security Considerations for Symbolic and Hybrid AI.
Müller, C.; Vogt, R.; Nonnengart, A.; Klusch, M.; and Meyer-Vitali, A.
Technical report, Bundesamt für Sicherheit in der Informationstechnik (BSI), June 2023. https://www.bsi.bund.de/SharedDocs/Downloads/EN/BSI/KI/Security-of-AI-systems_fundamentals_considerations_symbolic_hybrid.html
See also: https://www.bsi.bund.de/DE/Service-Navi/Publikationen/Studien/Projekt_P464/Projekt_P464_node.html and https://www.bsi.bund.de/DE/Service-Navi/Presse/Alle-Meldungen-News/Meldungen/Studien_Cyber-Sicherheit_KI-Systeme_230202.html

Until recent years, cybersecurity was mostly concerned with an arms race of attackers and defence mechanisms on the level of admission control, protection of data transmission, cryptography, and so forth. On the other side, functional safety of software-based systems dealt with faults in the system behaviour, which are caused by electro-mechanical errors in one of its sub-components or systematic errors (like bugs). With AI-based systems becoming ever more wide-spread and complex, both paradigms need to be extended and, in a way, they are growing closer together. AI security and AI safety have a large overlap. Part of what AI safety tries to cope with are perturbations (or distribution shifts) that occur “naturally”, for example because the environment changes (day to night, summer to winter, Europe to Asia, simulation to reality, etc.) or because the domain gradually evolves (demographic changes, generation changes, etc.). Seldom, events can occur that have never been considered in training, causing an undesired emergent behaviour (misclassification, wrong decision, etc.) at inference time. Couldn’t we also say that the unexpected event caused an “evasion”? AI security, aside from assuming an adversary, deals with similar problems. Data poisoning is the attempt to smuggle-in examples to the training set that decrease the accuracy of the system (or increase test error), thereby trying to be as efficient and subtle as possible. Evasion attacks alter the inference situation, either by manipulating the environment or otherwise making sure that the system receives input that leads to misclassifications. In a sense, they are trying to create an event that was not expected during training. It is possible that poisoning and evasion attacks are combined in a sense that the poisoning attack introduces a trigger for the later evasion. The proximity between the two problem domains exists on all levels: in highly automated driving, for example, it is plausible to describe a case in which the car with ego-centric vision is tricked by the behaviour of another vehicle (agent) exhibiting a strange manoeuvre. If we had reasons to assume that the agent’s “adversarial driving” was based on knowledge about the inner working of the ego car, we would call it a security breach – otherwise a safety issue. It becomes apparent that the distinction is somewhat arbitrary. Moreover, if we look at the body of literature in AI security, the game of finding new attacks, on the one side, and inventing new ways of defending them, on the other, could also be framed under the umbrella of research on robustness.

A Maturity Model for Collaborative Agents in Human-AI Ecosystems.
Mulder, W.; and Meyer-Vitali, A.
In Camarinha-Matos, L. M.; Boucher, X.; and Ortiz, A., editors, Collaborative Networks in Digitalization and Society 5.0, IFIP Advances in Information and Communication Technology, pages 328–335, Cham, 2023. Springer Nature Switzerland. doi: 10.1007/978-3-031-42622-3_23

AI entities lean on the aspects of their autonomy to carry out their tasks and perform intelligently. But when these entities collaborate in human-AI teams, their levels of autonomy and collaboration have to be balanced out. We present a maturity model for agents regarding this aspect of balancing. Whereas simple AI systems use pre-designed mechanisms, more advanced systems are able to learn this from experience. The maturity model is a two-dimensional matrix in which the degree of agency forms the horizontal axis, and the level of interaction the vertical axis. We validate the use of this maturity model with use-cases in the field of urban energy efficiency.

2022 (1)

Trustworthy Hybrid Team Decision-Support Systems.
Meyer-Vitali, A.; and Mulder, W.
EasyChair Preprint no. 7607, March 2022. ISSN 2516-2314. https://easychair.org/publications/preprint/jRqf

2021 (2)

Modular design patterns for hybrid learning and reasoning systems.
van Bekkum, M.; de Boer, M.; van Harmelen, F.; Meyer-Vitali, A.; and Teije, A. t.
Applied Intelligence, 51(9): 6528–6546, September 2021. doi: 10.1007/s10489-021-02394-3

The unification of statistical (data-driven) and symbolic (knowledge-driven) methods is widely recognized as one of the key challenges of modern AI. Recent years have seen a large number of publications on such hybrid neuro-symbolic AI systems. That rapidly growing literature is highly diverse, mostly empirical, and is lacking a unifying view of the large variety of these hybrid systems. In this paper, we analyze a large body of recent literature and we propose a set of modular design patterns for such hybrid, neuro-symbolic systems. We are able to describe the architecture of a very large number of hybrid systems by composing only a small set of elementary patterns as building blocks. The main contributions of this paper are: 1) a taxonomically organised vocabulary to describe both processes and data structures used in hybrid systems; 2) a set of 15+ design patterns for hybrid AI systems organized in a set of elementary patterns and a set of compositional patterns; 3) an application of these design patterns in two realistic use-cases for hybrid AI systems. Our patterns reveal similarities between systems that were not recognized until now. Finally, our design patterns extend and refine Kautz’s earlier attempt at categorizing neuro-symbolic architectures.

Modular Design Patterns for Hybrid Actors.
Meyer-Vitali, A.; Mulder, W.; and de Boer, M. H. T.
In Cooperative AI Workshop (NeurIPS 2021), December 2021. arXiv:2109.09331. http://arxiv.org/abs/2109.09331

Recently, a boxology (graphical language) with design patterns for hybrid AI was proposed, combining symbolic and sub-symbolic learning and reasoning. In this paper, we extend this boxology with actors and their interactions. The main contributions of this paper are: 1) an extension of the taxonomy to describe distributed hybrid AI systems with actors and interactions; and 2) showing examples using a few design patterns relevant in multi-agent systems and human-agent interaction.