BibBase greiner-petter, a

2024 (2)

MAGPIE: Multi-Task Analysis of Media-Bias Generalization with Pre-Trained Identification of Expressions. Horych, T.; Wessel, M. P.; Wahle, J. P.; Ruas, T.; Waßmuth, J.; Greiner-Petter, A.; Aizawa, A.; Gipp, B.; and Spinde, T. In Calzolari, N.; Kan, M.; Hoste, V.; Lenci, A.; Sakti, S.; and Xue, N., editor(s), Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024), pages 10903–10920, Torino, Italy, May 2024. ELRA and ICCL Core Rank B

MAGPIE: Multi-Task Analysis of Media-Bias Generalization with Pre-Trained Identification of Expressions [link]

Paper link bibtex abstract

@inproceedings{BibbaseHorychWWR24,
	address = {Torino, Italy},
	title = {{MAGPIE}: {Multi}-{Task} {Analysis} of {Media}-{Bias} {Generalization} with {Pre}-{Trained} {Identification} of {Expressions}},
	shorttitle = {{MAGPIE}},
	url = {https://aclanthology.org/2024.lrec-main.952},
	abstract = {Media bias detection poses a complex, multifaceted problem traditionally tackled using single-task models and small in-domain datasets, consequently lacking generalizability. To address this, we introduce MAGPIE, a large-scale multi-task pre-training approach explicitly tailored for media bias detection. To enable large-scale pre-training, we construct Large Bias Mixture (LBM), a compilation of 59 bias-related tasks. MAGPIE outperforms previous approaches in media bias detection on the Bias Annotation By Experts (BABE) dataset, with a relative improvement of 3.3\% F1-score. Furthermore, using a RoBERTa encoder, we show that MAGPIE needs only 15\% of fine-tuning steps compared to single-task approaches. We provide insight into task learning interference and show that sentiment analysis and emotion detection help learning of all other tasks, and scaling the number of tasks leads to the best results. MAGPIE confirms that MTL is a promising approach for addressing media bias detection, enhancing the accuracy and efficiency of existing models. Furthermore, LBM is the first available resource collection focused on media bias MTL.},
	urldate = {2024-05-23},
	booktitle = {Proceedings of the 2024 {Joint} {International} {Conference} on {Computational} {Linguistics}, {Language} {Resources} and {Evaluation} ({LREC}-{COLING} 2024)},
	publisher = {ELRA and ICCL},
	author = {Horych, Tomáš and Wessel, Martin Paul and Wahle, Jan Philip and Ruas, Terry and Waßmuth, Jerome and Greiner-Petter, André and Aizawa, Akiko and Gipp, Bela and Spinde, Timo},
	editor = {Calzolari, Nicoletta and Kan, Min-Yen and Hoste, Veronique and Lenci, Alessandro and Sakti, Sakriani and Xue, Nianwen},
	month = may,
	year = {2024},
	note = {Core Rank B},
	pages = {10903--10920},
}

Taxonomy of Mathematical Plagiarism. Satpute, A.; Greiner-Petter, A.; Gießing, N.; Beckenbach, I.; Schubotz, M.; Teschke, O.; Aizawa, A.; and Gipp, B. In Goharian, N.; Tonellotto, N.; He, Y.; Lipani, A.; McDonald, G.; Macdonald, C.; and Ounis, I., editor(s), 46th European Conference on Information Retrieval (ECIR), volume 14611, pages 12–20, Glasgow, UK, March 2024. Springer Nature Switzerland Core Rank A

Taxonomy of Mathematical Plagiarism [link]

Paper doi link bibtex abstract

@inproceedings{BibbaseSatputeGGB24,
	address = {Glasgow, UK},
	title = {Taxonomy of {Mathematical} {Plagiarism}},
	volume = {14611},
	isbn = {978-3-031-56065-1},
	url = {https://link.springer.com/10.1007/978-3-031-56066-8_2},
	doi = {10.1007/978-3-031-56066-8_2},
	abstract = {Plagiarism is a pressing concern, even more so with the availability of large language models. Existing plagiarism detection systems reliably find copied and moderately reworded text but fail for idea plagiarism, especially in mathematical science, which heavily uses formal mathematical notation. We make two contributions. First, we establish a taxonomy of mathematical content reuse by annotating potentially plagiarised 122 scientific document pairs. Second, we analyze the best-performing approaches to detect plagiarism and mathematical content similarity on the newly established taxonomy. We found that the best-performing methods for plagiarism and math content similarity achieve an overall detection score (PlagDet) of 0.06 and 0.16, respectively. The best-performing methods failed to detect most cases from all seven newly established math similarity types. Outlined contributions will benefit research in plagiarism detection systems, recommender systems, question-answering systems, and search engines. We make our experiment’s code and annotated dataset available to the community: https://github.com/gipplab/Taxonomy-of-Mathematical-Plagiarism.},
	language = {en},
	urldate = {2024-05-23},
	booktitle = {46th {European} {Conference} on {Information} {Retrieval} ({ECIR})},
	publisher = {Springer Nature Switzerland},
	author = {Satpute, Ankit and Greiner-Petter, André and Gießing, Noah and Beckenbach, Isabel and Schubotz, Moritz and Teschke, Olaf and Aizawa, Akiko and Gipp, Bela},
	editor = {Goharian, Nazli and Tonellotto, Nicola and He, Yulan and Lipani, Aldo and McDonald, Graham and Macdonald, Craig and Ounis, Iadh},
	month = mar,
	year = {2024},
	note = {Core Rank A},
	pages = {12--20},
}

2023 (4)

Making Presentation Math Computable: A Context-Sensitive Approach for Translating LaTeX to Computer Algebra Systems. Greiner-Petter, A. Springer Fachmedien Wiesbaden, Wiesbaden, 2023. Doctoral Dissertation at University of Wuppertal, Germany

Making Presentation Math Computable: A Context-Sensitive Approach for Translating LaTeX to Computer Algebra Systems [link]

Paper doi link bibtex abstract 3 downloads

@book{BibbaseGreinerPetter23,
	address = {Wiesbaden},
	title = {Making {Presentation} {Math} {Computable}: {A} {Context}-{Sensitive} {Approach} for {Translating} {LaTeX} to {Computer} {Algebra} {Systems}},
	isbn = {978-3-658-40472-7 978-3-658-40473-4},
	shorttitle = {Making {Presentation} {Math} {Computable}},
	url = {https://link.springer.com/10.1007/978-3-658-40473-4},
	abstract = {This thesis addresses the issue of translating mathematical expressions from LaTeX to the syntax of Computer Algebra Systems (CAS). Over the past decades, especially in the domain of Sciences, Technology, Engineering, and Mathematics (STEM), LaTeX has become the de-facto standard to typeset mathematical formulae in publications. Since scientists are generally required to publish their work, LaTeX has become an integral part of today's publishing workflow. On the other hand, modern research increasingly relies on CAS to simplify, manipulate, compute, and visualize mathematics. However, existing LaTeX import functions in CAS are limited to simple arithmetic expressions and are, therefore, insufficient for most use cases. Consequently, the workflow of experimenting and publishing in the Sciences often includes time-consuming and error-prone manual conversions between presentational LaTeX and computational CAS formats.

To address the lack of a reliable and comprehensive translation tool between LaTeX and CAS, this thesis makes the following three contributions.

First, it provides an approach to semantically enhance LaTeX expressions with sufficient semantic information for translations into CAS syntaxes. This, so called, semantification process analyzes the structure of the formula and its textual context to conclude semantic information. 
The research for this semantification process additionally contributes towards related Mathematical Information Retrieval (MathIR) tasks, such as mathematical education assistance, math recommendation and question answering systems, search engines, automatic plagiarism detection, and math type assistance systems.

Second, this thesis demonstrates the first context-aware LaTeX to CAS translation framework LaCASt. LaCASt uses the developed semantification approach to transform LaTeX expressions into an intermediate semantic LaTeX format, which is then further translated to CAS based on translation patterns. These patterns were manually crafted by mathematicians to assure accurate and reliable translations. In comparison, this thesis additionally elaborates a non-context aware neural machine translation approach trained on a mathematical library generated by Mathematica. 

Third, the thesis provides a novel approach to evaluate the performance for LaTeX to CAS translations on large-scaled datasets with an automatic verification of equations in digital mathematical libraries. This evaluation approach is based on the assumption that equations in digital mathematical libraries can be computationally verified by CAS, if a translation between both systems exists. In addition, the thesis provides an in-depth manual evaluation on mathematical articles from the English Wikipedia.

The presented context-aware translation framework LaCASt increases the efficiency and reliability of translations to CAS. Via LaCASt, we strengthened the Digital Library of Mathematical Functions (DLMF) by identifying numerous of issues, from missing or wrong semantic annotations to sign errors. 
Further, via LaCASt, we were able to discover several issues with the commercial CAS Maple and Mathematica.
The fundamental approaches to semantically enhance mathematics developed in this thesis additionally contributed towards several related MathIR tasks. For instance, the large-scale analysis of mathematical notations and the studies on math-embeddings motivated new approaches for math plagiarism detection systems, search engines, and allow typing assistance for mathematical inputs. Finally, LaCASt translations will have a direct real-world impact, as they are scheduled to be integrated into upcoming versions of the DLMF and Wikipedia.},
	language = {en},
	urldate = {2023-02-24},
	publisher = {Springer Fachmedien Wiesbaden},
	author = {Greiner-Petter, Andre},
	year = {2023},
	doi = {10.1007/978-3-658-40473-4},
	note = {Doctoral Dissertation at University of Wuppertal, Germany},
}

This thesis addresses the issue of translating mathematical expressions from LaTeX to the syntax of Computer Algebra Systems (CAS). Over the past decades, especially in the domain of Sciences, Technology, Engineering, and Mathematics (STEM), LaTeX has become the de-facto standard to typeset mathematical formulae in publications. Since scientists are generally required to publish their work, LaTeX has become an integral part of today's publishing workflow. On the other hand, modern research increasingly relies on CAS to simplify, manipulate, compute, and visualize mathematics. However, existing LaTeX import functions in CAS are limited to simple arithmetic expressions and are, therefore, insufficient for most use cases. Consequently, the workflow of experimenting and publishing in the Sciences often includes time-consuming and error-prone manual conversions between presentational LaTeX and computational CAS formats. To address the lack of a reliable and comprehensive translation tool between LaTeX and CAS, this thesis makes the following three contributions. First, it provides an approach to semantically enhance LaTeX expressions with sufficient semantic information for translations into CAS syntaxes. This, so called, semantification process analyzes the structure of the formula and its textual context to conclude semantic information. The research for this semantification process additionally contributes towards related Mathematical Information Retrieval (MathIR) tasks, such as mathematical education assistance, math recommendation and question answering systems, search engines, automatic plagiarism detection, and math type assistance systems. Second, this thesis demonstrates the first context-aware LaTeX to CAS translation framework LaCASt. LaCASt uses the developed semantification approach to transform LaTeX expressions into an intermediate semantic LaTeX format, which is then further translated to CAS based on translation patterns. These patterns were manually crafted by mathematicians to assure accurate and reliable translations. In comparison, this thesis additionally elaborates a non-context aware neural machine translation approach trained on a mathematical library generated by Mathematica. Third, the thesis provides a novel approach to evaluate the performance for LaTeX to CAS translations on large-scaled datasets with an automatic verification of equations in digital mathematical libraries. This evaluation approach is based on the assumption that equations in digital mathematical libraries can be computationally verified by CAS, if a translation between both systems exists. In addition, the thesis provides an in-depth manual evaluation on mathematical articles from the English Wikipedia. The presented context-aware translation framework LaCASt increases the efficiency and reliability of translations to CAS. Via LaCASt, we strengthened the Digital Library of Mathematical Functions (DLMF) by identifying numerous of issues, from missing or wrong semantic annotations to sign errors. Further, via LaCASt, we were able to discover several issues with the commercial CAS Maple and Mathematica. The fundamental approaches to semantically enhance mathematics developed in this thesis additionally contributed towards several related MathIR tasks. For instance, the large-scale analysis of mathematical notations and the studies on math-embeddings motivated new approaches for math plagiarism detection systems, search engines, and allow typing assistance for mathematical inputs. Finally, LaCASt translations will have a direct real-world impact, as they are scheduled to be integrated into upcoming versions of the DLMF and Wikipedia.

Do the Math: Making Mathematics in Wikipedia Computable. Greiner-Petter, A.; Schubotz, M.; Breitinger, C.; Scharpf, P.; Aizawa, A.; and Gipp, B. IEEE Transactions on Pattern Analysis and Machine Intelligence, 45(4): 4384–4395. 2023. Journal Rank Q1; IF: 24.314

Do the Math: Making Mathematics in Wikipedia Computable [link]

Paper doi link bibtex abstract 2 downloads

@article{BibbaseGreinerPetter23b,
	title = {Do the {Math}: {Making} {Mathematics} in {Wikipedia} {Computable}},
	volume = {45},
	issn = {0162-8828, 1939-3539},
	shorttitle = {Do the {Math}},
	url = {https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=9847017},
	doi = {10.1109/TPAMI.2022.3195261},
	abstract = {Wikipedia combines the power of AI solutions and human reviewers to safeguard article quality. Quality control objectives include detecting malicious edits, fixing typos, and spotting inconsistent formatting. However, no automated quality control mechanisms currently exist for mathematical formulae. Spell checkers are widely used to highlight textual errors, yet no equivalent tool exists to detect algebraically incorrect formulae. Our paper addresses this shortcoming by making mathematical formulae computable.

We present a method that (1) gathers the semantic information surrounding the context of each mathematical formulae, (2) provides access to the information in a graph-structured dependency hierarchy, and (3) performs automatic plausibility checks on equations. We evaluate the performance of our approach on 6,337 mathematical expressions contained in 104 Wikipedia articles on the topic of orthogonal polynomials and special functions. Our system, LaCASt, verified 358 out of 1,516 equations as error-free. LaCASt successfully translated 27\% of the mathematical expressions and outperformed existing translation approaches by 16\%. Additionally, LaCASt achieved an F1 score of .495 for annotating mathematical expressions with relevant textual descriptions, which is a significant step towards advancing searchability, readability, and accessibility of mathematical formulae in Wikipedia. 

A prototype of LaCASt and the semantically enhanced Wikipedia articles are available at: https://tpami.wmflabs.org.},
	number = {4},
	urldate = {2022-10-03},
	journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
	author = {Greiner-Petter, Andre and Schubotz, Moritz and Breitinger, Corinna and Scharpf, Philipp and Aizawa, Akiko and Gipp, Bela},
	year = {2023},
	note = {Journal Rank Q1; IF: 24.314},
	pages = {4384--4395},
}

TEIMMA: The First Content Reuse Annotator for Text, Images, and Math. Satpute, A.; Greiner-Petter, A.; Schubotz, M.; Meuschke, N.; Aizawa, A.; Teschke, O.; and Gipp, B. In 2023 ACM/IEEE Joint Conference on Digital Libraries (JCDL), pages 271–273, Santa Fe, NM, USA, June 2023. IEEE

TEIMMA: The First Content Reuse Annotator for Text, Images, and Math [pdf]

Paper doi link bibtex abstract

@inproceedings{BibbaseSatpute23,
	address = {Santa Fe, NM, USA},
	title = {{TEIMMA}: {The} {First} {Content} {Reuse} {Annotator} for {Text}, {Images}, and {Math}},
	isbn = {9798350399318},
	shorttitle = {{TEIMMA}},
	url = {https://arxiv.org/pdf/2305.13193.pdf},
	doi = {10.1109/JCDL57899.2023.00056},
	abstract = {This demo paper presents the first tool to annotate the reuse of text, images, and mathematical formulae in a document pair -- TEIMMA. Annotating content reuse is particularly useful to develop plagiarism detection algorithms. Real-world content reuse is often obfuscated, which makes it challenging to identify such cases. TEIMMA allows entering the obfuscation type to enable novel classifications for confirmed cases of plagiarism. It enables recording different reuse types for text, images, and mathematical formulae in HTML and supports users by visualizing the content reuse in a document pair using similarity detection methods for text and math.},
	urldate = {2023-11-03},
	booktitle = {2023 {ACM}/{IEEE} {Joint} {Conference} on {Digital} {Libraries} ({JCDL})},
	publisher = {IEEE},
	author = {Satpute, Ankit and Greiner-Petter, Andre and Schubotz, Moritz and Meuschke, Norman and Aizawa, Akiko and Teschke, Olaf and Gipp, Bela},
	month = jun,
	year = {2023},
	pages = {271--273},
}

Neural Machine Translation for Mathematical Formulae. Petersen, F.; Schubotz, M.; Greiner-Petter, A.; and Gipp, B. In Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pages 11534–11550, Toronto, Canada, 2023. Association for Computational Linguistics Core Rank A*

Neural Machine Translation for Mathematical Formulae [link]

Paper doi link bibtex abstract

@inproceedings{BibbasePetersen23,
	address = {Toronto, Canada},
	title = {Neural {Machine} {Translation} for {Mathematical} {Formulae}},
	url = {https://aclanthology.org/2023.acl-long.645},
	doi = {10.18653/v1/2023.acl-long.645},
	abstract = {We tackle the problem of neural machine translation of mathematical formulae between ambiguous presentation languages and unambiguous content languages. Compared to neural machine translation on natural language, mathematical formulae have a much smaller vocabulary and much longer sequences of symbols, while their translation requires extreme precision to satisfy mathematical information needs. In this work, we perform the tasks of translating from LaTeX to Mathematica as well as from LaTeX to semantic LaTeX. While recurrent, recursive, and transformer networks struggle with preserving all contained information, we find that convolutional sequence-to-sequence networks achieve 95.1\% and 90.7\% exact matches, respectively.},
	language = {en},
	urldate = {2023-11-03},
	booktitle = {Proceedings of the 61st {Annual} {Meeting} of the {Association} for {Computational} {Linguistics} ({Volume} 1: {Long} {Papers})},
	publisher = {Association for Computational Linguistics},
	author = {Petersen, Felix and Schubotz, Moritz and Greiner-Petter, Andre and Gipp, Bela},
	year = {2023},
	note = {Core Rank A*},
	pages = {11534--11550},
}

2022 (3)

Comparative Verification of the Digital Library of Mathematical Functions and Computer Algebra Systems. Greiner-Petter, A.; Cohl, H. S.; Youssef, A.; Schubotz, M.; Trost, A.; Dey, R.; Aizawa, A.; and Gipp, B. In Tools and Algorithms for the Construction and Analysis of Systems - 28th International Conference, (TACAS), of Lecture Notes in Computer Science, pages 87–105, Munich, Germany, April 2022. Springer Core Rank A

Comparative Verification of the Digital Library of Mathematical Functions and Computer Algebra Systems [link]

Paper doi link bibtex abstract 4 downloads

@inproceedings{BibbaseGreinerPetter22a,
	address = {Munich, Germany},
	series = {Lecture {Notes} in {Computer} {Science}},
	title = {Comparative {Verification} of the {Digital} {Library} of {Mathematical} {Functions} and {Computer} {Algebra} {Systems}},
	url = {https://arxiv.org/abs/2201.09488},
	doi = {10.1007/978-3-030-99524-9_5},
	abstract = {Digital mathematical libraries assemble the knowledge of years of mathematical research. Numerous disciplines (e.g., physics, engineering, pure and applied mathematics) rely heavily on compendia gathered findings. Likewise, modern research applications rely more and more on computational solutions, which are often calculated and verified by computer algebra systems. Hence, the correctness, accuracy, and reliability of both digital mathematical libraries and computer algebra systems is a crucial attribute for modern research.

In this paper, we present a novel approach to verify a digital mathematical library and two computer algebra systems with one another by converting mathematical expressions from one system to the other. We use our previously developed conversion tool (referred to as LaCASt) to translate formulae from the NIST Digital Library of Mathematical Functions to the computer algebra systems Maple and Mathematica. The contributions of our presented work are as follows: (1) we present the most comprehensive verification of computer algebra systems and digital mathematical libraries with one another; (2) we significantly enhance the performance of the underlying translator in terms of coverage and accuracy; and (3) we provide open access to translations for Maple and Mathematica of the formulae in the NIST Digital Library of Mathematical Functions.},
	booktitle = {Tools and {Algorithms} for the {Construction} and {Analysis} of {Systems} - 28th {International} {Conference}, ({TACAS})},
	publisher = {Springer},
	author = {Greiner-Petter, Andre and Cohl, Howard S. and Youssef, Abdou and Schubotz, Moritz and Trost, Avi and Dey, Rajen and Aizawa, Akiko and Gipp, Bela},
	month = apr,
	year = {2022},
	note = {Core Rank A},
	pages = {87--105},
}

Collaborative and AI-aided Exam Question Generation using Wikidata in Education. Scharpf, P.; Schubotz, M.; Spitz, A.; Greiner-Petter, A.; and Gipp, B. In Proceedings of the 3rd Wikidata Workshop 2022 co-located with the 21st International Semantic Web Conference (ISWC2022), volume 3262, of CEUR Workshop Proceedings, Hanghzou, China, 2022. CEUR-WS.org Core Rank A

Collaborative and AI-aided Exam Question Generation using Wikidata in Education [pdf]

Paper link bibtex abstract

@inproceedings{BibbaseScharpf22,
	address = {Hanghzou, China},
	series = {{CEUR} {Workshop} {Proceedings}},
	title = {Collaborative and {AI}-aided {Exam} {Question} {Generation} using {Wikidata} in {Education}},
	volume = {3262},
	url = {https://ceur-ws.org/Vol-3262/paper13.pdf},
	abstract = {Since the COVID-19 outbreak, the use of digital learning or education platforms has significantly increased. Teachers now digitally distribute homework and provide exercise questions. In both cases, teachers need to continuously develop novel and individual questions. This process can be very time-consuming and should be facilitated and accelerated both through exchange with other teachers and by using Artificial Intelligence (AI) capabilities. To address this need, we propose a multilingual Wikimedia framework that allows for collaborative worldwide teacher knowledge engineering and subsequent AI-aided question generation, test, and correction. As a proof of concept, we present {\textgreater}{\textgreater}PhysWikiQuiz{\textless}{\textless}, a physics question generation and test engine. Our system (hosted by Wikimedia at https://physwikiquiz.wmflabs.org/) retrieves physics knowledge from the open community-curated database Wikidata. It can generate questions in different variations and verify answer values and units using a Computer Algebra System (CAS). We evaluate the performance on a public benchmark dataset at each stage of the system workflow. For an average formula with three variables, the system can generate and correct up to 300 questions for individual students based on a single formula concept name as input by the teacher.},
	language = {en},
	booktitle = {Proceedings of the 3rd {Wikidata} {Workshop} 2022 co-located with the 21st {International} {Semantic} {Web} {Conference} ({ISWC2022})},
	publisher = {CEUR-WS.org},
	author = {Scharpf, Philipp and Schubotz, Moritz and Spitz, Andreas and Greiner-Petter, Andre and Gipp, Bela},
	year = {2022},
	note = {Core Rank A},
}

Caching and Reproducibility: Making Data Science Experiments Faster and FAIRer. Schubotz, M.; Satpute, A.; Greiner-Petter, A.; Aizawa, A.; and Gipp, B. Frontiers in Research Metrics and Analytics, 7: 861944. April 2022.

Caching and Reproducibility: Making Data Science Experiments Faster and FAIRer [link]

Paper doi link bibtex abstract 2 downloads

@article{BibbaseSchubotzSGA22,
	title = {Caching and {Reproducibility}: {Making} {Data} {Science} {Experiments} {Faster} and {FAIRer}},
	volume = {7},
	issn = {2504-0537},
	shorttitle = {Caching and {Reproducibility}},
	url = {https://www.frontiersin.org/articles/10.3389/frma.2022.861944/full},
	doi = {10.3389/frma.2022.861944},
	abstract = {Small to medium-scale data science experiments often rely on research software developed
              ad-hoc
              by individual scientists or small teams. Often there is no time to make the research software fast, reusable, and open access. The consequence is twofold. First, subsequent researchers must spend significant work hours building upon the proposed hypotheses or experimental framework. In the worst case, others cannot reproduce the experiment and reuse the findings for subsequent research. Second, suppose the
              ad-hoc
              research software fails during often long-running computational expensive experiments. In that case, the overall effort to iteratively improve the software and rerun the experiments creates significant time pressure on the researchers. We suggest making caching an integral part of the research software development process, even before the first line of code is written. This article outlines caching recommendations for developing research software in data science projects. Our recommendations provide a perspective to circumvent common problems such as propriety dependence, speed, etc. At the same time, caching contributes to the reproducibility of experiments in the open science workflow. Concerning the four guiding principles, i.e., Findability, Accessibility, Interoperability, and Reusability (FAIR), we foresee that including the proposed recommendation in a research software development will make the data related to that software FAIRer for both machines and humans. We exhibit the usefulness of some of the proposed recommendations on our recently completed research software project in mathematical information retrieval.},
	urldate = {2022-08-24},
	journal = {Frontiers in Research Metrics and Analytics},
	author = {Schubotz, Moritz and Satpute, Ankit and Greiner-Petter, Andre and Aizawa, Akiko and Gipp, Bela},
	month = apr,
	year = {2022},
	pages = {861944},
}

2021 (2)

Comparative Verification of Digital Mathematical Libraries and Computer Algebra Systems. Greiner-Petter, A. March 2021. Invited Talk

Comparative Verification of Digital Mathematical Libraries and Computer Algebra Systems [link]

Paper link bibtex 2 downloads

@misc{BibbaseGreinerPetterTalkSIGMathLing,
	type = {Invited talk},
	title = {Comparative {Verification} of {Digital} {Mathematical} {Libraries} and {Computer} {Algebra} {Systems}},
	url = {https://sigmathling.kwarc.info/seminar/},
	author = {Greiner-Petter, Andre},
	month = mar,
	year = {2021},
	note = {Invited Talk},
}

Semantic Preserving Translations between NIST's Digital Library of Mathematical Functions and Computer Algebra Systems. Greiner-Petter, A. July 2021. Invited Talk

Semantic Preserving Translations between NIST's Digital Library of Mathematical Functions and Computer Algebra Systems [link]

Paper link bibtex 1 download

@misc{BibbaseGreinerPetterTalkACMD,
	type = {Invited talk},
	title = {Semantic {Preserving} {Translations} between {NIST}'s {Digital} {Library} of {Mathematical} {Functions} and {Computer} {Algebra} {Systems}},
	url = {https://www.nist.gov/itl/math/acmd-seminar-semantic-preserving-translations-between-nists-digital-library-mathematical},
	author = {Greiner-Petter, Andre},
	month = jul,
	year = {2021},
	note = {Invited Talk},
}

2020 (6)

Towards Grounding of Formulae. Asakura, T.; Greiner-Petter, A.; Aizawa, A.; and Miyao, Y. In Proceedings of the First Workshop on Scholarly Document Processing (SDP@EMNLP), pages 138–147, Online, 2020. ACL Core Rank A*

Paper doi link bibtex abstract 1 download

@inproceedings{BibbaseAsakuraGAM20,
	address = {Online},
	title = {Towards {Grounding} of {Formulae}},
	url = {https://www.aclweb.org/anthology/2020.sdp-1.16},
	doi = {10/gjzg2r},
	abstract = {A large amount of scientific knowledge is represented within mixed forms of
natural language texts and mathematical formulae. Therefore, a collaboration of
natural language processing and formula analyses, so-called mathematical
language processing, is necessary to enable computers to understand and
retrieve information from the documents. However, as we will show in this
project, a mathematical notation can change its meaning even within the scope
of a single paragraph. This flexibility makes it difficult to extract the exact
meaning of a mathematical formula. In this project, we will propose a new task
direction for grounding mathematical formulae. Particularly, we are addressing
the widespread misconception of various research projects in mathematical
information retrieval, which presume that mathematical notations have a fixed
meaning within a single document. We manually annotated a long scientific paper
to illustrate the task concept. Our high inter-annotator agreement shows that
the task is well understood for humans. Our results indicate that it is
worthwhile to grow the techniques for the proposed task to contribute to the
further progress of mathematical language processing.},
	language = {en},
	urldate = {2021-08-02},
	booktitle = {Proceedings of the {First} {Workshop} on {Scholarly} {Document} {Processing} ({SDP}@{EMNLP})},
	publisher = {ACL},
	author = {Asakura, Takuto and Greiner-Petter, Andre and Aizawa, Akiko and Miyao, Yusuke},
	year = {2020},
	note = {Core Rank A*},
	pages = {138--147},
}

Making Presentation Math Computable: Proposing a Context Sensitive Approach for Translating LaTeX to Computer Algebra Systems. Greiner-Petter, A.; Schubotz, M.; Aizawa, A.; and Gipp, B. In Bigatti, A. M.; Carette, J.; Davenport, J. H.; Joswig, M.; and de Wolff, T., editor(s), International Congress of Mathematical Software (ICMS), volume 12097, of Lecture Notes in Computer Science, pages 335–341, Braunschweig, Germany, 2020. Springer

Making Presentation Math Computable: Proposing a Context Sensitive Approach for Translating LaTeX to Computer Algebra Systems [pdf]

Paper doi link bibtex abstract

@inproceedings{BibbaseGreinerPetterSAG20,
	address = {Braunschweig, Germany},
	series = {Lecture {Notes} in {Computer} {Science}},
	title = {Making {Presentation} {Math} {Computable}: {Proposing} a {Context} {Sensitive} {Approach} for {Translating} {LaTeX} to {Computer} {Algebra} {Systems}},
	volume = {12097},
	isbn = {978-3-030-52199-8},
	shorttitle = {Making {Presentation} {Math} {Computable}},
	url = {https://link.springer.com/content/pdf/10.1007%2F978-3-030-52200-1_33.pdf},
	doi = {10.1007/978-3-030-52200-1_33},
	abstract = {Scientists increasingly rely on computer algebra systems and digital mathematical libraries to compute, validate, or experiment with mathematical formulae. 
However, the focus in digital mathematical libraries and scientific documents often lies more on an accurate presentation of the formulae rather than providing uniform access to the semantic information. 
But, presentational math formats do not provide exclusive access to the underlying semantic meanings. 
One has to derive the semantic information from the context.
As a consequence, the workflow of experimenting and publishing in the Sciences often includes time-consuming, error-prone manual conversions between presentational and computational math formats.
As a contribution to improve this workflow, we propose a context-sensitive approach that extracts semantic information from a given context, embeds the information into the given input, and converts the semantically enhanced expressions to computer algebra systems.},
	language = {en},
	urldate = {2021-07-30},
	booktitle = {International {Congress} of {Mathematical} {Software} ({ICMS})},
	publisher = {Springer},
	author = {Greiner-Petter, Andre and Schubotz, Moritz and Aizawa, Akiko and Gipp, Bela},
	editor = {Bigatti, Anna Maria and Carette, Jacques and Davenport, James H. and Joswig, Michael and de Wolff, Timo},
	year = {2020},
	pages = {335--341},
}

Discovering Mathematical Objects of Interest — A Study of Mathematical Notations. Greiner-Petter, A.; Schubotz, M.; Müller, F.; Breitinger, C.; Cohl, H.; Aizawa, A.; and Gipp, B. In Proceedings of The Web Conference (WWW), pages 1445–1456, Taipei, Taiwan, April 2020. ACM Core Rank A*

Discovering Mathematical Objects of Interest — A Study of Mathematical Notations [link]

Paper doi link bibtex abstract

@inproceedings{BibbaseGreinerPetterSMB20,
	address = {Taipei, Taiwan},
	title = {Discovering {Mathematical} {Objects} of {Interest} — {A} {Study} of {Mathematical} {Notations}},
	isbn = {978-1-4503-7023-3},
	url = {https://arxiv.org/abs/2002.02712},
	doi = {10.1145/3366423.3380218},
	abstract = {Mathematical notation, i.e., the writing system used to communicate concepts in mathematics, encodes valuable information for a variety of information search and retrieval systems. Yet, mathematical notations remain mostly unutilized by today's systems. In this paper, we present the first in-depth study on the distributions of mathematical notation in two large scientific corpora: the open access arXiv (2.5B mathematical objects) and the mathematical reviewing service for pure and applied mathematics zbMATH (61M mathematical objects). Our study lays a foundation for future research projects on mathematical information retrieval for large scientific corpora. Further, we demonstrate the relevance of our results to a variety of use-cases. For example, to assist semantic extraction systems, to improve scientific search engines, and to facilitate specialized math recommendation systems.

The contributions of our presented research are as follows: (1) we present the first distributional analysis of mathematical formulae on arXiv and zbMATH; (2) we retrieve relevant mathematical objects for given textual search queries (e.g., linking \$P\_\{n\}{\textasciicircum}\{({\textbackslash}alpha, {\textbackslash}beta)\}{\textbackslash}!{\textbackslash}left(x{\textbackslash}right)\$ with `Jacobi polynomial'); (3) we extend zbMATH's search engine by providing relevant mathematical formulae; and (4) we exemplify the applicability of the results by presenting auto-completion for math inputs as the first contribution to math recommendation systems. To expedite future research projects, we have made available our source code and data.},
	language = {en},
	urldate = {2021-07-30},
	booktitle = {Proceedings of {The} {Web} {Conference} ({WWW})},
	publisher = {ACM},
	author = {Greiner-Petter, Andre and Schubotz, Moritz and Müller, Fabian and Breitinger, Corinna and Cohl, Howard and Aizawa, Akiko and Gipp, Bela},
	month = apr,
	year = {2020},
	note = {Core Rank A*},
	pages = {1445--1456},
}

Math-Word Embedding in Math Search and Semantic Extraction. Greiner-Petter, A.; Youssef, A.; Ruas, T.; Miller, B. R.; Schubotz, M.; Aizawa, A.; and Gipp, B. Scientometrics, 125(3): 3017–3046. December 2020. Journal Rank Q1; IF: 3.702

Paper doi link bibtex abstract

@article{BibbaseGreinerPetterYRM20,
	title = {Math-{Word} {Embedding} in {Math} {Search} and {Semantic} {Extraction}},
	volume = {125},
	issn = {0138-9130, 1588-2861},
	url = {https://link.springer.com/10.1007/s11192-020-03502-9},
	doi = {10.1007/s11192-020-03502-9},
	abstract = {Word embedding, which represents individual words with semantically fixed-length vectors, has made it possible to successfully apply deep learning to natural language processing tasks such as semantic role-modeling, question answering, and machine translation. As math text consists of natural text, as well as math expressions that similarly exhibit linear correlation and contextual characteristics, word embedding techniques can also be applied to math documents. However, while mathematics is a precise and accurate science, it is usually expressed through imprecise and less accurate descriptions, contributing to the relative dearth of machine learning applications for information retrieval in this domain. Generally, mathematical documents communicate their knowledge with an ambiguous, context-dependent, and non-formal language. Given recent advances in word embedding, it is worthwhile to explore their use and effectiveness in math information retrieval tasks, such as math language processing and semantic knowledge extraction. In this paper, we explore math embedding by testing it on several different scenarios, namely, (1) math-term similarity, (2) analogy, (3) numerical concept-modeling based on the centroid of the keywords that characterize a concept, (4) math search using query expansions, and (5) semantic extraction, i.e., extracting descriptive phrases for math expressions. Due to the lack of benchmarks, our investigations were performed using the arXiv collection of STEM documents and carefully selected illustrations on the Digital Library of Mathematical Functions (DLMF: NIST digital library of mathematical functions. Release 1.0.20 of 2018-09-1, 2018). Our results show that math embedding holds much promise for similarity, analogy, and search tasks. However, we also observed the need for more robust math embedding approaches. Moreover, we explore and discuss fundamental issues that we believe thwart the progress in mathematical information retrieval in the direction of machine learning.},
	language = {en},
	number = {3},
	urldate = {2021-06-30},
	journal = {Scientometrics},
	author = {Greiner-Petter, Andre and Youssef, Abdou and Ruas, Terry and Miller, Bruce R. and Schubotz, Moritz and Aizawa, Akiko and Gipp, Bela},
	month = dec,
	year = {2020},
	note = {Journal Rank Q1; IF: 3.702},
	pages = {3017--3046},
}

Word embedding, which represents individual words with semantically fixed-length vectors, has made it possible to successfully apply deep learning to natural language processing tasks such as semantic role-modeling, question answering, and machine translation. As math text consists of natural text, as well as math expressions that similarly exhibit linear correlation and contextual characteristics, word embedding techniques can also be applied to math documents. However, while mathematics is a precise and accurate science, it is usually expressed through imprecise and less accurate descriptions, contributing to the relative dearth of machine learning applications for information retrieval in this domain. Generally, mathematical documents communicate their knowledge with an ambiguous, context-dependent, and non-formal language. Given recent advances in word embedding, it is worthwhile to explore their use and effectiveness in math information retrieval tasks, such as math language processing and semantic knowledge extraction. In this paper, we explore math embedding by testing it on several different scenarios, namely, (1) math-term similarity, (2) analogy, (3) numerical concept-modeling based on the centroid of the keywords that characterize a concept, (4) math search using query expansions, and (5) semantic extraction, i.e., extracting descriptive phrases for math expressions. Due to the lack of benchmarks, our investigations were performed using the arXiv collection of STEM documents and carefully selected illustrations on the Digital Library of Mathematical Functions (DLMF: NIST digital library of mathematical functions. Release 1.0.20 of 2018-09-1, 2018). Our results show that math embedding holds much promise for similarity, analogy, and search tasks. However, we also observed the need for more robust math embedding approaches. Moreover, we explore and discuss fundamental issues that we believe thwart the progress in mathematical information retrieval in the direction of machine learning.

ARQMath Lab: An Incubator for Semantic Formula Search in zbMATH Open?. Scharpf, P.; Schubotz, M.; Greiner-Petter, A.; Ostendorff, M.; Teschke, O.; and Gipp, B. In Working Notes of (CLEF) 2020 - Conference and Labs of the Evaluation Forum, volume 2696, Thessaloniki, Greece, 2020. CEUR-WS.org

Paper link bibtex abstract 1 download

@inproceedings{BibbaseScharpfSGOTG20,
	address = {Thessaloniki, Greece},
	title = {{ARQMath} {Lab}: {An} {Incubator} for {Semantic} {Formula} {Search} in {zbMATH} {Open}?},
	volume = {2696},
	url = {http://ceur-ws.org/Vol-2696/paper_200.pdf},
	abstract = {The zbMATH database contains more than 4 million bibliographic entries. We aim to provide easy access to these entries. Therefore, we maintain dif-ferent index structures including a formula index. To optimize the findability of the entries in our database, we constantly investigate new approaches to satisfy the information needs of our users. We believe that the findings from the ARQMath evaluation will generate new insights into which index struc-tures are most suitable to satisfy mathematical information needs. Search en-gines, recommender systems, plagiarism checking software, and many other added-value services acting on databases such as the arXiv and zbMATH need to combine natural and formula language. One initial approach to ad-dress this challenge is to enrich the mostly unstructured document data via Entity Linking. The ARQMath Task at CLEF 2020 aims to tackle the problem of linking newly posted questions from Math Stack Exchange (MSE) to exist-ing ones that were already answered by the community. To deeply under-stand MSE information needs, answer-, and formula types, we performed manual runs for tasks 1 and 2. Furthermore, we explored several formula re-trieval methods for task 2, such as fuzzy string search, k-nearest neighbors, and our recently introduced approach to retrieve Mathematical Objects of In-terest (MOI) with textual search queries. The task results show that neither our automated methods nor our manual runs archived good scores in the competition. However, the perceived quality of the hits returned by the MOI search particularly motivates us to conduct further research about MOI.},
	booktitle = {Working {Notes} of ({CLEF}) 2020 - {Conference} and {Labs} of the {Evaluation} {Forum}},
	publisher = {CEUR-WS.org},
	author = {Scharpf, Philipp and Schubotz, Moritz and Greiner-Petter, Andre and Ostendorff, Malte and Teschke, Olaf and Gipp, Bela},
	year = {2020},
}

Mathematical Formulae in Wikimedia Projects 2020. Schubotz, M.; Greiner-Petter, A.; Meuschke, N.; Teschke, O.; and Gipp, B. In Proceedings of the ACM/IEEE Joint Conference on Digital Libraries (JCDL), pages 447–448, Virtual Event, China, August 2020. ACM Core Rank A*

Mathematical Formulae in Wikimedia Projects 2020 [link]

Paper doi link bibtex abstract 15 downloads

@inproceedings{BibbaseSchubotzGMT20,
	address = {Virtual Event, China},
	title = {Mathematical {Formulae} in {Wikimedia} {Projects} 2020},
	isbn = {978-1-4503-7585-6},
	url = {https://arxiv.org/abs/2003.09417},
	doi = {10/ghn2t2},
	abstract = {This poster summarizes our contributions to Wikimedia's processing pipeline for mathematical formulae.
We describe how we have supported the transition from rendering formulae as course-grained PNG images in 2001 to providing modern semantically enriched language-independent MathML formulae in 2020.
Additionally, we describe our plans to improve the accessibility and discoverability of mathematical knowledge in Wikimedia projects further.},
	language = {en},
	urldate = {2021-08-02},
	booktitle = {Proceedings of the {ACM}/{IEEE} {Joint} {Conference} on {Digital} {Libraries} ({JCDL})},
	publisher = {ACM},
	author = {Schubotz, Moritz and Greiner-Petter, Andre and Meuschke, Norman and Teschke, Olaf and Gipp, Bela},
	month = aug,
	year = {2020},
	note = {Core Rank A*},
	pages = {447--448},
}

2019 (3)

Automatic Mathematical Information Retrieval to Perform Translations up to Computer Algebra Systems. Greiner-Petter, A. Bulletin of IEEE Technical Committee on Digital Libraries (TCDL), 15(1). 2019.

Automatic Mathematical Information Retrieval to Perform Translations up to Computer Algebra Systems [pdf]

Paper link bibtex abstract

@article{BibbaseGreinerPetterD19,
	title = {Automatic {Mathematical} {Information} {Retrieval} to {Perform} {Translations} up to {Computer} {Algebra} {Systems}},
	volume = {15},
	url = {https://arxiv.org/pdf/2011.14616.pdf},
	abstract = {In mathematics, LaTeX is the de facto standard to prepare documents, e.g., scientific publications. While some formulae are still developed using pen and paper, more complicated mathematical expressions used more and more often with computer algebra systems. Mathematical expressions are often manually transcribed to computer algebra systems. The goal of my doctoral thesis is to improve the efficiency of this workflow. My envisioned method will automatically semantically enrich mathematical expressions so that they can be imported to computer algebra systems and other systems which can take advantage of the semantics, such as search engines or automatic plagiarism detection systems. These imports should preserve essential semantic features of the expression.},
	number = {1},
	journal = {Bulletin of IEEE Technical Committee on Digital Libraries (TCDL)},
	author = {Greiner-Petter, Andre},
	year = {2019},
}

Why Machines Cannot Learn Mathematics, Yet. Greiner-Petter, A.; Ruas, T.; Schubotz, M.; Aizawa, A.; Grosky, W. I.; and Gipp, B. In Proceedings of the 4th Joint Workshop on Bibliometric-Enhanced Information Retrieval and Natural Language Processing for Digital Libraries (BIRNDL@SIGIR), volume 2414, Paris, France, 2019. CEUR-WS.org Core Rank A*

Why Machines Cannot Learn Mathematics, Yet [pdf]

Paper link bibtex abstract 2 downloads

@inproceedings{BibbaseGreinerPetterRSA19,
	address = {Paris, France},
	title = {Why {Machines} {Cannot} {Learn} {Mathematics}, {Yet}},
	volume = {2414},
	url = {http://ceur-ws.org/Vol-2414/paper14.pdf},
	abstract = {Nowadays, Machine Learning (ML) is seen as the universal solution to improve the effectiveness of information retrieval (IR) methods. However, while mathematics is a precise and accurate science, it is usually expressed by less accurate and imprecise descriptions. Generally, mathematical documents communicate their knowledge with an ambiguous, context-dependent, and non-formal language. In this work, we apply text embedding techniques to the arXiv collection of STEM documents and explore how these are unable to properly understand mathematics from that corpus, while proposing alternative to mitigate such situation.},
	booktitle = {Proceedings of the 4th {Joint} {Workshop} on {Bibliometric}-{Enhanced} {Information} {Retrieval} and {Natural} {Language} {Processing} for {Digital} {Libraries} ({BIRNDL}@{SIGIR})},
	publisher = {CEUR-WS.org},
	author = {Greiner-Petter, Andre and Ruas, Terry and Schubotz, Moritz and Aizawa, Akiko and Grosky, William I. and Gipp, Bela},
	year = {2019},
	note = {Core Rank A*},
}

Semantic Preserving Bijective Mappings for Expressions involving Special Functions between Computer Algebra Systems and Document Preparation Systems. Greiner-Petter, A.; Schubotz, M.; Cohl, H. S.; and Gipp, B. Aslib Journal of Information Management, 71(3): 415–439. May 2019. Journal Rank Q1; IF: 2.653

Paper doi link bibtex abstract

@article{BibbaseGreinerPetterSCG19,
	title = {Semantic {Preserving} {Bijective} {Mappings} for {Expressions} involving {Special} {Functions} between {Computer} {Algebra} {Systems} and {Document} {Preparation} {Systems}},
	volume = {71},
	issn = {2050-3806},
	url = {https://arxiv.org/abs/1906.11485},
	doi = {10.1108/AJIM-08-2018-0185},
	abstract = {Modern mathematicians and scientists of math-related disciplines often use Document Preparation Systems (DPS) to write and Computer Algebra Systems (CAS) to calculate mathematical expressions. Usually, they translate the expressions manually between DPS and CAS. This process is time-consuming and error-prone. The purpose of this paper is to automate this translation. This paper uses Maple and Mathematica as the CAS, and LaTeX as the DPS.
Bruce Miller at the National Institute of Standards and Technology (NIST) developed a collection of special LaTeX macros that create links from mathematical symbols to their definitions in the NIST Digital Library of Mathematical Functions (DLMF). The authors are using these macros to perform rule-based translations between the formulae in the DLMF and CAS. Moreover, the authors develop software to ease the creation of new rules and to discover inconsistencies.
The authors created 396 mappings and translated 58.8 percent of DLMF formulae (2,405 expressions) successfully between Maple and DLMF. For a significant percentage, the special function definitions in Maple and the DLMF were different. An atomic symbol in one system maps to a composite expression in the other system. The translator was also successfully used for automatic verification of mathematical online compendia and CAS. The evaluation techniques discovered two errors in the DLMF and one defect in Maple.
This paper introduces the first translation tool for special functions between LaTeX and CAS. The approach improves error-prone manual translations and can be used to verify mathematical online compendia and CAS.},
	language = {en},
	number = {3},
	urldate = {2021-09-06},
	journal = {Aslib Journal of Information Management},
	author = {Greiner-Petter, Andre and Schubotz, Moritz and Cohl, Howard S. and Gipp, Bela},
	month = may,
	year = {2019},
	note = {Journal Rank Q1; IF: 2.653},
	pages = {415--439},
}

2018 (4)

Automated Symbolic and Numerical Testing of DLMF Formulae Using Computer Algebra Systems. Cohl, H. S.; Greiner-Petter, A.; and Schubotz, M. In Rabe, F.; Farmer, W. M.; Passmore, G. O.; and Youssef, A., editor(s), Proceedings International Conference on Intelligent Computer Mathematics (CICM), volume 11006, of Lecture Notes in Computer Science, pages 39–52, Hagenberg, Austria, 2018. Springer International Publishing

Automated Symbolic and Numerical Testing of DLMF Formulae Using Computer Algebra Systems [link]

Paper doi link bibtex abstract

@inproceedings{BibbaseCohlGS18,
	address = {Hagenberg, Austria},
	series = {Lecture {Notes} in {Computer} {Science}},
	title = {Automated {Symbolic} and {Numerical} {Testing} of {DLMF} {Formulae} {Using} {Computer} {Algebra} {Systems}},
	volume = {11006},
	isbn = {978-3-319-96811-7},
	url = {https://arxiv.org/abs/2109.08899},
	doi = {10/ggv8dn},
	abstract = {We have developed an automated procedure for symbolic and numerical testing
of formulae extracted from the NIST Digital Library of Mathematical Functions
(DLMF).  For the NIST Digital Repository of Mathematical Formulae, we have
developed conversion tools from semantic LaTeX to the Computer Algebra System
(CAS) Maple which relies on Youssef's part-of-math tagger.  We convert a test data subset
of 4,078 semantic LaTeX DLMF formulae
to the native CAS representation and then apply an automated scheme for symbolic and numerical
testing and verification.  Our framework is implemented using Java and Maple.
We describe in detail the conversion process which is required so that the
CAS can correctly interpret the mathematical representation of the formulae.
We describe the improvement of the effectiveness of our automated scheme through
incremental enhancement (making more precise) of the mathematical semantic markup
for the formulae.},
	urldate = {2021-09-08},
	booktitle = {Proceedings {International} {Conference} on {Intelligent} {Computer} {Mathematics} ({CICM})},
	publisher = {Springer International Publishing},
	author = {Cohl, Howard S. and Greiner-Petter, Andre and Schubotz, Moritz},
	editor = {Rabe, Florian and Farmer, William M. and Passmore, Grant O. and Youssef, Abdou},
	year = {2018},
	pages = {39--52},
}

Automatic Mathematical Information Retrieval to Perform Translations up to Computer Algebra Systems. Greiner-Petter, A. In Joint Proceedings of the CME-EI, FMM, CAAT, FVPS, M3SRD, OpenMath Workshops, Doctoral Program and Work in Progress at the Conference on Intelligent Computer Mathematics (CICM), volume 2307, Hagenberg, Austria, 2018. CEUR-WS.org

Paper link bibtex

@inproceedings{BibbaseGreinerPetterD18,
	address = {Hagenberg, Austria},
	title = {Automatic {Mathematical} {Information} {Retrieval} to {Perform} {Translations} up to {Computer} {Algebra} {Systems}},
	volume = {2307},
	url = {http://ceur-ws.org/Vol-2307/DP1.pdf},
	booktitle = {Joint {Proceedings} of the {CME}-{EI}, {FMM}, {CAAT}, {FVPS}, {M3SRD}, {OpenMath} {Workshops}, {Doctoral} {Program} and {Work} in {Progress} at the {Conference} on {Intelligent} {Computer} {Mathematics} ({CICM})},
	publisher = {CEUR-WS.org},
	author = {Greiner-Petter, Andre},
	year = {2018},
}

MathTools: An Open API for Convenient MathML Handling. Greiner-Petter, A.; Schubotz, M.; Cohl, H. S.; and Gipp, B. In Rabe, F.; Farmer, W. M.; Passmore, G. O.; and Youssef, A., editor(s), Proceedings of the International Conference on Intelligent Computer Mathematics (CICM), volume 11006, of Lecture Notes in Computer Science, pages 104–110, Hagenberg, Austria, 2018. Springer International Publishing

MathTools: An Open API for Convenient MathML Handling [link]

Paper doi link bibtex abstract

@inproceedings{BibbaseGreinerPetterSCG18,
	address = {Hagenberg, Austria},
	series = {Lecture {Notes} in {Computer} {Science}},
	title = {{MathTools}: {An} {Open} {API} for {Convenient} {MathML} {Handling}},
	volume = {11006},
	isbn = {978-3-319-96811-7},
	shorttitle = {{MathTools}},
	url = {https://arxiv.org/abs/2109.08539},
	doi = {10.1007/978-3-319-96812-4_9},
	abstract = {Mathematical formulae carry complex and essential semantic information in a variety of formats.
Accessing this information with different systems requires a standardized machine-readable format that is capable of encoding presentational and semantic information.
Even though MathML is an official recommendation by W3C and an ISO standard 
for representing mathematical expressions, we could identify only very few systems which use the full descriptiveness of MathML.
MathML's high complexity results in a steep learning curve for novice users.
We hypothesize that this complexity is the reason why many community-driven projects refrain from using MathML, and instead develop problem-specific data formats for their purposes.
We provide a user-friendly, open-source application programming interface for controlling MathML data.
Our API is written in JAVA and allows to create, manipulate, and efficiently access commonly needed information in presentation and content MathML.
Our interface also provides tools for calculating differences and similarities between MathML expressions.
The API also allows to determine the distance between expressions using different similarity measures.
In addition, we provide adapters for numerous conversion tools and the canonicalization project.
Our toolkit facilitates processing of mathematics for digital libraries, without the need to obtain XML expertise.},
	urldate = {2021-09-14},
	booktitle = {Proceedings of the {International} {Conference} on {Intelligent} {Computer} {Mathematics} ({CICM})},
	publisher = {Springer International Publishing},
	author = {Greiner-Petter, Andre and Schubotz, Moritz and Cohl, Howard S. and Gipp, Bela},
	editor = {Rabe, Florian and Farmer, William M. and Passmore, Grant O. and Youssef, Abdou},
	year = {2018},
	pages = {104--110},
}

Improving the Representation and Conversion of Mathematical Formulae by Considering their Textual Context. Schubotz, M.; Greiner-Petter, A.; Scharpf, P.; Meuschke, N.; Cohl, H. S.; and Gipp, B. In Proceedings of the 18th ACM/IEEE on Joint Conference on Digital Libraries (JCDL), pages 233–242, Fort Worth, Texas, USA, May 2018. ACM Core Rank A*

Improving the Representation and Conversion of Mathematical Formulae by Considering their Textual Context [link]

Paper doi link bibtex abstract 2 downloads

@inproceedings{BibbaseSchubotzGSM18,
	address = {Fort Worth, Texas, USA},
	title = {Improving the {Representation} and {Conversion} of {Mathematical} {Formulae} by {Considering} their {Textual} {Context}},
	isbn = {978-1-4503-5178-2},
	url = {https://arxiv.org/abs/1804.04956},
	doi = {10/ggv8jk},
	abstract = {Mathematical formulae represent complex semantic information in a concise form.
Especially in Science, Technology, Engineering, and Mathematics, mathematical formulae are crucial to communicate information, e.g., in scientific papers, and to perform computations using computer algebra systems. Enabling computers to access the information encoded in mathematical formulae requires machine-readable formats that can represent both the presentation and content, i.e., the semantics, of formulae. Exchanging such information between systems additionally requires conversion methods for mathematical representation formats. We analyze how the semantic enrichment of formulae improves the format conversion process and show that considering the textual context of formulae reduces the error rate of such conversions. 
Our main contributions are:
(1) providing an openly available benchmark dataset for the mathematical format conversion task consisting of a newly created test collection, an extensive, manually curated gold standard and task-specific evaluation metrics;
(2) performing a quantitative evaluation of state-of-the-art tools for mathematical format conversions;
(3) presenting a new approach that considers the textual context of formulae to reduce the error rate for mathematical format conversions.
Our benchmark dataset facilitates future research on mathematical format conversions as well as research on many problems in mathematical information retrieval. Because we annotated and linked all components of formulae, e.g., identifiers, operators and other entities, to Wikidata entries, the gold standard can, for instance, be used to train methods for formula concept discovery and recognition. Such methods can then be applied to improve mathematical information retrieval systems, e.g., for semantic formula search, recommendation of mathematical content, or detection of mathematical plagiarism.},
	language = {en},
	urldate = {2021-09-06},
	booktitle = {Proceedings of the 18th {ACM}/{IEEE} on {Joint} {Conference} on {Digital} {Libraries} ({JCDL})},
	publisher = {ACM},
	author = {Schubotz, Moritz and Greiner-Petter, Andre and Scharpf, Philipp and Meuschke, Norman and Cohl, Howard S. and Gipp, Bela},
	month = may,
	year = {2018},
	note = {Core Rank A*},
	pages = {233--242},
}

2017 (1)

Semantic Preserving Bijective Mappings of Mathematical Formulae Between Document Preparation Systems and Computer Algebra Systems. Cohl, H. S.; Schubotz, M.; Youssef, A.; Greiner-Petter, A.; Gerhard, J.; Saunders, B. V.; McClain, M. A.; Bang, J.; and Chen, K. In Geuvers, H.; England, M.; Hasan, O.; Rabe, F.; and Teschke, O., editor(s), Proceedings International Conference on Intelligent Computer Mathematics (CICM), volume 10383, of Lecture Notes in Computer Science, pages 115–131, Edinburgh, UK, 2017. Springer

Semantic Preserving Bijective Mappings of Mathematical Formulae Between Document Preparation Systems and Computer Algebra Systems [link]

Paper doi link bibtex abstract

@inproceedings{BibbaseCohlSYG17,
	address = {Edinburgh, UK},
	series = {Lecture {Notes} in {Computer} {Science}},
	title = {Semantic {Preserving} {Bijective} {Mappings} of {Mathematical} {Formulae} {Between} {Document} {Preparation} {Systems} and {Computer} {Algebra} {Systems}},
	volume = {10383},
	isbn = {978-3-319-62074-9},
	url = {https://arxiv.org/abs/2109.08655},
	doi = {10/ggv8dk},
	abstract = {There are many different approaches to represent mathematical expressions on
computers. Word processors like LaTeX offer the ability to render mathematical expressions as one
would write these on paper.  Using LaTeX, LaTeXML, and tools generated for use in the NIST
Digital Library of Mathematical Functions, semantically enhanced mathematical
LaTeX markup (semantic LaTeX) is achieved by using a semantic macro set.  Computer algebra
systems (CAS) such as Maple and Mathematica use alternative markup to represent
mathematical expressions.  For the conversion from semantic LaTeX to the CAS representations, we
have adapted the approach of Part of Speech Tagging from Natural Language Processing. By taking
advantage of Youssef's Part-of-Math Tagger, CAS internal representations, and locally developed
software, we develop algorithms to convert between semantic LaTeX and representations from CAS.
The goal of these efforts is to provide CAS formulae representations to the public for digital
mathematics libraries. In connection with these efforts, we have developed software which has
converted between CAS representations through semantic LaTeX to generate MediaWiki wikitext for
the NIST Digital Repository of Mathematical Formulae for the Wolfram Encoding Continued
Fraction dataset and the University of Antwerp Continued Fractions for Special Functions dataset.},
	urldate = {2021-09-08},
	booktitle = {Proceedings {International} {Conference} on {Intelligent} {Computer} {Mathematics} ({CICM})},
	publisher = {Springer},
	author = {Cohl, Howard S. and Schubotz, Moritz and Youssef, Abdou and Greiner-Petter, Andre and Gerhard, Jürgen and Saunders, Bonita V. and McClain, Marjorie A. and Bang, Joon and Chen, Kevin},
	editor = {Geuvers, Herman and England, Matthew and Hasan, Osman and Rabe, Florian and Teschke, Olaf},
	year = {2017},
	pages = {115--131},
}