% Chapter by Cook and Jensen in the edited volume "Bioinformatics and Drug
% Discovery" (Methods in Molecular Biology, vol. 1939).
% Typed as incollection because the chapter authors differ from the book editors.
% NOTE(review): `address` holds a country rather than the publisher's city --
% confirm the city and replace it.
% NOTE(review): `edition` is given as an integer (biblatex convention); classic
% BibTeX styles expect an ordinal word such as "Third" -- adjust for the target
% toolchain.
@incollection{908e1ce58ec24448b100deaa69e50bd7,
  author    = {Cook, Helen V. and Jensen, Lars Juhl},
  title     = {A Guide to Dictionary-Based Text Mining},
  editor    = {Larson, Richard S. and Oprea, Tudor I.},
  booktitle = {Bioinformatics and Drug Discovery},
  series    = {Methods in Molecular Biology},
  volume    = {1939},
  edition   = {3},
  publisher = {Humana Press},
  address   = {United States},
  year      = {2019},
  pages     = {73--89},
  isbn      = {978-1-4939-9088-7},
  doi       = {10.1007/978-1-4939-9089-4_5},
  language  = {English},
  abstract  = {PubMed contains more than 27 million documents, and this number is growing at an estimated 4\% per year. Even within specialized topics, it is no longer possible for a researcher to read any field in its entirety, and thus nobody has a complete picture of the scientific knowledge in any given field at any time. Text mining provides a means to automatically read this corpus and to extract the relations found therein as structured information. Having data in a structured format is a huge boon for computational efforts to access, cross reference, and mine the data stored therein. This is increasingly useful as biological research is becoming more focused on systems and multi-omics integration. This chapter provides an overview of the steps that are required for text mining: tokenization, named entity recognition, normalization, event extraction, and benchmarking. It discusses a variety of approaches to these tasks and then goes into detail on how to prepare data for use specifically with the JensenLab tagger. This software uses a dictionary-based approach and provides the text mining evidence for STRING and several other databases.},
}