@comment{
  Journal article record exported from the Foresight and STI Governance site.
  The citation key is kept as exported so existing cite commands keep resolving.
  The annote field carries the same text as abstract; it is kept because
  annotated-bibliography styles print annote and ignore abstract.
}
@article{26543116_217553231_2018,
  author   = {Devyatkin, Dmitry and Nechaeva, Elena and Suvorov, Roman and Tikhomirov, Ilya},
  title    = {Mapping the Research Landscape of Agricultural Sciences},
  journal  = {Foresight and {STI} Governance},
  year     = {2018},
  volume   = {12},
  number   = {1},
  pages    = {57--66},
  url      = {https://foresight-journal.hse.ru/en/2018-12-1/217553231.html},
  keywords = {text mining, science mapping, research landscape, agricultural science, publication activity, scientometrics, young researchers, Russian Science Index},
  abstract = {A research landscape is a high-level description of the current state of a certain scientific field and its dynamics. High-quality research landscapes are important tools that allow for more effective research management. This paper presents a novel framework for the mapping of research. It relies on full-text mining and topic modeling to pool data from many sources without relying on any specific taxonomy of scientific fields and areas. The framework is especially useful for scientific fields that are poorly represented in scientometric databases, i.e., Scopus or Web of Science. The high-level algorithm consists of (1) full-text collection from reliable sources; (2) the automatic extraction of research fields using topic modeling; (3) semi-automatic linking to scientometric databases; and (4) a statistical analysis of metrics for the extracted scientific areas. Full-text mining is crucial due to (a) the poor representation of many Russian research areas in systems like Scopus or Web of Science; (b) the poor quality of Russian Science Index data; and (c) the differences between taxonomies used in different data sources. Major advantages of the proposed framework include its data-driven approach, its independence from scientific subjects' taxonomies, and its ability to integrate data from multiple heterogeneous data sources. Furthermore, this framework complements traditional approaches to research mapping using scientometric software like Scopus or Web of Science rather than replacing them. We experimentally evaluated the framework using agricultural science as an example, but the framework is not limited to any particular domain. As a result, we created the first research landscape covering young researchers in agricultural science. Topic modeling yielded six major scientific areas within the field of agriculture. We found that statistically significant differences between these areas exist. This means that a differentiated approach to research management is critical. Further research on this subject includes the application of the framework to other scientific fields and the integration of other collections of research and technical documentation (especially patents).},
  annote   = {A research landscape is a high-level description of the current state of a certain scientific field and its dynamics. High-quality research landscapes are important tools that allow for more effective research management. This paper presents a novel framework for the mapping of research. It relies on full-text mining and topic modeling to pool data from many sources without relying on any specific taxonomy of scientific fields and areas. The framework is especially useful for scientific fields that are poorly represented in scientometric databases, i.e., Scopus or Web of Science. The high-level algorithm consists of (1) full-text collection from reliable sources; (2) the automatic extraction of research fields using topic modeling; (3) semi-automatic linking to scientometric databases; and (4) a statistical analysis of metrics for the extracted scientific areas. Full-text mining is crucial due to (a) the poor representation of many Russian research areas in systems like Scopus or Web of Science; (b) the poor quality of Russian Science Index data; and (c) the differences between taxonomies used in different data sources. Major advantages of the proposed framework include its data-driven approach, its independence from scientific subjects' taxonomies, and its ability to integrate data from multiple heterogeneous data sources. Furthermore, this framework complements traditional approaches to research mapping using scientometric software like Scopus or Web of Science rather than replacing them. We experimentally evaluated the framework using agricultural science as an example, but the framework is not limited to any particular domain. As a result, we created the first research landscape covering young researchers in agricultural science. Topic modeling yielded six major scientific areas within the field of agriculture. We found that statistically significant differences between these areas exist. This means that a differentiated approach to research management is critical. Further research on this subject includes the application of the framework to other scientific fields and the integration of other collections of research and technical documentation (especially patents).},
}