% Journal article: Linguistik Online, volume 17, issue 5 (2003).
% The month uses the standard three-letter macro (unquoted) so the active
% style and language decide how it is printed; the exported value was the
% German-locale literal "Dez.".
% The abstract is stored under the conventional field name "abstract"
% (the export used the non-standard name "abstractNote").
@article{Merlo_Henderson_Schneider_Wehrli_2003,
  author   = {Merlo, Paola and Henderson, James and Schneider, Gerold and Wehrli, Eric},
  title    = {Learning Document Similarity Using Natural Language Processing},
  journal  = {Linguistik Online},
  volume   = {17},
  number   = {5},
  year     = {2003},
  month    = dec,
  doi      = {10.13092/lo.17.788},
  url      = {https://bop.unibe.ch/linguistik-online/article/view/788},
  abstract = {The recent considerable growth in the amount of easily available on-line text has brought to the foreground the need for large-scale natural language processing tools for text data mining. In this paper we address the problem of organizing documents into meaningful groups according to their content and to visualize a text collection, providing an overview of the range of documents and of their relationships, so that they can be browsed more easily. We use Self-Organizing Maps (SOMs) (Kohonen 1984). Great efficiency challenges arise in creating these maps. We study linguistically-motivated ways of reducing the representation of a document to increase efficiency and ways to disambiguate the words in the documents.},
}