% Book chapter in the NIPS 19 volume. The ps and pdf fields are non-standard
% download links; standard bibliography styles ignore unknown fields.
@incollection{RieLasSon07,
  author    = {Rieck, Konrad and Laskov, Pavel and Sonnenburg, S{\"o}ren},
  title     = {Computation of Similarity Measures for Sequential Data using Generalized Suffix Trees},
  booktitle = {Advances in Neural Information Processing Systems 19},
  editor    = {Sch{\"o}lkopf, B. and Platt, J. and Hoffman, T.},
  publisher = {MIT Press},
  address   = {Cambridge, MA},
  pages     = {1177--1184},
  year      = {2007},
  ps        = {http://books.nips.cc/papers/files/nips19/NIPS2006_0685.ps.gz},
  pdf       = {http://books.nips.cc/papers/files/nips19/NIPS2006_0685.pdf},
  abstract  = {We propose a generic algorithm for computation of similarity measures for sequential data. The algorithm uses generalized suffix trees for efficient calculation of various kernel, distance and non-metric similarity functions. Its worst-case run-time is linear in the length of sequences and independent of the underlying embedding language, which can cover words, k-grams or all contained subsequences. Experiments with network intrusion detection, DNA analysis and text processing applications demonstrate the utility of distances and similarity coefficients for sequences as alternatives to classical kernel functions.},
}