% SonRaeSch05: conference paper by Sonnenburg, Raetsch and Schaefer (2005).
% The ps, pdf and abstract fields are not used by the standard styles; they
% hold download links and the abstract text for tools that display them.
@inproceedings{SonRaeSch05,
  author    = {Sonnenburg, S{\"o}ren and R{\"a}tsch, Gunnar and Sch{\"a}fer, Christin},
  title     = {Learning Interpretable {SVMs} for Biological Sequence Classification},
  booktitle = {{RECOMB} 2005},
  series    = {Lecture Notes in Bioinformatics},
  volume    = {3500},
  editor    = {Miyano, S. and others},
  pages     = {389--407},
  publisher = {Springer-Verlag},
  address   = {Berlin, Heidelberg},
  year      = {2005},
  ps        = {http://sonnenburgs.de/soeren/publications/SonRaeSch05.ps.gz},
  pdf       = {http://sonnenburgs.de/soeren/publications/SonRaeSch05.pdf},
  abstract  = {
We propose novel algorithms for solving the so-called Support Vector Multiple Kernel Learning problem
and show how they can be used to understand the resulting support vector decision function. While classical
kernel-based algorithms (such as {SVMs}) are based on a single kernel, in Multiple Kernel Learning a
quadratically-constraint quadratic program is solved in order to find a sparse convex combination of a set
of support vector kernels. We show how this problem can be cast into a semi-infinite linear optimization
problem which can in turn be solved efficiently using a boosting-like iterative method in combination with
standard {SVM} optimization algorithms. The proposed method is able to deal with thousands of examples
while combining hundreds of kernels within reasonable time.
In the second part we show how this technique can be used to understand the obtained decision function in
order to extract biologically relevant knowledge about the sequence analysis problem at hand. We consider
the problem of splice site identification and combine string kernels at different sequence positions and
with various substring (oligomer) lengths. The proposed algorithm computes a sparse weighting over the
length and the substring, highlighting which substrings are important for discrimination. Finally, we
propose a bootstrap scheme in order to reliably identify a few statistically significant positions, which
can then be used for further analysis such as consensus finding.},
}