% Book chapter in "Kernel Methods in Computational Biology" (MIT Press, 2004).
% The non-standard "pdf" and "abstract" fields are ignored by standard styles
% and are kept as metadata only.
@incollection{RaeSon04,
  author    = {R{\"a}tsch, Gunnar and Sonnenburg, S{\"o}ren},
  editor    = {Sch{\"o}lkopf, B. and Tsuda, K. and Vert, J. P.},
  title     = {Accurate Splice Site Prediction for {Caenorhabditis} Elegans},
  booktitle = {Kernel Methods in Computational Biology},
  series    = {MIT Press series on Computational Molecular Biology},
  publisher = {MIT Press},
  pages     = {277--298},
  year      = {2004},
  pdf       = {http://sonnenburgs.de/soeren/publications/RaeSon04.pdf},
  abstract  = {We propose a new system for predicting the splice form of
               Caenorhabditis elegans genes. As a first step we generate a
               clean set of genes from available expressed sequence tags (EST)
               and complete complementary (cDNA) sequences. From all such genes
               we then generate potential acceptor and donor sites as they
               would be required by any gene finder. This leads to a clean set
               of true and decoy splice sites. In a second step we use support
               vector machines (SVMs) with appropriately designed kernels to
               learn to distinguish between true and decoy sites. Using the
               newly generated data and the novel kernels we could considerably
               improve our previous results on the same task. In the last step
               we design and test a new splice finder system that combines the
               SVM predictions with additional statistical information about
               splicing. Using this system we are able to predict the
               exon-intron structure of a given gene with known translation
               initiation and stop codon site. The system has been tested
               successfully on a newly generated set of genes and compared with
               GenScan. We found that our system predicts the correct splice
               form for more than 92\% of these genes, whereas GenScan only
               achieves 77.5\% accuracy.},
}