Go to the documentation of this file.00001 <!--
00002                            _    _______          _     
00003                           | |  |__   __\        | |    
00004                      _ __ | |_ __ | | ___   ___ | |___ 
00005                     | '_ \| | '_ \| |/ _ \ / _ \| / __|
00006                     | | | | | |_) | | (_) | (_) | \__ \
00007                     |_| |_|_| .__/|_|\___/ \___/|_|___/
00008  ___________________________| |_________________________________________
00009 |                           |_|                                        |\
00010 |                                                                      |_\
00011 |   File    : WhitespaceTok.php                                           |
00012 |   Created : 16-Feb-2012                                                 |
00013 |   By      : atrilla                                                     |
00014 |                                                                         |
00015 |   nlpTools - Natural Language Processing Toolkit for PHP                |
00016 |                                                                         |
00017 |   Copyright (c) 2012 Alexandre Trilla                                   |
00018 |                                                                         |
00019 |   ___________________________________________________________________   |
00020 |                                                                         |
00021 |   This file is part of nlpTools.                                        |
00022 |                                                                         |
00023 |   nlpTools is free software: you can redistribute it and/or modify      |
00024 |   it under the terms of the MIT/X11 License as published by the         |
00025 |   Massachusetts Institute of Technology. See the MIT/X11 License        |
00026 |   for more details.                                                     |
00027 |                                                                         |
00028 |   You should have received a copy of the MIT/X11 License along with     |
00029 |   this source code distribution of nlpTools (see the COPYING file       |
00030 |   in the root directory). If not, see                                   |
00031 |   <http://www.opensource.org/licenses/MIT>.                             |
00032 |_________________________________________________________________________|
00033 -->
00034 
00035 <?php
00036 
00037 require(dirname(__FILE__)."/Tokeniser.php");
00038 
00047 class WhitespaceTok implements Tokeniser {
00048 
00052     public function tokenise(&$text) {
00053         $text = (string)$text;
00054         $text = trim($text);
00055         $text = preg_replace("/[^a-zA-Z0-9\ ]/", " $0 ", $text);
00056         $text = preg_replace("/\ \ +/", " ", $text);
00057         $words = explode(" ", $text);
00058         return $words;
00059     }
00060 }
00061 
00062 ?>