Go to the documentation of this file.00001 <!--
00002 _ _______ _
00003 | | |__ __\ | |
00004 _ __ | |_ __ | | ___ ___ | |___
00005 | '_ \| | '_ \| |/ _ \ / _ \| / __|
00006 | | | | | |_) | | (_) | (_) | \__ \
00007 |_| |_|_| .__/|_|\___/ \___/|_|___/
00008 ___________________________| |_________________________________________
00009 | |_| |\
00010 | |_\
00011 | File : Dataset.php |
00012 | Created : 16-Feb-2012 |
00013 | By : atrilla |
00014 | |
00015 | nlpTools - Natural Language Processing Toolkit for PHP |
00016 | |
00017 | Copyright (c) 2012 Alexandre Trilla |
00018 | |
00019 | ___________________________________________________________________ |
00020 | |
00021 | This file is part of nlpTools. |
00022 | |
00023 | nlpTools is free software: you can redistribute it and/or modify |
00024 | it under the terms of the MIT/X11 License as published by the |
00025 | Massachusetts Institute of Technology. See the MIT/X11 License |
00026 | for more details. |
00027 | |
00028 | You should have received a copy of the MIT/X11 License along with |
00029 | this source code distribution of nlpTools (see the COPYING file |
00030 | in the root directory). If not, see |
00031 | <http://opensource.org/licenses/MIT>. |
00032 |_________________________________________________________________________|
00033 -->
00034
00035 <?php
00036
00037 require(dirname(__FILE__)."/Feeder.php");
00038
00039 include_once(dirname(__FILE__)."/../../tokenisation/WhitespaceTok.php");
00040
/**
 * Feeder that loads a labelled dataset from a plain-text file.
 *
 * The file is read line by line; every line is one instance. Each line is
 * split with WhitespaceTok, the last token is taken as the label and the
 * remaining tokens, re-joined with single spaces, form the instance text.
 */
class Dataset implements Feeder {

    /**
     * Reads the dataset file and separates texts from labels.
     *
     * @param string $sourceURL Path to the dataset file (resolved with
     *                          realpath(), so relative paths are accepted).
     * @return array Two-element array: index 0 holds the list of instance
     *               texts, index 1 holds the list of labels, aligned by
     *               position.
     * @throws Exception When the path does not resolve to a regular file
     *                   or the file cannot be read.
     */
    public function getFood($sourceURL) {
        $text = array();
        $labs = array();

        // realpath() yields false for a path that does not exist.
        $sourceURL = realpath((string)$sourceURL);
        if ($sourceURL === false || !is_file($sourceURL)) {
            throw new Exception("Dataset feeder: source location ".
                "unknown!\n");
        }

        $contents = file_get_contents($sourceURL);
        if ($contents === false) {
            // Previously a failed read silently produced an empty dataset.
            throw new Exception("Dataset feeder: source location ".
                "unreadable!\n");
        }

        $instance = explode("\n", $contents);
        // A file ending in "\n" produces one trailing empty element; drop
        // it. When the file has no final newline the last element is a real
        // instance and must be kept (it used to be discarded).
        if (end($instance) === '') {
            array_pop($instance);
        }

        // NOTE(review): tokenise() is assumed to return an array of tokens;
        // its definition lives in WhitespaceTok.php -- confirm.
        $tokeniser = new WhitespaceTok;
        foreach ($instance as $inst) {
            $words = $tokeniser->tokenise($inst);
            // Last token is the label, everything before it is the text.
            $labs[] = array_pop($words);
            $text[] = implode(" ", $words);
        }

        return array($text, $labs);
    }
}
00079
00080 ?>