/**
 * Rule-based part-of-speech tagger.
 *
 * Every token is first looked up in a lexicon (a word followed by its
 * candidate tags, first tag preferred). Unknown words start out as common
 * nouns ('NN'). A fixed sequence of context and suffix rules then refines
 * that first guess.
 */
class PosTagger {
    /** @var array Lower-cased word => list of candidate tags, in lexicon order. */
    private $dict = array();

    /**
     * Loads the lexicon into memory.
     *
     * Each useful line holds a word followed by one or more whitespace
     * separated tags. Blank lines and words without any tag are skipped.
     * When the same lower-cased word occurs more than once, the last
     * occurrence wins.
     *
     * @param string $lexicon Path to the lexicon file.
     *
     * @throws RuntimeException If the lexicon file cannot be opened.
     */
    public function __construct($lexicon) {
        $fh = fopen($lexicon, 'r');
        if ($fh === false) {
            throw new RuntimeException('Unable to open lexicon file: ' . $lexicon);
        }

        // Compare against false explicitly so that a falsy line (e.g. a final
        // "0" without a newline) cannot end the loop early.
        while (($line = fgets($fh)) !== false) {
            // Trimming removes the line ending, which would otherwise stay
            // glued to the last tag and break every later tag comparison.
            $tags = preg_split('/\s+/', trim($line), -1, PREG_SPLIT_NO_EMPTY);
            if (count($tags) < 2) {
                continue;
            }
            $this->dict[strtolower(array_shift($tags))] = $tags;
        }

        fclose($fh);
    }

    /**
     * Tags every token found in the given text.
     *
     * Tokens are runs of word characters, digits and full stops. Trailing
     * full stops are ignored for lookup and for the suffix rules, but the
     * reported token keeps them.
     *
     * @param string $text Text to tag.
     *
     * @return array List of array('token' => string, 'tag' => string), in
     *               the order the tokens appear in the text.
     */
    public function tag($text) {
        preg_match_all("/[\w\d\.]+/", $text, $matches);
        $nouns = array('NN', 'NNS');
        $verbs = array('VBD', 'VBP', 'VB');

        $return = array();
        $i = 0;
        foreach ($matches[0] as $token) {
            $original = $token;

            // remove trailing full stops
            $token = preg_replace('/\.+$/', '', $token);
            $key = strtolower($token);

            // get from dict if set, otherwise default to a common noun
            $candidates = isset($this->dict[$key]) ? $this->dict[$key] : array();
            $tag = empty($candidates) ? 'NN' : $candidates[0];

            $previous = ($i > 0) ? $return[$i - 1] : null;

            // Converts verbs after 'the' to nouns
            if ($previous !== null
                && $previous['tag'] === 'DT'
                && in_array($tag, $verbs, true)) {
                $tag = 'NN';
            }

            // Convert noun to number if . appears
            if ($tag[0] === 'N' && strpos($token, '.') !== false) {
                $tag = 'CD';
            }

            // Convert noun to past participle if ends with 'ed'
            if ($tag[0] === 'N' && substr($token, -2) === 'ed') {
                $tag = 'VBN';
            }

            // Anything that ends 'ly' is an adverb
            if (substr($token, -2) === 'ly') {
                $tag = 'RB';
            }

            // Common noun to adjective if it ends with al
            if (in_array($tag, $nouns, true) && substr($token, -2) === 'al') {
                $tag = 'JJ';
            }

            // Noun to verb if the word before is 'would'
            if ($previous !== null
                && $tag === 'NN'
                && strtolower($previous['token']) === 'would') {
                $tag = 'VB';
            }

            // Convert noun to plural if it ends with an s
            if ($tag === 'NN' && substr($token, -1) === 's') {
                $tag = 'NNS';
            }

            // Convert common noun to gerund
            if (in_array($tag, $nouns, true) && substr($token, -3) === 'ing') {
                $tag = 'VBG';
            }

            // If we get noun noun, and the second can be a verb, convert to verb
            if ($previous !== null
                && in_array($tag, $nouns, true)
                && in_array($previous['tag'], $nouns, true)
                && !empty($candidates)) {
                if (in_array('VBN', $candidates, true)) {
                    $tag = 'VBN';
                } else if (in_array('VBZ', $candidates, true)) {
                    $tag = 'VBZ';
                }
            }

            $return[$i] = array('token' => $original, 'tag' => $tag);

            // Advance the slot; without this every token would overwrite the
            // first entry and the "previous word" rules could never fire.
            $i++;
        }

        return $return;
    }
}
// Twitter search -> RSS 2.0 feed.
//
// Runs a "recent" tweet search, tags every status with PosTagger and prints
// the result as an RSS document.

// Include the twitter oauth library for PHP.
// NOTE(review): neither the include for the OAuth library nor the code that
// creates $connection is visible in this section; both must run before the
// search request below.

// Init
define("CONSUMER_KEY", "mykeyhere");
define("CONSUMER_SECRET", "mykeyhere");
define("OAUTH_TOKEN", "mykeyhere");
define("OAUTH_TOKEN_SECRET", "mykeyhere");
$q = 'search term';

// Get an authorised connection to twitter and run the search. The term is
// percent-encoded so spaces and reserved characters cannot corrupt the query.
$content = $connection->get("search/tweets.json?q=" . rawurlencode($q) . "&result_type=recent");

// A failed request may not carry a statuses list; fall back to an empty feed.
$statuses = (isset($content->statuses) && is_array($content->statuses))
    ? $content->statuses
    : array();

// Load the lexicon once instead of re-reading it for every status.
$tagger = new PosTagger('lexicon.txt');

// Send the xml header. UTF-8 is declared because decoded JSON strings are
// UTF-8; announcing ISO-8859-1 would garble every non-ASCII character.
header("Content-Type: application/rss+xml; charset=UTF-8");

$feed_url = 'http://' . $_SERVER["SERVER_NAME"] . $_SERVER["REQUEST_URI"];
$feed_url_xml = htmlspecialchars($feed_url, ENT_QUOTES, 'UTF-8');

$xml  = '<?xml version="1.0" encoding="UTF-8"?>' . PHP_EOL;
$xml .= '<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom">' . PHP_EOL;
$xml .= '  <channel>' . PHP_EOL;
$xml .= '    <title>' . htmlspecialchars($q, ENT_QUOTES, 'UTF-8') . '</title>' . PHP_EOL;
$xml .= '    <description>Search results</description>' . PHP_EOL;
$xml .= '    <link>' . $feed_url_xml . '</link>' . PHP_EOL;
// atom:link is an empty element, so it is closed in place.
$xml .= '    <atom:link href="' . $feed_url_xml . '" rel="self" type="application/rss+xml" />' . PHP_EOL;

// Get the status
foreach ($statuses as $status) {
    // tag() returns a list of token/tag pairs, not an object; render them as
    // "token/TAG" separated by spaces for the item description.
    // NOTE(review): the intended description format is not shown in the
    // original code - confirm this rendering is what the feed should carry.
    $pairs = array();
    foreach ($tagger->tag($status->text) as $tagged) {
        $pairs[] = $tagged['token'] . '/' . $tagged['tag'];
    }
    $description = implode(' ', $pairs);

    // Build the tweet url as we don't get this in the status object
    $item_url = 'http://twitter.com/' . $status->user->screen_name . '/status/' . $status->id_str;
    $item_url_xml = htmlspecialchars($item_url, ENT_QUOTES, 'UTF-8');

    // created_at is split on spaces and reassembled below as
    // "Day, DD Mon YYYY HH:MM:SS +ZZZZ".
    // NOTE(review): assumes six space-separated parts in the order
    // day-name, month, day, time, zone, year - confirm against the API.
    $date = explode(' ', $status->created_at);

    $xml .= '    <item>' . PHP_EOL;
    // double_encode is off because the status text presumably already carries
    // entities such as &amp; - verify against real API output.
    $xml .= '      <title>' . htmlspecialchars($status->text, ENT_QUOTES, 'UTF-8', false) . '</title>' . PHP_EOL;
    $xml .= '      <description>' . htmlspecialchars($description, ENT_QUOTES, 'UTF-8') . '</description>' . PHP_EOL;
    $xml .= '      <link>' . $item_url_xml . '</link>' . PHP_EOL;
    // RSS names are case-sensitive: isPermaLink and pubDate, not lower case.
    $xml .= '      <guid isPermaLink="true">' . $item_url_xml . '</guid>' . PHP_EOL;
    // Format the date since the twitter format doesn't work for rss
    $xml .= '      <pubDate>' . $date[0] . ', ' . $date[2] . ' ' . $date[1] . ' ' . $date[5] . ' ' . $date[3] . ' ' . $date[4] . '</pubDate>' . PHP_EOL;

    // Get the attached media
    // NOTE(review): the original text carried a stray comment terminator after
    // this section, so it may have been disabled on purpose. Each photo costs
    // one extra HTTP request - confirm the section should stay active.
    if (!empty($status->entities->media) && is_array($status->entities->media)) {
        foreach ($status->entities->media as $media) {
            // Currently only photos are supported
            if ($media->type !== 'photo') {
                continue;
            }
            $enc_type = 'image/jpeg';

            // We need the file size for the media so try to get this from the
            // headers. Associative mode keys the result by header name; the
            // name is matched case-insensitively and, after redirects, the
            // value of the final response is used.
            $size = 0;
            $headers = get_headers($media->media_url, 1);
            if (is_array($headers)) {
                foreach ($headers as $header_name => $header_value) {
                    if (is_string($header_name) && strcasecmp($header_name, 'Content-Length') === 0) {
                        if (is_array($header_value)) {
                            $header_value = end($header_value);
                        }
                        $size = (int) trim($header_value);
                        break; // Found what we need, stop looping
                    }
                }
            }
            if ($size <= 0) {
                $size = 1; // This is basically a hack to make the rss validate
            }

            $xml .= '      <enclosure length="' . $size . '" type="' . $enc_type . '" url="'
                . htmlspecialchars($media->media_url, ENT_QUOTES, 'UTF-8') . '" />' . PHP_EOL;
        }
    }

    $xml .= '    </item>' . PHP_EOL;
}

$xml .= '  </channel>' . PHP_EOL;
$xml .= '</rss>' . PHP_EOL;

// Return the xml for the rss
print $xml;
解析错误：语法错误，/var/www./test/Twitter/index.php 中出现意外的 T_OBJECT_OPERATOR
// A method cannot be chained directly onto a bare `new` expression on the PHP
// version in use here (that is what raises "unexpected T_OBJECT_OPERATOR").
// Creating the tagger first and calling tag() on the variable works on every
// PHP version.
$tagger = new PosTagger('lexicon.txt');
$tags = $tagger->tag($status->text);