westdc-zf1/application/models/PingbackRpc.php

107 lines
3.6 KiB
PHP
Raw Normal View History

2009-03-06 03:20:46 +00:00
<?php
/**
 * Pingback XML-RPC handler.
 *
 * Validates an incoming pingback (the target must be a known metadata record
 * and the source page must really link to it), extracts a short excerpt around
 * the link from the source page and stores it as a comment of type "pingback".
 */
class PingbackRpc
{
    /**
     * Element names treated as block-level containers when looking for the
     * smallest block that surrounds the link on the source page.
     */
    private $BLOCK_ELEMENTS = array("div", "p", "body", "ul", "ol", "td", "pre", "center");

    /** Not read anywhere in this class. */
    private $CONTEXT_BYTES = 200;

    /**
     * ping method for pingback xmlrpc server
     *
     * @param string $externalURI The absolute URI of the post on the source page containing the link to the target site.
     * @param string $westdcURI The absolute URI of the target of the link, as given on the source page.
     * @return string|bool "OK" when the pingback was stored, false when it was rejected
     */
    public function ping($externalURI, $westdcURI)
    {
        // Debug logging
        $writer = new Zend_Log_Writer_Stream('pingback.log');
        $logger = new Zend_Log($writer);
        $logger->info('Informational message');
        $logger->info('extern url:' . $externalURI);

        // The uuid is the last path segment of the target URI
        $segments = explode('/', $westdcURI);
        $uuid = $segments[count($segments) - 1];
        $db = Zend_Registry::get('db');
        $row = $db->fetchRow("select count(*) as count from metadata where uuid=?", array($uuid));
        // Reject: no metadata record with this uuid
        if ($row['count'] == 0) {
            return false;
        }

        // Anti-spam. The check that the source host resolves to the sender's
        // IP was already disabled (commented out) in the original code, so the
        // DNS lookup that only fed that check is not performed any more.
        $ip = isset($_SERVER['REMOTE_ADDR']) ? $_SERVER['REMOTE_ADDR'] : '';
        $logger->info('user ip:' . $ip);

        // Restriction: the source page must contain the target address
        $client = new Zend_Http_Client($externalURI);
        $response = $client->request(Zend_Http_Client::GET);
        $body = $response->getBody();
        if (strpos($body, $westdcURI) === false) {
            return false;
        }

        // Title of the remote post; reject pages with a missing or empty <title>
        if (!preg_match('/<title>([^<]*?)<\/title>/is', $body, $titles) || $titles[1] === '') {
            return false;
        }
        $title = $titles[1];
        // Limit the stored title length
        $title = (mb_strlen($title) > 100) ? mb_substr($title, 0, 99) : $title;

        // The User-Agent header is optional, so do not assume it is present
        $agent = isset($_SERVER['HTTP_USER_AGENT']) ? $_SERVER['HTTP_USER_AGENT'] : '';
        $logger->info('title:' . $title);
        $logger->info('user agent:' . $agent);

        // Reject: the URL occurs in the page but not as the href of an anchor
        // inside <body>, so there is no link context to quote
        $excerpt = $this->extractContext($body, $westdcURI);
        if ($excerpt === null) {
            return false;
        }

        // Keep only <a href="..."> markup in the stored excerpt
        $stripper = new Zend_Filter_StripTags('a', 'href');
        $excerpt = $stripper->filter($excerpt);

        // Save to the database (open question from the original author:
        // keep at most one pingback per source?)
        $sql = "insert into comments (type,author,url,ip,content,agent,uuid) values('pingback',?,?,?,?,?,?)";
        $db->query($sql, array($title, $externalURI, $ip, $excerpt, $agent, $uuid));
        return "OK";
    }

    /**
     * Builds the excerpt around the first link to $targetURI found in $html:
     * the smallest enclosing block element plus its immediate element siblings,
     * with whitespace collapsed and opening block tags removed.
     *
     * @param string $html markup of the source page
     * @param string $targetURI href value to look for
     * @return string|null the excerpt, or null when no such link exists in <body>
     */
    private function extractContext($html, $targetURI)
    {
        if ($html === '') {
            return null;
        }

        // Real-world HTML is rarely well formed: collect libxml complaints
        // internally instead of silencing every error with "@"
        $previousSetting = libxml_use_internal_errors(true);
        $dom = new DOMDocument();
        $loaded = $dom->loadHTML($html);
        libxml_clear_errors();
        libxml_use_internal_errors($previousSetting);
        if (!$loaded || $dom->documentElement === null) {
            return null;
        }

        $xml = simplexml_import_dom($dom);
        if (!($xml instanceof SimpleXMLElement)) {
            return null;
        }

        // Parenthesised so that [1] selects the first matching anchor of the
        // whole document; without the parentheses [1] is evaluated per parent,
        // and walking up with "/.." may land on an ancestor of a later anchor.
        $path = '(//body//a[@href=' . $this->xpathLiteral($targetURI) . '])[1]';
        $context = $this->xpath1($xml, $path);

        // Searching for the smallest block element containing the link
        while ($context !== null
            && !in_array(strtolower($context->getName()), $this->BLOCK_ELEMENTS, true)) {
            $path .= "/..";
            $context = $this->xpath1($xml, $path);
        }
        if ($context === null) {
            return null;
        }

        // Expanding context
        $previous = $this->xpath1($xml, "$path/preceding-sibling::*[position()=1]");
        $next = $this->xpath1($xml, "$path/following-sibling::*[position()=1]");

        // Join this all
        $ret = "";
        if ($previous) {
            $ret = $previous->asXML();
        }
        $ret .= " " . $context->asXML() . " ";
        if ($next) {
            $ret .= " " . $next->asXML();
        }

        // Collapse whitespace runs. The previous character class [\s|\r|\n]
        // also matched a literal "|" and so erased pipes from the quoted text.
        $ret = preg_replace('/\s+/', ' ', $ret);
        $ret = preg_replace('/ <(h1|h2|h3|h4|h5|h6|p|th|td|li|dt|dd|pre|caption|input|textarea|button|body|borrowed|from|wordpress)[^>]*>/is', ' ', $ret);
        return $ret;
    }

    /**
     * Quotes a string as an XPath 1.0 literal. XPath 1.0 has no escape
     * sequences, so a value containing both quote kinds is built with concat().
     *
     * @param string $value raw string
     * @return string XPath expression evaluating to $value
     */
    private function xpathLiteral($value)
    {
        if (strpos($value, '"') === false) {
            return '"' . $value . '"';
        }
        if (strpos($value, "'") === false) {
            return "'" . $value . "'";
        }
        return 'concat("' . implode('", \'"\', "', explode('"', $value)) . '")';
    }

    /**
     * Queries SimpleXMLElement with XPath and returns the first result, or null if nothing was found
     *
     * @param SimpleXMLElement $dom SimpleXMLElement object to query
     * @param string $xpath the query
     * @return SimpleXMLElement | null resulting element
     */
    private function xpath1($dom, $xpath)
    {
        $res = $dom->xpath($xpath);
        // xpath() yields a non-array value when the expression cannot be evaluated
        if (!is_array($res) || count($res) < 1) {
            return null;
        }
        return $res[0];
    }
}
?>