westdc-zf1/application/models/PingbackRpc.php

110 lines
3.7 KiB
PHP
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

<?php
/**
 * Pingback XML-RPC handler: verifies that a remote page links to a local
 * metadata record and stores an excerpt of that page as a 'pingback' comment.
 */
class PingbackRpc
{
    /** Lower-case element names accepted as the block that contains the link. */
    private $BLOCK_ELEMENTS = array("div", "p", "body", "ul", "ol", "td", "pre", "center");
    /** Not referenced anywhere in this class. */
    private $CONTEXT_BYTES = 200;

    /**
     * ping method for pingback xmlrpc server
     *
     * Rejects the ping (returns false) when the last path segment of the target
     * URI is not a known metadata uuid, when the source page does not contain
     * the target URI, when the source page has no title, or when the target URI
     * is not the href of a link inside the source page's body.
     *
     * @param string $externalURI The absolute URI of the post on the source page containing the link to the target site.
     * @param string $westdcURI The absolute URI of the target of the link, as given on the source page.
     * @return string|bool "OK" when the pingback was stored, false when it was rejected
     */
    public function ping($externalURI,$westdcURI)
    {
        // Debug logging
        $writer = new Zend_Log_Writer_Stream('pingback.log');
        $logger = new Zend_Log($writer);
        $logger->info('Informational message');
        $logger->info('extern url:'.$externalURI);

        // Check the uuid: the last path segment of the target URI must match a metadata row
        $segments = explode('/', $westdcURI);
        $uuid = $segments[count($segments) - 1];
        $db = Zend_Registry::get('db');
        $sql = "select count(*) as count from metadata where uuid=?";
        $rs = $db->fetchRow($sql, array($uuid));
        // No such record
        if (!$rs || $rs['count'] == 0) {
            return false;
        }

        // Anti-spam: the intended rule (source host must resolve to the sender's
        // IP address) is disabled in this code, so the sender address is only
        // logged and stored. It is null when the server did not provide it.
        $ip = isset($_SERVER['REMOTE_ADDR']) ? $_SERVER['REMOTE_ADDR'] : null;
        $logger->info('user ip:'.$ip);

        // Constraint: the source page must contain the target address.
        // NOTE(review): $externalURI is caller-supplied and fetched from this
        // server; consider restricting which hosts may be requested.
        $client = new Zend_Http_Client($externalURI);
        $response = $client->request(Zend_Http_Client::GET);
        $contents = $response->getBody();
        // RETURN: our source blog post does not exist on remote server
        if (strpos($contents, $westdcURI) === false) {
            return false;
        }

        // Get the title of the remote blog post
        // RETURN: no title on page
        if (!preg_match('/<title>([^<]*?)<\/title>/is', $contents, $titles) || $titles[1] === '') {
            return false;
        }
        $title = $titles[1];
        // Titles longer than 100 characters are cut to their first 99 characters
        $title = (mb_strlen($title) > 100) ? mb_substr($title, 0, 99) : $title;
        $agent = isset($_SERVER['HTTP_USER_AGENT']) ? $_SERVER['HTTP_USER_AGENT'] : null;
        $logger->info('title:'.$title);
        $logger->info('user agent:'.$agent);

        // Parse the page; markup problems are collected by libxml instead of
        // being raised as PHP warnings, and the previous setting is restored.
        $dom = new DOMDocument();
        $previousSetting = libxml_use_internal_errors(true);
        $loaded = $dom->loadHTML($contents);
        libxml_clear_errors();
        libxml_use_internal_errors($previousSetting);
        if (!$loaded || $dom->documentElement === null) {
            return false;
        }
        $xml = simplexml_import_dom($dom);
        if (!($xml instanceof SimpleXMLElement)) {
            return false;
        }

        $ret = $this->extractContext($xml, $westdcURI);
        // RETURN: the URI occurs in the page but not as the href of a link in <body>
        if ($ret === null) {
            return false;
        }

        // NOTE(review): presumably reverses the ISO-8859-1 interpretation applied
        // when the page declares no charset - confirm before changing.
        $ret = utf8_decode($ret);
        $logger->info($ret);
        // Collapse whitespace runs ('|' is content and must be left alone)
        $ret = preg_replace('/\s+/', ' ', $ret);
        $ret = preg_replace('/ <(h1|h2|h3|h4|h5|h6|p|th|td|li|dt|dd|pre|caption|input|textarea|button|body|borrowed|from|wordpress)[^>]*>/is', ' ', $ret);
        $stripper = new Zend_Filter_StripTags('a', 'href');
        $ret = $stripper->filter($ret);

        // Save to the database (open question from the original author: keep at most one pingback?)
        $sql = "insert into comments (type,author,url,ip,content,agent,uuid) values('pingback',?,?,?,?,?,?)";
        $db->query($sql, array($title, $externalURI, $ip, $ret, $agent, $uuid));
        return "OK";
    }

    /**
     * Builds the raw markup excerpt around the first body link whose href equals the target URI.
     *
     * The excerpt is the smallest enclosing block element (see $BLOCK_ELEMENTS)
     * together with its nearest preceding and following sibling elements.
     *
     * @param SimpleXMLElement $xml parsed source page
     * @param string $westdcURI href value to look for
     * @return string|null excerpt markup, or null when no such link or block exists
     */
    private function extractContext($xml, $westdcURI)
    {
        $link = $this->xpath1($xml, '//body//a[@href=' . $this->xpathLiteral($westdcURI) . '][1]');
        if ($link === null) {
            return null;
        }

        // Searching for the smallest block element containing the link; climbing
        // from the node itself guarantees the result is an ancestor of that link.
        $context = $link;
        while (! in_array(strtolower($context->getName()), $this->BLOCK_ELEMENTS, true)) {
            $context = $this->xpath1($context, '..');
            if ($context === null) {
                return null;
            }
        }

        // Expanding context
        $previous = $this->xpath1($context, 'preceding-sibling::*[position()=1]');
        $next = $this->xpath1($context, 'following-sibling::*[position()=1]');

        // Join this all
        $ret = "";
        if ($previous !== null) {
            $ret = $previous->asXML();
        }
        $ret .= " " . $context->asXML() . " ";
        if ($next !== null) {
            $ret .= " " . $next->asXML();
        }
        return $ret;
    }

    /**
     * Quotes a string as an XPath 1.0 string literal.
     *
     * XPath 1.0 has no escape sequence, so a value containing both quote
     * characters is expressed as a concat() of double-quoted pieces.
     *
     * @param string $value raw value
     * @return string XPath expression evaluating to $value
     */
    private function xpathLiteral($value)
    {
        $value = (string) $value;
        if (strpos($value, '"') === false) {
            return '"' . $value . '"';
        }
        if (strpos($value, "'") === false) {
            return "'" . $value . "'";
        }
        $parts = array();
        foreach (explode('"', $value) as $piece) {
            $parts[] = '"' . $piece . '"';
        }
        return 'concat(' . implode(", '\"', ", $parts) . ')';
    }

    /**
     * Queries SimpleXMLElement with XPath and return the first result or null if found nothing
     *
     * @param SimpleXMLElement $dom SimpleXMLElement object to query
     * @param string $xpath the query
     * @return SimpleXMLElement | null resulting element
     */
    private function xpath1($dom, $xpath) {
        $res = $dom->xpath($xpath);
        // xpath() does not return an array when the query could not be evaluated
        if (!is_array($res) || count($res) < 1) {
            return null;
        }
        return $res[0];
    }
}
?>