'discipline','place','stratum','temporal','theme');
    public $author;

    /**
     * Creates the object with an empty DOM document.
     */
    function __construct()
    {
        $this->dom = new DOMDocument();
    }

    /**
     * Reads metadata from an XML file and parses it.
     *
     * @param string $file path of the XML file
     * @return bool false when the file could not be loaded (nothing is parsed then), true otherwise
     */
    function load($file)
    {
        if (!$this->dom->load($file)) return false;
        $this->parse();
        return true;
    }

    /**
     * Reads metadata from an XML string and parses it.
     *
     * @param string $str XML source
     * @return bool false when the string could not be loaded (nothing is parsed then), true otherwise
     */
    function loadXML($str)
    {
        if (!$this->dom->loadXML($str)) return false;
        $this->parse();
        return true;
    }

    /**
     * Saves the current DOM document to a file.
     *
     * @param string $file target path
     * @return mixed whatever DOMDocument::save() returns
     */
    function save($file)
    {
        return $this->dom->save($file);
    }

    /**
     * Serialises the document, or a single node of it, to an XML string.
     *
     * @param DOMNode|string $node node to serialise; anything that is not a
     *                             DOMNode (including the default "") dumps the whole document
     * @return mixed whatever DOMDocument::saveXML() returns
     */
    function saveXML($node="")
    {
        // DOMDocument::saveXML() only accepts a DOMNode (or nothing), never a string.
        if ($node instanceof DOMNode) return $this->dom->saveXML($node);
        return $this->dom->saveXML();
    }

    /**
     * Intended to load metadata from the database by UUID without calling parse().
     * Not implemented: the body is empty.
     *
     * @param string $uuid
     */
    function loadUUID($uuid)
    {
    }

    /**
     * Parses $xml and stores the metadata record and its related rows.
     *
     * Tables written: metadata, mdstat, onlineresource, thumbnail, keyword,
     * series/dataseries, xml, category, responsible/role, uselimit/mdlimit.
     * An existing metadata row with the same uuid is deleted first. Finally the
     * XML is saved to ../data/import/<uuid>.xml. Exceptions raised while
     * inserting are caught and printed, not rethrown.
     *
     * @param object $db  database adapter providing query(), fetchRow(), prepare()
     *                    and lastInsertId() (presumably a Zend_Db adapter returning
     *                    rows as objects - confirm against the caller)
     * @param string $xml metadata XML source
     */
    function saveDB($db,$xml)
    {
        // Never touch the database for a document that could not be parsed.
        if ($this->loadXML($xml)===false) {
            print 'metadata XML could not be parsed'."\n";
            return;
        }

        // Remove the existing metadata row first, the new one is inserted below.
        $sql="delete from metadata where uuid=?";
        $db->query($sql,array($this->uuid));
        // Remove all responsible rows that are no longer referenced by any role.
        $sql="delete from responsible where id not in (select distinct(resid) from role)";
        $db->query($sql);

        $config=Zend_Registry::get('config');
        // Keep exposing the config on the view when a view object is attached,
        // but never assign a property on a null view.
        if (isset($this->view) && is_object($this->view)) $this->view->config=$config;

        // Create the empty statistics row (todo: could be moved to the database side).
        try {
            $sql="insert into mdstat (uuid) values(?)";
            $db->query($sql,array($this->uuid));
        } catch (Exception $e) {
            // Nothing to do: the row already exists in the database.
        }

        // Save the metadata into the database.
        try {
            $box=is_array($this->geoBox) ? $this->geoBox : array();
            $sql="insert into metadata (uuid,title,title_en,description,citation,suppinfo,fileformat,projection,datatype,filesize,ts_created,
      timebegin,timeend,west,south,north,east,doi) values(?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)";
            $db->query($sql,array(
                $this->uuid,$this->resTitle,$this->resAltTitle,$this->idAbs,$this->citation,$this->suppinfo,
                $this->fileformat,$this->projection,$this->datatype,$this->filesize,$this->mdDateSt,
                $this->timebegin,$this->timeend,
                isset($box['w']) ? $box['w'] : null,
                isset($box['s']) ? $box['s'] : null,
                isset($box['n']) ? $box['n'] : null,
                isset($box['e']) ? $box['e'] : null,
                $this->doi
            ));
            $id=$db->lastInsertId('metadata','id');

            // Online resources.
            foreach ((array)$this->onlineresource as $resource) {
                $sql="insert into onlineresource (uuid,linkage,protocol,name,description) values(?,?,?,?,?)";
                $db->query($sql,array(
                    $this->uuid,
                    $resource['linkage'],
                    isset($resource['protocol']) ? $resource['protocol'] : null,
                    isset($resource['name']) ? $resource['name'] : null,
                    $resource['description']
                ));
            }

            // Thumbnail: prefer the one embedded in the XML, otherwise fetch the
            // graphic overview from geonetwork.
            if (!empty($this->thumbnail)) {
                $sql="insert into thumbnail (id,data,filetype) values(?,?,?)";
                $db->query($sql,array($id,$this->thumbnail,'jpg'));
            } elseif (!empty($this->graph)) {
                $sql="select id as gid from geonetworkmetadata where uuid=?";
                $r=$db->fetchRow($sql,array($this->uuid));
                if ($r) {
                    $geonetwork="http://".$_SERVER['SERVER_NAME'];
                    if ($config) $geonetwork.=$config->geonetwork->url;
                    else $geonetwork.='/geonetwork/';
                    $raw=file_get_contents($geonetwork.'srv/cn/resources.get?access=public&id='.$r->gid.'&fname='.urlencode($this->graph['filename']));
                    // A failed download must not be stored as an empty thumbnail.
                    if ($raw!==false) {
                        $sql="insert into thumbnail (id,data,filetype,filedesc,filename) values(?,?,?,?,?)";
                        $db->query($sql,array($id,base64_encode($raw),$this->graph['filetype'],$this->graph['filedesc'],$this->graph['filename']));
                    }
                }
            }

            // Keywords (1:N relation).
            foreach ((array)$this->keyword as $keytype=>$keys) {
                foreach ($keys as $key) {
                    $sql="insert into keyword (id,keyword,keytype) values(?,?,?)";
                    $db->query($sql,array($id,$key,$keytype));
                }
            }

            // Dataset series (M:N relation): reuse a series with the same name or create it.
            foreach ((array)$this->datasetSeries as $ds) {
                $sql="select id from series where name=?";
                $sth=$db->prepare($sql);
                $sth->execute(array($ds['seriesName']));
                $trow=$sth->fetch();
                if ($trow) {
                    $sid=$trow->id;
                } else {
                    $sql="insert into series (name) values(?)";
                    $db->query($sql,array($ds['seriesName']));
                    $sid=$db->lastInsertId('series','id');
                }
                $sql="insert into dataseries (id,sid) values(?,?)";
                $db->query($sql,array($id,$sid));
            }

            // Store the raw XML.
            $sql="insert into xml (id,data) values(?,?)";
            $db->query($sql,array($id,$xml));

            // Topic categories.
            foreach ((array)$this->tpCat as $cat) {
                if (is_numeric($cat)) {
                    $sql="insert into category (id,code) values(?,?)";
                    $db->query($sql,array($id,(int)$cat));
                } else {
                    // A string (geonetwork uses this form): look its code up in categorycode.
                    $sql="insert into category (id,code) select ?,code from categorycode where name=?";
                    $db->query($sql,array($id,trim($cat)));
                }
            }

            // Contacts: look each person up first, insert when unknown.
            // Changed contact details of an existing person are not propagated,
            // except for filling in a missing email address.
            foreach ((array)$this->author as $au) {
                $inds=explode(";",(string)$au['individual']);
                foreach ($inds as $ind) {
                    // email is selected as well, otherwise the "only fill when
                    // missing" check below could never see the stored address.
                    $sql="select id,email from responsible where individual=? and organisation=?";
                    $row=$db->fetchRow($sql,array(trim($ind),trim($au['organisation'])));
                    if (!$row) {
                        $sql="insert into responsible (individual,organisation,position,delivery,phone,email,city,country,administrative,postal) values(?,?,?,?,?,?,?,?,?,?)";
                        $db->query($sql,array(trim($ind),trim($au['organisation']),trim($au['position']),trim($au['delivery']),trim($au['phone']),trim($au['email']),trim($au['city']),trim($au['country']),trim($au['administrative']),trim($au['postal'])));
                    } elseif (count($inds)==1) {
                        // Fill in the email address when the stored one is empty.
                        if (($au['email']) && empty($row->email)) {
                            $sql="update responsible set email=? where id=?";
                            $db->query($sql,array(trim($au['email']),$row->id));
                        }
                    }
                    $sql="select id from responsible where individual=? and organisation=?";
                    $row=$db->fetchRow($sql,array(trim($ind),trim($au['organisation'])));
                    if ($row && $row->id>0) {
                        $sql="insert into role (resid,uuid,role) values(?,?,?)";
                        $db->query($sql,array($row->id,$this->uuid,trim($au['role'])));
                    }
                }
            }

            // Use limitations: reuse an identical text or create it, then link it.
            foreach ((array)$this->limits as $uselimit) {
                $sql="select id from uselimit where uselimit=?";
                $row=$db->fetchRow($sql,array($uselimit));
                if (!$row) {
                    $sql="insert into uselimit (uselimit) values (?)";
                    $db->query($sql,array($uselimit));
                }
                $sql="select id from uselimit where uselimit=?";
                $row=$db->fetchRow($sql,array($uselimit));
                if ($row) {
                    $sql="insert into mdlimit (uuid,lid) values(?,?)";
                    $db->query($sql,array($this->uuid,$row->id));
                }
            }
            $this->save("../data/import/$this->uuid.xml");
        } catch (Exception $e) {
            // Typically a duplicate insert; all errors are reported and ignored here.
            print $this->uuid.' has error: '.$e->getMessage().'
';
        }
    }

    /**
     * Returns the text of the first descendant element named $tag, or null
     * when $context has no such descendant.
     */
    private function _firstNodeValue($context,$tag)
    {
        $node=$context->getElementsByTagName($tag)->item(0);
        return $node ? $node->nodeValue : null;
    }

    /**
     * Returns attribute $attr of the first descendant element named $tag, or
     * null when the element or the attribute is missing.
     */
    private function _firstNodeAttr($context,$tag,$attr)
    {
        $node=$context->getElementsByTagName($tag)->item(0);
        if (!$node || !$node->hasAttributes()) return null;
        $item=$node->attributes->getNamedItem($attr);
        return $item ? $item->nodeValue : null;
    }

    /**
     * Appends one entry to $this->author for every element named $tag in the document.
     */
    private function _appendParties($tag)
    {
        foreach ($this->dom->getElementsByTagName($tag) as $party) {
            $this->author[]=array(
                'individual'=>$this->_firstNodeValue($party,'rpIndName'),
                'organisation'=>$this->_firstNodeValue($party,'rpOrgName'),
                'position'=>$this->_firstNodeValue($party,'rpPosName'),
                'delivery'=>$this->_firstNodeValue($party,'delPoint'),
                'phone'=>$this->_firstNodeValue($party,'voiceNum'),
                'email'=>$this->_firstNodeValue($party,'eMailAdd'),
                'postal'=>$this->_firstNodeValue($party,'postCode'),
                'city'=>$this->_firstNodeValue($party,'city'),
                'administrative'=>$this->_firstNodeValue($party,'adminArea'),
                'country'=>$this->_firstNodeValue($party,'country'),
                'role'=>$this->_firstNodeAttr($party,'RoleCd','value')
            );
        }
    }

    /**
     * Extracts the metadata fields from the loaded DOM document into properties.
     *
     * Missing elements yield null instead of raising notices or fatal errors.
     * List-valued properties (limits, author, onlineresource, tpCat, keyword,
     * datasetSeries) are rebuilt from scratch on every call so that parsing twice
     * does not accumulate duplicates. Scalar properties that are only assigned
     * when their element exists (doi, timebegin, timeend, geoBox, graph) and a
     * previously set uuid are left untouched.
     * When no UUID can be determined a new one is generated and written back
     * into the document's mdFileID element.
     */
    function parse()
    {
        $dom=$this->dom;

        $this->limits=array();
        $this->author=array();
        $this->onlineresource=array();
        $this->tpCat=array();
        $this->keyword=array();
        $this->datasetSeries=array();

        $this->resTitle=$this->_firstNodeValue($dom,'resTitle');
        $this->resAltTitle=$this->_firstNodeValue($dom,'resAltTitle');
        $this->idAbs=$this->_firstNodeValue($dom,'idAbs');
        $this->mdFileID=$this->_firstNodeValue($dom,'mdFileID');
        $this->mdDateSt=$this->_firstNodeValue($dom,'mdDateSt');
        $this->dataSetURI=$this->_firstNodeValue($dom,'dataSetURI');
        // Citation note.
        $this->citation=$this->_firstNodeValue($dom,'otherCitDet');
        // Supporting project information.
        $this->suppinfo=$this->_firstNodeValue($dom,'suppInfo');
        // DOI (custom item).
        if ($this->_firstNodeValue($dom,'citIdType')=='DOI')
            $this->doi=$this->_firstNodeValue($dom,'citId');
        // Data size, in MB.
        $this->filesize=$this->_firstNodeValue($dom,'transSize');
        // Data format: several may exist, only the first is used.
        $this->fileformat=$this->_firstNodeValue($dom,'formatName');
        // Projection: several may exist, only the first is used.
        $this->projection=$this->_firstNodeValue($dom,'identCode');

        // Use limitations.
        foreach ($dom->getElementsByTagName('useLimit') as $limit) {
            $this->limits[]=$limit->nodeValue;
        }

        // Responsible parties, in this order: citation parties, metadata
        // contacts, distributor contacts, points of contact.
        $this->_appendParties('citRespParty');
        $this->_appendParties('mdContact');
        $this->_appendParties('distorCont');
        $this->_appendParties('idPoC');

        // Data type: 0 by default, 1 when any resource is flagged offlineAccess.
        $this->datatype=0;
        foreach ($dom->getElementsByTagName('onLineSrc') as $k=>$src) {
            if ($this->_firstNodeAttr($src,'OnFunctCd','value')==='offlineAccess') $this->datatype=1;
            // NOTE(review): every resource is recorded here, offline ones included,
            // exactly as before; the original bare block may have been meant as an else.
            $this->onlineresource[$k]=array(
                'linkage'=>$this->_firstNodeValue($src,'linkage'),
                'protocol'=>$this->_firstNodeValue($src,'protocol'),
                'name'=>$this->_firstNodeValue($src,'orName'),
                'description'=>$this->_firstNodeValue($src,'orDesc')
            );
        }

        // Topic categories.
        foreach ($dom->getElementsByTagName('tpCat') as $cat) {
            $code=$this->_firstNodeAttr($cat,'TopicCatCd','value');
            if ($code!==null) $this->tpCat[]=$code;
        }

        $this->bgFileName=$this->_firstNodeValue($dom,'bgFileName');

        // Keywords, grouped by keyword type.
        foreach ($dom->getElementsByTagName('descKeys') as $keys) {
            // Looked up per group so a group without KeyTypCd never inherits
            // the type of a previous one.
            $code=$this->_firstNodeAttr($keys,'KeyTypCd','value');
            if (is_numeric($code))
                $kt=isset($this->keytypecode[(int)$code]) ? $this->keytypecode[(int)$code] : null;
            else
                $kt=$code;
            // No type found: default to theme keywords.
            if (!$kt) $kt=$this->keytypecode[5];
            foreach ($keys->getElementsByTagName('keyword') as $key) {
                // Several keywords may have been entered together, separated by semicolons.
                foreach (explode(';',$key->nodeValue) as $piece) {
                    $piece=trim($piece);
                    if ($piece!=='') $this->keyword[$kt][]=$piece;
                }
            }
        }

        // Dataset series as in the ISO 19115 draft.
        foreach ($dom->getElementsByTagName('datasetSeries') as $k=>$dataset) {
            $this->datasetSeries[$k]=array(
                'seriesName'=>$this->_firstNodeValue($dataset,'seriesName'),
                'issId'=>$this->_firstNodeValue($dataset,'issId'),
                'artPage'=>$this->_firstNodeValue($dataset,'artPage')
            );
        }
        // Alternative element name; entries overwrite the ones above at the same index.
        foreach ($dom->getElementsByTagName('Series') as $k=>$dataset) {
            $this->datasetSeries[$k]=array(
                'seriesName'=>$this->_firstNodeValue($dataset,'Name'),
                'issId'=>$this->_firstNodeValue($dataset,'issId'),
                'artPage'=>$this->_firstNodeValue($dataset,'artPage')
            );
        }

        // Bounding box (may only apply to metadata produced by ESRI ArcCatalog).
        foreach ($dom->getElementsByTagName('geoBox') as $geo) {
            $this->geoBox=array(
                'w'=>$this->_firstNodeValue($geo,'westBL'),
                's'=>$this->_firstNodeValue($geo,'southBL'),
                'e'=>$this->_firstNodeValue($geo,'eastBL'),
                'n'=>$this->_firstNodeValue($geo,'northBL')
            );
        }

        // UUID from the ESRI MetaID element, without the surrounding braces.
        if (empty($this->uuid)) {
            $this->uuid=trim(trim((string)$this->_firstNodeValue($dom,'MetaID'),'{'),'}');
        }
        // Otherwise use mdFileID when it has the length of a UUID.
        if (strlen((string)$this->mdFileID)==36 && empty($this->uuid))
            $this->uuid=$this->mdFileID;
        // Still no UUID: create one and write it back into the XML document.
        if (empty($this->uuid)) {
            $generator=new uuid();
            $this->uuid=$generator->toString();
            $mdfile=$dom->getElementsByTagName('mdFileID')->item(0);
            if ($mdfile) {
                $mdfile->nodeValue=$this->uuid;
            } else {
                $root=$dom->getElementsByTagName('Metadata')->item(0);
                // Fall back to the document element when there is no Metadata element.
                if (!$root) $root=$dom->documentElement;
                if ($root) $root->appendChild($dom->createElement('mdFileID',$this->uuid));
            }
        }

        // Temporal extent.
        foreach ($dom->getElementsByTagName('TM_Period') as $period) {
            $this->timebegin=$this->_firstNodeValue($period,'begin');
            $this->timeend=$this->_firstNodeValue($period,'end');
        }
        foreach ($dom->getElementsByTagName('TM_CalDate') as $day) {
            $this->timebegin=$this->_firstNodeValue($day,'calDate');
        }

        // Embedded thumbnail (limited to one); the root element is spelled
        // either metadata or Metadata.
        $xpath=new DOMXPath($dom);
        $entries=$xpath->query('//metadata/Binary/Thumbnail/Data');
        if ($entries->length==0) $entries=$xpath->query('//Metadata/Binary/Thumbnail/Data');
        $this->thumbnail=$entries->length>0 ? $entries->item(0)->nodeValue : null;

        // Documents coming from geonetwork: pick the graphic overview described as "thumbnail".
        foreach ($dom->getElementsByTagName('graphOver') as $g) {
            if ($this->_firstNodeValue($g,'bgFileDesc')==='thumbnail') {
                $this->graph['filename']=$this->_firstNodeValue($g,'bgFileName');
                $this->graph['filedesc']='thumbnail';
                $this->graph['filetype']=$this->_firstNodeValue($g,'bgFileType');
            }
        }
    }
}
?>