From 4c95532808e0d0ff3e36fbf0cf1e5d0a09f6486c Mon Sep 17 00:00:00 2001 From: wlx Date: Tue, 22 Oct 2013 12:12:24 +0000 Subject: [PATCH] add LibRIS code --- include/LibRIS/ParseException.php | 3 + include/LibRIS/RISReader.php | 192 ++++++++++++++++++++++++++++++ include/LibRIS/RISTags.php | 168 ++++++++++++++++++++++++++ include/LibRIS/RISWriter.php | 66 ++++++++++ 4 files changed, 429 insertions(+) create mode 100755 include/LibRIS/ParseException.php create mode 100755 include/LibRIS/RISReader.php create mode 100755 include/LibRIS/RISTags.php create mode 100755 include/LibRIS/RISWriter.php diff --git a/include/LibRIS/ParseException.php b/include/LibRIS/ParseException.php new file mode 100755 index 00000000..834d8066 --- /dev/null +++ b/include/LibRIS/ParseException.php @@ -0,0 +1,3 @@ +parseFile('path/to/file.ris'); + * + * // Parse a string containing RIS data. + * $reader->parseString($someRisString); + * + * // Parse an array of lines. + * $reader->parseArray($arrayOfRISDirectives); + * + * // Get an associative array of records. + * $array = $reader->getRecords(); + * + * // Dump the records to STDOUT + * $reader->printRecords(); + * + * ?> + * @endcode + * + * The data structure generated by this class is of the form + * @code + * array( + * 'T1' => array('title one', 'title 2'), + * 'TY' => array('JOUR'), + * // Other tags and their values. + * ), + * [1] => array( + * 'T1' => array('another entry'), + * 'TY' => array('JOUR'), + * ), + * ); + * ?> + * @endcode + */
+class RISReader {
+
+  /**
+   * Line terminator used by RISWriter when joining output lines.
+   */
+  const RIS_EOL = "\r\n";
+
+  /**
+   * Pattern applied to every trimmed input line.
+   *
+   * Group 2 captures the two-character tag, group 3 the text following the
+   * hyphen, and group 4 any line that does not start with a tag (treated as
+   * a continuation of the previous value).
+   */
+  const LINE_REGEX = '/^(([A-Z1-9]{2})\s+-(.*))|(.*)$/';
+
+  /**
+   * Records produced by the most recent parse; NULL until a parse has run.
+   */
+  protected $data = NULL;
+
+  /**
+   * Construct a reader.
+   *
+   * @param array $options
+   *   Currently unused.
+   */
+  public function __construct($options = array()) {
+
+  }
+
+  /**
+   * Parse an RIS file.
+   *
+   * The parsed records are stored on the reader (see getRecords()) and also
+   * returned.
+   *
+   * @param string $filename
+   *   The full path to the file to parse.
+   * @param StreamContext $context
+   *   The stream context (if desired) for handling the file.
+   * @retval array
+   *   An indexed array of individual sources, each of which is an
+   *   associative array of tag => array of values.
+   * @throws ParseException
+   *   If the file does not exist or cannot be read.
+   */
+  public function parseFile($filename, $context = NULL) {
+    if (!is_file($filename)) {
+      throw new ParseException(sprintf('File %s not found.', htmlentities($filename)));
+    }
+
+    // FILE_TEXT is intentionally not passed: it has no effect and is
+    // deprecated. FILE_SKIP_EMPTY_LINES only takes effect together with
+    // FILE_IGNORE_NEW_LINES; every line is trimmed again while parsing.
+    $flags = FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES;
+    $contents = file($filename, $flags, $context);
+    if ($contents === FALSE) {
+      throw new ParseException(sprintf('File %s could not be read.', htmlentities($filename)));
+    }
+
+    $this->parseArray($contents);
+    return $this->data;
+  }
+
+  /**
+   * Parse a string of RIS data.
+   *
+   * Lines may be terminated by CRLF, LF or a bare CR. The parsed records are
+   * stored on the reader (see getRecords()) and also returned.
+   *
+   * @param string $string
+   *   RIS-formatted data in a string.
+   * @retval array
+   *   An indexed array of individual sources, each of which is an
+   *   associative array of tag => array of values.
+   */
+  public function parseString($string) {
+    // Splitting on RIS_EOL alone would leave LF-only input as one giant line.
+    $contents = preg_split('/\r\n|\r|\n/', (string) $string);
+    $this->parseArray($contents);
+    return $this->data;
+  }
+
+  /**
+   * Take an array of lines and parse them into RIS records.
+   *
+   * The result replaces whatever a previous parse stored in $this->data.
+   *
+   * @param array $lines
+   *   Indexed array of lines of data.
+   */
+  protected function parseArray($lines) {
+    $recordset = array();
+
+    // Do any cleaning and normalizing.
+    $this->cleanData($lines);
+
+    $record = array();
+    $lastTag = NULL;
+    foreach ($lines as $line) {
+      $matches = array();
+      preg_match(self::LINE_REGEX, trim($line), $matches);
+
+      // Trailing groups that did not take part in the match are absent.
+      $tag = isset($matches[2]) ? $matches[2] : '';
+      $value = isset($matches[3]) ? trim($matches[3]) : '';
+      $continuation = isset($matches[4]) ? trim($matches[4]) : '';
+
+      // Compare against '' rather than using empty() so that a literal "0"
+      // is kept as a value.
+      if ($value !== '') {
+        $lastTag = $tag;
+        $record[$tag][] = $value;
+      }
+      // End record and prep a new one.
+      elseif ($tag === 'ER') {
+        $lastTag = NULL;
+        $recordset[] = $record;
+        $record = array();
+      }
+      // Append to the last value. Text seen before any tag (leading junk
+      // such as a BOM remnant) is skipped.
+      elseif ($continuation !== '' && $lastTag !== NULL) {
+        $lastEntry = count($record[$lastTag]) - 1;
+        // The pieces are trimmed because some encoders add tabs or multiple
+        // spaces. Standard is silent on how this should be handled.
+        $record[$lastTag][$lastEntry] .= ' ' . $continuation;
+      }
+    }
+
+    // Keep a trailing record that was never closed by an ER line.
+    if (!empty($record)) {
+      $recordset[] = $record;
+    }
+
+    $this->data = $recordset;
+  }
+
+  /**
+   * Get the records from the most recent parse.
+   *
+   * @retval array
+   *   Indexed array of records, or NULL if nothing has been parsed yet.
+   */
+  public function getRecords() {
+    return $this->data;
+  }
+
+  /**
+   * Print every parsed value, labelled via RISTags::describeTag().
+   *
+   * Prints nothing when no records are available.
+   */
+  public function printRecords() {
+    if (empty($this->data)) {
+      return;
+    }
+
+    $format = "%s:\n\t%s\n";
+    foreach ($this->data as $record) {
+      foreach ($record as $key => $values) {
+        foreach ($values as $value) {
+          printf($format, RISTags::describeTag($key), $value);
+        }
+      }
+
+      print PHP_EOL;
+    }
+  }
+
+  /**
+   * Clean up the data before processing.
+   *
+   * @param array $lines
+   *   Indexed array of lines of data. Modified in place.
+   */
+  protected function cleanData(&$lines) {
+
+    if (empty($lines) || !isset($lines[0])) return;
+
+    // Currently, we only need to strip a BOM if it exists.
+    // Thanks to Derik Badman (http://madinkbeard.com/) for finding the
+    // bug and suggesting this fix:
+    // http://blog.philipp-michels.de/?p=32
+    $first = (string) $lines[0];
+    if (strncmp($first, "\xEF\xBB\xBF", 3) === 0) {
+      // Cast because substr() can return FALSE on old PHP versions when the
+      // line consists of the BOM only.
+      $lines[0] = (string) substr($first, 3);
+    }
+  }
+
+}
diff --git a/include/LibRIS/RISTags.php b/include/LibRIS/RISTags.php new file mode 100755 index 00000000..39a7e370 --- /dev/null +++ b/include/LibRIS/RISTags.php @@ -0,0 +1,168 @@ + 'Type', + 'ID' => 'Reference ID', + 'T1' => 'Title', + 'TI' => 'Book title', + 'CT' => 'Title of unpublished reference', + 'A1' => 'Primary author', + 'A2' => 'Secondary author', + 'AU' => 'Author', + 'Y1' => 'Primary date', + 'PY' => 'Publication year', + 'N1' => 'Notes', + 'KW' => 'Keywords', + 'RP' => 'Reprint status', + 'SP' => 'Start page', + 'EP' => 'Ending page', + 'JF' => 'Periodical full name', + 'JO' => 'Periodical standard abbreviation', + 'JA' => 'Periodical in which article was published', + 'J1' => 'Periodical name - User abbreviation 1', + 'J2' => 'Periodical name - User abbreviation 2', + 'VL' => 'Volume', + 'IS' => 'Issue', + 'T2' => 'Title secondary', + 'CY' => 'City of Publication', + 'PB' => 'Publisher', + 'U1' => 'User 1', + 'U2' => 'User 2', + 'U3' => 'User 3', + 'U4' => 'User 4', + 'U5' => 'User 5', + 'T3' => 'Title 
series', + 'N2' => 'Abstract', + 'SN' => 'ISSN/ISBN/ASIN', + 'AV' => 'Availability', + 'M1' => 'Misc. 1', + 'M2' => 'Misc. 2', + 'M3' => 'Misc. 3', + 'AD' => 'Address', + 'UR' => 'URL', + 'L1' => 'Link to PDF', + 'L2' => 'Link to Full-text', + 'L3' => 'Related records', + 'L4' => 'Images', + 'ER' => 'End of Reference', + + // Unsure about the origin of these + 'Y2' => 'Primary date 2', + 'BT' => 'Institution [?]', + ); + + public static $tagDescriptions = array( + 'TY' => 'Type of reference (must be the first tag)', + 'ID' => 'Reference ID (not imported to reference software)', + 'T1' => 'Primary title', + 'TI' => 'Book title', + 'CT' => 'Title of unpublished reference', + 'A1' => 'Primary author', + 'A2' => 'Secondary author (each name on separate line)', + 'AU' => 'Author (syntax. Last name, First name, Suffix)', + 'Y1' => 'Primary date', + 'PY' => 'Publication year (YYYY/MM/DD)', + 'N1' => 'Notes ', + 'KW' => 'Keywords (each keyword must be on separate line preceded KW -)', + 'RP' => 'Reprint status (IN FILE, NOT IN FILE, ON REQUEST (MM/DD/YY))', + 'SP' => 'Start page number', + 'EP' => 'Ending page number', + 'JF' => 'Periodical full name', + 'JO' => 'Periodical standard abbreviation', + 'JA' => 'Periodical in which article was published', + 'J1' => 'Periodical name - User abbreviation 1', + 'J2' => 'Periodical name - User abbreviation 2', + 'VL' => 'Volume number', + 'IS' => 'Issue number', + 'T2' => 'Title secondary', + 'CY' => 'City of Publication', + 'PB' => 'Publisher', + 'U1' => 'User definable 1', + 'U2' => 'User definable 2', + 'U3' => 'User definable 3', + 'U4' => 'User definable 4', + 'U5' => 'User definable 5', + 'T3' => 'Title series', + 'N2' => 'Abstract', + 'SN' => 'ISSN/ISBN (e.g. ISSN XXXX-XXXX)', + 'AV' => 'Availability', + 'M1' => 'Misc. 1', + 'M2' => 'Misc. 2', + 'M3' => 'Misc. 
3', + 'AD' => 'Address', + 'UR' => 'Web/URL', + 'L1' => 'Link to PDF', + 'L2' => 'Link to Full-text', + 'L3' => 'Related records', + 'L4' => 'Images', + 'ER' => 'End of Reference (must be the last tag)', + ); + + /** + * Map of all types (tag TY) defined for RIS. + * @var array + * @see http://en.wikipedia.org/wiki/RIS_%28file_format%29 + * @see http://www.refman.com/support/risformat_intro.asp + */ + public static $typeMap = array( + 'ABST' => 'Abstract', + 'ADVS' => 'Audiovisual material', + 'ART' => 'Art Work', + 'BOOK' => 'Whole book', + 'CASE' => 'Case', + 'CHAP' => 'Book chapter', + 'COMP' => 'Computer program', + 'CONF' => 'Conference proceeding', + 'CTLG' => 'Catalog', + 'DATA' => 'Data file', + 'ELEC' => 'Electronic Citation', + 'GEN' => 'Generic', + 'HEAR' => 'Hearing', + 'ICOMM' => 'Internet Communication', + 'INPR' => 'In Press', + 'JFULL' => 'Journal (full)', + 'JOUR' => 'Journal', + 'MAP' => 'Map', + 'MGZN' => 'Magazine article', + 'MPCT' => 'Motion picture', + 'MUSIC' => 'Music score', + 'NEWS' => 'Newspaper', + 'PAMP' => 'Pamphlet', + 'PAT' => 'Patent', + 'PCOMM' => 'Personal communication', + 'RPRT' => 'Report', + 'SER' => 'Serial publication', + 'SLIDE' => 'Slide', + 'SOUND' => 'Sound recording', + 'STAT' => 'Statute', + 'THES' => 'Thesis/Dissertation', + 'UNPB' => 'Unpublished work', + 'VIDEO' => 'Video recording', + ); +} diff --git a/include/LibRIS/RISWriter.php b/include/LibRIS/RISWriter.php new file mode 100755 index 00000000..471e1759 --- /dev/null +++ b/include/LibRIS/RISWriter.php @@ -0,0 +1,66 @@ +writeRecords($records); + * ?> + * @endcode + */
+class RISWriter {
+
+  public function __construct() {}
+
+  /**
+   * Write a series of records to a single RIS string.
+   *
+   * Records are joined with RISReader::RIS_EOL; no terminator is appended
+   * after the final record.
+   *
+   * @param array $records
+   *   An array in the format generated by RISReader::parseFile(). An empty
+   *   or NULL value yields an empty string.
+   * @retval string
+   *   The records as a string.
+   */
+  public function writeRecords($records) {
+    if (empty($records)) {
+      return '';
+    }
+
+    $buffer = array();
+    foreach ($records as $record) {
+      $buffer[] = $this->writeRecord($record);
+    }
+    return implode(RISReader::RIS_EOL, $buffer);
+  }
+
+  /**
+   * Write a single record as an RIS string.
+   *
+   * The record should be an associative array of tags to values. The TY
+   * line is always written first and the ER line last, whatever position
+   * those keys have in $tags.
+   *
+   * @param array $tags
+   *   An associative array of key => array(value1, value2,...). A scalar
+   *   value is treated as a one-element list.
+   * @retval string
+   *   The record as a string.
+   */
+  public function writeRecord($tags) {
+    // RIS lines are: tag, two spaces, hyphen, one space, value.
+    $fmt = '%s  - %s';
+
+    // Only the first TY value is written. A record without TY still gets an
+    // (empty) TY line, without touching an undefined index.
+    $types = isset($tags['TY']) ? (array) $tags['TY'] : array();
+    $type = empty($types) ? '' : reset($types);
+    $buffer = array(sprintf($fmt, 'TY', $type));
+
+    // TY has been written already; ER is appended below. Dropping an ER key
+    // here keeps the record from being terminated twice.
+    unset($tags['TY'], $tags['ER']);
+
+    foreach ($tags as $tag => $values) {
+      foreach ((array) $values as $value) {
+        $buffer[] = sprintf($fmt, $tag, $value);
+      }
+    }
+    $buffer[] = 'ER  - ';
+
+    return implode(RISReader::RIS_EOL, $buffer);
+  }
+
+}