From e10e770cbef96be859b572c97ac1b9f0bc1e95bd Mon Sep 17 00:00:00 2001 From: Andreas Gohr <andi@splitbrain.org> Date: Sun, 16 Oct 2011 14:23:18 +0200 Subject: [PATCH] Upgraded SimplePie library to 1.2.1 From the release notes: This release fixes a few bugs, including a major URL parsing bug, where URLs with query strings were parsed incorrectly. --- inc/SimplePie.php | 570 ++++++++++++++++++++++++++++++++++------------ 1 file changed, 423 insertions(+), 147 deletions(-) diff --git a/inc/SimplePie.php b/inc/SimplePie.php index d35443165..10d8141bd 100644 --- a/inc/SimplePie.php +++ b/inc/SimplePie.php @@ -5,7 +5,7 @@ * A PHP-Based RSS and Atom Feed Framework. * Takes the hard work out of managing a complete RSS/Atom solution. * - * Copyright (c) 2004-2009, Ryan Parman and Geoffrey Sneddon + * Copyright (c) 2004-2011, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors * All rights reserved. * * Redistribution and use in source and binary forms, with or without modification, are @@ -33,10 +33,11 @@ * POSSIBILITY OF SUCH DAMAGE. * * @package SimplePie - * @version 1.2.1-dev - * @copyright 2004-2009 Ryan Parman, Geoffrey Sneddon + * @version 1.2.1 + * @copyright 2004-2011 Ryan Parman, Geoffrey Sneddon, Ryan McCue * @author Ryan Parman * @author Geoffrey Sneddon + * @author Ryan McCue * @link http://simplepie.org/ SimplePie * @link http://simplepie.org/support/ Please submit all bug reports and feature requests to the SimplePie forums * @license http://www.opensource.org/licenses/bsd-license.php BSD License @@ -55,9 +56,8 @@ define('SIMPLEPIE_VERSION', '1.2.1-dev'); /** * SimplePie Build - * @todo Hardcode for release (there's no need to have to call SimplePie_Misc::parse_date() only every load of simplepie.inc) */ -define('SIMPLEPIE_BUILD', gmdate('YmdHis', SimplePie_Misc::parse_date(substr('$Date$', 7, 25)) ? SimplePie_Misc::parse_date(substr('$Date$', 7, 25)) : filemtime(__FILE__))); +define('SIMPLEPIE_BUILD', '20111015034325'); /** * SimplePie Website URL @@ -3279,6 +3279,11 @@ class SimplePie_Item { return $this->sanitize($return[0]['data'], SIMPLEPIE_CONSTRUCT_TEXT); } + elseif ($return = $this->get_item_tags(SIMPLEPIE_NAMESPACE_RSS_090, 'description')) + { + return $this->sanitize($return[0]['data'], SIMPLEPIE_CONSTRUCT_HTML); + } + elseif (!$description_only) { return $this->get_content(true); @@ -3643,7 +3648,7 @@ class SimplePie_Item { return $this->sanitize($this->get_date(''), SIMPLEPIE_CONSTRUCT_TEXT); } - elseif (($date = $this->get_date('U')) !== null) + elseif (($date = $this->get_date('U')) !== null && $date !== false) { return strftime($date_format, $date); } @@ -4068,16 +4073,16 @@ class SimplePie_Item $temp = explode(':', $this->sanitize($duration_parent[0]['data'], SIMPLEPIE_CONSTRUCT_TEXT)); if (sizeof($temp) > 0) { - (int) $seconds = array_pop($temp); + $seconds = (int) array_pop($temp); } if (sizeof($temp) > 0) { - (int) $minutes = array_pop($temp); + $minutes = (int) array_pop($temp); $seconds += $minutes * 60; } if (sizeof($temp) > 0) { - (int) $hours = array_pop($temp); + $hours = (int) array_pop($temp); $seconds += $hours * 3600; } unset($temp); @@ -5056,7 +5061,7 @@ class SimplePie_Item { foreach ((array) $this->data['child'][SIMPLEPIE_NAMESPACE_MEDIARSS]['content'] as $content) { - if (isset($content['attribs']['']['url'])) + if (isset($content['attribs']['']['url']) || isset($content['child'][SIMPLEPIE_NAMESPACE_MEDIARSS]['player'])) { // Attributes $bitrate = null; @@ -5141,8 +5146,10 @@ class SimplePie_Item { $width = $this->sanitize($content['attribs']['']['width'], SIMPLEPIE_CONSTRUCT_TEXT); } - $url = $this->sanitize($content['attribs']['']['url'], SIMPLEPIE_CONSTRUCT_IRI); - + if (isset($content['attribs']['']['url'])) + { + $url = $this->sanitize($content['attribs']['']['url'], SIMPLEPIE_CONSTRUCT_IRI); + } // Checking the other optional media: elements. Priority: media:content, media:group, item, channel // CAPTIONS @@ -7731,16 +7738,17 @@ class SimplePie_File { $this->method = SIMPLEPIE_FILE_SOURCE_REMOTE | SIMPLEPIE_FILE_SOURCE_FSOCKOPEN; $url_parts = parse_url($url); + $socket_host = $url_parts['host']; if (isset($url_parts['scheme']) && strtolower($url_parts['scheme']) === 'https') { - $url_parts['host'] = "ssl://$url_parts[host]"; + $socket_host = "ssl://$url_parts[host]"; $url_parts['port'] = 443; } if (!isset($url_parts['port'])) { $url_parts['port'] = 80; } - $fp = @fsockopen($url_parts['host'], $url_parts['port'], $errno, $errstr, $timeout); + $fp = @fsockopen($socket_host, $url_parts['port'], $errno, $errstr, $timeout); if (!$fp) { $this->error = 'fsockopen error: ' . $errstr; @@ -8486,7 +8494,7 @@ class SimplePie_gzdecode // Get the length of the extra field $len = current(unpack('v', substr($this->compressed_data, $this->position, 2))); - $position += 2; + $this->position += 2; // Check the length of the string is still valid $this->min_compressed_size += $len + 4; @@ -9448,12 +9456,12 @@ class SimplePie_Misc } // This is first, as behaviour of this is completely predictable - if ($input === 'Windows-1252' && $output === 'UTF-8') + if ($input === 'windows-1252' && $output === 'UTF-8') { return SimplePie_Misc::windows_1252_to_utf8($data); } // This is second, as behaviour of this varies only with PHP version (the middle part of this expression checks the encoding is supported). - elseif (function_exists('mb_convert_encoding') && @mb_convert_encoding("\x80", 'UTF-16BE', $input) !== "\x00\x80" && ($return = @mb_convert_encoding($data, $output, $input))) + elseif (function_exists('mb_convert_encoding') && @mb_convert_encoding("\x80", 'UTF-16BE', $input) !== "\x00\x80" && in_array($input, mb_list_encodings()) && ($return = @mb_convert_encoding($data, $output, $input))) { return $return; } @@ -9469,6 +9477,17 @@ class SimplePie_Misc } } + /** + * Normalize an encoding name + * + * This is automatically generated by create.php + * + * To generate it, run `php create.php` on the command line, and copy the + * output to replace this function. + * + * @param string $charset Character set to standardise + * @return string Standardised name + */ function encoding($charset) { // Normalization from UTS #22 @@ -9502,7 +9521,6 @@ class SimplePie_Misc case 'big5': case 'csbig5': - case 'xxbig5': return 'Big5'; case 'big5hkscs': @@ -9658,14 +9676,14 @@ class SimplePie_Misc case 'isoir85': return 'ES2'; - case 'cseucfixwidjapanese': - case 'extendedunixcodefixedwidthforjapanese': - return 'Extended_UNIX_Code_Fixed_Width_for_Japanese'; - case 'cseucpkdfmtjapanese': case 'eucjp': case 'extendedunixcodepackedformatforjapanese': - return 'Extended_UNIX_Code_Packed_Format_for_Japanese'; + return 'EUC-JP'; + + case 'cseucfixwidjapanese': + case 'extendedunixcodefixedwidthforjapanese': + return 'Extended_UNIX_Code_Fixed_Width_for_Japanese'; case 'gb18030': return 'GB18030'; @@ -9744,80 +9762,6 @@ class SimplePie_Misc case 'ibmthai': return 'IBM-Thai'; - case 'ccsid858': - case 'cp858': - case 'ibm858': - case 'pcmultilingual850euro': - return 'IBM00858'; - - case 'ccsid924': - case 'cp924': - case 'ebcdiclatin9euro': - case 'ibm924': - return 'IBM00924'; - - case 'ccsid1140': - case 'cp1140': - case 'ebcdicus37euro': - case 'ibm1140': - return 'IBM01140'; - - case 'ccsid1141': - case 'cp1141': - case 'ebcdicde273euro': - case 'ibm1141': - return 'IBM01141'; - - case 'ccsid1142': - case 'cp1142': - case 'ebcdicdk277euro': - case 'ebcdicno277euro': - case 'ibm1142': - return 'IBM01142'; - - case 'ccsid1143': - case 'cp1143': - case 'ebcdicfi278euro': - case 'ebcdicse278euro': - case 'ibm1143': - return 'IBM01143'; - - case 'ccsid1144': - case 'cp1144': - case 'ebcdicit280euro': - case 'ibm1144': - return 'IBM01144'; - - case 'ccsid1145': - case 'cp1145': - case 'ebcdices284euro': - case 'ibm1145': - return 'IBM01145'; - - case 'ccsid1146': - case 'cp1146': - case 'ebcdicgb285euro': - case 'ibm1146': - return 'IBM01146'; - - case 'ccsid1147': - case 'cp1147': - case 'ebcdicfr297euro': - case 'ibm1147': - return 'IBM01147'; - - case 'ccsid1148': - case 'cp1148': - case 'ebcdicinternational500euro': - case 'ibm1148': - return 'IBM01148'; - - case 'ccsid1149': - case 'cp1149': - case 'ebcdicis871euro': - case 'ibm1149': - return 'IBM01149'; - case 'cp37': case 'csibm37': case 'ebcdiccpca': @@ -9965,6 +9909,12 @@ class SimplePie_Misc case 'ibm857': return 'IBM857'; + case 'ccsid858': + case 'cp858': + case 'ibm858': + case 'pcmultilingual850euro': + return 'IBM00858'; + case '860': case 'cp860': case 'csibm860': @@ -10067,6 +10017,12 @@ class SimplePie_Misc case 'ibm918': return 'IBM918'; + case 'ccsid924': + case 'cp924': + case 'ebcdiclatin9euro': + case 'ibm924': + return 'IBM00924'; + case 'cp1026': case 'csibm1026': case 'ibm1026': @@ -10075,6 +10031,68 @@ class SimplePie_Misc case 'ibm1047': return 'IBM1047'; + case 'ccsid1140': + case 'cp1140': + case 'ebcdicus37euro': + case 'ibm1140': + return 'IBM01140'; + + case 'ccsid1141': + case 'cp1141': + case 'ebcdicde273euro': + case 'ibm1141': + return 'IBM01141'; + + case 'ccsid1142': + case 'cp1142': + case 'ebcdicdk277euro': + case 'ebcdicno277euro': + case 'ibm1142': + return 'IBM01142'; + + case 'ccsid1143': + case 'cp1143': + case 'ebcdicfi278euro': + case 'ebcdicse278euro': + case 'ibm1143': + return 'IBM01143'; + + case 'ccsid1144': + case 'cp1144': + case 'ebcdicit280euro': + case 'ibm1144': + return 'IBM01144'; + + case 'ccsid1145': + case 'cp1145': + case 'ebcdices284euro': + case 'ibm1145': + return 'IBM01145'; + + case 'ccsid1146': + case 'cp1146': + case 'ebcdicgb285euro': + case 'ibm1146': + return 'IBM01146'; + + case 'ccsid1147': + case 'cp1147': + case 'ebcdicfr297euro': + case 'ibm1147': + return 'IBM01147'; + + case 'ccsid1148': + case 'cp1148': + case 'ebcdicinternational500euro': + case 'ibm1148': + return 'IBM01148'; + + case 'ccsid1149': + case 'cp1149': + case 'ebcdicis871euro': + case 'ibm1149': + return 'IBM01149'; + case 'csiso143iecp271': case 'iecp271': case 'isoir143': @@ -10617,11 +10635,6 @@ class SimplePie_Misc case 'sen850200c': return 'SEN_850200_C'; - case 'csshiftjis': - case 'mskanji': - case 'shiftjis': - return 'Shift_JIS'; - case 'csiso102t617bit': case 'isoir102': case 't617bit': @@ -10720,7 +10733,10 @@ class SimplePie_Misc case 'viscii': return 'VISCII'; + case 'csshiftjis': case 'cswindows31j': + case 'mskanji': + case 'shiftjis': case 'windows31j': return 'Windows-31J'; @@ -11381,6 +11397,58 @@ function embed_wmedia(width, height, link) { } <?php } + + + + /** + * Format debugging information + */ + function debug($sp) + { + $info = 'SimplePie ' . SIMPLEPIE_VERSION . ' Build ' . SIMPLEPIE_BUILD . "\n"; + $info .= 'PHP ' . PHP_VERSION . "\n"; + if ($sp->error() !== null) + { + $info .= 'Error occurred: ' . $sp->error() . "\n"; + } + else + { + $info .= "No error found.\n"; + } + $info .= "Extensions:\n"; + $extensions = array('pcre', 'curl', 'zlib', 'mbstring', 'iconv', 'xmlreader', 'xml'); + foreach ($extensions as $ext) + { + if (extension_loaded($ext)) + { + $info .= " $ext loaded\n"; + switch ($ext) + { + case 'pcre': + $info .= ' Version ' . PCRE_VERSION . "\n"; + break; + case 'curl': + $version = curl_version(); + $info .= ' Version ' . $version['version'] . "\n"; + break; + case 'mbstring': + $info .= ' Overloading: ' . mb_get_info('func_overload') . "\n"; + break; + case 'iconv': + $info .= ' Version ' . ICONV_VERSION . "\n"; + break; + case 'xml': + $info .= ' Version ' . LIBXML_DOTTED_VERSION . "\n"; + break; + } + } + else + { + $info .= " $ext not loaded\n"; + } + } + return $info; + } } /** @@ -11852,14 +11920,135 @@ class SimplePie_IRI /** * Replace invalid character with percent encoding * - * @access private * @param string $string Input string * @param string $valid_chars Valid characters * @param int $case Normalise case * @return string */ - function replace_invalid_with_pct_encoding($string, $valid_chars, $case = SIMPLEPIE_SAME_CASE) + function replace_invalid_with_pct_encoding($string, $valid_chars, $case = SIMPLEPIE_SAME_CASE, $iprivate = false) { + // Normalize as many pct-encoded sections as possible + $string = preg_replace_callback('/(?:%[A-Fa-f0-9]{2})+/', array(&$this, 'remove_iunreserved_percent_encoded'), $string); + + // Replace invalid percent characters + $string = preg_replace('/%(?![A-Fa-f0-9]{2})/', '%25', $string); + + // Add unreserved and % to $valid_chars (the latter is safe because all + // pct-encoded sections are now valid). + $valid_chars .= 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~%'; + + // Now replace any bytes that aren't allowed with their pct-encoded versions + $position = 0; + $strlen = strlen($string); + while (($position += strspn($string, $valid_chars, $position)) < $strlen) + { + $value = ord($string[$position]); + + // Start position + $start = $position; + + // By default we are valid + $valid = true; + + // No one byte sequences are valid due to the while. + // Two byte sequence: + if (($value & 0xE0) === 0xC0) + { + $character = ($value & 0x1F) << 6; + $length = 2; + $remaining = 1; + } + // Three byte sequence: + elseif (($value & 0xF0) === 0xE0) + { + $character = ($value & 0x0F) << 12; + $length = 3; + $remaining = 2; + } + // Four byte sequence: + elseif (($value & 0xF8) === 0xF0) + { + $character = ($value & 0x07) << 18; + $length = 4; + $remaining = 3; + } + // Invalid byte: + else + { + $valid = false; + $length = 1; + $remaining = 0; + } + + if ($remaining) + { + if ($position + $length <= $strlen) + { + for ($position++; $remaining; $position++) + { + $value = ord($string[$position]); + + // Check that the byte is valid, then add it to the character: + if (($value & 0xC0) === 0x80) + { + $character |= ($value & 0x3F) << (--$remaining * 6); + } + // If it is invalid, count the sequence as invalid and reprocess the current byte: + else + { + $valid = false; + $position--; + break; + } + } + } + else + { + $position = $strlen - 1; + $valid = false; + } + } + + // Percent encode anything invalid or not in ucschar + if ( + // Invalid sequences + !$valid + // Non-shortest form sequences are invalid + || $length > 1 && $character <= 0x7F + || $length > 2 && $character <= 0x7FF + || $length > 3 && $character <= 0xFFFF + // Outside of range of ucschar codepoints + // Noncharacters + || ($character & 0xFFFE) === 0xFFFE + || $character >= 0xFDD0 && $character <= 0xFDEF + || ( + // Everything else not in ucschar + $character > 0xD7FF && $character < 0xF900 + || $character < 0xA0 + || $character > 0xEFFFD + ) + && ( + // Everything not in iprivate, if it applies + !$iprivate + || $character < 0xE000 + || $character > 0x10FFFD + ) + ) + { + // If we were a character, pretend we weren't, but rather an error. + if ($valid) + $position--; + + for ($j = $start; $j <= $position; $j++) + { + $string = substr_replace($string, sprintf('%%%02X', ord($string[$j])), $j, 1); + $j += 2; + $position += 2; + $strlen += 2; + } + } + } + // Normalise case if ($case & SIMPLEPIE_LOWERCASE) { @@ -11870,61 +12059,148 @@ class SimplePie_IRI $string = strtoupper($string); } - // Store position and string length (to avoid constantly recalculating this) - $position = 0; - $strlen = strlen($string); + return $string; + } - // Loop as long as we have invalid characters, advancing the position to the next invalid character - while (($position += strspn($string, $valid_chars, $position)) < $strlen) + /** + * Callback function for preg_replace_callback. + * + * Removes sequences of percent encoded bytes that represent UTF-8 + * encoded characters in iunreserved + * + * @access private + * @param array $match PCRE match + * @return string Replacement + */ + function remove_iunreserved_percent_encoded($match) + { + // As we just have valid percent encoded sequences we can just explode + // and ignore the first member of the returned array (an empty string). + $bytes = explode('%', $match[0]); + + // Initialize the new string (this is what will be returned) and that + // there are no bytes remaining in the current sequence (unsurprising + // at the first byte!). + $string = ''; + $remaining = 0; + + // Loop over each and every byte, and set $value to its value + for ($i = 1, $len = count($bytes); $i < $len; $i++) { - // If we have a % character - if ($string[$position] === '%') + $value = hexdec($bytes[$i]); + + // If we're the first byte of sequence: + if (!$remaining) { - // If we have a pct-encoded section - if ($position + 2 < $strlen && strspn($string, '0123456789ABCDEFabcdef', $position + 1, 2) === 2) - { - // Get the the represented character - $chr = chr(hexdec(substr($string, $position + 1, 2))); + // Start position + $start = $i; - // If the character is valid, replace the pct-encoded with the actual character while normalising case - if (strpos($valid_chars, $chr) !== false) - { - if ($case & SIMPLEPIE_LOWERCASE) - { - $chr = strtolower($chr); - } - elseif ($case & SIMPLEPIE_UPPERCASE) - { - $chr = strtoupper($chr); - } - $string = substr_replace($string, $chr, $position, 3); - $strlen -= 2; - $position++; - } + // By default we are valid + $valid = true; - // Otherwise just normalise the pct-encoded to uppercase - else + // One byte sequence: + if ($value <= 0x7F) + { + $character = $value; + $length = 1; + } + // Two byte sequence: + elseif (($value & 0xE0) === 0xC0) + { + $character = ($value & 0x1F) << 6; + $length = 2; + $remaining = 1; + } + // Three byte sequence: + elseif (($value & 0xF0) === 0xE0) + { + $character = ($value & 0x0F) << 12; + $length = 3; + $remaining = 2; + } + // Four byte sequence: + elseif (($value & 0xF8) === 0xF0) + { + $character = ($value & 0x07) << 18; + $length = 4; + $remaining = 3; + } + // Invalid byte: + else + { + $valid = false; + $remaining = 0; + } + } + // Continuation byte: + else + { + // Check that the byte is valid, then add it to the character: + if (($value & 0xC0) === 0x80) + { + $remaining--; + $character |= ($value & 0x3F) << ($remaining * 6); + } + // If it is invalid, count the sequence as invalid and reprocess the current byte as the start of a sequence: + else + { + $valid = false; + $remaining = 0; + $i--; + } + } + + // If we've reached the end of the current byte sequence, append it to Unicode::$data + if (!$remaining) + { + // Percent encode anything invalid or not in iunreserved + if ( + // Invalid sequences + !$valid + // Non-shortest form sequences are invalid + || $length > 1 && $character <= 0x7F + || $length > 2 && $character <= 0x7FF + || $length > 3 && $character <= 0xFFFF + // Outside of range of iunreserved codepoints + || $character < 0x2D + || $character > 0xEFFFD + // Noncharacters + || ($character & 0xFFFE) === 0xFFFE + || $character >= 0xFDD0 && $character <= 0xFDEF + // Everything else not in iunreserved (this is all BMP) + || $character === 0x2F + || $character > 0x39 && $character < 0x41 + || $character > 0x5A && $character < 0x61 + || $character > 0x7A && $character < 0x7E + || $character > 0x7E && $character < 0xA0 + || $character > 0xD7FF && $character < 0xF900 + ) + { + for ($j = $start; $j <= $i; $j++) { - $string = substr_replace($string, strtoupper(substr($string, $position + 1, 2)), $position + 1, 2); - $position += 3; + $string .= '%' . strtoupper($bytes[$j]); } } - // If we don't have a pct-encoded section, just replace the % with its own esccaped form else { - $string = substr_replace($string, '%25', $position, 1); - $strlen += 2; - $position += 3; + for ($j = $start; $j <= $i; $j++) + { + $string .= chr(hexdec($bytes[$j])); + } } } - // If we have an invalid character, change into its pct-encoded form - else + } + + // If we have any bytes left over they are invalid (i.e., we are + // mid-way through a multi-byte sequence) + if ($remaining) + { + for ($j = $start; $j < $len; $j++) { - $replacement = sprintf("%%%02X", ord($string[$position])); - $string = str_replace($string[$position], $replacement, $string); - $strlen = strlen($string); + $string .= '%' . strtoupper($bytes[$j]); } } + return $string; } @@ -14446,7 +14722,7 @@ class SimplePie_Parser case constant('XMLReader::END_ELEMENT'): if ($xml->namespaceURI !== '') { - $tagName = "{$xml->namespaceURI}{$this->separator}{$xml->localName}"; + $tagName = $xml->namespaceURI . $this->separator . $xml->localName; } else { @@ -14458,7 +14734,7 @@ class SimplePie_Parser $empty = $xml->isEmptyElement; if ($xml->namespaceURI !== '') { - $tagName = "{$xml->namespaceURI}{$this->separator}{$xml->localName}"; + $tagName = $xml->namespaceURI . $this->separator . $xml->localName; } else { @@ -14469,7 +14745,7 @@ class SimplePie_Parser { if ($xml->namespaceURI !== '') { - $attrName = "{$xml->namespaceURI}{$this->separator}{$xml->localName}"; + $attrName = $xml->namespaceURI . $this->separator . $xml->localName; } else { -- GitLab