1
0
Fork 0
mirror of https://github.com/wallabag/wallabag.git synced 2025-08-26 18:21:02 +00:00

[change] we now use Full-Text RSS 3.1, thank you so much @fivefilters

This commit is contained in:
Nicolas Lœuillet 2013-12-06 09:45:27 +01:00
parent 59cc585271
commit 42c80841c8
83 changed files with 23898 additions and 7845 deletions

250
inc/3rdparty/libraries/Zend/Cache.php vendored Normal file
View file

@ -0,0 +1,250 @@
<?php
/**
* Zend Framework
*
* LICENSE
*
* This source file is subject to the new BSD license that is bundled
* with this package in the file LICENSE.txt.
* It is also available through the world-wide-web at this URL:
* http://framework.zend.com/license/new-bsd
* If you did not receive a copy of the license and are unable to
* obtain it through the world-wide-web, please send an email
* to license@zend.com so we can send you a copy immediately.
*
* @category Zend
* @package Zend_Cache
* @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
* @version $Id: Cache.php 24656 2012-02-26 06:02:53Z adamlundrigan $
*/
/**
* @package Zend_Cache
* @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
abstract class Zend_Cache
{
/**
* Standard frontends
*
* @var array
*/
public static $standardFrontends = array('Core', 'Output', 'Class', 'File', 'Function', 'Page');
/**
* Standard backends
*
* @var array
*/
public static $standardBackends = array('File', 'Sqlite', 'Memcached', 'Libmemcached', 'Apc', 'ZendPlatform',
'Xcache', 'TwoLevels', 'WinCache', 'ZendServer_Disk', 'ZendServer_ShMem');
/**
* Standard backends which implement the ExtendedInterface
*
* @var array
*/
public static $standardExtendedBackends = array('File', 'Apc', 'TwoLevels', 'Memcached', 'Libmemcached', 'Sqlite', 'WinCache');
/**
* Only for backward compatibility (may be removed in next major release)
*
* @var array
* @deprecated
*/
public static $availableFrontends = array('Core', 'Output', 'Class', 'File', 'Function', 'Page');
/**
* Only for backward compatibility (may be removed in next major release)
*
* @var array
* @deprecated
*/
public static $availableBackends = array('File', 'Sqlite', 'Memcached', 'Libmemcached', 'Apc', 'ZendPlatform', 'Xcache', 'WinCache', 'TwoLevels');
/**
* Consts for clean() method
*/
const CLEANING_MODE_ALL = 'all';
const CLEANING_MODE_OLD = 'old';
const CLEANING_MODE_MATCHING_TAG = 'matchingTag';
const CLEANING_MODE_NOT_MATCHING_TAG = 'notMatchingTag';
const CLEANING_MODE_MATCHING_ANY_TAG = 'matchingAnyTag';
/**
* Factory
*
* @param mixed $frontend frontend name (string) or Zend_Cache_Frontend_ object
* @param mixed $backend backend name (string) or Zend_Cache_Backend_ object
* @param array $frontendOptions associative array of options for the corresponding frontend constructor
* @param array $backendOptions associative array of options for the corresponding backend constructor
* @param boolean $customFrontendNaming if true, the frontend argument is used as a complete class name ; if false, the frontend argument is used as the end of "Zend_Cache_Frontend_[...]" class name
* @param boolean $customBackendNaming if true, the backend argument is used as a complete class name ; if false, the backend argument is used as the end of "Zend_Cache_Backend_[...]" class name
* @param boolean $autoload if true, there will no require_once for backend and frontend (useful only for custom backends/frontends)
* @throws Zend_Cache_Exception
* @return Zend_Cache_Core|Zend_Cache_Frontend
*/
public static function factory($frontend, $backend, $frontendOptions = array(), $backendOptions = array(), $customFrontendNaming = false, $customBackendNaming = false, $autoload = false)
{
if (is_string($backend)) {
$backendObject = self::_makeBackend($backend, $backendOptions, $customBackendNaming, $autoload);
} else {
if ((is_object($backend)) && (in_array('Zend_Cache_Backend_Interface', class_implements($backend)))) {
$backendObject = $backend;
} else {
self::throwException('backend must be a backend name (string) or an object which implements Zend_Cache_Backend_Interface');
}
}
if (is_string($frontend)) {
$frontendObject = self::_makeFrontend($frontend, $frontendOptions, $customFrontendNaming, $autoload);
} else {
if (is_object($frontend)) {
$frontendObject = $frontend;
} else {
self::throwException('frontend must be a frontend name (string) or an object');
}
}
$frontendObject->setBackend($backendObject);
return $frontendObject;
}
/**
* Backend Constructor
*
* @param string $backend
* @param array $backendOptions
* @param boolean $customBackendNaming
* @param boolean $autoload
* @return Zend_Cache_Backend
*/
public static function _makeBackend($backend, $backendOptions, $customBackendNaming = false, $autoload = false)
{
if (!$customBackendNaming) {
$backend = self::_normalizeName($backend);
}
if (in_array($backend, Zend_Cache::$standardBackends)) {
// we use a standard backend
$backendClass = 'Zend_Cache_Backend_' . $backend;
// security controls are explicit
require_once realpath(dirname(__FILE__).'/..').DIRECTORY_SEPARATOR.str_replace('_', DIRECTORY_SEPARATOR, $backendClass) . '.php';
} else {
// we use a custom backend
if (!preg_match('~^[\w\\\\]+$~D', $backend)) {
Zend_Cache::throwException("Invalid backend name [$backend]");
}
if (!$customBackendNaming) {
// we use this boolean to avoid an API break
$backendClass = 'Zend_Cache_Backend_' . $backend;
} else {
$backendClass = $backend;
}
if (!$autoload) {
$file = str_replace('_', DIRECTORY_SEPARATOR, $backendClass) . '.php';
if (!(self::_isReadable($file))) {
self::throwException("file $file not found in include_path");
}
require_once $file;
}
}
return new $backendClass($backendOptions);
}
/**
* Frontend Constructor
*
* @param string $frontend
* @param array $frontendOptions
* @param boolean $customFrontendNaming
* @param boolean $autoload
* @return Zend_Cache_Core|Zend_Cache_Frontend
*/
public static function _makeFrontend($frontend, $frontendOptions = array(), $customFrontendNaming = false, $autoload = false)
{
if (!$customFrontendNaming) {
$frontend = self::_normalizeName($frontend);
}
if (in_array($frontend, self::$standardFrontends)) {
// we use a standard frontend
// For perfs reasons, with frontend == 'Core', we can interact with the Core itself
$frontendClass = 'Zend_Cache_' . ($frontend != 'Core' ? 'Frontend_' : '') . $frontend;
// security controls are explicit
require_once realpath(dirname(__FILE__).'/..').DIRECTORY_SEPARATOR.str_replace('_', DIRECTORY_SEPARATOR, $frontendClass) . '.php';
} else {
// we use a custom frontend
if (!preg_match('~^[\w\\\\]+$~D', $frontend)) {
Zend_Cache::throwException("Invalid frontend name [$frontend]");
}
if (!$customFrontendNaming) {
// we use this boolean to avoid an API break
$frontendClass = 'Zend_Cache_Frontend_' . $frontend;
} else {
$frontendClass = $frontend;
}
if (!$autoload) {
$file = str_replace('_', DIRECTORY_SEPARATOR, $frontendClass) . '.php';
if (!(self::_isReadable($file))) {
self::throwException("file $file not found in include_path");
}
require_once $file;
}
}
return new $frontendClass($frontendOptions);
}
/**
* Throw an exception
*
* Note : for perf reasons, the "load" of Zend/Cache/Exception is dynamic
* @param string $msg Message for the exception
* @throws Zend_Cache_Exception
*/
public static function throwException($msg, Exception $e = null)
{
// For perfs reasons, we use this dynamic inclusion
require_once 'Zend/Cache/Exception.php';
throw new Zend_Cache_Exception($msg, 0, $e);
}
/**
* Normalize frontend and backend names to allow multiple words TitleCased
*
* @param string $name Name to normalize
* @return string
*/
protected static function _normalizeName($name)
{
$name = ucfirst(strtolower($name));
$name = str_replace(array('-', '_', '.'), ' ', $name);
$name = ucwords($name);
$name = str_replace(' ', '', $name);
if (stripos($name, 'ZendServer') === 0) {
$name = 'ZendServer_' . substr($name, strlen('ZendServer'));
}
return $name;
}
/**
* Returns TRUE if the $filename is readable, or FALSE otherwise.
* This function uses the PHP include_path, where PHP's is_readable()
* does not.
*
* Note : this method comes from Zend_Loader (see #ZF-2891 for details)
*
* @param string $filename
* @return boolean
*/
private static function _isReadable($filename)
{
if (!$fh = @fopen($filename, 'r', true)) {
return false;
}
@fclose($fh);
return true;
}
}

View file

@ -0,0 +1,290 @@
<?php
/**
* Zend Framework
*
* LICENSE
*
* This source file is subject to the new BSD license that is bundled
* with this package in the file LICENSE.txt.
* It is also available through the world-wide-web at this URL:
* http://framework.zend.com/license/new-bsd
* If you did not receive a copy of the license and are unable to
* obtain it through the world-wide-web, please send an email
* to license@zend.com so we can send you a copy immediately.
*
* @category Zend
* @package Zend_Cache
* @subpackage Zend_Cache_Backend
* @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
* @version $Id: Backend.php 24989 2012-06-21 07:24:13Z mabe $
*/
/**
* @package Zend_Cache
* @subpackage Zend_Cache_Backend
* @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
class Zend_Cache_Backend
{
/**
* Frontend or Core directives
*
* =====> (int) lifetime :
* - Cache lifetime (in seconds)
* - If null, the cache is valid forever
*
* =====> (int) logging :
* - if set to true, a logging is activated throw Zend_Log
*
* @var array directives
*/
protected $_directives = array(
'lifetime' => 3600,
'logging' => false,
'logger' => null
);
/**
* Available options
*
* @var array available options
*/
protected $_options = array();
/**
* Constructor
*
* @param array $options Associative array of options
* @throws Zend_Cache_Exception
* @return void
*/
public function __construct(array $options = array())
{
while (list($name, $value) = each($options)) {
$this->setOption($name, $value);
}
}
/**
* Set the frontend directives
*
* @param array $directives Assoc of directives
* @throws Zend_Cache_Exception
* @return void
*/
public function setDirectives($directives)
{
if (!is_array($directives)) Zend_Cache::throwException('Directives parameter must be an array');
while (list($name, $value) = each($directives)) {
if (!is_string($name)) {
Zend_Cache::throwException("Incorrect option name : $name");
}
$name = strtolower($name);
if (array_key_exists($name, $this->_directives)) {
$this->_directives[$name] = $value;
}
}
$this->_loggerSanity();
}
/**
* Set an option
*
* @param string $name
* @param mixed $value
* @throws Zend_Cache_Exception
* @return void
*/
public function setOption($name, $value)
{
if (!is_string($name)) {
Zend_Cache::throwException("Incorrect option name : $name");
}
$name = strtolower($name);
if (array_key_exists($name, $this->_options)) {
$this->_options[$name] = $value;
}
}
/**
* Returns an option
*
* @param string $name Optional, the options name to return
* @throws Zend_Cache_Exceptions
* @return mixed
*/
public function getOption($name)
{
$name = strtolower($name);
if (array_key_exists($name, $this->_options)) {
return $this->_options[$name];
}
if (array_key_exists($name, $this->_directives)) {
return $this->_directives[$name];
}
Zend_Cache::throwException("Incorrect option name : {$name}");
}
/**
* Get the life time
*
* if $specificLifetime is not false, the given specific life time is used
* else, the global lifetime is used
*
* @param int $specificLifetime
* @return int Cache life time
*/
public function getLifetime($specificLifetime)
{
if ($specificLifetime === false) {
return $this->_directives['lifetime'];
}
return $specificLifetime;
}
/**
* Return true if the automatic cleaning is available for the backend
*
* DEPRECATED : use getCapabilities() instead
*
* @deprecated
* @return boolean
*/
public function isAutomaticCleaningAvailable()
{
return true;
}
/**
* Determine system TMP directory and detect if we have read access
*
* inspired from Zend_File_Transfer_Adapter_Abstract
*
* @return string
* @throws Zend_Cache_Exception if unable to determine directory
*/
public function getTmpDir()
{
$tmpdir = array();
foreach (array($_ENV, $_SERVER) as $tab) {
foreach (array('TMPDIR', 'TEMP', 'TMP', 'windir', 'SystemRoot') as $key) {
if (isset($tab[$key]) && is_string($tab[$key])) {
if (($key == 'windir') or ($key == 'SystemRoot')) {
$dir = realpath($tab[$key] . '\\temp');
} else {
$dir = realpath($tab[$key]);
}
if ($this->_isGoodTmpDir($dir)) {
return $dir;
}
}
}
}
$upload = ini_get('upload_tmp_dir');
if ($upload) {
$dir = realpath($upload);
if ($this->_isGoodTmpDir($dir)) {
return $dir;
}
}
if (function_exists('sys_get_temp_dir')) {
$dir = sys_get_temp_dir();
if ($this->_isGoodTmpDir($dir)) {
return $dir;
}
}
// Attemp to detect by creating a temporary file
$tempFile = tempnam(md5(uniqid(rand(), TRUE)), '');
if ($tempFile) {
$dir = realpath(dirname($tempFile));
unlink($tempFile);
if ($this->_isGoodTmpDir($dir)) {
return $dir;
}
}
if ($this->_isGoodTmpDir('/tmp')) {
return '/tmp';
}
if ($this->_isGoodTmpDir('\\temp')) {
return '\\temp';
}
Zend_Cache::throwException('Could not determine temp directory, please specify a cache_dir manually');
}
/**
* Verify if the given temporary directory is readable and writable
*
* @param string $dir temporary directory
* @return boolean true if the directory is ok
*/
protected function _isGoodTmpDir($dir)
{
if (is_readable($dir)) {
if (is_writable($dir)) {
return true;
}
}
return false;
}
/**
* Make sure if we enable logging that the Zend_Log class
* is available.
* Create a default log object if none is set.
*
* @throws Zend_Cache_Exception
* @return void
*/
protected function _loggerSanity()
{
if (!isset($this->_directives['logging']) || !$this->_directives['logging']) {
return;
}
if (isset($this->_directives['logger'])) {
if ($this->_directives['logger'] instanceof Zend_Log) {
return;
}
Zend_Cache::throwException('Logger object is not an instance of Zend_Log class.');
}
// Create a default logger to the standard output stream
require_once 'Zend/Log.php';
require_once 'Zend/Log/Writer/Stream.php';
require_once 'Zend/Log/Filter/Priority.php';
$logger = new Zend_Log(new Zend_Log_Writer_Stream('php://output'));
$logger->addFilter(new Zend_Log_Filter_Priority(Zend_Log::WARN, '<='));
$this->_directives['logger'] = $logger;
}
/**
* Log a message at the WARN (4) priority.
*
* @param string $message
* @throws Zend_Cache_Exception
* @return void
*/
protected function _log($message, $priority = 4)
{
if (!$this->_directives['logging']) {
return;
}
if (!isset($this->_directives['logger'])) {
Zend_Cache::throwException('Logging is enabled but logger is not set.');
}
$logger = $this->_directives['logger'];
if (!$logger instanceof Zend_Log) {
Zend_Cache::throwException('Logger object is not an instance of Zend_Log class.');
}
$logger->log($message, $priority);
}
}

View file

@ -0,0 +1,127 @@
<?php
/**
* Zend Framework
*
* LICENSE
*
* This source file is subject to the new BSD license that is bundled
* with this package in the file LICENSE.txt.
* It is also available through the world-wide-web at this URL:
* http://framework.zend.com/license/new-bsd
* If you did not receive a copy of the license and are unable to
* obtain it through the world-wide-web, please send an email
* to license@zend.com so we can send you a copy immediately.
*
* @category Zend
* @package Zend_Cache
* @subpackage Zend_Cache_Backend
* @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
* @version $Id: ExtendedInterface.php 24593 2012-01-05 20:35:02Z matthew $
*/
/**
* @see Zend_Cache_Backend_Interface
*/
//require_once 'Zend/Cache/Backend/Interface.php';
require_once dirname(__FILE__).'/Interface.php';
/**
* @package Zend_Cache
* @subpackage Zend_Cache_Backend
* @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
interface Zend_Cache_Backend_ExtendedInterface extends Zend_Cache_Backend_Interface
{
/**
* Return an array of stored cache ids
*
* @return array array of stored cache ids (string)
*/
public function getIds();
/**
* Return an array of stored tags
*
* @return array array of stored tags (string)
*/
public function getTags();
/**
* Return an array of stored cache ids which match given tags
*
* In case of multiple tags, a logical AND is made between tags
*
* @param array $tags array of tags
* @return array array of matching cache ids (string)
*/
public function getIdsMatchingTags($tags = array());
/**
* Return an array of stored cache ids which don't match given tags
*
* In case of multiple tags, a logical OR is made between tags
*
* @param array $tags array of tags
* @return array array of not matching cache ids (string)
*/
public function getIdsNotMatchingTags($tags = array());
/**
* Return an array of stored cache ids which match any given tags
*
* In case of multiple tags, a logical AND is made between tags
*
* @param array $tags array of tags
* @return array array of any matching cache ids (string)
*/
public function getIdsMatchingAnyTags($tags = array());
/**
* Return the filling percentage of the backend storage
*
* @return int integer between 0 and 100
*/
public function getFillingPercentage();
/**
* Return an array of metadatas for the given cache id
*
* The array must include these keys :
* - expire : the expire timestamp
* - tags : a string array of tags
* - mtime : timestamp of last modification time
*
* @param string $id cache id
* @return array array of metadatas (false if the cache id is not found)
*/
public function getMetadatas($id);
/**
* Give (if possible) an extra lifetime to the given cache id
*
* @param string $id cache id
* @param int $extraLifetime
* @return boolean true if ok
*/
public function touch($id, $extraLifetime);
/**
* Return an associative array of capabilities (booleans) of the backend
*
* The array must include these keys :
* - automatic_cleaning (is automating cleaning necessary)
* - tags (are tags supported)
* - expired_read (is it possible to read expired cache records
* (for doNotTestCacheValidity option for example))
* - priority does the backend deal with priority when saving
* - infinite_lifetime (is infinite lifetime can work with this backend)
* - get_list (is it possible to get the list of cache ids and the complete list of tags)
*
* @return array associative of with capabilities
*/
public function getCapabilities();
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,99 @@
<?php
/**
* Zend Framework
*
* LICENSE
*
* This source file is subject to the new BSD license that is bundled
* with this package in the file LICENSE.txt.
* It is also available through the world-wide-web at this URL:
* http://framework.zend.com/license/new-bsd
* If you did not receive a copy of the license and are unable to
* obtain it through the world-wide-web, please send an email
* to license@zend.com so we can send you a copy immediately.
*
* @category Zend
* @package Zend_Cache
* @subpackage Zend_Cache_Backend
* @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
* @version $Id: Interface.php 24593 2012-01-05 20:35:02Z matthew $
*/
/**
* @package Zend_Cache
* @subpackage Zend_Cache_Backend
* @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
interface Zend_Cache_Backend_Interface
{
/**
* Set the frontend directives
*
* @param array $directives assoc of directives
*/
public function setDirectives($directives);
/**
* Test if a cache is available for the given id and (if yes) return it (false else)
*
* Note : return value is always "string" (unserialization is done by the core not by the backend)
*
* @param string $id Cache id
* @param boolean $doNotTestCacheValidity If set to true, the cache validity won't be tested
* @return string|false cached datas
*/
public function load($id, $doNotTestCacheValidity = false);
/**
* Test if a cache is available or not (for the given id)
*
* @param string $id cache id
* @return mixed|false (a cache is not available) or "last modified" timestamp (int) of the available cache record
*/
public function test($id);
/**
* Save some string datas into a cache record
*
* Note : $data is always "string" (serialization is done by the
* core not by the backend)
*
* @param string $data Datas to cache
* @param string $id Cache id
* @param array $tags Array of strings, the cache record will be tagged by each string entry
* @param int $specificLifetime If != false, set a specific lifetime for this cache record (null => infinite lifetime)
* @return boolean true if no problem
*/
public function save($data, $id, $tags = array(), $specificLifetime = false);
/**
* Remove a cache record
*
* @param string $id Cache id
* @return boolean True if no problem
*/
public function remove($id);
/**
* Clean some cache records
*
* Available modes are :
* Zend_Cache::CLEANING_MODE_ALL (default) => remove all cache entries ($tags is not used)
* Zend_Cache::CLEANING_MODE_OLD => remove too old cache entries ($tags is not used)
* Zend_Cache::CLEANING_MODE_MATCHING_TAG => remove cache entries matching all given tags
* ($tags can be an array of strings or a single string)
* Zend_Cache::CLEANING_MODE_NOT_MATCHING_TAG => remove cache entries not {matching one of the given tags}
* ($tags can be an array of strings or a single string)
* Zend_Cache::CLEANING_MODE_MATCHING_ANY_TAG => remove cache entries matching any given tags
* ($tags can be an array of strings or a single string)
*
* @param string $mode Clean mode
* @param array $tags Array of tags
* @return boolean true if no problem
*/
public function clean($mode = Zend_Cache::CLEANING_MODE_ALL, $tags = array());
}

View file

@ -0,0 +1,765 @@
<?php
/**
* Zend Framework
*
* LICENSE
*
* This source file is subject to the new BSD license that is bundled
* with this package in the file LICENSE.txt.
* It is also available through the world-wide-web at this URL:
* http://framework.zend.com/license/new-bsd
* If you did not receive a copy of the license and are unable to
* obtain it through the world-wide-web, please send an email
* to license@zend.com so we can send you a copy immediately.
*
* @category Zend
* @package Zend_Cache
* @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
* @version $Id: Core.php 24989 2012-06-21 07:24:13Z mabe $
*/
/**
* @package Zend_Cache
* @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
class Zend_Cache_Core
{
/**
* Messages
*/
const BACKEND_NOT_SUPPORTS_TAG = 'tags are not supported by the current backend';
const BACKEND_NOT_IMPLEMENTS_EXTENDED_IF = 'Current backend doesn\'t implement the Zend_Cache_Backend_ExtendedInterface, so this method is not available';
/**
* Backend Object
*
* @var Zend_Cache_Backend_Interface $_backend
*/
protected $_backend = null;
/**
* Available options
*
* ====> (boolean) write_control :
* - Enable / disable write control (the cache is read just after writing to detect corrupt entries)
* - Enable write control will lightly slow the cache writing but not the cache reading
* Write control can detect some corrupt cache files but maybe it's not a perfect control
*
* ====> (boolean) caching :
* - Enable / disable caching
* (can be very useful for the debug of cached scripts)
*
* =====> (string) cache_id_prefix :
* - prefix for cache ids (namespace)
*
* ====> (boolean) automatic_serialization :
* - Enable / disable automatic serialization
* - It can be used to save directly datas which aren't strings (but it's slower)
*
* ====> (int) automatic_cleaning_factor :
* - Disable / Tune the automatic cleaning process
* - The automatic cleaning process destroy too old (for the given life time)
* cache files when a new cache file is written :
* 0 => no automatic cache cleaning
* 1 => systematic cache cleaning
* x (integer) > 1 => automatic cleaning randomly 1 times on x cache write
*
* ====> (int) lifetime :
* - Cache lifetime (in seconds)
* - If null, the cache is valid forever.
*
* ====> (boolean) logging :
* - If set to true, logging is activated (but the system is slower)
*
* ====> (boolean) ignore_user_abort
* - If set to true, the core will set the ignore_user_abort PHP flag inside the
* save() method to avoid cache corruptions in some cases (default false)
*
* @var array $_options available options
*/
protected $_options = array(
'write_control' => true,
'caching' => true,
'cache_id_prefix' => null,
'automatic_serialization' => false,
'automatic_cleaning_factor' => 10,
'lifetime' => 3600,
'logging' => false,
'logger' => null,
'ignore_user_abort' => false
);
/**
* Array of options which have to be transfered to backend
*
* @var array $_directivesList
*/
protected static $_directivesList = array('lifetime', 'logging', 'logger');
/**
* Not used for the core, just a sort a hint to get a common setOption() method (for the core and for frontends)
*
* @var array $_specificOptions
*/
protected $_specificOptions = array();
/**
* Last used cache id
*
* @var string $_lastId
*/
private $_lastId = null;
/**
* True if the backend implements Zend_Cache_Backend_ExtendedInterface
*
* @var boolean $_extendedBackend
*/
protected $_extendedBackend = false;
/**
* Array of capabilities of the backend (only if it implements Zend_Cache_Backend_ExtendedInterface)
*
* @var array
*/
protected $_backendCapabilities = array();
/**
* Constructor
*
* @param array|Zend_Config $options Associative array of options or Zend_Config instance
* @throws Zend_Cache_Exception
* @return void
*/
public function __construct($options = array())
{
if ($options instanceof Zend_Config) {
$options = $options->toArray();
}
if (!is_array($options)) {
Zend_Cache::throwException("Options passed were not an array"
. " or Zend_Config instance.");
}
while (list($name, $value) = each($options)) {
$this->setOption($name, $value);
}
$this->_loggerSanity();
}
/**
* Set options using an instance of type Zend_Config
*
* @param Zend_Config $config
* @return Zend_Cache_Core
*/
public function setConfig(Zend_Config $config)
{
$options = $config->toArray();
while (list($name, $value) = each($options)) {
$this->setOption($name, $value);
}
return $this;
}
/**
* Set the backend
*
* @param Zend_Cache_Backend $backendObject
* @throws Zend_Cache_Exception
* @return void
*/
public function setBackend(Zend_Cache_Backend $backendObject)
{
$this->_backend= $backendObject;
// some options (listed in $_directivesList) have to be given
// to the backend too (even if they are not "backend specific")
$directives = array();
foreach (Zend_Cache_Core::$_directivesList as $directive) {
$directives[$directive] = $this->_options[$directive];
}
$this->_backend->setDirectives($directives);
if (in_array('Zend_Cache_Backend_ExtendedInterface', class_implements($this->_backend))) {
$this->_extendedBackend = true;
$this->_backendCapabilities = $this->_backend->getCapabilities();
}
}
/**
* Returns the backend
*
* @return Zend_Cache_Backend backend object
*/
public function getBackend()
{
return $this->_backend;
}
/**
* Public frontend to set an option
*
* There is an additional validation (relatively to the protected _setOption method)
*
* @param string $name Name of the option
* @param mixed $value Value of the option
* @throws Zend_Cache_Exception
* @return void
*/
public function setOption($name, $value)
{
if (!is_string($name)) {
Zend_Cache::throwException("Incorrect option name!");
}
$name = strtolower($name);
if (array_key_exists($name, $this->_options)) {
// This is a Core option
$this->_setOption($name, $value);
return;
}
if (array_key_exists($name, $this->_specificOptions)) {
// This a specic option of this frontend
$this->_specificOptions[$name] = $value;
return;
}
}
/**
* Public frontend to get an option value
*
* @param string $name Name of the option
* @throws Zend_Cache_Exception
* @return mixed option value
*/
public function getOption($name)
{
$name = strtolower($name);
if (array_key_exists($name, $this->_options)) {
// This is a Core option
return $this->_options[$name];
}
if (array_key_exists($name, $this->_specificOptions)) {
// This a specic option of this frontend
return $this->_specificOptions[$name];
}
Zend_Cache::throwException("Incorrect option name : $name");
}
/**
* Set an option
*
* @param string $name Name of the option
* @param mixed $value Value of the option
* @throws Zend_Cache_Exception
* @return void
*/
private function _setOption($name, $value)
{
if (!is_string($name) || !array_key_exists($name, $this->_options)) {
Zend_Cache::throwException("Incorrect option name : $name");
}
if ($name == 'lifetime' && empty($value)) {
$value = null;
}
$this->_options[$name] = $value;
}
/**
* Force a new lifetime
*
* The new value is set for the core/frontend but for the backend too (directive)
*
* @param int $newLifetime New lifetime (in seconds)
* @return void
*/
public function setLifetime($newLifetime)
{
$this->_options['lifetime'] = $newLifetime;
$this->_backend->setDirectives(array(
'lifetime' => $newLifetime
));
}
/**
* Test if a cache is available for the given id and (if yes) return it (false else)
*
* @param string $id Cache id
* @param boolean $doNotTestCacheValidity If set to true, the cache validity won't be tested
* @param boolean $doNotUnserialize Do not serialize (even if automatic_serialization is true) => for internal use
* @return mixed|false Cached datas
*/
public function load($id, $doNotTestCacheValidity = false, $doNotUnserialize = false)
{
if (!$this->_options['caching']) {
return false;
}
$id = $this->_id($id); // cache id may need prefix
$this->_lastId = $id;
self::_validateIdOrTag($id);
$this->_log("Zend_Cache_Core: load item '{$id}'", 7);
$data = $this->_backend->load($id, $doNotTestCacheValidity);
if ($data===false) {
// no cache available
return false;
}
if ((!$doNotUnserialize) && $this->_options['automatic_serialization']) {
// we need to unserialize before sending the result
return unserialize($data);
}
return $data;
}
/**
* Test if a cache is available for the given id
*
* @param string $id Cache id
* @return int|false Last modified time of cache entry if it is available, false otherwise
*/
public function test($id)
{
if (!$this->_options['caching']) {
return false;
}
$id = $this->_id($id); // cache id may need prefix
self::_validateIdOrTag($id);
$this->_lastId = $id;
$this->_log("Zend_Cache_Core: test item '{$id}'", 7);
return $this->_backend->test($id);
}
/**
* Save some data in a cache
*
* @param mixed $data Data to put in cache (can be another type than string if automatic_serialization is on)
* @param string $id Cache id (if not set, the last cache id will be used)
* @param array $tags Cache tags
* @param int $specificLifetime If != false, set a specific lifetime for this cache record (null => infinite lifetime)
* @param int $priority integer between 0 (very low priority) and 10 (maximum priority) used by some particular backends
* @throws Zend_Cache_Exception
* @return boolean True if no problem
*/
public function save($data, $id = null, $tags = array(), $specificLifetime = false, $priority = 8)
{
if (!$this->_options['caching']) {
return true;
}
if ($id === null) {
$id = $this->_lastId;
} else {
$id = $this->_id($id);
}
self::_validateIdOrTag($id);
self::_validateTagsArray($tags);
if ($this->_options['automatic_serialization']) {
// we need to serialize datas before storing them
$data = serialize($data);
} else {
if (!is_string($data)) {
Zend_Cache::throwException("Datas must be string or set automatic_serialization = true");
}
}
// automatic cleaning
if ($this->_options['automatic_cleaning_factor'] > 0) {
$rand = rand(1, $this->_options['automatic_cleaning_factor']);
if ($rand==1) {
// new way || deprecated way
if ($this->_extendedBackend || method_exists($this->_backend, 'isAutomaticCleaningAvailable')) {
$this->_log("Zend_Cache_Core::save(): automatic cleaning running", 7);
$this->clean(Zend_Cache::CLEANING_MODE_OLD);
} else {
$this->_log("Zend_Cache_Core::save(): automatic cleaning is not available/necessary with current backend", 4);
}
}
}
$this->_log("Zend_Cache_Core: save item '{$id}'", 7);
if ($this->_options['ignore_user_abort']) {
$abort = ignore_user_abort(true);
}
if (($this->_extendedBackend) && ($this->_backendCapabilities['priority'])) {
$result = $this->_backend->save($data, $id, $tags, $specificLifetime, $priority);
} else {
$result = $this->_backend->save($data, $id, $tags, $specificLifetime);
}
if ($this->_options['ignore_user_abort']) {
ignore_user_abort($abort);
}
if (!$result) {
// maybe the cache is corrupted, so we remove it !
$this->_log("Zend_Cache_Core::save(): failed to save item '{$id}' -> removing it", 4);
$this->_backend->remove($id);
return false;
}
if ($this->_options['write_control']) {
$data2 = $this->_backend->load($id, true);
if ($data!=$data2) {
$this->_log("Zend_Cache_Core::save(): write control of item '{$id}' failed -> removing it", 4);
$this->_backend->remove($id);
return false;
}
}
return true;
}
/**
* Remove a cache
*
* @param string $id Cache id to remove
* @return boolean True if ok
*/
public function remove($id)
{
if (!$this->_options['caching']) {
return true;
}
$id = $this->_id($id); // cache id may need prefix
self::_validateIdOrTag($id);
$this->_log("Zend_Cache_Core: remove item '{$id}'", 7);
return $this->_backend->remove($id);
}
/**
* Clean cache entries
*
* Available modes are :
* 'all' (default) => remove all cache entries ($tags is not used)
* 'old' => remove too old cache entries ($tags is not used)
* 'matchingTag' => remove cache entries matching all given tags
* ($tags can be an array of strings or a single string)
* 'notMatchingTag' => remove cache entries not matching one of the given tags
* ($tags can be an array of strings or a single string)
* 'matchingAnyTag' => remove cache entries matching any given tags
* ($tags can be an array of strings or a single string)
*
* @param string $mode
* @param array|string $tags
* @throws Zend_Cache_Exception
* @return boolean True if ok
*/
public function clean($mode = 'all', $tags = array())
{
if (!$this->_options['caching']) {
return true;
}
if (!in_array($mode, array(Zend_Cache::CLEANING_MODE_ALL,
Zend_Cache::CLEANING_MODE_OLD,
Zend_Cache::CLEANING_MODE_MATCHING_TAG,
Zend_Cache::CLEANING_MODE_NOT_MATCHING_TAG,
Zend_Cache::CLEANING_MODE_MATCHING_ANY_TAG))) {
Zend_Cache::throwException('Invalid cleaning mode');
}
self::_validateTagsArray($tags);
return $this->_backend->clean($mode, $tags);
}
/**
* Return an array of stored cache ids which match given tags
*
* In case of multiple tags, a logical AND is made between tags
*
* @param array $tags array of tags
* @return array array of matching cache ids (string)
*/
public function getIdsMatchingTags($tags = array())
{
if (!$this->_extendedBackend) {
Zend_Cache::throwException(self::BACKEND_NOT_IMPLEMENTS_EXTENDED_IF);
}
if (!($this->_backendCapabilities['tags'])) {
Zend_Cache::throwException(self::BACKEND_NOT_SUPPORTS_TAG);
}
$ids = $this->_backend->getIdsMatchingTags($tags);
// we need to remove cache_id_prefix from ids (see #ZF-6178, #ZF-7600)
if (isset($this->_options['cache_id_prefix']) && $this->_options['cache_id_prefix'] !== '') {
$prefix = & $this->_options['cache_id_prefix'];
$prefixLen = strlen($prefix);
foreach ($ids as &$id) {
if (strpos($id, $prefix) === 0) {
$id = substr($id, $prefixLen);
}
}
}
return $ids;
}
/**
* Return an array of stored cache ids which don't match given tags
*
* In case of multiple tags, a logical OR is made between tags
*
* @param array $tags array of tags
* @return array array of not matching cache ids (string)
*/
public function getIdsNotMatchingTags($tags = array())
{
if (!$this->_extendedBackend) {
Zend_Cache::throwException(self::BACKEND_NOT_IMPLEMENTS_EXTENDED_IF);
}
if (!($this->_backendCapabilities['tags'])) {
Zend_Cache::throwException(self::BACKEND_NOT_SUPPORTS_TAG);
}
$ids = $this->_backend->getIdsNotMatchingTags($tags);
// we need to remove cache_id_prefix from ids (see #ZF-6178, #ZF-7600)
if (isset($this->_options['cache_id_prefix']) && $this->_options['cache_id_prefix'] !== '') {
$prefix = & $this->_options['cache_id_prefix'];
$prefixLen = strlen($prefix);
foreach ($ids as &$id) {
if (strpos($id, $prefix) === 0) {
$id = substr($id, $prefixLen);
}
}
}
return $ids;
}
/**
* Return an array of stored cache ids which match any given tags
*
* In case of multiple tags, a logical OR is made between tags
*
* @param array $tags array of tags
* @return array array of matching any cache ids (string)
*/
public function getIdsMatchingAnyTags($tags = array())
{
if (!$this->_extendedBackend) {
Zend_Cache::throwException(self::BACKEND_NOT_IMPLEMENTS_EXTENDED_IF);
}
if (!($this->_backendCapabilities['tags'])) {
Zend_Cache::throwException(self::BACKEND_NOT_SUPPORTS_TAG);
}
$ids = $this->_backend->getIdsMatchingAnyTags($tags);
// we need to remove cache_id_prefix from ids (see #ZF-6178, #ZF-7600)
if (isset($this->_options['cache_id_prefix']) && $this->_options['cache_id_prefix'] !== '') {
$prefix = & $this->_options['cache_id_prefix'];
$prefixLen = strlen($prefix);
foreach ($ids as &$id) {
if (strpos($id, $prefix) === 0) {
$id = substr($id, $prefixLen);
}
}
}
return $ids;
}
/**
* Return an array of stored cache ids
*
* @return array array of stored cache ids (string)
*/
public function getIds()
{
if (!$this->_extendedBackend) {
Zend_Cache::throwException(self::BACKEND_NOT_IMPLEMENTS_EXTENDED_IF);
}
$ids = $this->_backend->getIds();
// we need to remove cache_id_prefix from ids (see #ZF-6178, #ZF-7600)
if (isset($this->_options['cache_id_prefix']) && $this->_options['cache_id_prefix'] !== '') {
$prefix = & $this->_options['cache_id_prefix'];
$prefixLen = strlen($prefix);
foreach ($ids as &$id) {
if (strpos($id, $prefix) === 0) {
$id = substr($id, $prefixLen);
}
}
}
return $ids;
}
/**
* Return an array of stored tags
*
* @return array array of stored tags (string)
*/
public function getTags()
{
if (!$this->_extendedBackend) {
Zend_Cache::throwException(self::BACKEND_NOT_IMPLEMENTS_EXTENDED_IF);
}
if (!($this->_backendCapabilities['tags'])) {
Zend_Cache::throwException(self::BACKEND_NOT_SUPPORTS_TAG);
}
return $this->_backend->getTags();
}
/**
* Return the filling percentage of the backend storage
*
* @return int integer between 0 and 100
*/
public function getFillingPercentage()
{
if (!$this->_extendedBackend) {
Zend_Cache::throwException(self::BACKEND_NOT_IMPLEMENTS_EXTENDED_IF);
}
return $this->_backend->getFillingPercentage();
}
/**
* Return an array of metadatas for the given cache id
*
* The array will include these keys :
* - expire : the expire timestamp
* - tags : a string array of tags
* - mtime : timestamp of last modification time
*
* @param string $id cache id
* @return array array of metadatas (false if the cache id is not found)
*/
public function getMetadatas($id)
{
if (!$this->_extendedBackend) {
Zend_Cache::throwException(self::BACKEND_NOT_IMPLEMENTS_EXTENDED_IF);
}
$id = $this->_id($id); // cache id may need prefix
return $this->_backend->getMetadatas($id);
}
/**
* Give (if possible) an extra lifetime to the given cache id
*
* @param string $id cache id
* @param int $extraLifetime
* @return boolean true if ok
*/
public function touch($id, $extraLifetime)
{
if (!$this->_extendedBackend) {
Zend_Cache::throwException(self::BACKEND_NOT_IMPLEMENTS_EXTENDED_IF);
}
$id = $this->_id($id); // cache id may need prefix
$this->_log("Zend_Cache_Core: touch item '{$id}'", 7);
return $this->_backend->touch($id, $extraLifetime);
}
/**
* Validate a cache id or a tag (security, reliable filenames, reserved prefixes...)
*
* Throw an exception if a problem is found
*
* @param string $string Cache id or tag
* @throws Zend_Cache_Exception
* @return void
*/
protected static function _validateIdOrTag($string)
{
if (!is_string($string)) {
Zend_Cache::throwException('Invalid id or tag : must be a string');
}
if (substr($string, 0, 9) == 'internal-') {
Zend_Cache::throwException('"internal-*" ids or tags are reserved');
}
if (!preg_match('~^[a-zA-Z0-9_]+$~D', $string)) {
Zend_Cache::throwException("Invalid id or tag '$string' : must use only [a-zA-Z0-9_]");
}
}
/**
* Validate a tags array (security, reliable filenames, reserved prefixes...)
*
* Throw an exception if a problem is found
*
* @param array $tags Array of tags
* @throws Zend_Cache_Exception
* @return void
*/
protected static function _validateTagsArray($tags)
{
if (!is_array($tags)) {
Zend_Cache::throwException('Invalid tags array : must be an array');
}
foreach($tags as $tag) {
self::_validateIdOrTag($tag);
}
reset($tags);
}
/**
* Make sure if we enable logging that the Zend_Log class
* is available.
* Create a default log object if none is set.
*
* @throws Zend_Cache_Exception
* @return void
*/
protected function _loggerSanity()
{
if (!isset($this->_options['logging']) || !$this->_options['logging']) {
return;
}
if (isset($this->_options['logger']) && $this->_options['logger'] instanceof Zend_Log) {
return;
}
// Create a default logger to the standard output stream
require_once 'Zend/Log.php';
require_once 'Zend/Log/Writer/Stream.php';
require_once 'Zend/Log/Filter/Priority.php';
$logger = new Zend_Log(new Zend_Log_Writer_Stream('php://output'));
$logger->addFilter(new Zend_Log_Filter_Priority(Zend_Log::WARN, '<='));
$this->_options['logger'] = $logger;
}
/**
* Log a message at the WARN (4) priority.
*
* @param string $message
* @throws Zend_Cache_Exception
* @return void
*/
protected function _log($message, $priority = 4)
{
if (!$this->_options['logging']) {
return;
}
if (!(isset($this->_options['logger']) || $this->_options['logger'] instanceof Zend_Log)) {
Zend_Cache::throwException('Logging is enabled but logger is not set');
}
$logger = $this->_options['logger'];
$logger->log($message, $priority);
}
/**
* Make and return a cache id
*
* Checks 'cache_id_prefix' and returns new id with prefix or simply the id if null
*
* @param string $id Cache id
* @return string Cache id (with or without prefix)
*/
protected function _id($id)
{
if (($id !== null) && isset($this->_options['cache_id_prefix'])) {
return $this->_options['cache_id_prefix'] . $id; // return with prefix
}
return $id; // no prefix, just return the $id passed
}
}

View file

@ -0,0 +1,32 @@
<?php
/**
* Zend Framework
*
* LICENSE
*
* This source file is subject to the new BSD license that is bundled
* with this package in the file LICENSE.txt.
* It is also available through the world-wide-web at this URL:
* http://framework.zend.com/license/new-bsd
* If you did not receive a copy of the license and are unable to
* obtain it through the world-wide-web, please send an email
* to license@zend.com so we can send you a copy immediately.
*
* @category Zend
* @package Zend_Cache
* @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
* @version $Id: Exception.php 24593 2012-01-05 20:35:02Z matthew $
*/
/**
* @see Zend_Exception
*/
require_once 'Zend/Exception.php';
/**
* @package Zend_Cache
* @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
class Zend_Cache_Exception extends Zend_Exception {}

View file

@ -0,0 +1,96 @@
<?php
/**
* Zend Framework
*
* LICENSE
*
* This source file is subject to the new BSD license that is bundled
* with this package in the file LICENSE.txt.
* It is also available through the world-wide-web at this URL:
* http://framework.zend.com/license/new-bsd
* If you did not receive a copy of the license and are unable to
* obtain it through the world-wide-web, please send an email
* to license@zend.com so we can send you a copy immediately.
*
* @category Zend
* @package Zend
* @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
* @version $Id: Exception.php 24593 2012-01-05 20:35:02Z matthew $
*/
/**
* @category Zend
* @package Zend
* @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
class Zend_Exception extends Exception
{
/**
* @var null|Exception
*/
private $_previous = null;
/**
* Construct the exception
*
* @param string $msg
* @param int $code
* @param Exception $previous
* @return void
*/
public function __construct($msg = '', $code = 0, Exception $previous = null)
{
if (version_compare(PHP_VERSION, '5.3.0', '<')) {
parent::__construct($msg, (int) $code);
$this->_previous = $previous;
} else {
parent::__construct($msg, (int) $code, $previous);
}
}
/**
* Overloading
*
* For PHP < 5.3.0, provides access to the getPrevious() method.
*
* @param string $method
* @param array $args
* @return mixed
*/
public function __call($method, array $args)
{
if ('getprevious' == strtolower($method)) {
return $this->_getPrevious();
}
return null;
}
/**
* String representation of the exception
*
* @return string
*/
public function __toString()
{
if (version_compare(PHP_VERSION, '5.3.0', '<')) {
if (null !== ($e = $this->getPrevious())) {
return $e->__toString()
. "\n\nNext "
. parent::__toString();
}
}
return parent::__toString();
}
/**
* Returns previous Exception
*
* @return Exception|null
*/
protected function _getPrevious()
{
return $this->_previous;
}
}

View file

@ -0,0 +1,728 @@
<?php
/**
* Content Extractor
*
* Uses patterns specified in site config files and auto detection (hNews/PHP Readability)
* to extract content from HTML files.
*
* @version 1.0
* @date 2013-02-05
* @author Keyvan Minoukadeh
* @copyright 2013 Keyvan Minoukadeh
* @license http://www.gnu.org/licenses/agpl-3.0.html AGPL v3
*/
class ContentExtractor
{
protected static $tidy_config = array(
'clean' => true,
'output-xhtml' => true,
'logical-emphasis' => true,
'show-body-only' => false,
'new-blocklevel-tags' => 'article, aside, footer, header, hgroup, menu, nav, section, details, datagrid',
'new-inline-tags' => 'mark, time, meter, progress, data',
'wrap' => 0,
'drop-empty-paras' => true,
'drop-proprietary-attributes' => false,
'enclose-text' => true,
'enclose-block-text' => true,
'merge-divs' => true,
'merge-spans' => true,
'char-encoding' => 'utf8',
'hide-comments' => true
);
protected $html;
protected $config;
protected $title;
protected $author = array();
protected $language;
protected $date;
protected $body;
protected $success = false;
protected $nextPageUrl;
public $allowedParsers = array('libxml', 'html5lib');
public $fingerprints = array();
public $readability;
public $debug = false;
public $debugVerbose = false;
function __construct($path, $fallback=null) {
SiteConfig::set_config_path($path, $fallback);
}
protected function debug($msg) {
if ($this->debug) {
$mem = round(memory_get_usage()/1024, 2);
$memPeak = round(memory_get_peak_usage()/1024, 2);
echo '* ',$msg;
if ($this->debugVerbose) echo ' - mem used: ',$mem," (peak: $memPeak)";
echo "\n";
ob_flush();
flush();
}
}
public function reset() {
$this->html = null;
$this->readability = null;
$this->config = null;
$this->title = null;
$this->body = null;
$this->author = array();
$this->language = null;
$this->date = null;
$this->nextPageUrl = null;
$this->success = false;
}
public function findHostUsingFingerprints($html) {
$this->debug('Checking fingerprints...');
$head = substr($html, 0, 8000);
foreach ($this->fingerprints as $_fp => $_fphost) {
$lookin = 'html';
if (is_array($_fphost)) {
if (isset($_fphost['head']) && $_fphost['head']) {
$lookin = 'head';
}
$_fphost = $_fphost['hostname'];
}
if (strpos($$lookin, $_fp) !== false) {
$this->debug("Found match: $_fphost");
return $_fphost;
}
}
$this->debug('No fingerprint matches');
return false;
}
// returns SiteConfig instance (joined in order: exact match, wildcard, fingerprint, global, default)
public function buildSiteConfig($url, $html='', $add_to_cache=true) {
// extract host name
$host = @parse_url($url, PHP_URL_HOST);
$host = strtolower($host);
if (substr($host, 0, 4) == 'www.') $host = substr($host, 4);
// is merged version already cached?
if (SiteConfig::is_cached("$host.merged")) {
$this->debug("Returning cached and merged site config for $host");
return SiteConfig::build("$host.merged");
}
// let's build from site_config/custom/ and standard/
$config = SiteConfig::build($host);
if ($add_to_cache && $config && !SiteConfig::is_cached("$host")) {
SiteConfig::add_to_cache($host, $config);
}
// if no match, use defaults
if (!$config) $config = new SiteConfig();
// load fingerprint config?
if ($config->autodetect_on_failure()) {
// check HTML for fingerprints
if (!empty($this->fingerprints) && ($_fphost = $this->findHostUsingFingerprints($html))) {
if ($config_fingerprint = SiteConfig::build($_fphost)) {
$this->debug("Appending site config settings from $_fphost (fingerprint match)");
$config->append($config_fingerprint);
if ($add_to_cache && !SiteConfig::is_cached($_fphost)) {
//$config_fingerprint->cache_in_apc = true;
SiteConfig::add_to_cache($_fphost, $config_fingerprint);
}
}
}
}
// load global config?
if ($config->autodetect_on_failure()) {
if ($config_global = SiteConfig::build('global', true)) {
$this->debug('Appending site config settings from global.txt');
$config->append($config_global);
if ($add_to_cache && !SiteConfig::is_cached('global')) {
//$config_global->cache_in_apc = true;
SiteConfig::add_to_cache('global', $config_global);
}
}
}
// store copy of merged config
if ($add_to_cache) {
// do not store in APC if wildcard match
$use_apc = ($host == $config->cache_key);
$config->cache_key = null;
SiteConfig::add_to_cache("$host.merged", $config, $use_apc);
}
return $config;
}
// returns true on success, false on failure
// $smart_tidy indicates that if tidy is used and no results are produced, we will
// try again without it. Tidy helps us deal with PHP's patchy HTML parsing most of the time
// but it has problems of its own which we try to avoid with this option.
public function process($html, $url, $smart_tidy=true) {
$this->reset();
$this->config = $this->buildSiteConfig($url, $html);
// do string replacements
if (!empty($this->config->find_string)) {
if (count($this->config->find_string) == count($this->config->replace_string)) {
$html = str_replace($this->config->find_string, $this->config->replace_string, $html, $_count);
$this->debug("Strings replaced: $_count (find_string and/or replace_string)");
} else {
$this->debug('Skipped string replacement - incorrect number of find-replace strings in site config');
}
unset($_count);
}
// use tidy (if it exists)?
// This fixes problems with some sites which would otherwise
// trouble DOMDocument's HTML parsing. (Although sometimes it
// makes matters worse, which is why you can override it in site config files.)
$tidied = false;
if ($this->config->tidy() && function_exists('tidy_parse_string') && $smart_tidy) {
$this->debug('Using Tidy');
$tidy = tidy_parse_string($html, self::$tidy_config, 'UTF8');
if (tidy_clean_repair($tidy)) {
$original_html = $html;
$tidied = true;
$html = $tidy->value;
}
unset($tidy);
}
// load and parse html
$_parser = $this->config->parser();
if (!in_array($_parser, $this->allowedParsers)) {
$this->debug("HTML parser $_parser not listed, using libxml instead");
$_parser = 'libxml';
}
$this->debug("Attempting to parse HTML with $_parser");
$this->readability = new Readability($html, $url, $_parser);
// we use xpath to find elements in the given HTML document
// see http://en.wikipedia.org/wiki/XPath_1.0
$xpath = new DOMXPath($this->readability->dom);
// try to get next page link
foreach ($this->config->next_page_link as $pattern) {
$elems = @$xpath->evaluate($pattern, $this->readability->dom);
if (is_string($elems)) {
$this->nextPageUrl = trim($elems);
break;
} elseif ($elems instanceof DOMNodeList && $elems->length > 0) {
foreach ($elems as $item) {
if ($item instanceof DOMElement && $item->hasAttribute('href')) {
$this->nextPageUrl = $item->getAttribute('href');
break 2;
} elseif ($item instanceof DOMAttr && $item->value) {
$this->nextPageUrl = $item->value;
break 2;
}
}
}
}
// try to get title
foreach ($this->config->title as $pattern) {
// $this->debug("Trying $pattern");
$elems = @$xpath->evaluate($pattern, $this->readability->dom);
if (is_string($elems)) {
$this->title = trim($elems);
$this->debug('Title expression evaluated as string: '.$this->title);
$this->debug("...XPath match: $pattern");
break;
} elseif ($elems instanceof DOMNodeList && $elems->length > 0) {
$this->title = $elems->item(0)->textContent;
$this->debug('Title matched: '.$this->title);
$this->debug("...XPath match: $pattern");
// remove title from document
try {
$elems->item(0)->parentNode->removeChild($elems->item(0));
} catch (DOMException $e) {
// do nothing
}
break;
}
}
// try to get author (if it hasn't already been set)
if (empty($this->author)) {
foreach ($this->config->author as $pattern) {
$elems = @$xpath->evaluate($pattern, $this->readability->dom);
if (is_string($elems)) {
if (trim($elems) != '') {
$this->author[] = trim($elems);
$this->debug('Author expression evaluated as string: '.trim($elems));
$this->debug("...XPath match: $pattern");
break;
}
} elseif ($elems instanceof DOMNodeList && $elems->length > 0) {
foreach ($elems as $elem) {
if (!isset($elem->parentNode)) continue;
$this->author[] = trim($elem->textContent);
$this->debug('Author matched: '.trim($elem->textContent));
}
if (!empty($this->author)) {
$this->debug("...XPath match: $pattern");
break;
}
}
}
}
// try to get language
$_lang_xpath = array('//html[@lang]/@lang', '//meta[@name="DC.language"]/@content');
foreach ($_lang_xpath as $pattern) {
$elems = @$xpath->evaluate($pattern, $this->readability->dom);
if (is_string($elems)) {
if (trim($elems) != '') {
$this->language = trim($elems);
$this->debug('Language matched: '.$this->language);
break;
}
} elseif ($elems instanceof DOMNodeList && $elems->length > 0) {
foreach ($elems as $elem) {
if (!isset($elem->parentNode)) continue;
$this->language = trim($elem->textContent);
$this->debug('Language matched: '.$this->language);
}
if ($this->language) break;
}
}
// try to get date
foreach ($this->config->date as $pattern) {
$elems = @$xpath->evaluate($pattern, $this->readability->dom);
if (is_string($elems)) {
$this->date = strtotime(trim($elems, "; \t\n\r\0\x0B"));
} elseif ($elems instanceof DOMNodeList && $elems->length > 0) {
$this->date = $elems->item(0)->textContent;
$this->date = strtotime(trim($this->date, "; \t\n\r\0\x0B"));
// remove date from document
// $elems->item(0)->parentNode->removeChild($elems->item(0));
}
if (!$this->date) {
$this->date = null;
} else {
$this->debug('Date matched: '.date('Y-m-d H:i:s', $this->date));
$this->debug("...XPath match: $pattern");
break;
}
}
// strip elements (using xpath expressions)
foreach ($this->config->strip as $pattern) {
$elems = @$xpath->query($pattern, $this->readability->dom);
// check for matches
if ($elems && $elems->length > 0) {
$this->debug('Stripping '.$elems->length.' elements (strip)');
for ($i=$elems->length-1; $i >= 0; $i--) {
$elems->item($i)->parentNode->removeChild($elems->item($i));
}
}
}
// strip elements (using id and class attribute values)
foreach ($this->config->strip_id_or_class as $string) {
$string = strtr($string, array("'"=>'', '"'=>''));
$elems = @$xpath->query("//*[contains(@class, '$string') or contains(@id, '$string')]", $this->readability->dom);
// check for matches
if ($elems && $elems->length > 0) {
$this->debug('Stripping '.$elems->length.' elements (strip_id_or_class)');
for ($i=$elems->length-1; $i >= 0; $i--) {
$elems->item($i)->parentNode->removeChild($elems->item($i));
}
}
}
// strip images (using src attribute values)
foreach ($this->config->strip_image_src as $string) {
$string = strtr($string, array("'"=>'', '"'=>''));
$elems = @$xpath->query("//img[contains(@src, '$string')]", $this->readability->dom);
// check for matches
if ($elems && $elems->length > 0) {
$this->debug('Stripping '.$elems->length.' image elements');
for ($i=$elems->length-1; $i >= 0; $i--) {
$elems->item($i)->parentNode->removeChild($elems->item($i));
}
}
}
// strip elements using Readability.com and Instapaper.com ignore class names
// .entry-unrelated and .instapaper_ignore
// See https://www.readability.com/publishers/guidelines/#view-plainGuidelines
// and http://blog.instapaper.com/post/730281947
$elems = @$xpath->query("//*[contains(concat(' ',normalize-space(@class),' '),' entry-unrelated ') or contains(concat(' ',normalize-space(@class),' '),' instapaper_ignore ')]", $this->readability->dom);
// check for matches
if ($elems && $elems->length > 0) {
$this->debug('Stripping '.$elems->length.' .entry-unrelated,.instapaper_ignore elements');
for ($i=$elems->length-1; $i >= 0; $i--) {
$elems->item($i)->parentNode->removeChild($elems->item($i));
}
}
// strip elements that contain style="display: none;"
$elems = @$xpath->query("//*[contains(@style,'display:none')]", $this->readability->dom);
// check for matches
if ($elems && $elems->length > 0) {
$this->debug('Stripping '.$elems->length.' elements with inline display:none style');
for ($i=$elems->length-1; $i >= 0; $i--) {
$elems->item($i)->parentNode->removeChild($elems->item($i));
}
}
// try to get body
foreach ($this->config->body as $pattern) {
$elems = @$xpath->query($pattern, $this->readability->dom);
// check for matches
if ($elems && $elems->length > 0) {
$this->debug('Body matched');
$this->debug("...XPath match: $pattern");
if ($elems->length == 1) {
$this->body = $elems->item(0);
// prune (clean up elements that may not be content)
if ($this->config->prune()) {
$this->debug('...pruning content');
$this->readability->prepArticle($this->body);
}
break;
} else {
$this->body = $this->readability->dom->createElement('div');
$this->debug($elems->length.' body elems found');
foreach ($elems as $elem) {
if (!isset($elem->parentNode)) continue;
$isDescendant = false;
foreach ($this->body->childNodes as $parent) {
if ($this->isDescendant($parent, $elem)) {
$isDescendant = true;
break;
}
}
if ($isDescendant) {
$this->debug('...element is child of another body element, skipping.');
} else {
// prune (clean up elements that may not be content)
if ($this->config->prune()) {
$this->debug('Pruning content');
$this->readability->prepArticle($elem);
}
$this->debug('...element added to body');
$this->body->appendChild($elem);
}
}
if ($this->body->hasChildNodes()) break;
}
}
}
// auto detect?
$detect_title = $detect_body = $detect_author = $detect_date = false;
// detect title?
if (!isset($this->title)) {
if (empty($this->config->title) || $this->config->autodetect_on_failure()) {
$detect_title = true;
}
}
// detect body?
if (!isset($this->body)) {
if (empty($this->config->body) || $this->config->autodetect_on_failure()) {
$detect_body = true;
}
}
// detect author?
if (empty($this->author)) {
if (empty($this->config->author) || $this->config->autodetect_on_failure()) {
$detect_author = true;
}
}
// detect date?
if (!isset($this->date)) {
if (empty($this->config->date) || $this->config->autodetect_on_failure()) {
$detect_date = true;
}
}
// check for hNews
if ($detect_title || $detect_body) {
// check for hentry
$elems = @$xpath->query("//*[contains(concat(' ',normalize-space(@class),' '),' hentry ')]", $this->readability->dom);
if ($elems && $elems->length > 0) {
$this->debug('hNews: found hentry');
$hentry = $elems->item(0);
if ($detect_title) {
// check for entry-title
$elems = @$xpath->query(".//*[contains(concat(' ',normalize-space(@class),' '),' entry-title ')]", $hentry);
if ($elems && $elems->length > 0) {
$this->title = $elems->item(0)->textContent;
$this->debug('hNews: found entry-title: '.$this->title);
// remove title from document
$elems->item(0)->parentNode->removeChild($elems->item(0));
$detect_title = false;
}
}
if ($detect_date) {
// check for time element with pubdate attribute
$elems = @$xpath->query(".//time[@pubdate] | .//abbr[contains(concat(' ',normalize-space(@class),' '),' published ')]", $hentry);
if ($elems && $elems->length > 0) {
$this->date = strtotime(trim($elems->item(0)->textContent));
// remove date from document
//$elems->item(0)->parentNode->removeChild($elems->item(0));
if ($this->date) {
$this->debug('hNews: found publication date: '.date('Y-m-d H:i:s', $this->date));
$detect_date = false;
} else {
$this->date = null;
}
}
}
if ($detect_author) {
// check for time element with pubdate attribute
$elems = @$xpath->query(".//*[contains(concat(' ',normalize-space(@class),' '),' vcard ') and (contains(concat(' ',normalize-space(@class),' '),' author ') or contains(concat(' ',normalize-space(@class),' '),' byline '))]", $hentry);
if ($elems && $elems->length > 0) {
$author = $elems->item(0);
$fn = @$xpath->query(".//*[contains(concat(' ',normalize-space(@class),' '),' fn ')]", $author);
if ($fn && $fn->length > 0) {
foreach ($fn as $_fn) {
if (trim($_fn->textContent) != '') {
$this->author[] = trim($_fn->textContent);
$this->debug('hNews: found author: '.trim($_fn->textContent));
}
}
} else {
if (trim($author->textContent) != '') {
$this->author[] = trim($author->textContent);
$this->debug('hNews: found author: '.trim($author->textContent));
}
}
$detect_author = empty($this->author);
}
}
// check for entry-content.
// according to hAtom spec, if there are multiple elements marked entry-content,
// we include all of these in the order they appear - see http://microformats.org/wiki/hatom#Entry_Content
if ($detect_body) {
$elems = @$xpath->query(".//*[contains(concat(' ',normalize-space(@class),' '),' entry-content ')]", $hentry);
if ($elems && $elems->length > 0) {
$this->debug('hNews: found entry-content');
if ($elems->length == 1) {
// what if it's empty? (some sites misuse hNews - place their content outside an empty entry-content element)
$e = $elems->item(0);
if (($e->tagName == 'img') || (trim($e->textContent) != '')) {
$this->body = $elems->item(0);
// prune (clean up elements that may not be content)
if ($this->config->prune()) {
$this->debug('Pruning content');
$this->readability->prepArticle($this->body);
}
$detect_body = false;
} else {
$this->debug('hNews: skipping entry-content - appears not to contain content');
}
unset($e);
} else {
$this->body = $this->readability->dom->createElement('div');
$this->debug($elems->length.' entry-content elems found');
foreach ($elems as $elem) {
if (!isset($elem->parentNode)) continue;
$isDescendant = false;
foreach ($this->body->childNodes as $parent) {
if ($this->isDescendant($parent, $elem)) {
$isDescendant = true;
break;
}
}
if ($isDescendant) {
$this->debug('Element is child of another body element, skipping.');
} else {
// prune (clean up elements that may not be content)
if ($this->config->prune()) {
$this->debug('Pruning content');
$this->readability->prepArticle($elem);
}
$this->debug('Element added to body');
$this->body->appendChild($elem);
}
}
$detect_body = false;
}
}
}
}
}
// check for elements marked with instapaper_title
if ($detect_title) {
// check for instapaper_title
$elems = @$xpath->query("//*[contains(concat(' ',normalize-space(@class),' '),' instapaper_title ')]", $this->readability->dom);
if ($elems && $elems->length > 0) {
$this->title = $elems->item(0)->textContent;
$this->debug('Title found (.instapaper_title): '.$this->title);
// remove title from document
$elems->item(0)->parentNode->removeChild($elems->item(0));
$detect_title = false;
}
}
// check for elements marked with instapaper_body
if ($detect_body) {
$elems = @$xpath->query("//*[contains(concat(' ',normalize-space(@class),' '),' instapaper_body ')]", $this->readability->dom);
if ($elems && $elems->length > 0) {
$this->debug('body found (.instapaper_body)');
$this->body = $elems->item(0);
// prune (clean up elements that may not be content)
if ($this->config->prune()) {
$this->debug('Pruning content');
$this->readability->prepArticle($this->body);
}
$detect_body = false;
}
}
// Find author in rel="author" marked element
// We only use this if there's exactly one.
// If there's more than one, it could indicate more than
// one author, but it could also indicate that we're processing
// a page listing different articles with different authors.
if ($detect_author) {
$elems = @$xpath->query("//a[contains(concat(' ',normalize-space(@rel),' '),' author ')]", $this->readability->dom);
if ($elems && $elems->length == 1) {
$author = trim($elems->item(0)->textContent);
if ($author != '') {
$this->debug("Author found (rel=\"author\"): $author");
$this->author[] = $author;
$detect_author = false;
}
}
}
// Find date in pubdate marked time element
// For the same reason given above, we only use this
// if there's exactly one element.
if ($detect_date) {
$elems = @$xpath->query("//time[@pubdate]", $this->readability->dom);
if ($elems && $elems->length == 1) {
$this->date = strtotime(trim($elems->item(0)->textContent));
// remove date from document
//$elems->item(0)->parentNode->removeChild($elems->item(0));
if ($this->date) {
$this->debug('Date found (pubdate marked time element): '.date('Y-m-d H:i:s', $this->date));
$detect_date = false;
} else {
$this->date = null;
}
}
}
// still missing title or body, so we detect using Readability
if ($detect_title || $detect_body) {
$this->debug('Using Readability');
// clone body if we're only using Readability for title (otherwise it may interfere with body element)
if (isset($this->body)) $this->body = $this->body->cloneNode(true);
$success = $this->readability->init();
}
if ($detect_title) {
$this->debug('Detecting title');
$this->title = $this->readability->getTitle()->textContent;
}
if ($detect_body && $success) {
$this->debug('Detecting body');
$this->body = $this->readability->getContent();
if ($this->body->childNodes->length == 1 && $this->body->firstChild->nodeType === XML_ELEMENT_NODE) {
$this->body = $this->body->firstChild;
}
// prune (clean up elements that may not be content)
if ($this->config->prune()) {
$this->debug('Pruning content');
$this->readability->prepArticle($this->body);
}
}
if (isset($this->body)) {
// remove scripts
$this->readability->removeScripts($this->body);
// remove any h1-h6 elements that appear as first thing in the body
// and which match our title
if (isset($this->title) && ($this->title != '')) {
$firstChild = $this->body->firstChild;
while ($firstChild->nodeType && ($firstChild->nodeType !== XML_ELEMENT_NODE)) {
$firstChild = $firstChild->nextSibling;
}
if (($firstChild->nodeType === XML_ELEMENT_NODE)
&& in_array(strtolower($firstChild->tagName), array('h1', 'h2', 'h3', 'h4', 'h5', 'h6'))
&& (strtolower(trim($firstChild->textContent)) == strtolower(trim($this->title)))) {
$this->body->removeChild($firstChild);
}
}
// prevent self-closing iframes
$elems = $this->body->getElementsByTagName('iframe');
for ($i = $elems->length-1; $i >= 0; $i--) {
$e = $elems->item($i);
if (!$e->hasChildNodes()) {
$e->appendChild($this->body->ownerDocument->createTextNode('[embedded content]'));
}
}
// remove image lazy loading - WordPress plugin http://wordpress.org/extend/plugins/lazy-load/
// the plugin replaces the src attribute to point to a 1x1 gif and puts the original src
// inside the data-lazy-src attribute. It also places the original image inside a noscript element
// next to the amended one.
$elems = @$xpath->query("//img[@data-lazy-src]", $this->body);
for ($i = $elems->length-1; $i >= 0; $i--) {
$e = $elems->item($i);
// let's see if we can grab image from noscript
if ($e->nextSibling !== null && $e->nextSibling->nodeName === 'noscript') {
$_new_elem = $e->ownerDocument->createDocumentFragment();
@$_new_elem->appendXML($e->nextSibling->innerHTML);
$e->nextSibling->parentNode->replaceChild($_new_elem, $e->nextSibling);
$e->parentNode->removeChild($e);
} else {
// Use data-lazy-src as src value
$e->setAttribute('src', $e->getAttribute('data-lazy-src'));
$e->removeAttribute('data-lazy-src');
}
}
$this->success = true;
}
// if we've had no success and we've used tidy, there's a chance
// that tidy has messed up. So let's try again without tidy...
if (!$this->success && $tidied && $smart_tidy) {
$this->debug('Trying again without tidy');
$this->process($original_html, $url, false);
}
return $this->success;
}
private function isDescendant(DOMElement $parent, DOMElement $child) {
$node = $child->parentNode;
while ($node != null) {
if ($node->isSameNode($parent)) return true;
$node = $node->parentNode;
}
return false;
}
public function getContent() {
return $this->body;
}
public function getTitle() {
return $this->title;
}
public function getAuthors() {
return $this->author;
}
public function getLanguage() {
return $this->language;
}
public function getDate() {
return $this->date;
}
public function getSiteConfig() {
return $this->config;
}
public function getNextPageUrl() {
return $this->nextPageUrl;
}
}
?>

View file

@ -0,0 +1,338 @@
<?php
/**
* Site Config
*
* Each instance of this class should hold extraction patterns and other directives
* for a website. See ContentExtractor class to see how it's used.
*
* @version 0.7
* @date 2012-08-27
* @author Keyvan Minoukadeh
* @copyright 2012 Keyvan Minoukadeh
* @license http://www.gnu.org/licenses/agpl-3.0.html AGPL v3
*/
class SiteConfig
{
// Use first matching element as title (0 or more xpath expressions)
public $title = array();
// Use first matching element as body (0 or more xpath expressions)
public $body = array();
// Use first matching element as author (0 or more xpath expressions)
public $author = array();
// Use first matching element as date (0 or more xpath expressions)
public $date = array();
// Strip elements matching these xpath expressions (0 or more)
public $strip = array();
// Strip elements which contain these strings (0 or more) in the id or class attribute
public $strip_id_or_class = array();
// Strip images which contain these strings (0 or more) in the src attribute
public $strip_image_src = array();
// Additional HTTP headers to send
// NOT YET USED
public $http_header = array();
// Process HTML with tidy before creating DOM (bool or null if undeclared)
public $tidy = null;
protected $default_tidy = true; // used if undeclared
// Autodetect title/body if xpath expressions fail to produce results.
// Note that this applies to title and body separately, ie.
// * if we get a body match but no title match, this option will determine whether we autodetect title
// * if neither match, this determines whether we autodetect title and body.
// Also note that this only applies when there is at least one xpath expression in title or body, ie.
// * if title and body are both empty (no xpath expressions), this option has no effect (both title and body will be auto-detected)
// * if there's an xpath expression for title and none for body, body will be auto-detected and this option will determine whether we auto-detect title if the xpath expression for it fails to produce results.
// Usage scenario: you want to extract something specific from a set of URLs, e.g. a table, and if the table is not found, you want to ignore the entry completely. Auto-detection is unlikely to succeed here, so you construct your patterns and set this option to false. Another scenario may be a site where auto-detection has proven to fail (or worse, picked up the wrong content).
// bool or null if undeclared
public $autodetect_on_failure = null;
protected $default_autodetect_on_failure = true; // used if undeclared
// Clean up content block - attempt to remove elements that appear to be superfluous
// bool or null if undeclared
public $prune = null;
protected $default_prune = true; // used if undeclared
// Test URL - if present, can be used to test the config above
public $test_url = array();
// Single-page link - should identify a link element or URL pointing to the page holding the entire article
// This is useful for sites which split their articles across multiple pages. Links to such pages tend to
// display the first page with links to the other pages at the bottom. Often there is also a link to a page
// which displays the entire article on one page (e.g. 'print view').
// This should be an XPath expression identifying the link to that page. If present and we find a match,
// we will retrieve that page and the rest of the options in this config will be applied to the new page.
public $single_page_link = array();
public $next_page_link = array();
// Single-page link in feed? - same as above, but patterns applied to item description HTML taken from feed
public $single_page_link_in_feed = array();
// Which parser to use for turning raw HTML into a DOMDocument (either 'libxml' or 'html5lib')
// string or null if undeclared
public $parser = null;
protected $default_parser = 'libxml'; // used if undeclared
// Strings to search for in HTML before processing begins (used with $replace_string)
public $find_string = array();
// Strings to replace those found in $find_string before HTML processing begins
public $replace_string = array();
// the options below cannot be set in the config files which this class represents
//public $cache_in_apc = false; // used to decide if we should cache in apc or not
public $cache_key = null;
public static $debug = false;
protected static $apc = false;
protected static $config_path;
protected static $config_path_fallback;
protected static $config_cache = array();
const HOSTNAME_REGEX = '/^(([a-zA-Z0-9-]*[a-zA-Z0-9])\.)*([A-Za-z0-9-]*[A-Za-z0-9])$/';
protected static function debug($msg) {
if (self::$debug) {
//$mem = round(memory_get_usage()/1024, 2);
//$memPeak = round(memory_get_peak_usage()/1024, 2);
echo '* ',$msg;
//echo ' - mem used: ',$mem," (peak: $memPeak)\n";
echo "\n";
ob_flush();
flush();
}
}
// enable APC caching of certain site config files?
// If enabled the following site config files will be
// cached in APC cache (when requested for first time):
// * anything in site_config/custom/ and its corresponding file in site_config/standard/
// * the site config files associated with HTML fingerprints
// * the global site config file
// returns true if enabled, false otherwise
public static function use_apc($apc=true) {
if (!function_exists('apc_add')) {
if ($apc) self::debug('APC will not be used (function apc_add does not exist)');
return false;
}
self::$apc = $apc;
return $apc;
}
// return bool or null
public function tidy($use_default=true) {
if ($use_default) return (isset($this->tidy)) ? $this->tidy : $this->default_tidy;
return $this->tidy;
}
// return bool or null
public function prune($use_default=true) {
if ($use_default) return (isset($this->prune)) ? $this->prune : $this->default_prune;
return $this->prune;
}
// return string or null
public function parser($use_default=true) {
if ($use_default) return (isset($this->parser)) ? $this->parser : $this->default_parser;
return $this->parser;
}
// return bool or null
public function autodetect_on_failure($use_default=true) {
if ($use_default) return (isset($this->autodetect_on_failure)) ? $this->autodetect_on_failure : $this->default_autodetect_on_failure;
return $this->autodetect_on_failure;
}
public static function set_config_path($path, $fallback=null) {
self::$config_path = $path;
self::$config_path_fallback = $fallback;
}
public static function add_to_cache($key, SiteConfig $config, $use_apc=true) {
$key = strtolower($key);
if (substr($key, 0, 4) == 'www.') $key = substr($key, 4);
if ($config->cache_key) $key = $config->cache_key;
self::$config_cache[$key] = $config;
if (self::$apc && $use_apc) {
self::debug("Adding site config to APC cache with key sc.$key");
apc_add("sc.$key", $config);
}
self::debug("Cached site config with key $key");
}
public static function is_cached($key) {
$key = strtolower($key);
if (substr($key, 0, 4) == 'www.') $key = substr($key, 4);
if (array_key_exists($key, self::$config_cache)) {
return true;
} elseif (self::$apc && (bool)apc_fetch("sc.$key")) {
return true;
}
return false;
}
public function append(SiteConfig $newconfig) {
// check for commands where we accept multiple statements (no test_url)
foreach (array('title', 'body', 'author', 'date', 'strip', 'strip_id_or_class', 'strip_image_src', 'single_page_link', 'single_page_link_in_feed', 'next_page_link', 'http_header', 'find_string', 'replace_string') as $var) {
// append array elements for this config variable from $newconfig to this config
//$this->$var = $this->$var + $newconfig->$var;
$this->$var = array_unique(array_merge($this->$var, $newconfig->$var));
}
// check for single statement commands
// we do not overwrite existing non null values
foreach (array('tidy', 'prune', 'parser', 'autodetect_on_failure') as $var) {
if ($this->$var === null) $this->$var = $newconfig->$var;
}
}
// returns SiteConfig instance if an appropriate one is found, false otherwise
// if $exact_host_match is true, we will not look for wildcard config matches
// by default if host is 'test.example.org' we will look for and load '.example.org.txt' if it exists
public static function build($host, $exact_host_match=false) {
$host = strtolower($host);
if (substr($host, 0, 4) == 'www.') $host = substr($host, 4);
if (!$host || (strlen($host) > 200) || !preg_match(self::HOSTNAME_REGEX, ltrim($host, '.'))) return false;
// check for site configuration
$try = array($host);
// should we look for wildcard matches
if (!$exact_host_match) {
$split = explode('.', $host);
if (count($split) > 1) {
array_shift($split);
$try[] = '.'.implode('.', $split);
}
}
// look for site config file in primary folder
self::debug(". looking for site config for $host in primary folder");
foreach ($try as $h) {
if (array_key_exists($h, self::$config_cache)) {
self::debug("... site config for $h already loaded in this request");
return self::$config_cache[$h];
} elseif (self::$apc && ($sconfig = apc_fetch("sc.$h"))) {
self::debug("... site config for $h in APC cache");
return $sconfig;
} elseif (file_exists(self::$config_path."/$h.txt")) {
self::debug("... found site config ($h.txt)");
$file_primary = self::$config_path."/$h.txt";
$matched_name = $h;
break;
}
}
// if we found site config, process it
if (isset($file_primary)) {
$config_lines = file($file_primary, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
if (!$config_lines || !is_array($config_lines)) return false;
$config = self::build_from_array($config_lines);
// if APC caching is available and enabled, mark this for cache
//$config->cache_in_apc = true;
$config->cache_key = $matched_name;
// if autodetec on failure is off (on by default) we do not need to look
// in secondary folder
if (!$config->autodetect_on_failure()) {
self::debug('... autodetect on failure is disabled (no other site config files will be loaded)');
return $config;
}
}
// look for site config file in secondary folder
if (isset(self::$config_path_fallback)) {
self::debug(". looking for site config for $host in secondary folder");
foreach ($try as $h) {
if (file_exists(self::$config_path_fallback."/$h.txt")) {
self::debug("... found site config in secondary folder ($h.txt)");
$file_secondary = self::$config_path_fallback."/$h.txt";
$matched_name = $h;
break;
}
}
if (!isset($file_secondary)) {
self::debug("... no site config match in secondary folder");
}
}
// return false if no config file found
if (!isset($file_primary) && !isset($file_secondary)) {
self::debug("... no site config match for $host");
return false;
}
// return primary config if secondary not found
if (!isset($file_secondary) && isset($config)) {
return $config;
}
// process secondary config file
$config_lines = file($file_secondary, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
if (!$config_lines || !is_array($config_lines)) {
// failed to process secondary
if (isset($config)) {
// return primary config
return $config;
} else {
return false;
}
}
// merge with primary and return
if (isset($config)) {
self::debug('. merging config files');
$config->append(self::build_from_array($config_lines));
return $config;
} else {
// return just secondary
$config = self::build_from_array($config_lines);
// if APC caching is available and enabled, mark this for cache
//$config->cache_in_apc = true;
$config->cache_key = $matched_name;
return $config;
}
}
public static function build_from_array(array $lines) {
$config = new SiteConfig();
foreach ($lines as $line) {
$line = trim($line);
// skip comments, empty lines
if ($line == '' || $line[0] == '#') continue;
// get command
$command = explode(':', $line, 2);
// if there's no colon ':', skip this line
if (count($command) != 2) continue;
$val = trim($command[1]);
$command = trim($command[0]);
if ($command == '' || $val == '') continue;
// check for commands where we accept multiple statements
if (in_array($command, array('title', 'body', 'author', 'date', 'strip', 'strip_id_or_class', 'strip_image_src', 'single_page_link', 'single_page_link_in_feed', 'next_page_link', 'http_header', 'test_url', 'find_string', 'replace_string'))) {
array_push($config->$command, $val);
// check for single statement commands that evaluate to true or false
} elseif (in_array($command, array('tidy', 'prune', 'autodetect_on_failure'))) {
$config->$command = ($val == 'yes');
// check for single statement commands stored as strings
} elseif (in_array($command, array('parser'))) {
$config->$command = $val;
// check for replace_string(find): replace
} elseif ((substr($command, -1) == ')') && preg_match('!^([a-z0-9_]+)\((.*?)\)$!i', $command, $match)) {
if (in_array($match[1], array('replace_string'))) {
$command = $match[1];
array_push($config->find_string, $match[2]);
array_push($config->$command, $val);
}
}
}
return $config;
}
}
?>

View file

@ -0,0 +1,190 @@
<?php
/**
* Univarsel Feed Writer
*
* FeedItem class - Used as feed element in FeedWriter class
*
* @package UnivarselFeedWriter
* @author Anis uddin Ahmad <anisniit@gmail.com>
* @link http://www.ajaxray.com/projects/rss
*/
class FeedItem
{
private $elements = array(); //Collection of feed elements
private $version;
/**
* Constructor
*
* @param contant (RSS1/RSS2/ATOM) RSS2 is default.
*/
function __construct($version = RSS2)
{
$this->version = $version;
}
/**
* Set element (overwrites existing elements with $elementName)
*
* @access public
* @param srting The tag name of an element
* @param srting The content of tag
* @param array Attributes(if any) in 'attrName' => 'attrValue' format
* @return void
*/
public function setElement($elementName, $content, $attributes = null)
{
if (isset($this->elements[$elementName])) {
unset($this->elements[$elementName]);
}
$this->addElement($elementName, $content, $attributes);
}
/**
* Add an element to elements array
*
* @access public
* @param srting The tag name of an element
* @param srting The content of tag
* @param array Attributes(if any) in 'attrName' => 'attrValue' format
* @return void
*/
public function addElement($elementName, $content, $attributes = null)
{
$i = 0;
if (isset($this->elements[$elementName])) {
$i = count($this->elements[$elementName]);
} else {
$this->elements[$elementName] = array();
}
$this->elements[$elementName][$i]['name'] = $elementName;
$this->elements[$elementName][$i]['content'] = $content;
$this->elements[$elementName][$i]['attributes'] = $attributes;
}
/**
* Set multiple feed elements from an array.
* Elements which have attributes cannot be added by this method
*
* @access public
* @param array array of elements in 'tagName' => 'tagContent' format.
* @return void
*/
public function addElementArray($elementArray)
{
if(! is_array($elementArray)) return;
foreach ($elementArray as $elementName => $content)
{
$this->addElement($elementName, $content);
}
}
/**
* Return the collection of elements in this feed item
*
* @access public
* @return array
*/
public function getElements()
{
return $this->elements;
}
// Wrapper functions ------------------------------------------------------
/**
* Set the 'dscription' element of feed item
*
* @access public
* @param string The content of 'description' element
* @return void
*/
public function setDescription($description)
{
$tag = ($this->version == ATOM)? 'summary' : 'description';
$this->setElement($tag, $description);
}
/**
* @desc Set the 'title' element of feed item
* @access public
* @param string The content of 'title' element
* @return void
*/
public function setTitle($title)
{
$this->setElement('title', $title);
}
/**
* Set the 'date' element of feed item
*
* @access public
* @param string The content of 'date' element
* @return void
*/
public function setDate($date)
{
if(! is_numeric($date))
{
$date = strtotime($date);
}
if($this->version == ATOM)
{
$tag = 'updated';
$value = date(DATE_ATOM, $date);
}
elseif($this->version == RSS2)
{
$tag = 'pubDate';
$value = date(DATE_RSS, $date);
}
else
{
$tag = 'dc:date';
$value = date("Y-m-d", $date);
}
$this->setElement($tag, $value);
}
/**
* Set the 'link' element of feed item
*
* @access public
* @param string The content of 'link' element
* @return void
*/
public function setLink($link)
{
if($this->version == RSS2 || $this->version == RSS1)
{
$this->setElement('link', $link);
}
else
{
$this->setElement('link','',array('href'=>$link));
$this->setElement('id', FeedWriter::uuid($link,'urn:uuid:'));
}
}
/**
* Set the 'encloser' element of feed item
* For RSS 2.0 only
*
* @access public
* @param string The url attribute of encloser tag
* @param string The length attribute of encloser tag
* @param string The type attribute of encloser tag
* @return void
*/
public function setEncloser($url, $length, $type)
{
$attributes = array('url'=>$url, 'length'=>$length, 'type'=>$type);
$this->setElement('enclosure','',$attributes);
}
} // end of class FeedItem
?>

View file

@ -0,0 +1,441 @@
<?php
define('RSS2', 1, true);
define('JSON', 2, true);
define('JSONP', 3, true);
/**
* Univarsel Feed Writer class
*
* Genarate RSS2 or JSON (original: RSS 1.0, RSS2.0 and ATOM Feed)
*
* Modified for FiveFilters.org's Full-Text RSS project
* to allow for inclusion of hubs, JSON output.
* Stripped RSS1 and ATOM support.
*
* @package UnivarselFeedWriter
* @author Anis uddin Ahmad <anisniit@gmail.com>
* @link http://www.ajaxray.com/projects/rss
*/
class FeedWriter
{
private $self = null; // self URL - http://feed2.w3.org/docs/warning/MissingAtomSelfLink.html
private $hubs = array(); // PubSubHubbub hubs
private $channels = array(); // Collection of channel elements
private $items = array(); // Collection of items as object of FeedItem class.
private $data = array(); // Store some other version wise data
private $CDATAEncoding = array(); // The tag names which have to encoded as CDATA
private $xsl = null; // stylesheet to render RSS (used by Chrome)
private $json = null; // JSON object
private $version = null;
/**
* Constructor
*
* @param constant the version constant (RSS2 or JSON).
*/
function __construct($version = RSS2)
{
$this->version = $version;
// Setting default value for assential channel elements
$this->channels['title'] = $version . ' Feed';
$this->channels['link'] = 'http://www.ajaxray.com/blog';
//Tag names to encode in CDATA
$this->CDATAEncoding = array('description', 'content:encoded', 'content', 'subtitle', 'summary');
}
public function setFormat($format) {
$this->version = $format;
}
// Start # public functions ---------------------------------------------
/**
* Set a channel element
* @access public
* @param srting name of the channel tag
* @param string content of the channel tag
* @return void
*/
public function setChannelElement($elementName, $content)
{
$this->channels[$elementName] = $content ;
}
/**
* Set multiple channel elements from an array. Array elements
* should be 'channelName' => 'channelContent' format.
*
* @access public
* @param array array of channels
* @return void
*/
public function setChannelElementsFromArray($elementArray)
{
if(! is_array($elementArray)) return;
foreach ($elementArray as $elementName => $content)
{
$this->setChannelElement($elementName, $content);
}
}
/**
* Genarate the actual RSS/JSON file
*
* @access public
* @return void
*/
public function genarateFeed()
{
if ($this->version == RSS2) {
header('Content-type: text/xml; charset=UTF-8');
// this line prevents Chrome 20 from prompting download
// used by Google: https://news.google.com/news/feeds?ned=us&topic=b&output=rss
header('X-content-type-options: nosniff');
} elseif ($this->version == JSON) {
header('Content-type: application/json; charset=UTF-8');
$this->json = new stdClass();
} elseif ($this->version == JSONP) {
header('Content-type: application/javascript; charset=UTF-8');
$this->json = new stdClass();
}
$this->printHead();
$this->printChannels();
$this->printItems();
$this->printTale();
if ($this->version == JSON || $this->version == JSONP) {
echo json_encode($this->json);
}
}
/**
* Create a new FeedItem.
*
* @access public
* @return object instance of FeedItem class
*/
public function createNewItem()
{
$Item = new FeedItem($this->version);
return $Item;
}
/**
* Add a FeedItem to the main class
*
* @access public
* @param object instance of FeedItem class
* @return void
*/
public function addItem($feedItem)
{
$this->items[] = $feedItem;
}
// Wrapper functions -------------------------------------------------------------------
/**
* Set the 'title' channel element
*
* @access public
* @param srting value of 'title' channel tag
* @return void
*/
public function setTitle($title)
{
$this->setChannelElement('title', $title);
}
/**
* Add a hub to the channel element
*
* @access public
* @param string URL
* @return void
*/
public function addHub($hub)
{
$this->hubs[] = $hub;
}
/**
* Set XSL URL
*
* @access public
* @param string URL
* @return void
*/
public function setXsl($xsl)
{
$this->xsl = $xsl;
}
/**
* Set self URL
*
* @access public
* @param string URL
* @return void
*/
public function setSelf($self)
{
$this->self = $self;
}
/**
* Set the 'description' channel element
*
* @access public
* @param srting value of 'description' channel tag
* @return void
*/
public function setDescription($desciption)
{
$tag = ($this->version == ATOM)? 'subtitle' : 'description';
$this->setChannelElement($tag, $desciption);
}
/**
* Set the 'link' channel element
*
* @access public
* @param srting value of 'link' channel tag
* @return void
*/
public function setLink($link)
{
$this->setChannelElement('link', $link);
}
/**
* Set the 'image' channel element
*
* @access public
* @param srting title of image
* @param srting link url of the imahe
* @param srting path url of the image
* @return void
*/
public function setImage($title, $link, $url)
{
$this->setChannelElement('image', array('title'=>$title, 'link'=>$link, 'url'=>$url));
}
// End # public functions ----------------------------------------------
// Start # private functions ----------------------------------------------
/**
* Prints the xml and rss namespace
*
* @access private
* @return void
*/
private function printHead()
{
if ($this->version == RSS2)
{
$out = '<?xml version="1.0" encoding="utf-8"?>'."\n";
if ($this->xsl) $out .= '<?xml-stylesheet type="text/xsl" href="'.htmlspecialchars($this->xsl).'"?>' . PHP_EOL;
$out .= '<rss version="2.0" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:media="http://search.yahoo.com/mrss/">' . PHP_EOL;
echo $out;
}
elseif ($this->version == JSON || $this->version == JSONP)
{
$this->json->rss = array('@attributes' => array('version' => '2.0'));
}
}
/**
* Closes the open tags at the end of file
*
* @access private
* @return void
*/
private function printTale()
{
if ($this->version == RSS2)
{
echo '</channel>',PHP_EOL,'</rss>';
}
// do nothing for JSON
}
/**
* Creates a single node as xml format
*
* @access private
* @param string name of the tag
* @param mixed tag value as string or array of nested tags in 'tagName' => 'tagValue' format
* @param array Attributes(if any) in 'attrName' => 'attrValue' format
* @return string formatted xml tag
*/
private function makeNode($tagName, $tagContent, $attributes = null)
{
if ($this->version == RSS2)
{
$nodeText = '';
$attrText = '';
if (is_array($attributes))
{
foreach ($attributes as $key => $value)
{
$attrText .= " $key=\"$value\" ";
}
}
$nodeText .= "<{$tagName}{$attrText}>";
if (is_array($tagContent))
{
foreach ($tagContent as $key => $value)
{
$nodeText .= $this->makeNode($key, $value);
}
}
else
{
//$nodeText .= (in_array($tagName, $this->CDATAEncoding))? $tagContent : htmlentities($tagContent);
$nodeText .= htmlspecialchars($tagContent);
}
//$nodeText .= (in_array($tagName, $this->CDATAEncoding))? "]]></$tagName>" : "</$tagName>";
$nodeText .= "</$tagName>";
return $nodeText . PHP_EOL;
}
elseif ($this->version == JSON || $this->version == JSONP)
{
$tagName = (string)$tagName;
$tagName = strtr($tagName, ':', '_');
$node = null;
if (!$tagContent && is_array($attributes) && count($attributes))
{
$node = array('@attributes' => $this->json_keys($attributes));
} else {
if (is_array($tagContent)) {
$node = $this->json_keys($tagContent);
} else {
$node = $tagContent;
}
}
return $node;
}
return ''; // should not get here
}
private function json_keys(array $array) {
$new = array();
foreach ($array as $key => $val) {
if (is_string($key)) $key = strtr($key, ':', '_');
if (is_array($val)) {
$new[$key] = $this->json_keys($val);
} else {
$new[$key] = $val;
}
}
return $new;
}
/**
* @desc Print channels
* @access private
* @return void
*/
private function printChannels()
{
//Start channel tag
if ($this->version == RSS2) {
echo '<channel>' . PHP_EOL;
// add hubs
foreach ($this->hubs as $hub) {
//echo $this->makeNode('link', '', array('rel'=>'hub', 'href'=>$hub, 'xmlns'=>'http://www.w3.org/2005/Atom'));
echo '<link rel="hub" href="'.htmlspecialchars($hub).'" xmlns="http://www.w3.org/2005/Atom" />' . PHP_EOL;
}
// add self
if (isset($this->self)) {
//echo $this->makeNode('link', '', array('rel'=>'self', 'href'=>$this->self, 'xmlns'=>'http://www.w3.org/2005/Atom'));
echo '<link rel="self" href="'.htmlspecialchars($this->self).'" xmlns="http://www.w3.org/2005/Atom" />' . PHP_EOL;
}
//Print Items of channel
foreach ($this->channels as $key => $value)
{
echo $this->makeNode($key, $value);
}
} elseif ($this->version == JSON || $this->version == JSONP) {
$this->json->rss['channel'] = (object)$this->json_keys($this->channels);
}
}
/**
* Prints formatted feed items
*
* @access private
* @return void
*/
private function printItems()
{
foreach ($this->items as $item) {
$itemElements = $item->getElements();
echo $this->startItem();
if ($this->version == JSON || $this->version == JSONP) {
$json_item = array();
}
foreach ($itemElements as $thisElement) {
foreach ($thisElement as $instance) {
if ($this->version == RSS2) {
echo $this->makeNode($instance['name'], $instance['content'], $instance['attributes']);
} elseif ($this->version == JSON || $this->version == JSONP) {
$_json_node = $this->makeNode($instance['name'], $instance['content'], $instance['attributes']);
if (count($thisElement) > 1) {
$json_item[strtr($instance['name'], ':', '_')][] = $_json_node;
} else {
$json_item[strtr($instance['name'], ':', '_')] = $_json_node;
}
}
}
}
echo $this->endItem();
if ($this->version == JSON || $this->version == JSONP) {
if (count($this->items) > 1) {
$this->json->rss['channel']->item[] = $json_item;
} else {
$this->json->rss['channel']->item = $json_item;
}
}
}
}
/**
* Make the starting tag of channels
*
* @access private
* @return void
*/
private function startItem()
{
if ($this->version == RSS2)
{
echo '<item>' . PHP_EOL;
}
// nothing for JSON
}
/**
* Closes feed item tag
*
* @access private
* @return void
*/
private function endItem()
{
if ($this->version == RSS2)
{
echo '</item>' . PHP_EOL;
}
// nothing for JSON
}
// End # private functions ----------------------------------------------
}

View file

@ -0,0 +1,728 @@
<?php
/*
htmLawed 1.1.14, 8 August 2012
OOP code, 8 August 2012
Copyright Santosh Patnaik
Dual LGPL v3 and GPL v2+ license
A PHP Labware internal utility; www.bioinformatics.org/phplabware/internal_utilities/htmLawed
See htmLawed_README.txt/htm
*/
class htmLawed{
// begin class
public static function hl($t, $C=1, $S=array()){
$C = is_array($C) ? $C : array();
if(!empty($C['valid_xhtml'])){
$C['elements'] = empty($C['elements']) ? '*-center-dir-font-isindex-menu-s-strike-u' : $C['elements'];
$C['make_tag_strict'] = isset($C['make_tag_strict']) ? $C['make_tag_strict'] : 2;
$C['xml:lang'] = isset($C['xml:lang']) ? $C['xml:lang'] : 2;
}
// config eles
$e = array('a'=>1, 'abbr'=>1, 'acronym'=>1, 'address'=>1, 'applet'=>1, 'area'=>1, 'b'=>1, 'bdo'=>1, 'big'=>1, 'blockquote'=>1, 'br'=>1, 'button'=>1, 'caption'=>1, 'center'=>1, 'cite'=>1, 'code'=>1, 'col'=>1, 'colgroup'=>1, 'dd'=>1, 'del'=>1, 'dfn'=>1, 'dir'=>1, 'div'=>1, 'dl'=>1, 'dt'=>1, 'em'=>1, 'embed'=>1, 'fieldset'=>1, 'font'=>1, 'form'=>1, 'h1'=>1, 'h2'=>1, 'h3'=>1, 'h4'=>1, 'h5'=>1, 'h6'=>1, 'hr'=>1, 'i'=>1, 'iframe'=>1, 'img'=>1, 'input'=>1, 'ins'=>1, 'isindex'=>1, 'kbd'=>1, 'label'=>1, 'legend'=>1, 'li'=>1, 'map'=>1, 'menu'=>1, 'noscript'=>1, 'object'=>1, 'ol'=>1, 'optgroup'=>1, 'option'=>1, 'p'=>1, 'param'=>1, 'pre'=>1, 'q'=>1, 'rb'=>1, 'rbc'=>1, 'rp'=>1, 'rt'=>1, 'rtc'=>1, 'ruby'=>1, 's'=>1, 'samp'=>1, 'script'=>1, 'select'=>1, 'small'=>1, 'span'=>1, 'strike'=>1, 'strong'=>1, 'sub'=>1, 'sup'=>1, 'table'=>1, 'tbody'=>1, 'td'=>1, 'textarea'=>1, 'tfoot'=>1, 'th'=>1, 'thead'=>1, 'tr'=>1, 'tt'=>1, 'u'=>1, 'ul'=>1, 'var'=>1); // 86/deprecated+embed+ruby
if(!empty($C['safe'])){
unset($e['applet'], $e['embed'], $e['iframe'], $e['object'], $e['script']);
}
$x = !empty($C['elements']) ? str_replace(array("\n", "\r", "\t", ' '), '', $C['elements']) : '*';
if($x == '-*'){$e = array();}
elseif(strpos($x, '*') === false){$e = array_flip(explode(',', $x));}
else{
if(isset($x[1])){
preg_match_all('`(?:^|-|\+)[^\-+]+?(?=-|\+|$)`', $x, $m, PREG_SET_ORDER);
for($i=count($m); --$i>=0;){$m[$i] = $m[$i][0];}
foreach($m as $v){
if($v[0] == '+'){$e[substr($v, 1)] = 1;}
if($v[0] == '-' && isset($e[($v = substr($v, 1))]) && !in_array('+'. $v, $m)){unset($e[$v]);}
}
}
}
$C['elements'] =& $e;
// config attrs
$x = !empty($C['deny_attribute']) ? str_replace(array("\n", "\r", "\t", ' '), '', $C['deny_attribute']) : '';
$x = array_flip((isset($x[0]) && $x[0] == '*') ? explode('-', $x) : explode(',', $x. (!empty($C['safe']) ? ',on*' : '')));
if(isset($x['on*'])){
unset($x['on*']);
$x += array('onblur'=>1, 'onchange'=>1, 'onclick'=>1, 'ondblclick'=>1, 'onfocus'=>1, 'onkeydown'=>1, 'onkeypress'=>1, 'onkeyup'=>1, 'onmousedown'=>1, 'onmousemove'=>1, 'onmouseout'=>1, 'onmouseover'=>1, 'onmouseup'=>1, 'onreset'=>1, 'onselect'=>1, 'onsubmit'=>1);
}
$C['deny_attribute'] = $x;
// config URL
$x = (isset($C['schemes'][2]) && strpos($C['schemes'], ':')) ? strtolower($C['schemes']) : 'href: aim, feed, file, ftp, gopher, http, https, irc, mailto, news, nntp, sftp, ssh, telnet; *:file, http, https';
$C['schemes'] = array();
foreach(explode(';', str_replace(array(' ', "\t", "\r", "\n"), '', $x)) as $v){
$x = $x2 = null; list($x, $x2) = explode(':', $v, 2);
if($x2){$C['schemes'][$x] = array_flip(explode(',', $x2));}
}
if(!isset($C['schemes']['*'])){$C['schemes']['*'] = array('file'=>1, 'http'=>1, 'https'=>1,);}
if(!empty($C['safe']) && empty($C['schemes']['style'])){$C['schemes']['style'] = array('!'=>1);}
$C['abs_url'] = isset($C['abs_url']) ? $C['abs_url'] : 0;
if(!isset($C['base_url']) or !preg_match('`^[a-zA-Z\d.+\-]+://[^/]+/(.+?/)?$`', $C['base_url'])){
$C['base_url'] = $C['abs_url'] = 0;
}
// config rest
$C['and_mark'] = empty($C['and_mark']) ? 0 : 1;
$C['anti_link_spam'] = (isset($C['anti_link_spam']) && is_array($C['anti_link_spam']) && count($C['anti_link_spam']) == 2 && (empty($C['anti_link_spam'][0]) or htmLawed::hl_regex($C['anti_link_spam'][0])) && (empty($C['anti_link_spam'][1]) or htmLawed::hl_regex($C['anti_link_spam'][1]))) ? $C['anti_link_spam'] : 0;
$C['anti_mail_spam'] = isset($C['anti_mail_spam']) ? $C['anti_mail_spam'] : 0;
$C['balance'] = isset($C['balance']) ? (bool)$C['balance'] : 1;
$C['cdata'] = isset($C['cdata']) ? $C['cdata'] : (empty($C['safe']) ? 3 : 0);
$C['clean_ms_char'] = empty($C['clean_ms_char']) ? 0 : $C['clean_ms_char'];
$C['comment'] = isset($C['comment']) ? $C['comment'] : (empty($C['safe']) ? 3 : 0);
$C['css_expression'] = empty($C['css_expression']) ? 0 : 1;
$C['direct_list_nest'] = empty($C['direct_list_nest']) ? 0 : 1;
$C['hexdec_entity'] = isset($C['hexdec_entity']) ? $C['hexdec_entity'] : 1;
$C['hook'] = (!empty($C['hook']) && function_exists($C['hook'])) ? $C['hook'] : 0;
$C['hook_tag'] = (!empty($C['hook_tag']) && function_exists($C['hook_tag'])) ? $C['hook_tag'] : 0;
$C['keep_bad'] = isset($C['keep_bad']) ? $C['keep_bad'] : 6;
$C['lc_std_val'] = isset($C['lc_std_val']) ? (bool)$C['lc_std_val'] : 1;
$C['make_tag_strict'] = isset($C['make_tag_strict']) ? $C['make_tag_strict'] : 1;
$C['named_entity'] = isset($C['named_entity']) ? (bool)$C['named_entity'] : 1;
$C['no_deprecated_attr'] = isset($C['no_deprecated_attr']) ? $C['no_deprecated_attr'] : 1;
$C['parent'] = isset($C['parent'][0]) ? strtolower($C['parent']) : 'body';
$C['show_setting'] = !empty($C['show_setting']) ? $C['show_setting'] : 0;
$C['style_pass'] = empty($C['style_pass']) ? 0 : 1;
$C['tidy'] = empty($C['tidy']) ? 0 : $C['tidy'];
$C['unique_ids'] = isset($C['unique_ids']) ? $C['unique_ids'] : 1;
$C['xml:lang'] = isset($C['xml:lang']) ? $C['xml:lang'] : 0;
if(isset($GLOBALS['C'])){$reC = $GLOBALS['C'];}
$GLOBALS['C'] = $C;
$S = is_array($S) ? $S : htmLawed::hl_spec($S);
if(isset($GLOBALS['S'])){$reS = $GLOBALS['S'];}
$GLOBALS['S'] = $S;
$t = preg_replace('`[\x00-\x08\x0b-\x0c\x0e-\x1f]`', '', $t);
if($C['clean_ms_char']){
$x = array("\x7f"=>'', "\x80"=>'&#8364;', "\x81"=>'', "\x83"=>'&#402;', "\x85"=>'&#8230;', "\x86"=>'&#8224;', "\x87"=>'&#8225;', "\x88"=>'&#710;', "\x89"=>'&#8240;', "\x8a"=>'&#352;', "\x8b"=>'&#8249;', "\x8c"=>'&#338;', "\x8d"=>'', "\x8e"=>'&#381;', "\x8f"=>'', "\x90"=>'', "\x95"=>'&#8226;', "\x96"=>'&#8211;', "\x97"=>'&#8212;', "\x98"=>'&#732;', "\x99"=>'&#8482;', "\x9a"=>'&#353;', "\x9b"=>'&#8250;', "\x9c"=>'&#339;', "\x9d"=>'', "\x9e"=>'&#382;', "\x9f"=>'&#376;');
$x = $x + ($C['clean_ms_char'] == 1 ? array("\x82"=>'&#8218;', "\x84"=>'&#8222;', "\x91"=>'&#8216;', "\x92"=>'&#8217;', "\x93"=>'&#8220;', "\x94"=>'&#8221;') : array("\x82"=>'\'', "\x84"=>'"', "\x91"=>'\'', "\x92"=>'\'', "\x93"=>'"', "\x94"=>'"'));
$t = strtr($t, $x);
}
if($C['cdata'] or $C['comment']){$t = preg_replace_callback('`<!(?:(?:--.*?--)|(?:\[CDATA\[.*?\]\]))>`sm', 'htmLawed::hl_cmtcd', $t);}
$t = preg_replace_callback('`&amp;([A-Za-z][A-Za-z0-9]{1,30}|#(?:[0-9]{1,8}|[Xx][0-9A-Fa-f]{1,7}));`', 'htmLawed::hl_ent', str_replace('&', '&amp;', $t));
if($C['unique_ids'] && !isset($GLOBALS['hl_Ids'])){$GLOBALS['hl_Ids'] = array();}
if($C['hook']){$t = $C['hook']($t, $C, $S);}
if($C['show_setting'] && preg_match('`^[a-z][a-z0-9_]*$`i', $C['show_setting'])){
$GLOBALS[$C['show_setting']] = array('config'=>$C, 'spec'=>$S, 'time'=>microtime());
}
// main
$t = preg_replace_callback('`<(?:(?:\s|$)|(?:[^>]*(?:>|$)))|>`m', 'htmLawed::hl_tag', $t);
$t = $C['balance'] ? htmLawed::hl_bal($t, $C['keep_bad'], $C['parent']) : $t;
$t = (($C['cdata'] or $C['comment']) && strpos($t, "\x01") !== false) ? str_replace(array("\x01", "\x02", "\x03", "\x04", "\x05"), array('', '', '&', '<', '>'), $t) : $t;
$t = $C['tidy'] ? htmLawed::hl_tidy($t, $C['tidy'], $C['parent']) : $t;
unset($C, $e);
if(isset($reC)){$GLOBALS['C'] = $reC;}
if(isset($reS)){$GLOBALS['S'] = $reS;}
return $t;
// eof
}
public static function hl_attrval($t, $p){
// check attr val against $S
$o = 1; $l = strlen($t);
foreach($p as $k=>$v){
switch($k){
case 'maxlen':if($l > $v){$o = 0;}
break; case 'minlen': if($l < $v){$o = 0;}
break; case 'maxval': if((float)($t) > $v){$o = 0;}
break; case 'minval': if((float)($t) < $v){$o = 0;}
break; case 'match': if(!preg_match($v, $t)){$o = 0;}
break; case 'nomatch': if(preg_match($v, $t)){$o = 0;}
break; case 'oneof':
$m = 0;
foreach(explode('|', $v) as $n){if($t == $n){$m = 1; break;}}
$o = $m;
break; case 'noneof':
$m = 1;
foreach(explode('|', $v) as $n){if($t == $n){$m = 0; break;}}
$o = $m;
break; default:
break;
}
if(!$o){break;}
}
return ($o ? $t : (isset($p['default']) ? $p['default'] : 0));
// eof
}
public static function hl_bal($t, $do=1, $in='div'){
// balance tags
// by content
$cB = array('blockquote'=>1, 'form'=>1, 'map'=>1, 'noscript'=>1); // Block
$cE = array('area'=>1, 'br'=>1, 'col'=>1, 'embed'=>1, 'hr'=>1, 'img'=>1, 'input'=>1, 'isindex'=>1, 'param'=>1); // Empty
$cF = array('button'=>1, 'del'=>1, 'div'=>1, 'dd'=>1, 'fieldset'=>1, 'iframe'=>1, 'ins'=>1, 'li'=>1, 'noscript'=>1, 'object'=>1, 'td'=>1, 'th'=>1); // Flow; later context-wise dynamic move of ins & del to $cI
$cI = array('a'=>1, 'abbr'=>1, 'acronym'=>1, 'address'=>1, 'b'=>1, 'bdo'=>1, 'big'=>1, 'caption'=>1, 'cite'=>1, 'code'=>1, 'dfn'=>1, 'dt'=>1, 'em'=>1, 'font'=>1, 'h1'=>1, 'h2'=>1, 'h3'=>1, 'h4'=>1, 'h5'=>1, 'h6'=>1, 'i'=>1, 'kbd'=>1, 'label'=>1, 'legend'=>1, 'p'=>1, 'pre'=>1, 'q'=>1, 'rb'=>1, 'rt'=>1, 's'=>1, 'samp'=>1, 'small'=>1, 'span'=>1, 'strike'=>1, 'strong'=>1, 'sub'=>1, 'sup'=>1, 'tt'=>1, 'u'=>1, 'var'=>1); // Inline
$cN = array('a'=>array('a'=>1), 'button'=>array('a'=>1, 'button'=>1, 'fieldset'=>1, 'form'=>1, 'iframe'=>1, 'input'=>1, 'label'=>1, 'select'=>1, 'textarea'=>1), 'fieldset'=>array('fieldset'=>1), 'form'=>array('form'=>1), 'label'=>array('label'=>1), 'noscript'=>array('script'=>1), 'pre'=>array('big'=>1, 'font'=>1, 'img'=>1, 'object'=>1, 'script'=>1, 'small'=>1, 'sub'=>1, 'sup'=>1), 'rb'=>array('ruby'=>1), 'rt'=>array('ruby'=>1)); // Illegal
$cN2 = array_keys($cN);
$cR = array('blockquote'=>1, 'dir'=>1, 'dl'=>1, 'form'=>1, 'map'=>1, 'menu'=>1, 'noscript'=>1, 'ol'=>1, 'optgroup'=>1, 'rbc'=>1, 'rtc'=>1, 'ruby'=>1, 'select'=>1, 'table'=>1, 'tbody'=>1, 'tfoot'=>1, 'thead'=>1, 'tr'=>1, 'ul'=>1);
$cS = array('colgroup'=>array('col'=>1), 'dir'=>array('li'=>1), 'dl'=>array('dd'=>1, 'dt'=>1), 'menu'=>array('li'=>1), 'ol'=>array('li'=>1), 'optgroup'=>array('option'=>1), 'option'=>array('#pcdata'=>1), 'rbc'=>array('rb'=>1), 'rp'=>array('#pcdata'=>1), 'rtc'=>array('rt'=>1), 'ruby'=>array('rb'=>1, 'rbc'=>1, 'rp'=>1, 'rt'=>1, 'rtc'=>1), 'select'=>array('optgroup'=>1, 'option'=>1), 'script'=>array('#pcdata'=>1), 'table'=>array('caption'=>1, 'col'=>1, 'colgroup'=>1, 'tfoot'=>1, 'tbody'=>1, 'tr'=>1, 'thead'=>1), 'tbody'=>array('tr'=>1), 'tfoot'=>array('tr'=>1), 'textarea'=>array('#pcdata'=>1), 'thead'=>array('tr'=>1), 'tr'=>array('td'=>1, 'th'=>1), 'ul'=>array('li'=>1)); // Specific - immediate parent-child
if($GLOBALS['C']['direct_list_nest']){$cS['ol'] = $cS['ul'] += array('ol'=>1, 'ul'=>1);}
$cO = array('address'=>array('p'=>1), 'applet'=>array('param'=>1), 'blockquote'=>array('script'=>1), 'fieldset'=>array('legend'=>1, '#pcdata'=>1), 'form'=>array('script'=>1), 'map'=>array('area'=>1), 'object'=>array('param'=>1, 'embed'=>1)); // Other
$cT = array('colgroup'=>1, 'dd'=>1, 'dt'=>1, 'li'=>1, 'option'=>1, 'p'=>1, 'td'=>1, 'tfoot'=>1, 'th'=>1, 'thead'=>1, 'tr'=>1); // Omitable closing
// block/inline type; ins & del both type; #pcdata: text
$eB = array('address'=>1, 'blockquote'=>1, 'center'=>1, 'del'=>1, 'dir'=>1, 'dl'=>1, 'div'=>1, 'fieldset'=>1, 'form'=>1, 'ins'=>1, 'h1'=>1, 'h2'=>1, 'h3'=>1, 'h4'=>1, 'h5'=>1, 'h6'=>1, 'hr'=>1, 'isindex'=>1, 'menu'=>1, 'noscript'=>1, 'ol'=>1, 'p'=>1, 'pre'=>1, 'table'=>1, 'ul'=>1);
$eI = array('#pcdata'=>1, 'a'=>1, 'abbr'=>1, 'acronym'=>1, 'applet'=>1, 'b'=>1, 'bdo'=>1, 'big'=>1, 'br'=>1, 'button'=>1, 'cite'=>1, 'code'=>1, 'del'=>1, 'dfn'=>1, 'em'=>1, 'embed'=>1, 'font'=>1, 'i'=>1, 'iframe'=>1, 'img'=>1, 'input'=>1, 'ins'=>1, 'kbd'=>1, 'label'=>1, 'map'=>1, 'object'=>1, 'q'=>1, 'ruby'=>1, 's'=>1, 'samp'=>1, 'select'=>1, 'script'=>1, 'small'=>1, 'span'=>1, 'strike'=>1, 'strong'=>1, 'sub'=>1, 'sup'=>1, 'textarea'=>1, 'tt'=>1, 'u'=>1, 'var'=>1);
$eN = array('a'=>1, 'big'=>1, 'button'=>1, 'fieldset'=>1, 'font'=>1, 'form'=>1, 'iframe'=>1, 'img'=>1, 'input'=>1, 'label'=>1, 'object'=>1, 'ruby'=>1, 'script'=>1, 'select'=>1, 'small'=>1, 'sub'=>1, 'sup'=>1, 'textarea'=>1); // Exclude from specific ele; $cN values
$eO = array('area'=>1, 'caption'=>1, 'col'=>1, 'colgroup'=>1, 'dd'=>1, 'dt'=>1, 'legend'=>1, 'li'=>1, 'optgroup'=>1, 'option'=>1, 'param'=>1, 'rb'=>1, 'rbc'=>1, 'rp'=>1, 'rt'=>1, 'rtc'=>1, 'script'=>1, 'tbody'=>1, 'td'=>1, 'tfoot'=>1, 'thead'=>1, 'th'=>1, 'tr'=>1); // Missing in $eB & $eI
$eF = $eB + $eI;
// $in sets allowed child
$in = ((isset($eF[$in]) && $in != '#pcdata') or isset($eO[$in])) ? $in : 'div';
if(isset($cE[$in])){
return (!$do ? '' : str_replace(array('<', '>'), array('&lt;', '&gt;'), $t));
}
if(isset($cS[$in])){$inOk = $cS[$in];}
elseif(isset($cI[$in])){$inOk = $eI; $cI['del'] = 1; $cI['ins'] = 1;}
elseif(isset($cF[$in])){$inOk = $eF; unset($cI['del'], $cI['ins']);}
elseif(isset($cB[$in])){$inOk = $eB; unset($cI['del'], $cI['ins']);}
if(isset($cO[$in])){$inOk = $inOk + $cO[$in];}
if(isset($cN[$in])){$inOk = array_diff_assoc($inOk, $cN[$in]);}
$t = explode('<', $t);
$ok = $q = array(); // $q seq list of open non-empty ele
ob_start();
for($i=-1, $ci=count($t); ++$i<$ci;){
// allowed $ok in parent $p
if($ql = count($q)){
$p = array_pop($q);
$q[] = $p;
if(isset($cS[$p])){$ok = $cS[$p];}
elseif(isset($cI[$p])){$ok = $eI; $cI['del'] = 1; $cI['ins'] = 1;}
elseif(isset($cF[$p])){$ok = $eF; unset($cI['del'], $cI['ins']);}
elseif(isset($cB[$p])){$ok = $eB; unset($cI['del'], $cI['ins']);}
if(isset($cO[$p])){$ok = $ok + $cO[$p];}
if(isset($cN[$p])){$ok = array_diff_assoc($ok, $cN[$p]);}
}else{$ok = $inOk; unset($cI['del'], $cI['ins']);}
// bad tags, & ele content
if(isset($e) && ($do == 1 or (isset($ok['#pcdata']) && ($do == 3 or $do == 5)))){
echo '&lt;', $s, $e, $a, '&gt;';
}
if(isset($x[0])){
if(strlen(trim($x)) && (($ql && isset($cB[$p])) or (isset($cB[$in]) && !$ql))){
echo '<div>', $x, '</div>';
}
elseif($do < 3 or isset($ok['#pcdata'])){echo $x;}
elseif(strpos($x, "\x02\x04")){
foreach(preg_split('`(\x01\x02[^\x01\x02]+\x02\x01)`', $x, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY) as $v){
echo (substr($v, 0, 2) == "\x01\x02" ? $v : ($do > 4 ? preg_replace('`\S`', '', $v) : ''));
}
}elseif($do > 4){echo preg_replace('`\S`', '', $x);}
}
// get markup
if(!preg_match('`^(/?)([a-z1-6]+)([^>]*)>(.*)`sm', $t[$i], $r)){$x = $t[$i]; continue;}
$s = null; $e = null; $a = null; $x = null; list($all, $s, $e, $a, $x) = $r;
// close tag
if($s){
if(isset($cE[$e]) or !in_array($e, $q)){continue;} // Empty/unopen
if($p == $e){array_pop($q); echo '</', $e, '>'; unset($e); continue;} // Last open
$add = ''; // Nesting - close open tags that need to be
for($j=-1, $cj=count($q); ++$j<$cj;){
if(($d = array_pop($q)) == $e){break;}
else{$add .= "</{$d}>";}
}
echo $add, '</', $e, '>'; unset($e); continue;
}
// open tag
// $cB ele needs $eB ele as child
if(isset($cB[$e]) && strlen(trim($x))){
$t[$i] = "{$e}{$a}>";
array_splice($t, $i+1, 0, 'div>'. $x); unset($e, $x); ++$ci; --$i; continue;
}
if((($ql && isset($cB[$p])) or (isset($cB[$in]) && !$ql)) && !isset($eB[$e]) && !isset($ok[$e])){
array_splice($t, $i, 0, 'div>'); unset($e, $x); ++$ci; --$i; continue;
}
// if no open ele, $in = parent; mostly immediate parent-child relation should hold
if(!$ql or !isset($eN[$e]) or !array_intersect($q, $cN2)){
if(!isset($ok[$e])){
if($ql && isset($cT[$p])){echo '</', array_pop($q), '>'; unset($e, $x); --$i;}
continue;
}
if(!isset($cE[$e])){$q[] = $e;}
echo '<', $e, $a, '>'; unset($e); continue;
}
// specific parent-child
if(isset($cS[$p][$e])){
if(!isset($cE[$e])){$q[] = $e;}
echo '<', $e, $a, '>'; unset($e); continue;
}
// nesting
$add = '';
$q2 = array();
for($k=-1, $kc=count($q); ++$k<$kc;){
$d = $q[$k];
$ok2 = array();
if(isset($cS[$d])){$q2[] = $d; continue;}
$ok2 = isset($cI[$d]) ? $eI : $eF;
if(isset($cO[$d])){$ok2 = $ok2 + $cO[$d];}
if(isset($cN[$d])){$ok2 = array_diff_assoc($ok2, $cN[$d]);}
if(!isset($ok2[$e])){
if(!$k && !isset($inOk[$e])){continue 2;}
$add = "</{$d}>";
for(;++$k<$kc;){$add = "</{$q[$k]}>{$add}";}
break;
}
else{$q2[] = $d;}
}
$q = $q2;
if(!isset($cE[$e])){$q[] = $e;}
echo $add, '<', $e, $a, '>'; unset($e); continue;
}
// end
if($ql = count($q)){
$p = array_pop($q);
$q[] = $p;
if(isset($cS[$p])){$ok = $cS[$p];}
elseif(isset($cI[$p])){$ok = $eI; $cI['del'] = 1; $cI['ins'] = 1;}
elseif(isset($cF[$p])){$ok = $eF; unset($cI['del'], $cI['ins']);}
elseif(isset($cB[$p])){$ok = $eB; unset($cI['del'], $cI['ins']);}
if(isset($cO[$p])){$ok = $ok + $cO[$p];}
if(isset($cN[$p])){$ok = array_diff_assoc($ok, $cN[$p]);}
}else{$ok = $inOk; unset($cI['del'], $cI['ins']);}
if(isset($e) && ($do == 1 or (isset($ok['#pcdata']) && ($do == 3 or $do == 5)))){
echo '&lt;', $s, $e, $a, '&gt;';
}
if(isset($x[0])){
if(strlen(trim($x)) && (($ql && isset($cB[$p])) or (isset($cB[$in]) && !$ql))){
echo '<div>', $x, '</div>';
}
elseif($do < 3 or isset($ok['#pcdata'])){echo $x;}
elseif(strpos($x, "\x02\x04")){
foreach(preg_split('`(\x01\x02[^\x01\x02]+\x02\x01)`', $x, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY) as $v){
echo (substr($v, 0, 2) == "\x01\x02" ? $v : ($do > 4 ? preg_replace('`\S`', '', $v) : ''));
}
}elseif($do > 4){echo preg_replace('`\S`', '', $x);}
}
while(!empty($q) && ($e = array_pop($q))){echo '</', $e, '>';}
$o = ob_get_contents();
ob_end_clean();
return $o;
// eof
}
public static function hl_cmtcd($t){
// comment/CDATA sec handler
$t = $t[0];
global $C;
if(!($v = $C[$n = $t[3] == '-' ? 'comment' : 'cdata'])){return $t;}
if($v == 1){return '';}
if($n == 'comment'){
if(substr(($t = preg_replace('`--+`', '-', substr($t, 4, -3))), -1) != ' '){$t .= ' ';}
}
else{$t = substr($t, 1, -1);}
$t = $v == 2 ? str_replace(array('&', '<', '>'), array('&amp;', '&lt;', '&gt;'), $t) : $t;
return str_replace(array('&', '<', '>'), array("\x03", "\x04", "\x05"), ($n == 'comment' ? "\x01\x02\x04!--$t--\x05\x02\x01" : "\x01\x01\x04$t\x05\x01\x01"));
// eof
}
public static function hl_ent($t){
// entitity handler
global $C;
$t = $t[1];
static $U = array('quot'=>1,'amp'=>1,'lt'=>1,'gt'=>1);
static $N = array('fnof'=>'402', 'Alpha'=>'913', 'Beta'=>'914', 'Gamma'=>'915', 'Delta'=>'916', 'Epsilon'=>'917', 'Zeta'=>'918', 'Eta'=>'919', 'Theta'=>'920', 'Iota'=>'921', 'Kappa'=>'922', 'Lambda'=>'923', 'Mu'=>'924', 'Nu'=>'925', 'Xi'=>'926', 'Omicron'=>'927', 'Pi'=>'928', 'Rho'=>'929', 'Sigma'=>'931', 'Tau'=>'932', 'Upsilon'=>'933', 'Phi'=>'934', 'Chi'=>'935', 'Psi'=>'936', 'Omega'=>'937', 'alpha'=>'945', 'beta'=>'946', 'gamma'=>'947', 'delta'=>'948', 'epsilon'=>'949', 'zeta'=>'950', 'eta'=>'951', 'theta'=>'952', 'iota'=>'953', 'kappa'=>'954', 'lambda'=>'955', 'mu'=>'956', 'nu'=>'957', 'xi'=>'958', 'omicron'=>'959', 'pi'=>'960', 'rho'=>'961', 'sigmaf'=>'962', 'sigma'=>'963', 'tau'=>'964', 'upsilon'=>'965', 'phi'=>'966', 'chi'=>'967', 'psi'=>'968', 'omega'=>'969', 'thetasym'=>'977', 'upsih'=>'978', 'piv'=>'982', 'bull'=>'8226', 'hellip'=>'8230', 'prime'=>'8242', 'Prime'=>'8243', 'oline'=>'8254', 'frasl'=>'8260', 'weierp'=>'8472', 'image'=>'8465', 'real'=>'8476', 'trade'=>'8482', 'alefsym'=>'8501', 'larr'=>'8592', 'uarr'=>'8593', 'rarr'=>'8594', 'darr'=>'8595', 'harr'=>'8596', 'crarr'=>'8629', 'lArr'=>'8656', 'uArr'=>'8657', 'rArr'=>'8658', 'dArr'=>'8659', 'hArr'=>'8660', 'forall'=>'8704', 'part'=>'8706', 'exist'=>'8707', 'empty'=>'8709', 'nabla'=>'8711', 'isin'=>'8712', 'notin'=>'8713', 'ni'=>'8715', 'prod'=>'8719', 'sum'=>'8721', 'minus'=>'8722', 'lowast'=>'8727', 'radic'=>'8730', 'prop'=>'8733', 'infin'=>'8734', 'ang'=>'8736', 'and'=>'8743', 'or'=>'8744', 'cap'=>'8745', 'cup'=>'8746', 'int'=>'8747', 'there4'=>'8756', 'sim'=>'8764', 'cong'=>'8773', 'asymp'=>'8776', 'ne'=>'8800', 'equiv'=>'8801', 'le'=>'8804', 'ge'=>'8805', 'sub'=>'8834', 'sup'=>'8835', 'nsub'=>'8836', 'sube'=>'8838', 'supe'=>'8839', 'oplus'=>'8853', 'otimes'=>'8855', 'perp'=>'8869', 'sdot'=>'8901', 'lceil'=>'8968', 'rceil'=>'8969', 'lfloor'=>'8970', 'rfloor'=>'8971', 'lang'=>'9001', 'rang'=>'9002', 'loz'=>'9674', 'spades'=>'9824', 'clubs'=>'9827', 'hearts'=>'9829', 'diams'=>'9830', 'apos'=>'39', 'OElig'=>'338', 'oelig'=>'339', 'Scaron'=>'352', 'scaron'=>'353', 'Yuml'=>'376', 'circ'=>'710', 'tilde'=>'732', 'ensp'=>'8194', 'emsp'=>'8195', 'thinsp'=>'8201', 'zwnj'=>'8204', 'zwj'=>'8205', 'lrm'=>'8206', 'rlm'=>'8207', 'ndash'=>'8211', 'mdash'=>'8212', 'lsquo'=>'8216', 'rsquo'=>'8217', 'sbquo'=>'8218', 'ldquo'=>'8220', 'rdquo'=>'8221', 'bdquo'=>'8222', 'dagger'=>'8224', 'Dagger'=>'8225', 'permil'=>'8240', 'lsaquo'=>'8249', 'rsaquo'=>'8250', 'euro'=>'8364', 'nbsp'=>'160', 'iexcl'=>'161', 'cent'=>'162', 'pound'=>'163', 'curren'=>'164', 'yen'=>'165', 'brvbar'=>'166', 'sect'=>'167', 'uml'=>'168', 'copy'=>'169', 'ordf'=>'170', 'laquo'=>'171', 'not'=>'172', 'shy'=>'173', 'reg'=>'174', 'macr'=>'175', 'deg'=>'176', 'plusmn'=>'177', 'sup2'=>'178', 'sup3'=>'179', 'acute'=>'180', 'micro'=>'181', 'para'=>'182', 'middot'=>'183', 'cedil'=>'184', 'sup1'=>'185', 'ordm'=>'186', 'raquo'=>'187', 'frac14'=>'188', 'frac12'=>'189', 'frac34'=>'190', 'iquest'=>'191', 'Agrave'=>'192', 'Aacute'=>'193', 'Acirc'=>'194', 'Atilde'=>'195', 'Auml'=>'196', 'Aring'=>'197', 'AElig'=>'198', 'Ccedil'=>'199', 'Egrave'=>'200', 'Eacute'=>'201', 'Ecirc'=>'202', 'Euml'=>'203', 'Igrave'=>'204', 'Iacute'=>'205', 'Icirc'=>'206', 'Iuml'=>'207', 'ETH'=>'208', 'Ntilde'=>'209', 'Ograve'=>'210', 'Oacute'=>'211', 'Ocirc'=>'212', 'Otilde'=>'213', 'Ouml'=>'214', 'times'=>'215', 'Oslash'=>'216', 'Ugrave'=>'217', 'Uacute'=>'218', 'Ucirc'=>'219', 'Uuml'=>'220', 'Yacute'=>'221', 'THORN'=>'222', 'szlig'=>'223', 'agrave'=>'224', 'aacute'=>'225', 'acirc'=>'226', 'atilde'=>'227', 'auml'=>'228', 'aring'=>'229', 'aelig'=>'230', 'ccedil'=>'231', 'egrave'=>'232', 'eacute'=>'233', 'ecirc'=>'234', 'euml'=>'235', 'igrave'=>'236', 'iacute'=>'237', 'icirc'=>'238', 'iuml'=>'239', 'eth'=>'240', 'ntilde'=>'241', 'ograve'=>'242', 'oacute'=>'243', 'ocirc'=>'244', 'otilde'=>'245', 'ouml'=>'246', 'divide'=>'247', 'oslash'=>'248', 'ugrave'=>'249', 'uacute'=>'250', 'ucirc'=>'251', 'uuml'=>'252', 'yacute'=>'253', 'thorn'=>'254', 'yuml'=>'255');
if($t[0] != '#'){
return ($C['and_mark'] ? "\x06" : '&'). (isset($U[$t]) ? $t : (isset($N[$t]) ? (!$C['named_entity'] ? '#'. ($C['hexdec_entity'] > 1 ? 'x'. dechex($N[$t]) : $N[$t]) : $t) : 'amp;'. $t)). ';';
}
if(($n = ctype_digit($t = substr($t, 1)) ? intval($t) : hexdec(substr($t, 1))) < 9 or ($n > 13 && $n < 32) or $n == 11 or $n == 12 or ($n > 126 && $n < 160 && $n != 133) or ($n > 55295 && ($n < 57344 or ($n > 64975 && $n < 64992) or $n == 65534 or $n == 65535 or $n > 1114111))){
return ($C['and_mark'] ? "\x06" : '&'). "amp;#{$t};";
}
return ($C['and_mark'] ? "\x06" : '&'). '#'. (((ctype_digit($t) && $C['hexdec_entity'] < 2) or !$C['hexdec_entity']) ? $n : 'x'. dechex($n)). ';';
// eof
}
public static function hl_prot($p, $c=null){
// check URL scheme
global $C;
$b = $a = '';
if($c == null){$c = 'style'; $b = $p[1]; $a = $p[3]; $p = trim($p[2]);}
$c = isset($C['schemes'][$c]) ? $C['schemes'][$c] : $C['schemes']['*'];
static $d = 'denied:';
if(isset($c['!']) && substr($p, 0, 7) != $d){$p = "$d$p";}
if(isset($c['*']) or !strcspn($p, '#?;') or (substr($p, 0, 7) == $d)){return "{$b}{$p}{$a}";} // All ok, frag, query, param
if(preg_match('`^([a-z\d\-+.&#; ]+?)(:|&#(58|x3a);|%3a|\\\\0{0,4}3a).`i', $p, $m) && !isset($c[strtolower($m[1])])){ // Denied prot
return "{$b}{$d}{$p}{$a}";
}
if($C['abs_url']){
if($C['abs_url'] == -1 && strpos($p, $C['base_url']) === 0){ // Make url rel
$p = substr($p, strlen($C['base_url']));
}elseif(empty($m[1])){ // Make URL abs
if(substr($p, 0, 2) == '//'){$p = substr($C['base_url'], 0, strpos($C['base_url'], ':')+1). $p;}
elseif($p[0] == '/'){$p = preg_replace('`(^.+?://[^/]+)(.*)`', '$1', $C['base_url']). $p;}
elseif(strcspn($p, './')){$p = $C['base_url']. $p;}
else{
preg_match('`^([a-zA-Z\d\-+.]+://[^/]+)(.*)`', $C['base_url'], $m);
$p = preg_replace('`(?<=/)\./`', '', $m[2]. $p);
while(preg_match('`(?<=/)([^/]{3,}|[^/.]+?|\.[^/.]|[^/.]\.)/\.\./`', $p)){
$p = preg_replace('`(?<=/)([^/]{3,}|[^/.]+?|\.[^/.]|[^/.]\.)/\.\./`', '', $p);
}
$p = $m[1]. $p;
}
}
}
return "{$b}{$p}{$a}";
// eof
}
public static function hl_regex($p){
// ?regex
if(empty($p)){return 0;}
if($t = ini_get('track_errors')){$o = isset($php_errormsg) ? $php_errormsg : null;}
else{ini_set('track_errors', 1);}
unset($php_errormsg);
if(($d = ini_get('display_errors'))){ini_set('display_errors', 0);}
preg_match($p, '');
if($d){ini_set('display_errors', 1);}
$r = isset($php_errormsg) ? 0 : 1;
if($t){$php_errormsg = isset($o) ? $o : null;}
else{ini_set('track_errors', 0);}
return $r;
// eof
}
public static function hl_spec($t){
// final $spec
$s = array();
$t = str_replace(array("\t", "\r", "\n", ' '), '', preg_replace('/"(?>(`.|[^"])*)"/sme', 'substr(str_replace(array(";", "|", "~", " ", ",", "/", "(", ")", \'`"\'), array("\x01", "\x02", "\x03", "\x04", "\x05", "\x06", "\x07", "\x08", "\""), "$0"), 1, -1)', trim($t)));
for($i = count(($t = explode(';', $t))); --$i>=0;){
$w = $t[$i];
if(empty($w) or ($e = strpos($w, '=')) === false or !strlen(($a = substr($w, $e+1)))){continue;}
$y = $n = array();
foreach(explode(',', $a) as $v){
if(!preg_match('`^([a-z:\-\*]+)(?:\((.*?)\))?`i', $v, $m)){continue;}
if(($x = strtolower($m[1])) == '-*'){$n['*'] = 1; continue;}
if($x[0] == '-'){$n[substr($x, 1)] = 1; continue;}
if(!isset($m[2])){$y[$x] = 1; continue;}
foreach(explode('/', $m[2]) as $m){
if(empty($m) or ($p = strpos($m, '=')) == 0 or $p < 5){$y[$x] = 1; continue;}
$y[$x][strtolower(substr($m, 0, $p))] = str_replace(array("\x01", "\x02", "\x03", "\x04", "\x05", "\x06", "\x07", "\x08"), array(";", "|", "~", " ", ",", "/", "(", ")"), substr($m, $p+1));
}
if(isset($y[$x]['match']) && !htmLawed::hl_regex($y[$x]['match'])){unset($y[$x]['match']);}
if(isset($y[$x]['nomatch']) && !htmLawed::hl_regex($y[$x]['nomatch'])){unset($y[$x]['nomatch']);}
}
if(!count($y) && !count($n)){continue;}
foreach(explode(',', substr($w, 0, $e)) as $v){
if(!strlen(($v = strtolower($v)))){continue;}
if(count($y)){$s[$v] = $y;}
if(count($n)){$s[$v]['n'] = $n;}
}
}
return $s;
// eof
}
public static function hl_tag($t){
// tag/attribute handler
global $C;
$t = $t[0];
// invalid < >
if($t == '< '){return '&lt; ';}
if($t == '>'){return '&gt;';}
if(!preg_match('`^<(/?)([a-zA-Z][a-zA-Z1-6]*)([^>]*?)\s?>$`m', $t, $m)){
return str_replace(array('<', '>'), array('&lt;', '&gt;'), $t);
}elseif(!isset($C['elements'][($e = strtolower($m[2]))])){
return (($C['keep_bad']%2) ? str_replace(array('<', '>'), array('&lt;', '&gt;'), $t) : '');
}
// attr string
$a = str_replace(array("\n", "\r", "\t"), ' ', trim($m[3]));
// tag transform
static $eD = array('applet'=>1, 'center'=>1, 'dir'=>1, 'embed'=>1, 'font'=>1, 'isindex'=>1, 'menu'=>1, 's'=>1, 'strike'=>1, 'u'=>1); // Deprecated
if($C['make_tag_strict'] && isset($eD[$e])){
$trt = htmLawed::hl_tag2($e, $a, $C['make_tag_strict']);
if(!$e){return (($C['keep_bad']%2) ? str_replace(array('<', '>'), array('&lt;', '&gt;'), $t) : '');}
}
// close tag
static $eE = array('area'=>1, 'br'=>1, 'col'=>1, 'embed'=>1, 'hr'=>1, 'img'=>1, 'input'=>1, 'isindex'=>1, 'param'=>1); // Empty ele
if(!empty($m[1])){
return (!isset($eE[$e]) ? (empty($C['hook_tag']) ? "</$e>" : $C['hook_tag']($e)) : (($C['keep_bad'])%2 ? str_replace(array('<', '>'), array('&lt;', '&gt;'), $t) : ''));
}
// open tag & attr
static $aN = array('abbr'=>array('td'=>1, 'th'=>1), 'accept-charset'=>array('form'=>1), 'accept'=>array('form'=>1, 'input'=>1), 'accesskey'=>array('a'=>1, 'area'=>1, 'button'=>1, 'input'=>1, 'label'=>1, 'legend'=>1, 'textarea'=>1), 'action'=>array('form'=>1), 'align'=>array('caption'=>1, 'embed'=>1, 'applet'=>1, 'iframe'=>1, 'img'=>1, 'input'=>1, 'object'=>1, 'legend'=>1, 'table'=>1, 'hr'=>1, 'div'=>1, 'h1'=>1, 'h2'=>1, 'h3'=>1, 'h4'=>1, 'h5'=>1, 'h6'=>1, 'p'=>1, 'col'=>1, 'colgroup'=>1, 'tbody'=>1, 'td'=>1, 'tfoot'=>1, 'th'=>1, 'thead'=>1, 'tr'=>1), 'alt'=>array('applet'=>1, 'area'=>1, 'img'=>1, 'input'=>1), 'archive'=>array('applet'=>1, 'object'=>1), 'axis'=>array('td'=>1, 'th'=>1), 'bgcolor'=>array('embed'=>1, 'table'=>1, 'tr'=>1, 'td'=>1, 'th'=>1), 'border'=>array('table'=>1, 'img'=>1, 'object'=>1), 'bordercolor'=>array('table'=>1, 'td'=>1, 'tr'=>1), 'cellpadding'=>array('table'=>1), 'cellspacing'=>array('table'=>1), 'char'=>array('col'=>1, 'colgroup'=>1, 'tbody'=>1, 'td'=>1, 'tfoot'=>1, 'th'=>1, 'thead'=>1, 'tr'=>1), 'charoff'=>array('col'=>1, 'colgroup'=>1, 'tbody'=>1, 'td'=>1, 'tfoot'=>1, 'th'=>1, 'thead'=>1, 'tr'=>1), 'charset'=>array('a'=>1, 'script'=>1), 'checked'=>array('input'=>1), 'cite'=>array('blockquote'=>1, 'q'=>1, 'del'=>1, 'ins'=>1), 'classid'=>array('object'=>1), 'clear'=>array('br'=>1), 'code'=>array('applet'=>1), 'codebase'=>array('object'=>1, 'applet'=>1), 'codetype'=>array('object'=>1), 'color'=>array('font'=>1), 'cols'=>array('textarea'=>1), 'colspan'=>array('td'=>1, 'th'=>1), 'compact'=>array('dir'=>1, 'dl'=>1, 'menu'=>1, 'ol'=>1, 'ul'=>1), 'coords'=>array('area'=>1, 'a'=>1), 'data'=>array('object'=>1), 'datetime'=>array('del'=>1, 'ins'=>1), 'declare'=>array('object'=>1), 'defer'=>array('script'=>1), 'dir'=>array('bdo'=>1), 'disabled'=>array('button'=>1, 'input'=>1, 'optgroup'=>1, 'option'=>1, 'select'=>1, 'textarea'=>1), 'enctype'=>array('form'=>1), 'face'=>array('font'=>1), 'for'=>array('label'=>1), 'frame'=>array('table'=>1), 'frameborder'=>array('iframe'=>1), 'headers'=>array('td'=>1, 'th'=>1), 'height'=>array('embed'=>1, 'iframe'=>1, 'td'=>1, 'th'=>1, 'img'=>1, 'object'=>1, 'applet'=>1), 'href'=>array('a'=>1, 'area'=>1), 'hreflang'=>array('a'=>1), 'hspace'=>array('applet'=>1, 'img'=>1, 'object'=>1), 'ismap'=>array('img'=>1, 'input'=>1), 'label'=>array('option'=>1, 'optgroup'=>1), 'language'=>array('script'=>1), 'longdesc'=>array('img'=>1, 'iframe'=>1), 'marginheight'=>array('iframe'=>1), 'marginwidth'=>array('iframe'=>1), 'maxlength'=>array('input'=>1), 'method'=>array('form'=>1), 'model'=>array('embed'=>1), 'multiple'=>array('select'=>1), 'name'=>array('button'=>1, 'embed'=>1, 'textarea'=>1, 'applet'=>1, 'select'=>1, 'form'=>1, 'iframe'=>1, 'img'=>1, 'a'=>1, 'input'=>1, 'object'=>1, 'map'=>1, 'param'=>1), 'nohref'=>array('area'=>1), 'noshade'=>array('hr'=>1), 'nowrap'=>array('td'=>1, 'th'=>1), 'object'=>array('applet'=>1), 'onblur'=>array('a'=>1, 'area'=>1, 'button'=>1, 'input'=>1, 'label'=>1, 'select'=>1, 'textarea'=>1), 'onchange'=>array('input'=>1, 'select'=>1, 'textarea'=>1), 'onfocus'=>array('a'=>1, 'area'=>1, 'button'=>1, 'input'=>1, 'label'=>1, 'select'=>1, 'textarea'=>1), 'onreset'=>array('form'=>1), 'onselect'=>array('input'=>1, 'textarea'=>1), 'onsubmit'=>array('form'=>1), 'pluginspage'=>array('embed'=>1), 'pluginurl'=>array('embed'=>1), 'prompt'=>array('isindex'=>1), 'readonly'=>array('textarea'=>1, 'input'=>1), 'rel'=>array('a'=>1), 'rev'=>array('a'=>1), 'rows'=>array('textarea'=>1), 'rowspan'=>array('td'=>1, 'th'=>1), 'rules'=>array('table'=>1), 'scope'=>array('td'=>1, 'th'=>1), 'scrolling'=>array('iframe'=>1), 'selected'=>array('option'=>1), 'shape'=>array('area'=>1, 'a'=>1), 'size'=>array('hr'=>1, 'font'=>1, 'input'=>1, 'select'=>1), 'span'=>array('col'=>1, 'colgroup'=>1), 'src'=>array('embed'=>1, 'script'=>1, 'input'=>1, 'iframe'=>1, 'img'=>1), 'standby'=>array('object'=>1), 'start'=>array('ol'=>1), 'summary'=>array('table'=>1), 'tabindex'=>array('a'=>1, 'area'=>1, 'button'=>1, 'input'=>1, 'object'=>1, 'select'=>1, 'textarea'=>1), 'target'=>array('a'=>1, 'area'=>1, 'form'=>1), 'type'=>array('a'=>1, 'embed'=>1, 'object'=>1, 'param'=>1, 'script'=>1, 'input'=>1, 'li'=>1, 'ol'=>1, 'ul'=>1, 'button'=>1), 'usemap'=>array('img'=>1, 'input'=>1, 'object'=>1), 'valign'=>array('col'=>1, 'colgroup'=>1, 'tbody'=>1, 'td'=>1, 'tfoot'=>1, 'th'=>1, 'thead'=>1, 'tr'=>1), 'value'=>array('input'=>1, 'option'=>1, 'param'=>1, 'button'=>1, 'li'=>1), 'valuetype'=>array('param'=>1), 'vspace'=>array('applet'=>1, 'img'=>1, 'object'=>1), 'width'=>array('embed'=>1, 'hr'=>1, 'iframe'=>1, 'img'=>1, 'object'=>1, 'table'=>1, 'td'=>1, 'th'=>1, 'applet'=>1, 'col'=>1, 'colgroup'=>1, 'pre'=>1), 'wmode'=>array('embed'=>1), 'xml:space'=>array('pre'=>1, 'script'=>1, 'style'=>1)); // Ele-specific
static $aNE = array('checked'=>1, 'compact'=>1, 'declare'=>1, 'defer'=>1, 'disabled'=>1, 'ismap'=>1, 'multiple'=>1, 'nohref'=>1, 'noresize'=>1, 'noshade'=>1, 'nowrap'=>1, 'readonly'=>1, 'selected'=>1); // Empty
static $aNP = array('action'=>1, 'cite'=>1, 'classid'=>1, 'codebase'=>1, 'data'=>1, 'href'=>1, 'longdesc'=>1, 'model'=>1, 'pluginspage'=>1, 'pluginurl'=>1, 'usemap'=>1); // Need scheme check; excludes style, on* & src
static $aNU = array('class'=>array('param'=>1, 'script'=>1), 'dir'=>array('applet'=>1, 'bdo'=>1, 'br'=>1, 'iframe'=>1, 'param'=>1, 'script'=>1), 'id'=>array('script'=>1), 'lang'=>array('applet'=>1, 'br'=>1, 'iframe'=>1, 'param'=>1, 'script'=>1), 'xml:lang'=>array('applet'=>1, 'br'=>1, 'iframe'=>1, 'param'=>1, 'script'=>1), 'onclick'=>array('applet'=>1, 'bdo'=>1, 'br'=>1, 'font'=>1, 'iframe'=>1, 'isindex'=>1, 'param'=>1, 'script'=>1), 'ondblclick'=>array('applet'=>1, 'bdo'=>1, 'br'=>1, 'font'=>1, 'iframe'=>1, 'isindex'=>1, 'param'=>1, 'script'=>1), 'onkeydown'=>array('applet'=>1, 'bdo'=>1, 'br'=>1, 'font'=>1, 'iframe'=>1, 'isindex'=>1, 'param'=>1, 'script'=>1), 'onkeypress'=>array('applet'=>1, 'bdo'=>1, 'br'=>1, 'font'=>1, 'iframe'=>1, 'isindex'=>1, 'param'=>1, 'script'=>1), 'onkeyup'=>array('applet'=>1, 'bdo'=>1, 'br'=>1, 'font'=>1, 'iframe'=>1, 'isindex'=>1, 'param'=>1, 'script'=>1), 'onmousedown'=>array('applet'=>1, 'bdo'=>1, 'br'=>1, 'font'=>1, 'iframe'=>1, 'isindex'=>1, 'param'=>1, 'script'=>1), 'onmousemove'=>array('applet'=>1, 'bdo'=>1, 'br'=>1, 'font'=>1, 'iframe'=>1, 'isindex'=>1, 'param'=>1, 'script'=>1), 'onmouseout'=>array('applet'=>1, 'bdo'=>1, 'br'=>1, 'font'=>1, 'iframe'=>1, 'isindex'=>1, 'param'=>1, 'script'=>1), 'onmouseover'=>array('applet'=>1, 'bdo'=>1, 'br'=>1, 'font'=>1, 'iframe'=>1, 'isindex'=>1, 'param'=>1, 'script'=>1), 'onmouseup'=>array('applet'=>1, 'bdo'=>1, 'br'=>1, 'font'=>1, 'iframe'=>1, 'isindex'=>1, 'param'=>1, 'script'=>1), 'style'=>array('param'=>1, 'script'=>1), 'title'=>array('param'=>1, 'script'=>1)); // Univ & exceptions
if($C['lc_std_val']){
// predef attr vals for $eAL & $aNE ele
static $aNL = array('all'=>1, 'baseline'=>1, 'bottom'=>1, 'button'=>1, 'center'=>1, 'char'=>1, 'checkbox'=>1, 'circle'=>1, 'col'=>1, 'colgroup'=>1, 'cols'=>1, 'data'=>1, 'default'=>1, 'file'=>1, 'get'=>1, 'groups'=>1, 'hidden'=>1, 'image'=>1, 'justify'=>1, 'left'=>1, 'ltr'=>1, 'middle'=>1, 'none'=>1, 'object'=>1, 'password'=>1, 'poly'=>1, 'post'=>1, 'preserve'=>1, 'radio'=>1, 'rect'=>1, 'ref'=>1, 'reset'=>1, 'right'=>1, 'row'=>1, 'rowgroup'=>1, 'rows'=>1, 'rtl'=>1, 'submit'=>1, 'text'=>1, 'top'=>1);
static $eAL = array('a'=>1, 'area'=>1, 'bdo'=>1, 'button'=>1, 'col'=>1, 'form'=>1, 'img'=>1, 'input'=>1, 'object'=>1, 'optgroup'=>1, 'option'=>1, 'param'=>1, 'script'=>1, 'select'=>1, 'table'=>1, 'td'=>1, 'tfoot'=>1, 'th'=>1, 'thead'=>1, 'tr'=>1, 'xml:space'=>1);
$lcase = isset($eAL[$e]) ? 1 : 0;
}
$depTr = 0;
if($C['no_deprecated_attr']){
// dep attr:applicable ele
static $aND = array('align'=>array('caption'=>1, 'div'=>1, 'h1'=>1, 'h2'=>1, 'h3'=>1, 'h4'=>1, 'h5'=>1, 'h6'=>1, 'hr'=>1, 'img'=>1, 'input'=>1, 'legend'=>1, 'object'=>1, 'p'=>1, 'table'=>1), 'bgcolor'=>array('table'=>1, 'td'=>1, 'th'=>1, 'tr'=>1), 'border'=>array('img'=>1, 'object'=>1), 'bordercolor'=>array('table'=>1, 'td'=>1, 'tr'=>1), 'clear'=>array('br'=>1), 'compact'=>array('dl'=>1, 'ol'=>1, 'ul'=>1), 'height'=>array('td'=>1, 'th'=>1), 'hspace'=>array('img'=>1, 'object'=>1), 'language'=>array('script'=>1), 'name'=>array('a'=>1, 'form'=>1, 'iframe'=>1, 'img'=>1, 'map'=>1), 'noshade'=>array('hr'=>1), 'nowrap'=>array('td'=>1, 'th'=>1), 'size'=>array('hr'=>1), 'start'=>array('ol'=>1), 'type'=>array('li'=>1, 'ol'=>1, 'ul'=>1), 'value'=>array('li'=>1), 'vspace'=>array('img'=>1, 'object'=>1), 'width'=>array('hr'=>1, 'pre'=>1, 'td'=>1, 'th'=>1));
static $eAD = array('a'=>1, 'br'=>1, 'caption'=>1, 'div'=>1, 'dl'=>1, 'form'=>1, 'h1'=>1, 'h2'=>1, 'h3'=>1, 'h4'=>1, 'h5'=>1, 'h6'=>1, 'hr'=>1, 'iframe'=>1, 'img'=>1, 'input'=>1, 'legend'=>1, 'li'=>1, 'map'=>1, 'object'=>1, 'ol'=>1, 'p'=>1, 'pre'=>1, 'script'=>1, 'table'=>1, 'td'=>1, 'th'=>1, 'tr'=>1, 'ul'=>1);
$depTr = isset($eAD[$e]) ? 1 : 0;
}
// attr name-vals
if(strpos($a, "\x01") !== false){$a = preg_replace('`\x01[^\x01]*\x01`', '', $a);} // No comment/CDATA sec
$mode = 0; $a = trim($a, ' /'); $aA = array();
while(strlen($a)){
$w = 0;
switch($mode){
case 0: // Name
if(preg_match('`^[a-zA-Z][\-a-zA-Z:]+`', $a, $m)){
$nm = strtolower($m[0]);
$w = $mode = 1; $a = ltrim(substr_replace($a, '', 0, strlen($m[0])));
}
break; case 1:
if($a[0] == '='){ // =
$w = 1; $mode = 2; $a = ltrim($a, '= ');
}else{ // No val
$w = 1; $mode = 0; $a = ltrim($a);
$aA[$nm] = '';
}
break; case 2: // Val
if(preg_match('`^((?:"[^"]*")|(?:\'[^\']*\')|(?:\s*[^\s"\']+))(.*)`', $a, $m)){
$a = ltrim($m[2]); $m = $m[1]; $w = 1; $mode = 0;
$aA[$nm] = trim(($m[0] == '"' or $m[0] == '\'') ? substr($m, 1, -1) : $m);
}
break;
}
if($w == 0){ // Parse errs, deal with space, " & '
$a = preg_replace('`^(?:"[^"]*("|$)|\'[^\']*(\'|$)|\S)*\s*`', '', $a);
$mode = 0;
}
}
if($mode == 1){$aA[$nm] = '';}
// clean attrs
global $S;
$rl = isset($S[$e]) ? $S[$e] : array();
$a = array(); $nfr = 0;
foreach($aA as $k=>$v){
if(((isset($C['deny_attribute']['*']) ? isset($C['deny_attribute'][$k]) : !isset($C['deny_attribute'][$k])) && (isset($aN[$k][$e]) or (isset($aNU[$k]) && !isset($aNU[$k][$e]))) && !isset($rl['n'][$k]) && !isset($rl['n']['*'])) or isset($rl[$k])){
if(isset($aNE[$k])){$v = $k;}
elseif(!empty($lcase) && (($e != 'button' or $e != 'input') or $k == 'type')){ // Rather loose but ?not cause issues
$v = (isset($aNL[($v2 = strtolower($v))])) ? $v2 : $v;
}
if($k == 'style' && !$C['style_pass']){
if(false !== strpos($v, '&#')){
static $sC = array('&#x20;'=>' ', '&#32;'=>' ', '&#x45;'=>'e', '&#69;'=>'e', '&#x65;'=>'e', '&#101;'=>'e', '&#x58;'=>'x', '&#88;'=>'x', '&#x78;'=>'x', '&#120;'=>'x', '&#x50;'=>'p', '&#80;'=>'p', '&#x70;'=>'p', '&#112;'=>'p', '&#x53;'=>'s', '&#83;'=>'s', '&#x73;'=>'s', '&#115;'=>'s', '&#x49;'=>'i', '&#73;'=>'i', '&#x69;'=>'i', '&#105;'=>'i', '&#x4f;'=>'o', '&#79;'=>'o', '&#x6f;'=>'o', '&#111;'=>'o', '&#x4e;'=>'n', '&#78;'=>'n', '&#x6e;'=>'n', '&#110;'=>'n', '&#x55;'=>'u', '&#85;'=>'u', '&#x75;'=>'u', '&#117;'=>'u', '&#x52;'=>'r', '&#82;'=>'r', '&#x72;'=>'r', '&#114;'=>'r', '&#x4c;'=>'l', '&#76;'=>'l', '&#x6c;'=>'l', '&#108;'=>'l', '&#x28;'=>'(', '&#40;'=>'(', '&#x29;'=>')', '&#41;'=>')', '&#x20;'=>':', '&#32;'=>':', '&#x22;'=>'"', '&#34;'=>'"', '&#x27;'=>"'", '&#39;'=>"'", '&#x2f;'=>'/', '&#47;'=>'/', '&#x2a;'=>'*', '&#42;'=>'*', '&#x5c;'=>'\\', '&#92;'=>'\\');
$v = strtr($v, $sC);
}
$v = preg_replace_callback('`(url(?:\()(?: )*(?:\'|"|&(?:quot|apos);)?)(.+?)((?:\'|"|&(?:quot|apos);)?(?: )*(?:\)))`iS', 'htmLawed::hl_prot', $v);
$v = !$C['css_expression'] ? preg_replace('`expression`i', ' ', preg_replace('`\\\\\S|(/|(%2f))(\*|(%2a))`i', ' ', $v)) : $v;
}elseif(isset($aNP[$k]) or strpos($k, 'src') !== false or $k[0] == 'o'){
$v = str_replace("\xad", ' ', (strpos($v, '&') !== false ? str_replace(array('&#xad;', '&#173;', '&shy;'), ' ', $v) : $v));
$v = htmLawed::hl_prot($v, $k);
if($k == 'href'){ // X-spam
if($C['anti_mail_spam'] && strpos($v, 'mailto:') === 0){
$v = str_replace('@', htmlspecialchars($C['anti_mail_spam']), $v);
}elseif($C['anti_link_spam']){
$r1 = $C['anti_link_spam'][1];
if(!empty($r1) && preg_match($r1, $v)){continue;}
$r0 = $C['anti_link_spam'][0];
if(!empty($r0) && preg_match($r0, $v)){
if(isset($a['rel'])){
if(!preg_match('`\bnofollow\b`i', $a['rel'])){$a['rel'] .= ' nofollow';}
}elseif(isset($aA['rel'])){
if(!preg_match('`\bnofollow\b`i', $aA['rel'])){$nfr = 1;}
}else{$a['rel'] = 'nofollow';}
}
}
}
}
if(isset($rl[$k]) && is_array($rl[$k]) && ($v = htmLawed::hl_attrval($v, $rl[$k])) === 0){continue;}
$a[$k] = str_replace('"', '&quot;', $v);
}
}
if($nfr){$a['rel'] = isset($a['rel']) ? $a['rel']. ' nofollow' : 'nofollow';}
// rqd attr
static $eAR = array('area'=>array('alt'=>'area'), 'bdo'=>array('dir'=>'ltr'), 'form'=>array('action'=>''), 'img'=>array('src'=>'', 'alt'=>'image'), 'map'=>array('name'=>''), 'optgroup'=>array('label'=>''), 'param'=>array('name'=>''), 'script'=>array('type'=>'text/javascript'), 'textarea'=>array('rows'=>'10', 'cols'=>'50'));
if(isset($eAR[$e])){
foreach($eAR[$e] as $k=>$v){
if(!isset($a[$k])){$a[$k] = isset($v[0]) ? $v : $k;}
}
}
// depr attrs
if($depTr){
$c = array();
foreach($a as $k=>$v){
if($k == 'style' or !isset($aND[$k][$e])){continue;}
if($k == 'align'){
unset($a['align']);
if($e == 'img' && ($v == 'left' or $v == 'right')){$c[] = 'float: '. $v;}
elseif(($e == 'div' or $e == 'table') && $v == 'center'){$c[] = 'margin: auto';}
else{$c[] = 'text-align: '. $v;}
}elseif($k == 'bgcolor'){
unset($a['bgcolor']);
$c[] = 'background-color: '. $v;
}elseif($k == 'border'){
unset($a['border']); $c[] = "border: {$v}px";
}elseif($k == 'bordercolor'){
unset($a['bordercolor']); $c[] = 'border-color: '. $v;
}elseif($k == 'clear'){
unset($a['clear']); $c[] = 'clear: '. ($v != 'all' ? $v : 'both');
}elseif($k == 'compact'){
unset($a['compact']); $c[] = 'font-size: 85%';
}elseif($k == 'height' or $k == 'width'){
unset($a[$k]); $c[] = $k. ': '. ($v[0] != '*' ? $v. (ctype_digit($v) ? 'px' : '') : 'auto');
}elseif($k == 'hspace'){
unset($a['hspace']); $c[] = "margin-left: {$v}px; margin-right: {$v}px";
}elseif($k == 'language' && !isset($a['type'])){
unset($a['language']);
$a['type'] = 'text/'. strtolower($v);
}elseif($k == 'name'){
if($C['no_deprecated_attr'] == 2 or ($e != 'a' && $e != 'map')){unset($a['name']);}
if(!isset($a['id']) && preg_match('`[a-zA-Z][a-zA-Z\d.:_\-]*`', $v)){$a['id'] = $v;}
}elseif($k == 'noshade'){
unset($a['noshade']); $c[] = 'border-style: none; border: 0; background-color: gray; color: gray';
}elseif($k == 'nowrap'){
unset($a['nowrap']); $c[] = 'white-space: nowrap';
}elseif($k == 'size'){
unset($a['size']); $c[] = 'size: '. $v. 'px';
}elseif($k == 'start' or $k == 'value'){
unset($a[$k]);
}elseif($k == 'type'){
unset($a['type']);
static $ol_type = array('i'=>'lower-roman', 'I'=>'upper-roman', 'a'=>'lower-latin', 'A'=>'upper-latin', '1'=>'decimal');
$c[] = 'list-style-type: '. (isset($ol_type[$v]) ? $ol_type[$v] : 'decimal');
}elseif($k == 'vspace'){
unset($a['vspace']); $c[] = "margin-top: {$v}px; margin-bottom: {$v}px";
}
}
if(count($c)){
$c = implode('; ', $c);
$a['style'] = isset($a['style']) ? rtrim($a['style'], ' ;'). '; '. $c. ';': $c. ';';
}
}
// unique ID
if($C['unique_ids'] && isset($a['id'])){
if(!preg_match('`^[A-Za-z][A-Za-z0-9_\-.:]*$`', ($id = $a['id'])) or (isset($GLOBALS['hl_Ids'][$id]) && $C['unique_ids'] == 1)){unset($a['id']);
}else{
while(isset($GLOBALS['hl_Ids'][$id])){$id = $C['unique_ids']. $id;}
$GLOBALS['hl_Ids'][($a['id'] = $id)] = 1;
}
}
// xml:lang
if($C['xml:lang'] && isset($a['lang'])){
$a['xml:lang'] = isset($a['xml:lang']) ? $a['xml:lang'] : $a['lang'];
if($C['xml:lang'] == 2){unset($a['lang']);}
}
// for transformed tag
if(!empty($trt)){
$a['style'] = isset($a['style']) ? rtrim($a['style'], ' ;'). '; '. $trt : $trt;
}
// return with empty ele /
if(empty($C['hook_tag'])){
$aA = '';
foreach($a as $k=>$v){$aA .= " {$k}=\"{$v}\"";}
return "<{$e}{$aA}". (isset($eE[$e]) ? ' /' : ''). '>';
}
else{return $C['hook_tag']($e, $a);}
// eof
}
public static function hl_tag2(&$e, &$a, $t=1){
// transform tag
if($e == 'center'){$e = 'div'; return 'text-align: center;';}
if($e == 'dir' or $e == 'menu'){$e = 'ul'; return '';}
if($e == 's' or $e == 'strike'){$e = 'span'; return 'text-decoration: line-through;';}
if($e == 'u'){$e = 'span'; return 'text-decoration: underline;';}
static $fs = array('0'=>'xx-small', '1'=>'xx-small', '2'=>'small', '3'=>'medium', '4'=>'large', '5'=>'x-large', '6'=>'xx-large', '7'=>'300%', '-1'=>'smaller', '-2'=>'60%', '+1'=>'larger', '+2'=>'150%', '+3'=>'200%', '+4'=>'300%');
if($e == 'font'){
$a2 = '';
if(preg_match('`face\s*=\s*(\'|")([^=]+?)\\1`i', $a, $m) or preg_match('`face\s*=(\s*)(\S+)`i', $a, $m)){
$a2 .= ' font-family: '. str_replace('"', '\'', trim($m[2])). ';';
}
if(preg_match('`color\s*=\s*(\'|")?(.+?)(\\1|\s|$)`i', $a, $m)){
$a2 .= ' color: '. trim($m[2]). ';';
}
if(preg_match('`size\s*=\s*(\'|")?(.+?)(\\1|\s|$)`i', $a, $m) && isset($fs[($m = trim($m[2]))])){
$a2 .= ' font-size: '. $fs[$m]. ';';
}
$e = 'span'; return ltrim($a2);
}
if($t == 2){$e = 0; return 0;}
return '';
// eof
}
public static function hl_tidy($t, $w, $p){
// Tidy/compact HTM
if(strpos(' pre,script,textarea', "$p,")){return $t;}
$t = str_replace(' </', '</', preg_replace(array('`(<\w[^>]*(?<!/)>)\s+`', '`\s+`', '`(<\w[^>]*(?<!/)>) `'), array(' $1', ' ', '$1'), preg_replace_callback(array('`(<(!\[CDATA\[))(.+?)(\]\]>)`sm', '`(<(!--))(.+?)(-->)`sm', '`(<(pre|script|textarea)[^>]*?>)(.+?)(</\2>)`sm'), create_function('$m', 'return $m[1]. str_replace(array("<", ">", "\n", "\r", "\t", " "), array("\x01", "\x02", "\x03", "\x04", "\x05", "\x07"), $m[3]). $m[4];'), $t)));
if(($w = strtolower($w)) == -1){
return str_replace(array("\x01", "\x02", "\x03", "\x04", "\x05", "\x07"), array('<', '>', "\n", "\r", "\t", ' '), $t);
}
$s = strpos(" $w", 't') ? "\t" : ' ';
$s = preg_match('`\d`', $w, $m) ? str_repeat($s, $m[0]) : str_repeat($s, ($s == "\t" ? 1 : 2));
$N = preg_match('`[ts]([1-9])`', $w, $m) ? $m[1] : 0;
$a = array('br'=>1);
$b = array('button'=>1, 'input'=>1, 'option'=>1);
$c = array('caption'=>1, 'dd'=>1, 'dt'=>1, 'h1'=>1, 'h2'=>1, 'h3'=>1, 'h4'=>1, 'h5'=>1, 'h6'=>1, 'isindex'=>1, 'label'=>1, 'legend'=>1, 'li'=>1, 'object'=>1, 'p'=>1, 'pre'=>1, 'td'=>1, 'textarea'=>1, 'th'=>1);
$d = array('address'=>1, 'blockquote'=>1, 'center'=>1, 'colgroup'=>1, 'dir'=>1, 'div'=>1, 'dl'=>1, 'fieldset'=>1, 'form'=>1, 'hr'=>1, 'iframe'=>1, 'map'=>1, 'menu'=>1, 'noscript'=>1, 'ol'=>1, 'optgroup'=>1, 'rbc'=>1, 'rtc'=>1, 'ruby'=>1, 'script'=>1, 'select'=>1, 'table'=>1, 'tbody'=>1, 'tfoot'=>1, 'thead'=>1, 'tr'=>1, 'ul'=>1);
$T = explode('<', $t);
$X = 1;
while($X){
$n = $N;
$t = $T;
ob_start();
if(isset($d[$p])){echo str_repeat($s, ++$n);}
echo ltrim(array_shift($t));
for($i=-1, $j=count($t); ++$i<$j;){
$r = ''; list($e, $r) = explode('>', $t[$i]);
$x = $e[0] == '/' ? 0 : (substr($e, -1) == '/' ? 1 : ($e[0] != '!' ? 2 : -1));
$y = !$x ? ltrim($e, '/') : ($x > 0 ? substr($e, 0, strcspn($e, ' ')) : 0);
$e = "<$e>";
if(isset($d[$y])){
if(!$x){
if($n){echo "\n", str_repeat($s, --$n), "$e\n", str_repeat($s, $n);}
else{++$N; ob_end_clean(); continue 2;}
}
else{echo "\n", str_repeat($s, $n), "$e\n", str_repeat($s, ($x != 1 ? ++$n : $n));}
echo ltrim($r); continue;
}
$f = "\n". str_repeat($s, $n);
if(isset($c[$y])){
if(!$x){echo $e, $f, ltrim($r);}
else{echo $f, $e, $r;}
}elseif(isset($b[$y])){echo $f, $e, $r;
}elseif(isset($a[$y])){echo $e, $f, ltrim($r);
}elseif(!$y){echo $f, $e, $f, ltrim($r);
}else{echo $e, $r;}
}
$X = 0;
}
$t = preg_replace('`[\n]\s*?[\n]+`', "\n", ob_get_contents());
ob_end_clean();
if(($l = strpos(" $w", 'r') ? (strpos(" $w", 'n') ? "\r\n" : "\r") : 0)){
$t = str_replace("\n", $l, $t);
}
return str_replace(array("\x01", "\x02", "\x03", "\x04", "\x05", "\x07"), array('<', '>', "\n", "\r", "\t", ' '), $t);
// eof
}
public static function hl_version(){
// rel
return '1.1.14';
// eof
}
public static function kses($t, $h, $p=array('http', 'https', 'ftp', 'news', 'nntp', 'telnet', 'gopher', 'mailto')){
// kses compat
foreach($h as $k=>$v){
$h[$k]['n']['*'] = 1;
}
$C['cdata'] = $C['comment'] = $C['make_tag_strict'] = $C['no_deprecated_attr'] = $C['unique_ids'] = 0;
$C['keep_bad'] = 1;
$C['elements'] = count($h) ? strtolower(implode(',', array_keys($h))) : '-*';
$C['hook'] = 'htmLawed::kses_hook';
$C['schemes'] = '*:'. implode(',', $p);
return htmLawed::hl($t, $C, $h);
// eof
}
public static function kses_hook($t, &$C, &$S){
// kses compat
return $t;
// eof
}
// end class
}

114
inc/3rdparty/libraries/html5/Data.php vendored Normal file
View file

@ -0,0 +1,114 @@
<?php
// warning: this file is encoded in UTF-8!
class HTML5_Data
{
// at some point this should be moved to a .ser file. Another
// possible optimization is to give UTF-8 bytes, not Unicode
// codepoints
// XXX: Not quite sure why it's named this; this is
// actually the numeric entity dereference table.
protected static $realCodepointTable = array(
0x00 => 0xFFFD, // REPLACEMENT CHARACTER
0x0D => 0x000A, // LINE FEED (LF)
0x80 => 0x20AC, // EURO SIGN ('€')
0x81 => 0x0081, // <control>
0x82 => 0x201A, // SINGLE LOW-9 QUOTATION MARK ('')
0x83 => 0x0192, // LATIN SMALL LETTER F WITH HOOK ('ƒ')
0x84 => 0x201E, // DOUBLE LOW-9 QUOTATION MARK ('„')
0x85 => 0x2026, // HORIZONTAL ELLIPSIS ('…')
0x86 => 0x2020, // DAGGER ('†')
0x87 => 0x2021, // DOUBLE DAGGER ('‡')
0x88 => 0x02C6, // MODIFIER LETTER CIRCUMFLEX ACCENT ('ˆ')
0x89 => 0x2030, // PER MILLE SIGN ('‰')
0x8A => 0x0160, // LATIN CAPITAL LETTER S WITH CARON ('Š')
0x8B => 0x2039, // SINGLE LEFT-POINTING ANGLE QUOTATION MARK ('')
0x8C => 0x0152, // LATIN CAPITAL LIGATURE OE ('Œ')
0x8D => 0x008D, // <control>
0x8E => 0x017D, // LATIN CAPITAL LETTER Z WITH CARON ('Ž')
0x8F => 0x008F, // <control>
0x90 => 0x0090, // <control>
0x91 => 0x2018, // LEFT SINGLE QUOTATION MARK ('')
0x92 => 0x2019, // RIGHT SINGLE QUOTATION MARK ('')
0x93 => 0x201C, // LEFT DOUBLE QUOTATION MARK ('“')
0x94 => 0x201D, // RIGHT DOUBLE QUOTATION MARK ('”')
0x95 => 0x2022, // BULLET ('•')
0x96 => 0x2013, // EN DASH ('')
0x97 => 0x2014, // EM DASH ('—')
0x98 => 0x02DC, // SMALL TILDE ('˜')
0x99 => 0x2122, // TRADE MARK SIGN ('™')
0x9A => 0x0161, // LATIN SMALL LETTER S WITH CARON ('š')
0x9B => 0x203A, // SINGLE RIGHT-POINTING ANGLE QUOTATION MARK ('')
0x9C => 0x0153, // LATIN SMALL LIGATURE OE ('œ')
0x9D => 0x009D, // <control>
0x9E => 0x017E, // LATIN SMALL LETTER Z WITH CARON ('ž')
0x9F => 0x0178, // LATIN CAPITAL LETTER Y WITH DIAERESIS ('Ÿ')
);
protected static $namedCharacterReferences;
protected static $namedCharacterReferenceMaxLength;
/**
* Returns the "real" Unicode codepoint of a malformed character
* reference.
*/
public static function getRealCodepoint($ref) {
if (!isset(self::$realCodepointTable[$ref])) return false;
else return self::$realCodepointTable[$ref];
}
public static function getNamedCharacterReferences() {
if (!self::$namedCharacterReferences) {
self::$namedCharacterReferences = unserialize(
file_get_contents(dirname(__FILE__) . '/named-character-references.ser'));
}
return self::$namedCharacterReferences;
}
/**
* Converts a Unicode codepoint to sequence of UTF-8 bytes.
* @note Shamelessly stolen from HTML Purifier, which is also
* shamelessly stolen from Feyd (which is in public domain).
*/
public static function utf8chr($code) {
/* We don't care: we live dangerously
* if($code > 0x10FFFF or $code < 0x0 or
($code >= 0xD800 and $code <= 0xDFFF) ) {
// bits are set outside the "valid" range as defined
// by UNICODE 4.1.0
return "\xEF\xBF\xBD";
}*/
$x = $y = $z = $w = 0;
if ($code < 0x80) {
// regular ASCII character
$x = $code;
} else {
// set up bits for UTF-8
$x = ($code & 0x3F) | 0x80;
if ($code < 0x800) {
$y = (($code & 0x7FF) >> 6) | 0xC0;
} else {
$y = (($code & 0xFC0) >> 6) | 0x80;
if($code < 0x10000) {
$z = (($code >> 12) & 0x0F) | 0xE0;
} else {
$z = (($code >> 12) & 0x3F) | 0x80;
$w = (($code >> 18) & 0x07) | 0xF0;
}
}
}
// set up the actual character
$ret = '';
if($w) $ret .= chr($w);
if($z) $ret .= chr($z);
if($y) $ret .= chr($y);
$ret .= chr($x);
return $ret;
}
}

View file

@ -0,0 +1,284 @@
<?php
/*
Copyright 2009 Geoffrey Sneddon <http://gsnedders.com/>
Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
// Some conventions:
// /* */ indicates verbatim text from the HTML 5 specification
// // indicates regular comments
class HTML5_InputStream {
/**
* The string data we're parsing.
*/
private $data;
/**
* The current integer byte position we are in $data
*/
private $char;
/**
* Length of $data; when $char === $data, we are at the end-of-file.
*/
private $EOF;
/**
* Parse errors.
*/
public $errors = array();
/**
* @param $data Data to parse
*/
public function __construct($data) {
/* Given an encoding, the bytes in the input stream must be
converted to Unicode characters for the tokeniser, as
described by the rules for that encoding, except that the
leading U+FEFF BYTE ORDER MARK character, if any, must not
be stripped by the encoding layer (it is stripped by the rule below).
Bytes or sequences of bytes in the original byte stream that
could not be converted to Unicode characters must be converted
to U+FFFD REPLACEMENT CHARACTER code points. */
// XXX currently assuming input data is UTF-8; once we
// build encoding detection this will no longer be the case
//
// We previously had an mbstring implementation here, but that
// implementation is heavily non-conforming, so it's been
// omitted.
if (extension_loaded('iconv')) {
// non-conforming
$data = @iconv('UTF-8', 'UTF-8//IGNORE', $data);
} else {
// we can make a conforming native implementation
throw new Exception('Not implemented, please install mbstring or iconv');
}
/* One leading U+FEFF BYTE ORDER MARK character must be
ignored if any are present. */
if (substr($data, 0, 3) === "\xEF\xBB\xBF") {
$data = substr($data, 3);
}
/* All U+0000 NULL characters in the input must be replaced
by U+FFFD REPLACEMENT CHARACTERs. Any occurrences of such
characters is a parse error. */
for ($i = 0, $count = substr_count($data, "\0"); $i < $count; $i++) {
$this->errors[] = array(
'type' => HTML5_Tokenizer::PARSEERROR,
'data' => 'null-character'
);
}
/* U+000D CARRIAGE RETURN (CR) characters and U+000A LINE FEED
(LF) characters are treated specially. Any CR characters
that are followed by LF characters must be removed, and any
CR characters not followed by LF characters must be converted
to LF characters. Thus, newlines in HTML DOMs are represented
by LF characters, and there are never any CR characters in the
input to the tokenization stage. */
$data = str_replace(
array(
"\0",
"\r\n",
"\r"
),
array(
"\xEF\xBF\xBD",
"\n",
"\n"
),
$data
);
/* Any occurrences of any characters in the ranges U+0001 to
U+0008, U+000B, U+000E to U+001F, U+007F to U+009F,
U+D800 to U+DFFF , U+FDD0 to U+FDEF, and
characters U+FFFE, U+FFFF, U+1FFFE, U+1FFFF, U+2FFFE, U+2FFFF,
U+3FFFE, U+3FFFF, U+4FFFE, U+4FFFF, U+5FFFE, U+5FFFF, U+6FFFE,
U+6FFFF, U+7FFFE, U+7FFFF, U+8FFFE, U+8FFFF, U+9FFFE, U+9FFFF,
U+AFFFE, U+AFFFF, U+BFFFE, U+BFFFF, U+CFFFE, U+CFFFF, U+DFFFE,
U+DFFFF, U+EFFFE, U+EFFFF, U+FFFFE, U+FFFFF, U+10FFFE, and
U+10FFFF are parse errors. (These are all control characters
or permanently undefined Unicode characters.) */
// Check PCRE is loaded.
if (extension_loaded('pcre')) {
$count = preg_match_all(
'/(?:
[\x01-\x08\x0B\x0E-\x1F\x7F] # U+0001 to U+0008, U+000B, U+000E to U+001F and U+007F
|
\xC2[\x80-\x9F] # U+0080 to U+009F
|
\xED(?:\xA0[\x80-\xFF]|[\xA1-\xBE][\x00-\xFF]|\xBF[\x00-\xBF]) # U+D800 to U+DFFFF
|
\xEF\xB7[\x90-\xAF] # U+FDD0 to U+FDEF
|
\xEF\xBF[\xBE\xBF] # U+FFFE and U+FFFF
|
[\xF0-\xF4][\x8F-\xBF]\xBF[\xBE\xBF] # U+nFFFE and U+nFFFF (1 <= n <= 10_{16})
)/x',
$data,
$matches
);
for ($i = 0; $i < $count; $i++) {
$this->errors[] = array(
'type' => HTML5_Tokenizer::PARSEERROR,
'data' => 'invalid-codepoint'
);
}
} else {
// XXX: Need non-PCRE impl, probably using substr_count
}
$this->data = $data;
$this->char = 0;
$this->EOF = strlen($data);
}
/**
* Returns the current line that the tokenizer is at.
*/
public function getCurrentLine() {
// Check the string isn't empty
if($this->EOF) {
// Add one to $this->char because we want the number for the next
// byte to be processed.
return substr_count($this->data, "\n", 0, min($this->char, $this->EOF)) + 1;
} else {
// If the string is empty, we are on the first line (sorta).
return 1;
}
}
/**
* Returns the current column of the current line that the tokenizer is at.
*/
public function getColumnOffset() {
// strrpos is weird, and the offset needs to be negative for what we
// want (i.e., the last \n before $this->char). This needs to not have
// one (to make it point to the next character, the one we want the
// position of) added to it because strrpos's behaviour includes the
// final offset byte.
$lastLine = strrpos($this->data, "\n", $this->char - 1 - strlen($this->data));
// However, for here we want the length up until the next byte to be
// processed, so add one to the current byte ($this->char).
if($lastLine !== false) {
$findLengthOf = substr($this->data, $lastLine + 1, $this->char - 1 - $lastLine);
} else {
$findLengthOf = substr($this->data, 0, $this->char);
}
// Get the length for the string we need.
if(extension_loaded('iconv')) {
return iconv_strlen($findLengthOf, 'utf-8');
} elseif(extension_loaded('mbstring')) {
return mb_strlen($findLengthOf, 'utf-8');
} elseif(extension_loaded('xml')) {
return strlen(utf8_decode($findLengthOf));
} else {
$count = count_chars($findLengthOf);
// 0x80 = 0x7F - 0 + 1 (one added to get inclusive range)
// 0x33 = 0xF4 - 0x2C + 1 (one added to get inclusive range)
return array_sum(array_slice($count, 0, 0x80)) +
array_sum(array_slice($count, 0xC2, 0x33));
}
}
/**
* Retrieve the currently consume character.
* @note This performs bounds checking
*/
public function char() {
return ($this->char++ < $this->EOF)
? $this->data[$this->char - 1]
: false;
}
/**
* Get all characters until EOF.
* @note This performs bounds checking
*/
public function remainingChars() {
if($this->char < $this->EOF) {
$data = substr($this->data, $this->char);
$this->char = $this->EOF;
return $data;
} else {
return false;
}
}
/**
* Matches as far as possible until we reach a certain set of bytes
* and returns the matched substring.
* @param $bytes Bytes to match.
*/
public function charsUntil($bytes, $max = null) {
if ($this->char < $this->EOF) {
if ($max === 0 || $max) {
$len = strcspn($this->data, $bytes, $this->char, $max);
} else {
$len = strcspn($this->data, $bytes, $this->char);
}
$string = (string) substr($this->data, $this->char, $len);
$this->char += $len;
return $string;
} else {
return false;
}
}
/**
* Matches as far as possible with a certain set of bytes
* and returns the matched substring.
* @param $bytes Bytes to match.
*/
public function charsWhile($bytes, $max = null) {
if ($this->char < $this->EOF) {
if ($max === 0 || $max) {
$len = strspn($this->data, $bytes, $this->char, $max);
} else {
$len = strspn($this->data, $bytes, $this->char);
}
$string = (string) substr($this->data, $this->char, $len);
$this->char += $len;
return $string;
} else {
return false;
}
}
/**
* Unconsume one character.
*/
public function unget() {
if ($this->char <= $this->EOF) {
$this->char--;
}
}
}

36
inc/3rdparty/libraries/html5/Parser.php vendored Normal file
View file

@ -0,0 +1,36 @@
<?php
require_once dirname(__FILE__) . '/Data.php';
require_once dirname(__FILE__) . '/InputStream.php';
require_once dirname(__FILE__) . '/TreeBuilder.php';
require_once dirname(__FILE__) . '/Tokenizer.php';
/**
* Outwards facing interface for HTML5.
*/
class HTML5_Parser
{
/**
* Parses a full HTML document.
* @param $text HTML text to parse
* @param $builder Custom builder implementation
* @return Parsed HTML as DOMDocument
*/
static public function parse($text, $builder = null) {
$tokenizer = new HTML5_Tokenizer($text, $builder);
$tokenizer->parse();
return $tokenizer->save();
}
/**
* Parses an HTML fragment.
* @param $text HTML text to parse
* @param $context String name of context element to pretend parsing is in.
* @param $builder Custom builder implementation
* @return Parsed HTML as DOMDocument
*/
static public function parseFragment($text, $context = null, $builder = null) {
$tokenizer = new HTML5_Tokenizer($text, $builder);
$tokenizer->parseFragment($context);
return $tokenizer->save();
}
}

2422
inc/3rdparty/libraries/html5/Tokenizer.php vendored Normal file

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because one or more lines are too long

View file

@ -0,0 +1,404 @@
<?php
/**
* Cookie Jar
*
* PHP class for handling cookies, as defined by the Netscape spec:
* <http://curl.haxx.se/rfc/cookie_spec.html>
*
* This class should be used to handle cookies (storing cookies from HTTP response messages, and
* sending out cookies in HTTP request messages). This has been adapted for FiveFilters.org
* from the original version used in HTTP Navigator. See http://www.keyvan.net/code/http-navigator/
*
* This class is mainly based on Cookies.pm <http://search.cpan.org/author/GAAS/libwww-perl-5.65/
* lib/HTTP/Cookies.pm> from the libwww-perl collection <http://www.linpro.no/lwp/>.
* Unlike Cookies.pm, this class only supports the Netscape cookie spec, not RFC 2965.
*
* @version 0.5
* @date 2011-03-15
* @see http://php.net/HttpRequestPool
* @author Keyvan Minoukadeh
* @copyright 2011 Keyvan Minoukadeh
* @license http://www.gnu.org/licenses/agpl-3.0.html AGPL v3
*/
class CookieJar
{
/**
* Cookies - array containing all cookies.
*
* <pre>
* Cookies are stored like this:
* [domain][path][name] = array
* where array is:
* 0 => value, 1 => secure, 2 => expires
* </pre>
* @var array
* @access private
*/
public $cookies = array();
public $debug = false;
/**
* Constructor
*/
function __construct() {
}
protected function debug($msg, $file=null, $line=null) {
if ($this->debug) {
$mem = round(memory_get_usage()/1024, 2);
$memPeak = round(memory_get_peak_usage()/1024, 2);
echo '* ',$msg;
if (isset($file, $line)) echo " ($file line $line)";
echo ' - mem used: ',$mem," (peak: $memPeak)\n";
ob_flush();
flush();
}
}
/**
* Get matching cookies
*
* Only use this method if you cannot use add_cookie_header(), for example, if you want to use
* this cookie jar class without using the request class.
*
* @param array $param associative array containing 'domain', 'path', 'secure' keys
* @return string
* @see add_cookie_header()
*/
public function getMatchingCookies($url)
{
if (($parts = @parse_url($url)) && isset($parts['scheme'], $parts['host'], $parts['path'])) {
$param['domain'] = $parts['host'];
$param['path'] = $parts['path'];
$param['secure'] = (strtolower($parts['scheme']) == 'https');
unset($parts);
} else {
return false;
}
// RFC 2965 notes:
// If multiple cookies satisfy the criteria above, they are ordered in
// the Cookie header such that those with more specific Path attributes
// precede those with less specific. Ordering with respect to other
// attributes (e.g., Domain) is unspecified.
$domain = $param['domain'];
if (strpos($domain, '.') === false) $domain .= '.local';
$request_path = $param['path'];
if ($request_path == '') $request_path = '/';
$request_secure = $param['secure'];
$now = time();
$matched_cookies = array();
// domain - find matching domains
$this->debug('Finding matching domains for '.$domain, __FILE__, __LINE__);
while (strpos($domain, '.') !== false) {
if (isset($this->cookies[$domain])) {
$this->debug(' domain match found: '.$domain);
$cookies =& $this->cookies[$domain];
} else {
$domain = $this->_reduce_domain($domain);
continue;
}
// paths - find matching paths starting from most specific
$this->debug(' - Finding matching paths for '.$request_path);
$paths = array_keys($cookies);
usort($paths, array($this, '_cmp_length'));
foreach ($paths as $path) {
// continue to next cookie if request path does not path-match cookie path
if (!$this->_path_match($request_path, $path)) continue;
// loop through cookie names
$this->debug(' path match found: '.$path);
foreach ($cookies[$path] as $name => $values) {
// if this cookie is secure but request isn't, continue to next cookie
if ($values[1] && !$request_secure) continue;
// if cookie is not a session cookie and has expired, continue to next cookie
if (is_int($values[2]) && ($values[2] < $now)) continue;
// cookie matches request
$this->debug(' cookie match: '.$name.'='.$values[0]);
$matched_cookies[] = $name.'='.$values[0];
}
}
$domain = $this->_reduce_domain($domain);
}
// return cookies
return implode('; ', $matched_cookies);
}
/**
* Parse Set-Cookie values.
*
* Only use this method if you cannot use extract_cookies(), for example, if you want to use
* this cookie jar class without using the response class.
*
* @param array $set_cookies array holding 1 or more "Set-Cookie" header values
* @param array $param associative array containing 'host', 'path' keys
* @return void
* @see extract_cookies()
*/
public function storeCookies($url, $set_cookies)
{
if (count($set_cookies) == 0) return;
$param = @parse_url($url);
if (!is_array($param) || !isset($param['host'])) return;
$request_host = $param['host'];
if (strpos($request_host, '.') === false) $request_host .= '.local';
$request_path = @$param['path'];
if ($request_path == '') $request_path = '/';
//
// loop through set-cookie headers
//
foreach ($set_cookies as $set_cookie) {
$this->debug('Parsing: '.$set_cookie);
// temporary cookie store (before adding to jar)
$tmp_cookie = array();
$param = explode(';', $set_cookie);
// loop through params
for ($x=0; $x<count($param); $x++) {
$key_val = explode('=', $param[$x], 2);
if (count($key_val) != 2) {
// if the first param isn't a name=value pair, continue to the next set-cookie
// header
if ($x == 0) continue 2;
// check for secure flag
if (strtolower(trim($key_val[0])) == 'secure') $tmp_cookie['secure'] = true;
// continue to next param
continue;
}
list($key, $val) = array_map('trim', $key_val);
// first name=value pair is the cookie name and value
// the name and value are stored under 'name' and 'value' to avoid conflicts
// with later parameters.
if ($x == 0) {
$tmp_cookie = array('name'=>$key, 'value'=>$val);
continue;
}
$key = strtolower($key);
if (in_array($key, array('expires', 'path', 'domain', 'secure'))) {
$tmp_cookie[$key] = $val;
}
}
//
// set cookie
//
// check domain
if (isset($tmp_cookie['domain']) && ($tmp_cookie['domain'] != $request_host) &&
($tmp_cookie['domain'] != ".$request_host")) {
$domain = $tmp_cookie['domain'];
if ((strpos($domain, '.') === false) && ($domain != 'local')) {
$this->debug(' - domain "'.$domain.'" has no dot and is not a local domain');
continue;
}
if (preg_match('/\.[0-9]+$/', $domain)) {
$this->debug(' - domain "'.$domain.'" appears to be an ip address');
continue;
}
if (substr($domain, 0, 1) != '.') $domain = ".$domain";
if (!$this->_domain_match($request_host, $domain)) {
$this->debug(' - request host "'.$request_host.'" does not domain-match "'.$domain.'"');
continue;
}
} else {
// if domain is not specified in the set-cookie header, domain will default to
// the request host
$domain = $request_host;
}
// check path
if (isset($tmp_cookie['path']) && ($tmp_cookie['path'] != '')) {
$path = urldecode($tmp_cookie['path']);
if (!$this->_path_match($request_path, $path)) {
$this->debug(' - request path "'.$request_path.'" does not path-match "'.$path.'"');
continue;
}
} else {
$path = $request_path;
$path = substr($path, 0, strrpos($path, '/'));
if ($path == '') $path = '/';
}
// check if secure
$secure = (isset($tmp_cookie['secure'])) ? true : false;
// check expiry
if (isset($tmp_cookie['expires'])) {
if (($expires = strtotime($tmp_cookie['expires'])) < 0) {
$expires = null;
}
} else {
$expires = null;
}
// set cookie
$this->set_cookie($domain, $path, $tmp_cookie['name'], $tmp_cookie['value'], $secure, $expires);
}
}
// return array of set-cookie values extracted from HTTP response headers (string $h)
public function extractCookies($h) {
$x = 0;
$lines = 0;
$headers = array();
$last_match = false;
$h = explode("\n", $h);
foreach ($h as $line) {
$line = rtrim($line);
$lines++;
$trimmed_line = trim($line);
if (isset($line_last)) {
// check if we have \r\n\r\n (indicating the end of headers)
// some servers will not use CRLF (\r\n), so we make CR (\r) optional.
// if (preg_match('/\015?\012\015?\012/', $line_last.$line)) {
// break;
// }
// As an alternative, we can check if the current trimmed line is empty
if ($trimmed_line == '') {
break;
}
// check for continuation line...
// RFC 2616 Section 2.2 "Basic Rules":
// HTTP/1.1 header field values can be folded onto multiple lines if the
// continuation line begins with a space or horizontal tab. All linear
// white space, including folding, has the same semantics as SP. A
// recipient MAY replace any linear white space with a single SP before
// interpreting the field value or forwarding the message downstream.
if ($last_match && preg_match('/^\s+(.*)/', $line, $match)) {
// append to previous header value
$headers[$x-1] .= ' '.rtrim($match[1]);
continue;
}
}
$line_last = $line;
// split header name and value
if (preg_match('/^Set-Cookie\s*:\s*(.*)/i', $line, $match)) {
$headers[$x++] = rtrim($match[1]);
$last_match = true;
} else {
$last_match = false;
}
}
return $headers;
}
/**
* Set Cookie
* @param string $domain
* @param string $path
* @param string $name cookie name
* @param string $value cookie value
* @param bool $secure
* @param int $expires expiry time (null if session cookie, <= 0 will delete cookie)
* @return void
*/
function set_cookie($domain, $path, $name, $value, $secure=false, $expires=null)
{
if ($domain == '') return;
if ($path == '') return;
if ($name == '') return;
// check if cookie needs to go
if (isset($expires) && ($expires <= 0)) {
if (isset($this->cookies[$domain][$path][$name])) unset($this->cookies[$domain][$path][$name]);
return;
}
if ($value == '') return;
$this->cookies[$domain][$path][$name] = array($value, $secure, $expires);
return;
}
/**
* Clear cookies - [domain [,path [,name]]] - call method with no arguments to clear all cookies.
* @param string $domain
* @param string $path
* @param string $name
* @return void
*/
function clear($domain=null, $path=null, $name=null)
{
if (!isset($domain)) {
$this->cookies = array();
} elseif (!isset($path)) {
if (isset($this->cookies[$domain])) unset($this->cookies[$domain]);
} elseif (!isset($name)) {
if (isset($this->cookies[$domain][$path])) unset($this->cookies[$domain][$path]);
} elseif (isset($name)) {
if (isset($this->cookies[$domain][$path][$name])) unset($this->cookies[$domain][$path][$name]);
}
}
/**
* Compare string length - used for sorting
* @access private
* @return int
*/
function _cmp_length($a, $b)
{
$la = strlen($a); $lb = strlen($b);
if ($la == $lb) return 0;
return ($la > $lb) ? -1 : 1;
}
/**
* Reduce domain
* @param string $domain
* @return string
* @access private
*/
function _reduce_domain($domain)
{
if ($domain == '') return '';
if (substr($domain, 0, 1) == '.') return substr($domain, 1);
return substr($domain, strpos($domain, '.'));
}
/**
* Path match - check if path1 path-matches path2
*
* From RFC 2965:
* <i>For two strings that represent paths, P1 and P2, P1 path-matches P2
* if P2 is a prefix of P1 (including the case where P1 and P2 string-
* compare equal). Thus, the string /tec/waldo path-matches /tec.</i>
* @param string $path1
* @param string $path2
* @return bool
* @access private
*/
function _path_match($path1, $path2)
{
return (substr($path1, 0, strlen($path2)) == $path2);
}
/**
* Domain match - check if domain1 domain-matches domain2
*
* A few extracts from RFC 2965:
* - A Set-Cookie2 from request-host y.x.foo.com for Domain=.foo.com
* would be rejected, because H is y.x and contains a dot.
*
* - A Set-Cookie2 from request-host x.foo.com for Domain=.foo.com
* would be accepted.
*
* - A Set-Cookie2 with Domain=.com or Domain=.com., will always be
* rejected, because there is no embedded dot.
*
* - A Set-Cookie2 from request-host example for Domain=.local will
* be accepted, because the effective host name for the request-
* host is example.local, and example.local domain-matches .local.
*
* I'm ignoring the first point for now (must check to see how other browsers handle
* this rule for Set-Cookie headers)
*
* @param string $domain1
* @param string $domain2
* @return bool
* @access private
*/
function _domain_match($domain1, $domain2)
{
$domain1 = strtolower($domain1);
$domain2 = strtolower($domain2);
while (strpos($domain1, '.') !== false) {
if ($domain1 == $domain2) return true;
$domain1 = $this->_reduce_domain($domain1);
continue;
}
return false;
}
}
?>

View file

@ -0,0 +1,779 @@
<?php
/**
* Humble HTTP Agent
*
* This class is designed to take advantage of parallel HTTP requests
* offered by PHP's PECL HTTP extension or the curl_multi_* functions.
* For environments which do not have these options, it reverts to standard sequential
* requests (using file_get_contents())
*
* @version 1.1
* @date 2012-08-20
* @see http://php.net/HttpRequestPool
* @author Keyvan Minoukadeh
* @copyright 2011-2012 Keyvan Minoukadeh
* @license http://www.gnu.org/licenses/agpl-3.0.html AGPL v3
*/
class HumbleHttpAgent
{
const METHOD_REQUEST_POOL = 1;
const METHOD_CURL_MULTI = 2;
const METHOD_FILE_GET_CONTENTS = 4;
//const UA_BROWSER = 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:2.0.1) Gecko/20100101 Firefox/4.0.1';
const UA_BROWSER = 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.2 (KHTML, like Gecko) Chrome/15.0.874.92 Safari/535.2';
const UA_PHP = 'PHP/5.2';
const REF_GOOGLE = 'http://www.google.co.uk/url?sa=t&source=web&cd=1';
protected $requests = array();
protected $redirectQueue = array();
protected $requestOptions;
protected $maxParallelRequests = 5;
protected $cache = null; //TODO
protected $httpContext;
protected $minimiseMemoryUse = false; //TODO
protected $method;
protected $cookieJar;
public $debug = false;
public $debugVerbose = false;
public $rewriteHashbangFragment = true; // see http://code.google.com/web/ajaxcrawling/docs/specification.html
public $maxRedirects = 5;
public $userAgentMap = array();
public $rewriteUrls = array();
public $userAgentDefault;
public $referer;
//public $userAgent = 'Mozilla/5.0';
// Prevent certain file/mime types
// HTTP responses which match these content types will
// be returned without body.
public $headerOnlyTypes = array();
// URLs ending with one of these extensions will
// prompt Humble HTTP Agent to send a HEAD request first
// to see if returned content type matches $headerOnlyTypes.
public $headerOnlyClues = array('pdf','mp3','zip','exe','gif','gzip','gz','jpeg','jpg','mpg','mpeg','png','ppt','mov');
// AJAX triggers to search for.
// for AJAX sites, e.g. Blogger with its dynamic views templates.
public $ajaxTriggers = array("<meta name='fragment' content='!'",'<meta name="fragment" content="!"',"<meta content='!' name='fragment'",'<meta content="!" name="fragment"');
//TODO: set max file size
//TODO: normalise headers
function __construct($requestOptions=null, $method=null) {
$this->userAgentDefault = self::UA_BROWSER;
$this->referer = self::REF_GOOGLE;
// set the request method
if (in_array($method, array(1,2,4))) {
$this->method = $method;
} else {
if (class_exists('HttpRequestPool')) {
$this->method = self::METHOD_REQUEST_POOL;
} elseif (function_exists('curl_multi_init')) {
$this->method = self::METHOD_CURL_MULTI;
} else {
$this->method = self::METHOD_FILE_GET_CONTENTS;
}
}
if ($this->method == self::METHOD_CURL_MULTI) {
require_once(dirname(__FILE__).'/RollingCurl.php');
}
// create cookie jar
$this->cookieJar = new CookieJar();
// set request options (redirect must be 0)
$this->requestOptions = array(
'timeout' => 15,
'redirect' => 0 // we handle redirects manually so we can rewrite the new hashbang URLs that are creeping up over the web
// TODO: test onprogress?
);
if (is_array($requestOptions)) {
$this->requestOptions = array_merge($this->requestOptions, $requestOptions);
}
$this->httpContext = array(
'http' => array(
'ignore_errors' => true,
'timeout' => $this->requestOptions['timeout'],
'max_redirects' => $this->requestOptions['redirect'],
'header' => "Accept: */*\r\n"
)
);
}
protected function debug($msg) {
if ($this->debug) {
$mem = round(memory_get_usage()/1024, 2);
$memPeak = round(memory_get_peak_usage()/1024, 2);
echo '* ',$msg;
if ($this->debugVerbose) echo ' - mem used: ',$mem," (peak: $memPeak)";
echo "\n";
ob_flush();
flush();
}
}
protected function getUserAgent($url, $asArray=false) {
$host = @parse_url($url, PHP_URL_HOST);
if (strtolower(substr($host, 0, 4)) == 'www.') {
$host = substr($host, 4);
}
if ($host) {
$try = array($host);
$split = explode('.', $host);
if (count($split) > 1) {
array_shift($split);
$try[] = '.'.implode('.', $split);
}
foreach ($try as $h) {
if (isset($this->userAgentMap[$h])) {
$ua = $this->userAgentMap[$h];
break;
}
}
}
if (!isset($ua)) $ua = $this->userAgentDefault;
if ($asArray) {
return array('User-Agent' => $ua);
} else {
return 'User-Agent: '.$ua;
}
}
public function rewriteHashbangFragment($url) {
// return $url if there's no '#!'
if (strpos($url, '#!') === false) return $url;
// split $url and rewrite
// TODO: is SimplePie_IRI included?
$iri = new SimplePie_IRI($url);
$fragment = substr($iri->fragment, 1); // strip '!'
$iri->fragment = null;
if (isset($iri->query)) {
parse_str($iri->query, $query);
} else {
$query = array();
}
$query['_escaped_fragment_'] = (string)$fragment;
$iri->query = str_replace('%2F', '/', http_build_query($query)); // needed for some sites
return $iri->get_iri();
}
public function getUglyURL($url, $html) {
if ($html == '') return false;
$found = false;
foreach ($this->ajaxTriggers as $string) {
if (stripos($html, $string)) {
$found = true;
break;
}
}
if (!$found) return false;
$iri = new SimplePie_IRI($url);
if (isset($iri->query)) {
parse_str($iri->query, $query);
} else {
$query = array();
}
$query['_escaped_fragment_'] = '';
$iri->query = str_replace('%2F', '/', http_build_query($query)); // needed for some sites
return $iri->get_iri();
}
public function removeFragment($url) {
$pos = strpos($url, '#');
if ($pos === false) {
return $url;
} else {
return substr($url, 0, $pos);
}
}
public function rewriteUrls($url) {
foreach ($this->rewriteUrls as $find => $action) {
if (strpos($url, $find) !== false) {
if (is_array($action)) {
return strtr($url, $action);
}
}
}
return $url;
}
public function enableDebug($bool=true) {
$this->debug = (bool)$bool;
}
public function minimiseMemoryUse($bool = true) {
$this->minimiseMemoryUse = $bool;
}
public function setMaxParallelRequests($max) {
$this->maxParallelRequests = $max;
}
public function validateUrl($url) {
$url = filter_var($url, FILTER_SANITIZE_URL);
$test = filter_var($url, FILTER_VALIDATE_URL, FILTER_FLAG_SCHEME_REQUIRED);
// deal with bug http://bugs.php.net/51192 (present in PHP 5.2.13 and PHP 5.3.2)
if ($test === false) {
$test = filter_var(strtr($url, '-', '_'), FILTER_VALIDATE_URL, FILTER_FLAG_SCHEME_REQUIRED);
}
if ($test !== false && $test !== null && preg_match('!^https?://!', $url)) {
return $url;
} else {
return false;
}
}
public function fetchAll(array $urls) {
$this->fetchAllOnce($urls, $isRedirect=false);
$redirects = 0;
while (!empty($this->redirectQueue) && ++$redirects <= $this->maxRedirects) {
$this->debug("Following redirects #$redirects...");
$this->fetchAllOnce($this->redirectQueue, $isRedirect=true);
}
}
// fetch all URLs without following redirects
public function fetchAllOnce(array $urls, $isRedirect=false) {
if (!$isRedirect) $urls = array_unique($urls);
if (empty($urls)) return;
//////////////////////////////////////////////////////
// parallel (HttpRequestPool)
if ($this->method == self::METHOD_REQUEST_POOL) {
$this->debug('Starting parallel fetch (HttpRequestPool)');
try {
while (count($urls) > 0) {
$this->debug('Processing set of '.min($this->maxParallelRequests, count($urls)));
$subset = array_splice($urls, 0, $this->maxParallelRequests);
$pool = new HttpRequestPool();
foreach ($subset as $orig => $url) {
if (!$isRedirect) $orig = $url;
unset($this->redirectQueue[$orig]);
$this->debug("...$url");
if (!$isRedirect && isset($this->requests[$url])) {
$this->debug("......in memory");
/*
} elseif ($this->isCached($url)) {
$this->debug("......is cached");
if (!$this->minimiseMemoryUse) {
$this->requests[$url] = $this->getCached($url);
}
*/
} else {
$this->debug("......adding to pool");
$req_url = $this->rewriteUrls($url);
$req_url = ($this->rewriteHashbangFragment) ? $this->rewriteHashbangFragment($req_url) : $req_url;
$req_url = $this->removeFragment($req_url);
if (!empty($this->headerOnlyTypes) && !isset($this->requests[$orig]['wrongGuess']) && $this->possibleUnsupportedType($req_url)) {
$_meth = HttpRequest::METH_HEAD;
} else {
$_meth = HttpRequest::METH_GET;
unset($this->requests[$orig]['wrongGuess']);
}
$httpRequest = new HttpRequest($req_url, $_meth, $this->requestOptions);
// send cookies, if we have any
if ($cookies = $this->cookieJar->getMatchingCookies($req_url)) {
$this->debug("......sending cookies: $cookies");
$httpRequest->addHeaders(array('Cookie' => $cookies));
}
//$httpRequest->addHeaders(array('User-Agent' => $this->userAgent));
$httpRequest->addHeaders($this->getUserAgent($req_url, true));
// add referer for picky sites
$httpRequest->addheaders(array('Referer' => $this->referer));
$this->requests[$orig] = array('headers'=>null, 'body'=>null, 'httpRequest'=>$httpRequest);
$this->requests[$orig]['original_url'] = $orig;
$pool->attach($httpRequest);
}
}
// did we get anything into the pool?
if (count($pool) > 0) {
$this->debug('Sending request...');
try {
$pool->send();
} catch (HttpRequestPoolException $e) {
// do nothing
}
$this->debug('Received responses');
foreach($subset as $orig => $url) {
if (!$isRedirect) $orig = $url;
$request = $this->requests[$orig]['httpRequest'];
//$this->requests[$orig]['headers'] = $this->headersToString($request->getResponseHeader());
// getResponseHeader() doesn't return status line, so, for consistency...
$this->requests[$orig]['headers'] = substr($request->getRawResponseMessage(), 0, $request->getResponseInfo('header_size'));
// check content type
// TODO: use getResponseHeader('content-type') or getResponseInfo()
if ($this->headerOnlyType($this->requests[$orig]['headers'])) {
$this->requests[$orig]['body'] = '';
$_header_only_type = true;
$this->debug('Header only type returned');
} else {
$this->requests[$orig]['body'] = $request->getResponseBody();
$_header_only_type = false;
}
$this->requests[$orig]['effective_url'] = $request->getResponseInfo('effective_url');
$this->requests[$orig]['status_code'] = $status_code = $request->getResponseCode();
// is redirect?
if ((in_array($status_code, array(300, 301, 302, 303, 307)) || $status_code > 307 && $status_code < 400) && $request->getResponseHeader('location')) {
$redirectURL = $request->getResponseHeader('location');
if (!preg_match('!^https?://!i', $redirectURL)) {
$redirectURL = SimplePie_Misc::absolutize_url($redirectURL, $url);
}
if ($this->validateURL($redirectURL)) {
$this->debug('Redirect detected. Valid URL: '.$redirectURL);
// store any cookies
$cookies = $request->getResponseHeader('set-cookie');
if ($cookies && !is_array($cookies)) $cookies = array($cookies);
if ($cookies) $this->cookieJar->storeCookies($url, $cookies);
$this->redirectQueue[$orig] = $redirectURL;
} else {
$this->debug('Redirect detected. Invalid URL: '.$redirectURL);
}
} elseif (!$_header_only_type && $request->getMethod() === HttpRequest::METH_HEAD) {
// the response content-type did not match our 'header only' types,
// but we'd issues a HEAD request because we assumed it would. So
// let's queue a proper GET request for this item...
$this->debug('Wrong guess at content-type, queing GET request');
$this->requests[$orig]['wrongGuess'] = true;
$this->redirectQueue[$orig] = $this->requests[$orig]['effective_url'];
} elseif (strpos($this->requests[$orig]['effective_url'], '_escaped_fragment_') === false) {
// check for <meta name='fragment' content='!'/>
// for AJAX sites, e.g. Blogger with its dynamic views templates.
// Based on Google's spec: https://developers.google.com/webmasters/ajax-crawling/docs/specification
if (isset($this->requests[$orig]['body'])) {
$redirectURL = $this->getUglyURL($this->requests[$orig]['effective_url'], substr($this->requests[$orig]['body'], 0, 4000));
if ($redirectURL) {
$this->debug('AJAX trigger (meta name="fragment" content="!") found. Queueing '.$redirectURL);
$this->redirectQueue[$orig] = $redirectURL;
}
}
}
//die($url.' -multi- '.$request->getResponseInfo('effective_url'));
$pool->detach($request);
unset($this->requests[$orig]['httpRequest'], $request);
/*
if ($this->minimiseMemoryUse) {
if ($this->cache($url)) {
unset($this->requests[$url]);
}
}
*/
}
}
}
} catch (HttpException $e) {
$this->debug($e);
return false;
}
}
//////////////////////////////////////////////////////////
// parallel (curl_multi_*)
elseif ($this->method == self::METHOD_CURL_MULTI) {
$this->debug('Starting parallel fetch (curl_multi_*)');
while (count($urls) > 0) {
$this->debug('Processing set of '.min($this->maxParallelRequests, count($urls)));
$subset = array_splice($urls, 0, $this->maxParallelRequests);
$pool = new RollingCurl(array($this, 'handleCurlResponse'));
$pool->window_size = count($subset);
foreach ($subset as $orig => $url) {
if (!$isRedirect) $orig = $url;
unset($this->redirectQueue[$orig]);
$this->debug("...$url");
if (!$isRedirect && isset($this->requests[$url])) {
$this->debug("......in memory");
/*
} elseif ($this->isCached($url)) {
$this->debug("......is cached");
if (!$this->minimiseMemoryUse) {
$this->requests[$url] = $this->getCached($url);
}
*/
} else {
$this->debug("......adding to pool");
$req_url = $this->rewriteUrls($url);
$req_url = ($this->rewriteHashbangFragment) ? $this->rewriteHashbangFragment($req_url) : $req_url;
$req_url = $this->removeFragment($req_url);
if (!empty($this->headerOnlyTypes) && !isset($this->requests[$orig]['wrongGuess']) && $this->possibleUnsupportedType($req_url)) {
$_meth = 'HEAD';
} else {
$_meth = 'GET';
unset($this->requests[$orig]['wrongGuess']);
}
$headers = array();
//$headers[] = 'User-Agent: '.$this->userAgent;
$headers[] = $this->getUserAgent($req_url);
// add referer for picky sites
$headers[] = 'Referer: '.$this->referer;
// send cookies, if we have any
if ($cookies = $this->cookieJar->getMatchingCookies($req_url)) {
$this->debug("......sending cookies: $cookies");
$headers[] = 'Cookie: '.$cookies;
}
$httpRequest = new RollingCurlRequest($req_url, $_meth, null, $headers, array(
CURLOPT_CONNECTTIMEOUT => $this->requestOptions['timeout'],
CURLOPT_TIMEOUT => $this->requestOptions['timeout']
));
$httpRequest->set_original_url($orig);
$this->requests[$orig] = array('headers'=>null, 'body'=>null, 'httpRequest'=>$httpRequest);
$this->requests[$orig]['original_url'] = $orig; // TODO: is this needed anymore?
$pool->add($httpRequest);
}
}
// did we get anything into the pool?
if (count($pool) > 0) {
$this->debug('Sending request...');
$pool->execute(); // this will call handleCurlResponse() and populate $this->requests[$orig]
$this->debug('Received responses');
foreach($subset as $orig => $url) {
if (!$isRedirect) $orig = $url;
// $this->requests[$orig]['headers']
// $this->requests[$orig]['body']
// $this->requests[$orig]['effective_url']
// check content type
if ($this->headerOnlyType($this->requests[$orig]['headers'])) {
$this->requests[$orig]['body'] = '';
$_header_only_type = true;
$this->debug('Header only type returned');
} else {
$_header_only_type = false;
}
$status_code = $this->requests[$orig]['status_code'];
if ((in_array($status_code, array(300, 301, 302, 303, 307)) || $status_code > 307 && $status_code < 400) && isset($this->requests[$orig]['location'])) {
$redirectURL = $this->requests[$orig]['location'];
if (!preg_match('!^https?://!i', $redirectURL)) {
$redirectURL = SimplePie_Misc::absolutize_url($redirectURL, $url);
}
if ($this->validateURL($redirectURL)) {
$this->debug('Redirect detected. Valid URL: '.$redirectURL);
// store any cookies
$cookies = $this->cookieJar->extractCookies($this->requests[$orig]['headers']);
if (!empty($cookies)) $this->cookieJar->storeCookies($url, $cookies);
$this->redirectQueue[$orig] = $redirectURL;
} else {
$this->debug('Redirect detected. Invalid URL: '.$redirectURL);
}
} elseif (!$_header_only_type && $this->requests[$orig]['method'] == 'HEAD') {
// the response content-type did not match our 'header only' types,
// but we'd issues a HEAD request because we assumed it would. So
// let's queue a proper GET request for this item...
$this->debug('Wrong guess at content-type, queing GET request');
$this->requests[$orig]['wrongGuess'] = true;
$this->redirectQueue[$orig] = $this->requests[$orig]['effective_url'];
} elseif (strpos($this->requests[$orig]['effective_url'], '_escaped_fragment_') === false) {
// check for <meta name='fragment' content='!'/>
// for AJAX sites, e.g. Blogger with its dynamic views templates.
// Based on Google's spec: https://developers.google.com/webmasters/ajax-crawling/docs/specification
if (isset($this->requests[$orig]['body'])) {
$redirectURL = $this->getUglyURL($this->requests[$orig]['effective_url'], substr($this->requests[$orig]['body'], 0, 4000));
if ($redirectURL) {
$this->debug('AJAX trigger (meta name="fragment" content="!") found. Queueing '.$redirectURL);
$this->redirectQueue[$orig] = $redirectURL;
}
}
}
// die($url.' -multi- '.$request->getResponseInfo('effective_url'));
unset($this->requests[$orig]['httpRequest'], $this->requests[$orig]['method']);
}
}
}
}
//////////////////////////////////////////////////////
// sequential (file_get_contents)
else {
$this->debug('Starting sequential fetch (file_get_contents)');
$this->debug('Processing set of '.count($urls));
foreach ($urls as $orig => $url) {
if (!$isRedirect) $orig = $url;
unset($this->redirectQueue[$orig]);
$this->debug("...$url");
if (!$isRedirect && isset($this->requests[$url])) {
$this->debug("......in memory");
/*
} elseif ($this->isCached($url)) {
$this->debug("......is cached");
if (!$this->minimiseMemoryUse) {
$this->requests[$url] = $this->getCached($url);
}
*/
} else {
$this->debug("Sending request for $url");
$this->requests[$orig]['original_url'] = $orig;
$req_url = $this->rewriteUrls($url);
$req_url = ($this->rewriteHashbangFragment) ? $this->rewriteHashbangFragment($req_url) : $req_url;
$req_url = $this->removeFragment($req_url);
// send cookies, if we have any
$httpContext = $this->httpContext;
$httpContext['http']['header'] .= $this->getUserAgent($req_url)."\r\n";
// add referer for picky sites
$httpContext['http']['header'] .= 'Referer: '.$this->referer."\r\n";
if ($cookies = $this->cookieJar->getMatchingCookies($req_url)) {
$this->debug("......sending cookies: $cookies");
$httpContext['http']['header'] .= 'Cookie: '.$cookies."\r\n";
}
if (false !== ($html = @file_get_contents($req_url, false, stream_context_create($httpContext)))) {
$this->debug('Received response');
// get status code
if (!isset($http_response_header[0]) || !preg_match('!^HTTP/\d+\.\d+\s+(\d+)!', trim($http_response_header[0]), $match)) {
$this->debug('Error: no status code found');
// TODO: handle error - no status code
} else {
$this->requests[$orig]['headers'] = $this->headersToString($http_response_header, false);
// check content type
if ($this->headerOnlyType($this->requests[$orig]['headers'])) {
$this->requests[$orig]['body'] = '';
} else {
$this->requests[$orig]['body'] = $html;
}
$this->requests[$orig]['effective_url'] = $req_url;
$this->requests[$orig]['status_code'] = $status_code = (int)$match[1];
unset($match);
// handle redirect
if (preg_match('/^Location:(.*?)$/mi', $this->requests[$orig]['headers'], $match)) {
$this->requests[$orig]['location'] = trim($match[1]);
}
if ((in_array($status_code, array(300, 301, 302, 303, 307)) || $status_code > 307 && $status_code < 400) && isset($this->requests[$orig]['location'])) {
$redirectURL = $this->requests[$orig]['location'];
if (!preg_match('!^https?://!i', $redirectURL)) {
$redirectURL = SimplePie_Misc::absolutize_url($redirectURL, $url);
}
if ($this->validateURL($redirectURL)) {
$this->debug('Redirect detected. Valid URL: '.$redirectURL);
// store any cookies
$cookies = $this->cookieJar->extractCookies($this->requests[$orig]['headers']);
if (!empty($cookies)) $this->cookieJar->storeCookies($url, $cookies);
$this->redirectQueue[$orig] = $redirectURL;
} else {
$this->debug('Redirect detected. Invalid URL: '.$redirectURL);
}
} elseif (strpos($this->requests[$orig]['effective_url'], '_escaped_fragment_') === false) {
// check for <meta name='fragment' content='!'/>
// for AJAX sites, e.g. Blogger with its dynamic views templates.
// Based on Google's spec: https://developers.google.com/webmasters/ajax-crawling/docs/specification
if (isset($this->requests[$orig]['body'])) {
$redirectURL = $this->getUglyURL($this->requests[$orig]['effective_url'], substr($this->requests[$orig]['body'], 0, 4000));
if ($redirectURL) {
$this->debug('AJAX trigger (meta name="fragment" content="!") found. Queueing '.$redirectURL);
$this->redirectQueue[$orig] = $redirectURL;
}
}
}
}
} else {
$this->debug('Error retrieving URL');
//print_r($req_url);
//print_r($http_response_header);
//print_r($html);
// TODO: handle error - failed to retrieve URL
}
}
}
}
}
public function handleCurlResponse($response, $info, $request) {
$orig = $request->url_original;
$this->requests[$orig]['headers'] = substr($response, 0, $info['header_size']);
$this->requests[$orig]['body'] = substr($response, $info['header_size']);
$this->requests[$orig]['method'] = $request->method;
$this->requests[$orig]['effective_url'] = $info['url'];
$this->requests[$orig]['status_code'] = (int)$info['http_code'];
if (preg_match('/^Location:(.*?)$/mi', $this->requests[$orig]['headers'], $match)) {
$this->requests[$orig]['location'] = trim($match[1]);
}
}
protected function headersToString(array $headers, $associative=true) {
if (!$associative) {
return implode("\n", $headers);
} else {
$str = '';
foreach ($headers as $key => $val) {
if (is_array($val)) {
foreach ($val as $v) $str .= "$key: $v\n";
} else {
$str .= "$key: $val\n";
}
}
return rtrim($str);
}
}
public function get($url, $remove=false, $gzdecode=true) {
$url = "$url";
if (isset($this->requests[$url]) && isset($this->requests[$url]['body'])) {
$this->debug("URL already fetched - in memory ($url, effective: {$this->requests[$url]['effective_url']})");
$response = $this->requests[$url];
/*
} elseif ($this->isCached($url)) {
$this->debug("URL already fetched - in disk cache ($url)");
$response = $this->getCached($url);
$this->requests[$url] = $response;
*/
} else {
$this->debug("Fetching URL ($url)");
$this->fetchAll(array($url));
if (isset($this->requests[$url]) && isset($this->requests[$url]['body'])) {
$response = $this->requests[$url];
} else {
$this->debug("Request failed");
$response = false;
}
}
/*
if ($this->minimiseMemoryUse && $response) {
$this->cache($url);
unset($this->requests[$url]);
}
*/
if ($remove && $response) unset($this->requests[$url]);
if ($gzdecode && stripos($response['headers'], 'Content-Encoding: gzip')) {
if ($html = gzdecode($response['body'])) {
$response['body'] = $html;
}
}
return $response;
}
public function parallelSupport() {
return class_exists('HttpRequestPool') || function_exists('curl_multi_init');
}
private function headerOnlyType($headers) {
if (preg_match('!^Content-Type:\s*(([a-z-]+)/([^;\r\n ]+))!im', $headers, $match)) {
// look for full mime type (e.g. image/jpeg) or just type (e.g. image)
$match[1] = strtolower(trim($match[1]));
$match[2] = strtolower(trim($match[2]));
foreach (array($match[1], $match[2]) as $mime) {
if (in_array($mime, $this->headerOnlyTypes)) return true;
}
}
return false;
}
private function possibleUnsupportedType($url) {
$path = @parse_url($url, PHP_URL_PATH);
if ($path && strpos($path, '.') !== false) {
$ext = strtolower(trim(pathinfo($path, PATHINFO_EXTENSION)));
return in_array($ext, $this->headerOnlyClues);
}
return false;
}
}
// gzdecode from http://www.php.net/manual/en/function.gzdecode.php#82930
if (!function_exists('gzdecode')) {
function gzdecode($data,&$filename='',&$error='',$maxlength=null)
{
$len = strlen($data);
if ($len < 18 || strcmp(substr($data,0,2),"\x1f\x8b")) {
$error = "Not in GZIP format.";
return null; // Not GZIP format (See RFC 1952)
}
$method = ord(substr($data,2,1)); // Compression method
$flags = ord(substr($data,3,1)); // Flags
if ($flags & 31 != $flags) {
$error = "Reserved bits not allowed.";
return null;
}
// NOTE: $mtime may be negative (PHP integer limitations)
$mtime = unpack("V", substr($data,4,4));
$mtime = $mtime[1];
$xfl = substr($data,8,1);
$os = substr($data,8,1);
$headerlen = 10;
$extralen = 0;
$extra = "";
if ($flags & 4) {
// 2-byte length prefixed EXTRA data in header
if ($len - $headerlen - 2 < 8) {
return false; // invalid
}
$extralen = unpack("v",substr($data,8,2));
$extralen = $extralen[1];
if ($len - $headerlen - 2 - $extralen < 8) {
return false; // invalid
}
$extra = substr($data,10,$extralen);
$headerlen += 2 + $extralen;
}
$filenamelen = 0;
$filename = "";
if ($flags & 8) {
// C-style string
if ($len - $headerlen - 1 < 8) {
return false; // invalid
}
$filenamelen = strpos(substr($data,$headerlen),chr(0));
if ($filenamelen === false || $len - $headerlen - $filenamelen - 1 < 8) {
return false; // invalid
}
$filename = substr($data,$headerlen,$filenamelen);
$headerlen += $filenamelen + 1;
}
$commentlen = 0;
$comment = "";
if ($flags & 16) {
// C-style string COMMENT data in header
if ($len - $headerlen - 1 < 8) {
return false; // invalid
}
$commentlen = strpos(substr($data,$headerlen),chr(0));
if ($commentlen === false || $len - $headerlen - $commentlen - 1 < 8) {
return false; // Invalid header format
}
$comment = substr($data,$headerlen,$commentlen);
$headerlen += $commentlen + 1;
}
$headercrc = "";
if ($flags & 2) {
// 2-bytes (lowest order) of CRC32 on header present
if ($len - $headerlen - 2 < 8) {
return false; // invalid
}
$calccrc = crc32(substr($data,0,$headerlen)) & 0xffff;
$headercrc = unpack("v", substr($data,$headerlen,2));
$headercrc = $headercrc[1];
if ($headercrc != $calccrc) {
$error = "Header checksum failed.";
return false; // Bad header CRC
}
$headerlen += 2;
}
// GZIP FOOTER
$datacrc = unpack("V",substr($data,-8,4));
$datacrc = sprintf('%u',$datacrc[1] & 0xFFFFFFFF);
$isize = unpack("V",substr($data,-4));
$isize = $isize[1];
// decompression:
$bodylen = $len-$headerlen-8;
if ($bodylen < 1) {
// IMPLEMENTATION BUG!
return null;
}
$body = substr($data,$headerlen,$bodylen);
$data = "";
if ($bodylen > 0) {
switch ($method) {
case 8:
// Currently the only supported compression method:
$data = gzinflate($body,$maxlength);
break;
default:
$error = "Unknown compression method.";
return false;
}
} // zero-byte body content is allowed
// Verifiy CRC32
$crc = sprintf("%u",crc32($data));
$crcOK = $crc == $datacrc;
$lenOK = $isize == strlen($data);
if (!$lenOK || !$crcOK) {
$error = ( $lenOK ? '' : 'Length check FAILED. ') . ( $crcOK ? '' : 'Checksum FAILED.');
return false;
}
return $data;
}
}
?>

View file

@ -0,0 +1,402 @@
<?php
/*
Authored by Josh Fraser (www.joshfraser.com)
Released under Apache License 2.0
Maintained by Alexander Makarov, http://rmcreative.ru/
Modified by Keyvan Minoukadeh for the Five Filters project: http://fivefilters.org
*/
/**
* Class that represent a single curl request
*/
class RollingCurlRequest {
public $url = false;
public $url_original = false; // used for tracking redirects
public $method = 'GET';
public $post_data = null;
public $headers = null;
public $options = null;
/**
* @param string $url
* @param string $method
* @param $post_data
* @param $headers
* @param $options
* @return void
*/
function __construct($url, $method = "GET", $post_data = null, $headers = null, $options = null) {
$this->url = $url;
$this->url_original = $url;
$this->method = $method;
$this->post_data = $post_data;
$this->headers = $headers;
$this->options = $options;
}
/**
* @param string $url
* @return void
*/
public function set_original_url($url) {
$this->url_original = $url;
}
/**
* @return void
*/
public function __destruct() {
unset($this->url, $this->url_original, $this->method, $this->post_data, $this->headers, $this->options);
}
}
/**
* RollingCurl custom exception
*/
class RollingCurlException extends Exception {
}
/**
* Class that holds a rolling queue of curl requests.
*
* @throws RollingCurlException
*/
class RollingCurl implements Countable {
/**
* @var int
*
* Window size is the max number of simultaneous connections allowed.
*
* REMEMBER TO RESPECT THE SERVERS:
* Sending too many requests at one time can easily be perceived
* as a DOS attack. Increase this window_size if you are making requests
* to multiple servers or have permission from the receving server admins.
*/
private $window_size = 5;
/**
* @var float
*
* Timeout is the timeout used for curl_multi_select.
*/
private $timeout = 10;
/**
* @var string|array
*
* Callback function to be applied to each result.
*/
private $callback;
/**
* @var array
*
* Set your base options that you want to be used with EVERY request.
*/
protected $options = array(
CURLOPT_SSL_VERIFYPEER => 0,
CURLOPT_RETURNTRANSFER => 1,
CURLOPT_CONNECTTIMEOUT => 30,
CURLOPT_TIMEOUT => 30
);
/**
* @var array
*/
private $headers = array();
/**
* @var Request[]
*
* The request queue
*/
private $requests = array();
/**
* @var RequestMap[]
*
* Maps handles to request indexes
*/
private $requestMap = array();
/**
* @param $callback
* Callback function to be applied to each result.
*
* Can be specified as 'my_callback_function'
* or array($object, 'my_callback_method').
*
* Function should take three parameters: $response, $info, $request.
* $response is response body, $info is additional curl info.
* $request is the original request
*
* @return void
*/
function __construct($callback = null) {
$this->callback = $callback;
}
/**
* @param string $name
* @return mixed
*/
public function __get($name) {
return (isset($this->{$name})) ? $this->{$name} : null;
}
/**
* @param string $name
* @param mixed $value
* @return bool
*/
public function __set($name, $value) {
// append the base options & headers
if ($name == "options" || $name == "headers") {
$this->{$name} = $value + $this->{$name};
} else {
$this->{$name} = $value;
}
return true;
}
/**
* Count number of requests added (Countable interface)
*
* @return int
*/
public function count() {
return count($this->requests);
}
/**
* Add a request to the request queue
*
* @param Request $request
* @return bool
*/
public function add($request) {
$this->requests[] = $request;
return true;
}
/**
* Create new Request and add it to the request queue
*
* @param string $url
* @param string $method
* @param $post_data
* @param $headers
* @param $options
* @return bool
*/
public function request($url, $method = "GET", $post_data = null, $headers = null, $options = null) {
$this->requests[] = new RollingCurlRequest($url, $method, $post_data, $headers, $options);
return true;
}
/**
* Perform GET request
*
* @param string $url
* @param $headers
* @param $options
* @return bool
*/
public function get($url, $headers = null, $options = null) {
return $this->request($url, "GET", null, $headers, $options);
}
/**
* Perform POST request
*
* @param string $url
* @param $post_data
* @param $headers
* @param $options
* @return bool
*/
public function post($url, $post_data = null, $headers = null, $options = null) {
return $this->request($url, "POST", $post_data, $headers, $options);
}
/**
* Execute processing
*
* @param int $window_size Max number of simultaneous connections
* @return string|bool
*/
public function execute($window_size = null) {
// rolling curl window must always be greater than 1
if (sizeof($this->requests) == 1) {
return $this->single_curl();
} else {
// start the rolling curl. window_size is the max number of simultaneous connections
return $this->rolling_curl($window_size);
}
}
/**
* Performs a single curl request
*
* @access private
* @return string
*/
private function single_curl() {
$ch = curl_init();
$request = array_shift($this->requests);
$options = $this->get_options($request);
curl_setopt_array($ch, $options);
$output = curl_exec($ch);
$info = curl_getinfo($ch);
// it's not neccesary to set a callback for one-off requests
if ($this->callback) {
$callback = $this->callback;
if (is_callable($this->callback)) {
call_user_func($callback, $output, $info, $request);
}
}
else
return $output;
return true;
}
/**
* Performs multiple curl requests
*
* @access private
* @throws RollingCurlException
* @param int $window_size Max number of simultaneous connections
* @return bool
*/
private function rolling_curl($window_size = null) {
if ($window_size)
$this->window_size = $window_size;
// make sure the rolling window isn't greater than the # of urls
if (sizeof($this->requests) < $this->window_size)
$this->window_size = sizeof($this->requests);
if ($this->window_size < 2) {
throw new RollingCurlException("Window size must be greater than 1");
}
$master = curl_multi_init();
// start the first batch of requests
for ($i = 0; $i < $this->window_size; $i++) {
$ch = curl_init();
$options = $this->get_options($this->requests[$i]);
curl_setopt_array($ch, $options);
curl_multi_add_handle($master, $ch);
// Add to our request Maps
$key = (string) $ch;
$this->requestMap[$key] = $i;
}
do {
while (($execrun = curl_multi_exec($master, $running)) == CURLM_CALL_MULTI_PERFORM) ;
if ($execrun != CURLM_OK)
break;
// a request was just completed -- find out which one
while ($done = curl_multi_info_read($master)) {
// get the info and content returned on the request
$info = curl_getinfo($done['handle']);
$output = curl_multi_getcontent($done['handle']);
// send the return values to the callback function.
$callback = $this->callback;
if (is_callable($callback)) {
$key = (string) $done['handle'];
$request = $this->requests[$this->requestMap[$key]];
unset($this->requestMap[$key]);
call_user_func($callback, $output, $info, $request);
}
// start a new request (it's important to do this before removing the old one)
if ($i < sizeof($this->requests) && isset($this->requests[$i]) && $i < count($this->requests)) {
$ch = curl_init();
$options = $this->get_options($this->requests[$i]);
curl_setopt_array($ch, $options);
curl_multi_add_handle($master, $ch);
// Add to our request Maps
$key = (string) $ch;
$this->requestMap[$key] = $i;
$i++;
}
// remove the curl handle that just completed
curl_multi_remove_handle($master, $done['handle']);
}
// Block for data in / output; error handling is done by curl_multi_exec
//if ($running) curl_multi_select($master, $this->timeout);
// removing timeout as it causes problems on Windows with PHP 5.3.5 and Curl 7.20.0
if ($running) curl_multi_select($master);
} while ($running);
curl_multi_close($master);
return true;
}
/**
* Helper function to set up a new request by setting the appropriate options
*
* @access private
* @param Request $request
* @return array
*/
private function get_options($request) {
// options for this entire curl object
$options = $this->__get('options');
// We're managing reirects in PHP - allows us to intervene and rewrite/block URLs
// before the next request goes out.
$options[CURLOPT_FOLLOWLOCATION] = 0;
$options[CURLOPT_MAXREDIRS] = 0;
//if (ini_get('safe_mode') == 'Off' || !ini_get('safe_mode')) {
// $options[CURLOPT_FOLLOWLOCATION] = 1;
// $options[CURLOPT_MAXREDIRS] = 5;
//}
$headers = $this->__get('headers');
// append custom headers for this specific request
if ($request->headers) {
$headers = $headers + $request->headers;
}
// append custom options for this specific request
if ($request->options) {
$options = $request->options + $options;
}
// set the request URL
$options[CURLOPT_URL] = $request->url;
if ($headers) {
$options[CURLOPT_HTTPHEADER] = $headers;
}
// return response headers
$options[CURLOPT_HEADER] = 1;
// send HEAD request?
if ($request->method == 'HEAD') {
$options[CURLOPT_NOBODY] = 1;
}
return $options;
}
/**
* @return void
*/
public function __destruct() {
unset($this->window_size, $this->callback, $this->options, $this->headers, $this->requests);
}
}

View file

@ -0,0 +1,79 @@
<?php
/**
* Humble HTTP Agent extension for SimplePie_File
*
* This class is designed to extend and override SimplePie_File
* in order to prevent duplicate HTTP requests being sent out.
* The idea is to initialise an instance of Humble HTTP Agent
* and attach it, to a static class variable, of this class.
* SimplePie will then automatically initialise this class
*
* @date 2011-02-28
*/
class SimplePie_HumbleHttpAgent extends SimplePie_File
{
protected static $agent;
var $url;
var $useragent;
var $success = true;
var $headers = array();
var $body;
var $status_code;
var $redirects = 0;
var $error;
var $method = SIMPLEPIE_FILE_SOURCE_NONE;
public static function set_agent(HumbleHttpAgent $agent) {
self::$agent = $agent;
}
public function __construct($url, $timeout = 10, $redirects = 5, $headers = null, $useragent = null, $force_fsockopen = false) {
if (class_exists('idna_convert'))
{
$idn = new idna_convert();
$parsed = SimplePie_Misc::parse_url($url);
$url = SimplePie_Misc::compress_parse_url($parsed['scheme'], $idn->encode($parsed['authority']), $parsed['path'], $parsed['query'], $parsed['fragment']);
}
$this->url = $url;
$this->useragent = $useragent;
if (preg_match('/^http(s)?:\/\//i', $url))
{
if (!is_array($headers))
{
$headers = array();
}
$this->method = SIMPLEPIE_FILE_SOURCE_REMOTE | SIMPLEPIE_FILE_SOURCE_CURL;
$headers2 = array();
foreach ($headers as $key => $value) {
$headers2[] = "$key: $value";
}
//TODO: allow for HTTP headers
// curl_setopt($fp, CURLOPT_HTTPHEADER, $headers2);
$response = self::$agent->get($url);
if ($response === false || !isset($response['status_code'])) {
$this->error = 'failed to fetch URL';
$this->success = false;
} else {
// The extra lines at the end are there to satisfy SimplePie's HTTP parser.
// The class expects a full HTTP message, whereas we're giving it only
// headers - the new lines indicate the start of the body.
$parser = new SimplePie_HTTP_Parser($response['headers']."\r\n\r\n");
if ($parser->parse()) {
$this->headers = $parser->headers;
//$this->body = $parser->body;
$this->body = $response['body'];
$this->status_code = $parser->status_code;
}
}
}
else
{
$this->error = 'invalid URL';
$this->success = false;
}
}
}
?>

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,354 @@
<?php
/**
* This class represents a text sample to be parsed.
*
* @category Text
* @package Text_LanguageDetect
* @author Nicholas Pisarro
* @copyright 2006
* @license BSD
* @version CVS: $Id: Parser.php,v 1.5 2006/03/11 05:45:05 taak Exp $
* @link http://pear.php.net/package/Text_LanguageDetect/
* @link http://langdetect.blogspot.com/
*/
/**
* This class represents a text sample to be parsed.
*
* This separates the analysis of a text sample from the primary LanguageDetect
* class. After a new profile has been built, the data can be retrieved using
* the accessor functions.
*
* This class is intended to be used by the Text_LanguageDetect class, not
* end-users.
*
* @category Text
* @package Text_LanguageDetect
* @author Nicholas Pisarro
* @copyright 2006
* @license BSD
* @version release: 0.2.3
*/
class Text_LanguageDetect_Parser extends Text_LanguageDetect
{
/**
* the piece of text being parsed
*
* @access private
* @var string
*/
var $_string;
/**
* stores the trigram frequencies of the sample
*
* @access private
* @var string
*/
var $_trigrams = array();
/**
* stores the trigram ranks of the sample
*
* @access private
* @var array
*/
var $_trigram_ranks = array();
/**
* stores the unicode blocks of the sample
*
* @access private
* @var array
*/
var $_unicode_blocks = array();
/**
* Whether the parser should compile the unicode ranges
*
* @access private
* @var bool
*/
var $_compile_unicode = false;
/**
* Whether the parser should compile trigrams
*
* @access private
* @var bool
*/
var $_compile_trigram = false;
/**
* Whether the trigram parser should pad the beginning of the string
*
* @access private
* @var bool
*/
var $_trigram_pad_start = false;
/**
* Whether the unicode parser should skip non-alphabetical ascii chars
*
* @access private
* @var bool
*/
var $_unicode_skip_symbols = true;
/**
* Constructor
*
* @access private
* @param string $string string to be parsed
*/
function Text_LanguageDetect_Parser($string, $db=null, $unicode_db=null) {
if (isset($db)) $this->_db_filename = $db;
if (isset($unicode_db)) $this->_unicode_db_filename = $unicode_db;
$this->_string = $string;
}
/**
* Returns true if a string is suitable for parsing
*
* @static
* @access public
* @param string $str input string to test
* @return bool true if acceptable, false if not
*/
function validateString($str) {
if (!empty($str) && strlen($str) > 3 && preg_match('/\S/', $str)) {
return true;
} else {
return false;
}
}
/**
* turn on/off trigram counting
*
* @access public
* @param bool $bool true for on, false for off
*/
function prepareTrigram($bool = true)
{
$this->_compile_trigram = $bool;
}
/**
* turn on/off unicode block counting
*
* @access public
* @param bool $bool true for on, false for off
*/
function prepareUnicode($bool = true)
{
$this->_compile_unicode = $bool;
}
/**
* turn on/off padding the beginning of the sample string
*
* @access public
* @param bool $bool true for on, false for off
*/
function setPadStart($bool = true)
{
$this->_trigram_pad_start = $bool;
}
/**
* Should the unicode block counter skip non-alphabetical ascii chars?
*
* @access public
* @param bool $bool true for on, false for off
*/
function setUnicodeSkipSymbols($bool = true)
{
$this->_unicode_skip_symbols = $bool;
}
/**
* Returns the trigram ranks for the text sample
*
* @access public
* @return array trigram ranks in the text sample
*/
function &getTrigramRanks()
{
return $this->_trigram_ranks;
}
/**
* Return the trigram freqency table
*
* only used in testing to make sure the parser is working
*
* @access public
* @return array trigram freqencies in the text sample
*/
function &getTrigramFreqs()
{
return $this->_trigram;
}
/**
* returns the array of unicode blocks
*
* @access public
* @return array unicode blocks in the text sample
*/
function &getUnicodeBlocks()
{
return $this->_unicode_blocks;
}
/**
* Executes the parsing operation
*
* Be sure to call the set*() functions to set options and the
* prepare*() functions first to tell it what kind of data to compute
*
* Afterwards the get*() functions can be used to access the compiled
* information.
*
* @access public
*/
function analyze()
{
$len = strlen($this->_string);
$byte_counter = 0;
// unicode startup
if ($this->_compile_unicode) {
$blocks =& $this->_read_unicode_block_db();
$block_count = count($blocks);
$skipped_count = 0;
$unicode_chars = array();
}
// trigram startup
if ($this->_compile_trigram) {
// initialize them as blank so the parser will skip the first two
// (since it skips trigrams with more than 2 contiguous spaces)
$a = ' ';
$b = ' ';
// kludge
// if it finds a valid trigram to start and the start pad option is
// off, then set a variable that will be used to reduce this
// trigram after parsing has finished
if (!$this->_trigram_pad_start) {
$a = $this->_next_char($this->_string, $byte_counter, true);
if ($a != ' ') {
$b = $this->_next_char($this->_string, $byte_counter, true);
$dropone = " $a$b";
}
$byte_counter = 0;
$a = ' ';
$b = ' ';
}
}
while ($byte_counter < $len) {
$char = $this->_next_char($this->_string, $byte_counter, true);
// language trigram detection
if ($this->_compile_trigram) {
if (!($b == ' ' && ($a == ' ' || $char == ' '))) {
if (!isset($this->_trigram[$a . $b . $char])) {
$this->_trigram[$a . $b . $char] = 1;
} else {
$this->_trigram[$a . $b . $char]++;
}
}
$a = $b;
$b = $char;
}
// unicode block detection
if ($this->_compile_unicode) {
if ($this->_unicode_skip_symbols
&& strlen($char) == 1
&& ($char < 'A' || $char > 'z'
|| ($char > 'Z' && $char < 'a'))
&& $char != "'") { // does not skip the apostrophe
// since it's included in the language
// models
$skipped_count++;
continue;
}
// build an array of all the characters
if (isset($unicode_chars[$char])) {
$unicode_chars[$char]++;
} else {
$unicode_chars[$char] = 1;
}
}
// todo: add byte detection here
}
// unicode cleanup
if ($this->_compile_unicode) {
foreach ($unicode_chars as $utf8_char => $count) {
$search_result = $this->_unicode_block_name(
$this->_utf8char2unicode($utf8_char), $blocks, $block_count);
if ($search_result != -1) {
$block_name = $search_result[2];
} else {
$block_name = '[Malformatted]';
}
if (isset($this->_unicode_blocks[$block_name])) {
$this->_unicode_blocks[$block_name] += $count;
} else {
$this->_unicode_blocks[$block_name] = $count;
}
}
}
// trigram cleanup
if ($this->_compile_trigram) {
// pad the end
if ($b != ' ') {
if (!isset($this->_trigram["$a$b "])) {
$this->_trigram["$a$b "] = 1;
} else {
$this->_trigram["$a$b "]++;
}
}
// perl compatibility; Language::Guess does not pad the beginning
// kludge
if (isset($dropone)) {
if ($this->_trigram[$dropone] == 1) {
unset($this->_trigram[$dropone]);
} else {
$this->_trigram[$dropone]--;
}
}
if (!empty($this->_trigram)) {
$this->_trigram_ranks = $this->_arr_rank($this->_trigram);
} else {
$this->_trigram_ranks = array();
}
}
}
}
/* vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4: */
?>

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View file

@ -0,0 +1,110 @@
<?php
/**
* JavaScript-like HTML DOM Element
*
* This class extends PHP's DOMElement to allow
* users to get and set the innerHTML property of
* HTML elements in the same way it's done in
* JavaScript.
*
* Example usage:
* @code
* require_once 'JSLikeHTMLElement.php';
* header('Content-Type: text/plain');
* $doc = new DOMDocument();
* $doc->registerNodeClass('DOMElement', 'JSLikeHTMLElement');
* $doc->loadHTML('<div><p>Para 1</p><p>Para 2</p></div>');
* $elem = $doc->getElementsByTagName('div')->item(0);
*
* // print innerHTML
* echo $elem->innerHTML; // prints '<p>Para 1</p><p>Para 2</p>'
* echo "\n\n";
*
* // set innerHTML
* $elem->innerHTML = '<a href="http://fivefilters.org">FiveFilters.org</a>';
* echo $elem->innerHTML; // prints '<a href="http://fivefilters.org">FiveFilters.org</a>'
* echo "\n\n";
*
* // print document (with our changes)
* echo $doc->saveXML();
* @endcode
*
* @author Keyvan Minoukadeh - http://www.keyvan.net - keyvan@keyvan.net
* @see http://fivefilters.org (the project this was written for)
*/
class JSLikeHTMLElement extends DOMElement
{
/**
* Used for setting innerHTML like it's done in JavaScript:
* @code
* $div->innerHTML = '<h2>Chapter 2</h2><p>The story begins...</p>';
* @endcode
*/
public function __set($name, $value) {
if ($name == 'innerHTML') {
// first, empty the element
for ($x=$this->childNodes->length-1; $x>=0; $x--) {
$this->removeChild($this->childNodes->item($x));
}
// $value holds our new inner HTML
if ($value != '') {
$f = $this->ownerDocument->createDocumentFragment();
// appendXML() expects well-formed markup (XHTML)
$result = @$f->appendXML($value); // @ to suppress PHP warnings
if ($result) {
if ($f->hasChildNodes()) $this->appendChild($f);
} else {
// $value is probably ill-formed
$f = new DOMDocument();
$value = mb_convert_encoding($value, 'HTML-ENTITIES', 'UTF-8');
// Using <htmlfragment> will generate a warning, but so will bad HTML
// (and by this point, bad HTML is what we've got).
// We use it (and suppress the warning) because an HTML fragment will
// be wrapped around <html><body> tags which we don't really want to keep.
// Note: despite the warning, if loadHTML succeeds it will return true.
$result = @$f->loadHTML('<htmlfragment>'.$value.'</htmlfragment>');
if ($result) {
$import = $f->getElementsByTagName('htmlfragment')->item(0);
foreach ($import->childNodes as $child) {
$importedNode = $this->ownerDocument->importNode($child, true);
$this->appendChild($importedNode);
}
} else {
// oh well, we tried, we really did. :(
// this element is now empty
}
}
}
} else {
$trace = debug_backtrace();
trigger_error('Undefined property via __set(): '.$name.' in '.$trace[0]['file'].' on line '.$trace[0]['line'], E_USER_NOTICE);
}
}
/**
* Used for getting innerHTML like it's done in JavaScript:
* @code
* $string = $div->innerHTML;
* @endcode
*/
public function __get($name)
{
if ($name == 'innerHTML') {
$inner = '';
foreach ($this->childNodes as $child) {
$inner .= $this->ownerDocument->saveXML($child);
}
return $inner;
}
$trace = debug_backtrace();
trigger_error('Undefined property via __get(): '.$name.' in '.$trace[0]['file'].' on line '.$trace[0]['line'], E_USER_NOTICE);
return null;
}
public function __toString()
{
return '['.$this->tagName.']';
}
}
?>

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,26 @@
Copyright (c) 2004-2007, Ryan Parman and Geoffrey Sneddon.
All rights reserved.
Redistribution and use in source and binary forms, with or without modification, are
permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this list of
conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice, this list
of conditions and the following disclaimer in the documentation and/or other materials
provided with the distribution.
* Neither the name of the SimplePie Team nor the names of its contributors may be used
to endorse or promote products derived from this software without specific prior
written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

View file

@ -0,0 +1,86 @@
<?php
/**
* SimplePie
*
* A PHP-Based RSS and Atom Feed Framework.
* Takes the hard work out of managing a complete RSS/Atom solution.
*
* Copyright (c) 2004-2009, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are
* permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice, this list of
* conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice, this list
* of conditions and the following disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* * Neither the name of the SimplePie Team nor the names of its contributors may be used
* to endorse or promote products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
* AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
* AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* @package SimplePie
* @version 1.3.1
* @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @author Ryan Parman
* @author Geoffrey Sneddon
* @author Ryan McCue
* @link http://simplepie.org/ SimplePie
* @license http://www.opensource.org/licenses/bsd-license.php BSD License
*/
// autoloader
spl_autoload_register(array(new SimplePie_Autoloader(), 'autoload'));
if (!class_exists('SimplePie'))
{
trigger_error('Autoloader not registered properly', E_USER_ERROR);
}
/**
* Autoloader class
*
* @package SimplePie
* @subpackage API
*/
class SimplePie_Autoloader
{
/**
* Constructor
*/
public function __construct()
{
$this->path = dirname(__FILE__) . DIRECTORY_SEPARATOR . 'library';
}
/**
* Autoloader
*
* @param string $class The name of the class to attempt to load.
*/
public function autoload($class)
{
// Only load the class if it starts with "SimplePie"
if (strpos($class, 'SimplePie') !== 0)
{
return;
}
$filename = $this->path . DIRECTORY_SEPARATOR . str_replace('_', DIRECTORY_SEPARATOR, $class) . '.php';
include $filename;
}
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,157 @@
<?php
/**
* SimplePie
*
* A PHP-Based RSS and Atom Feed Framework.
* Takes the hard work out of managing a complete RSS/Atom solution.
*
* Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are
* permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice, this list of
* conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice, this list
* of conditions and the following disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* * Neither the name of the SimplePie Team nor the names of its contributors may be used
* to endorse or promote products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
* AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
* AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* @package SimplePie
* @version 1.3.1
* @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @author Ryan Parman
* @author Geoffrey Sneddon
* @author Ryan McCue
* @link http://simplepie.org/ SimplePie
* @license http://www.opensource.org/licenses/bsd-license.php BSD License
*/
/**
* Manages all author-related data
*
* Used by {@see SimplePie_Item::get_author()} and {@see SimplePie::get_authors()}
*
* This class can be overloaded with {@see SimplePie::set_author_class()}
*
* @package SimplePie
* @subpackage API
*/
class SimplePie_Author
{
/**
* Author's name
*
* @var string
* @see get_name()
*/
var $name;
/**
* Author's link
*
* @var string
* @see get_link()
*/
var $link;
/**
* Author's email address
*
* @var string
* @see get_email()
*/
var $email;
/**
* Constructor, used to input the data
*
* @param string $name
* @param string $link
* @param string $email
*/
public function __construct($name = null, $link = null, $email = null)
{
$this->name = $name;
$this->link = $link;
$this->email = $email;
}
/**
* String-ified version
*
* @return string
*/
public function __toString()
{
// There is no $this->data here
return md5(serialize($this));
}
/**
* Author's name
*
* @return string|null
*/
public function get_name()
{
if ($this->name !== null)
{
return $this->name;
}
else
{
return null;
}
}
/**
* Author's link
*
* @return string|null
*/
public function get_link()
{
if ($this->link !== null)
{
return $this->link;
}
else
{
return null;
}
}
/**
* Author's email address
*
* @return string|null
*/
public function get_email()
{
if ($this->email !== null)
{
return $this->email;
}
else
{
return null;
}
}
}

View file

@ -0,0 +1,133 @@
<?php
/**
* SimplePie
*
* A PHP-Based RSS and Atom Feed Framework.
* Takes the hard work out of managing a complete RSS/Atom solution.
*
* Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are
* permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice, this list of
* conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice, this list
* of conditions and the following disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* * Neither the name of the SimplePie Team nor the names of its contributors may be used
* to endorse or promote products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
* AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
* AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* @package SimplePie
* @version 1.3.1
* @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @author Ryan Parman
* @author Geoffrey Sneddon
* @author Ryan McCue
* @link http://simplepie.org/ SimplePie
* @license http://www.opensource.org/licenses/bsd-license.php BSD License
*/
/**
* Used to create cache objects
*
* This class can be overloaded with {@see SimplePie::set_cache_class()},
* although the preferred way is to create your own handler
* via {@see register()}
*
* @package SimplePie
* @subpackage Caching
*/
class SimplePie_Cache
{
/**
* Cache handler classes
*
* These receive 3 parameters to their constructor, as documented in
* {@see register()}
* @var array
*/
protected static $handlers = array(
'mysql' => 'SimplePie_Cache_MySQL',
'memcache' => 'SimplePie_Cache_Memcache',
);
/**
* Don't call the constructor. Please.
*/
private function __construct() { }
/**
* Create a new SimplePie_Cache object
*
* @param string $location URL location (scheme is used to determine handler)
* @param string $filename Unique identifier for cache object
* @param string $extension 'spi' or 'spc'
* @return SimplePie_Cache_Base Type of object depends on scheme of `$location`
*/
public static function get_handler($location, $filename, $extension)
{
$type = explode(':', $location, 2);
$type = $type[0];
if (!empty(self::$handlers[$type]))
{
$class = self::$handlers[$type];
return new $class($location, $filename, $extension);
}
return new SimplePie_Cache_File($location, $filename, $extension);
}
/**
* Create a new SimplePie_Cache object
*
* @deprecated Use {@see get_handler} instead
*/
public function create($location, $filename, $extension)
{
trigger_error('Cache::create() has been replaced with Cache::get_handler(). Switch to the registry system to use this.', E_USER_DEPRECATED);
return self::get_handler($location, $filename, $extension);
}
/**
* Register a handler
*
* @param string $type DSN type to register for
* @param string $class Name of handler class. Must implement SimplePie_Cache_Base
*/
public static function register($type, $class)
{
self::$handlers[$type] = $class;
}
/**
* Parse a URL into an array
*
* @param string $url
* @return array
*/
public static function parse_URL($url)
{
$params = parse_url($url);
$params['extras'] = array();
if (isset($params['query']))
{
parse_str($params['query'], $params['extras']);
}
return $params;
}
}

View file

@ -0,0 +1,114 @@
<?php
/**
* SimplePie
*
* A PHP-Based RSS and Atom Feed Framework.
* Takes the hard work out of managing a complete RSS/Atom solution.
*
* Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are
* permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice, this list of
* conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice, this list
* of conditions and the following disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* * Neither the name of the SimplePie Team nor the names of its contributors may be used
* to endorse or promote products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
* AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
* AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* @package SimplePie
* @version 1.3.1
* @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @author Ryan Parman
* @author Geoffrey Sneddon
* @author Ryan McCue
* @link http://simplepie.org/ SimplePie
* @license http://www.opensource.org/licenses/bsd-license.php BSD License
*/
/**
* Base for cache objects
*
* Classes to be used with {@see SimplePie_Cache::register()} are expected
* to implement this interface.
*
* @package SimplePie
* @subpackage Caching
*/
interface SimplePie_Cache_Base
{
/**
* Feed cache type
*
* @var string
*/
const TYPE_FEED = 'spc';
/**
* Image cache type
*
* @var string
*/
const TYPE_IMAGE = 'spi';
/**
* Create a new cache object
*
* @param string $location Location string (from SimplePie::$cache_location)
* @param string $name Unique ID for the cache
* @param string $type Either TYPE_FEED for SimplePie data, or TYPE_IMAGE for image data
*/
public function __construct($location, $name, $type);
/**
* Save data to the cache
*
* @param array|SimplePie $data Data to store in the cache. If passed a SimplePie object, only cache the $data property
* @return bool Successfulness
*/
public function save($data);
/**
* Retrieve the data saved to the cache
*
* @return array Data for SimplePie::$data
*/
public function load();
/**
* Retrieve the last modified time for the cache
*
* @return int Timestamp
*/
public function mtime();
/**
* Set the last modified time to the current time
*
* @return bool Success status
*/
public function touch();
/**
* Remove the cache
*
* @return bool Success status
*/
public function unlink();
}

View file

@ -0,0 +1,137 @@
<?php
/**
* SimplePie
*
* A PHP-Based RSS and Atom Feed Framework.
* Takes the hard work out of managing a complete RSS/Atom solution.
*
* Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are
* permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice, this list of
* conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice, this list
* of conditions and the following disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* * Neither the name of the SimplePie Team nor the names of its contributors may be used
* to endorse or promote products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
* AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
* AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* @package SimplePie
* @version 1.3.1
* @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @author Ryan Parman
* @author Geoffrey Sneddon
* @author Ryan McCue
* @link http://simplepie.org/ SimplePie
* @license http://www.opensource.org/licenses/bsd-license.php BSD License
*/
/**
* Base class for database-based caches
*
* @package SimplePie
* @subpackage Caching
*/
abstract class SimplePie_Cache_DB implements SimplePie_Cache_Base
{
/**
* Helper for database conversion
*
* Converts a given {@see SimplePie} object into data to be stored
*
* @param SimplePie $data
* @return array First item is the serialized data for storage, second item is the unique ID for this item
*/
protected static function prepare_simplepie_object_for_cache($data)
{
$items = $data->get_items();
$items_by_id = array();
if (!empty($items))
{
foreach ($items as $item)
{
$items_by_id[$item->get_id()] = $item;
}
if (count($items_by_id) !== count($items))
{
$items_by_id = array();
foreach ($items as $item)
{
$items_by_id[$item->get_id(true)] = $item;
}
}
if (isset($data->data['child'][SIMPLEPIE_NAMESPACE_ATOM_10]['feed'][0]))
{
$channel =& $data->data['child'][SIMPLEPIE_NAMESPACE_ATOM_10]['feed'][0];
}
elseif (isset($data->data['child'][SIMPLEPIE_NAMESPACE_ATOM_03]['feed'][0]))
{
$channel =& $data->data['child'][SIMPLEPIE_NAMESPACE_ATOM_03]['feed'][0];
}
elseif (isset($data->data['child'][SIMPLEPIE_NAMESPACE_RDF]['RDF'][0]))
{
$channel =& $data->data['child'][SIMPLEPIE_NAMESPACE_RDF]['RDF'][0];
}
elseif (isset($data->data['child'][SIMPLEPIE_NAMESPACE_RSS_20]['rss'][0]['child'][SIMPLEPIE_NAMESPACE_RSS_20]['channel'][0]))
{
$channel =& $data->data['child'][SIMPLEPIE_NAMESPACE_RSS_20]['rss'][0]['child'][SIMPLEPIE_NAMESPACE_RSS_20]['channel'][0];
}
else
{
$channel = null;
}
if ($channel !== null)
{
if (isset($channel['child'][SIMPLEPIE_NAMESPACE_ATOM_10]['entry']))
{
unset($channel['child'][SIMPLEPIE_NAMESPACE_ATOM_10]['entry']);
}
if (isset($channel['child'][SIMPLEPIE_NAMESPACE_ATOM_03]['entry']))
{
unset($channel['child'][SIMPLEPIE_NAMESPACE_ATOM_03]['entry']);
}
if (isset($channel['child'][SIMPLEPIE_NAMESPACE_RSS_10]['item']))
{
unset($channel['child'][SIMPLEPIE_NAMESPACE_RSS_10]['item']);
}
if (isset($channel['child'][SIMPLEPIE_NAMESPACE_RSS_090]['item']))
{
unset($channel['child'][SIMPLEPIE_NAMESPACE_RSS_090]['item']);
}
if (isset($channel['child'][SIMPLEPIE_NAMESPACE_RSS_20]['item']))
{
unset($channel['child'][SIMPLEPIE_NAMESPACE_RSS_20]['item']);
}
}
if (isset($data->data['items']))
{
unset($data->data['items']);
}
if (isset($data->data['ordered_items']))
{
unset($data->data['ordered_items']);
}
}
return array(serialize($data->data), $items_by_id);
}
}

View file

@ -0,0 +1,173 @@
<?php
/**
* SimplePie
*
* A PHP-Based RSS and Atom Feed Framework.
* Takes the hard work out of managing a complete RSS/Atom solution.
*
* Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are
* permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice, this list of
* conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice, this list
* of conditions and the following disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* * Neither the name of the SimplePie Team nor the names of its contributors may be used
* to endorse or promote products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
* AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
* AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* @package SimplePie
* @version 1.3.1
* @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @author Ryan Parman
* @author Geoffrey Sneddon
* @author Ryan McCue
* @link http://simplepie.org/ SimplePie
* @license http://www.opensource.org/licenses/bsd-license.php BSD License
*/
/**
* Caches data to the filesystem
*
* @package SimplePie
* @subpackage Caching
*/
class SimplePie_Cache_File implements SimplePie_Cache_Base
{
/**
* Location string
*
* @see SimplePie::$cache_location
* @var string
*/
protected $location;
/**
* Filename
*
* @var string
*/
protected $filename;
/**
* File extension
*
* @var string
*/
protected $extension;
/**
* File path
*
* @var string
*/
protected $name;
/**
* Create a new cache object
*
* @param string $location Location string (from SimplePie::$cache_location)
* @param string $name Unique ID for the cache
* @param string $type Either TYPE_FEED for SimplePie data, or TYPE_IMAGE for image data
*/
public function __construct($location, $name, $type)
{
$this->location = $location;
$this->filename = $name;
$this->extension = $type;
$this->name = "$this->location/$this->filename.$this->extension";
}
/**
* Save data to the cache
*
* @param array|SimplePie $data Data to store in the cache. If passed a SimplePie object, only cache the $data property
* @return bool Successfulness
*/
public function save($data)
{
if (file_exists($this->name) && is_writeable($this->name) || file_exists($this->location) && is_writeable($this->location))
{
if ($data instanceof SimplePie)
{
$data = $data->data;
}
$data = serialize($data);
return (bool) file_put_contents($this->name, $data);
}
return false;
}
/**
* Retrieve the data saved to the cache
*
* @return array Data for SimplePie::$data
*/
public function load()
{
if (file_exists($this->name) && is_readable($this->name))
{
return unserialize(file_get_contents($this->name));
}
return false;
}
/**
* Retrieve the last modified time for the cache
*
* @return int Timestamp
*/
public function mtime()
{
if (file_exists($this->name))
{
return filemtime($this->name);
}
return false;
}
/**
* Set the last modified time to the current time
*
* @return bool Success status
*/
public function touch()
{
if (file_exists($this->name))
{
return touch($this->name);
}
return false;
}
/**
* Remove the cache
*
* @return bool Success status
*/
public function unlink()
{
if (file_exists($this->name))
{
return unlink($this->name);
}
return false;
}
}

View file

@ -0,0 +1,183 @@
<?php
/**
* SimplePie
*
* A PHP-Based RSS and Atom Feed Framework.
* Takes the hard work out of managing a complete RSS/Atom solution.
*
* Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are
* permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice, this list of
* conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice, this list
* of conditions and the following disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* * Neither the name of the SimplePie Team nor the names of its contributors may be used
* to endorse or promote products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
* AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
* AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* @package SimplePie
* @version 1.3.1
* @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @author Ryan Parman
* @author Geoffrey Sneddon
* @author Ryan McCue
* @link http://simplepie.org/ SimplePie
* @license http://www.opensource.org/licenses/bsd-license.php BSD License
*/
/**
* Caches data to memcache
*
* Registered for URLs with the "memcache" protocol
*
* For example, `memcache://localhost:11211/?timeout=3600&prefix=sp_` will
* connect to memcache on `localhost` on port 11211. All tables will be
* prefixed with `sp_` and data will expire after 3600 seconds
*
* @package SimplePie
* @subpackage Caching
* @uses Memcache
*/
class SimplePie_Cache_Memcache implements SimplePie_Cache_Base
{
/**
* Memcache instance
*
* @var Memcache
*/
protected $cache;
/**
* Options
*
* @var array
*/
protected $options;
/**
* Cache name
*
* @var string
*/
protected $name;
/**
* Create a new cache object
*
* @param string $location Location string (from SimplePie::$cache_location)
* @param string $name Unique ID for the cache
* @param string $type Either TYPE_FEED for SimplePie data, or TYPE_IMAGE for image data
*/
public function __construct($location, $name, $type)
{
$this->options = array(
'host' => '127.0.0.1',
'port' => 11211,
'extras' => array(
'timeout' => 3600, // one hour
'prefix' => 'simplepie_',
),
);
$parsed = SimplePie_Cache::parse_URL($location);
$this->options['host'] = empty($parsed['host']) ? $this->options['host'] : $parsed['host'];
$this->options['port'] = empty($parsed['port']) ? $this->options['port'] : $parsed['port'];
$this->options['extras'] = array_merge($this->options['extras'], $parsed['extras']);
$this->name = $this->options['extras']['prefix'] . md5("$name:$type");
$this->cache = new Memcache();
$this->cache->addServer($this->options['host'], (int) $this->options['port']);
}
/**
* Save data to the cache
*
* @param array|SimplePie $data Data to store in the cache. If passed a SimplePie object, only cache the $data property
* @return bool Successfulness
*/
public function save($data)
{
if ($data instanceof SimplePie)
{
$data = $data->data;
}
return $this->cache->set($this->name, serialize($data), MEMCACHE_COMPRESSED, (int) $this->options['extras']['timeout']);
}
/**
* Retrieve the data saved to the cache
*
* @return array Data for SimplePie::$data
*/
public function load()
{
$data = $this->cache->get($this->name);
if ($data !== false)
{
return unserialize($data);
}
return false;
}
/**
* Retrieve the last modified time for the cache
*
* @return int Timestamp
*/
public function mtime()
{
$data = $this->cache->get($this->name);
if ($data !== false)
{
// essentially ignore the mtime because Memcache expires on it's own
return time();
}
return false;
}
/**
* Set the last modified time to the current time
*
* @return bool Success status
*/
public function touch()
{
$data = $this->cache->get($this->name);
if ($data !== false)
{
return $this->cache->set($this->name, $data, MEMCACHE_COMPRESSED, (int) $this->duration);
}
return false;
}
/**
* Remove the cache
*
* @return bool Success status
*/
public function unlink()
{
return $this->cache->delete($this->name, 0);
}
}

View file

@ -0,0 +1,438 @@
<?php
/**
* SimplePie
*
* A PHP-Based RSS and Atom Feed Framework.
* Takes the hard work out of managing a complete RSS/Atom solution.
*
* Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are
* permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice, this list of
* conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice, this list
* of conditions and the following disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* * Neither the name of the SimplePie Team nor the names of its contributors may be used
* to endorse or promote products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
* AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
* AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* @package SimplePie
* @version 1.3.1
* @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @author Ryan Parman
* @author Geoffrey Sneddon
* @author Ryan McCue
* @link http://simplepie.org/ SimplePie
* @license http://www.opensource.org/licenses/bsd-license.php BSD License
*/
/**
* Caches data to a MySQL database
*
* Registered for URLs with the "mysql" protocol
*
* For example, `mysql://root:password@localhost:3306/mydb?prefix=sp_` will
* connect to the `mydb` database on `localhost` on port 3306, with the user
* `root` and the password `password`. All tables will be prefixed with `sp_`
*
* @package SimplePie
* @subpackage Caching
*/
class SimplePie_Cache_MySQL extends SimplePie_Cache_DB
{
/**
* PDO instance
*
* @var PDO
*/
protected $mysql;
/**
* Options
*
* @var array
*/
protected $options;
/**
* Cache ID
*
* @var string
*/
protected $id;
/**
* Create a new cache object
*
* @param string $location Location string (from SimplePie::$cache_location)
* @param string $name Unique ID for the cache
* @param string $type Either TYPE_FEED for SimplePie data, or TYPE_IMAGE for image data
*/
public function __construct($location, $name, $type)
{
$this->options = array(
'user' => null,
'pass' => null,
'host' => '127.0.0.1',
'port' => '3306',
'path' => '',
'extras' => array(
'prefix' => '',
),
);
$this->options = array_merge_recursive($this->options, SimplePie_Cache::parse_URL($location));
// Path is prefixed with a "/"
$this->options['dbname'] = substr($this->options['path'], 1);
try
{
$this->mysql = new PDO("mysql:dbname={$this->options['dbname']};host={$this->options['host']};port={$this->options['port']}", $this->options['user'], $this->options['pass'], array(PDO::MYSQL_ATTR_INIT_COMMAND => 'SET NAMES utf8'));
}
catch (PDOException $e)
{
$this->mysql = null;
return;
}
$this->id = $name . $type;
if (!$query = $this->mysql->query('SHOW TABLES'))
{
$this->mysql = null;
return;
}
$db = array();
while ($row = $query->fetchColumn())
{
$db[] = $row;
}
if (!in_array($this->options['extras']['prefix'] . 'cache_data', $db))
{
$query = $this->mysql->exec('CREATE TABLE `' . $this->options['extras']['prefix'] . 'cache_data` (`id` TEXT CHARACTER SET utf8 NOT NULL, `items` SMALLINT NOT NULL DEFAULT 0, `data` BLOB NOT NULL, `mtime` INT UNSIGNED NOT NULL, UNIQUE (`id`(125)))');
if ($query === false)
{
$this->mysql = null;
}
}
if (!in_array($this->options['extras']['prefix'] . 'items', $db))
{
$query = $this->mysql->exec('CREATE TABLE `' . $this->options['extras']['prefix'] . 'items` (`feed_id` TEXT CHARACTER SET utf8 NOT NULL, `id` TEXT CHARACTER SET utf8 NOT NULL, `data` TEXT CHARACTER SET utf8 NOT NULL, `posted` INT UNSIGNED NOT NULL, INDEX `feed_id` (`feed_id`(125)))');
if ($query === false)
{
$this->mysql = null;
}
}
}
/**
* Save data to the cache
*
* @param array|SimplePie $data Data to store in the cache. If passed a SimplePie object, only cache the $data property
* @return bool Successfulness
*/
public function save($data)
{
if ($this->mysql === null)
{
return false;
}
if ($data instanceof SimplePie)
{
$data = clone $data;
$prepared = self::prepare_simplepie_object_for_cache($data);
$query = $this->mysql->prepare('SELECT COUNT(*) FROM `' . $this->options['extras']['prefix'] . 'cache_data` WHERE `id` = :feed');
$query->bindValue(':feed', $this->id);
if ($query->execute())
{
if ($query->fetchColumn() > 0)
{
$items = count($prepared[1]);
if ($items)
{
$sql = 'UPDATE `' . $this->options['extras']['prefix'] . 'cache_data` SET `items` = :items, `data` = :data, `mtime` = :time WHERE `id` = :feed';
$query = $this->mysql->prepare($sql);
$query->bindValue(':items', $items);
}
else
{
$sql = 'UPDATE `' . $this->options['extras']['prefix'] . 'cache_data` SET `data` = :data, `mtime` = :time WHERE `id` = :feed';
$query = $this->mysql->prepare($sql);
}
$query->bindValue(':data', $prepared[0]);
$query->bindValue(':time', time());
$query->bindValue(':feed', $this->id);
if (!$query->execute())
{
return false;
}
}
else
{
$query = $this->mysql->prepare('INSERT INTO `' . $this->options['extras']['prefix'] . 'cache_data` (`id`, `items`, `data`, `mtime`) VALUES(:feed, :count, :data, :time)');
$query->bindValue(':feed', $this->id);
$query->bindValue(':count', count($prepared[1]));
$query->bindValue(':data', $prepared[0]);
$query->bindValue(':time', time());
if (!$query->execute())
{
return false;
}
}
$ids = array_keys($prepared[1]);
if (!empty($ids))
{
foreach ($ids as $id)
{
$database_ids[] = $this->mysql->quote($id);
}
$query = $this->mysql->prepare('SELECT `id` FROM `' . $this->options['extras']['prefix'] . 'items` WHERE `id` = ' . implode(' OR `id` = ', $database_ids) . ' AND `feed_id` = :feed');
$query->bindValue(':feed', $this->id);
if ($query->execute())
{
$existing_ids = array();
while ($row = $query->fetchColumn())
{
$existing_ids[] = $row;
}
$new_ids = array_diff($ids, $existing_ids);
foreach ($new_ids as $new_id)
{
if (!($date = $prepared[1][$new_id]->get_date('U')))
{
$date = time();
}
$query = $this->mysql->prepare('INSERT INTO `' . $this->options['extras']['prefix'] . 'items` (`feed_id`, `id`, `data`, `posted`) VALUES(:feed, :id, :data, :date)');
$query->bindValue(':feed', $this->id);
$query->bindValue(':id', $new_id);
$query->bindValue(':data', serialize($prepared[1][$new_id]->data));
$query->bindValue(':date', $date);
if (!$query->execute())
{
return false;
}
}
return true;
}
}
else
{
return true;
}
}
}
else
{
$query = $this->mysql->prepare('SELECT `id` FROM `' . $this->options['extras']['prefix'] . 'cache_data` WHERE `id` = :feed');
$query->bindValue(':feed', $this->id);
if ($query->execute())
{
if ($query->rowCount() > 0)
{
$query = $this->mysql->prepare('UPDATE `' . $this->options['extras']['prefix'] . 'cache_data` SET `items` = 0, `data` = :data, `mtime` = :time WHERE `id` = :feed');
$query->bindValue(':data', serialize($data));
$query->bindValue(':time', time());
$query->bindValue(':feed', $this->id);
if ($this->execute())
{
return true;
}
}
else
{
$query = $this->mysql->prepare('INSERT INTO `' . $this->options['extras']['prefix'] . 'cache_data` (`id`, `items`, `data`, `mtime`) VALUES(:id, 0, :data, :time)');
$query->bindValue(':id', $this->id);
$query->bindValue(':data', serialize($data));
$query->bindValue(':time', time());
if ($query->execute())
{
return true;
}
}
}
}
return false;
}
/**
* Retrieve the data saved to the cache
*
* @return array Data for SimplePie::$data
*/
public function load()
{
if ($this->mysql === null)
{
return false;
}
$query = $this->mysql->prepare('SELECT `items`, `data` FROM `' . $this->options['extras']['prefix'] . 'cache_data` WHERE `id` = :id');
$query->bindValue(':id', $this->id);
if ($query->execute() && ($row = $query->fetch()))
{
$data = unserialize($row[1]);
if (isset($this->options['items'][0]))
{
$items = (int) $this->options['items'][0];
}
else
{
$items = (int) $row[0];
}
if ($items !== 0)
{
if (isset($data['child'][SIMPLEPIE_NAMESPACE_ATOM_10]['feed'][0]))
{
$feed =& $data['child'][SIMPLEPIE_NAMESPACE_ATOM_10]['feed'][0];
}
elseif (isset($data['child'][SIMPLEPIE_NAMESPACE_ATOM_03]['feed'][0]))
{
$feed =& $data['child'][SIMPLEPIE_NAMESPACE_ATOM_03]['feed'][0];
}
elseif (isset($data['child'][SIMPLEPIE_NAMESPACE_RDF]['RDF'][0]))
{
$feed =& $data['child'][SIMPLEPIE_NAMESPACE_RDF]['RDF'][0];
}
elseif (isset($data['child'][SIMPLEPIE_NAMESPACE_RSS_20]['rss'][0]))
{
$feed =& $data['child'][SIMPLEPIE_NAMESPACE_RSS_20]['rss'][0];
}
else
{
$feed = null;
}
if ($feed !== null)
{
$sql = 'SELECT `data` FROM `' . $this->options['extras']['prefix'] . 'items` WHERE `feed_id` = :feed ORDER BY `posted` DESC';
if ($items > 0)
{
$sql .= ' LIMIT ' . $items;
}
$query = $this->mysql->prepare($sql);
$query->bindValue(':feed', $this->id);
if ($query->execute())
{
while ($row = $query->fetchColumn())
{
$feed['child'][SIMPLEPIE_NAMESPACE_ATOM_10]['entry'][] = unserialize($row);
}
}
else
{
return false;
}
}
}
return $data;
}
return false;
}
/**
* Retrieve the last modified time for the cache
*
* @return int Timestamp
*/
public function mtime()
{
if ($this->mysql === null)
{
return false;
}
$query = $this->mysql->prepare('SELECT `mtime` FROM `' . $this->options['extras']['prefix'] . 'cache_data` WHERE `id` = :id');
$query->bindValue(':id', $this->id);
if ($query->execute() && ($time = $query->fetchColumn()))
{
return $time;
}
else
{
return false;
}
}
/**
* Set the last modified time to the current time
*
* @return bool Success status
*/
public function touch()
{
if ($this->mysql === null)
{
return false;
}
$query = $this->mysql->prepare('UPDATE `' . $this->options['extras']['prefix'] . 'cache_data` SET `mtime` = :time WHERE `id` = :id');
$query->bindValue(':time', time());
$query->bindValue(':id', $this->id);
if ($query->execute() && $query->rowCount() > 0)
{
return true;
}
else
{
return false;
}
}
/**
* Remove the cache
*
* @return bool Success status
*/
public function unlink()
{
if ($this->mysql === null)
{
return false;
}
$query = $this->mysql->prepare('DELETE FROM `' . $this->options['extras']['prefix'] . 'cache_data` WHERE `id` = :id');
$query->bindValue(':id', $this->id);
$query2 = $this->mysql->prepare('DELETE FROM `' . $this->options['extras']['prefix'] . 'items` WHERE `feed_id` = :id');
$query2->bindValue(':id', $this->id);
if ($query->execute() && $query2->execute())
{
return true;
}
else
{
return false;
}
}
}

View file

@ -0,0 +1,210 @@
<?php
/**
* SimplePie
*
* A PHP-Based RSS and Atom Feed Framework.
* Takes the hard work out of managing a complete RSS/Atom solution.
*
* Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are
* permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice, this list of
* conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice, this list
* of conditions and the following disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* * Neither the name of the SimplePie Team nor the names of its contributors may be used
* to endorse or promote products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
* AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
* AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* @package SimplePie
* @version 1.3.1
* @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @author Ryan Parman
* @author Geoffrey Sneddon
* @author Ryan McCue
* @link http://simplepie.org/ SimplePie
* @license http://www.opensource.org/licenses/bsd-license.php BSD License
*/
/**
* Handles `<media:text>` captions as defined in Media RSS.
*
* Used by {@see SimplePie_Enclosure::get_caption()} and {@see SimplePie_Enclosure::get_captions()}
*
* This class can be overloaded with {@see SimplePie::set_caption_class()}
*
* @package SimplePie
* @subpackage API
*/
class SimplePie_Caption
{
/**
* Content type
*
* @var string
* @see get_type()
*/
var $type;
/**
* Language
*
* @var string
* @see get_language()
*/
var $lang;
/**
* Start time
*
* @var string
* @see get_starttime()
*/
var $startTime;
/**
* End time
*
* @var string
* @see get_endtime()
*/
var $endTime;
/**
* Caption text
*
* @var string
* @see get_text()
*/
var $text;
/**
* Constructor, used to input the data
*
* For documentation on all the parameters, see the corresponding
* properties and their accessors
*/
public function __construct($type = null, $lang = null, $startTime = null, $endTime = null, $text = null)
{
$this->type = $type;
$this->lang = $lang;
$this->startTime = $startTime;
$this->endTime = $endTime;
$this->text = $text;
}
/**
* String-ified version
*
* @return string
*/
public function __toString()
{
// There is no $this->data here
return md5(serialize($this));
}
/**
* Get the end time
*
* @return string|null Time in the format 'hh:mm:ss.SSS'
*/
public function get_endtime()
{
if ($this->endTime !== null)
{
return $this->endTime;
}
else
{
return null;
}
}
/**
* Get the language
*
* @link http://tools.ietf.org/html/rfc3066
* @return string|null Language code as per RFC 3066
*/
public function get_language()
{
if ($this->lang !== null)
{
return $this->lang;
}
else
{
return null;
}
}
/**
* Get the start time
*
* @return string|null Time in the format 'hh:mm:ss.SSS'
*/
public function get_starttime()
{
if ($this->startTime !== null)
{
return $this->startTime;
}
else
{
return null;
}
}
/**
* Get the text of the caption
*
* @return string|null
*/
public function get_text()
{
if ($this->text !== null)
{
return $this->text;
}
else
{
return null;
}
}
/**
* Get the content type (not MIME type)
*
* @return string|null Either 'text' or 'html'
*/
public function get_type()
{
if ($this->type !== null)
{
return $this->type;
}
else
{
return null;
}
}
}

View file

@ -0,0 +1,157 @@
<?php
/**
* SimplePie
*
* A PHP-Based RSS and Atom Feed Framework.
* Takes the hard work out of managing a complete RSS/Atom solution.
*
* Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are
* permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice, this list of
* conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice, this list
* of conditions and the following disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* * Neither the name of the SimplePie Team nor the names of its contributors may be used
* to endorse or promote products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
* AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
* AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* @package SimplePie
* @version 1.3.1
* @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @author Ryan Parman
* @author Geoffrey Sneddon
* @author Ryan McCue
* @link http://simplepie.org/ SimplePie
* @license http://www.opensource.org/licenses/bsd-license.php BSD License
*/
/**
* Manages all category-related data
*
* Used by {@see SimplePie_Item::get_category()} and {@see SimplePie_Item::get_categories()}
*
* This class can be overloaded with {@see SimplePie::set_category_class()}
*
* @package SimplePie
* @subpackage API
*/
class SimplePie_Category
{
/**
* Category identifier
*
* @var string
* @see get_term
*/
var $term;
/**
* Categorization scheme identifier
*
* @var string
* @see get_scheme()
*/
var $scheme;
/**
* Human readable label
*
* @var string
* @see get_label()
*/
var $label;
/**
* Constructor, used to input the data
*
* @param string $term
* @param string $scheme
* @param string $label
*/
public function __construct($term = null, $scheme = null, $label = null)
{
$this->term = $term;
$this->scheme = $scheme;
$this->label = $label;
}
/**
* String-ified version
*
* @return string
*/
public function __toString()
{
// There is no $this->data here
return md5(serialize($this));
}
/**
* Get the category identifier
*
* @return string|null
*/
public function get_term()
{
if ($this->term !== null)
{
return $this->term;
}
else
{
return null;
}
}
/**
* Get the categorization scheme identifier
*
* @return string|null
*/
public function get_scheme()
{
if ($this->scheme !== null)
{
return $this->scheme;
}
else
{
return null;
}
}
/**
* Get the human readable label
*
* @return string|null
*/
public function get_label()
{
if ($this->label !== null)
{
return $this->label;
}
else
{
return $this->get_term();
}
}
}

View file

@ -0,0 +1,332 @@
<?php
/**
* SimplePie
*
* A PHP-Based RSS and Atom Feed Framework.
* Takes the hard work out of managing a complete RSS/Atom solution.
*
* Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are
* permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice, this list of
* conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice, this list
* of conditions and the following disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* * Neither the name of the SimplePie Team nor the names of its contributors may be used
* to endorse or promote products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
* AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
* AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* @package SimplePie
* @version 1.3.1
* @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @author Ryan Parman
* @author Geoffrey Sneddon
* @author Ryan McCue
* @link http://simplepie.org/ SimplePie
* @license http://www.opensource.org/licenses/bsd-license.php BSD License
*/
/**
* Content-type sniffing
*
* Based on the rules in http://tools.ietf.org/html/draft-abarth-mime-sniff-06
*
* This is used since we can't always trust Content-Type headers, and is based
* upon the HTML5 parsing rules.
*
*
* This class can be overloaded with {@see SimplePie::set_content_type_sniffer_class()}
*
* @package SimplePie
* @subpackage HTTP
*/
class SimplePie_Content_Type_Sniffer
{
/**
* File object
*
* @var SimplePie_File
*/
var $file;
/**
* Create an instance of the class with the input file
*
* @param SimplePie_Content_Type_Sniffer $file Input file
*/
public function __construct($file)
{
$this->file = $file;
}
/**
* Get the Content-Type of the specified file
*
* @return string Actual Content-Type
*/
public function get_type()
{
if (isset($this->file->headers['content-type']))
{
if (!isset($this->file->headers['content-encoding'])
&& ($this->file->headers['content-type'] === 'text/plain'
|| $this->file->headers['content-type'] === 'text/plain; charset=ISO-8859-1'
|| $this->file->headers['content-type'] === 'text/plain; charset=iso-8859-1'
|| $this->file->headers['content-type'] === 'text/plain; charset=UTF-8'))
{
return $this->text_or_binary();
}
if (($pos = strpos($this->file->headers['content-type'], ';')) !== false)
{
$official = substr($this->file->headers['content-type'], 0, $pos);
}
else
{
$official = $this->file->headers['content-type'];
}
$official = trim(strtolower($official));
if ($official === 'unknown/unknown'
|| $official === 'application/unknown')
{
return $this->unknown();
}
elseif (substr($official, -4) === '+xml'
|| $official === 'text/xml'
|| $official === 'application/xml')
{
return $official;
}
elseif (substr($official, 0, 6) === 'image/')
{
if ($return = $this->image())
{
return $return;
}
else
{
return $official;
}
}
elseif ($official === 'text/html')
{
return $this->feed_or_html();
}
else
{
return $official;
}
}
else
{
return $this->unknown();
}
}
/**
* Sniff text or binary
*
* @return string Actual Content-Type
*/
public function text_or_binary()
{
if (substr($this->file->body, 0, 2) === "\xFE\xFF"
|| substr($this->file->body, 0, 2) === "\xFF\xFE"
|| substr($this->file->body, 0, 4) === "\x00\x00\xFE\xFF"
|| substr($this->file->body, 0, 3) === "\xEF\xBB\xBF")
{
return 'text/plain';
}
elseif (preg_match('/[\x00-\x08\x0E-\x1A\x1C-\x1F]/', $this->file->body))
{
return 'application/octect-stream';
}
else
{
return 'text/plain';
}
}
/**
* Sniff unknown
*
* @return string Actual Content-Type
*/
public function unknown()
{
$ws = strspn($this->file->body, "\x09\x0A\x0B\x0C\x0D\x20");
if (strtolower(substr($this->file->body, $ws, 14)) === '<!doctype html'
|| strtolower(substr($this->file->body, $ws, 5)) === '<html'
|| strtolower(substr($this->file->body, $ws, 7)) === '<script')
{
return 'text/html';
}
elseif (substr($this->file->body, 0, 5) === '%PDF-')
{
return 'application/pdf';
}
elseif (substr($this->file->body, 0, 11) === '%!PS-Adobe-')
{
return 'application/postscript';
}
elseif (substr($this->file->body, 0, 6) === 'GIF87a'
|| substr($this->file->body, 0, 6) === 'GIF89a')
{
return 'image/gif';
}
elseif (substr($this->file->body, 0, 8) === "\x89\x50\x4E\x47\x0D\x0A\x1A\x0A")
{
return 'image/png';
}
elseif (substr($this->file->body, 0, 3) === "\xFF\xD8\xFF")
{
return 'image/jpeg';
}
elseif (substr($this->file->body, 0, 2) === "\x42\x4D")
{
return 'image/bmp';
}
elseif (substr($this->file->body, 0, 4) === "\x00\x00\x01\x00")
{
return 'image/vnd.microsoft.icon';
}
else
{
return $this->text_or_binary();
}
}
/**
* Sniff images
*
* @return string Actual Content-Type
*/
public function image()
{
if (substr($this->file->body, 0, 6) === 'GIF87a'
|| substr($this->file->body, 0, 6) === 'GIF89a')
{
return 'image/gif';
}
elseif (substr($this->file->body, 0, 8) === "\x89\x50\x4E\x47\x0D\x0A\x1A\x0A")
{
return 'image/png';
}
elseif (substr($this->file->body, 0, 3) === "\xFF\xD8\xFF")
{
return 'image/jpeg';
}
elseif (substr($this->file->body, 0, 2) === "\x42\x4D")
{
return 'image/bmp';
}
elseif (substr($this->file->body, 0, 4) === "\x00\x00\x01\x00")
{
return 'image/vnd.microsoft.icon';
}
else
{
return false;
}
}
/**
* Sniff HTML
*
* @return string Actual Content-Type
*/
public function feed_or_html()
{
$len = strlen($this->file->body);
$pos = strspn($this->file->body, "\x09\x0A\x0D\x20");
while ($pos < $len)
{
switch ($this->file->body[$pos])
{
case "\x09":
case "\x0A":
case "\x0D":
case "\x20":
$pos += strspn($this->file->body, "\x09\x0A\x0D\x20", $pos);
continue 2;
case '<':
$pos++;
break;
default:
return 'text/html';
}
if (substr($this->file->body, $pos, 3) === '!--')
{
$pos += 3;
if ($pos < $len && ($pos = strpos($this->file->body, '-->', $pos)) !== false)
{
$pos += 3;
}
else
{
return 'text/html';
}
}
elseif (substr($this->file->body, $pos, 1) === '!')
{
if ($pos < $len && ($pos = strpos($this->file->body, '>', $pos)) !== false)
{
$pos++;
}
else
{
return 'text/html';
}
}
elseif (substr($this->file->body, $pos, 1) === '?')
{
if ($pos < $len && ($pos = strpos($this->file->body, '?>', $pos)) !== false)
{
$pos += 2;
}
else
{
return 'text/html';
}
}
elseif (substr($this->file->body, $pos, 3) === 'rss'
|| substr($this->file->body, $pos, 7) === 'rdf:RDF')
{
return 'application/rss+xml';
}
elseif (substr($this->file->body, $pos, 4) === 'feed')
{
return 'application/atom+xml';
}
else
{
return 'text/html';
}
}
return 'text/html';
}
}

View file

@ -0,0 +1,130 @@
<?php
/**
* SimplePie
*
* A PHP-Based RSS and Atom Feed Framework.
* Takes the hard work out of managing a complete RSS/Atom solution.
*
* Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are
* permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice, this list of
* conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice, this list
* of conditions and the following disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* * Neither the name of the SimplePie Team nor the names of its contributors may be used
* to endorse or promote products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
* AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
* AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* @package SimplePie
* @version 1.3.1
* @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @author Ryan Parman
* @author Geoffrey Sneddon
* @author Ryan McCue
* @link http://simplepie.org/ SimplePie
* @license http://www.opensource.org/licenses/bsd-license.php BSD License
*/
/**
* Manages `<media:copyright>` copyright tags as defined in Media RSS
*
* Used by {@see SimplePie_Enclosure::get_copyright()}
*
* This class can be overloaded with {@see SimplePie::set_copyright_class()}
*
* @package SimplePie
* @subpackage API
*/
class SimplePie_Copyright
{
/**
* Copyright URL
*
* @var string
* @see get_url()
*/
var $url;
/**
* Attribution
*
* @var string
* @see get_attribution()
*/
var $label;
/**
* Constructor, used to input the data
*
* For documentation on all the parameters, see the corresponding
* properties and their accessors
*/
public function __construct($url = null, $label = null)
{
$this->url = $url;
$this->label = $label;
}
/**
* String-ified version
*
* @return string
*/
public function __toString()
{
// There is no $this->data here
return md5(serialize($this));
}
/**
* Get the copyright URL
*
* @return string|null URL to copyright information
*/
public function get_url()
{
if ($this->url !== null)
{
return $this->url;
}
else
{
return null;
}
}
/**
* Get the attribution text
*
* @return string|null
*/
public function get_attribution()
{
if ($this->label !== null)
{
return $this->label;
}
else
{
return null;
}
}
}

View file

@ -0,0 +1,57 @@
<?php
/**
* SimplePie
*
* A PHP-Based RSS and Atom Feed Framework.
* Takes the hard work out of managing a complete RSS/Atom solution.
*
* Copyright (c) 2004-2009, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are
* permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice, this list of
* conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice, this list
* of conditions and the following disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* * Neither the name of the SimplePie Team nor the names of its contributors may be used
* to endorse or promote products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
* AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
* AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* @package SimplePie
* @version 1.3.1
* @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @author Ryan Parman
* @author Geoffrey Sneddon
* @author Ryan McCue
* @link http://simplepie.org/ SimplePie
* @license http://www.opensource.org/licenses/bsd-license.php BSD License
*/
/**
* SimplePie class.
*
* Class for backward compatibility.
*
* @deprecated Use {@see SimplePie} directly
* @package SimplePie
* @subpackage API
*/
class SimplePie_Core extends SimplePie
{
}

View file

@ -0,0 +1,156 @@
<?php
/**
* SimplePie
*
* A PHP-Based RSS and Atom Feed Framework.
* Takes the hard work out of managing a complete RSS/Atom solution.
*
* Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are
* permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice, this list of
* conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice, this list
* of conditions and the following disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* * Neither the name of the SimplePie Team nor the names of its contributors may be used
* to endorse or promote products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
* AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
* AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* @package SimplePie
* @version 1.3.1
* @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @author Ryan Parman
* @author Geoffrey Sneddon
* @author Ryan McCue
* @link http://simplepie.org/ SimplePie
* @license http://www.opensource.org/licenses/bsd-license.php BSD License
*/
/**
* Handles `<media:credit>` as defined in Media RSS
*
* Used by {@see SimplePie_Enclosure::get_credit()} and {@see SimplePie_Enclosure::get_credits()}
*
* This class can be overloaded with {@see SimplePie::set_credit_class()}
*
* @package SimplePie
* @subpackage API
*/
class SimplePie_Credit
{
/**
* Credited role
*
* @var string
* @see get_role()
*/
var $role;
/**
* Organizational scheme
*
* @var string
* @see get_scheme()
*/
var $scheme;
/**
* Credited name
*
* @var string
* @see get_name()
*/
var $name;
/**
* Constructor, used to input the data
*
* For documentation on all the parameters, see the corresponding
* properties and their accessors
*/
public function __construct($role = null, $scheme = null, $name = null)
{
$this->role = $role;
$this->scheme = $scheme;
$this->name = $name;
}
/**
* String-ified version
*
* @return string
*/
public function __toString()
{
// There is no $this->data here
return md5(serialize($this));
}
/**
* Get the role of the person receiving credit
*
* @return string|null
*/
public function get_role()
{
if ($this->role !== null)
{
return $this->role;
}
else
{
return null;
}
}
/**
* Get the organizational scheme
*
* @return string|null
*/
public function get_scheme()
{
if ($this->scheme !== null)
{
return $this->scheme;
}
else
{
return null;
}
}
/**
* Get the credited person/entity's name
*
* @return string|null
*/
public function get_name()
{
if ($this->name !== null)
{
return $this->name;
}
else
{
return null;
}
}
}

View file

@ -0,0 +1,617 @@
<?php
/**
* SimplePie
*
* A PHP-Based RSS and Atom Feed Framework.
* Takes the hard work out of managing a complete RSS/Atom solution.
*
* Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are
* permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice, this list of
* conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice, this list
* of conditions and the following disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* * Neither the name of the SimplePie Team nor the names of its contributors may be used
* to endorse or promote products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
* AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
* AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* @package SimplePie
* @version 1.3.1
* @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @author Ryan Parman
* @author Geoffrey Sneddon
* @author Ryan McCue
* @link http://simplepie.org/ SimplePie
* @license http://www.opensource.org/licenses/bsd-license.php BSD License
*/
/**
* Decode HTML Entities
*
* This implements HTML5 as of revision 967 (2007-06-28)
*
* @deprecated Use DOMDocument instead!
* @package SimplePie
*/
class SimplePie_Decode_HTML_Entities
{
/**
* Data to be parsed
*
* @access private
* @var string
*/
var $data = '';
/**
* Currently consumed bytes
*
* @access private
* @var string
*/
var $consumed = '';
/**
* Position of the current byte being parsed
*
* @access private
* @var int
*/
var $position = 0;
/**
* Create an instance of the class with the input data
*
* @access public
* @param string $data Input data
*/
public function __construct($data)
{
$this->data = $data;
}
/**
* Parse the input data
*
* @access public
* @return string Output data
*/
public function parse()
{
while (($this->position = strpos($this->data, '&', $this->position)) !== false)
{
$this->consume();
$this->entity();
$this->consumed = '';
}
return $this->data;
}
/**
* Consume the next byte
*
* @access private
* @return mixed The next byte, or false, if there is no more data
*/
public function consume()
{
if (isset($this->data[$this->position]))
{
$this->consumed .= $this->data[$this->position];
return $this->data[$this->position++];
}
else
{
return false;
}
}
/**
* Consume a range of characters
*
* @access private
* @param string $chars Characters to consume
* @return mixed A series of characters that match the range, or false
*/
public function consume_range($chars)
{
if ($len = strspn($this->data, $chars, $this->position))
{
$data = substr($this->data, $this->position, $len);
$this->consumed .= $data;
$this->position += $len;
return $data;
}
else
{
return false;
}
}
/**
* Unconsume one byte
*
* @access private
*/
public function unconsume()
{
$this->consumed = substr($this->consumed, 0, -1);
$this->position--;
}
/**
* Decode an entity
*
* @access private
*/
public function entity()
{
switch ($this->consume())
{
case "\x09":
case "\x0A":
case "\x0B":
case "\x0B":
case "\x0C":
case "\x20":
case "\x3C":
case "\x26":
case false:
break;
case "\x23":
switch ($this->consume())
{
case "\x78":
case "\x58":
$range = '0123456789ABCDEFabcdef';
$hex = true;
break;
default:
$range = '0123456789';
$hex = false;
$this->unconsume();
break;
}
if ($codepoint = $this->consume_range($range))
{
static $windows_1252_specials = array(0x0D => "\x0A", 0x80 => "\xE2\x82\xAC", 0x81 => "\xEF\xBF\xBD", 0x82 => "\xE2\x80\x9A", 0x83 => "\xC6\x92", 0x84 => "\xE2\x80\x9E", 0x85 => "\xE2\x80\xA6", 0x86 => "\xE2\x80\xA0", 0x87 => "\xE2\x80\xA1", 0x88 => "\xCB\x86", 0x89 => "\xE2\x80\xB0", 0x8A => "\xC5\xA0", 0x8B => "\xE2\x80\xB9", 0x8C => "\xC5\x92", 0x8D => "\xEF\xBF\xBD", 0x8E => "\xC5\xBD", 0x8F => "\xEF\xBF\xBD", 0x90 => "\xEF\xBF\xBD", 0x91 => "\xE2\x80\x98", 0x92 => "\xE2\x80\x99", 0x93 => "\xE2\x80\x9C", 0x94 => "\xE2\x80\x9D", 0x95 => "\xE2\x80\xA2", 0x96 => "\xE2\x80\x93", 0x97 => "\xE2\x80\x94", 0x98 => "\xCB\x9C", 0x99 => "\xE2\x84\xA2", 0x9A => "\xC5\xA1", 0x9B => "\xE2\x80\xBA", 0x9C => "\xC5\x93", 0x9D => "\xEF\xBF\xBD", 0x9E => "\xC5\xBE", 0x9F => "\xC5\xB8");
if ($hex)
{
$codepoint = hexdec($codepoint);
}
else
{
$codepoint = intval($codepoint);
}
if (isset($windows_1252_specials[$codepoint]))
{
$replacement = $windows_1252_specials[$codepoint];
}
else
{
$replacement = SimplePie_Misc::codepoint_to_utf8($codepoint);
}
if (!in_array($this->consume(), array(';', false), true))
{
$this->unconsume();
}
$consumed_length = strlen($this->consumed);
$this->data = substr_replace($this->data, $replacement, $this->position - $consumed_length, $consumed_length);
$this->position += strlen($replacement) - $consumed_length;
}
break;
default:
static $entities = array(
'Aacute' => "\xC3\x81",
'aacute' => "\xC3\xA1",
'Aacute;' => "\xC3\x81",
'aacute;' => "\xC3\xA1",
'Acirc' => "\xC3\x82",
'acirc' => "\xC3\xA2",
'Acirc;' => "\xC3\x82",
'acirc;' => "\xC3\xA2",
'acute' => "\xC2\xB4",
'acute;' => "\xC2\xB4",
'AElig' => "\xC3\x86",
'aelig' => "\xC3\xA6",
'AElig;' => "\xC3\x86",
'aelig;' => "\xC3\xA6",
'Agrave' => "\xC3\x80",
'agrave' => "\xC3\xA0",
'Agrave;' => "\xC3\x80",
'agrave;' => "\xC3\xA0",
'alefsym;' => "\xE2\x84\xB5",
'Alpha;' => "\xCE\x91",
'alpha;' => "\xCE\xB1",
'AMP' => "\x26",
'amp' => "\x26",
'AMP;' => "\x26",
'amp;' => "\x26",
'and;' => "\xE2\x88\xA7",
'ang;' => "\xE2\x88\xA0",
'apos;' => "\x27",
'Aring' => "\xC3\x85",
'aring' => "\xC3\xA5",
'Aring;' => "\xC3\x85",
'aring;' => "\xC3\xA5",
'asymp;' => "\xE2\x89\x88",
'Atilde' => "\xC3\x83",
'atilde' => "\xC3\xA3",
'Atilde;' => "\xC3\x83",
'atilde;' => "\xC3\xA3",
'Auml' => "\xC3\x84",
'auml' => "\xC3\xA4",
'Auml;' => "\xC3\x84",
'auml;' => "\xC3\xA4",
'bdquo;' => "\xE2\x80\x9E",
'Beta;' => "\xCE\x92",
'beta;' => "\xCE\xB2",
'brvbar' => "\xC2\xA6",
'brvbar;' => "\xC2\xA6",
'bull;' => "\xE2\x80\xA2",
'cap;' => "\xE2\x88\xA9",
'Ccedil' => "\xC3\x87",
'ccedil' => "\xC3\xA7",
'Ccedil;' => "\xC3\x87",
'ccedil;' => "\xC3\xA7",
'cedil' => "\xC2\xB8",
'cedil;' => "\xC2\xB8",
'cent' => "\xC2\xA2",
'cent;' => "\xC2\xA2",
'Chi;' => "\xCE\xA7",
'chi;' => "\xCF\x87",
'circ;' => "\xCB\x86",
'clubs;' => "\xE2\x99\xA3",
'cong;' => "\xE2\x89\x85",
'COPY' => "\xC2\xA9",
'copy' => "\xC2\xA9",
'COPY;' => "\xC2\xA9",
'copy;' => "\xC2\xA9",
'crarr;' => "\xE2\x86\xB5",
'cup;' => "\xE2\x88\xAA",
'curren' => "\xC2\xA4",
'curren;' => "\xC2\xA4",
'Dagger;' => "\xE2\x80\xA1",
'dagger;' => "\xE2\x80\xA0",
'dArr;' => "\xE2\x87\x93",
'darr;' => "\xE2\x86\x93",
'deg' => "\xC2\xB0",
'deg;' => "\xC2\xB0",
'Delta;' => "\xCE\x94",
'delta;' => "\xCE\xB4",
'diams;' => "\xE2\x99\xA6",
'divide' => "\xC3\xB7",
'divide;' => "\xC3\xB7",
'Eacute' => "\xC3\x89",
'eacute' => "\xC3\xA9",
'Eacute;' => "\xC3\x89",
'eacute;' => "\xC3\xA9",
'Ecirc' => "\xC3\x8A",
'ecirc' => "\xC3\xAA",
'Ecirc;' => "\xC3\x8A",
'ecirc;' => "\xC3\xAA",
'Egrave' => "\xC3\x88",
'egrave' => "\xC3\xA8",
'Egrave;' => "\xC3\x88",
'egrave;' => "\xC3\xA8",
'empty;' => "\xE2\x88\x85",
'emsp;' => "\xE2\x80\x83",
'ensp;' => "\xE2\x80\x82",
'Epsilon;' => "\xCE\x95",
'epsilon;' => "\xCE\xB5",
'equiv;' => "\xE2\x89\xA1",
'Eta;' => "\xCE\x97",
'eta;' => "\xCE\xB7",
'ETH' => "\xC3\x90",
'eth' => "\xC3\xB0",
'ETH;' => "\xC3\x90",
'eth;' => "\xC3\xB0",
'Euml' => "\xC3\x8B",
'euml' => "\xC3\xAB",
'Euml;' => "\xC3\x8B",
'euml;' => "\xC3\xAB",
'euro;' => "\xE2\x82\xAC",
'exist;' => "\xE2\x88\x83",
'fnof;' => "\xC6\x92",
'forall;' => "\xE2\x88\x80",
'frac12' => "\xC2\xBD",
'frac12;' => "\xC2\xBD",
'frac14' => "\xC2\xBC",
'frac14;' => "\xC2\xBC",
'frac34' => "\xC2\xBE",
'frac34;' => "\xC2\xBE",
'frasl;' => "\xE2\x81\x84",
'Gamma;' => "\xCE\x93",
'gamma;' => "\xCE\xB3",
'ge;' => "\xE2\x89\xA5",
'GT' => "\x3E",
'gt' => "\x3E",
'GT;' => "\x3E",
'gt;' => "\x3E",
'hArr;' => "\xE2\x87\x94",
'harr;' => "\xE2\x86\x94",
'hearts;' => "\xE2\x99\xA5",
'hellip;' => "\xE2\x80\xA6",
'Iacute' => "\xC3\x8D",
'iacute' => "\xC3\xAD",
'Iacute;' => "\xC3\x8D",
'iacute;' => "\xC3\xAD",
'Icirc' => "\xC3\x8E",
'icirc' => "\xC3\xAE",
'Icirc;' => "\xC3\x8E",
'icirc;' => "\xC3\xAE",
'iexcl' => "\xC2\xA1",
'iexcl;' => "\xC2\xA1",
'Igrave' => "\xC3\x8C",
'igrave' => "\xC3\xAC",
'Igrave;' => "\xC3\x8C",
'igrave;' => "\xC3\xAC",
'image;' => "\xE2\x84\x91",
'infin;' => "\xE2\x88\x9E",
'int;' => "\xE2\x88\xAB",
'Iota;' => "\xCE\x99",
'iota;' => "\xCE\xB9",
'iquest' => "\xC2\xBF",
'iquest;' => "\xC2\xBF",
'isin;' => "\xE2\x88\x88",
'Iuml' => "\xC3\x8F",
'iuml' => "\xC3\xAF",
'Iuml;' => "\xC3\x8F",
'iuml;' => "\xC3\xAF",
'Kappa;' => "\xCE\x9A",
'kappa;' => "\xCE\xBA",
'Lambda;' => "\xCE\x9B",
'lambda;' => "\xCE\xBB",
'lang;' => "\xE3\x80\x88",
'laquo' => "\xC2\xAB",
'laquo;' => "\xC2\xAB",
'lArr;' => "\xE2\x87\x90",
'larr;' => "\xE2\x86\x90",
'lceil;' => "\xE2\x8C\x88",
'ldquo;' => "\xE2\x80\x9C",
'le;' => "\xE2\x89\xA4",
'lfloor;' => "\xE2\x8C\x8A",
'lowast;' => "\xE2\x88\x97",
'loz;' => "\xE2\x97\x8A",
'lrm;' => "\xE2\x80\x8E",
'lsaquo;' => "\xE2\x80\xB9",
'lsquo;' => "\xE2\x80\x98",
'LT' => "\x3C",
'lt' => "\x3C",
'LT;' => "\x3C",
'lt;' => "\x3C",
'macr' => "\xC2\xAF",
'macr;' => "\xC2\xAF",
'mdash;' => "\xE2\x80\x94",
'micro' => "\xC2\xB5",
'micro;' => "\xC2\xB5",
'middot' => "\xC2\xB7",
'middot;' => "\xC2\xB7",
'minus;' => "\xE2\x88\x92",
'Mu;' => "\xCE\x9C",
'mu;' => "\xCE\xBC",
'nabla;' => "\xE2\x88\x87",
'nbsp' => "\xC2\xA0",
'nbsp;' => "\xC2\xA0",
'ndash;' => "\xE2\x80\x93",
'ne;' => "\xE2\x89\xA0",
'ni;' => "\xE2\x88\x8B",
'not' => "\xC2\xAC",
'not;' => "\xC2\xAC",
'notin;' => "\xE2\x88\x89",
'nsub;' => "\xE2\x8A\x84",
'Ntilde' => "\xC3\x91",
'ntilde' => "\xC3\xB1",
'Ntilde;' => "\xC3\x91",
'ntilde;' => "\xC3\xB1",
'Nu;' => "\xCE\x9D",
'nu;' => "\xCE\xBD",
'Oacute' => "\xC3\x93",
'oacute' => "\xC3\xB3",
'Oacute;' => "\xC3\x93",
'oacute;' => "\xC3\xB3",
'Ocirc' => "\xC3\x94",
'ocirc' => "\xC3\xB4",
'Ocirc;' => "\xC3\x94",
'ocirc;' => "\xC3\xB4",
'OElig;' => "\xC5\x92",
'oelig;' => "\xC5\x93",
'Ograve' => "\xC3\x92",
'ograve' => "\xC3\xB2",
'Ograve;' => "\xC3\x92",
'ograve;' => "\xC3\xB2",
'oline;' => "\xE2\x80\xBE",
'Omega;' => "\xCE\xA9",
'omega;' => "\xCF\x89",
'Omicron;' => "\xCE\x9F",
'omicron;' => "\xCE\xBF",
'oplus;' => "\xE2\x8A\x95",
'or;' => "\xE2\x88\xA8",
'ordf' => "\xC2\xAA",
'ordf;' => "\xC2\xAA",
'ordm' => "\xC2\xBA",
'ordm;' => "\xC2\xBA",
'Oslash' => "\xC3\x98",
'oslash' => "\xC3\xB8",
'Oslash;' => "\xC3\x98",
'oslash;' => "\xC3\xB8",
'Otilde' => "\xC3\x95",
'otilde' => "\xC3\xB5",
'Otilde;' => "\xC3\x95",
'otilde;' => "\xC3\xB5",
'otimes;' => "\xE2\x8A\x97",
'Ouml' => "\xC3\x96",
'ouml' => "\xC3\xB6",
'Ouml;' => "\xC3\x96",
'ouml;' => "\xC3\xB6",
'para' => "\xC2\xB6",
'para;' => "\xC2\xB6",
'part;' => "\xE2\x88\x82",
'permil;' => "\xE2\x80\xB0",
'perp;' => "\xE2\x8A\xA5",
'Phi;' => "\xCE\xA6",
'phi;' => "\xCF\x86",
'Pi;' => "\xCE\xA0",
'pi;' => "\xCF\x80",
'piv;' => "\xCF\x96",
'plusmn' => "\xC2\xB1",
'plusmn;' => "\xC2\xB1",
'pound' => "\xC2\xA3",
'pound;' => "\xC2\xA3",
'Prime;' => "\xE2\x80\xB3",
'prime;' => "\xE2\x80\xB2",
'prod;' => "\xE2\x88\x8F",
'prop;' => "\xE2\x88\x9D",
'Psi;' => "\xCE\xA8",
'psi;' => "\xCF\x88",
'QUOT' => "\x22",
'quot' => "\x22",
'QUOT;' => "\x22",
'quot;' => "\x22",
'radic;' => "\xE2\x88\x9A",
'rang;' => "\xE3\x80\x89",
'raquo' => "\xC2\xBB",
'raquo;' => "\xC2\xBB",
'rArr;' => "\xE2\x87\x92",
'rarr;' => "\xE2\x86\x92",
'rceil;' => "\xE2\x8C\x89",
'rdquo;' => "\xE2\x80\x9D",
'real;' => "\xE2\x84\x9C",
'REG' => "\xC2\xAE",
'reg' => "\xC2\xAE",
'REG;' => "\xC2\xAE",
'reg;' => "\xC2\xAE",
'rfloor;' => "\xE2\x8C\x8B",
'Rho;' => "\xCE\xA1",
'rho;' => "\xCF\x81",
'rlm;' => "\xE2\x80\x8F",
'rsaquo;' => "\xE2\x80\xBA",
'rsquo;' => "\xE2\x80\x99",
'sbquo;' => "\xE2\x80\x9A",
'Scaron;' => "\xC5\xA0",
'scaron;' => "\xC5\xA1",
'sdot;' => "\xE2\x8B\x85",
'sect' => "\xC2\xA7",
'sect;' => "\xC2\xA7",
'shy' => "\xC2\xAD",
'shy;' => "\xC2\xAD",
'Sigma;' => "\xCE\xA3",
'sigma;' => "\xCF\x83",
'sigmaf;' => "\xCF\x82",
'sim;' => "\xE2\x88\xBC",
'spades;' => "\xE2\x99\xA0",
'sub;' => "\xE2\x8A\x82",
'sube;' => "\xE2\x8A\x86",
'sum;' => "\xE2\x88\x91",
'sup;' => "\xE2\x8A\x83",
'sup1' => "\xC2\xB9",
'sup1;' => "\xC2\xB9",
'sup2' => "\xC2\xB2",
'sup2;' => "\xC2\xB2",
'sup3' => "\xC2\xB3",
'sup3;' => "\xC2\xB3",
'supe;' => "\xE2\x8A\x87",
'szlig' => "\xC3\x9F",
'szlig;' => "\xC3\x9F",
'Tau;' => "\xCE\xA4",
'tau;' => "\xCF\x84",
'there4;' => "\xE2\x88\xB4",
'Theta;' => "\xCE\x98",
'theta;' => "\xCE\xB8",
'thetasym;' => "\xCF\x91",
'thinsp;' => "\xE2\x80\x89",
'THORN' => "\xC3\x9E",
'thorn' => "\xC3\xBE",
'THORN;' => "\xC3\x9E",
'thorn;' => "\xC3\xBE",
'tilde;' => "\xCB\x9C",
'times' => "\xC3\x97",
'times;' => "\xC3\x97",
'TRADE;' => "\xE2\x84\xA2",
'trade;' => "\xE2\x84\xA2",
'Uacute' => "\xC3\x9A",
'uacute' => "\xC3\xBA",
'Uacute;' => "\xC3\x9A",
'uacute;' => "\xC3\xBA",
'uArr;' => "\xE2\x87\x91",
'uarr;' => "\xE2\x86\x91",
'Ucirc' => "\xC3\x9B",
'ucirc' => "\xC3\xBB",
'Ucirc;' => "\xC3\x9B",
'ucirc;' => "\xC3\xBB",
'Ugrave' => "\xC3\x99",
'ugrave' => "\xC3\xB9",
'Ugrave;' => "\xC3\x99",
'ugrave;' => "\xC3\xB9",
'uml' => "\xC2\xA8",
'uml;' => "\xC2\xA8",
'upsih;' => "\xCF\x92",
'Upsilon;' => "\xCE\xA5",
'upsilon;' => "\xCF\x85",
'Uuml' => "\xC3\x9C",
'uuml' => "\xC3\xBC",
'Uuml;' => "\xC3\x9C",
'uuml;' => "\xC3\xBC",
'weierp;' => "\xE2\x84\x98",
'Xi;' => "\xCE\x9E",
'xi;' => "\xCE\xBE",
'Yacute' => "\xC3\x9D",
'yacute' => "\xC3\xBD",
'Yacute;' => "\xC3\x9D",
'yacute;' => "\xC3\xBD",
'yen' => "\xC2\xA5",
'yen;' => "\xC2\xA5",
'yuml' => "\xC3\xBF",
'Yuml;' => "\xC5\xB8",
'yuml;' => "\xC3\xBF",
'Zeta;' => "\xCE\x96",
'zeta;' => "\xCE\xB6",
'zwj;' => "\xE2\x80\x8D",
'zwnj;' => "\xE2\x80\x8C"
);
for ($i = 0, $match = null; $i < 9 && $this->consume() !== false; $i++)
{
$consumed = substr($this->consumed, 1);
if (isset($entities[$consumed]))
{
$match = $consumed;
}
}
if ($match !== null)
{
$this->data = substr_replace($this->data, $entities[$match], $this->position - strlen($consumed) - 1, strlen($match) + 1);
$this->position += strlen($entities[$match]) - strlen($consumed) - 1;
}
break;
}
}
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,52 @@
<?php
/**
* SimplePie
*
* A PHP-Based RSS and Atom Feed Framework.
* Takes the hard work out of managing a complete RSS/Atom solution.
*
* Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are
* permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice, this list of
* conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice, this list
* of conditions and the following disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* * Neither the name of the SimplePie Team nor the names of its contributors may be used
* to endorse or promote products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
* AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
* AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* @package SimplePie
* @version 1.4-dev
* @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @author Ryan Parman
* @author Geoffrey Sneddon
* @author Ryan McCue
* @link http://simplepie.org/ SimplePie
* @license http://www.opensource.org/licenses/bsd-license.php BSD License
*/
/**
* General SimplePie exception class
*
* @package SimplePie
*/
class SimplePie_Exception extends Exception
{
}

View file

@ -0,0 +1,292 @@
<?php
/**
* SimplePie
*
* A PHP-Based RSS and Atom Feed Framework.
* Takes the hard work out of managing a complete RSS/Atom solution.
*
* Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are
* permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice, this list of
* conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice, this list
* of conditions and the following disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* * Neither the name of the SimplePie Team nor the names of its contributors may be used
* to endorse or promote products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
* AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
* AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* @package SimplePie
* @version 1.3.1
* @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @author Ryan Parman
* @author Geoffrey Sneddon
* @author Ryan McCue
* @link http://simplepie.org/ SimplePie
* @license http://www.opensource.org/licenses/bsd-license.php BSD License
*/
/**
* Used for fetching remote files and reading local files
*
* Supports HTTP 1.0 via cURL or fsockopen, with spotty HTTP 1.1 support
*
* This class can be overloaded with {@see SimplePie::set_file_class()}
*
* @package SimplePie
* @subpackage HTTP
* @todo Move to properly supporting RFC2616 (HTTP/1.1)
*/
class SimplePie_File
{
var $url;
var $useragent;
var $success = true;
var $headers = array();
var $body;
var $status_code;
var $redirects = 0;
var $error;
var $method = SIMPLEPIE_FILE_SOURCE_NONE;
public function __construct($url, $timeout = 10, $redirects = 5, $headers = null, $useragent = null, $force_fsockopen = false)
{
if (class_exists('idna_convert'))
{
$idn = new idna_convert();
$parsed = SimplePie_Misc::parse_url($url);
$url = SimplePie_Misc::compress_parse_url($parsed['scheme'], $idn->encode($parsed['authority']), $parsed['path'], $parsed['query'], $parsed['fragment']);
}
$this->url = $url;
$this->useragent = $useragent;
if (preg_match('/^http(s)?:\/\//i', $url))
{
if ($useragent === null)
{
$useragent = ini_get('user_agent');
$this->useragent = $useragent;
}
if (!is_array($headers))
{
$headers = array();
}
if (!$force_fsockopen && function_exists('curl_exec'))
{
$this->method = SIMPLEPIE_FILE_SOURCE_REMOTE | SIMPLEPIE_FILE_SOURCE_CURL;
$fp = curl_init();
$headers2 = array();
foreach ($headers as $key => $value)
{
$headers2[] = "$key: $value";
}
if (version_compare(SimplePie_Misc::get_curl_version(), '7.10.5', '>='))
{
curl_setopt($fp, CURLOPT_ENCODING, '');
}
curl_setopt($fp, CURLOPT_URL, $url);
curl_setopt($fp, CURLOPT_HEADER, 1);
curl_setopt($fp, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($fp, CURLOPT_TIMEOUT, $timeout);
curl_setopt($fp, CURLOPT_CONNECTTIMEOUT, $timeout);
curl_setopt($fp, CURLOPT_REFERER, $url);
curl_setopt($fp, CURLOPT_USERAGENT, $useragent);
curl_setopt($fp, CURLOPT_HTTPHEADER, $headers2);
if (!ini_get('open_basedir') && !ini_get('safe_mode') && version_compare(SimplePie_Misc::get_curl_version(), '7.15.2', '>='))
{
curl_setopt($fp, CURLOPT_FOLLOWLOCATION, 1);
curl_setopt($fp, CURLOPT_MAXREDIRS, $redirects);
}
$this->headers = curl_exec($fp);
if (curl_errno($fp) === 23 || curl_errno($fp) === 61)
{
curl_setopt($fp, CURLOPT_ENCODING, 'none');
$this->headers = curl_exec($fp);
}
if (curl_errno($fp))
{
$this->error = 'cURL error ' . curl_errno($fp) . ': ' . curl_error($fp);
$this->success = false;
}
else
{
$info = curl_getinfo($fp);
curl_close($fp);
$this->headers = explode("\r\n\r\n", $this->headers, $info['redirect_count'] + 1);
$this->headers = array_pop($this->headers);
$parser = new SimplePie_HTTP_Parser($this->headers);
if ($parser->parse())
{
$this->headers = $parser->headers;
$this->body = $parser->body;
$this->status_code = $parser->status_code;
if ((in_array($this->status_code, array(300, 301, 302, 303, 307)) || $this->status_code > 307 && $this->status_code < 400) && isset($this->headers['location']) && $this->redirects < $redirects)
{
$this->redirects++;
$location = SimplePie_Misc::absolutize_url($this->headers['location'], $url);
return $this->__construct($location, $timeout, $redirects, $headers, $useragent, $force_fsockopen);
}
}
}
}
else
{
$this->method = SIMPLEPIE_FILE_SOURCE_REMOTE | SIMPLEPIE_FILE_SOURCE_FSOCKOPEN;
$url_parts = parse_url($url);
$socket_host = $url_parts['host'];
if (isset($url_parts['scheme']) && strtolower($url_parts['scheme']) === 'https')
{
$socket_host = "ssl://$url_parts[host]";
$url_parts['port'] = 443;
}
if (!isset($url_parts['port']))
{
$url_parts['port'] = 80;
}
$fp = @fsockopen($socket_host, $url_parts['port'], $errno, $errstr, $timeout);
if (!$fp)
{
$this->error = 'fsockopen error: ' . $errstr;
$this->success = false;
}
else
{
stream_set_timeout($fp, $timeout);
if (isset($url_parts['path']))
{
if (isset($url_parts['query']))
{
$get = "$url_parts[path]?$url_parts[query]";
}
else
{
$get = $url_parts['path'];
}
}
else
{
$get = '/';
}
$out = "GET $get HTTP/1.1\r\n";
$out .= "Host: $url_parts[host]\r\n";
$out .= "User-Agent: $useragent\r\n";
if (extension_loaded('zlib'))
{
$out .= "Accept-Encoding: x-gzip,gzip,deflate\r\n";
}
if (isset($url_parts['user']) && isset($url_parts['pass']))
{
$out .= "Authorization: Basic " . base64_encode("$url_parts[user]:$url_parts[pass]") . "\r\n";
}
foreach ($headers as $key => $value)
{
$out .= "$key: $value\r\n";
}
$out .= "Connection: Close\r\n\r\n";
fwrite($fp, $out);
$info = stream_get_meta_data($fp);
$this->headers = '';
while (!$info['eof'] && !$info['timed_out'])
{
$this->headers .= fread($fp, 1160);
$info = stream_get_meta_data($fp);
}
if (!$info['timed_out'])
{
$parser = new SimplePie_HTTP_Parser($this->headers);
if ($parser->parse())
{
$this->headers = $parser->headers;
$this->body = $parser->body;
$this->status_code = $parser->status_code;
if ((in_array($this->status_code, array(300, 301, 302, 303, 307)) || $this->status_code > 307 && $this->status_code < 400) && isset($this->headers['location']) && $this->redirects < $redirects)
{
$this->redirects++;
$location = SimplePie_Misc::absolutize_url($this->headers['location'], $url);
return $this->__construct($location, $timeout, $redirects, $headers, $useragent, $force_fsockopen);
}
if (isset($this->headers['content-encoding']))
{
// Hey, we act dumb elsewhere, so let's do that here too
switch (strtolower(trim($this->headers['content-encoding'], "\x09\x0A\x0D\x20")))
{
case 'gzip':
case 'x-gzip':
$decoder = new SimplePie_gzdecode($this->body);
if (!$decoder->parse())
{
$this->error = 'Unable to decode HTTP "gzip" stream';
$this->success = false;
}
else
{
$this->body = $decoder->data;
}
break;
case 'deflate':
if (($decompressed = gzinflate($this->body)) !== false)
{
$this->body = $decompressed;
}
else if (($decompressed = gzuncompress($this->body)) !== false)
{
$this->body = $decompressed;
}
else if (function_exists('gzdecode') && ($decompressed = gzdecode($this->body)) !== false)
{
$this->body = $decompressed;
}
else
{
$this->error = 'Unable to decode HTTP "deflate" stream';
$this->success = false;
}
break;
default:
$this->error = 'Unknown content coding';
$this->success = false;
}
}
}
}
else
{
$this->error = 'fsocket timed out';
$this->success = false;
}
fclose($fp);
}
}
}
else
{
$this->method = SIMPLEPIE_FILE_SOURCE_LOCAL | SIMPLEPIE_FILE_SOURCE_FILE_GET_CONTENTS;
if (!$this->body = file_get_contents($url))
{
$this->error = 'file_get_contents could not read the file';
$this->success = false;
}
}
}
}

View file

@ -0,0 +1,500 @@
<?php
/**
* SimplePie
*
* A PHP-Based RSS and Atom Feed Framework.
* Takes the hard work out of managing a complete RSS/Atom solution.
*
* Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are
* permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice, this list of
* conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice, this list
* of conditions and the following disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* * Neither the name of the SimplePie Team nor the names of its contributors may be used
* to endorse or promote products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
* AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
* AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* @package SimplePie
* @version 1.3.1
* @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @author Ryan Parman
* @author Geoffrey Sneddon
* @author Ryan McCue
* @link http://simplepie.org/ SimplePie
* @license http://www.opensource.org/licenses/bsd-license.php BSD License
*/
/**
* HTTP Response Parser
*
* @package SimplePie
* @subpackage HTTP
*/
class SimplePie_HTTP_Parser
{
/**
* HTTP Version
*
* @var float
*/
public $http_version = 0.0;
/**
* Status code
*
* @var int
*/
public $status_code = 0;
/**
* Reason phrase
*
* @var string
*/
public $reason = '';
/**
* Key/value pairs of the headers
*
* @var array
*/
public $headers = array();
/**
* Body of the response
*
* @var string
*/
public $body = '';
/**
* Current state of the state machine
*
* @var string
*/
protected $state = 'http_version';
/**
* Input data
*
* @var string
*/
protected $data = '';
/**
* Input data length (to avoid calling strlen() everytime this is needed)
*
* @var int
*/
protected $data_length = 0;
/**
* Current position of the pointer
*
* @var int
*/
protected $position = 0;
/**
* Name of the hedaer currently being parsed
*
* @var string
*/
protected $name = '';
/**
* Value of the hedaer currently being parsed
*
* @var string
*/
protected $value = '';
/**
* Create an instance of the class with the input data
*
* @param string $data Input data
*/
public function __construct($data)
{
$this->data = $data;
$this->data_length = strlen($this->data);
}
/**
* Parse the input data
*
* @return bool true on success, false on failure
*/
public function parse()
{
while ($this->state && $this->state !== 'emit' && $this->has_data())
{
$state = $this->state;
$this->$state();
}
$this->data = '';
if ($this->state === 'emit' || $this->state === 'body')
{
return true;
}
else
{
$this->http_version = '';
$this->status_code = '';
$this->reason = '';
$this->headers = array();
$this->body = '';
return false;
}
}
/**
* Check whether there is data beyond the pointer
*
* @return bool true if there is further data, false if not
*/
protected function has_data()
{
return (bool) ($this->position < $this->data_length);
}
/**
* See if the next character is LWS
*
* @return bool true if the next character is LWS, false if not
*/
protected function is_linear_whitespace()
{
return (bool) ($this->data[$this->position] === "\x09"
|| $this->data[$this->position] === "\x20"
|| ($this->data[$this->position] === "\x0A"
&& isset($this->data[$this->position + 1])
&& ($this->data[$this->position + 1] === "\x09" || $this->data[$this->position + 1] === "\x20")));
}
/**
* Parse the HTTP version
*/
protected function http_version()
{
if (strpos($this->data, "\x0A") !== false && strtoupper(substr($this->data, 0, 5)) === 'HTTP/')
{
$len = strspn($this->data, '0123456789.', 5);
$this->http_version = substr($this->data, 5, $len);
$this->position += 5 + $len;
if (substr_count($this->http_version, '.') <= 1)
{
$this->http_version = (float) $this->http_version;
$this->position += strspn($this->data, "\x09\x20", $this->position);
$this->state = 'status';
}
else
{
$this->state = false;
}
}
else
{
$this->state = false;
}
}
/**
* Parse the status code
*/
protected function status()
{
if ($len = strspn($this->data, '0123456789', $this->position))
{
$this->status_code = (int) substr($this->data, $this->position, $len);
$this->position += $len;
$this->state = 'reason';
}
else
{
$this->state = false;
}
}
/**
* Parse the reason phrase
*/
protected function reason()
{
$len = strcspn($this->data, "\x0A", $this->position);
$this->reason = trim(substr($this->data, $this->position, $len), "\x09\x0D\x20");
$this->position += $len + 1;
$this->state = 'new_line';
}
/**
* Deal with a new line, shifting data around as needed
*/
protected function new_line()
{
$this->value = trim($this->value, "\x0D\x20");
if ($this->name !== '' && $this->value !== '')
{
$this->name = strtolower($this->name);
// We should only use the last Content-Type header. c.f. issue #1
if (isset($this->headers[$this->name]) && $this->name !== 'content-type')
{
$this->headers[$this->name] .= ', ' . $this->value;
}
else
{
$this->headers[$this->name] = $this->value;
}
}
$this->name = '';
$this->value = '';
if (substr($this->data[$this->position], 0, 2) === "\x0D\x0A")
{
$this->position += 2;
$this->state = 'body';
}
elseif ($this->data[$this->position] === "\x0A")
{
$this->position++;
$this->state = 'body';
}
else
{
$this->state = 'name';
}
}
/**
* Parse a header name
*/
protected function name()
{
$len = strcspn($this->data, "\x0A:", $this->position);
if (isset($this->data[$this->position + $len]))
{
if ($this->data[$this->position + $len] === "\x0A")
{
$this->position += $len;
$this->state = 'new_line';
}
else
{
$this->name = substr($this->data, $this->position, $len);
$this->position += $len + 1;
$this->state = 'value';
}
}
else
{
$this->state = false;
}
}
/**
* Parse LWS, replacing consecutive LWS characters with a single space
*/
protected function linear_whitespace()
{
do
{
if (substr($this->data, $this->position, 2) === "\x0D\x0A")
{
$this->position += 2;
}
elseif ($this->data[$this->position] === "\x0A")
{
$this->position++;
}
$this->position += strspn($this->data, "\x09\x20", $this->position);
} while ($this->has_data() && $this->is_linear_whitespace());
$this->value .= "\x20";
}
/**
* See what state to move to while within non-quoted header values
*/
protected function value()
{
if ($this->is_linear_whitespace())
{
$this->linear_whitespace();
}
else
{
switch ($this->data[$this->position])
{
case '"':
// Workaround for ETags: we have to include the quotes as
// part of the tag.
if (strtolower($this->name) === 'etag')
{
$this->value .= '"';
$this->position++;
$this->state = 'value_char';
break;
}
$this->position++;
$this->state = 'quote';
break;
case "\x0A":
$this->position++;
$this->state = 'new_line';
break;
default:
$this->state = 'value_char';
break;
}
}
}
/**
* Parse a header value while outside quotes
*/
protected function value_char()
{
$len = strcspn($this->data, "\x09\x20\x0A\"", $this->position);
$this->value .= substr($this->data, $this->position, $len);
$this->position += $len;
$this->state = 'value';
}
/**
* See what state to move to while within quoted header values
*/
protected function quote()
{
if ($this->is_linear_whitespace())
{
$this->linear_whitespace();
}
else
{
switch ($this->data[$this->position])
{
case '"':
$this->position++;
$this->state = 'value';
break;
case "\x0A":
$this->position++;
$this->state = 'new_line';
break;
case '\\':
$this->position++;
$this->state = 'quote_escaped';
break;
default:
$this->state = 'quote_char';
break;
}
}
}
/**
* Parse a header value while within quotes
*/
protected function quote_char()
{
$len = strcspn($this->data, "\x09\x20\x0A\"\\", $this->position);
$this->value .= substr($this->data, $this->position, $len);
$this->position += $len;
$this->state = 'value';
}
/**
* Parse an escaped character within quotes
*/
protected function quote_escaped()
{
$this->value .= $this->data[$this->position];
$this->position++;
$this->state = 'quote';
}
/**
* Parse the body
*/
protected function body()
{
$this->body = substr($this->data, $this->position);
if (!empty($this->headers['transfer-encoding']))
{
unset($this->headers['transfer-encoding']);
$this->state = 'chunked';
}
else
{
$this->state = 'emit';
}
}
/**
* Parsed a "Transfer-Encoding: chunked" body
*/
protected function chunked()
{
if (!preg_match('/^([0-9a-f]+)[^\r\n]*\r\n/i', trim($this->body)))
{
$this->state = 'emit';
return;
}
$decoded = '';
$encoded = $this->body;
while (true)
{
$is_chunked = (bool) preg_match( '/^([0-9a-f]+)[^\r\n]*\r\n/i', $encoded, $matches );
if (!$is_chunked)
{
// Looks like it's not chunked after all
$this->state = 'emit';
return;
}
$length = hexdec(trim($matches[1]));
if ($length === 0)
{
// Ignore trailer headers
$this->state = 'emit';
$this->body = $decoded;
return;
}
$chunk_length = strlen($matches[0]);
$decoded .= $part = substr($encoded, $chunk_length, $length);
$encoded = substr($encoded, $chunk_length + $length + 2);
if (trim($encoded) === '0' || empty($encoded))
{
$this->state = 'emit';
$this->body = $decoded;
return;
}
}
}
}

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,372 @@
<?php
/**
* SimplePie
*
* A PHP-Based RSS and Atom Feed Framework.
* Takes the hard work out of managing a complete RSS/Atom solution.
*
* Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are
* permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice, this list of
* conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice, this list
* of conditions and the following disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* * Neither the name of the SimplePie Team nor the names of its contributors may be used
* to endorse or promote products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
* AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
* AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* @package SimplePie
* @version 1.3.1
* @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @author Ryan Parman
* @author Geoffrey Sneddon
* @author Ryan McCue
* @link http://simplepie.org/ SimplePie
* @license http://www.opensource.org/licenses/bsd-license.php BSD License
*/
/**
* Used for feed auto-discovery
*
*
* This class can be overloaded with {@see SimplePie::set_locator_class()}
*
* @package SimplePie
*/
class SimplePie_Locator
{
var $useragent;
var $timeout;
var $file;
var $local = array();
var $elsewhere = array();
var $cached_entities = array();
var $http_base;
var $base;
var $base_location = 0;
var $checked_feeds = 0;
var $max_checked_feeds = 10;
protected $registry;
public function __construct(SimplePie_File $file, $timeout = 10, $useragent = null, $max_checked_feeds = 10)
{
$this->file = $file;
$this->useragent = $useragent;
$this->timeout = $timeout;
$this->max_checked_feeds = $max_checked_feeds;
if (class_exists('DOMDocument'))
{
$this->dom = new DOMDocument();
set_error_handler(array('SimplePie_Misc', 'silence_errors'));
$this->dom->loadHTML($this->file->body);
restore_error_handler();
}
else
{
$this->dom = null;
}
}
public function set_registry(SimplePie_Registry $registry)
{
$this->registry = $registry;
}
public function find($type = SIMPLEPIE_LOCATOR_ALL, &$working)
{
if ($this->is_feed($this->file))
{
return $this->file;
}
if ($this->file->method & SIMPLEPIE_FILE_SOURCE_REMOTE)
{
$sniffer = $this->registry->create('Content_Type_Sniffer', array($this->file));
if ($sniffer->get_type() !== 'text/html')
{
return null;
}
}
if ($type & ~SIMPLEPIE_LOCATOR_NONE)
{
$this->get_base();
}
if ($type & SIMPLEPIE_LOCATOR_AUTODISCOVERY && $working = $this->autodiscovery())
{
return $working[0];
}
if ($type & (SIMPLEPIE_LOCATOR_LOCAL_EXTENSION | SIMPLEPIE_LOCATOR_LOCAL_BODY | SIMPLEPIE_LOCATOR_REMOTE_EXTENSION | SIMPLEPIE_LOCATOR_REMOTE_BODY) && $this->get_links())
{
if ($type & SIMPLEPIE_LOCATOR_LOCAL_EXTENSION && $working = $this->extension($this->local))
{
return $working;
}
if ($type & SIMPLEPIE_LOCATOR_LOCAL_BODY && $working = $this->body($this->local))
{
return $working;
}
if ($type & SIMPLEPIE_LOCATOR_REMOTE_EXTENSION && $working = $this->extension($this->elsewhere))
{
return $working;
}
if ($type & SIMPLEPIE_LOCATOR_REMOTE_BODY && $working = $this->body($this->elsewhere))
{
return $working;
}
}
return null;
}
public function is_feed($file)
{
if ($file->method & SIMPLEPIE_FILE_SOURCE_REMOTE)
{
$sniffer = $this->registry->create('Content_Type_Sniffer', array($file));
$sniffed = $sniffer->get_type();
if (in_array($sniffed, array('application/rss+xml', 'application/rdf+xml', 'text/rdf', 'application/atom+xml', 'text/xml', 'application/xml')))
{
return true;
}
else
{
return false;
}
}
elseif ($file->method & SIMPLEPIE_FILE_SOURCE_LOCAL)
{
return true;
}
else
{
return false;
}
}
public function get_base()
{
if ($this->dom === null)
{
throw new SimplePie_Exception('DOMDocument not found, unable to use locator');
}
$this->http_base = $this->file->url;
$this->base = $this->http_base;
$elements = $this->dom->getElementsByTagName('base');
foreach ($elements as $element)
{
if ($element->hasAttribute('href'))
{
$base = $this->registry->call('Misc', 'absolutize_url', array(trim($element->getAttribute('href')), $this->http_base));
if ($base === false)
{
continue;
}
$this->base = $base;
$this->base_location = method_exists($element, 'getLineNo') ? $element->getLineNo() : 0;
break;
}
}
}
public function autodiscovery()
{
$done = array();
$feeds = array();
$feeds = array_merge($feeds, $this->search_elements_by_tag('link', $done, $feeds));
$feeds = array_merge($feeds, $this->search_elements_by_tag('a', $done, $feeds));
$feeds = array_merge($feeds, $this->search_elements_by_tag('area', $done, $feeds));
if (!empty($feeds))
{
return array_values($feeds);
}
else
{
return null;
}
}
protected function search_elements_by_tag($name, &$done, $feeds)
{
if ($this->dom === null)
{
throw new SimplePie_Exception('DOMDocument not found, unable to use locator');
}
$links = $this->dom->getElementsByTagName($name);
foreach ($links as $link)
{
if ($this->checked_feeds === $this->max_checked_feeds)
{
break;
}
if ($link->hasAttribute('href') && $link->hasAttribute('rel'))
{
$rel = array_unique($this->registry->call('Misc', 'space_seperated_tokens', array(strtolower($link->getAttribute('rel')))));
$line = method_exists($link, 'getLineNo') ? $link->getLineNo() : 1;
if ($this->base_location < $line)
{
$href = $this->registry->call('Misc', 'absolutize_url', array(trim($link->getAttribute('href')), $this->base));
}
else
{
$href = $this->registry->call('Misc', 'absolutize_url', array(trim($link->getAttribute('href')), $this->http_base));
}
if ($href === false)
{
continue;
}
if (!in_array($href, $done) && in_array('feed', $rel) || (in_array('alternate', $rel) && !in_array('stylesheet', $rel) && $link->hasAttribute('type') && in_array(strtolower($this->registry->call('Misc', 'parse_mime', array($link->getAttribute('type')))), array('application/rss+xml', 'application/atom+xml'))) && !isset($feeds[$href]))
{
$this->checked_feeds++;
$headers = array(
'Accept' => 'application/atom+xml, application/rss+xml, application/rdf+xml;q=0.9, application/xml;q=0.8, text/xml;q=0.8, text/html;q=0.7, unknown/unknown;q=0.1, application/unknown;q=0.1, */*;q=0.1',
);
$feed = $this->registry->create('File', array($href, $this->timeout, 5, $headers, $this->useragent));
if ($feed->success && ($feed->method & SIMPLEPIE_FILE_SOURCE_REMOTE === 0 || ($feed->status_code === 200 || $feed->status_code > 206 && $feed->status_code < 300)) && $this->is_feed($feed))
{
$feeds[$href] = $feed;
}
}
$done[] = $href;
}
}
return $feeds;
}
public function get_links()
{
if ($this->dom === null)
{
throw new SimplePie_Exception('DOMDocument not found, unable to use locator');
}
$links = $this->dom->getElementsByTagName('a');
foreach ($links as $link)
{
if ($link->hasAttribute('href'))
{
$href = trim($link->getAttribute('href'));
$parsed = $this->registry->call('Misc', 'parse_url', array($href));
if ($parsed['scheme'] === '' || preg_match('/^(http(s)|feed)?$/i', $parsed['scheme']))
{
if ($this->base_location < $link->getLineNo())
{
$href = $this->registry->call('Misc', 'absolutize_url', array(trim($link->getAttribute('href')), $this->base));
}
else
{
$href = $this->registry->call('Misc', 'absolutize_url', array(trim($link->getAttribute('href')), $this->http_base));
}
if ($href === false)
{
continue;
}
$current = $this->registry->call('Misc', 'parse_url', array($this->file->url));
if ($parsed['authority'] === '' || $parsed['authority'] === $current['authority'])
{
$this->local[] = $href;
}
else
{
$this->elsewhere[] = $href;
}
}
}
}
$this->local = array_unique($this->local);
$this->elsewhere = array_unique($this->elsewhere);
if (!empty($this->local) || !empty($this->elsewhere))
{
return true;
}
return null;
}
public function extension(&$array)
{
foreach ($array as $key => $value)
{
if ($this->checked_feeds === $this->max_checked_feeds)
{
break;
}
if (in_array(strtolower(strrchr($value, '.')), array('.rss', '.rdf', '.atom', '.xml')))
{
$this->checked_feeds++;
$headers = array(
'Accept' => 'application/atom+xml, application/rss+xml, application/rdf+xml;q=0.9, application/xml;q=0.8, text/xml;q=0.8, text/html;q=0.7, unknown/unknown;q=0.1, application/unknown;q=0.1, */*;q=0.1',
);
$feed = $this->registry->create('File', array($value, $this->timeout, 5, $headers, $this->useragent));
if ($feed->success && ($feed->method & SIMPLEPIE_FILE_SOURCE_REMOTE === 0 || ($feed->status_code === 200 || $feed->status_code > 206 && $feed->status_code < 300)) && $this->is_feed($feed))
{
return $feed;
}
else
{
unset($array[$key]);
}
}
}
return null;
}
public function body(&$array)
{
foreach ($array as $key => $value)
{
if ($this->checked_feeds === $this->max_checked_feeds)
{
break;
}
if (preg_match('/(rss|rdf|atom|xml)/i', $value))
{
$this->checked_feeds++;
$headers = array(
'Accept' => 'application/atom+xml, application/rss+xml, application/rdf+xml;q=0.9, application/xml;q=0.8, text/xml;q=0.8, text/html;q=0.7, unknown/unknown;q=0.1, application/unknown;q=0.1, */*;q=0.1',
);
$feed = $this->registry->create('File', array($value, $this->timeout, 5, null, $this->useragent));
if ($feed->success && ($feed->method & SIMPLEPIE_FILE_SOURCE_REMOTE === 0 || ($feed->status_code === 200 || $feed->status_code > 206 && $feed->status_code < 300)) && $this->is_feed($feed))
{
return $feed;
}
else
{
unset($array[$key]);
}
}
}
return null;
}
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,276 @@
<?php
/**
* SimplePie
*
* A PHP-Based RSS and Atom Feed Framework.
* Takes the hard work out of managing a complete RSS/Atom solution.
*
* Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are
* permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice, this list of
* conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice, this list
* of conditions and the following disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* * Neither the name of the SimplePie Team nor the names of its contributors may be used
* to endorse or promote products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
* AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
* AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* @package SimplePie
* @version 1.3.1
* @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @author Ryan Parman
* @author Geoffrey Sneddon
* @author Ryan McCue
* @link http://simplepie.org/ SimplePie
* @license http://www.opensource.org/licenses/bsd-license.php BSD License
*/
/**
* Class to validate and to work with IPv6 addresses.
*
* @package SimplePie
* @subpackage HTTP
* @copyright 2003-2005 The PHP Group
* @license http://www.opensource.org/licenses/bsd-license.php
* @link http://pear.php.net/package/Net_IPv6
* @author Alexander Merz <alexander.merz@web.de>
* @author elfrink at introweb dot nl
* @author Josh Peck <jmp at joshpeck dot org>
* @author Geoffrey Sneddon <geoffers@gmail.com>
*/
class SimplePie_Net_IPv6
{
/**
* Uncompresses an IPv6 address
*
* RFC 4291 allows you to compress concecutive zero pieces in an address to
* '::'. This method expects a valid IPv6 address and expands the '::' to
* the required number of zero pieces.
*
* Example: FF01::101 -> FF01:0:0:0:0:0:0:101
* ::1 -> 0:0:0:0:0:0:0:1
*
* @author Alexander Merz <alexander.merz@web.de>
* @author elfrink at introweb dot nl
* @author Josh Peck <jmp at joshpeck dot org>
* @copyright 2003-2005 The PHP Group
* @license http://www.opensource.org/licenses/bsd-license.php
* @param string $ip An IPv6 address
* @return string The uncompressed IPv6 address
*/
public static function uncompress($ip)
{
$c1 = -1;
$c2 = -1;
if (substr_count($ip, '::') === 1)
{
list($ip1, $ip2) = explode('::', $ip);
if ($ip1 === '')
{
$c1 = -1;
}
else
{
$c1 = substr_count($ip1, ':');
}
if ($ip2 === '')
{
$c2 = -1;
}
else
{
$c2 = substr_count($ip2, ':');
}
if (strpos($ip2, '.') !== false)
{
$c2++;
}
// ::
if ($c1 === -1 && $c2 === -1)
{
$ip = '0:0:0:0:0:0:0:0';
}
// ::xxx
else if ($c1 === -1)
{
$fill = str_repeat('0:', 7 - $c2);
$ip = str_replace('::', $fill, $ip);
}
// xxx::
else if ($c2 === -1)
{
$fill = str_repeat(':0', 7 - $c1);
$ip = str_replace('::', $fill, $ip);
}
// xxx::xxx
else
{
$fill = ':' . str_repeat('0:', 6 - $c2 - $c1);
$ip = str_replace('::', $fill, $ip);
}
}
return $ip;
}
/**
* Compresses an IPv6 address
*
* RFC 4291 allows you to compress concecutive zero pieces in an address to
* '::'. This method expects a valid IPv6 address and compresses consecutive
* zero pieces to '::'.
*
* Example: FF01:0:0:0:0:0:0:101 -> FF01::101
* 0:0:0:0:0:0:0:1 -> ::1
*
* @see uncompress()
* @param string $ip An IPv6 address
* @return string The compressed IPv6 address
*/
public static function compress($ip)
{
// Prepare the IP to be compressed
$ip = self::uncompress($ip);
$ip_parts = self::split_v6_v4($ip);
// Replace all leading zeros
$ip_parts[0] = preg_replace('/(^|:)0+([0-9])/', '\1\2', $ip_parts[0]);
// Find bunches of zeros
if (preg_match_all('/(?:^|:)(?:0(?::|$))+/', $ip_parts[0], $matches, PREG_OFFSET_CAPTURE))
{
$max = 0;
$pos = null;
foreach ($matches[0] as $match)
{
if (strlen($match[0]) > $max)
{
$max = strlen($match[0]);
$pos = $match[1];
}
}
$ip_parts[0] = substr_replace($ip_parts[0], '::', $pos, $max);
}
if ($ip_parts[1] !== '')
{
return implode(':', $ip_parts);
}
else
{
return $ip_parts[0];
}
}
/**
* Splits an IPv6 address into the IPv6 and IPv4 representation parts
*
* RFC 4291 allows you to represent the last two parts of an IPv6 address
* using the standard IPv4 representation
*
* Example: 0:0:0:0:0:0:13.1.68.3
* 0:0:0:0:0:FFFF:129.144.52.38
*
* @param string $ip An IPv6 address
* @return array [0] contains the IPv6 represented part, and [1] the IPv4 represented part
*/
private static function split_v6_v4($ip)
{
if (strpos($ip, '.') !== false)
{
$pos = strrpos($ip, ':');
$ipv6_part = substr($ip, 0, $pos);
$ipv4_part = substr($ip, $pos + 1);
return array($ipv6_part, $ipv4_part);
}
else
{
return array($ip, '');
}
}
/**
* Checks an IPv6 address
*
* Checks if the given IP is a valid IPv6 address
*
* @param string $ip An IPv6 address
* @return bool true if $ip is a valid IPv6 address
*/
public static function check_ipv6($ip)
{
$ip = self::uncompress($ip);
list($ipv6, $ipv4) = self::split_v6_v4($ip);
$ipv6 = explode(':', $ipv6);
$ipv4 = explode('.', $ipv4);
if (count($ipv6) === 8 && count($ipv4) === 1 || count($ipv6) === 6 && count($ipv4) === 4)
{
foreach ($ipv6 as $ipv6_part)
{
// The section can't be empty
if ($ipv6_part === '')
return false;
// Nor can it be over four characters
if (strlen($ipv6_part) > 4)
return false;
// Remove leading zeros (this is safe because of the above)
$ipv6_part = ltrim($ipv6_part, '0');
if ($ipv6_part === '')
$ipv6_part = '0';
// Check the value is valid
$value = hexdec($ipv6_part);
if (dechex($value) !== strtolower($ipv6_part) || $value < 0 || $value > 0xFFFF)
return false;
}
if (count($ipv4) === 4)
{
foreach ($ipv4 as $ipv4_part)
{
$value = (int) $ipv4_part;
if ((string) $value !== $ipv4_part || $value < 0 || $value > 0xFF)
return false;
}
}
return true;
}
else
{
return false;
}
}
/**
* Checks if the given IP is a valid IPv6 address
*
* @codeCoverageIgnore
* @deprecated Use {@see SimplePie_Net_IPv6::check_ipv6()} instead
* @see check_ipv6
* @param string $ip An IPv6 address
* @return bool true if $ip is a valid IPv6 address
*/
public static function checkIPv6($ip)
{
return self::check_ipv6($ip);
}
}

View file

@ -0,0 +1,983 @@
<?php
/**
* SimplePie
*
* A PHP-Based RSS and Atom Feed Framework.
* Takes the hard work out of managing a complete RSS/Atom solution.
*
* Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are
* permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice, this list of
* conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice, this list
* of conditions and the following disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* * Neither the name of the SimplePie Team nor the names of its contributors may be used
* to endorse or promote products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
* AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
* AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* @package SimplePie
* @version 1.3.1
* @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @author Ryan Parman
* @author Geoffrey Sneddon
* @author Ryan McCue
* @link http://simplepie.org/ SimplePie
* @license http://www.opensource.org/licenses/bsd-license.php BSD License
*/
/**
* Date Parser
*
* @package SimplePie
* @subpackage Parsing
*/
class SimplePie_Parse_Date
{
/**
* Input data
*
* @access protected
* @var string
*/
var $date;
/**
* List of days, calendar day name => ordinal day number in the week
*
* @access protected
* @var array
*/
var $day = array(
// English
'mon' => 1,
'monday' => 1,
'tue' => 2,
'tuesday' => 2,
'wed' => 3,
'wednesday' => 3,
'thu' => 4,
'thursday' => 4,
'fri' => 5,
'friday' => 5,
'sat' => 6,
'saturday' => 6,
'sun' => 7,
'sunday' => 7,
// Dutch
'maandag' => 1,
'dinsdag' => 2,
'woensdag' => 3,
'donderdag' => 4,
'vrijdag' => 5,
'zaterdag' => 6,
'zondag' => 7,
// French
'lundi' => 1,
'mardi' => 2,
'mercredi' => 3,
'jeudi' => 4,
'vendredi' => 5,
'samedi' => 6,
'dimanche' => 7,
// German
'montag' => 1,
'dienstag' => 2,
'mittwoch' => 3,
'donnerstag' => 4,
'freitag' => 5,
'samstag' => 6,
'sonnabend' => 6,
'sonntag' => 7,
// Italian
'lunedì' => 1,
'martedì' => 2,
'mercoledì' => 3,
'giovedì' => 4,
'venerdì' => 5,
'sabato' => 6,
'domenica' => 7,
// Spanish
'lunes' => 1,
'martes' => 2,
'miércoles' => 3,
'jueves' => 4,
'viernes' => 5,
'sábado' => 6,
'domingo' => 7,
// Finnish
'maanantai' => 1,
'tiistai' => 2,
'keskiviikko' => 3,
'torstai' => 4,
'perjantai' => 5,
'lauantai' => 6,
'sunnuntai' => 7,
// Hungarian
'hétfő' => 1,
'kedd' => 2,
'szerda' => 3,
'csütörtok' => 4,
'péntek' => 5,
'szombat' => 6,
'vasárnap' => 7,
// Greek
'Δευ' => 1,
'Τρι' => 2,
'Τετ' => 3,
'Πεμ' => 4,
'Παρ' => 5,
'Σαβ' => 6,
'Κυρ' => 7,
);
/**
* List of months, calendar month name => calendar month number
*
* @access protected
* @var array
*/
var $month = array(
// English
'jan' => 1,
'january' => 1,
'feb' => 2,
'february' => 2,
'mar' => 3,
'march' => 3,
'apr' => 4,
'april' => 4,
'may' => 5,
// No long form of May
'jun' => 6,
'june' => 6,
'jul' => 7,
'july' => 7,
'aug' => 8,
'august' => 8,
'sep' => 9,
'september' => 8,
'oct' => 10,
'october' => 10,
'nov' => 11,
'november' => 11,
'dec' => 12,
'december' => 12,
// Dutch
'januari' => 1,
'februari' => 2,
'maart' => 3,
'april' => 4,
'mei' => 5,
'juni' => 6,
'juli' => 7,
'augustus' => 8,
'september' => 9,
'oktober' => 10,
'november' => 11,
'december' => 12,
// French
'janvier' => 1,
'février' => 2,
'mars' => 3,
'avril' => 4,
'mai' => 5,
'juin' => 6,
'juillet' => 7,
'août' => 8,
'septembre' => 9,
'octobre' => 10,
'novembre' => 11,
'décembre' => 12,
// German
'januar' => 1,
'februar' => 2,
'märz' => 3,
'april' => 4,
'mai' => 5,
'juni' => 6,
'juli' => 7,
'august' => 8,
'september' => 9,
'oktober' => 10,
'november' => 11,
'dezember' => 12,
// Italian
'gennaio' => 1,
'febbraio' => 2,
'marzo' => 3,
'aprile' => 4,
'maggio' => 5,
'giugno' => 6,
'luglio' => 7,
'agosto' => 8,
'settembre' => 9,
'ottobre' => 10,
'novembre' => 11,
'dicembre' => 12,
// Spanish
'enero' => 1,
'febrero' => 2,
'marzo' => 3,
'abril' => 4,
'mayo' => 5,
'junio' => 6,
'julio' => 7,
'agosto' => 8,
'septiembre' => 9,
'setiembre' => 9,
'octubre' => 10,
'noviembre' => 11,
'diciembre' => 12,
// Finnish
'tammikuu' => 1,
'helmikuu' => 2,
'maaliskuu' => 3,
'huhtikuu' => 4,
'toukokuu' => 5,
'kesäkuu' => 6,
'heinäkuu' => 7,
'elokuu' => 8,
'suuskuu' => 9,
'lokakuu' => 10,
'marras' => 11,
'joulukuu' => 12,
// Hungarian
'január' => 1,
'február' => 2,
'március' => 3,
'április' => 4,
'május' => 5,
'június' => 6,
'július' => 7,
'augusztus' => 8,
'szeptember' => 9,
'október' => 10,
'november' => 11,
'december' => 12,
// Greek
'Ιαν' => 1,
'Φεβ' => 2,
'Μάώ' => 3,
'Μαώ' => 3,
'Απρ' => 4,
'Μάι' => 5,
'Μαϊ' => 5,
'Μαι' => 5,
'Ιούν' => 6,
'Ιον' => 6,
'Ιούλ' => 7,
'Ιολ' => 7,
'Αύγ' => 8,
'Αυγ' => 8,
'Σεπ' => 9,
'Οκτ' => 10,
'Νοέ' => 11,
'Δεκ' => 12,
);
/**
* List of timezones, abbreviation => offset from UTC
*
* @access protected
* @var array
*/
var $timezone = array(
'ACDT' => 37800,
'ACIT' => 28800,
'ACST' => 34200,
'ACT' => -18000,
'ACWDT' => 35100,
'ACWST' => 31500,
'AEDT' => 39600,
'AEST' => 36000,
'AFT' => 16200,
'AKDT' => -28800,
'AKST' => -32400,
'AMDT' => 18000,
'AMT' => -14400,
'ANAST' => 46800,
'ANAT' => 43200,
'ART' => -10800,
'AZOST' => -3600,
'AZST' => 18000,
'AZT' => 14400,
'BIOT' => 21600,
'BIT' => -43200,
'BOT' => -14400,
'BRST' => -7200,
'BRT' => -10800,
'BST' => 3600,
'BTT' => 21600,
'CAST' => 18000,
'CAT' => 7200,
'CCT' => 23400,
'CDT' => -18000,
'CEDT' => 7200,
'CET' => 3600,
'CGST' => -7200,
'CGT' => -10800,
'CHADT' => 49500,
'CHAST' => 45900,
'CIST' => -28800,
'CKT' => -36000,
'CLDT' => -10800,
'CLST' => -14400,
'COT' => -18000,
'CST' => -21600,
'CVT' => -3600,
'CXT' => 25200,
'DAVT' => 25200,
'DTAT' => 36000,
'EADT' => -18000,
'EAST' => -21600,
'EAT' => 10800,
'ECT' => -18000,
'EDT' => -14400,
'EEST' => 10800,
'EET' => 7200,
'EGT' => -3600,
'EKST' => 21600,
'EST' => -18000,
'FJT' => 43200,
'FKDT' => -10800,
'FKST' => -14400,
'FNT' => -7200,
'GALT' => -21600,
'GEDT' => 14400,
'GEST' => 10800,
'GFT' => -10800,
'GILT' => 43200,
'GIT' => -32400,
'GST' => 14400,
'GST' => -7200,
'GYT' => -14400,
'HAA' => -10800,
'HAC' => -18000,
'HADT' => -32400,
'HAE' => -14400,
'HAP' => -25200,
'HAR' => -21600,
'HAST' => -36000,
'HAT' => -9000,
'HAY' => -28800,
'HKST' => 28800,
'HMT' => 18000,
'HNA' => -14400,
'HNC' => -21600,
'HNE' => -18000,
'HNP' => -28800,
'HNR' => -25200,
'HNT' => -12600,
'HNY' => -32400,
'IRDT' => 16200,
'IRKST' => 32400,
'IRKT' => 28800,
'IRST' => 12600,
'JFDT' => -10800,
'JFST' => -14400,
'JST' => 32400,
'KGST' => 21600,
'KGT' => 18000,
'KOST' => 39600,
'KOVST' => 28800,
'KOVT' => 25200,
'KRAST' => 28800,
'KRAT' => 25200,
'KST' => 32400,
'LHDT' => 39600,
'LHST' => 37800,
'LINT' => 50400,
'LKT' => 21600,
'MAGST' => 43200,
'MAGT' => 39600,
'MAWT' => 21600,
'MDT' => -21600,
'MESZ' => 7200,
'MEZ' => 3600,
'MHT' => 43200,
'MIT' => -34200,
'MNST' => 32400,
'MSDT' => 14400,
'MSST' => 10800,
'MST' => -25200,
'MUT' => 14400,
'MVT' => 18000,
'MYT' => 28800,
'NCT' => 39600,
'NDT' => -9000,
'NFT' => 41400,
'NMIT' => 36000,
'NOVST' => 25200,
'NOVT' => 21600,
'NPT' => 20700,
'NRT' => 43200,
'NST' => -12600,
'NUT' => -39600,
'NZDT' => 46800,
'NZST' => 43200,
'OMSST' => 25200,
'OMST' => 21600,
'PDT' => -25200,
'PET' => -18000,
'PETST' => 46800,
'PETT' => 43200,
'PGT' => 36000,
'PHOT' => 46800,
'PHT' => 28800,
'PKT' => 18000,
'PMDT' => -7200,
'PMST' => -10800,
'PONT' => 39600,
'PST' => -28800,
'PWT' => 32400,
'PYST' => -10800,
'PYT' => -14400,
'RET' => 14400,
'ROTT' => -10800,
'SAMST' => 18000,
'SAMT' => 14400,
'SAST' => 7200,
'SBT' => 39600,
'SCDT' => 46800,
'SCST' => 43200,
'SCT' => 14400,
'SEST' => 3600,
'SGT' => 28800,
'SIT' => 28800,
'SRT' => -10800,
'SST' => -39600,
'SYST' => 10800,
'SYT' => 7200,
'TFT' => 18000,
'THAT' => -36000,
'TJT' => 18000,
'TKT' => -36000,
'TMT' => 18000,
'TOT' => 46800,
'TPT' => 32400,
'TRUT' => 36000,
'TVT' => 43200,
'TWT' => 28800,
'UYST' => -7200,
'UYT' => -10800,
'UZT' => 18000,
'VET' => -14400,
'VLAST' => 39600,
'VLAT' => 36000,
'VOST' => 21600,
'VUT' => 39600,
'WAST' => 7200,
'WAT' => 3600,
'WDT' => 32400,
'WEST' => 3600,
'WFT' => 43200,
'WIB' => 25200,
'WIT' => 32400,
'WITA' => 28800,
'WKST' => 18000,
'WST' => 28800,
'YAKST' => 36000,
'YAKT' => 32400,
'YAPT' => 36000,
'YEKST' => 21600,
'YEKT' => 18000,
);
/**
* Cached PCRE for SimplePie_Parse_Date::$day
*
* @access protected
* @var string
*/
var $day_pcre;
/**
* Cached PCRE for SimplePie_Parse_Date::$month
*
* @access protected
* @var string
*/
var $month_pcre;
/**
* Array of user-added callback methods
*
* @access private
* @var array
*/
var $built_in = array();
/**
* Array of user-added callback methods
*
* @access private
* @var array
*/
var $user = array();
/**
* Create new SimplePie_Parse_Date object, and set self::day_pcre,
* self::month_pcre, and self::built_in
*
* @access private
*/
public function __construct()
{
$this->day_pcre = '(' . implode(array_keys($this->day), '|') . ')';
$this->month_pcre = '(' . implode(array_keys($this->month), '|') . ')';
static $cache;
if (!isset($cache[get_class($this)]))
{
$all_methods = get_class_methods($this);
foreach ($all_methods as $method)
{
if (strtolower(substr($method, 0, 5)) === 'date_')
{
$cache[get_class($this)][] = $method;
}
}
}
foreach ($cache[get_class($this)] as $method)
{
$this->built_in[] = $method;
}
}
/**
* Get the object
*
* @access public
*/
public static function get()
{
static $object;
if (!$object)
{
$object = new SimplePie_Parse_Date;
}
return $object;
}
/**
* Parse a date
*
* @final
* @access public
* @param string $date Date to parse
* @return int Timestamp corresponding to date string, or false on failure
*/
public function parse($date)
{
foreach ($this->user as $method)
{
if (($returned = call_user_func($method, $date)) !== false)
{
return $returned;
}
}
foreach ($this->built_in as $method)
{
if (($returned = call_user_func(array($this, $method), $date)) !== false)
{
return $returned;
}
}
return false;
}
/**
* Add a callback method to parse a date
*
* @final
* @access public
* @param callback $callback
*/
public function add_callback($callback)
{
if (is_callable($callback))
{
$this->user[] = $callback;
}
else
{
trigger_error('User-supplied function must be a valid callback', E_USER_WARNING);
}
}
/**
* Parse a superset of W3C-DTF (allows hyphens and colons to be omitted, as
* well as allowing any of upper or lower case "T", horizontal tabs, or
* spaces to be used as the time seperator (including more than one))
*
* @access protected
* @return int Timestamp
*/
public function date_w3cdtf($date)
{
static $pcre;
if (!$pcre)
{
$year = '([0-9]{4})';
$month = $day = $hour = $minute = $second = '([0-9]{2})';
$decimal = '([0-9]*)';
$zone = '(?:(Z)|([+\-])([0-9]{1,2}):?([0-9]{1,2}))';
$pcre = '/^' . $year . '(?:-?' . $month . '(?:-?' . $day . '(?:[Tt\x09\x20]+' . $hour . '(?::?' . $minute . '(?::?' . $second . '(?:.' . $decimal . ')?)?)?' . $zone . ')?)?)?$/';
}
if (preg_match($pcre, $date, $match))
{
/*
Capturing subpatterns:
1: Year
2: Month
3: Day
4: Hour
5: Minute
6: Second
7: Decimal fraction of a second
8: Zulu
9: Timezone ±
10: Timezone hours
11: Timezone minutes
*/
// Fill in empty matches
for ($i = count($match); $i <= 3; $i++)
{
$match[$i] = '1';
}
for ($i = count($match); $i <= 7; $i++)
{
$match[$i] = '0';
}
// Numeric timezone
if (isset($match[9]) && $match[9] !== '')
{
$timezone = $match[10] * 3600;
$timezone += $match[11] * 60;
if ($match[9] === '-')
{
$timezone = 0 - $timezone;
}
}
else
{
$timezone = 0;
}
// Convert the number of seconds to an integer, taking decimals into account
$second = round($match[6] + $match[7] / pow(10, strlen($match[7])));
return gmmktime($match[4], $match[5], $second, $match[2], $match[3], $match[1]) - $timezone;
}
else
{
return false;
}
}
/**
* Remove RFC822 comments
*
* @access protected
* @param string $data Data to strip comments from
* @return string Comment stripped string
*/
public function remove_rfc2822_comments($string)
{
$string = (string) $string;
$position = 0;
$length = strlen($string);
$depth = 0;
$output = '';
while ($position < $length && ($pos = strpos($string, '(', $position)) !== false)
{
$output .= substr($string, $position, $pos - $position);
$position = $pos + 1;
if ($string[$pos - 1] !== '\\')
{
$depth++;
while ($depth && $position < $length)
{
$position += strcspn($string, '()', $position);
if ($string[$position - 1] === '\\')
{
$position++;
continue;
}
elseif (isset($string[$position]))
{
switch ($string[$position])
{
case '(':
$depth++;
break;
case ')':
$depth--;
break;
}
$position++;
}
else
{
break;
}
}
}
else
{
$output .= '(';
}
}
$output .= substr($string, $position);
return $output;
}
/**
* Parse RFC2822's date format
*
* @access protected
* @return int Timestamp
*/
public function date_rfc2822($date)
{
static $pcre;
if (!$pcre)
{
$wsp = '[\x09\x20]';
$fws = '(?:' . $wsp . '+|' . $wsp . '*(?:\x0D\x0A' . $wsp . '+)+)';
$optional_fws = $fws . '?';
$day_name = $this->day_pcre;
$month = $this->month_pcre;
$day = '([0-9]{1,2})';
$hour = $minute = $second = '([0-9]{2})';
$year = '([0-9]{2,4})';
$num_zone = '([+\-])([0-9]{2})([0-9]{2})';
$character_zone = '([A-Z]{1,5})';
$zone = '(?:' . $num_zone . '|' . $character_zone . ')';
$pcre = '/(?:' . $optional_fws . $day_name . $optional_fws . ',)?' . $optional_fws . $day . $fws . $month . $fws . $year . $fws . $hour . $optional_fws . ':' . $optional_fws . $minute . '(?:' . $optional_fws . ':' . $optional_fws . $second . ')?' . $fws . $zone . '/i';
}
if (preg_match($pcre, $this->remove_rfc2822_comments($date), $match))
{
/*
Capturing subpatterns:
1: Day name
2: Day
3: Month
4: Year
5: Hour
6: Minute
7: Second
8: Timezone ±
9: Timezone hours
10: Timezone minutes
11: Alphabetic timezone
*/
// Find the month number
$month = $this->month[strtolower($match[3])];
// Numeric timezone
if ($match[8] !== '')
{
$timezone = $match[9] * 3600;
$timezone += $match[10] * 60;
if ($match[8] === '-')
{
$timezone = 0 - $timezone;
}
}
// Character timezone
elseif (isset($this->timezone[strtoupper($match[11])]))
{
$timezone = $this->timezone[strtoupper($match[11])];
}
// Assume everything else to be -0000
else
{
$timezone = 0;
}
// Deal with 2/3 digit years
if ($match[4] < 50)
{
$match[4] += 2000;
}
elseif ($match[4] < 1000)
{
$match[4] += 1900;
}
// Second is optional, if it is empty set it to zero
if ($match[7] !== '')
{
$second = $match[7];
}
else
{
$second = 0;
}
return gmmktime($match[5], $match[6], $second, $month, $match[2], $match[4]) - $timezone;
}
else
{
return false;
}
}
/**
* Parse RFC850's date format
*
* @access protected
* @return int Timestamp
*/
public function date_rfc850($date)
{
static $pcre;
if (!$pcre)
{
$space = '[\x09\x20]+';
$day_name = $this->day_pcre;
$month = $this->month_pcre;
$day = '([0-9]{1,2})';
$year = $hour = $minute = $second = '([0-9]{2})';
$zone = '([A-Z]{1,5})';
$pcre = '/^' . $day_name . ',' . $space . $day . '-' . $month . '-' . $year . $space . $hour . ':' . $minute . ':' . $second . $space . $zone . '$/i';
}
if (preg_match($pcre, $date, $match))
{
/*
Capturing subpatterns:
1: Day name
2: Day
3: Month
4: Year
5: Hour
6: Minute
7: Second
8: Timezone
*/
// Month
$month = $this->month[strtolower($match[3])];
// Character timezone
if (isset($this->timezone[strtoupper($match[8])]))
{
$timezone = $this->timezone[strtoupper($match[8])];
}
// Assume everything else to be -0000
else
{
$timezone = 0;
}
// Deal with 2 digit year
if ($match[4] < 50)
{
$match[4] += 2000;
}
else
{
$match[4] += 1900;
}
return gmmktime($match[5], $match[6], $match[7], $month, $match[2], $match[4]) - $timezone;
}
else
{
return false;
}
}
/**
* Parse C99's asctime()'s date format
*
* @access protected
* @return int Timestamp
*/
public function date_asctime($date)
{
static $pcre;
if (!$pcre)
{
$space = '[\x09\x20]+';
$wday_name = $this->day_pcre;
$mon_name = $this->month_pcre;
$day = '([0-9]{1,2})';
$hour = $sec = $min = '([0-9]{2})';
$year = '([0-9]{4})';
$terminator = '\x0A?\x00?';
$pcre = '/^' . $wday_name . $space . $mon_name . $space . $day . $space . $hour . ':' . $min . ':' . $sec . $space . $year . $terminator . '$/i';
}
if (preg_match($pcre, $date, $match))
{
/*
Capturing subpatterns:
1: Day name
2: Month
3: Day
4: Hour
5: Minute
6: Second
7: Year
*/
$month = $this->month[strtolower($match[2])];
return gmmktime($match[4], $match[5], $match[6], $month, $match[3], $match[7]);
}
else
{
return false;
}
}
/**
* Parse dates using strtotime()
*
* @access protected
* @return int Timestamp
*/
public function date_strtotime($date)
{
$strtotime = strtotime($date);
if ($strtotime === -1 || $strtotime === false)
{
return false;
}
else
{
return $strtotime;
}
}
}

View file

@ -0,0 +1,407 @@
<?php
/**
* SimplePie
*
* A PHP-Based RSS and Atom Feed Framework.
* Takes the hard work out of managing a complete RSS/Atom solution.
*
* Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are
* permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice, this list of
* conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice, this list
* of conditions and the following disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* * Neither the name of the SimplePie Team nor the names of its contributors may be used
* to endorse or promote products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
* AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
* AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* @package SimplePie
* @version 1.3.1
* @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @author Ryan Parman
* @author Geoffrey Sneddon
* @author Ryan McCue
* @link http://simplepie.org/ SimplePie
* @license http://www.opensource.org/licenses/bsd-license.php BSD License
*/
/**
* Parses XML into something sane
*
*
* This class can be overloaded with {@see SimplePie::set_parser_class()}
*
* @package SimplePie
* @subpackage Parsing
*/
class SimplePie_Parser
{
var $error_code;
var $error_string;
var $current_line;
var $current_column;
var $current_byte;
var $separator = ' ';
var $namespace = array('');
var $element = array('');
var $xml_base = array('');
var $xml_base_explicit = array(false);
var $xml_lang = array('');
var $data = array();
var $datas = array(array());
var $current_xhtml_construct = -1;
var $encoding;
protected $registry;
public function set_registry(SimplePie_Registry $registry)
{
$this->registry = $registry;
}
public function parse(&$data, $encoding)
{
// Use UTF-8 if we get passed US-ASCII, as every US-ASCII character is a UTF-8 character
if (strtoupper($encoding) === 'US-ASCII')
{
$this->encoding = 'UTF-8';
}
else
{
$this->encoding = $encoding;
}
// Strip BOM:
// UTF-32 Big Endian BOM
if (substr($data, 0, 4) === "\x00\x00\xFE\xFF")
{
$data = substr($data, 4);
}
// UTF-32 Little Endian BOM
elseif (substr($data, 0, 4) === "\xFF\xFE\x00\x00")
{
$data = substr($data, 4);
}
// UTF-16 Big Endian BOM
elseif (substr($data, 0, 2) === "\xFE\xFF")
{
$data = substr($data, 2);
}
// UTF-16 Little Endian BOM
elseif (substr($data, 0, 2) === "\xFF\xFE")
{
$data = substr($data, 2);
}
// UTF-8 BOM
elseif (substr($data, 0, 3) === "\xEF\xBB\xBF")
{
$data = substr($data, 3);
}
if (substr($data, 0, 5) === '<?xml' && strspn(substr($data, 5, 1), "\x09\x0A\x0D\x20") && ($pos = strpos($data, '?>')) !== false)
{
$declaration = $this->registry->create('XML_Declaration_Parser', array(substr($data, 5, $pos - 5)));
if ($declaration->parse())
{
$data = substr($data, $pos + 2);
$data = '<?xml version="' . $declaration->version . '" encoding="' . $encoding . '" standalone="' . (($declaration->standalone) ? 'yes' : 'no') . '"?>' . $data;
}
else
{
$this->error_string = 'SimplePie bug! Please report this!';
return false;
}
}
$return = true;
static $xml_is_sane = null;
if ($xml_is_sane === null)
{
$parser_check = xml_parser_create();
xml_parse_into_struct($parser_check, '<foo>&amp;</foo>', $values);
xml_parser_free($parser_check);
$xml_is_sane = isset($values[0]['value']);
}
// Create the parser
if ($xml_is_sane)
{
$xml = xml_parser_create_ns($this->encoding, $this->separator);
xml_parser_set_option($xml, XML_OPTION_SKIP_WHITE, 1);
xml_parser_set_option($xml, XML_OPTION_CASE_FOLDING, 0);
xml_set_object($xml, $this);
xml_set_character_data_handler($xml, 'cdata');
xml_set_element_handler($xml, 'tag_open', 'tag_close');
// Parse!
if (!xml_parse($xml, $data, true))
{
$this->error_code = xml_get_error_code($xml);
$this->error_string = xml_error_string($this->error_code);
$return = false;
}
$this->current_line = xml_get_current_line_number($xml);
$this->current_column = xml_get_current_column_number($xml);
$this->current_byte = xml_get_current_byte_index($xml);
xml_parser_free($xml);
return $return;
}
else
{
libxml_clear_errors();
$xml = new XMLReader();
$xml->xml($data);
while (@$xml->read())
{
switch ($xml->nodeType)
{
case constant('XMLReader::END_ELEMENT'):
if ($xml->namespaceURI !== '')
{
$tagName = $xml->namespaceURI . $this->separator . $xml->localName;
}
else
{
$tagName = $xml->localName;
}
$this->tag_close(null, $tagName);
break;
case constant('XMLReader::ELEMENT'):
$empty = $xml->isEmptyElement;
if ($xml->namespaceURI !== '')
{
$tagName = $xml->namespaceURI . $this->separator . $xml->localName;
}
else
{
$tagName = $xml->localName;
}
$attributes = array();
while ($xml->moveToNextAttribute())
{
if ($xml->namespaceURI !== '')
{
$attrName = $xml->namespaceURI . $this->separator . $xml->localName;
}
else
{
$attrName = $xml->localName;
}
$attributes[$attrName] = $xml->value;
}
$this->tag_open(null, $tagName, $attributes);
if ($empty)
{
$this->tag_close(null, $tagName);
}
break;
case constant('XMLReader::TEXT'):
case constant('XMLReader::CDATA'):
$this->cdata(null, $xml->value);
break;
}
}
if ($error = libxml_get_last_error())
{
$this->error_code = $error->code;
$this->error_string = $error->message;
$this->current_line = $error->line;
$this->current_column = $error->column;
return false;
}
else
{
return true;
}
}
}
public function get_error_code()
{
return $this->error_code;
}
public function get_error_string()
{
return $this->error_string;
}
public function get_current_line()
{
return $this->current_line;
}
public function get_current_column()
{
return $this->current_column;
}
public function get_current_byte()
{
return $this->current_byte;
}
public function get_data()
{
return $this->data;
}
public function tag_open($parser, $tag, $attributes)
{
list($this->namespace[], $this->element[]) = $this->split_ns($tag);
$attribs = array();
foreach ($attributes as $name => $value)
{
list($attrib_namespace, $attribute) = $this->split_ns($name);
$attribs[$attrib_namespace][$attribute] = $value;
}
if (isset($attribs[SIMPLEPIE_NAMESPACE_XML]['base']))
{
$base = $this->registry->call('Misc', 'absolutize_url', array($attribs[SIMPLEPIE_NAMESPACE_XML]['base'], end($this->xml_base)));
if ($base !== false)
{
$this->xml_base[] = $base;
$this->xml_base_explicit[] = true;
}
}
else
{
$this->xml_base[] = end($this->xml_base);
$this->xml_base_explicit[] = end($this->xml_base_explicit);
}
if (isset($attribs[SIMPLEPIE_NAMESPACE_XML]['lang']))
{
$this->xml_lang[] = $attribs[SIMPLEPIE_NAMESPACE_XML]['lang'];
}
else
{
$this->xml_lang[] = end($this->xml_lang);
}
if ($this->current_xhtml_construct >= 0)
{
$this->current_xhtml_construct++;
if (end($this->namespace) === SIMPLEPIE_NAMESPACE_XHTML)
{
$this->data['data'] .= '<' . end($this->element);
if (isset($attribs['']))
{
foreach ($attribs[''] as $name => $value)
{
$this->data['data'] .= ' ' . $name . '="' . htmlspecialchars($value, ENT_COMPAT, $this->encoding) . '"';
}
}
$this->data['data'] .= '>';
}
}
else
{
$this->datas[] =& $this->data;
$this->data =& $this->data['child'][end($this->namespace)][end($this->element)][];
$this->data = array('data' => '', 'attribs' => $attribs, 'xml_base' => end($this->xml_base), 'xml_base_explicit' => end($this->xml_base_explicit), 'xml_lang' => end($this->xml_lang));
if ((end($this->namespace) === SIMPLEPIE_NAMESPACE_ATOM_03 && in_array(end($this->element), array('title', 'tagline', 'copyright', 'info', 'summary', 'content')) && isset($attribs['']['mode']) && $attribs['']['mode'] === 'xml')
|| (end($this->namespace) === SIMPLEPIE_NAMESPACE_ATOM_10 && in_array(end($this->element), array('rights', 'subtitle', 'summary', 'info', 'title', 'content')) && isset($attribs['']['type']) && $attribs['']['type'] === 'xhtml')
|| (end($this->namespace) === SIMPLEPIE_NAMESPACE_RSS_20 && in_array(end($this->element), array('title')))
|| (end($this->namespace) === SIMPLEPIE_NAMESPACE_RSS_090 && in_array(end($this->element), array('title')))
|| (end($this->namespace) === SIMPLEPIE_NAMESPACE_RSS_10 && in_array(end($this->element), array('title'))))
{
$this->current_xhtml_construct = 0;
}
}
}
public function cdata($parser, $cdata)
{
if ($this->current_xhtml_construct >= 0)
{
$this->data['data'] .= htmlspecialchars($cdata, ENT_QUOTES, $this->encoding);
}
else
{
$this->data['data'] .= $cdata;
}
}
public function tag_close($parser, $tag)
{
if ($this->current_xhtml_construct >= 0)
{
$this->current_xhtml_construct--;
if (end($this->namespace) === SIMPLEPIE_NAMESPACE_XHTML && !in_array(end($this->element), array('area', 'base', 'basefont', 'br', 'col', 'frame', 'hr', 'img', 'input', 'isindex', 'link', 'meta', 'param')))
{
$this->data['data'] .= '</' . end($this->element) . '>';
}
}
if ($this->current_xhtml_construct === -1)
{
$this->data =& $this->datas[count($this->datas) - 1];
array_pop($this->datas);
}
array_pop($this->element);
array_pop($this->namespace);
array_pop($this->xml_base);
array_pop($this->xml_base_explicit);
array_pop($this->xml_lang);
}
public function split_ns($string)
{
static $cache = array();
if (!isset($cache[$string]))
{
if ($pos = strpos($string, $this->separator))
{
static $separator_length;
if (!$separator_length)
{
$separator_length = strlen($this->separator);
}
$namespace = substr($string, 0, $pos);
$local_name = substr($string, $pos + $separator_length);
if (strtolower($namespace) === SIMPLEPIE_NAMESPACE_ITUNES)
{
$namespace = SIMPLEPIE_NAMESPACE_ITUNES;
}
// Normalize the Media RSS namespaces
if ($namespace === SIMPLEPIE_NAMESPACE_MEDIARSS_WRONG ||
$namespace === SIMPLEPIE_NAMESPACE_MEDIARSS_WRONG2 ||
$namespace === SIMPLEPIE_NAMESPACE_MEDIARSS_WRONG3 ||
$namespace === SIMPLEPIE_NAMESPACE_MEDIARSS_WRONG4 ||
$namespace === SIMPLEPIE_NAMESPACE_MEDIARSS_WRONG5 )
{
$namespace = SIMPLEPIE_NAMESPACE_MEDIARSS;
}
$cache[$string] = array($namespace, $local_name);
}
else
{
$cache[$string] = array('', $string);
}
}
return $cache[$string];
}
}

View file

@ -0,0 +1,129 @@
<?php
/**
* SimplePie
*
* A PHP-Based RSS and Atom Feed Framework.
* Takes the hard work out of managing a complete RSS/Atom solution.
*
* Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are
* permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice, this list of
* conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice, this list
* of conditions and the following disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* * Neither the name of the SimplePie Team nor the names of its contributors may be used
* to endorse or promote products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
* AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
* AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* @package SimplePie
* @version 1.3.1
* @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @author Ryan Parman
* @author Geoffrey Sneddon
* @author Ryan McCue
* @link http://simplepie.org/ SimplePie
* @license http://www.opensource.org/licenses/bsd-license.php BSD License
*/
/**
* Handles `<media:rating>` or `<itunes:explicit>` tags as defined in Media RSS and iTunes RSS respectively
*
* Used by {@see SimplePie_Enclosure::get_rating()} and {@see SimplePie_Enclosure::get_ratings()}
*
* This class can be overloaded with {@see SimplePie::set_rating_class()}
*
* @package SimplePie
* @subpackage API
*/
class SimplePie_Rating
{
/**
* Rating scheme
*
* @var string
* @see get_scheme()
*/
var $scheme;
/**
* Rating value
*
* @var string
* @see get_value()
*/
var $value;
/**
* Constructor, used to input the data
*
* For documentation on all the parameters, see the corresponding
* properties and their accessors
*/
public function __construct($scheme = null, $value = null)
{
$this->scheme = $scheme;
$this->value = $value;
}
/**
* String-ified version
*
* @return string
*/
public function __toString()
{
// There is no $this->data here
return md5(serialize($this));
}
/**
* Get the organizational scheme for the rating
*
* @return string|null
*/
public function get_scheme()
{
if ($this->scheme !== null)
{
return $this->scheme;
}
else
{
return null;
}
}
/**
* Get the value of the rating
*
* @return string|null
*/
public function get_value()
{
if ($this->value !== null)
{
return $this->value;
}
else
{
return null;
}
}
}

View file

@ -0,0 +1,225 @@
<?php
/**
* SimplePie
*
* A PHP-Based RSS and Atom Feed Framework.
* Takes the hard work out of managing a complete RSS/Atom solution.
*
* Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are
* permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice, this list of
* conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice, this list
* of conditions and the following disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* * Neither the name of the SimplePie Team nor the names of its contributors may be used
* to endorse or promote products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
* AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
* AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* @package SimplePie
* @version 1.3.1
* @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @author Ryan Parman
* @author Geoffrey Sneddon
* @author Ryan McCue
* @link http://simplepie.org/ SimplePie
* @license http://www.opensource.org/licenses/bsd-license.php BSD License
*/
/**
* Handles creating objects and calling methods
*
* Access this via {@see SimplePie::get_registry()}
*
* @package SimplePie
*/
class SimplePie_Registry
{
/**
* Default class mapping
*
* Overriding classes *must* subclass these.
*
* @var array
*/
protected $default = array(
'Cache' => 'SimplePie_Cache',
'Locator' => 'SimplePie_Locator',
'Parser' => 'SimplePie_Parser',
'File' => 'SimplePie_File',
'Sanitize' => 'SimplePie_Sanitize',
'Item' => 'SimplePie_Item',
'Author' => 'SimplePie_Author',
'Category' => 'SimplePie_Category',
'Enclosure' => 'SimplePie_Enclosure',
'Caption' => 'SimplePie_Caption',
'Copyright' => 'SimplePie_Copyright',
'Credit' => 'SimplePie_Credit',
'Rating' => 'SimplePie_Rating',
'Restriction' => 'SimplePie_Restriction',
'Content_Type_Sniffer' => 'SimplePie_Content_Type_Sniffer',
'Source' => 'SimplePie_Source',
'Misc' => 'SimplePie_Misc',
'XML_Declaration_Parser' => 'SimplePie_XML_Declaration_Parser',
'Parse_Date' => 'SimplePie_Parse_Date',
);
/**
* Class mapping
*
* @see register()
* @var array
*/
protected $classes = array();
/**
* Legacy classes
*
* @see register()
* @var array
*/
protected $legacy = array();
/**
* Constructor
*
* No-op
*/
public function __construct() { }
/**
* Register a class
*
* @param string $type See {@see $default} for names
* @param string $class Class name, must subclass the corresponding default
* @param bool $legacy Whether to enable legacy support for this class
* @return bool Successfulness
*/
public function register($type, $class, $legacy = false)
{
if (!is_subclass_of($class, $this->default[$type]))
{
return false;
}
$this->classes[$type] = $class;
if ($legacy)
{
$this->legacy[] = $class;
}
return true;
}
/**
* Get the class registered for a type
*
* Where possible, use {@see create()} or {@see call()} instead
*
* @param string $type
* @return string|null
*/
public function get_class($type)
{
if (!empty($this->classes[$type]))
{
return $this->classes[$type];
}
if (!empty($this->default[$type]))
{
return $this->default[$type];
}
return null;
}
/**
* Create a new instance of a given type
*
* @param string $type
* @param array $parameters Parameters to pass to the constructor
* @return object Instance of class
*/
public function &create($type, $parameters = array())
{
$class = $this->get_class($type);
if (in_array($class, $this->legacy))
{
switch ($type)
{
case 'locator':
// Legacy: file, timeout, useragent, file_class, max_checked_feeds, content_type_sniffer_class
// Specified: file, timeout, useragent, max_checked_feeds
$replacement = array($this->get_class('file'), $parameters[3], $this->get_class('content_type_sniffer'));
array_splice($parameters, 3, 1, $replacement);
break;
}
}
if (!method_exists($class, '__construct'))
{
$instance = new $class;
}
else
{
$reflector = new ReflectionClass($class);
$instance = $reflector->newInstanceArgs($parameters);
}
if (method_exists($instance, 'set_registry'))
{
$instance->set_registry($this);
}
return $instance;
}
/**
* Call a static method for a type
*
* @param string $type
* @param string $method
* @param array $parameters
* @return mixed
*/
public function &call($type, $method, $parameters = array())
{
$class = $this->get_class($type);
if (in_array($class, $this->legacy))
{
switch ($type)
{
case 'Cache':
// For backwards compatibility with old non-static
// Cache::create() methods
if ($method === 'get_handler')
{
$result = @call_user_func_array(array($class, 'create'), $parameters);
return $result;
}
break;
}
}
$result = call_user_func_array(array($class, $method), $parameters);
return $result;
}
}

View file

@ -0,0 +1,155 @@
<?php
/**
* SimplePie
*
* A PHP-Based RSS and Atom Feed Framework.
* Takes the hard work out of managing a complete RSS/Atom solution.
*
* Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are
* permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice, this list of
* conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice, this list
* of conditions and the following disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* * Neither the name of the SimplePie Team nor the names of its contributors may be used
* to endorse or promote products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
* AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
* AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* @package SimplePie
* @version 1.3.1
* @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @author Ryan Parman
* @author Geoffrey Sneddon
* @author Ryan McCue
* @link http://simplepie.org/ SimplePie
* @license http://www.opensource.org/licenses/bsd-license.php BSD License
*/
/**
* Handles `<media:restriction>` as defined in Media RSS
*
* Used by {@see SimplePie_Enclosure::get_restriction()} and {@see SimplePie_Enclosure::get_restrictions()}
*
* This class can be overloaded with {@see SimplePie::set_restriction_class()}
*
* @package SimplePie
* @subpackage API
*/
class SimplePie_Restriction
{
/**
* Relationship ('allow'/'deny')
*
* @var string
* @see get_relationship()
*/
var $relationship;
/**
* Type of restriction
*
* @var string
* @see get_type()
*/
var $type;
/**
* Restricted values
*
* @var string
* @see get_value()
*/
var $value;
/**
* Constructor, used to input the data
*
* For documentation on all the parameters, see the corresponding
* properties and their accessors
*/
public function __construct($relationship = null, $type = null, $value = null)
{
$this->relationship = $relationship;
$this->type = $type;
$this->value = $value;
}
/**
* String-ified version
*
* @return string
*/
public function __toString()
{
// There is no $this->data here
return md5(serialize($this));
}
/**
* Get the relationship
*
* @return string|null Either 'allow' or 'deny'
*/
public function get_relationship()
{
if ($this->relationship !== null)
{
return $this->relationship;
}
else
{
return null;
}
}
/**
* Get the type
*
* @return string|null
*/
public function get_type()
{
if ($this->type !== null)
{
return $this->type;
}
else
{
return null;
}
}
/**
* Get the list of restricted things
*
* @return string|null
*/
public function get_value()
{
if ($this->value !== null)
{
return $this->value;
}
else
{
return null;
}
}
}

View file

@ -0,0 +1,549 @@
<?php
/**
* SimplePie
*
* A PHP-Based RSS and Atom Feed Framework.
* Takes the hard work out of managing a complete RSS/Atom solution.
*
* Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are
* permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice, this list of
* conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice, this list
* of conditions and the following disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* * Neither the name of the SimplePie Team nor the names of its contributors may be used
* to endorse or promote products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
* AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
* AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* @package SimplePie
* @version 1.3.1
* @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @author Ryan Parman
* @author Geoffrey Sneddon
* @author Ryan McCue
* @link http://simplepie.org/ SimplePie
* @license http://www.opensource.org/licenses/bsd-license.php BSD License
*/
/**
* Used for data cleanup and post-processing
*
*
* This class can be overloaded with {@see SimplePie::set_sanitize_class()}
*
* @package SimplePie
* @todo Move to using an actual HTML parser (this will allow tags to be properly stripped, and to switch between HTML and XHTML), this will also make it easier to shorten a string while preserving HTML tags
*/
class SimplePie_Sanitize
{
// Private vars
var $base;
// Options
var $remove_div = true;
var $image_handler = '';
var $strip_htmltags = array('base', 'blink', 'body', 'doctype', 'embed', 'font', 'form', 'frame', 'frameset', 'html', 'iframe', 'input', 'marquee', 'meta', 'noscript', 'object', 'param', 'script', 'style');
var $encode_instead_of_strip = false;
var $strip_attributes = array('bgsound', 'class', 'expr', 'id', 'style', 'onclick', 'onerror', 'onfinish', 'onmouseover', 'onmouseout', 'onfocus', 'onblur', 'lowsrc', 'dynsrc');
var $strip_comments = false;
var $output_encoding = 'UTF-8';
var $enable_cache = true;
var $cache_location = './cache';
var $cache_name_function = 'md5';
var $timeout = 10;
var $useragent = '';
var $force_fsockopen = false;
var $replace_url_attributes = null;
public function __construct()
{
// Set defaults
$this->set_url_replacements(null);
}
public function remove_div($enable = true)
{
$this->remove_div = (bool) $enable;
}
public function set_image_handler($page = false)
{
if ($page)
{
$this->image_handler = (string) $page;
}
else
{
$this->image_handler = false;
}
}
public function set_registry(SimplePie_Registry $registry)
{
$this->registry = $registry;
}
public function pass_cache_data($enable_cache = true, $cache_location = './cache', $cache_name_function = 'md5', $cache_class = 'SimplePie_Cache')
{
if (isset($enable_cache))
{
$this->enable_cache = (bool) $enable_cache;
}
if ($cache_location)
{
$this->cache_location = (string) $cache_location;
}
if ($cache_name_function)
{
$this->cache_name_function = (string) $cache_name_function;
}
}
public function pass_file_data($file_class = 'SimplePie_File', $timeout = 10, $useragent = '', $force_fsockopen = false)
{
if ($timeout)
{
$this->timeout = (string) $timeout;
}
if ($useragent)
{
$this->useragent = (string) $useragent;
}
if ($force_fsockopen)
{
$this->force_fsockopen = (string) $force_fsockopen;
}
}
public function strip_htmltags($tags = array('base', 'blink', 'body', 'doctype', 'embed', 'font', 'form', 'frame', 'frameset', 'html', 'iframe', 'input', 'marquee', 'meta', 'noscript', 'object', 'param', 'script', 'style'))
{
if ($tags)
{
if (is_array($tags))
{
$this->strip_htmltags = $tags;
}
else
{
$this->strip_htmltags = explode(',', $tags);
}
}
else
{
$this->strip_htmltags = false;
}
}
public function encode_instead_of_strip($encode = false)
{
$this->encode_instead_of_strip = (bool) $encode;
}
public function strip_attributes($attribs = array('bgsound', 'class', 'expr', 'id', 'style', 'onclick', 'onerror', 'onfinish', 'onmouseover', 'onmouseout', 'onfocus', 'onblur', 'lowsrc', 'dynsrc'))
{
if ($attribs)
{
if (is_array($attribs))
{
$this->strip_attributes = $attribs;
}
else
{
$this->strip_attributes = explode(',', $attribs);
}
}
else
{
$this->strip_attributes = false;
}
}
public function strip_comments($strip = false)
{
$this->strip_comments = (bool) $strip;
}
public function set_output_encoding($encoding = 'UTF-8')
{
$this->output_encoding = (string) $encoding;
}
/**
* Set element/attribute key/value pairs of HTML attributes
* containing URLs that need to be resolved relative to the feed
*
* Defaults to |a|@href, |area|@href, |blockquote|@cite, |del|@cite,
* |form|@action, |img|@longdesc, |img|@src, |input|@src, |ins|@cite,
* |q|@cite
*
* @since 1.0
* @param array|null $element_attribute Element/attribute key/value pairs, null for default
*/
public function set_url_replacements($element_attribute = null)
{
if ($element_attribute === null)
{
$element_attribute = array(
'a' => 'href',
'area' => 'href',
'blockquote' => 'cite',
'del' => 'cite',
'form' => 'action',
'img' => array(
'longdesc',
'src'
),
'input' => 'src',
'ins' => 'cite',
'q' => 'cite'
);
}
$this->replace_url_attributes = (array) $element_attribute;
}
public function sanitize($data, $type, $base = '')
{
$data = trim($data);
if ($data !== '' || $type & SIMPLEPIE_CONSTRUCT_IRI)
{
if ($type & SIMPLEPIE_CONSTRUCT_MAYBE_HTML)
{
if (preg_match('/(&(#(x[0-9a-fA-F]+|[0-9]+)|[a-zA-Z0-9]+)|<\/[A-Za-z][^\x09\x0A\x0B\x0C\x0D\x20\x2F\x3E]*' . SIMPLEPIE_PCRE_HTML_ATTRIBUTE . '>)/', $data))
{
$type |= SIMPLEPIE_CONSTRUCT_HTML;
}
else
{
$type |= SIMPLEPIE_CONSTRUCT_TEXT;
}
}
if ($type & SIMPLEPIE_CONSTRUCT_BASE64)
{
$data = base64_decode($data);
}
if ($type & (SIMPLEPIE_CONSTRUCT_HTML | SIMPLEPIE_CONSTRUCT_XHTML))
{
$document = new DOMDocument();
$document->encoding = 'UTF-8';
$data = $this->preprocess($data, $type);
set_error_handler(array('SimplePie_Misc', 'silence_errors'));
$document->loadHTML($data);
restore_error_handler();
// Strip comments
if ($this->strip_comments)
{
$xpath = new DOMXPath($document);
$comments = $xpath->query('//comment()');
foreach ($comments as $comment)
{
$comment->parentNode->removeChild($comment);
}
}
// Strip out HTML tags and attributes that might cause various security problems.
// Based on recommendations by Mark Pilgrim at:
// http://diveintomark.org/archives/2003/06/12/how_to_consume_rss_safely
if ($this->strip_htmltags)
{
foreach ($this->strip_htmltags as $tag)
{
$this->strip_tag($tag, $document, $type);
}
}
if ($this->strip_attributes)
{
foreach ($this->strip_attributes as $attrib)
{
$this->strip_attr($attrib, $document);
}
}
// Replace relative URLs
$this->base = $base;
foreach ($this->replace_url_attributes as $element => $attributes)
{
$this->replace_urls($document, $element, $attributes);
}
// If image handling (caching, etc.) is enabled, cache and rewrite all the image tags.
if (isset($this->image_handler) && ((string) $this->image_handler) !== '' && $this->enable_cache)
{
$images = $document->getElementsByTagName('img');
foreach ($images as $img)
{
if ($img->hasAttribute('src'))
{
$image_url = call_user_func($this->cache_name_function, $img->getAttribute('src'));
$cache = $this->registry->call('Cache', 'get_handler', array($this->cache_location, $image_url, 'spi'));
if ($cache->load())
{
$img->setAttribute('src', $this->image_handler . $image_url);
}
else
{
$file = $this->registry->create('File', array($img['attribs']['src']['data'], $this->timeout, 5, array('X-FORWARDED-FOR' => $_SERVER['REMOTE_ADDR']), $this->useragent, $this->force_fsockopen));
$headers = $file->headers;
if ($file->success && ($file->method & SIMPLEPIE_FILE_SOURCE_REMOTE === 0 || ($file->status_code === 200 || $file->status_code > 206 && $file->status_code < 300)))
{
if ($cache->save(array('headers' => $file->headers, 'body' => $file->body)))
{
$img->setAttribute('src', $this->image_handler . $image_url);
}
else
{
trigger_error("$this->cache_location is not writeable. Make sure you've set the correct relative or absolute path, and that the location is server-writable.", E_USER_WARNING);
}
}
}
}
}
}
// Remove the DOCTYPE
// Seems to cause segfaulting if we don't do this
if ($document->firstChild instanceof DOMDocumentType)
{
$document->removeChild($document->firstChild);
}
// Move everything from the body to the root
$real_body = $document->getElementsByTagName('body')->item(0)->childNodes->item(0);
$document->replaceChild($real_body, $document->firstChild);
// Finally, convert to a HTML string
$data = trim($document->saveHTML());
if ($this->remove_div)
{
$data = preg_replace('/^<div' . SIMPLEPIE_PCRE_XML_ATTRIBUTE . '>/', '', $data);
$data = preg_replace('/<\/div>$/', '', $data);
}
else
{
$data = preg_replace('/^<div' . SIMPLEPIE_PCRE_XML_ATTRIBUTE . '>/', '<div>', $data);
}
}
if ($type & SIMPLEPIE_CONSTRUCT_IRI)
{
$absolute = $this->registry->call('Misc', 'absolutize_url', array($data, $base));
if ($absolute !== false)
{
$data = $absolute;
}
}
if ($type & (SIMPLEPIE_CONSTRUCT_TEXT | SIMPLEPIE_CONSTRUCT_IRI))
{
$data = htmlspecialchars($data, ENT_COMPAT, 'UTF-8');
}
if ($this->output_encoding !== 'UTF-8')
{
$data = $this->registry->call('Misc', 'change_encoding', array($data, 'UTF-8', $this->output_encoding));
}
}
return $data;
}
protected function preprocess($html, $type)
{
$ret = '';
if ($type & ~SIMPLEPIE_CONSTRUCT_XHTML)
{
// Atom XHTML constructs are wrapped with a div by default
// Note: No protection if $html contains a stray </div>!
$html = '<div>' . $html . '</div>';
$ret .= '<!DOCTYPE html>';
$content_type = 'text/html';
}
else
{
$ret .= '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">';
$content_type = 'application/xhtml+xml';
}
$ret .= '<html><head>';
$ret .= '<meta http-equiv="Content-Type" content="' . $content_type . '; charset=utf-8" />';
$ret .= '</head><body>' . $html . '</body></html>';
return $ret;
}
public function replace_urls($document, $tag, $attributes)
{
if (!is_array($attributes))
{
$attributes = array($attributes);
}
if (!is_array($this->strip_htmltags) || !in_array($tag, $this->strip_htmltags))
{
$elements = $document->getElementsByTagName($tag);
foreach ($elements as $element)
{
foreach ($attributes as $attribute)
{
if ($element->hasAttribute($attribute))
{
$value = $this->registry->call('Misc', 'absolutize_url', array($element->getAttribute($attribute), $this->base));
if ($value !== false)
{
$element->setAttribute($attribute, $value);
}
}
}
}
}
}
public function do_strip_htmltags($match)
{
if ($this->encode_instead_of_strip)
{
if (isset($match[4]) && !in_array(strtolower($match[1]), array('script', 'style')))
{
$match[1] = htmlspecialchars($match[1], ENT_COMPAT, 'UTF-8');
$match[2] = htmlspecialchars($match[2], ENT_COMPAT, 'UTF-8');
return "&lt;$match[1]$match[2]&gt;$match[3]&lt;/$match[1]&gt;";
}
else
{
return htmlspecialchars($match[0], ENT_COMPAT, 'UTF-8');
}
}
elseif (isset($match[4]) && !in_array(strtolower($match[1]), array('script', 'style')))
{
return $match[4];
}
else
{
return '';
}
}
protected function strip_tag($tag, $document, $type)
{
$xpath = new DOMXPath($document);
$elements = $xpath->query('body//' . $tag);
if ($this->encode_instead_of_strip)
{
foreach ($elements as $element)
{
$fragment = $document->createDocumentFragment();
// For elements which aren't script or style, include the tag itself
if (!in_array($tag, array('script', 'style')))
{
$text = '<' . $tag;
if ($element->hasAttributes())
{
$attrs = array();
foreach ($element->attributes as $name => $attr)
{
$value = $attr->value;
// In XHTML, empty values should never exist, so we repeat the value
if (empty($value) && ($type & SIMPLEPIE_CONSTRUCT_XHTML))
{
$value = $name;
}
// For HTML, empty is fine
elseif (empty($value) && ($type & SIMPLEPIE_CONSTRUCT_HTML))
{
$attrs[] = $name;
continue;
}
// Standard attribute text
$attrs[] = $name . '="' . $attr->value . '"';
}
$text .= ' ' . implode(' ', $attrs);
}
$text .= '>';
$fragment->appendChild(new DOMText($text));
}
$number = $element->childNodes->length;
for ($i = $number; $i > 0; $i--)
{
$child = $element->childNodes->item(0);
$fragment->appendChild($child);
}
if (!in_array($tag, array('script', 'style')))
{
$fragment->appendChild(new DOMText('</' . $tag . '>'));
}
$element->parentNode->replaceChild($fragment, $element);
}
return;
}
elseif (in_array($tag, array('script', 'style')))
{
foreach ($elements as $element)
{
$element->parentNode->removeChild($element);
}
return;
}
else
{
foreach ($elements as $element)
{
$fragment = $document->createDocumentFragment();
$number = $element->childNodes->length;
for ($i = $number; $i > 0; $i--)
{
$child = $element->childNodes->item(0);
$fragment->appendChild($child);
}
$element->parentNode->replaceChild($fragment, $element);
}
}
}
protected function strip_attr($attrib, $document)
{
$xpath = new DOMXPath($document);
$elements = $xpath->query('//*[@' . $attrib . ']');
foreach ($elements as $element)
{
$element->removeAttribute($attrib);
}
}
}

View file

@ -0,0 +1,611 @@
<?php
/**
* SimplePie
*
* A PHP-Based RSS and Atom Feed Framework.
* Takes the hard work out of managing a complete RSS/Atom solution.
*
* Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are
* permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice, this list of
* conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice, this list
* of conditions and the following disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* * Neither the name of the SimplePie Team nor the names of its contributors may be used
* to endorse or promote products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
* AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
* AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* @package SimplePie
* @version 1.3.1
* @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @author Ryan Parman
* @author Geoffrey Sneddon
* @author Ryan McCue
* @link http://simplepie.org/ SimplePie
* @license http://www.opensource.org/licenses/bsd-license.php BSD License
*/
/**
* Handles `<atom:source>`
*
* Used by {@see SimplePie_Item::get_source()}
*
* This class can be overloaded with {@see SimplePie::set_source_class()}
*
* @package SimplePie
* @subpackage API
*/
class SimplePie_Source
{
var $item;
var $data = array();
protected $registry;
public function __construct($item, $data)
{
$this->item = $item;
$this->data = $data;
}
public function set_registry(SimplePie_Registry $registry)
{
$this->registry = $registry;
}
public function __toString()
{
return md5(serialize($this->data));
}
public function get_source_tags($namespace, $tag)
{
if (isset($this->data['child'][$namespace][$tag]))
{
return $this->data['child'][$namespace][$tag];
}
else
{
return null;
}
}
public function get_base($element = array())
{
return $this->item->get_base($element);
}
public function sanitize($data, $type, $base = '')
{
return $this->item->sanitize($data, $type, $base);
}
public function get_item()
{
return $this->item;
}
public function get_title()
{
if ($return = $this->get_source_tags(SIMPLEPIE_NAMESPACE_ATOM_10, 'title'))
{
return $this->sanitize($return[0]['data'], $this->registry->call('Misc', 'atom_10_construct_type', array($return[0]['attribs'])), $this->get_base($return[0]));
}
elseif ($return = $this->get_source_tags(SIMPLEPIE_NAMESPACE_ATOM_03, 'title'))
{
return $this->sanitize($return[0]['data'], $this->registry->call('Misc', 'atom_03_construct_type', array($return[0]['attribs'])), $this->get_base($return[0]));
}
elseif ($return = $this->get_source_tags(SIMPLEPIE_NAMESPACE_RSS_10, 'title'))
{
return $this->sanitize($return[0]['data'], SIMPLEPIE_CONSTRUCT_MAYBE_HTML, $this->get_base($return[0]));
}
elseif ($return = $this->get_source_tags(SIMPLEPIE_NAMESPACE_RSS_090, 'title'))
{
return $this->sanitize($return[0]['data'], SIMPLEPIE_CONSTRUCT_MAYBE_HTML, $this->get_base($return[0]));
}
elseif ($return = $this->get_source_tags(SIMPLEPIE_NAMESPACE_RSS_20, 'title'))
{
return $this->sanitize($return[0]['data'], SIMPLEPIE_CONSTRUCT_MAYBE_HTML, $this->get_base($return[0]));
}
elseif ($return = $this->get_source_tags(SIMPLEPIE_NAMESPACE_DC_11, 'title'))
{
return $this->sanitize($return[0]['data'], SIMPLEPIE_CONSTRUCT_TEXT);
}
elseif ($return = $this->get_source_tags(SIMPLEPIE_NAMESPACE_DC_10, 'title'))
{
return $this->sanitize($return[0]['data'], SIMPLEPIE_CONSTRUCT_TEXT);
}
else
{
return null;
}
}
public function get_category($key = 0)
{
$categories = $this->get_categories();
if (isset($categories[$key]))
{
return $categories[$key];
}
else
{
return null;
}
}
public function get_categories()
{
$categories = array();
foreach ((array) $this->get_source_tags(SIMPLEPIE_NAMESPACE_ATOM_10, 'category') as $category)
{
$term = null;
$scheme = null;
$label = null;
if (isset($category['attribs']['']['term']))
{
$term = $this->sanitize($category['attribs']['']['term'], SIMPLEPIE_CONSTRUCT_TEXT);
}
if (isset($category['attribs']['']['scheme']))
{
$scheme = $this->sanitize($category['attribs']['']['scheme'], SIMPLEPIE_CONSTRUCT_TEXT);
}
if (isset($category['attribs']['']['label']))
{
$label = $this->sanitize($category['attribs']['']['label'], SIMPLEPIE_CONSTRUCT_TEXT);
}
$categories[] = $this->registry->create('Category', array($term, $scheme, $label));
}
foreach ((array) $this->get_source_tags(SIMPLEPIE_NAMESPACE_RSS_20, 'category') as $category)
{
// This is really the label, but keep this as the term also for BC.
// Label will also work on retrieving because that falls back to term.
$term = $this->sanitize($category['data'], SIMPLEPIE_CONSTRUCT_TEXT);
if (isset($category['attribs']['']['domain']))
{
$scheme = $this->sanitize($category['attribs']['']['domain'], SIMPLEPIE_CONSTRUCT_TEXT);
}
else
{
$scheme = null;
}
$categories[] = $this->registry->create('Category', array($term, $scheme, null));
}
foreach ((array) $this->get_source_tags(SIMPLEPIE_NAMESPACE_DC_11, 'subject') as $category)
{
$categories[] = $this->registry->create('Category', array($this->sanitize($category['data'], SIMPLEPIE_CONSTRUCT_TEXT), null, null));
}
foreach ((array) $this->get_source_tags(SIMPLEPIE_NAMESPACE_DC_10, 'subject') as $category)
{
$categories[] = $this->registry->create('Category', array($this->sanitize($category['data'], SIMPLEPIE_CONSTRUCT_TEXT), null, null));
}
if (!empty($categories))
{
return array_unique($categories);
}
else
{
return null;
}
}
public function get_author($key = 0)
{
$authors = $this->get_authors();
if (isset($authors[$key]))
{
return $authors[$key];
}
else
{
return null;
}
}
public function get_authors()
{
$authors = array();
foreach ((array) $this->get_source_tags(SIMPLEPIE_NAMESPACE_ATOM_10, 'author') as $author)
{
$name = null;
$uri = null;
$email = null;
if (isset($author['child'][SIMPLEPIE_NAMESPACE_ATOM_10]['name'][0]['data']))
{
$name = $this->sanitize($author['child'][SIMPLEPIE_NAMESPACE_ATOM_10]['name'][0]['data'], SIMPLEPIE_CONSTRUCT_TEXT);
}
if (isset($author['child'][SIMPLEPIE_NAMESPACE_ATOM_10]['uri'][0]['data']))
{
$uri = $this->sanitize($author['child'][SIMPLEPIE_NAMESPACE_ATOM_10]['uri'][0]['data'], SIMPLEPIE_CONSTRUCT_IRI, $this->get_base($author['child'][SIMPLEPIE_NAMESPACE_ATOM_10]['uri'][0]));
}
if (isset($author['child'][SIMPLEPIE_NAMESPACE_ATOM_10]['email'][0]['data']))
{
$email = $this->sanitize($author['child'][SIMPLEPIE_NAMESPACE_ATOM_10]['email'][0]['data'], SIMPLEPIE_CONSTRUCT_TEXT);
}
if ($name !== null || $email !== null || $uri !== null)
{
$authors[] = $this->registry->create('Author', array($name, $uri, $email));
}
}
if ($author = $this->get_source_tags(SIMPLEPIE_NAMESPACE_ATOM_03, 'author'))
{
$name = null;
$url = null;
$email = null;
if (isset($author[0]['child'][SIMPLEPIE_NAMESPACE_ATOM_03]['name'][0]['data']))
{
$name = $this->sanitize($author[0]['child'][SIMPLEPIE_NAMESPACE_ATOM_03]['name'][0]['data'], SIMPLEPIE_CONSTRUCT_TEXT);
}
if (isset($author[0]['child'][SIMPLEPIE_NAMESPACE_ATOM_03]['url'][0]['data']))
{
$url = $this->sanitize($author[0]['child'][SIMPLEPIE_NAMESPACE_ATOM_03]['url'][0]['data'], SIMPLEPIE_CONSTRUCT_IRI, $this->get_base($author[0]['child'][SIMPLEPIE_NAMESPACE_ATOM_03]['url'][0]));
}
if (isset($author[0]['child'][SIMPLEPIE_NAMESPACE_ATOM_03]['email'][0]['data']))
{
$email = $this->sanitize($author[0]['child'][SIMPLEPIE_NAMESPACE_ATOM_03]['email'][0]['data'], SIMPLEPIE_CONSTRUCT_TEXT);
}
if ($name !== null || $email !== null || $url !== null)
{
$authors[] = $this->registry->create('Author', array($name, $url, $email));
}
}
foreach ((array) $this->get_source_tags(SIMPLEPIE_NAMESPACE_DC_11, 'creator') as $author)
{
$authors[] = $this->registry->create('Author', array($this->sanitize($author['data'], SIMPLEPIE_CONSTRUCT_TEXT), null, null));
}
foreach ((array) $this->get_source_tags(SIMPLEPIE_NAMESPACE_DC_10, 'creator') as $author)
{
$authors[] = $this->registry->create('Author', array($this->sanitize($author['data'], SIMPLEPIE_CONSTRUCT_TEXT), null, null));
}
foreach ((array) $this->get_source_tags(SIMPLEPIE_NAMESPACE_ITUNES, 'author') as $author)
{
$authors[] = $this->registry->create('Author', array($this->sanitize($author['data'], SIMPLEPIE_CONSTRUCT_TEXT), null, null));
}
if (!empty($authors))
{
return array_unique($authors);
}
else
{
return null;
}
}
public function get_contributor($key = 0)
{
$contributors = $this->get_contributors();
if (isset($contributors[$key]))
{
return $contributors[$key];
}
else
{
return null;
}
}
public function get_contributors()
{
$contributors = array();
foreach ((array) $this->get_source_tags(SIMPLEPIE_NAMESPACE_ATOM_10, 'contributor') as $contributor)
{
$name = null;
$uri = null;
$email = null;
if (isset($contributor['child'][SIMPLEPIE_NAMESPACE_ATOM_10]['name'][0]['data']))
{
$name = $this->sanitize($contributor['child'][SIMPLEPIE_NAMESPACE_ATOM_10]['name'][0]['data'], SIMPLEPIE_CONSTRUCT_TEXT);
}
if (isset($contributor['child'][SIMPLEPIE_NAMESPACE_ATOM_10]['uri'][0]['data']))
{
$uri = $this->sanitize($contributor['child'][SIMPLEPIE_NAMESPACE_ATOM_10]['uri'][0]['data'], SIMPLEPIE_CONSTRUCT_IRI, $this->get_base($contributor['child'][SIMPLEPIE_NAMESPACE_ATOM_10]['uri'][0]));
}
if (isset($contributor['child'][SIMPLEPIE_NAMESPACE_ATOM_10]['email'][0]['data']))
{
$email = $this->sanitize($contributor['child'][SIMPLEPIE_NAMESPACE_ATOM_10]['email'][0]['data'], SIMPLEPIE_CONSTRUCT_TEXT);
}
if ($name !== null || $email !== null || $uri !== null)
{
$contributors[] = $this->registry->create('Author', array($name, $uri, $email));
}
}
foreach ((array) $this->get_source_tags(SIMPLEPIE_NAMESPACE_ATOM_03, 'contributor') as $contributor)
{
$name = null;
$url = null;
$email = null;
if (isset($contributor['child'][SIMPLEPIE_NAMESPACE_ATOM_03]['name'][0]['data']))
{
$name = $this->sanitize($contributor['child'][SIMPLEPIE_NAMESPACE_ATOM_03]['name'][0]['data'], SIMPLEPIE_CONSTRUCT_TEXT);
}
if (isset($contributor['child'][SIMPLEPIE_NAMESPACE_ATOM_03]['url'][0]['data']))
{
$url = $this->sanitize($contributor['child'][SIMPLEPIE_NAMESPACE_ATOM_03]['url'][0]['data'], SIMPLEPIE_CONSTRUCT_IRI, $this->get_base($contributor['child'][SIMPLEPIE_NAMESPACE_ATOM_03]['url'][0]));
}
if (isset($contributor['child'][SIMPLEPIE_NAMESPACE_ATOM_03]['email'][0]['data']))
{
$email = $this->sanitize($contributor['child'][SIMPLEPIE_NAMESPACE_ATOM_03]['email'][0]['data'], SIMPLEPIE_CONSTRUCT_TEXT);
}
if ($name !== null || $email !== null || $url !== null)
{
$contributors[] = $this->registry->create('Author', array($name, $url, $email));
}
}
if (!empty($contributors))
{
return array_unique($contributors);
}
else
{
return null;
}
}
public function get_link($key = 0, $rel = 'alternate')
{
$links = $this->get_links($rel);
if (isset($links[$key]))
{
return $links[$key];
}
else
{
return null;
}
}
/**
* Added for parity between the parent-level and the item/entry-level.
*/
public function get_permalink()
{
return $this->get_link(0);
}
public function get_links($rel = 'alternate')
{
if (!isset($this->data['links']))
{
$this->data['links'] = array();
if ($links = $this->get_source_tags(SIMPLEPIE_NAMESPACE_ATOM_10, 'link'))
{
foreach ($links as $link)
{
if (isset($link['attribs']['']['href']))
{
$link_rel = (isset($link['attribs']['']['rel'])) ? $link['attribs']['']['rel'] : 'alternate';
$this->data['links'][$link_rel][] = $this->sanitize($link['attribs']['']['href'], SIMPLEPIE_CONSTRUCT_IRI, $this->get_base($link));
}
}
}
if ($links = $this->get_source_tags(SIMPLEPIE_NAMESPACE_ATOM_03, 'link'))
{
foreach ($links as $link)
{
if (isset($link['attribs']['']['href']))
{
$link_rel = (isset($link['attribs']['']['rel'])) ? $link['attribs']['']['rel'] : 'alternate';
$this->data['links'][$link_rel][] = $this->sanitize($link['attribs']['']['href'], SIMPLEPIE_CONSTRUCT_IRI, $this->get_base($link));
}
}
}
if ($links = $this->get_source_tags(SIMPLEPIE_NAMESPACE_RSS_10, 'link'))
{
$this->data['links']['alternate'][] = $this->sanitize($links[0]['data'], SIMPLEPIE_CONSTRUCT_IRI, $this->get_base($links[0]));
}
if ($links = $this->get_source_tags(SIMPLEPIE_NAMESPACE_RSS_090, 'link'))
{
$this->data['links']['alternate'][] = $this->sanitize($links[0]['data'], SIMPLEPIE_CONSTRUCT_IRI, $this->get_base($links[0]));
}
if ($links = $this->get_source_tags(SIMPLEPIE_NAMESPACE_RSS_20, 'link'))
{
$this->data['links']['alternate'][] = $this->sanitize($links[0]['data'], SIMPLEPIE_CONSTRUCT_IRI, $this->get_base($links[0]));
}
$keys = array_keys($this->data['links']);
foreach ($keys as $key)
{
if ($this->registry->call('Misc', 'is_isegment_nz_nc', array($key)))
{
if (isset($this->data['links'][SIMPLEPIE_IANA_LINK_RELATIONS_REGISTRY . $key]))
{
$this->data['links'][SIMPLEPIE_IANA_LINK_RELATIONS_REGISTRY . $key] = array_merge($this->data['links'][$key], $this->data['links'][SIMPLEPIE_IANA_LINK_RELATIONS_REGISTRY . $key]);
$this->data['links'][$key] =& $this->data['links'][SIMPLEPIE_IANA_LINK_RELATIONS_REGISTRY . $key];
}
else
{
$this->data['links'][SIMPLEPIE_IANA_LINK_RELATIONS_REGISTRY . $key] =& $this->data['links'][$key];
}
}
elseif (substr($key, 0, 41) === SIMPLEPIE_IANA_LINK_RELATIONS_REGISTRY)
{
$this->data['links'][substr($key, 41)] =& $this->data['links'][$key];
}
$this->data['links'][$key] = array_unique($this->data['links'][$key]);
}
}
if (isset($this->data['links'][$rel]))
{
return $this->data['links'][$rel];
}
else
{
return null;
}
}
public function get_description()
{
if ($return = $this->get_source_tags(SIMPLEPIE_NAMESPACE_ATOM_10, 'subtitle'))
{
return $this->sanitize($return[0]['data'], $this->registry->call('Misc', 'atom_10_construct_type', array($return[0]['attribs'])), $this->get_base($return[0]));
}
elseif ($return = $this->get_source_tags(SIMPLEPIE_NAMESPACE_ATOM_03, 'tagline'))
{
return $this->sanitize($return[0]['data'], $this->registry->call('Misc', 'atom_03_construct_type', array($return[0]['attribs'])), $this->get_base($return[0]));
}
elseif ($return = $this->get_source_tags(SIMPLEPIE_NAMESPACE_RSS_10, 'description'))
{
return $this->sanitize($return[0]['data'], SIMPLEPIE_CONSTRUCT_MAYBE_HTML, $this->get_base($return[0]));
}
elseif ($return = $this->get_source_tags(SIMPLEPIE_NAMESPACE_RSS_090, 'description'))
{
return $this->sanitize($return[0]['data'], SIMPLEPIE_CONSTRUCT_MAYBE_HTML, $this->get_base($return[0]));
}
elseif ($return = $this->get_source_tags(SIMPLEPIE_NAMESPACE_RSS_20, 'description'))
{
return $this->sanitize($return[0]['data'], SIMPLEPIE_CONSTRUCT_MAYBE_HTML, $this->get_base($return[0]));
}
elseif ($return = $this->get_source_tags(SIMPLEPIE_NAMESPACE_DC_11, 'description'))
{
return $this->sanitize($return[0]['data'], SIMPLEPIE_CONSTRUCT_TEXT);
}
elseif ($return = $this->get_source_tags(SIMPLEPIE_NAMESPACE_DC_10, 'description'))
{
return $this->sanitize($return[0]['data'], SIMPLEPIE_CONSTRUCT_TEXT);
}
elseif ($return = $this->get_source_tags(SIMPLEPIE_NAMESPACE_ITUNES, 'summary'))
{
return $this->sanitize($return[0]['data'], SIMPLEPIE_CONSTRUCT_HTML, $this->get_base($return[0]));
}
elseif ($return = $this->get_source_tags(SIMPLEPIE_NAMESPACE_ITUNES, 'subtitle'))
{
return $this->sanitize($return[0]['data'], SIMPLEPIE_CONSTRUCT_HTML, $this->get_base($return[0]));
}
else
{
return null;
}
}
public function get_copyright()
{
if ($return = $this->get_source_tags(SIMPLEPIE_NAMESPACE_ATOM_10, 'rights'))
{
return $this->sanitize($return[0]['data'], $this->registry->call('Misc', 'atom_10_construct_type', array($return[0]['attribs'])), $this->get_base($return[0]));
}
elseif ($return = $this->get_source_tags(SIMPLEPIE_NAMESPACE_ATOM_03, 'copyright'))
{
return $this->sanitize($return[0]['data'], $this->registry->call('Misc', 'atom_03_construct_type', array($return[0]['attribs'])), $this->get_base($return[0]));
}
elseif ($return = $this->get_source_tags(SIMPLEPIE_NAMESPACE_RSS_20, 'copyright'))
{
return $this->sanitize($return[0]['data'], SIMPLEPIE_CONSTRUCT_TEXT);
}
elseif ($return = $this->get_source_tags(SIMPLEPIE_NAMESPACE_DC_11, 'rights'))
{
return $this->sanitize($return[0]['data'], SIMPLEPIE_CONSTRUCT_TEXT);
}
elseif ($return = $this->get_source_tags(SIMPLEPIE_NAMESPACE_DC_10, 'rights'))
{
return $this->sanitize($return[0]['data'], SIMPLEPIE_CONSTRUCT_TEXT);
}
else
{
return null;
}
}
public function get_language()
{
if ($return = $this->get_source_tags(SIMPLEPIE_NAMESPACE_RSS_20, 'language'))
{
return $this->sanitize($return[0]['data'], SIMPLEPIE_CONSTRUCT_TEXT);
}
elseif ($return = $this->get_source_tags(SIMPLEPIE_NAMESPACE_DC_11, 'language'))
{
return $this->sanitize($return[0]['data'], SIMPLEPIE_CONSTRUCT_TEXT);
}
elseif ($return = $this->get_source_tags(SIMPLEPIE_NAMESPACE_DC_10, 'language'))
{
return $this->sanitize($return[0]['data'], SIMPLEPIE_CONSTRUCT_TEXT);
}
elseif (isset($this->data['xml_lang']))
{
return $this->sanitize($this->data['xml_lang'], SIMPLEPIE_CONSTRUCT_TEXT);
}
else
{
return null;
}
}
public function get_latitude()
{
if ($return = $this->get_source_tags(SIMPLEPIE_NAMESPACE_W3C_BASIC_GEO, 'lat'))
{
return (float) $return[0]['data'];
}
elseif (($return = $this->get_source_tags(SIMPLEPIE_NAMESPACE_GEORSS, 'point')) && preg_match('/^((?:-)?[0-9]+(?:\.[0-9]+)) ((?:-)?[0-9]+(?:\.[0-9]+))$/', trim($return[0]['data']), $match))
{
return (float) $match[1];
}
else
{
return null;
}
}
public function get_longitude()
{
if ($return = $this->get_source_tags(SIMPLEPIE_NAMESPACE_W3C_BASIC_GEO, 'long'))
{
return (float) $return[0]['data'];
}
elseif ($return = $this->get_source_tags(SIMPLEPIE_NAMESPACE_W3C_BASIC_GEO, 'lon'))
{
return (float) $return[0]['data'];
}
elseif (($return = $this->get_source_tags(SIMPLEPIE_NAMESPACE_GEORSS, 'point')) && preg_match('/^((?:-)?[0-9]+(?:\.[0-9]+)) ((?:-)?[0-9]+(?:\.[0-9]+))$/', trim($return[0]['data']), $match))
{
return (float) $match[2];
}
else
{
return null;
}
}
public function get_image_url()
{
if ($return = $this->get_source_tags(SIMPLEPIE_NAMESPACE_ITUNES, 'image'))
{
return $this->sanitize($return[0]['attribs']['']['href'], SIMPLEPIE_CONSTRUCT_IRI);
}
elseif ($return = $this->get_source_tags(SIMPLEPIE_NAMESPACE_ATOM_10, 'logo'))
{
return $this->sanitize($return[0]['data'], SIMPLEPIE_CONSTRUCT_IRI, $this->get_base($return[0]));
}
elseif ($return = $this->get_source_tags(SIMPLEPIE_NAMESPACE_ATOM_10, 'icon'))
{
return $this->sanitize($return[0]['data'], SIMPLEPIE_CONSTRUCT_IRI, $this->get_base($return[0]));
}
else
{
return null;
}
}
}

View file

@ -0,0 +1,362 @@
<?php
/**
* SimplePie
*
* A PHP-Based RSS and Atom Feed Framework.
* Takes the hard work out of managing a complete RSS/Atom solution.
*
* Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are
* permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice, this list of
* conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice, this list
* of conditions and the following disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* * Neither the name of the SimplePie Team nor the names of its contributors may be used
* to endorse or promote products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
* AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
* AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* @package SimplePie
* @version 1.3.1
* @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @author Ryan Parman
* @author Geoffrey Sneddon
* @author Ryan McCue
* @link http://simplepie.org/ SimplePie
* @license http://www.opensource.org/licenses/bsd-license.php BSD License
*/
/**
* Parses the XML Declaration
*
* @package SimplePie
* @subpackage Parsing
*/
class SimplePie_XML_Declaration_Parser
{
/**
* XML Version
*
* @access public
* @var string
*/
var $version = '1.0';
/**
* Encoding
*
* @access public
* @var string
*/
var $encoding = 'UTF-8';
/**
* Standalone
*
* @access public
* @var bool
*/
var $standalone = false;
/**
* Current state of the state machine
*
* @access private
* @var string
*/
var $state = 'before_version_name';
/**
* Input data
*
* @access private
* @var string
*/
var $data = '';
/**
* Input data length (to avoid calling strlen() everytime this is needed)
*
* @access private
* @var int
*/
var $data_length = 0;
/**
* Current position of the pointer
*
* @var int
* @access private
*/
var $position = 0;
/**
* Create an instance of the class with the input data
*
* @access public
* @param string $data Input data
*/
public function __construct($data)
{
$this->data = $data;
$this->data_length = strlen($this->data);
}
/**
* Parse the input data
*
* @access public
* @return bool true on success, false on failure
*/
public function parse()
{
while ($this->state && $this->state !== 'emit' && $this->has_data())
{
$state = $this->state;
$this->$state();
}
$this->data = '';
if ($this->state === 'emit')
{
return true;
}
else
{
$this->version = '';
$this->encoding = '';
$this->standalone = '';
return false;
}
}
/**
* Check whether there is data beyond the pointer
*
* @access private
* @return bool true if there is further data, false if not
*/
public function has_data()
{
return (bool) ($this->position < $this->data_length);
}
/**
* Advance past any whitespace
*
* @return int Number of whitespace characters passed
*/
public function skip_whitespace()
{
$whitespace = strspn($this->data, "\x09\x0A\x0D\x20", $this->position);
$this->position += $whitespace;
return $whitespace;
}
/**
* Read value
*/
public function get_value()
{
$quote = substr($this->data, $this->position, 1);
if ($quote === '"' || $quote === "'")
{
$this->position++;
$len = strcspn($this->data, $quote, $this->position);
if ($this->has_data())
{
$value = substr($this->data, $this->position, $len);
$this->position += $len + 1;
return $value;
}
}
return false;
}
public function before_version_name()
{
if ($this->skip_whitespace())
{
$this->state = 'version_name';
}
else
{
$this->state = false;
}
}
public function version_name()
{
if (substr($this->data, $this->position, 7) === 'version')
{
$this->position += 7;
$this->skip_whitespace();
$this->state = 'version_equals';
}
else
{
$this->state = false;
}
}
public function version_equals()
{
if (substr($this->data, $this->position, 1) === '=')
{
$this->position++;
$this->skip_whitespace();
$this->state = 'version_value';
}
else
{
$this->state = false;
}
}
public function version_value()
{
if ($this->version = $this->get_value())
{
$this->skip_whitespace();
if ($this->has_data())
{
$this->state = 'encoding_name';
}
else
{
$this->state = 'emit';
}
}
else
{
$this->state = false;
}
}
public function encoding_name()
{
if (substr($this->data, $this->position, 8) === 'encoding')
{
$this->position += 8;
$this->skip_whitespace();
$this->state = 'encoding_equals';
}
else
{
$this->state = 'standalone_name';
}
}
public function encoding_equals()
{
if (substr($this->data, $this->position, 1) === '=')
{
$this->position++;
$this->skip_whitespace();
$this->state = 'encoding_value';
}
else
{
$this->state = false;
}
}
public function encoding_value()
{
if ($this->encoding = $this->get_value())
{
$this->skip_whitespace();
if ($this->has_data())
{
$this->state = 'standalone_name';
}
else
{
$this->state = 'emit';
}
}
else
{
$this->state = false;
}
}
public function standalone_name()
{
if (substr($this->data, $this->position, 10) === 'standalone')
{
$this->position += 10;
$this->skip_whitespace();
$this->state = 'standalone_equals';
}
else
{
$this->state = false;
}
}
public function standalone_equals()
{
if (substr($this->data, $this->position, 1) === '=')
{
$this->position++;
$this->skip_whitespace();
$this->state = 'standalone_value';
}
else
{
$this->state = false;
}
}
public function standalone_value()
{
if ($standalone = $this->get_value())
{
switch ($standalone)
{
case 'yes':
$this->standalone = true;
break;
case 'no':
$this->standalone = false;
break;
default:
$this->state = false;
return;
}
$this->skip_whitespace();
if ($this->has_data())
{
$this->state = false;
}
else
{
$this->state = 'emit';
}
}
else
{
$this->state = false;
}
}
}

View file

@ -0,0 +1,371 @@
<?php
/**
* SimplePie
*
* A PHP-Based RSS and Atom Feed Framework.
* Takes the hard work out of managing a complete RSS/Atom solution.
*
* Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are
* permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice, this list of
* conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice, this list
* of conditions and the following disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* * Neither the name of the SimplePie Team nor the names of its contributors may be used
* to endorse or promote products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
* AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
* AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* @package SimplePie
* @version 1.3.1
* @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @author Ryan Parman
* @author Geoffrey Sneddon
* @author Ryan McCue
* @link http://simplepie.org/ SimplePie
* @license http://www.opensource.org/licenses/bsd-license.php BSD License
*/
/**
* Decode 'gzip' encoded HTTP data
*
* @package SimplePie
* @subpackage HTTP
* @link http://www.gzip.org/format.txt
*/
class SimplePie_gzdecode
{
/**
* Compressed data
*
* @access private
* @var string
* @see gzdecode::$data
*/
var $compressed_data;
/**
* Size of compressed data
*
* @access private
* @var int
*/
var $compressed_size;
/**
* Minimum size of a valid gzip string
*
* @access private
* @var int
*/
var $min_compressed_size = 18;
/**
* Current position of pointer
*
* @access private
* @var int
*/
var $position = 0;
/**
* Flags (FLG)
*
* @access private
* @var int
*/
var $flags;
/**
* Uncompressed data
*
* @access public
* @see gzdecode::$compressed_data
* @var string
*/
var $data;
/**
* Modified time
*
* @access public
* @var int
*/
var $MTIME;
/**
* Extra Flags
*
* @access public
* @var int
*/
var $XFL;
/**
* Operating System
*
* @access public
* @var int
*/
var $OS;
/**
* Subfield ID 1
*
* @access public
* @see gzdecode::$extra_field
* @see gzdecode::$SI2
* @var string
*/
var $SI1;
/**
* Subfield ID 2
*
* @access public
* @see gzdecode::$extra_field
* @see gzdecode::$SI1
* @var string
*/
var $SI2;
/**
* Extra field content
*
* @access public
* @see gzdecode::$SI1
* @see gzdecode::$SI2
* @var string
*/
var $extra_field;
/**
* Original filename
*
* @access public
* @var string
*/
var $filename;
/**
* Human readable comment
*
* @access public
* @var string
*/
var $comment;
/**
* Don't allow anything to be set
*
* @param string $name
* @param mixed $value
*/
public function __set($name, $value)
{
trigger_error("Cannot write property $name", E_USER_ERROR);
}
/**
* Set the compressed string and related properties
*
* @param string $data
*/
public function __construct($data)
{
$this->compressed_data = $data;
$this->compressed_size = strlen($data);
}
/**
* Decode the GZIP stream
*
* @return bool Successfulness
*/
public function parse()
{
if ($this->compressed_size >= $this->min_compressed_size)
{
// Check ID1, ID2, and CM
if (substr($this->compressed_data, 0, 3) !== "\x1F\x8B\x08")
{
return false;
}
// Get the FLG (FLaGs)
$this->flags = ord($this->compressed_data[3]);
// FLG bits above (1 << 4) are reserved
if ($this->flags > 0x1F)
{
return false;
}
// Advance the pointer after the above
$this->position += 4;
// MTIME
$mtime = substr($this->compressed_data, $this->position, 4);
// Reverse the string if we're on a big-endian arch because l is the only signed long and is machine endianness
if (current(unpack('S', "\x00\x01")) === 1)
{
$mtime = strrev($mtime);
}
$this->MTIME = current(unpack('l', $mtime));
$this->position += 4;
// Get the XFL (eXtra FLags)
$this->XFL = ord($this->compressed_data[$this->position++]);
// Get the OS (Operating System)
$this->OS = ord($this->compressed_data[$this->position++]);
// Parse the FEXTRA
if ($this->flags & 4)
{
// Read subfield IDs
$this->SI1 = $this->compressed_data[$this->position++];
$this->SI2 = $this->compressed_data[$this->position++];
// SI2 set to zero is reserved for future use
if ($this->SI2 === "\x00")
{
return false;
}
// Get the length of the extra field
$len = current(unpack('v', substr($this->compressed_data, $this->position, 2)));
$this->position += 2;
// Check the length of the string is still valid
$this->min_compressed_size += $len + 4;
if ($this->compressed_size >= $this->min_compressed_size)
{
// Set the extra field to the given data
$this->extra_field = substr($this->compressed_data, $this->position, $len);
$this->position += $len;
}
else
{
return false;
}
}
// Parse the FNAME
if ($this->flags & 8)
{
// Get the length of the filename
$len = strcspn($this->compressed_data, "\x00", $this->position);
// Check the length of the string is still valid
$this->min_compressed_size += $len + 1;
if ($this->compressed_size >= $this->min_compressed_size)
{
// Set the original filename to the given string
$this->filename = substr($this->compressed_data, $this->position, $len);
$this->position += $len + 1;
}
else
{
return false;
}
}
// Parse the FCOMMENT
if ($this->flags & 16)
{
// Get the length of the comment
$len = strcspn($this->compressed_data, "\x00", $this->position);
// Check the length of the string is still valid
$this->min_compressed_size += $len + 1;
if ($this->compressed_size >= $this->min_compressed_size)
{
// Set the original comment to the given string
$this->comment = substr($this->compressed_data, $this->position, $len);
$this->position += $len + 1;
}
else
{
return false;
}
}
// Parse the FHCRC
if ($this->flags & 2)
{
// Check the length of the string is still valid
$this->min_compressed_size += $len + 2;
if ($this->compressed_size >= $this->min_compressed_size)
{
// Read the CRC
$crc = current(unpack('v', substr($this->compressed_data, $this->position, 2)));
// Check the CRC matches
if ((crc32(substr($this->compressed_data, 0, $this->position)) & 0xFFFF) === $crc)
{
$this->position += 2;
}
else
{
return false;
}
}
else
{
return false;
}
}
// Decompress the actual data
if (($this->data = gzinflate(substr($this->compressed_data, $this->position, -8))) === false)
{
return false;
}
else
{
$this->position = $this->compressed_size - 8;
}
// Check CRC of data
$crc = current(unpack('V', substr($this->compressed_data, $this->position, 4)));
$this->position += 4;
/*if (extension_loaded('hash') && sprintf('%u', current(unpack('V', hash('crc32b', $this->data)))) !== sprintf('%u', $crc))
{
return false;
}*/
// Check ISIZE of data
$isize = current(unpack('V', substr($this->compressed_data, $this->position, 4)));
$this->position += 4;
if (sprintf('%u', strlen($this->data) & 0xFFFFFFFF) !== sprintf('%u', $isize))
{
return false;
}
// Wow, against all odds, we've actually got a valid gzip string
return true;
}
else
{
return false;
}
}
}