* Retrieves or sets the default Lexer as a Prototype Factory.
*
* By default HTMLPurifier_Lexer_DOMLex will be returned. There are
* a few exceptions involving special features that only DirectLex
* implements.
*
* @note The behavior of this class has changed: rather than accepting
* a prototype object, it now accepts a configuration object.
* To specify your own prototype, set %Core.LexerImpl to it.
* This change in behavior de-singletonizes the lexer object.
*
* @param $config Instance of HTMLPurifier_Config
* @return Concrete lexer.
*/
public static function create($config) {
    // Only a genuine configuration object can be queried for directives.
    // Anything else is treated as a legacy lexer prototype (deprecated).
    $has_config = ($config instanceof HTMLPurifier_Config);

    if ($has_config) {
        $lexer = $config->get('Core.LexerImpl');
    } else {
        $lexer = $config;
        trigger_error(
            "Passing a prototype to\n" .
            "HTMLPurifier_Lexer::create() is deprecated, please instead\n" .
            "use %Core.LexerImpl",
            E_USER_WARNING
        );
    }

    // Line-number tracking can only be requested through configuration.
    // In the legacy prototype path there is no configuration to consult,
    // so calling ->get() on the prototype would be a fatal error; treat
    // tracking as not requested instead.
    $needs_tracking = $has_config && (
        $config->get('Core.MaintainLineNumbers') ||
        $config->get('Core.CollectErrors')
    );

    $inst = null;
    if (is_object($lexer)) {
        // a ready-made lexer instance was supplied; use it as-is
        $inst = $lexer;
    } else {
        if (is_null($lexer)) {
            // auto-detection algorithm
            if ($needs_tracking) {
                $lexer = 'DirectLex';
            } elseif (
                class_exists('DOMDocument') &&
                method_exists('DOMDocument', 'loadHTML') &&
                !extension_loaded('domxml')
            ) {
                // check for DOM support, because while it's part of the
                // core, it can be disabled compile time. Also, the PECL
                // domxml extension overrides the default DOM, and is evil
                // and nasty and we shan't bother to support it
                $lexer = 'DOMLex';
            } else {
                $lexer = 'DirectLex';
            }
        }

        // instantiate recognized string names
        switch ($lexer) {
            case 'DOMLex':
                $inst = new HTMLPurifier_Lexer_DOMLex();
                break;
            case 'DirectLex':
                $inst = new HTMLPurifier_Lexer_DirectLex();
                break;
            case 'PH5P':
                $inst = new HTMLPurifier_Lexer_PH5P();
                break;
            default:
                throw new HTMLPurifier_Exception("Cannot instantiate unrecognized Lexer type " . htmlspecialchars($lexer));
        }
    }

    // defensive: every path above should have produced an instance
    if (!$inst) {
        throw new HTMLPurifier_Exception('No lexer was instantiated');
    }

    // once PHP DOM implements native line numbers, or we
    // hack out something using XSLT, remove this stipulation
    if ($needs_tracking && !$inst->tracksLineNumbers) {
        throw new HTMLPurifier_Exception('Cannot use lexer that does not support line numbers with Core.MaintainLineNumbers or Core.CollectErrors (use DirectLex instead)');
    }

    return $inst;
}
// -- CONVENIENCE MEMBERS ---------------------------------------------
/**
 * Equips the new lexer with its own HTMLPurifier_EntityParser instance.
 */
public function __construct()
{
    $this->_entity_parser = new HTMLPurifier_EntityParser();
}
/**
 * Most common entity to raw value conversion table for special entities.
 *
 * Keys are the escaped (entity) spellings; values are the literal
 * characters they stand for. The apostrophe appears under three numeric
 * spellings (decimal, zero-padded decimal and hexadecimal), all of which
 * map to the same character.
 */
protected $_special_entity2str =
    array(
        '&quot;' => '"',
        '&amp;'  => '&',
        '&lt;'   => '<',
        '&gt;'   => '>',
        '&#39;'  => "'",
        '&#039;' => "'",
        '&#x27;' => "'"
    );
/**
* Parses special entities into the proper characters.
*
* This string will translate escaped versions of the special characters
* into the correct ones.
*
* @warning
* You should be able to treat the output of this function as
* completely parsed, but that's only because all other entities should
* have been handled previously in substituteNonSpecialEntities()
*
* @param $string String character data to be parsed.
* @returns Parsed character data.
*/
public function parseData($string) {
// following functions require at least one character