diff --git a/main/inc/lib/htmlpurifier/library/HTMLPurifier.autoload.php b/main/inc/lib/htmlpurifier/library/HTMLPurifier.autoload.php
old mode 100755
new mode 100644
index 8d40176406..62da5b60d4
--- a/main/inc/lib/htmlpurifier/library/HTMLPurifier.autoload.php
+++ b/main/inc/lib/htmlpurifier/library/HTMLPurifier.autoload.php
@@ -3,6 +3,7 @@
/**
* @file
* Convenience file that registers autoload handler for HTML Purifier.
+ * It also does some sanity checks.
*/
if (function_exists('spl_autoload_register') && function_exists('spl_autoload_unregister')) {
@@ -18,4 +19,8 @@ if (function_exists('spl_autoload_register') && function_exists('spl_autoload_un
}
}
+if (ini_get('zend.ze1_compatibility_mode')) {
+ trigger_error("HTML Purifier is not compatible with zend.ze1_compatibility_mode; please turn it off", E_USER_ERROR);
+}
+
// vim: et sw=4 sts=4
diff --git a/main/inc/lib/htmlpurifier/library/HTMLPurifier/AttrDef/URI/Host.php b/main/inc/lib/htmlpurifier/library/HTMLPurifier/AttrDef/URI/Host.php
old mode 100755
new mode 100644
index 2156c10c66..feca469d70
--- a/main/inc/lib/htmlpurifier/library/HTMLPurifier/AttrDef/URI/Host.php
+++ b/main/inc/lib/htmlpurifier/library/HTMLPurifier/AttrDef/URI/Host.php
@@ -23,6 +23,12 @@ class HTMLPurifier_AttrDef_URI_Host extends HTMLPurifier_AttrDef
public function validate($string, $config, $context) {
$length = strlen($string);
+ // empty hostname is OK; it's usually semantically equivalent:
+ // the default host as defined by a URI scheme is used:
+ //
+ // If the URI scheme defines a default for host, then that
+ // default applies when the host subcomponent is undefined
+ // or when the registered name is empty (zero length).
if ($string === '') return '';
if ($length > 1 && $string[0] === '[' && $string[$length-1] === ']') {
//IPv6
diff --git a/main/inc/lib/htmlpurifier/library/HTMLPurifier/AttrTransform/SafeParam.php b/main/inc/lib/htmlpurifier/library/HTMLPurifier/AttrTransform/SafeParam.php
index 91f67b08cb..4ceea62c0a 100644
--- a/main/inc/lib/htmlpurifier/library/HTMLPurifier/AttrTransform/SafeParam.php
+++ b/main/inc/lib/htmlpurifier/library/HTMLPurifier/AttrTransform/SafeParam.php
@@ -19,6 +19,7 @@ class HTMLPurifier_AttrTransform_SafeParam extends HTMLPurifier_AttrTransform
public function __construct() {
$this->uri = new HTMLPurifier_AttrDef_URI(true); // embedded
+ $this->wmode = new HTMLPurifier_AttrDef_Enum(array('window', 'opaque', 'transparent'));
}
public function transform($attr, $config, $context) {
@@ -41,7 +42,7 @@ class HTMLPurifier_AttrTransform_SafeParam extends HTMLPurifier_AttrTransform
}
break;
case 'wmode':
- $attr['value'] = 'window';
+ $attr['value'] = $this->wmode->validate($attr['value'], $config, $context);
break;
case 'movie':
case 'src':
diff --git a/main/inc/lib/htmlpurifier/library/HTMLPurifier/Bootstrap.php b/main/inc/lib/htmlpurifier/library/HTMLPurifier/Bootstrap.php
old mode 100755
new mode 100644
index 559f61a232..607c5b1880
--- a/main/inc/lib/htmlpurifier/library/HTMLPurifier/Bootstrap.php
+++ b/main/inc/lib/htmlpurifier/library/HTMLPurifier/Bootstrap.php
@@ -37,7 +37,12 @@ class HTMLPurifier_Bootstrap
public static function autoload($class) {
$file = HTMLPurifier_Bootstrap::getPath($class);
if (!$file) return false;
- require HTMLPURIFIER_PREFIX . '/' . $file;
+ // Technically speaking, it should be ok and more efficient to
+ // just do 'require', but Antonio Parraga reports that with
+ // Zend extensions such as Zend debugger and APC, this invariant
+ // may be broken. Since we have efficient alternatives, pay
+ // the cost here and avoid the bug.
+ require_once HTMLPURIFIER_PREFIX . '/' . $file;
return true;
}
@@ -65,10 +70,11 @@ class HTMLPurifier_Bootstrap
if ( ($funcs = spl_autoload_functions()) === false ) {
spl_autoload_register($autoload);
} elseif (function_exists('spl_autoload_unregister')) {
+ $buggy = version_compare(PHP_VERSION, '5.2.11', '<');
$compat = version_compare(PHP_VERSION, '5.1.2', '<=') &&
version_compare(PHP_VERSION, '5.1.0', '>=');
foreach ($funcs as $func) {
- if (is_array($func)) {
+ if ($buggy && is_array($func)) {
// :TRICKY: There are some compatibility issues and some
// places where we need to error out
$reflector = new ReflectionMethod($func[0], $func[1]);
diff --git a/main/inc/lib/htmlpurifier/library/HTMLPurifier/Config.php b/main/inc/lib/htmlpurifier/library/HTMLPurifier/Config.php
index 3461c9f822..69c5683233 100644
--- a/main/inc/lib/htmlpurifier/library/HTMLPurifier/Config.php
+++ b/main/inc/lib/htmlpurifier/library/HTMLPurifier/Config.php
@@ -76,7 +76,8 @@ class HTMLPurifier_Config
/**
* Set to false if you do not want line and file numbers in errors
- * (useful when unit testing)
+ * (useful when unit testing). This will also compress some errors
+ * and exceptions.
*/
public $chatty = true;
@@ -318,26 +319,64 @@ class HTMLPurifier_Config
* Retrieves object reference to the HTML definition.
* @param $raw Return a copy that has not been setup yet. Must be
* called before it's been setup, otherwise won't work.
- */
- public function getHTMLDefinition($raw = false) {
- return $this->getDefinition('HTML', $raw);
+ * @param $optimized If true, this method may return null, to
+ * indicate that a cached version of the modified
+ * definition object is available and no further edits
+ * are necessary. Consider using
+ * maybeGetRawHTMLDefinition, which is more explicitly
+ * named, instead.
+ */
+ public function getHTMLDefinition($raw = false, $optimized = false) {
+ return $this->getDefinition('HTML', $raw, $optimized);
}
/**
* Retrieves object reference to the CSS definition
* @param $raw Return a copy that has not been setup yet. Must be
* called before it's been setup, otherwise won't work.
- */
- public function getCSSDefinition($raw = false) {
- return $this->getDefinition('CSS', $raw);
+ * @param $optimized If true, this method may return null, to
+ * indicate that a cached version of the modified
+ * definition object is available and no further edits
+ * are necessary. Consider using
+ * maybeGetRawCSSDefinition, which is more explicitly
+ * named, instead.
+ */
+ public function getCSSDefinition($raw = false, $optimized = false) {
+ return $this->getDefinition('CSS', $raw, $optimized);
+ }
+
+ /**
+ * Retrieves object reference to the URI definition
+ * @param $raw Return a copy that has not been setup yet. Must be
+ * called before it's been setup, otherwise won't work.
+ * @param $optimized If true, this method may return null, to
+ * indicate that a cached version of the modified
+ * definition object is available and no further edits
+ * are necessary. Consider using
+ * maybeGetRawURIDefinition, which is more explicitly
+ * named, instead.
+ */
+ public function getURIDefinition($raw = false, $optimized = false) {
+ return $this->getDefinition('URI', $raw, $optimized);
}
/**
* Retrieves a definition
* @param $type Type of definition: HTML, CSS, etc
* @param $raw Whether or not definition should be returned raw
- */
- public function getDefinition($type, $raw = false) {
+ * @param $optimized Only has an effect when $raw is true. Whether
+ * or not to return null if the result is already present in
+ * the cache. This is off by default for backwards
+ * compatibility reasons, but you need to do things this
+ * way in order to ensure that caching is done properly.
+ * Check out enduser-customize.html for more details.
+ * We probably won't ever change this default, as much as the
+ * maybe semantics is the "right thing to do."
+ */
+ public function getDefinition($type, $raw = false, $optimized = false) {
+ if ($optimized && !$raw) {
+ throw new HTMLPurifier_Exception("Cannot set optimized = true when raw = false");
+ }
if (!$this->finalized) $this->autoFinalize();
// temporarily suspend locks, so we can handle recursive definition calls
$lock = $this->lock;
@@ -346,52 +385,137 @@ class HTMLPurifier_Config
$cache = $factory->create($type, $this);
$this->lock = $lock;
if (!$raw) {
- // see if we can quickly supply a definition
+ // full definition
+ // ---------------
+ // check if definition is in memory
if (!empty($this->definitions[$type])) {
- if (!$this->definitions[$type]->setup) {
- $this->definitions[$type]->setup($this);
- $cache->set($this->definitions[$type], $this);
- }
- return $this->definitions[$type];
- }
- // memory check missed, try cache
- $this->definitions[$type] = $cache->get($this);
- if ($this->definitions[$type]) {
- // definition in cache, return it
- return $this->definitions[$type];
- }
- } elseif (
- !empty($this->definitions[$type]) &&
- !$this->definitions[$type]->setup
- ) {
- // raw requested, raw in memory, quick return
- return $this->definitions[$type];
+ $def = $this->definitions[$type];
+ // check if the definition is setup
+ if ($def->setup) {
+ return $def;
+ } else {
+ $def->setup($this);
+ if ($def->optimized) $cache->add($def, $this);
+ return $def;
+ }
+ }
+ // check if definition is in cache
+ $def = $cache->get($this);
+ if ($def) {
+ // definition in cache, save to memory and return it
+ $this->definitions[$type] = $def;
+ return $def;
+ }
+ // initialize it
+ $def = $this->initDefinition($type);
+ // set it up
+ $this->lock = $type;
+ $def->setup($this);
+ $this->lock = null;
+ // save in cache
+ $cache->add($def, $this);
+ // return it
+ return $def;
+ } else {
+ // raw definition
+ // --------------
+ // check preconditions
+ $def = null;
+ if ($optimized) {
+ if (is_null($this->get($type . '.DefinitionID'))) {
+ // fatally error out if definition ID not set
+ throw new HTMLPurifier_Exception("Cannot retrieve raw version without specifying %$type.DefinitionID");
+ }
+ }
+ if (!empty($this->definitions[$type])) {
+ $def = $this->definitions[$type];
+ if ($def->setup && !$optimized) {
+ $extra = $this->chatty ? " (try moving this code block earlier in your initialization)" : "";
+ throw new HTMLPurifier_Exception("Cannot retrieve raw definition after it has already been setup" . $extra);
+ }
+ if ($def->optimized === null) {
+ $extra = $this->chatty ? " (try flushing your cache)" : "";
+ throw new HTMLPurifier_Exception("Optimization status of definition is unknown" . $extra);
+ }
+ if ($def->optimized !== $optimized) {
+ $msg = $optimized ? "optimized" : "unoptimized";
+ $extra = $this->chatty ? " (this backtrace is for the first inconsistent call, which was for a $msg raw definition)" : "";
+ throw new HTMLPurifier_Exception("Inconsistent use of optimized and unoptimized raw definition retrievals" . $extra);
+ }
+ }
+ // check if definition was in memory
+ if ($def) {
+ if ($def->setup) {
+ // invariant: $optimized === true (checked above)
+ return null;
+ } else {
+ return $def;
+ }
+ }
+ // if optimized, check if definition was in cache
+ // (because we do the memory check first, this formulation
+ // is prone to cache slamming, but I think
+ // guaranteeing that either /all/ of the raw
+ // setup code or /none/ of it is run is more important.)
+ if ($optimized) {
+ // This code path only gets run once; once we put
+ // something in $definitions (which is guaranteed by the
+ // trailing code), we always short-circuit above.
+ $def = $cache->get($this);
+ if ($def) {
+ // save the full definition for later, but don't
+ // return it yet
+ $this->definitions[$type] = $def;
+ return null;
+ }
+ }
+ // check invariants for creation
+ if (!$optimized) {
+ if (!is_null($this->get($type . '.DefinitionID'))) {
+ if ($this->chatty) {
+ $this->triggerError("Due to a documentation error in previous version of HTML Purifier, your definitions are not being cached. If this is OK, you can remove the %$type.DefinitionRev and %$type.DefinitionID declaration. Otherwise, modify your code to use maybeGetRawDefinition, and test if the returned value is null before making any edits (if it is null, that means that a cached version is available, and no raw operations are necessary). See Customize for more details", E_USER_WARNING);
+ } else {
+ $this->triggerError("Useless DefinitionID declaration", E_USER_WARNING);
+ }
+ }
}
+ // initialize it
+ $def = $this->initDefinition($type);
+ $def->optimized = $optimized;
+ return $def;
+ }
+ throw new HTMLPurifier_Exception("The impossible happened!");
+ }
+
+ private function initDefinition($type) {
// quick checks failed, let's create the object
if ($type == 'HTML') {
- $this->definitions[$type] = new HTMLPurifier_HTMLDefinition();
+ $def = new HTMLPurifier_HTMLDefinition();
} elseif ($type == 'CSS') {
- $this->definitions[$type] = new HTMLPurifier_CSSDefinition();
+ $def = new HTMLPurifier_CSSDefinition();
} elseif ($type == 'URI') {
- $this->definitions[$type] = new HTMLPurifier_URIDefinition();
+ $def = new HTMLPurifier_URIDefinition();
} else {
throw new HTMLPurifier_Exception("Definition of $type type not supported");
}
- // quick abort if raw
- if ($raw) {
- if (is_null($this->get($type . '.DefinitionID'))) {
- // fatally error out if definition ID not set
- throw new HTMLPurifier_Exception("Cannot retrieve raw version without specifying %$type.DefinitionID");
+ $this->definitions[$type] = $def;
+ return $def;
}
- return $this->definitions[$type];
+
+ public function maybeGetRawDefinition($name) {
+ return $this->getDefinition($name, true, true);
}
- // set it up
- $this->lock = $type;
- $this->definitions[$type]->setup($this);
- $this->lock = null;
- // save in cache
- $cache->set($this->definitions[$type], $this);
- return $this->definitions[$type];
+
+ public function maybeGetRawHTMLDefinition() {
+ return $this->getDefinition('HTML', true, true);
+ }
+
+ public function maybeGetRawCSSDefinition() {
+ return $this->getDefinition('CSS', true, true);
+ }
+
+ public function maybeGetRawURIDefinition() {
+ return $this->getDefinition('URI', true, true);
}
/**
@@ -549,17 +673,22 @@ class HTMLPurifier_Config
/**
* Produces a nicely formatted error message by supplying the
- * stack frame information from two levels up and OUTSIDE of
- * HTMLPurifier_Config.
+ * stack frame information OUTSIDE of HTMLPurifier_Config.
*/
protected function triggerError($msg, $no) {
// determine previous stack frame
- $backtrace = debug_backtrace();
- if ($this->chatty && isset($backtrace[1])) {
- $frame = $backtrace[1];
- $extra = " on line {$frame['line']} in file {$frame['file']}";
- } else {
$extra = '';
+ if ($this->chatty) {
+ $trace = debug_backtrace();
+ // zip(tail(trace), trace) -- but PHP is not Haskell har har
+ for ($i = 0, $c = count($trace); $i < $c - 1; $i++) {
+ if ($trace[$i + 1]['class'] === 'HTMLPurifier_Config') {
+ continue;
+ }
+ $frame = $trace[$i];
+ $extra = " invoked on line {$frame['line']} in file {$frame['file']}";
+ break;
+ }
}
trigger_error($msg . $extra, $no);
}
diff --git a/main/inc/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema.php b/main/inc/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema.php
old mode 100755
new mode 100644
index 67be5c71fd..fadf7a5890
--- a/main/inc/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema.php
+++ b/main/inc/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema.php
@@ -60,7 +60,13 @@ class HTMLPurifier_ConfigSchema {
* Unserializes the default ConfigSchema.
*/
public static function makeFromSerial() {
- return unserialize(file_get_contents(HTMLPURIFIER_PREFIX . '/HTMLPurifier/ConfigSchema/schema.ser'));
+ $contents = file_get_contents(HTMLPURIFIER_PREFIX . '/HTMLPurifier/ConfigSchema/schema.ser');
+ $r = unserialize($contents);
+ if (!$r) {
+ $hash = sha1($contents);
+ trigger_error("Unserialization of configuration schema failed, sha1 of file was $hash", E_USER_ERROR);
+ }
+ return $r;
}
/**
diff --git a/main/inc/lib/htmlpurifier/library/HTMLPurifier/Definition.php b/main/inc/lib/htmlpurifier/library/HTMLPurifier/Definition.php
old mode 100755
new mode 100644
index a7408c9749..c7f82eba43
--- a/main/inc/lib/htmlpurifier/library/HTMLPurifier/Definition.php
+++ b/main/inc/lib/htmlpurifier/library/HTMLPurifier/Definition.php
@@ -12,6 +12,17 @@ abstract class HTMLPurifier_Definition
*/
public $setup = false;
+ /**
+ * If true, write out the final definition object to the cache after
+ * setup. This will be true only if all invocations to get a raw
+ * definition object are also optimized. This does not cause file
+ * system thrashing because on subsequent calls the cached object
+ * is used and any writes to the raw definition object are short
+ * circuited. See enduser-customize.html for the high-level
+ * picture.
+ */
+ public $optimized = null;
+
/**
* What type of definition is it?
*/
diff --git a/main/inc/lib/htmlpurifier/library/HTMLPurifier/HTMLModule/SafeEmbed.php b/main/inc/lib/htmlpurifier/library/HTMLPurifier/HTMLModule/SafeEmbed.php
old mode 100755
new mode 100644
index ea256716bb..9f3758a322
--- a/main/inc/lib/htmlpurifier/library/HTMLPurifier/HTMLModule/SafeEmbed.php
+++ b/main/inc/lib/htmlpurifier/library/HTMLPurifier/HTMLModule/SafeEmbed.php
@@ -21,7 +21,7 @@ class HTMLPurifier_HTMLModule_SafeEmbed extends HTMLPurifier_HTMLModule
'allowscriptaccess' => 'Enum#never',
'allownetworking' => 'Enum#internal',
'flashvars' => 'Text',
- 'wmode' => 'Enum#window',
+ 'wmode' => 'Enum#window,transparent,opaque',
'name' => 'ID',
)
);
diff --git a/main/inc/lib/htmlpurifier/library/HTMLPurifier/HTMLModuleManager.php b/main/inc/lib/htmlpurifier/library/HTMLPurifier/HTMLModuleManager.php
old mode 100755
new mode 100644
index f5c4a1d2cb..ce27efa6da
--- a/main/inc/lib/htmlpurifier/library/HTMLPurifier/HTMLModuleManager.php
+++ b/main/inc/lib/htmlpurifier/library/HTMLPurifier/HTMLModuleManager.php
@@ -216,13 +216,10 @@ class HTMLPurifier_HTMLModuleManager
}
}
- // add proprietary module (this gets special treatment because
- // it is completely removed from doctypes, etc.)
+ // custom modules
if ($config->get('HTML.Proprietary')) {
$modules[] = 'Proprietary';
}
-
- // add SafeObject/Safeembed modules
if ($config->get('HTML.SafeObject')) {
$modules[] = 'SafeObject';
}
diff --git a/main/inc/lib/htmlpurifier/library/HTMLPurifier/Lexer.php b/main/inc/lib/htmlpurifier/library/HTMLPurifier/Lexer.php
index 61e065f33b..24a5418b3e 100644
--- a/main/inc/lib/htmlpurifier/library/HTMLPurifier/Lexer.php
+++ b/main/inc/lib/htmlpurifier/library/HTMLPurifier/Lexer.php
@@ -235,7 +235,7 @@ class HTMLPurifier_Lexer
*/
protected static function removeIEConditional($string) {
return preg_replace(
- '##si', // probably should generalize for all strings
+ '##si', // probably should generalize for all strings
'',
$string
);
@@ -273,11 +273,11 @@ class HTMLPurifier_Lexer
$html = $this->escapeCommentedCDATA($html);
}
- $html = $this->removeIEConditional($html);
-
// escape CDATA
$html = $this->escapeCDATA($html);
+ $html = $this->removeIEConditional($html);
+
// extract body from document if applicable
if ($config->get('Core.ConvertDocumentToFragment')) {
$e = false;