Issue #306 - The multibyte string library: Logic upgrades, optimizations for speed, part 2.

16 years ago · 9376bb9d20
parent 314c0d4484
commit 9376bb9d20
2 changed files with 335 additions and 277 deletions
--- a/main/inc/lib/multibyte_string_functions.lib.php
+++ b/main/inc/lib/multibyte_string_functions.lib.php
@ -66,7 +66,6 @@ function api_byte_count($string) {
 		return mb_strlen($string, '8bit');
 	}
 	return strlen($string);
-
 	// For PHP6 this function probably will contain:
 	//return strlen((binary)$string);
 }
@ -88,30 +87,32 @@ function api_byte_count($string) {
 * @link http://php.net/manual/en/function.mb-convert-encoding
 */
 function api_convert_encoding($string, $to_encoding, $from_encoding = null) {
+	static $equal_encodings = array();
 	if (empty($from_encoding)) {
 		$from_encoding = api_mb_internal_encoding();
 	}
-	if (api_equal_encodings($to_encoding, $from_encoding)) {
-		// When conversion is not needed, the string is returned directly, without validation.
-		return $string;
+	if (!isset($equal_encodings[$to_encoding][$from_encoding])) {
+		$equal_encodings[$to_encoding][$from_encoding] = api_equal_encodings($to_encoding, $from_encoding);
+	}
+	if ($equal_encodings[$to_encoding][$from_encoding]) {
+		return $string; // When conversion is not needed, the string is returned directly, without validation.
 	}
-	elseif (api_mb_supports($to_encoding) && api_mb_supports($from_encoding)) {
+	if (api_mb_supports($to_encoding) && api_mb_supports($from_encoding)) {
 		return @mb_convert_encoding($string, $to_encoding, $from_encoding);
 	}
-	elseif (api_iconv_supports($to_encoding) && api_iconv_supports($from_encoding)) {
+	if (api_iconv_supports($to_encoding) && api_iconv_supports($from_encoding)) {
 		return @iconv($from_encoding, $to_encoding, $string);
 	}
-	elseif (api_is_utf8($to_encoding) && api_is_latin1($from_encoding, true)) {
+	if (api_is_utf8($to_encoding) && api_is_latin1($from_encoding, true)) {
 		return utf8_encode($string);
 	}
-	elseif (api_is_latin1($to_encoding, true) && api_is_utf8($from_encoding)) {
+	if (api_is_latin1($to_encoding, true) && api_is_utf8($from_encoding)) {
 		return utf8_decode($string);
 	}
-	elseif (_api_convert_encoding_supports($to_encoding) && _api_convert_encoding_supports($from_encoding)) {
+	if (_api_convert_encoding_supports($to_encoding) && _api_convert_encoding_supports($from_encoding)) {
 		return _api_convert_encoding($string, $to_encoding, $from_encoding);
 	}
-	// Here the function gives up.
-	return $string;
+	return $string; // Here the function gives up.
 }

 /**
@ -127,23 +128,21 @@ function api_utf8_encode($string, $from_encoding = null) {
 		$from_encoding = api_mb_internal_encoding();
 	}
 	if (api_is_utf8($from_encoding)) {
-		// When conversion is not needed, the string is returned directly, without validation.
-		return $string;
+		return $string; // When conversion is not needed, the string is returned directly, without validation.
 	}
-	elseif (api_mb_supports($from_encoding)) {
+	if (api_mb_supports($from_encoding)) {
 		return @mb_convert_encoding($string, 'UTF-8', $from_encoding);
 	}
-	elseif (api_iconv_supports($from_encoding)) {
+	if (api_iconv_supports($from_encoding)) {
 		return @iconv($from_encoding, 'UTF-8', $string);
 	}
-	elseif (api_is_latin1($from_encoding, true)) {
+	if (api_is_latin1($from_encoding, true)) {
 		return utf8_encode($string);
 	}
-	elseif (_api_convert_encoding_supports($from_encoding)) {
+	if (_api_convert_encoding_supports($from_encoding)) {
 		return _api_convert_encoding($string, 'UTF-8', $from_encoding);
 	}
-	// Here the function gives up.
-	return $string;
+	return $string; // Here the function gives up.
 }

 /**
@ -159,23 +158,21 @@ function api_utf8_decode($string, $to_encoding = null) {
 		$to_encoding = api_mb_internal_encoding();
 	}
 	if (api_is_utf8($to_encoding)) {
-		// When conversion is not needed, the string is returned directly, without validation.
-		return $string;
+		return $string; // When conversion is not needed, the string is returned directly, without validation.
 	}
-	elseif (api_mb_supports($to_encoding)) {
+	if (api_mb_supports($to_encoding)) {
 		return @mb_convert_encoding($string, $to_encoding, 'UTF-8');
 	}
-	elseif (api_iconv_supports($to_encoding)) {
+	if (api_iconv_supports($to_encoding)) {
 		return @iconv('UTF-8', $to_encoding, $string);
 	}
-	elseif (api_is_latin1($to_encoding, true)) {
+	if (api_is_latin1($to_encoding, true)) {
 		return utf8_decode($string);
 	}
-	elseif (_api_convert_encoding_supports($to_encoding)) {
+	if (_api_convert_encoding_supports($to_encoding)) {
 		return _api_convert_encoding($string, $to_encoding, 'UTF-8');
 	}
-	// Here the function gives up.
-	return $string;
+	return $string; // Here the function gives up.
 }

 /**
@ -288,9 +285,16 @@ function api_html_entity_decode($string, $quote_style = ENT_COMPAT, $encoding =
 		return html_entity_decode($string, $quote_style, $encoding);
 	}
 	if (api_is_encoding_supported($encoding)) {
-		return api_utf8_decode(html_entity_decode(api_convert_encoding($string, 'UTF-8', $encoding), $quote_style, 'UTF-8'), $encoding);
+		if (!api_is_utf8($encoding)) {
+			$string = api_utf8_encode($string, $encoding);
+		}
+		$string = html_entity_decode($string, $quote_style, 'UTF-8');
+		if (!api_is_utf8($encoding)) {
+			return api_utf8_decode($string, $encoding);
+		}
+		return $string;
 	}
-	return $string;
+	return $string; // Here the function gives up.
 }

 /**
@ -301,7 +305,12 @@ function api_html_entity_decode($string, $quote_style = ENT_COMPAT, $encoding =
 */
 function api_xml_http_response_encode($string, $from_encoding = null) {
 	if (isset($_SERVER['HTTP_X_REQUESTED_WITH']) && strtolower($_SERVER['HTTP_X_REQUESTED_WITH']) == 'xmlhttprequest') {
-		return api_convert_encoding($string, 'UTF-8', $from_encoding);
+		if (empty($from_encoding)) {
+			$from_encoding = api_mb_internal_encoding();
+		}
+		if (!api_is_utf8($from_encoding)) {
+			return api_utf8_encode($string, $from_encoding);
+		}
 	}
 	return $string;
 }
@ -464,7 +473,11 @@ function api_stripos($haystack, $needle, $offset = 0, $encoding = null) {
 	}
 	elseif (api_is_encoding_supported($encoding)) {
 		if (MBSTRING_INSTALLED) {
-			return @mb_stripos(api_utf8_encode($haystack, $encoding), api_utf8_encode($needle, $encoding), $offset, 'UTF-8');
+			if (!api_is_utf8($encoding)) {
+				$haystack = api_utf8_encode($haystack, $encoding);
+				$needle = api_utf8_encode($needle, $encoding);
+			}
+			return @mb_stripos($haystack, $needle, $offset, 'UTF-8');
 		}
 		return api_strpos(api_strtolower($haystack, $encoding), api_strtolower($needle, $encoding), $offset, $encoding);
 	}
@ -474,7 +487,7 @@ function api_stripos($haystack, $needle, $offset = 0, $encoding = null) {
 /**
 * Finds first occurrence of a string within another, case insensitive.
 * @param string $haystack					The string from which to get the first occurrence.
- * @param mixed $needle					The string to be found.
+ * @param mixed $needle						The string to be found.
 * @param bool $before_needle (optional)	Determines which portion of $haystack this function returns. The default value is FALSE.
 * @param string $encoding (optional)		The used internally by this function character encoding. If it is omitted, the platform character set will be used by default.
 * @return mixed							Returns the portion of $haystack, or FALSE if $needle is not found.
@ -503,11 +516,18 @@ function api_stristr($haystack, $needle, $before_needle = false, $encoding = nul
 	}
 	elseif (api_is_encoding_supported($encoding)) {
 		if (MBSTRING_INSTALLED) {
-			$result = @mb_stristr(api_utf8_encode($haystack, $encoding), api_utf8_encode($needle, $encoding), $before_needle, 'UTF-8');
+			if (!api_is_utf8($encoding)) {
+				$haystack = api_utf8_encode($haystack, $encoding);
+				$needle = api_utf8_encode($needle, $encoding);
+			}
+			$result = @mb_stristr($haystack, $needle, $before_needle, 'UTF-8');
 			if ($result === false) {
 				return false;
 			}
-			return api_utf8_decode($result, $encoding);
+			if (!api_is_utf8($encoding)) {
+				return api_utf8_decode($result, $encoding);
+			}
+			return $result;
 		}
 		$result = api_strstr(api_strtolower($haystack, $encoding), api_strtolower($needle, $encoding), $before_needle, $encoding);
 		if ($result === false) {
@ -545,13 +565,13 @@ function api_strlen($string, $encoding = null) {
 	if (_api_is_single_byte_encoding($encoding)) {
 		return strlen($string);
 	}
-	elseif (api_mb_supports($encoding)) {
+	if (api_mb_supports($encoding)) {
 		return @mb_strlen($string, $encoding);
 	}
-	elseif (api_iconv_supports($encoding)) {
+	if (api_iconv_supports($encoding)) {
 		return @iconv_strlen($string, $encoding);
 	}
-	elseif (api_is_utf8($encoding)) {
+	if (api_is_utf8($encoding)) {
    	return api_byte_count(preg_replace("/[\x80-\xBF]/", '', $string));
 	}
 	return strlen($string);
@ -580,13 +600,13 @@ function api_strpos($haystack, $needle, $offset = 0, $encoding = null) {
 		return @mb_strpos($haystack, $needle, $offset, $encoding);
 	}
 	elseif (api_is_encoding_supported($encoding)) {
-		if (MBSTRING_INSTALLED) {
-			return @mb_strpos(api_utf8_encode($haystack, $encoding), api_utf8_encode($needle, $encoding), $offset, 'UTF-8');
-		}
 		if (!api_is_utf8($encoding)) {
 			$haystack = api_utf8_encode($haystack, $encoding);
 			$needle = api_utf8_encode($needle, $encoding);
 		}
+		if (MBSTRING_INSTALLED) {
+			return @mb_strpos($haystack, $needle, $offset, 'UTF-8');
+		}
 		if (empty($offset)) {
 			$haystack = explode($needle, $haystack, 2);
 			if (count($haystack) > 1) {
@ -644,11 +664,18 @@ function api_strrchr($haystack, $needle, $before_needle = false, $encoding = nul
 		return @mb_strrchr($haystack, $needle, $before_needle, $encoding);
 	}
 	elseif (MBSTRING_INSTALLED && api_is_encoding_supported($encoding)) {
-		$result = @mb_strrchr(api_utf8_encode($haystack, $encoding), api_utf8_encode($needle, $encoding), $before_needle, 'UTF-8');
+		if (!api_is_utf8($encoding)) {
+			$haystack = api_utf8_encode($haystack, $encoding);
+			$needle = api_utf8_encode($needle, $encoding);
+		}
+		$result = @mb_strrchr($haystack, $needle, $before_needle, 'UTF-8');
 		if ($result === false) {
 			return false;
 		}
-		return api_utf8_decode($result, $encoding);
+		if (!api_is_utf8($encoding)) {
+			return api_utf8_decode($result, $encoding);
+		}
+		return $result;
 	}
 	if (!$before_needle) {
 		return strrchr($haystack, $needle);
@ -707,14 +734,14 @@ function api_strrpos($haystack, $needle, $offset = 0, $encoding = null) {
 		return @mb_strrpos($haystack, $needle, $offset, $encoding);
 	}
 	elseif (api_is_encoding_supported($encoding)) {
-		if (MBSTRING_INSTALLED) {
-			return @mb_strrpos(api_utf8_encode($haystack, $encoding), api_utf8_encode($needle, $encoding), $offset, 'UTF-8');
-		}
-		// This branch (this fragment of code) is an adaptation from the CakePHP(tm) Project, http://www.cakefoundation.org
 		if (!api_is_utf8($encoding)) {
 			$haystack = api_utf8_encode($haystack, $encoding);
 			$needle = api_utf8_encode($needle, $encoding);
 		}
+		if (MBSTRING_INSTALLED) {
+			return @mb_strrpos($haystack, $needle, $offset, 'UTF-8');
+		}
+		// This branch (this fragment of code) is an adaptation from the CakePHP(tm) Project, http://www.cakefoundation.org
 		$found = false;
 		$haystack = _api_utf8_to_unicode($haystack);
 		$haystack_count = count($haystack);
@ -792,12 +819,18 @@ function api_strstr($haystack, $needle, $before_needle = false, $encoding = null
 		return @mb_strstr($haystack, $needle, $before_needle, $encoding);
 	}
 	elseif (MBSTRING_INSTALLED && api_is_encoding_supported($encoding)) {
-		$result = @mb_strstr(api_utf8_encode($haystack, $encoding), api_utf8_encode($needle, $encoding), $before_needle, 'UTF-8');
+		if (!api_is_utf8($encoding)) {
+			$haystack = api_utf8_encode($haystack, $encoding);
+			$needle = api_utf8_encode($needle, $encoding);
+		}
+		$result = @mb_strstr($haystack, $needle, $before_needle, 'UTF-8');
 		if ($result !== false) {
-			return api_utf8_decode($result, $encoding);
-		} else {
-			return false;
+			if (!api_is_utf8($encoding)) {
+				return api_utf8_decode($result, $encoding);
+			}
+			return $result;
 		}
+		return false;
 	}
 	// Adding the missing parameter $before_needle to the original function strstr(), PHP_VERSION < 5.3
 	if (!$before_needle) {
@ -830,47 +863,48 @@ function api_strtolower($string, $encoding = null) {
 		return @mb_strtolower($string, $encoding);
 	}
 	elseif (api_is_encoding_supported($encoding)) {
-		if (MBSTRING_INSTALLED) {
-			return api_utf8_decode(@mb_strtolower(api_utf8_encode($string, $encoding), 'UTF-8'), $encoding);
-		}
-		// This branch (this fragment of code) is an adaptation from the CakePHP(tm) Project, http://www.cakefoundation.org
 		if (!api_is_utf8($encoding)) {
 			$string = api_utf8_encode($string, $encoding);
 		}
-		$codepoints = _api_utf8_to_unicode($string);
-		$length = count($codepoints);
-		$matched = false;
-		$result = array();
-		for ($i = 0 ; $i < $length; $i++) {
-			$codepoint = $codepoints[$i];
-			if ($codepoint < 128) {
-				$str = strtolower(chr($codepoint));
-				$strlen = api_byte_count($str);
-				for ($ii = 0 ; $ii < $strlen; $ii++) {
-					$lower = ord($str[$ii]);
-				}
-				$result[] = $lower;
-				$matched = true;
-			} else {
-				$matched = false;
-				$properties = _api_utf8_get_letter_case_properties($codepoint, 'upper');
-				if (!empty($properties)) {
-					foreach ($properties as $key => $value) {
-						if ($properties[$key]['upper'] == $codepoint && count($properties[$key]['lower'][0]) === 1) {
-							$result[] = $properties[$key]['lower'][0];
-							$matched = true;
-							break 1;
+		if (MBSTRING_INSTALLED) {
+			$string = @mb_strtolower($string, 'UTF-8');
+		} else {
+			// This branch (this fragment of code) is an adaptation from the CakePHP(tm) Project, http://www.cakefoundation.org
+			$codepoints = _api_utf8_to_unicode($string);
+			$length = count($codepoints);
+			$matched = false;
+			$result = array();
+			for ($i = 0 ; $i < $length; $i++) {
+				$codepoint = $codepoints[$i];
+				if ($codepoint < 128) {
+					$str = strtolower(chr($codepoint));
+					$strlen = api_byte_count($str);
+					for ($ii = 0 ; $ii < $strlen; $ii++) {
+						$lower = ord($str[$ii]);
+					}
+					$result[] = $lower;
+					$matched = true;
+				} else {
+					$matched = false;
+					$properties = _api_utf8_get_letter_case_properties($codepoint, 'upper');
+					if (!empty($properties)) {
+						foreach ($properties as $key => $value) {
+							if ($properties[$key]['upper'] == $codepoint && count($properties[$key]['lower'][0]) === 1) {
+								$result[] = $properties[$key]['lower'][0];
+								$matched = true;
+								break 1;
+							}
 						}
 					}
 				}
+				if ($matched === false) {
+					$result[] = $codepoint;
+				}
 			}
-			if ($matched === false) {
-				$result[] = $codepoint;
-			}
+			$string = _api_utf8_from_unicode($result);
 		}
-		$string = _api_utf8_from_unicode($result);
 		if (!api_is_utf8($encoding)) {
-			$string = api_utf8_decode($string, $encoding);
+			return api_utf8_decode($string, $encoding);
 		}
 		return $string;
 	}
@ -894,85 +928,86 @@ function api_strtoupper($string, $encoding = null) {
 		return @mb_strtoupper($string, $encoding);
 	}
 	elseif (api_is_encoding_supported($encoding)) {
-		if (MBSTRING_INSTALLED) {
-			return api_utf8_decode(@mb_strtoupper(api_utf8_encode($string, $encoding), 'UTF-8'), $encoding);
-		}
-		// This branch (this fragment of code) is an adaptation from the CakePHP(tm) Project, http://www.cakefoundation.org
 		if (!api_is_utf8($encoding)) {
 			$string = api_utf8_encode($string, $encoding);
 		}
-		$codepoints = _api_utf8_to_unicode($string);
-		$length = count($codepoints);
-		$matched = false;
-		$replaced = array();
-		$result = array();
-		for ($i = 0 ; $i < $length; $i++) {
-			$codepoint = $codepoints[$i];
-			if ($codepoint < 128) {
-				$str = strtoupper(chr($codepoint));
-				$strlen = api_byte_count($str);
-				for ($ii = 0 ; $ii < $strlen; $ii++) {
-					$lower = ord($str[$ii]);
-				}
-				$result[] = $lower;
-				$matched = true;
-			} else {
-				$matched = false;
-				$properties = _api_utf8_get_letter_case_properties($codepoint);
-				$property_count = count($properties);
-				if (!empty($properties)) {
-					foreach ($properties as $key => $value) {
-						$matched = false;
-						$replace = 0;
-						if ($length > 1 && count($properties[$key]['lower']) > 1) {
-							$j = 0;
-							for ($ii = 0; $ii < count($properties[$key]['lower']); $ii++) {
-								$next_codepoint = $next_codepoints[$i + $ii];
-								if (isset($next_codepoint) && ($next_codepoint == $properties[$key]['lower'][$j + $ii])) {
-									$replace++;
+		if (MBSTRING_INSTALLED) {
+			$string = @mb_strtoupper($string, 'UTF-8');
+		} else {
+			// This branch (this fragment of code) is an adaptation from the CakePHP(tm) Project, http://www.cakefoundation.org
+			$codepoints = _api_utf8_to_unicode($string);
+			$length = count($codepoints);
+			$matched = false;
+			$replaced = array();
+			$result = array();
+			for ($i = 0 ; $i < $length; $i++) {
+				$codepoint = $codepoints[$i];
+				if ($codepoint < 128) {
+					$str = strtoupper(chr($codepoint));
+					$strlen = api_byte_count($str);
+					for ($ii = 0 ; $ii < $strlen; $ii++) {
+						$lower = ord($str[$ii]);
+					}
+					$result[] = $lower;
+					$matched = true;
+				} else {
+					$matched = false;
+					$properties = _api_utf8_get_letter_case_properties($codepoint);
+					$property_count = count($properties);
+					if (!empty($properties)) {
+						foreach ($properties as $key => $value) {
+							$matched = false;
+							$replace = 0;
+							if ($length > 1 && count($properties[$key]['lower']) > 1) {
+								$j = 0;
+								for ($ii = 0; $ii < count($properties[$key]['lower']); $ii++) {
+									$next_codepoint = $next_codepoints[$i + $ii];
+									if (isset($next_codepoint) && ($next_codepoint == $properties[$key]['lower'][$j + $ii])) {
+										$replace++;
+									}
+								}
+								if ($replace == count($properties[$key]['lower'])) {
+									$result[] = $properties[$key]['upper'];
+									$replaced = array_merge($replaced, array_values($properties[$key]['lower']));
+									$matched = true;
+									break 1;
+								}
+							} elseif ($length > 1 && $property_count > 1) {
+								$j = 0;
+								for ($ii = 1; $ii < $property_count; $ii++) {
+									$next_codepoint = $next_codepoints[$i + $ii - 1];
+									if (in_array($next_codepoint, $properties[$ii]['lower'])) {
+										for ($jj = 0; $jj < count($properties[$ii]['lower']); $jj++) {
+											$next_codepoint = $next_codepoints[$i + $jj];
+											if (isset($next_codepoint) && ($next_codepoint == $properties[$ii]['lower'][$j + $jj])) {
+												$replace++;
+											}
+										}
+										if ($replace == count($properties[$ii]['lower'])) {
+											$result[] = $properties[$ii]['upper'];
+											$replaced = array_merge($replaced, array_values($properties[$ii]['lower']));
+											$matched = true;
+											break 2;
+										}
+									}
 								}
 							}
-							if ($replace == count($properties[$key]['lower'])) {
+							if ($properties[$key]['lower'][0] == $codepoint) {
 								$result[] = $properties[$key]['upper'];
-								$replaced = array_merge($replaced, array_values($properties[$key]['lower']));
 								$matched = true;
 								break 1;
 							}
-						} elseif ($length > 1 && $property_count > 1) {
-							$j = 0;
-							for ($ii = 1; $ii < $property_count; $ii++) {
-								$next_codepoint = $next_codepoints[$i + $ii - 1];
-								if (in_array($next_codepoint, $properties[$ii]['lower'])) {
-									for ($jj = 0; $jj < count($properties[$ii]['lower']); $jj++) {
-										$next_codepoint = $next_codepoints[$i + $jj];
-										if (isset($next_codepoint) && ($next_codepoint == $properties[$ii]['lower'][$j + $jj])) {
-											$replace++;
-										}
-									}
-									if ($replace == count($properties[$ii]['lower'])) {
-										$result[] = $properties[$ii]['upper'];
-										$replaced = array_merge($replaced, array_values($properties[$ii]['lower']));
-										$matched = true;
-										break 2;
-									}
-								}
-							}
-						}
-						if ($properties[$key]['lower'][0] == $codepoint) {
-							$result[] = $properties[$key]['upper'];
-							$matched = true;
-							break 1;
 						}
 					}
 				}
+				if ($matched === false && !in_array($codepoint, $replaced, true)) {
+					$result[] = $codepoint;
+				}
 			}
-			if ($matched === false && !in_array($codepoint, $replaced, true)) {
-				$result[] = $codepoint;
-			}
+			$string = _api_utf8_from_unicode($result);
 		}
-		$string = _api_utf8_from_unicode($result);
 		if (!api_is_utf8($encoding)) {
-			$string = api_utf8_decode($string, $encoding);
+			return api_utf8_decode($string, $encoding);
 		}
 		return $string;
 	}
@ -989,6 +1024,7 @@ function api_strtoupper($string, $encoding = null) {
 * This function is aimed at replacing the function strtr() for human-language strings.
 * @link http://php.net/manual/en/function.strtr
 * TODO: To be revised and tested. Probably this function will not be needed.
+ * TODO: This function will be removed. It is not needed. 21-AUG-2009.
 */
 function api_strtr($string, $from, $to = null, $encoding = null) {
 	if (empty($string)) {
@ -1060,74 +1096,75 @@ function api_substr($string, $start, $length = null, $encoding = null) {
 		return @mb_substr($string, $start, $length, $encoding);
 	}
 	elseif (api_is_encoding_supported($encoding)) {
-		if (MBSTRING_INSTALLED) {
-			return api_utf8_decode(@mb_substr(api_utf8_encode($string, $encoding), $start, $length, 'UTF-8'), $encoding);
-		}
-		// The following branch of code is from the Drupal CMS, see the function drupal_substr().
 		if (!api_is_utf8($encoding)) {
 			$string = api_utf8_encode($string, $encoding);
 		}
-		$strlen = api_byte_count($string);
-		// Find the starting byte offset
-		$bytes = 0;
-		if ($start > 0) {
-			// Count all the continuation bytes from the start until we have found
-			// $start characters
-			$bytes = -1; $chars = -1;
-			while ($bytes < $strlen && $chars < $start) {
-				$bytes++;
-				$c = ord($string[$bytes]);
-				if ($c < 0x80 || $c >= 0xC0) {
-					$chars++;
+		if (MBSTRING_INSTALLED) {
+			$string = @mb_substr($string, $start, $length, 'UTF-8');
+		} else {
+			// The following branch of code is from the Drupal CMS, see the function drupal_substr().
+			$strlen = api_byte_count($string);
+			// Find the starting byte offset
+			$bytes = 0;
+			if ($start > 0) {
+				// Count all the continuation bytes from the start until we have found
+				// $start characters
+				$bytes = -1; $chars = -1;
+				while ($bytes < $strlen && $chars < $start) {
+					$bytes++;
+					$c = ord($string[$bytes]);
+					if ($c < 0x80 || $c >= 0xC0) {
+						$chars++;
+					}
 				}
 			}
-		}
-		else if ($start < 0) {
-			// Count all the continuation bytes from the end until we have found
-			// abs($start) characters
-			$start = abs($start);
-			$bytes = $strlen; $chars = 0;
-			while ($bytes > 0 && $chars < $start) {
-				$bytes--;
-				$c = ord($string[$bytes]);
-				if ($c < 0x80 || $c >= 0xC0) {
-					$chars++;
+			else if ($start < 0) {
+				// Count all the continuation bytes from the end until we have found
+				// abs($start) characters
+				$start = abs($start);
+				$bytes = $strlen; $chars = 0;
+				while ($bytes > 0 && $chars < $start) {
+					$bytes--;
+					$c = ord($string[$bytes]);
+					if ($c < 0x80 || $c >= 0xC0) {
+						$chars++;
+					}
 				}
 			}
-		}
-		$istart = $bytes;
-		// Find the ending byte offset
-		if ($length === NULL) {
-			$bytes = $strlen - 1;
-		}
-		else if ($length > 0) {
-			// Count all the continuation bytes from the starting index until we have
-			// found $length + 1 characters. Then backtrack one byte.
-			$bytes = $istart; $chars = 0;
-			while ($bytes < $strlen && $chars < $length) {
-				$bytes++;
-				$c = ord($string[$bytes]);
-				if ($c < 0x80 || $c >= 0xC0) {
-					$chars++;
+			$istart = $bytes;
+			// Find the ending byte offset
+			if ($length === NULL) {
+				$bytes = $strlen - 1;
+			}
+			else if ($length > 0) {
+				// Count all the continuation bytes from the starting index until we have
+				// found $length + 1 characters. Then backtrack one byte.
+				$bytes = $istart; $chars = 0;
+				while ($bytes < $strlen && $chars < $length) {
+					$bytes++;
+					$c = ord($string[$bytes]);
+					if ($c < 0x80 || $c >= 0xC0) {
+						$chars++;
+					}
 				}
+				$bytes--;
 			}
-			$bytes--;
-		}
-		else if ($length < 0) {
-			// Count all the continuation bytes from the end until we have found
-			// abs($length) characters
-			$length = abs($length);
-			$bytes = $strlen - 1; $chars = 0;
-			while ($bytes >= 0 && $chars < $length) {
-				$c = ord($string[$bytes]);
-				if ($c < 0x80 || $c >= 0xC0) {
-					$chars++;
+			else if ($length < 0) {
+				// Count all the continuation bytes from the end until we have found
+				// abs($length) characters
+				$length = abs($length);
+				$bytes = $strlen - 1; $chars = 0;
+				while ($bytes >= 0 && $chars < $length) {
+					$c = ord($string[$bytes]);
+					if ($c < 0x80 || $c >= 0xC0) {
+						$chars++;
+					}
+					$bytes--;
 				}
-				$bytes--;
 			}
+			$iend = $bytes;
+			$string = substr($string, $istart, max(0, $iend - $istart + 1));
 		}
-		$iend = $bytes;
-		$string = substr($string, $istart, max(0, $iend - $istart + 1));
 		if (!api_is_utf8($encoding)) {
 			$string = api_utf8_decode($string, $encoding);
 		}
@ -1159,7 +1196,12 @@ function api_substr_replace($string, $replacement, $start, $length = null, $enco
 	if (empty($encoding)) {
 		$encoding = api_mb_internal_encoding();
 	}
-	if (api_is_encoding_supported($encoding) && !_api_is_single_byte_encoding($encoding)) {
+	if (_api_is_single_byte_encoding($encoding)) {
+		return substr_replace($string, $replacement, $start, $length);
+	}
+	if (api_is_encoding_supported($encoding)) {
+		// This fragment (branch) of code is an adaptation of a published proposal:
+		// http://php.net/manual/en/function.substr-replace.php#90146
 		$string_length = api_strlen($string, $encoding);
 		if ($start < 0) {
 			$start = max(0, $string_length + $start);
@ -1214,8 +1256,24 @@ function api_ucwords($string, $encoding = null) {
    if (api_mb_supports($encoding)) {
 		return @mb_convert_case($string, MB_CASE_TITLE, $encoding);
 	}
-	elseif (MBSTRING_INSTALLED && api_iconv_supports($encoding)) {
-		return api_utf8_decode(@mb_convert_case(api_utf8_encode($string, $encoding), MB_CASE_TITLE, 'UTF-8'), $encoding);
+	if (api_is_encoding_supported($encoding)) {
+		if (!api_is_utf8($encoding)) {
+			$string = api_utf8_encode($string, $encoding);
+		}
+		if (MBSTRING_INSTALLED) {
+			$string = @mb_convert_case($string, MB_CASE_TITLE, 'UTF-8');
+		} else {
+			// The following fragment (branch) of code is based on the function utf8_ucwords() by Harry Fuecks
+			// See http://dev.splitbrain.org/view/darcs/dokuwiki/inc/utf8.php
+			// Note: [\x0c\x09\x0b\x0a\x0d\x20] matches form feeds, horizontal tabs, vertical tabs, linefeeds, carriage returns and spaces.
+			// This corresponds to the definition of a "word" defined at http://www.php.net/ucwords
+			$pattern = '/(^|([\x0c\x09\x0b\x0a\x0d\x20]+))([^\x0c\x09\x0b\x0a\x0d\x20]{1})[^\x0c\x09\x0b\x0a\x0d\x20]*/u';
+			$string = preg_replace_callback($pattern, '_api_utf8_ucwords_callback', $string);
+		}
+		if (!api_is_utf8($encoding)) {
+			return api_utf8_decode($string, $encoding);
+		}
+		return $string;
 	}
 	return ucwords($string);
 }
@ -1228,6 +1286,8 @@ function api_ucwords($string, $encoding = null) {
 */

 /**
+ * Note: Try to avoid using this function. Use api_preg_match() with Perl-compatible regular expression syntax.
+ *
 * Executes a regular expression match with extended multibyte support.
 * By default this function uses the platform character set.
 * @param string $pattern			The regular expression pattern.
@ -1244,12 +1304,10 @@ function api_ereg($pattern, $string, & $regs = null) {
 	if (api_mb_supports($encoding)) {
 		if ($count < 3) {
 			return @mb_ereg($pattern, $string);
-		} else {
-			$result = @mb_ereg($pattern, $string, $regs);
-			return $result;
 		}
+		return @mb_ereg($pattern, $string, $regs);
 	}
-	elseif (MBSTRING_INSTALLED && api_iconv_supports($encoding)) {
+	if (MBSTRING_INSTALLED && api_is_encoding_supported($encoding)) {
 		global $_api_encoding;
 		$_api_encoding = $encoding;
 		api_mb_regex_encoding('UTF-8');
@ -1261,16 +1319,16 @@ function api_ereg($pattern, $string, & $regs = null) {
 		}
 		api_mb_regex_encoding($encoding);
 		return $result;
-	} else {
-		if ($count < 3) {
-			return ereg($pattern, $string);
-		} else {
-			return ereg($pattern, $string, $regs);
-		}
 	}
+	if ($count < 3) {
+		return ereg($pattern, $string);
+	}
+	return ereg($pattern, $string, $regs);
 }

 /**
+ * Note: Try to avoid using this function. Use api_preg_replace() with Perl-compatible regular expression syntax.
+ *
 * Scans string for matches to pattern, then replaces the matched text with replacement, with extended multibyte support.
 * By default this function uses the platform character set.
 * @param string $pattern				The regular expression pattern.
@ -1292,13 +1350,11 @@ function api_ereg_replace($pattern, $replacement, $string, $option = null) {
 	if (api_mb_supports($encoding)) {
 		if (is_null($option)) {
 			return @mb_ereg_replace($pattern, $replacement, $string);
-		} else {
-			return @mb_ereg_replace($pattern, $replacement, $string, $option);
 		}
+		return @mb_ereg_replace($pattern, $replacement, $string, $option);
 	}
-	elseif (MBSTRING_INSTALLED && api_iconv_supports($encoding)) {
+	if (MBSTRING_INSTALLED && api_is_encoding_supported($encoding)) {
 		api_mb_regex_encoding('UTF-8');
-
 		if (is_null($option)) {
 			$result = api_utf8_decode(@mb_ereg_replace(api_utf8_encode($pattern, $encoding), api_utf8_encode($replacement, $encoding), api_utf8_encode($string, $encoding)), $encoding);
 		} else {
@ -1306,12 +1362,13 @@ function api_ereg_replace($pattern, $replacement, $string, $option = null) {
 		}
 		api_mb_regex_encoding($encoding);
 		return $result;
-	} else {
-		return ereg_replace($pattern, $replacement, $string);
 	}
+	return ereg_replace($pattern, $replacement, $string);
 }

 /**
+ * Note: Try to avoid using this function. Use api_preg_match() with Perl-compatible regular expression syntax.
+ *
 * Executes a regular expression match, ignoring case, with extended multibyte support.
 * By default this function uses the platform character set.
 * @param string $pattern			The regular expression pattern.
@ -1328,11 +1385,10 @@ function api_eregi($pattern, $string, & $regs = null) {
 	if (api_mb_supports($encoding)) {
 		if ($count < 3) {
 			return @mb_eregi($pattern, $string);
-		} else {
-			return @mb_eregi($pattern, $string, $regs);
 		}
+		return @mb_eregi($pattern, $string, $regs);
 	}
-	elseif (MBSTRING_INSTALLED && api_iconv_supports($encoding)) {
+	if (MBSTRING_INSTALLED && api_is_encoding_supported($encoding)) {
 		global $_api_encoding;
 		$_api_encoding = $encoding;
 		api_mb_regex_encoding('UTF-8');
@ -1344,16 +1400,16 @@ function api_eregi($pattern, $string, & $regs = null) {
 		}
 		api_mb_regex_encoding($encoding);
 		return $result;
-	} else {
-		if ($count < 3) {
-			return eregi($pattern, $string);
-		} else {
-			return eregi($pattern, $string, $regs);
-		}
 	}
+	if ($count < 3) {
+		return eregi($pattern, $string);
+	}
+	return eregi($pattern, $string, $regs);
 }

 /**
+ * Note: Try to avoid using this function. Use api_preg_replace() with Perl-compatible regular expression syntax.
+ *
 * Scans string for matches to pattern, then replaces the matched text with replacement, ignoring case, with extended multibyte support.
 * By default this function uses the platform character set.
 * @param string $pattern				The regular expression pattern.
@ -1375,11 +1431,10 @@ function api_eregi_replace($pattern, $replacement, $string, $option = null) {
 	if (api_mb_supports($encoding)) {
 		if (is_null($option)) {
 			return @mb_eregi_replace($pattern, $replacement, $string);
-		} else {
-			return @mb_eregi_replace($pattern, $replacement, $string, $option);
 		}
+		return @mb_eregi_replace($pattern, $replacement, $string, $option);
 	}
-	elseif (MBSTRING_INSTALLED && api_iconv_supports($encoding)) {
+	if (MBSTRING_INSTALLED && api_is_encoding_supported($encoding)) {
 		api_mb_regex_encoding('UTF-8');
 		if (is_null($option)) {
 			$result = api_utf8_decode(@mb_eregi_replace(api_utf8_encode($pattern, $encoding), api_utf8_encode($replacement, $encoding), api_utf8_encode($string, $encoding)), $encoding);
@ -1388,9 +1443,8 @@ function api_eregi_replace($pattern, $replacement, $string, $option = null) {
 		}
 		api_mb_regex_encoding($encoding);
 		return $result;
-	} else {
-		return eregi_replace($pattern, $replacement, $string);
 	}
+	return eregi_replace($pattern, $replacement, $string);
 }

 /**
@ -1448,16 +1502,17 @@ function api_preg_match_all($pattern, $subject, &$matches, $flags = PREG_PATTERN
 * If matches are found, the new subject will be returned, otherwise subject will be returned unchanged or NULL if an error occurred.
 * @link http://php.net/preg_replace
 */
-function api_preg_replace($pattern, $replacement, $subject, $limit= -1, &$count = 0, $encoding = null) {
+function api_preg_replace($pattern, $replacement, $subject, $limit = -1, &$count = 0, $encoding = null) {
 	if (empty($encoding)){
 		$encoding = api_get_system_encoding();
 	}
+	$is_utf8 = api_is_utf8($encoding);
 	if (is_array($pattern)) {
 		foreach ($pattern as &$p) {
-			$p = api_is_utf8($encoding) ? $p.'u' : $p;
+			$p = $is_utf8 ? $p.'u' : $p;
 		}
 	} else {
-		$pattern = api_is_utf8($encoding) ? $pattern.'u' : $pattern;
+		$pattern = $is_utf8 ? $pattern.'u' : $pattern;
 	}
 	return preg_replace($pattern, $replacement, $subject, $limit, $count);
 }
@ -1473,7 +1528,7 @@ function api_preg_replace($pattern, $replacement, $subject, $limit= -1, &$count
 * @return array|string					Returns an array if the subject parameter is an array, or a string otherwise.
 * @link http://php.net/preg_replace_callback
 */
-function api_preg_replace_callback($pattern, $callback, $subject, $limit= -1, &$count = 0, $encoding = null) {
+function api_preg_replace_callback($pattern, $callback, $subject, $limit = -1, &$count = 0, $encoding = null) {
 	if (empty($encoding)){
 		$encoding = api_get_system_encoding();
 	}
@ -1509,6 +1564,8 @@ function api_preg_split($pattern, $subject, $limit = -1, $flags = 0, $encoding =
 }

 /**
+ * Note: Try to avoid using this function. Use api_preg_split() with Perl-compatible regular expression syntax.
+ *
 * Splits a multibyte string using regular expression pattern and returns the result as an array.
 * By default this function uses the platform character set.
 * @param string $pattern			The regular expression pattern.
@ -1524,11 +1581,10 @@ function api_split($pattern, $string, $limit = null) {
 	if (api_mb_supports($encoding)) {
 		if (is_null($limit)) {
 			return @mb_split($pattern, $string);
-		} else {
-			return @mb_split($pattern, $string, $limit);
 		}
+		return @mb_split($pattern, $string, $limit);
 	}
-	elseif (MBSTRING_INSTALLED && api_iconv_supports($encoding)) {
+	if (MBSTRING_INSTALLED && api_is_encoding_supported($encoding)) {
 		global $_api_encoding;
 		$_api_encoding = $encoding;
 		api_mb_regex_encoding('UTF-8');
@ -1540,13 +1596,11 @@ function api_split($pattern, $string, $limit = null) {
 		$result = _api_array_utf8_decode($result);
 		api_mb_regex_encoding($encoding);
 		return $result;
-	} else {
-		if (is_null($limit)) {
-			return split($pattern, $string);
-		} else {
-			return split($pattern, $string, $limit);
-		}
 	}
+	if (is_null($limit)) {
+		return split($pattern, $string);
+	}
+	return split($pattern, $string, $limit);
 }

 /**
@ -2265,12 +2319,10 @@ function api_get_non_utf8_encoding($language = null) {
 	if (is_array($encodings[$language])) {
 		if (!empty($encodings[$language][0])) {
 			return $encodings[$language][0];
-		} else {
-			return 'ISO-8859-15';
 		}
-	} else {
 		return 'ISO-8859-15';
 	}
+	return 'ISO-8859-15';
 }

 /**
@ -2375,11 +2427,10 @@ function api_refine_encoding_id($encoding) {
 * @return bool							Returns TRUE if the encodings are equal, FALSE otherwise.
 */
 function api_equal_encodings($encoding1, $encoding2) {
-	$is_array_encoding1 = is_array($encoding1);
 	$is_array_encoding2 = is_array($encoding2);
 	$encoding1 = api_refine_encoding_id($encoding1);
 	$encoding2 = api_refine_encoding_id($encoding2);
-	if (!$is_array_encoding1 && !$is_array_encoding2) {
+	if (!is_array($encoding1) && !$is_array_encoding2) {
 		return $encoding1 == $encoding2;
 	}
 	if ($is_array_encoding2) {
@ -2587,13 +2638,10 @@ function api_iconv_set_encoding($type, $encoding = null) {
 				if(@iconv_set_encoding($type, $encoding)) {
 					$iconv_internal_encoding = $encoding;
 					return true;
-				} else {
-					return false;
 				}
-			} else {
 				return false;
 			}
-			break;
+			return false;
 		case 'iconv_input_encoding':
 			if (empty($encoding)) {
 				if (is_null($iconv_input_encoding)) {
@ -2605,13 +2653,10 @@ function api_iconv_set_encoding($type, $encoding = null) {
 				if(@iconv_set_encoding($type, $encoding)) {
 					$iconv_input_encoding = $encoding;
 					return true;
-				} else {
-					return false;
 				}
-			} else {
 				return false;
 			}
-			break;
+			return false;
 		case 'iconv_output_encoding':
 			if (empty($encoding)) {
 				if (is_null($iconv_output_encoding)) {
@ -2623,16 +2668,12 @@ function api_iconv_set_encoding($type, $encoding = null) {
 				if(@iconv_set_encoding($type, $encoding)) {
 					$iconv_output_encoding = $encoding;
 					return true;
-				} else {
-					return false;
 				}
-			} else {
 				return false;
 			}
-			break;
-		default:
 			return false;
 	}
+	return false;
 }

 /**
--- a/main/inc/lib/multibyte_string_functions_internal.lib.php
+++ b/main/inc/lib/multibyte_string_functions_internal.lib.php
@ -8,6 +8,10 @@
 * @author: Ivan Tcholakov, ivantcholakov@gmail.com, 2009
 * @package dokeos.library
 * ==============================================================================
+ * 
+ * Note: None of the functions and data structures here are meant to be used directly.
+ * See the file multibyte_string_functions.lib.php, which contains the "public" API.
+ * 
 */

 // Global variables used by some callback functions.
@ -24,7 +28,7 @@ $_api_collator = null;
 // This is a php-implementation of the function api_convert_encoding().
 function _api_convert_encoding($string, $to_encoding, $from_encoding) {
 	static $character_map = array();
-	static $utf8_like = array('UTF-8', 'US-ASCII');
+	static $utf8_compatible = array('UTF-8', 'US-ASCII');
 	if (empty($string)) {
 		return $string;
 	}
@ -35,7 +39,7 @@ function _api_convert_encoding($string, $to_encoding, $from_encoding) {
 	}
 	$to = _api_get_character_map_name($to_encoding);
 	$from = _api_get_character_map_name($from_encoding);
-	if (empty($to) || empty($from) || $to == $from || (in_array($to, $utf8_like) && in_array($from, $utf8_like))) {
+	if (empty($to) || empty($from) || $to == $from || (in_array($to, $utf8_compatible) && in_array($from, $utf8_compatible))) {
 		return $string;
 	}
 	if (!isset($character_map[$to])) {
@ -395,6 +399,20 @@ function _api_utf8_get_letter_case_properties($codepoint, $type = 'lower') {
 	return $result;
 }

+/**
+ * A callback function serving the function api_ucwords().
+ * It rebuilds a single matched word with its first character replaced by an uppercased value.
+ * All string operations here are performed using the UTF-8 encoding.
+ * @author Harry Fuecks
+ * @link http://dev.splitbrain.org/view/darcs/dokuwiki/inc/utf8.php
+ * @author Ivan Tcholakov, adaptation for the Dokeos LMS, 2009
+ * @param array $matches	Input array of matches corresponding to a single word. Element 0 is the whole match (it gets left-trimmed), element 2 is used as the leading whitespace, element 3 is the text that gets converted to uppercase (presumably the first character of the word - to be confirmed against the pattern used by the caller).
+ * @return string			Returns the leading whitespace followed by the word with its first character in uppercase.
+ */
+function _api_utf8_ucwords_callback($matches) {
+	$leadingws = $matches[2];
+	$ucfirst = api_strtoupper($matches[3], 'UTF-8');
+	$ucword = api_substr_replace(ltrim($matches[0]), $ucfirst, 0, 1, 'UTF-8');
+	return $leadingws . $ucword;
+}

 /**
 * ----------------------------------------------------------------------------
@ -571,11 +589,10 @@ if (MBSTRING_INSTALLED && !function_exists('mb_stristr')) {
 		if ($pos === false) {
 			return false;
 		}
-		elseif($part == true) {
+		if($part == true) {
 			return mb_substr($haystack, 0, $pos + 1, $encoding);
-		} else {
-			return mb_substr($haystack, $pos, mb_strlen($haystack, $encoding), $encoding);
 		}
+		return mb_substr($haystack, $pos, mb_strlen($haystack, $encoding), $encoding);
 	}
 }

@ -591,11 +608,11 @@ if (MBSTRING_INSTALLED && !function_exists('mb_strrchr')) {
 		$pos = mb_strrpos($haystack, $needle, mb_strlen($haystack, $encoding) - 1, $encoding);
 		if ($pos === false) {
 			return false;
-		} elseif($part == true) {
+		} 
+		if($part == true) {
 			return mb_substr($haystack, 0, $pos + 1, $encoding);
-		} else {
-			return mb_substr($haystack, $pos, mb_strlen($haystack, $encoding), $encoding);
 		}
+		return mb_substr($haystack, $pos, mb_strlen($haystack, $encoding), $encoding);
 	}
 }

@ -610,10 +627,10 @@ if (MBSTRING_INSTALLED && !function_exists('mb_strstr')) {
 		$pos = mb_strpos($haystack, $needle, 0, $encoding);
 		if ($pos === false) {
 			return false;
-		} elseif($part == true) {
+		}
+		if($part == true) {
 			return mb_substr($haystack, 0, $pos + 1, $encoding);
-		} else {
-			return mb_substr($haystack, $pos, mb_strlen($haystack, $encoding), $encoding);
 		}
+		return mb_substr($haystack, $pos, mb_strlen($haystack, $encoding), $encoding);
 	}
 }