merge

16 years ago · eb7e83c301
parent 26d33a85b0 51a649cd96
commit eb7e83c301
3 changed files with 353 additions and 174 deletions
--- a/main/inc/lib/add_course.lib.inc.php
+++ b/main/inc/lib/add_course.lib.inc.php
@ -64,7 +64,7 @@ function generate_course_code($course_title, $encoding = null)
 	if (empty($encoding)) {
 		$encoding = api_get_system_encoding();
 	}
-	return substr(preg_replace('/[^A-Z0-9]/', '', strtoupper(api_transliterate($course_title, $encoding))), 0, 20);
+	return substr(preg_replace('/[^A-Z0-9]/', '', strtoupper(api_transliterate($course_title, 'X', $encoding))), 0, 20);
 }


--- a/main/inc/lib/multibyte_string_functions.lib.php
+++ b/main/inc/lib/multibyte_string_functions.lib.php
@ -410,29 +410,37 @@ function api_str_ireplace($search, $replace, $subject, & $count = null, $encodin
 * @link http://php.net/str_split
 */
 function api_str_split($string, $split_length = 1, $encoding = null) {
-	if ($split_length < 1) {
-		return false;
-	}
 	if (empty($encoding)) {
 		$encoding = api_mb_internal_encoding();
 	}
+	if ((string)$string === '') { // Not empty(): that would also discard the valid one-character string "0".
+		return array();
+	}
+	if ($split_length < 1) {
+		return false;
+	}
 	if (_api_is_single_byte_encoding($encoding)) {
 		return str_split($string, $split_length);
 	}
-	$result = array();
-	if (api_mb_supports($encoding)) {
-		for ($i = 0, $length = @mb_strlen($string, $encoding); $i < $length; $i += $split_length) {
-			$result[] = @mb_substr($string, $i, $split_length, $encoding);
+	if (api_is_encoding_supported($encoding)) {
+		$len = api_strlen($string, $encoding); // $string is still in the caller's encoding here, so measure it in that encoding.
+		if ($len <= $split_length) {
+			return array($string);
 		}
+		if (!api_is_utf8($encoding)) {
+			$string = api_utf8_encode($string, $encoding);
 		}
-	elseif (api_iconv_supports($encoding) || api_is_utf8($encoding)) {
-		for ($i = 0, $length = api_strlen($string, $encoding); $i < $length; $i += $split_length) {
-			$result[] = api_substr($string, $i, $split_length, $encoding);
+		if (preg_match_all('/.{'.$split_length.'}|[^\x00]{1,'.$split_length.'}$/us', $string, $result) === false) {
+			return array();
 		}
-	} else {
-		return str_split($string, $split_length);
+		if (!api_is_utf8($encoding)) {
+			global $_api_encoding;
+			$_api_encoding = $encoding;
+			$result[0] = _api_array_utf8_decode($result[0]); // Decode in place so that $result[0] below is still the list of chunks, not the first chunk.
 		}
-	return $result;
+		return $result[0];
+	}
+	return str_split($string, $split_length);
 }

 /**
@ -454,11 +462,11 @@ function api_stripos($haystack, $needle, $offset = 0, $encoding = null) {
 	if (api_mb_supports($encoding)) {
 		return @mb_stripos($haystack, $needle, $offset, $encoding);
 	}
-	elseif (MBSTRING_INSTALLED && (api_iconv_supports($encoding) || _api_convert_encoding_supports($encoding))) {
-		return api_utf8_decode(@mb_stripos(api_utf8_encode($haystack, $encoding), api_utf8_encode($needle, $encoding), $offset, 'UTF-8'), $encoding);
+	elseif (api_is_encoding_supported($encoding)) {
+		if (MBSTRING_INSTALLED) {
+			return @mb_stripos(api_utf8_encode($haystack, $encoding), api_utf8_encode($needle, $encoding), $offset, 'UTF-8');
 		}
-	elseif (api_iconv_supports($encoding) || _api_convert_encoding_supports($encoding)) {
-		api_strpos(api_strtolower($haystack, $encoding), api_strtolower($needle, $encoding), $offset, $encoding);
+		return api_strpos(api_strtolower($haystack, $encoding), api_strtolower($needle, $encoding), $offset, $encoding);
 	}
 	return stripos($haystack, $needle, $offset);
 }
@ -466,28 +474,54 @@ function api_stripos($haystack, $needle, $offset = 0, $encoding = null) {
 /**
 * Finds first occurrence of a string within another, case insensitive.
 * @param string $haystack					The string from which to get the first occurrence.
- * @param string @needle				The string to be found.
- * @param bool $part (optional)			Determines which portion of $haystack this function returns. The default value is FALSE.
+ * @param mixed $needle					The string to be found.
+ * @param bool $before_needle (optional)	Determines which portion of $haystack this function returns. The default value is FALSE.
 * @param string $encoding (optional)		The used internally by this function character encoding. If it is omitted, the platform character set will be used by default.
 * @return mixed							Returns the portion of $haystack, or FALSE if $needle is not found.
 * Notes:
- * If $part is set to TRUE, the function returns all of $haystack from the beginning to the first occurrence of $needle.
- * If $part is set to FALSE, the function returns all of $haystack from the first occurrence of $needle to the end.
+ * If $needle is not a string, it is converted to an integer and applied as the ordinal value (codepoint if the encoding is UTF-8) of a character.
+ * If $before_needle is set to TRUE, the function returns all of $haystack from the beginning to the first occurrence of $needle.
+ * If $before_needle is set to FALSE, the function returns all of $haystack from the first occurrence of $needle to the end.
 * This function is aimed at replacing the functions stristr() and mb_stristr() for human-language strings.
 * @link http://php.net/manual/en/function.stristr
 * @link http://php.net/manual/en/function.mb-stristr
 */
-function api_stristr($haystack, $needle, $part = false, $encoding = null) {
+function api_stristr($haystack, $needle, $before_needle = false, $encoding = null) {
 	if (empty($encoding)) {
 		$encoding = api_mb_internal_encoding();
 	}
+	if (!is_string($needle)) { // A non-string needle is treated as a character code.
+		$needle = (int)$needle;
+		if (api_is_utf8($encoding)) {
+			$needle = _api_utf8_chr($needle);
+		} else {
+			$needle = chr($needle);
+		}
+	}
 	if (api_mb_supports($encoding)) {
-		return @mb_stristr($haystack, $needle, $part, $encoding);
+		return @mb_stristr($haystack, $needle, $before_needle, $encoding);
 	}
-	elseif (MBSTRING_INSTALLED && (api_iconv_supports($encoding) || _api_convert_encoding_supports($encoding))) {
-		return api_utf8_decode(@mb_stristr(api_utf8_encode($haystack, $encoding), api_utf8_encode($needle, $encoding), $part, 'UTF-8'));
+	elseif (api_is_encoding_supported($encoding)) {
+		if (MBSTRING_INSTALLED) {
+			$result = @mb_stristr(api_utf8_encode($haystack, $encoding), api_utf8_encode($needle, $encoding), $before_needle, 'UTF-8');
+			if ($result === false) {
+				return false;
+			}
+			return api_utf8_decode($result, $encoding);
+		}
+		$result = api_strstr(api_strtolower($haystack, $encoding), api_strtolower($needle, $encoding), $before_needle, $encoding);
+		if ($result === false) {
+			return false;
 		}
-	return stristr($haystack, $needle, $part);
+		if ($before_needle) {
+			return api_substr($haystack, 0, api_strlen($result, $encoding), $encoding);
+		}
+		return api_substr($haystack, api_strlen($haystack, $encoding) - api_strlen($result, $encoding), null, $encoding);
+	}
+	if (version_compare(PHP_VERSION, '5.3.0', '<')) { // stristr() only gained $before_needle in PHP 5.3; emulate it byte-wise here.
+		return (($result = stristr($haystack, $needle)) === false || !$before_needle) ? $result : substr($haystack, 0, strlen($haystack) - strlen($result));
+	}
+	return stristr($haystack, $needle, $before_needle);
 }

 /**
@ -545,10 +579,10 @@ function api_strpos($haystack, $needle, $offset = 0, $encoding = null) {
 	elseif (api_mb_supports($encoding)) {
 		return @mb_strpos($haystack, $needle, $offset, $encoding);
 	}
-	elseif (MBSTRING_INSTALLED && (api_iconv_supports($encoding) || _api_convert_encoding_supports($encoding))) {
-		return api_utf8_decode(@mb_strpos(api_utf8_encode($haystack, $encoding), api_utf8_encode($needle, $encoding), $offset, 'UTF-8'), $encoding);
+	elseif (api_is_encoding_supported($encoding)) {
+		if (MBSTRING_INSTALLED) {
+			return @mb_strpos(api_utf8_encode($haystack, $encoding), api_utf8_encode($needle, $encoding), $offset, 'UTF-8');
 		}
-	elseif (api_iconv_supports($encoding) || _api_convert_encoding_supports($encoding)) {
 		if (!api_is_utf8($encoding)) {
 			$haystack = api_utf8_encode($haystack, $encoding);
 			$needle = api_utf8_encode($needle, $encoding);
@ -559,45 +593,71 @@ function api_strpos($haystack, $needle, $offset = 0, $encoding = null) {
 				return api_strlen($haystack[0]);
 			}
 			return false;
-		} else {
+		}
 		$haystack = api_substr($haystack, $offset);
 		if (($pos = api_strpos($haystack, $needle)) !== false ) {
 			return $pos + $offset;
 		}
 		return false;
 	}
-	}
 	return strpos($haystack, $needle, $offset);
 }

 /**
 * Finds the last occurrence of a character in a string.
 * @param string $haystack					The string from which to get the last occurrence.
- * @param string $needle				The string which first character is to be found.
- * @param bool $part (optional)			Determines which portion of $haystack this function returns. The default value is FALSE.
+ * @param mixed $needle						The string which first character is to be found.
+ * @param bool $before_needle (optional)	Determines which portion of $haystack this function returns. The default value is FALSE.
 * @param string $encoding (optional)		The used internally by this function character encoding. If it is omitted, the platform character set will be used by default.
 * @return mixed							Returns the portion of $haystack, or FALSE if the first character from $needle is not found.
 * Notes:
- * If $part is set to TRUE, the function returns all of $haystack from the beginning to the first occurrence.
- * If $part is set to FALSE, the function returns all of $haystack from the first occurrence to the end.
+ * If $needle is not a string, it is converted to an integer and applied as the ordinal value (codepoint if the encoding is UTF-8) of a character.
+ * If $before_needle is set to TRUE, the function returns all of $haystack from the beginning to the first occurrence.
+ * If $before_needle is set to FALSE, the function returns all of $haystack from the first occurrence to the end.
 * This function is aimed at replacing the functions strrchr() and mb_strrchr() for human-language strings.
 * @link http://php.net/manual/en/function.strrchr
 * @link http://php.net/manual/en/function.mb-strrchr
 */
-function api_strrchr($haystack, $needle, $part = false, $encoding = null) {
+function api_strrchr($haystack, $needle, $before_needle = false, $encoding = null) {
 	if (empty($encoding)) {
 		$encoding = api_mb_internal_encoding();
 	}
+	if (!is_string($needle)) { // A non-string needle is treated as a character code.
+		$needle = (int)$needle;
+		if (api_is_utf8($encoding)) {
+			$needle = _api_utf8_chr($needle);
+		} else {
+			$needle = chr($needle);
+		}
+	}
 	if (_api_is_single_byte_encoding($encoding)) {
+		if (!$before_needle) {
 			return strrchr($haystack, $needle);
 		}
+		$result = strrchr($haystack, $needle); // strrchr() has no $before_needle argument, so the leading part is cut out by length below.
+		if ($result === false) {
+			return false;
+		}
+		return api_substr($haystack, 0, api_strlen($haystack, $encoding) - api_strlen($result, $encoding), $encoding);
+	}
 	elseif (api_mb_supports($encoding)) {
-		return @mb_strrchr($haystack, $needle, $part, $encoding);
+		return @mb_strrchr($haystack, $needle, $before_needle, $encoding);
 	}
-	elseif (MBSTRING_INSTALLED && (api_iconv_supports($encoding) || _api_convert_encoding_supports($encoding))) {
-		return api_utf8_decode(@mb_strrchr(api_utf8_encode($haystack, $encoding), api_utf8_encode($needle, $encoding), $part, 'UTF-8'), $encoding);
+	elseif (MBSTRING_INSTALLED && api_is_encoding_supported($encoding)) {
+		$result = @mb_strrchr(api_utf8_encode($haystack, $encoding), api_utf8_encode($needle, $encoding), $before_needle, 'UTF-8'); // Search in UTF-8, then convert the match back.
+		if ($result === false) {
+			return false;
 		}
+		return api_utf8_decode($result, $encoding);
+	}
+	if (!$before_needle) { // Last resort: plain byte-wise strrchr().
 		return strrchr($haystack, $needle);
+	}
+	$result = strrchr($haystack, $needle);
+	if ($result === false) {
+		return false;
+	}
+	return api_substr($haystack, 0, api_strlen($haystack, $encoding) - api_strlen($result, $encoding), $encoding);
 }

 /**
@ -609,17 +669,19 @@ function api_strrchr($haystack, $needle, $part = false, $encoding = null) {
 * @link http://php.net/manual/en/function.strrev
 */
 function api_strrev($string, $encoding = null) {
+	if (empty($encoding)) {
+		$encoding = api_mb_internal_encoding();
+	}
 	if (empty($string)) { // NOTE(review): empty() is also true for the string "0", so "0" comes back as ''.
 		return '';
 	}
-	if (empty($encoding)) {
-		$encoding = api_mb_internal_encoding();
+	if (_api_is_single_byte_encoding($encoding)) {
+		return strrev($string);
 	}
-	$result = '';
-	for ($i = api_strlen($string, $encoding) - 1; $i > -1; $i--) {
-		$result .= api_substr($string, $i, 1, $encoding);
+	if (api_is_encoding_supported($encoding)) {
+		return implode(array_reverse(api_str_split($string, 1, $encoding))); // Split into single characters, reverse their order, glue back together.
 	}
-	return $result;
+	return strrev($string);
 }

 /**
@ -638,11 +700,49 @@ function api_strrpos($haystack, $needle, $offset = 0, $encoding = null) {
 	if (empty($encoding)) {
 		$encoding = api_mb_internal_encoding();
 	}
+	if (_api_is_single_byte_encoding($encoding)) {
+		return strrpos($haystack, $needle, $offset);
+	}
 	if (api_mb_supports($encoding)) {
 		return @mb_strrpos($haystack, $needle, $offset, $encoding);
 	}
-	elseif (MBSTRING_INSTALLED && api_iconv_supports($encoding)) {
-		return api_utf8_decode(@mb_strrpos(api_utf8_encode($haystack, $encoding), api_utf8_encode($needle, $encoding), $offset, 'UTF-8'), $encoding);
+	elseif (api_is_encoding_supported($encoding)) {
+		if (MBSTRING_INSTALLED) {
+			return @mb_strrpos(api_utf8_encode($haystack, $encoding), api_utf8_encode($needle, $encoding), $offset, 'UTF-8');
+		}
+		// This branch (this fragment of code) is an adaptation from the CakePHP(tm) Project, http://www.cakefoundation.org
+		if (!api_is_utf8($encoding)) {
+			$haystack = api_utf8_encode($haystack, $encoding);
+			$needle = api_utf8_encode($needle, $encoding);
+		}
+		$found = false;
+		$haystack = _api_utf8_to_unicode($haystack);
+		$haystack_count = count($haystack);
+		$matches = array_count_values($haystack);
+		$needle = _api_utf8_to_unicode($needle);
+		$needle_count = count($needle);
+		$position = $offset;
+		while (($found === false) && ($position < $haystack_count)) {
+			if (isset($needle[0]) && $needle[0] === $haystack[$position]) {
+				for ($i = 1; $i < $needle_count; $i++) {
+					if ($needle[$i] !== $haystack[$position + $i]) {
+						if ($needle[$i] === $haystack[($position + $i) -1]) {
+							$position--;
+							$found = true;
+							continue;
+						}
+					}
+				}
+				if (!$offset && isset($matches[$needle[0]]) && $matches[$needle[0]] > 1) {
+					$matches[$needle[0]] = $matches[$needle[0]] - 1;
+				} elseif ($i === $needle_count) {
+					$found = true;
+					$position--;
+				}
+			}
+			$position++;
+		}
+		return ($found) ? $position : false;
 	}
 	return strrpos($haystack, $needle, $offset);
 }
@ -650,31 +750,67 @@ function api_strrpos($haystack, $needle, $offset = 0, $encoding = null) {
 /**
 * Finds first occurrence of a string within another.
 * @param string $haystack					The string from which to get the first occurrence.
- * @param string @needle				The string to be found.
- * @param bool $part (optional)			Determines which portion of $haystack this function returns. The default value is FALSE.
+ * @param mixed $needle						The string to be found.
+ * @param bool $before_needle (optional)	Determines which portion of $haystack this function returns. The default value is FALSE.
 * @param string $encoding (optional)		The used internally by this function character encoding. If it is omitted, the platform character set will be used by default.
 * @return mixed							Returns the portion of $haystack, or FALSE if $needle is not found.
 * Notes:
- * If $part is set to TRUE, the function returns all of $haystack from the beginning to the first occurrence of $needle.
- * If $part is set to FALSE, the function returns all of $haystack from the first occurrence of $needle to the end.
+ * If $needle is not a string, it is converted to an integer and applied as the ordinal value (codepoint if the encoding is UTF-8) of a character.
+ * If $before_needle is set to TRUE, the function returns all of $haystack from the beginning to the first occurrence of $needle.
+ * If $before_needle is set to FALSE, the function returns all of $haystack from the first occurrence of $needle to the end.
 * This function is aimed at replacing the functions strstr() and mb_strstr() for human-language strings.
 * @link http://php.net/manual/en/function.strstr
 * @link http://php.net/manual/en/function.mb-strstr
 */
-function api_strstr($haystack, $needle, $part = false, $encoding = null) {
+function api_strstr($haystack, $needle, $before_needle = false, $encoding = null) {
 	if (empty($encoding)) {
 		$encoding = api_mb_internal_encoding();
 	}
+	if (!is_string($needle)) { // A non-string needle is treated as a character code.
+		$needle = (int)$needle;
+		if (api_is_utf8($encoding)) {
+			$needle = _api_utf8_chr($needle);
+		} else {
+			$needle = chr($needle);
+		}
+	}
 	if (_api_is_single_byte_encoding($encoding)) {
-		return strstr($haystack, $needle, $part);
+		// Adding the missing parameter $before_needle to the original function strstr(), PHP_VERSION < 5.3
+		if (!$before_needle) {
+			return strstr($haystack, $needle);
+		}
+		if (version_compare(PHP_VERSION, '5.3.0', '<')) { // version_compare() instead of comparing the version string with a float.
+			$result = explode($needle, $haystack, 2);
+			if ($result === false || count($result) < 2) {
+				return false;
+			}
+			return $result[0];
+		}
+		return strstr($haystack, $needle, $before_needle);
 	}
 	if (api_mb_supports($encoding)) {
-		return @mb_strstr($haystack, $needle, $part, $encoding);
+		return @mb_strstr($haystack, $needle, $before_needle, $encoding);
+	}
+	elseif (MBSTRING_INSTALLED && api_is_encoding_supported($encoding)) {
+		$result = @mb_strstr(api_utf8_encode($haystack, $encoding), api_utf8_encode($needle, $encoding), $before_needle, 'UTF-8');
+		if ($result !== false) {
+			return api_utf8_decode($result, $encoding);
+		} else {
+			return false;
 		}
-	elseif (MBSTRING_INSTALLED && api_iconv_supports($encoding)) {
-		return api_utf8_decode(@mb_strstr(api_utf8_encode($haystack, $encoding), api_utf8_encode($needle, $encoding), $part, 'UTF-8'), $encoding);
 	}
-	return strstr($haystack, $needle, $part);
+	// Adding the missing parameter $before_needle to the original function strstr(), PHP_VERSION < 5.3
+	if (!$before_needle) {
+		return strstr($haystack, $needle);
+	}
+	if (version_compare(PHP_VERSION, '5.3.0', '<')) { // version_compare() instead of comparing the version string with a float.
+		$result = explode($needle, $haystack, 2);
+		if ($result === false || count($result) < 2) {
+			return false;
+		}
+		return $result[0];
+	}
+	return strstr($haystack, $needle, $before_needle);
 }

 /**
@ -693,14 +829,14 @@ function api_strtolower($string, $encoding = null) {
 	if (api_mb_supports($encoding)) {
 		return @mb_strtolower($string, $encoding);
 	}
-	elseif (MBSTRING_INSTALLED && api_iconv_supports($encoding)) {
+	elseif (api_is_encoding_supported($encoding)) {
+		if (MBSTRING_INSTALLED) {
 			return api_utf8_decode(@mb_strtolower(api_utf8_encode($string, $encoding), 'UTF-8'), $encoding);
 		}
-	elseif (api_iconv_supports($encoding) || _api_convert_encoding_supports($encoding)) {
+		// This branch (this fragment of code) is an adaptation from the CakePHP(tm) Project, http://www.cakefoundation.org
 		if (!api_is_utf8($encoding)) {
 			$string = api_utf8_encode($string, $encoding);
 		}
-		// This branch (this fragment of code) is an adaptation from the CakePHP(tm) Project, http://www.cakefoundation.org
 		$codepoints = _api_utf8_to_unicode($string);
 		$length = count($codepoints);
 		$matched = false;
@ -757,14 +893,14 @@ function api_strtoupper($string, $encoding = null) {
 	if (api_mb_supports($encoding)) {
 		return @mb_strtoupper($string, $encoding);
 	}
-	elseif (MBSTRING_INSTALLED && api_iconv_supports($encoding)) {
+	elseif (api_is_encoding_supported($encoding)) {
+		if (MBSTRING_INSTALLED) {
 			return api_utf8_decode(@mb_strtoupper(api_utf8_encode($string, $encoding), 'UTF-8'), $encoding);
 		}
-	elseif (api_iconv_supports($encoding) || _api_convert_encoding_supports($encoding)) {
+		// This branch (this fragment of code) is an adaptation from the CakePHP(tm) Project, http://www.cakefoundation.org
 		if (!api_is_utf8($encoding)) {
 			$string = api_utf8_encode($string, $encoding);
 		}
-		// This branch (this fragment of code) is an adaptation from the CakePHP(tm) Project, http://www.cakefoundation.org
 		$codepoints = _api_utf8_to_unicode($string);
 		$length = count($codepoints);
 		$matched = false;
@ -917,14 +1053,20 @@ function api_substr($string, $start, $length = null, $encoding = null) {
 	if (is_null($length)) {
 		$length = api_strlen($string, $encoding);
 	}
+	if (_api_is_single_byte_encoding($encoding)) {
+		return substr($string, $start, $length);
+	}
 	if (api_mb_supports($encoding)) {
 		return @mb_substr($string, $start, $length, $encoding);
 	}
-	elseif (MBSTRING_INSTALLED && api_iconv_supports($encoding)) {
+	elseif (api_is_encoding_supported($encoding)) {
+		if (MBSTRING_INSTALLED) {
 			return api_utf8_decode(@mb_substr(api_utf8_encode($string, $encoding), $start, $length, 'UTF-8'), $encoding);
 		}
-	elseif (api_is_utf8($encoding)) {
 		// The following branch of code is from the Drupal CMS, see the function drupal_substr().
+		if (!api_is_utf8($encoding)) {
+			$string = api_utf8_encode($string, $encoding);
+		}
 		$strlen = api_byte_count($string);
 		// Find the starting byte offset
 		$bytes = 0;
@ -985,7 +1127,11 @@ function api_substr($string, $start, $length = null, $encoding = null) {
 			}
 		}
 		$iend = $bytes;
-		return substr($string, $istart, max(0, $iend - $istart + 1));
+		$string = substr($string, $istart, max(0, $iend - $istart + 1));
+		if (!api_is_utf8($encoding)) {
+			$string = api_utf8_decode($string, $encoding);
+		}
+		return $string;
 	}
 	return substr($string, $start, $length);
 }
@ -1013,16 +1159,29 @@ function api_substr_replace($string, $replacement, $start, $length = null, $enco
 	if (empty($encoding)) {
 		$encoding = api_mb_internal_encoding();
 	}
-	if ($length == null) {
-		return api_substr($string, 0, $start, $encoding) . $replacement;
-	} else {
+	if (api_is_encoding_supported($encoding) && !_api_is_single_byte_encoding($encoding)) {
+		$string_length = api_strlen($string, $encoding);
+		if ($start < 0) {
+			$start = max(0, $string_length + $start);
+		}
+		else if ($start > $string_length) {
+			$start = $string_length;
+		}
 		if ($length < 0) {
-			$length = api_strlen($string, $encoding) - $start + $length;
+			$length = max(0, $string_length - $start + $length);
 		}
-		return
-			api_substr($string, 0, $start, $encoding) . $replacement .
-			api_substr($string, $start + $length, api_strlen($string, $encoding), $encoding);
+		else if (is_null($length) || ($length > $string_length)) {
+			$length = $string_length;
+		}
+		if (($start + $length) > $string_length) {
+			$length = $string_length - $start;
+		}
+		return api_substr($string, 0, $start, $encoding) . $replacement . api_substr($string, $start + $length, $string_length - $start - $length, $encoding);
+	}
+	if (is_null($length)) {
+		return substr_replace($string, $replacement, $start);
 	}
+	return substr_replace($string, $replacement, $start, $length);
 }

 /**
@ -1091,12 +1250,14 @@ function api_ereg($pattern, $string, & $regs = null) {
 		}
 	}
 	elseif (MBSTRING_INSTALLED && api_iconv_supports($encoding)) {
+		global $_api_encoding;
+		$_api_encoding = $encoding;
 		api_mb_regex_encoding('UTF-8');
 		if ($count < 3) {
 			$result = @mb_ereg(api_utf8_encode($pattern, $encoding), api_utf8_encode($string, $encoding));
 		} else {
 			$result = @mb_ereg(api_utf8_encode($pattern, $encoding), api_utf8_encode($string, $encoding), $regs);
-			$regs = _api_array_utf8_decode($regs, $encoding);
+			$regs = _api_array_utf8_decode($regs);
 		}
 		api_mb_regex_encoding($encoding);
 		return $result;
@ -1172,13 +1333,14 @@ function api_eregi($pattern, $string, & $regs = null) {
 		}
 	}
 	elseif (MBSTRING_INSTALLED && api_iconv_supports($encoding)) {
+		global $_api_encoding;
+		$_api_encoding = $encoding;
 		api_mb_regex_encoding('UTF-8');
-
 		if ($count < 3) {
 			$result = @mb_eregi(api_utf8_encode($pattern, $encoding), api_utf8_encode($string, $encoding));
 		} else {
 			$result = @mb_eregi(api_utf8_encode($pattern, $encoding), api_utf8_encode($string, $encoding), $regs);
-			$regs = _api_array_utf8_decode($regs, $encoding);
+			$regs = _api_array_utf8_decode($regs);
 		}
 		api_mb_regex_encoding($encoding);
 		return $result;
@ -1367,13 +1529,15 @@ function api_split($pattern, $string, $limit = null) {
 		}
 	}
 	elseif (MBSTRING_INSTALLED && api_iconv_supports($encoding)) {
+		global $_api_encoding;
+		$_api_encoding = $encoding;
 		api_mb_regex_encoding('UTF-8');
 		if (is_null($limit)) {
 			$result = @mb_split(api_utf8_encode($pattern, $encoding), api_utf8_encode($string, $encoding));
 		} else {
 			$result = @mb_split(api_utf8_encode($pattern, $encoding), api_utf8_encode($string, $encoding), $limit);
 		}
-		$result = _api_array_utf8_decode($result, $encoding);
+		$result = _api_array_utf8_decode($result);
 		api_mb_regex_encoding($encoding);
 		return $result;
 	} else {
@ -1916,7 +2080,7 @@ function api_rsort(&$array, $sort_flag = SORT_REGULAR, $language = null, $encodi
 * 	'&#1060;&#1105;&#1076;&#1086;&#1088; '.
 * 	'&#1052;&#1080;&#1093;&#1072;&#1081;&#1083;&#1086;&#1074;&#1080;&#1095; '.
 * 	'&#1044;&#1086;&#1089;&#1090;&#1086;&#1077;&#1074;&#1082;&#1080;&#1081;',
- * 	ENT_QUOTES, 'UTF-8'), 'UTF-8');
+ * 	ENT_QUOTES, 'UTF-8'), 'X', 'UTF-8');
 * The output should be: Fyodor Mihaylovich Dostoevkiy
 *
 * @param string $string					The input string.
@ -2194,24 +2358,34 @@ yoruba: ISO-8859-15, WINDOWS-1252, ISO-8859-1;

 /**
 * This function unifies encoding identifiers, so that they can be compared.
- * @param string $encoding	The specified encoding.
+ * @param string/array $encoding	The specified encoding.
 * @return string					Returns the encoding identifier, modified in a way suitable for comparison.
 */
 function api_refine_encoding_id($encoding) {
+	if (is_array($encoding)){ // A list of identifiers is upper-cased element by element and returned as an array.
+		return array_map('strtoupper', $encoding);
+	}
 	return strtoupper($encoding);
 }

 /**
 * This function checks whether two encodings are equal (same, equivalent).
- * @param string $encoding1		The first encoding
- * @param string $encoding2		The second encoding
+ * @param string/array $encoding1		The first encoding
+ * @param string/array $encoding2		The second encoding
 * @return bool							Returns TRUE if the encodings are equal, FALSE otherwise.
 */
 function api_equal_encodings($encoding1, $encoding2) {
-	// We have to deal with aliases. This function alone does not solve
-	// the problem entirely. And there is no time for this kind of research.
-	// At the momemnt, the quick proposition could be:
-	return strcmp(api_refine_encoding_id($encoding1), api_refine_encoding_id($encoding2)) == 0 ? true : false;
+	$is_array_encoding1 = is_array($encoding1); // Remember the argument kinds before both are normalized to upper case.
+	$is_array_encoding2 = is_array($encoding2);
+	$encoding1 = api_refine_encoding_id($encoding1);
+	$encoding2 = api_refine_encoding_id($encoding2);
+	if (!$is_array_encoding1 && !$is_array_encoding2) { // Two plain identifiers: compare the normalized values directly.
+		return $encoding1 == $encoding2;
+	}
+	if ($is_array_encoding2) { // Second argument is a list: test whether the first one is in it.
+		return in_array($encoding1, $encoding2);
+	}
+	return in_array($encoding2, $encoding1); // Only the first argument is a list: test whether the second one is in it.
 }

 /**
@ -2222,27 +2396,33 @@ function api_equal_encodings($encoding1, $encoding2) {
 function api_is_utf8($encoding) {
 	static $result = array(); // Memo of earlier answers, keyed by the encoding identifier exactly as passed.
 	if (!isset($result[$encoding])) {
-		$result[$encoding] = api_equal_encodings($encoding, 'UTF-8');
+		$result[$encoding] = api_equal_encodings($encoding, array('UTF-8', 'CP65001', 'WINDOWS-65001')); // Any of these identifiers counts as UTF-8.
 	}
 	return $result[$encoding];
 }

 /**
 * This function checks whether a given encoding represents (is an alias of) ISO Latin 1 character set.
- * @param string $encoding		The tested encoding.
+ * @param string/array $encoding		The tested encoding.
 * @return bool							Returns TRUE if the given encoding id means Latin 1 character set, otherwise returns false.
 */
 function api_is_latin1($encoding, $strict = false) {
-	static $latin1_encodings = array('ISO-8859-1', 'ISO8859-1', 'CP819', 'LATIN1');
-	static $latin1_encodings_like = array(
+	static $latin1 = array(); // Memo for the loose check, keyed by the encoding identifier as passed.
+	static $latin1_strict = array(); // Memo for the strict check, keyed the same way.
+	if ($strict) { // Strict mode accepts only the ISO-8859-1 identifiers listed below.
+		if (!isset($latin1_strict[$encoding])) { // NOTE(review): $encoding is used as an array key, so only a string can work here despite the "string/array" docblock - confirm.
+			$latin1_strict[$encoding] = api_equal_encodings($encoding, array('ISO-8859-1', 'ISO8859-1', 'CP819', 'LATIN1'));
+		}
+		return $latin1_strict[$encoding];
+	}
+	if (!isset($latin1[$encoding])) {
+		$latin1[$encoding] = api_equal_encodings($encoding, array(
 			'ISO-8859-1', 'ISO8859-1', 'CP819', 'LATIN1',
 			'ISO-8859-15', 'ISO8859-15', 'CP923', 'LATIN0', 'LATIN-9',
 			'WINDOWS-1252', 'CP1252', 'WIN-1252', 'WIN1252'
-	);
-	if ($strict) {
-		return in_array(api_refine_encoding_id($encoding), $latin1_encodings);
+		));
 	}
-	return in_array(api_refine_encoding_id($encoding), $latin1_encodings_like);
+	return $latin1[$encoding];
 }

 /**
@ -2461,7 +2641,11 @@ function api_iconv_set_encoding($type, $encoding = null) {
 * @return bool				Returns TRUE when the specified encoding is supported, FALSE otherwise.
 */
 function api_is_encoding_supported($encoding) {
-	return api_mb_supports($encoding) || api_iconv_supports($encoding) || _api_convert_encoding_supports($encoding);
+	static $supported = array(); // Memo of earlier answers, keyed by the encoding identifier as passed.
+	if (!isset($supported[$encoding])) {
+		$supported[$encoding] = api_mb_supports($encoding) || api_iconv_supports($encoding) || _api_convert_encoding_supports($encoding); // Supported if any one of the three checks accepts it.
+	}
+	return $supported[$encoding];
 }

 /**
--- a/main/inc/lib/multibyte_string_functions_internal.lib.php
+++ b/main/inc/lib/multibyte_string_functions_internal.lib.php
@ -10,6 +10,10 @@
 * ==============================================================================
 */

+// Global variables used by some callback functions.
+$_api_encoding = null;
+$_api_collator = null;
+

 /**
 * ----------------------------------------------------------------------------
@ -21,7 +25,6 @@
 function _api_convert_encoding($string, $to_encoding, $from_encoding) {
 	static $character_map = array();
 	static $utf8_like = array('UTF-8', 'US-ASCII');
-	static $unknown = 63; // '?'
 	if (empty($string)) {
 		return $string;
 	}
@ -56,7 +59,7 @@ function _api_convert_encoding($string, $to_encoding, $from_encoding) {
 				if (isset($character_map[$from]['local'][$ord])) {
 					$codepoints[] = $character_map[$from]['local'][$ord];
 				} else {
-					$codepoints[] = $unknown;
+					$codepoints[] = 0xFFFD; // U+FFFD REPLACEMENT CHARACTER is the general substitute character in the Unicode Standard.
 				}
 			} else {
 				$codepoints[] = $ord;
@ -66,13 +69,12 @@ function _api_convert_encoding($string, $to_encoding, $from_encoding) {
 		$codepoints = _api_utf8_to_unicode($string);
 	}
 	if ($to != 'UTF-8') {
-		$unknown_char = chr($unknown);
 		foreach ($codepoints as $i => &$codepoint) {
 			if ($codepoint > 127) {
 				if (isset($character_map[$from]['local'][$codepoint])) {
 					$codepoint = chr($character_map[$from]['local'][$codepoint]);
 				} else {
-					$codepoint = $unknown_char;
+					$codepoint = '?'; // Unknown character.
 				}
 			} else {
 				$codepoint = chr($codepoint);
@ -138,16 +140,12 @@ function &_api_parse_character_map($name) {
 * output can be > 0xFFFF. Occurrences of the BOM are ignored. Surrogates
 * are not allowed.
 * @param string $string				The UTF-8 encoded string.
- * @param string $unknown (optional)	A US-ASCII character to represent invalid bytes.
 * @return array						Returns an array of unicode code points.
 * @author Henri Sivonen, mailto:hsivonen@iki.fi
 * @link http://hsivonen.iki.fi/php-utf8/
 * @author Ivan Tcholakov, 2009, modifications for the Dokeos LMS.
 */
-function _api_utf8_to_unicode($string, $unknown = '?') {
-	if (!empty($unknown)) {
-		$unknown = ord($unknown[0]);
-	}
+function _api_utf8_to_unicode($string) {
 	$state = 0;			// cached expected number of octets after the current octet
 						// until the beginning of the next UTF8 character sequence
 	$codepoint  = 0;	// cached Unicode character
@ -204,9 +202,7 @@ function _api_utf8_to_unicode($string, $unknown = '?') {
 				$state = 0;
 				$codepoint = 0;
 				$bytes = 1;
-				if (!empty($unknown)) {
-					$result[] = $unknown;
-				}
+				$result[] = 0xFFFD; // U+FFFD REPLACEMENT CHARACTER is the general substitute character in the Unicode Standard.
 				continue ;
 			}
 		} else {
@ -234,9 +230,7 @@ function _api_utf8_to_unicode($string, $unknown = '?') {
 						$state = 0;
 						$codepoint = 0;
 						$bytes = 1;
-						if (!empty($unknown)) {
-							$result[] = $unknown;
-						}
+						$result[] = 0xFFFD;
 						continue ;
 					}
 					if (0xFEFF != $codepoint) {
@ -254,9 +248,7 @@ function _api_utf8_to_unicode($string, $unknown = '?') {
 				$state = 0;
 				$codepoint = 0;
 				$bytes = 1;
-				if (!empty($unknown)) {
-					$result[] = $unknown;
-				}
+				$result[] = 0xFFFD;
 			}
 		}
 	}
@ -264,33 +256,28 @@ function _api_utf8_to_unicode($string, $unknown = '?') {
 }

 /**
- * Takes an array of ints representing the Unicode characters and returns 
- * a UTF-8 string. Astral planes are supported ie. the ints in the
- * input can be > 0xFFFF. Occurrances of the BOM are ignored. Surrogates
- * are not allowed.
- * @param array $array					An array of unicode code points representing a string.
- * @param string $unknown (optional)	A US-ASCII character to represent invalid bytes.
+ * Takes an array of ints representing the Unicode characters and returns a UTF-8 string.
+ * @param array $codepoints				An array of unicode code points representing a string.
 * @return string						Returns a UTF-8 string constructed using the given code points.
- * @author Henri Sivonen, mailto:hsivonen@iki.fi
- * @link http://hsivonen.iki.fi/php-utf8/
- * @author Ivan Tcholakov, 2009, modifications for the Dokeos LMS.
- * @see _api_utf8_from_unicodepoint()
 */
-function _api_utf8_from_unicode($array, $unknown = '?') {
-	foreach ($array as $i => &$codepoint) {
-		$codepoint = _api_utf8_from_unicodepoint($codepoint, $unknown);
-	}
-	return implode($array);
+function _api_utf8_from_unicode($codepoints) {
+	return implode(array_map('_api_utf8_chr', $codepoints));
 }

 /**
- * Takes an integer value and returns its correspondent representing the Unicode character.
+ * Takes an integer value (codepoint) and returns the corresponding UTF-8 encoded character.
+ * Astral planes are supported, i.e. the integer input can be > 0xFFFF. Occurrences of the BOM are ignored.
+ * Surrogates are not allowed; they are replaced by U+FFFD.
 * @param array $array					An array of unicode code points representing a string
- * @param string $unknown (optional)	A US-ASCII character to represent invalid bytes.
 * @return string						Returns the corresponding  UTF-8 character.
+ * @author Henri Sivonen, mailto:hsivonen@iki.fi
+ * @link http://hsivonen.iki.fi/php-utf8/
+ * @author Ivan Tcholakov, 2009, modifications for the Dokeos LMS.
 * @see _api_utf8_from_unicode()
+ * This is a UTF-8 aware version of the function chr().
+ * @link http://php.net/manual/en/function.chr.php
 */
-function _api_utf8_from_unicodepoint($codepoint, $unknown = '?') {
+function _api_utf8_chr($codepoint) {
 	// ASCII range (including control chars)
 	if ( ($codepoint >= 0) && ($codepoint <= 0x007f) ) {
 		$result = chr($codepoint);
@ -304,7 +291,7 @@ function _api_utf8_from_unicodepoint($codepoint, $unknown = '?') {
 	// Test for illegal surrogates
 	} else if ($codepoint >= 0xD800 && $codepoint <= 0xDFFF) {
 		// found a surrogate
-		$result = $unknown;
+		$result = _api_utf8_chr(0xFFFD); // U+FFFD REPLACEMENT CHARACTER is the general substitute character in the Unicode Standard.
 	// 3 byte sequence
 	} else if ($codepoint <= 0xffff) {
 		$result = chr(0xe0 | ($codepoint >> 12)) . chr(0x80 | (($codepoint >> 6) & 0x003f)) . chr(0x80 | ($codepoint & 0x003f));
@ -313,11 +300,27 @@ function _api_utf8_from_unicodepoint($codepoint, $unknown = '?') {
 		$result = chr(0xf0 | ($codepoint >> 18)) . chr(0x80 | (($codepoint >> 12) & 0x3f)) . chr(0x80 | (($codepoint >> 6) & 0x3f)) . chr(0x80 | ($codepoint & 0x3f));
 	} else {
 		// out of range
-		$result = $unknown;
+		$result = _api_utf8_chr(0xFFFD);
 	}
 	return $result;
 }

+/**
+ * UTF-8 aware version of ord(): takes the first UTF-8 character in a string and returns its codepoint (integer).
+ * @param string $utf8_character	The UTF-8 encoded character. The string '0' is valid input and yields 0x30.
+ * @return int						Returns the codepoint; or 0xFFFD (U+FFFD REPLACEMENT CHARACTER) when the input is empty
+ * 									or when no codepoint can be decoded from it.
+ * @link http://php.net/manual/en/function.ord.php
+ * Note about a difference with the original function ord(): ord('') returns 0, while this function returns 0xFFFD.
+ */
+function _api_utf8_ord($utf8_character) {
+	if (empty($utf8_character) && $utf8_character !== '0') { // empty('0') is true in PHP, but '0' is a real character.
+		return 0xFFFD;
+	}
+	$codepoints = _api_utf8_to_unicode($utf8_character);
+	return isset($codepoints[0]) ? $codepoints[0] : 0xFFFD; // The decoder may yield no codepoints (e.g. a lone BOM is skipped).
+}
+

 /**
 * ----------------------------------------------------------------------------
@ -329,7 +332,6 @@ function _api_utf8_from_unicodepoint($codepoint, $unknown = '?') {
 function _api_utf8_get_letter_case_properties($codepoint, $type = 'lower') {
 	static $config = array();
 	static $range = array();
-
 	if (!isset($range[$codepoint])) {
 		if ($codepoint > 128 && $codepoint < 256)  {
 			$range[$codepoint] = '0080_00ff'; // Latin-1 Supplement
@ -368,7 +370,6 @@ function _api_utf8_get_letter_case_properties($codepoint, $type = 'lower') {
 		} else {
 			$range[$codepoint] = false;
 		}
-
 		if ($range[$codepoint] === false) {
 			return null;
 		}
@ -379,14 +380,11 @@ function _api_utf8_get_letter_case_properties($codepoint, $type = 'lower') {
 			}
 		}
 	}
-
 	if ($range[$codepoint] === false || !isset($config[$range[$codepoint]])) {
 		return null;
 	}
-
 	$result = array();
 	$count = count($config[$range[$codepoint]]);
-
 	for ($i = 0; $i < $count; $i++) {
 		if ($type === 'lower' && $config[$range[$codepoint]][$i][$type][0] === $codepoint) {
 			$result[] = $config[$range[$codepoint]][$i];
@ -406,12 +404,13 @@ function _api_utf8_get_letter_case_properties($codepoint, $type = 'lower') {

 // This (callback) function convers from UTF-8 to other encoding.
 // It works with arrays of strings too.
-function _api_array_utf8_decode($variable, $encoding) {
+function _api_array_utf8_decode($variable) {
+	global $_api_encoding;
 	if (is_array($variable)) {
-		return array_map('_api_array_utf8_decode', $variable, $encoding);
+		return array_map('_api_array_utf8_decode', $variable);
 	}
    if (is_string($var)) {
-    	return api_utf8_decode($variable, $encoding);
+    	return api_utf8_decode($variable, $_api_encoding);
    }
    return $variable;
 }
@ -451,10 +450,6 @@ function _api_get_alpha_numerical_collator($language = null) {
 	return $collator[$language];
 }

-// Global variables used by the sorting functions.
-$_api_collator = null;
-$_api_encoding = null;
-
 // A string comparison function that serves sorting functions.
 function _api_cmp($string1, $string2) {
 	global $_api_collator, $_api_encoding;