skala
rvelasquez 16 years ago
commit eb7e83c301
  1. 2
      main/inc/lib/add_course.lib.inc.php
  2. 372
      main/inc/lib/multibyte_string_functions.lib.php
  3. 95
      main/inc/lib/multibyte_string_functions_internal.lib.php

@ -64,7 +64,7 @@ function generate_course_code($course_title, $encoding = null)
if (empty($encoding)) { if (empty($encoding)) {
$encoding = api_get_system_encoding(); $encoding = api_get_system_encoding();
} }
return substr(preg_replace('/[^A-Z0-9]/', '', strtoupper(api_transliterate($course_title, $encoding))), 0, 20); return substr(preg_replace('/[^A-Z0-9]/', '', strtoupper(api_transliterate($course_title, 'X', $encoding))), 0, 20);
} }

@ -410,29 +410,37 @@ function api_str_ireplace($search, $replace, $subject, & $count = null, $encodin
* @link http://php.net/str_split * @link http://php.net/str_split
*/ */
function api_str_split($string, $split_length = 1, $encoding = null) { function api_str_split($string, $split_length = 1, $encoding = null) {
if ($split_length < 1) {
return false;
}
if (empty($encoding)) { if (empty($encoding)) {
$encoding = api_mb_internal_encoding(); $encoding = api_mb_internal_encoding();
} }
if (empty($string)) {
return array();
}
if ($split_length < 1) {
return false;
}
if (_api_is_single_byte_encoding($encoding)) { if (_api_is_single_byte_encoding($encoding)) {
return str_split($string, $split_length); return str_split($string, $split_length);
} }
$result = array(); if (api_is_encoding_supported($encoding)) {
if (api_mb_supports($encoding)) { $len = api_strlen($string);
for ($i = 0, $length = @mb_strlen($string, $encoding); $i < $length; $i += $split_length) { if ($len <= $split_length) {
$result[] = @mb_substr($string, $i, $split_length, $encoding); return array($string);
} }
if (!api_is_utf8($encoding)) {
$string = api_utf8_encode($string, $encoding);
} }
elseif (api_iconv_supports($encoding) || api_is_utf8($encoding)) { if (preg_match_all('/.{'.$split_length.'}|[^\x00]{1,'.$split_length.'}$/us', $string, $result) === false) {
for ($i = 0, $length = api_strlen($string, $encoding); $i < $length; $i += $split_length) { return array();
$result[] = api_substr($string, $i, $split_length, $encoding);
} }
} else { if (!api_is_utf8($encoding)) {
return str_split($string, $split_length); global $_api_encoding;
$_api_encoding = $encoding;
$result = _api_array_utf8_decode($result[0]);
} }
return $result; return $result[0];
}
return str_split($string, $split_length);
} }
/** /**
@ -454,11 +462,11 @@ function api_stripos($haystack, $needle, $offset = 0, $encoding = null) {
if (api_mb_supports($encoding)) { if (api_mb_supports($encoding)) {
return @mb_stripos($haystack, $needle, $offset, $encoding); return @mb_stripos($haystack, $needle, $offset, $encoding);
} }
elseif (MBSTRING_INSTALLED && (api_iconv_supports($encoding) || _api_convert_encoding_supports($encoding))) { elseif (api_is_encoding_supported($encoding)) {
return api_utf8_decode(@mb_stripos(api_utf8_encode($haystack, $encoding), api_utf8_encode($needle, $encoding), $offset, 'UTF-8'), $encoding); if (MBSTRING_INSTALLED) {
return @mb_stripos(api_utf8_encode($haystack, $encoding), api_utf8_encode($needle, $encoding), $offset, 'UTF-8');
} }
elseif (api_iconv_supports($encoding) || _api_convert_encoding_supports($encoding)) { return api_strpos(api_strtolower($haystack, $encoding), api_strtolower($needle, $encoding), $offset, $encoding);
api_strpos(api_strtolower($haystack, $encoding), api_strtolower($needle, $encoding), $offset, $encoding);
} }
return stripos($haystack, $needle, $offset); return stripos($haystack, $needle, $offset);
} }
@ -466,28 +474,54 @@ function api_stripos($haystack, $needle, $offset = 0, $encoding = null) {
/** /**
* Finds first occurrence of a string within another, case insensitive. * Finds first occurrence of a string within another, case insensitive.
* @param string $haystack The string from which to get the first occurrence. * @param string $haystack The string from which to get the first occurrence.
* @param string @needle The string to be found. * @param mixed $needle The string to be found.
* @param bool $part (optional) Determines which portion of $haystack this function returns. The default value is FALSE. * @param bool $before_needle (optional) Determines which portion of $haystack this function returns. The default value is FALSE.
* @param string $encoding (optional) The character encoding used internally by this function. If it is omitted, the platform character set will be used by default. * @param string $encoding (optional) The character encoding used internally by this function. If it is omitted, the platform character set will be used by default.
* @return mixed Returns the portion of $haystack, or FALSE if $needle is not found. * @return mixed Returns the portion of $haystack, or FALSE if $needle is not found.
* Notes: * Notes:
* If $part is set to TRUE, the function returns all of $haystack from the beginning to the first occurrence of $needle. * If $needle is not a string, it is converted to an integer and applied as the ordinal value (codepoint if the encoding is UTF-8) of a character.
* If $part is set to FALSE, the function returns all of $haystack from the first occurrence of $needle to the end. * If $before_needle is set to TRUE, the function returns all of $haystack from the beginning to the first occurrence of $needle.
* If $before_needle is set to FALSE, the function returns all of $haystack from the first occurrence of $needle to the end.
* This function is aimed at replacing the functions stristr() and mb_stristr() for human-language strings. * This function is aimed at replacing the functions stristr() and mb_stristr() for human-language strings.
* @link http://php.net/manual/en/function.stristr * @link http://php.net/manual/en/function.stristr
* @link http://php.net/manual/en/function.mb-stristr * @link http://php.net/manual/en/function.mb-stristr
*/ */
function api_stristr($haystack, $needle, $part = false, $encoding = null) { function api_stristr($haystack, $needle, $before_needle = false, $encoding = null) {
if (empty($encoding)) { if (empty($encoding)) {
$encoding = api_mb_internal_encoding(); $encoding = api_mb_internal_encoding();
} }
if (!is_string($needle)) {
$needle = (int)$needle;
if (api_is_utf8($encoding)) {
$needle = _api_utf8_chr($needle);
} else {
$needle = chr($needle);
}
}
if (api_mb_supports($encoding)) { if (api_mb_supports($encoding)) {
return @mb_stristr($haystack, $needle, $part, $encoding); return @mb_stristr($haystack, $needle, $before_needle, $encoding);
} }
elseif (MBSTRING_INSTALLED && (api_iconv_supports($encoding) || _api_convert_encoding_supports($encoding))) { elseif (api_is_encoding_supported($encoding)) {
return api_utf8_decode(@mb_stristr(api_utf8_encode($haystack, $encoding), api_utf8_encode($needle, $encoding), $part, 'UTF-8')); if (MBSTRING_INSTALLED) {
$result = @mb_stristr(api_utf8_encode($haystack, $encoding), api_utf8_encode($needle, $encoding), $before_needle, 'UTF-8');
if ($result === false) {
return false;
}
return api_utf8_decode($result, $encoding);
}
$result = api_strstr(api_strtolower($haystack, $encoding), api_strtolower($needle, $encoding), $before_needle, $encoding);
if ($result === false) {
return false;
} }
return stristr($haystack, $needle, $part); if ($before_needle) {
return api_substr($haystack, 0, api_strlen($result, $encoding), $encoding);
}
return api_substr($haystack, api_strlen($haystack, $encoding) - api_strlen($result, $encoding), null, $encoding);
}
if (PHP_VERSION < 5.3) {
return stristr($haystack, $needle);
}
return stristr($haystack, $needle, $before_needle);
} }
/** /**
@ -545,10 +579,10 @@ function api_strpos($haystack, $needle, $offset = 0, $encoding = null) {
elseif (api_mb_supports($encoding)) { elseif (api_mb_supports($encoding)) {
return @mb_strpos($haystack, $needle, $offset, $encoding); return @mb_strpos($haystack, $needle, $offset, $encoding);
} }
elseif (MBSTRING_INSTALLED && (api_iconv_supports($encoding) || _api_convert_encoding_supports($encoding))) { elseif (api_is_encoding_supported($encoding)) {
return api_utf8_decode(@mb_strpos(api_utf8_encode($haystack, $encoding), api_utf8_encode($needle, $encoding), $offset, 'UTF-8'), $encoding); if (MBSTRING_INSTALLED) {
return @mb_strpos(api_utf8_encode($haystack, $encoding), api_utf8_encode($needle, $encoding), $offset, 'UTF-8');
} }
elseif (api_iconv_supports($encoding) || _api_convert_encoding_supports($encoding)) {
if (!api_is_utf8($encoding)) { if (!api_is_utf8($encoding)) {
$haystack = api_utf8_encode($haystack, $encoding); $haystack = api_utf8_encode($haystack, $encoding);
$needle = api_utf8_encode($needle, $encoding); $needle = api_utf8_encode($needle, $encoding);
@ -559,46 +593,72 @@ function api_strpos($haystack, $needle, $offset = 0, $encoding = null) {
return api_strlen($haystack[0]); return api_strlen($haystack[0]);
} }
return false; return false;
} else { }
$haystack = api_substr($haystack, $offset); $haystack = api_substr($haystack, $offset);
if (($pos = api_strpos($haystack, $needle)) !== false ) { if (($pos = api_strpos($haystack, $needle)) !== false ) {
return $pos + $offset; return $pos + $offset;
} }
return false; return false;
} }
}
return strpos($haystack, $needle, $offset); return strpos($haystack, $needle, $offset);
} }
/** /**
* Finds the last occurrence of a character in a string. * Finds the last occurrence of a character in a string.
* @param string $haystack The string from which to get the last occurrence. * @param string $haystack The string from which to get the last occurrence.
* @param string $needle The string whose first character is to be found. * @param mixed $needle The string whose first character is to be found.
* @param bool $part (optional) Determines which portion of $haystack this function returns. The default value is FALSE. * @param bool $before_needle (optional) Determines which portion of $haystack this function returns. The default value is FALSE.
* @param string $encoding (optional) The character encoding used internally by this function. If it is omitted, the platform character set will be used by default. * @param string $encoding (optional) The character encoding used internally by this function. If it is omitted, the platform character set will be used by default.
* @return mixed Returns the portion of $haystack, or FALSE if the first character from $needle is not found. * @return mixed Returns the portion of $haystack, or FALSE if the first character from $needle is not found.
* Notes: * Notes:
* If $part is set to TRUE, the function returns all of $haystack from the beginning to the last occurrence. * If $needle is not a string, it is converted to an integer and applied as the ordinal value (codepoint if the encoding is UTF-8) of a character.
* If $part is set to FALSE, the function returns all of $haystack from the last occurrence to the end. * If $before_needle is set to TRUE, the function returns all of $haystack from the beginning to the last occurrence.
* If $before_needle is set to FALSE, the function returns all of $haystack from the last occurrence to the end.
* This function is aimed at replacing the functions strrchr() and mb_strrchr() for human-language strings. * This function is aimed at replacing the functions strrchr() and mb_strrchr() for human-language strings.
* @link http://php.net/manual/en/function.strrchr * @link http://php.net/manual/en/function.strrchr
* @link http://php.net/manual/en/function.mb-strrchr * @link http://php.net/manual/en/function.mb-strrchr
*/ */
function api_strrchr($haystack, $needle, $part = false, $encoding = null) { function api_strrchr($haystack, $needle, $before_needle = false, $encoding = null) {
if (empty($encoding)) { if (empty($encoding)) {
$encoding = api_mb_internal_encoding(); $encoding = api_mb_internal_encoding();
} }
if (!is_string($needle)) {
$needle = (int)$needle;
if (api_is_utf8($encoding)) {
$needle = _api_utf8_chr($needle);
} else {
$needle = chr($needle);
}
}
if (_api_is_single_byte_encoding($encoding)) { if (_api_is_single_byte_encoding($encoding)) {
if (!$before_needle) {
return strrchr($haystack, $needle); return strrchr($haystack, $needle);
} }
$result = strrchr($haystack, $needle);
if ($result === false) {
return false;
}
return api_substr($haystack, 0, api_strlen($haystack, $encoding) - api_strlen($result, $encoding), $encoding);
}
elseif (api_mb_supports($encoding)) { elseif (api_mb_supports($encoding)) {
return @mb_strrchr($haystack, $needle, $part, $encoding); return @mb_strrchr($haystack, $needle, $before_needle, $encoding);
} }
elseif (MBSTRING_INSTALLED && (api_iconv_supports($encoding) || _api_convert_encoding_supports($encoding))) { elseif (MBSTRING_INSTALLED && api_is_encoding_supported($encoding)) {
return api_utf8_decode(@mb_strrchr(api_utf8_encode($haystack, $encoding), api_utf8_encode($needle, $encoding), $part, 'UTF-8'), $encoding); $result = @mb_strrchr(api_utf8_encode($haystack, $encoding), api_utf8_encode($needle, $encoding), $before_needle, 'UTF-8');
if ($result === false) {
return false;
} }
return api_utf8_decode($result, $encoding);
}
if (!$before_needle) {
return strrchr($haystack, $needle); return strrchr($haystack, $needle);
} }
$result = strrchr($haystack, $needle);
if ($result === false) {
return false;
}
return api_substr($haystack, 0, api_strlen($haystack, $encoding) - api_strlen($result, $encoding), $encoding);
}
/** /**
* Reverses a string. * Reverses a string.
@ -609,17 +669,19 @@ function api_strrchr($haystack, $needle, $part = false, $encoding = null) {
* @link http://php.net/manual/en/function.strrev * @link http://php.net/manual/en/function.strrev
*/ */
function api_strrev($string, $encoding = null) { function api_strrev($string, $encoding = null) {
if (empty($encoding)) {
$encoding = api_mb_internal_encoding();
}
if (empty($string)) { if (empty($string)) {
return ''; return '';
} }
if (empty($encoding)) { if (_api_is_single_byte_encoding($encoding)) {
$encoding = api_mb_internal_encoding(); return strrev($string);
} }
$result = ''; if (api_is_encoding_supported($encoding)) {
for ($i = api_strlen($string, $encoding) - 1; $i > -1; $i--) { return implode(array_reverse(api_str_split($string, 1, $encoding)));
$result .= api_substr($string, $i, 1, $encoding);
} }
return $result; return strrev($string);
} }
/** /**
@ -638,11 +700,49 @@ function api_strrpos($haystack, $needle, $offset = 0, $encoding = null) {
if (empty($encoding)) { if (empty($encoding)) {
$encoding = api_mb_internal_encoding(); $encoding = api_mb_internal_encoding();
} }
if (_api_is_single_byte_encoding($encoding)) {
return strrpos($haystack, $needle, $offset);
}
if (api_mb_supports($encoding)) { if (api_mb_supports($encoding)) {
return @mb_strrpos($haystack, $needle, $offset, $encoding); return @mb_strrpos($haystack, $needle, $offset, $encoding);
} }
elseif (MBSTRING_INSTALLED && api_iconv_supports($encoding)) { elseif (api_is_encoding_supported($encoding)) {
return api_utf8_decode(@mb_strrpos(api_utf8_encode($haystack, $encoding), api_utf8_encode($needle, $encoding), $offset, 'UTF-8'), $encoding); if (MBSTRING_INSTALLED) {
return @mb_strrpos(api_utf8_encode($haystack, $encoding), api_utf8_encode($needle, $encoding), $offset, 'UTF-8');
}
// This branch (this fragment of code) is an adaptation from the CakePHP(tm) Project, http://www.cakefoundation.org
if (!api_is_utf8($encoding)) {
$haystack = api_utf8_encode($haystack, $encoding);
$needle = api_utf8_encode($needle, $encoding);
}
$found = false;
$haystack = _api_utf8_to_unicode($haystack);
$haystack_count = count($haystack);
$matches = array_count_values($haystack);
$needle = _api_utf8_to_unicode($needle);
$needle_count = count($needle);
$position = $offset;
while (($found === false) && ($position < $haystack_count)) {
if (isset($needle[0]) && $needle[0] === $haystack[$position]) {
for ($i = 1; $i < $needle_count; $i++) {
if ($needle[$i] !== $haystack[$position + $i]) {
if ($needle[$i] === $haystack[($position + $i) -1]) {
$position--;
$found = true;
continue;
}
}
}
if (!$offset && isset($matches[$needle[0]]) && $matches[$needle[0]] > 1) {
$matches[$needle[0]] = $matches[$needle[0]] - 1;
} elseif ($i === $needle_count) {
$found = true;
$position--;
}
}
$position++;
}
return ($found) ? $position : false;
} }
return strrpos($haystack, $needle, $offset); return strrpos($haystack, $needle, $offset);
} }
@ -650,31 +750,67 @@ function api_strrpos($haystack, $needle, $offset = 0, $encoding = null) {
/** /**
* Finds first occurrence of a string within another. * Finds first occurrence of a string within another.
* @param string $haystack The string from which to get the first occurrence. * @param string $haystack The string from which to get the first occurrence.
* @param string @needle The string to be found. * @param mixed $needle The string to be found.
* @param bool $part (optional) Determines which portion of $haystack this function returns. The default value is FALSE. * @param bool $before_needle (optional) Determines which portion of $haystack this function returns. The default value is FALSE.
* @param string $encoding (optional) The character encoding used internally by this function. If it is omitted, the platform character set will be used by default. * @param string $encoding (optional) The character encoding used internally by this function. If it is omitted, the platform character set will be used by default.
* @return mixed Returns the portion of $haystack, or FALSE if $needle is not found. * @return mixed Returns the portion of $haystack, or FALSE if $needle is not found.
* Notes: * Notes:
* If $part is set to TRUE, the function returns all of $haystack from the beginning to the first occurrence of $needle. * If $needle is not a string, it is converted to an integer and applied as the ordinal value (codepoint if the encoding is UTF-8) of a character.
* If $part is set to FALSE, the function returns all of $haystack from the first occurrence of $needle to the end. * If $before_needle is set to TRUE, the function returns all of $haystack from the beginning to the first occurrence of $needle.
* If $before_needle is set to FALSE, the function returns all of $haystack from the first occurrence of $needle to the end.
* This function is aimed at replacing the functions strstr() and mb_strstr() for human-language strings. * This function is aimed at replacing the functions strstr() and mb_strstr() for human-language strings.
* @link http://php.net/manual/en/function.strstr * @link http://php.net/manual/en/function.strstr
* @link http://php.net/manual/en/function.mb-strstr * @link http://php.net/manual/en/function.mb-strstr
*/ */
function api_strstr($haystack, $needle, $part = false, $encoding = null) { function api_strstr($haystack, $needle, $before_needle = false, $encoding = null) {
if (empty($encoding)) { if (empty($encoding)) {
$encoding = api_mb_internal_encoding(); $encoding = api_mb_internal_encoding();
} }
if (!is_string($needle)) {
$needle = (int)$needle;
if (api_is_utf8($encoding)) {
$needle = _api_utf8_chr($needle);
} else {
$needle = chr($needle);
}
}
if (_api_is_single_byte_encoding($encoding)) { if (_api_is_single_byte_encoding($encoding)) {
return strstr($haystack, $needle, $part); // Adding the missing parameter $before_needle to the original function strstr(), PHP_VERSION < 5.3
if (!$before_needle) {
return strstr($haystack, $needle);
}
if (PHP_VERSION < 5.3) {
$result = explode($needle, $haystack, 2);
if ($result === false || count($result) < 2) {
return false;
}
return $result[0];
}
return strstr($haystack, $needle, $before_needle);
} }
if (api_mb_supports($encoding)) { if (api_mb_supports($encoding)) {
return @mb_strstr($haystack, $needle, $part, $encoding); return @mb_strstr($haystack, $needle, $before_needle, $encoding);
}
elseif (MBSTRING_INSTALLED && api_is_encoding_supported($encoding)) {
$result = @mb_strstr(api_utf8_encode($haystack, $encoding), api_utf8_encode($needle, $encoding), $before_needle, 'UTF-8');
if ($result !== false) {
return api_utf8_decode($result, $encoding);
} else {
return false;
} }
elseif (MBSTRING_INSTALLED && api_iconv_supports($encoding)) {
return api_utf8_decode(@mb_strstr(api_utf8_encode($haystack, $encoding), api_utf8_encode($needle, $encoding), $part, 'UTF-8'), $encoding);
} }
return strstr($haystack, $needle, $part); // Adding the missing parameter $before_needle to the original function strstr(), PHP_VERSION < 5.3
if (!$before_needle) {
return strstr($haystack, $needle);
}
if (PHP_VERSION < 5.3) {
$result = explode($needle, $haystack, 2);
if ($result === false || count($result) < 2) {
return false;
}
return $result[0];
}
return strstr($haystack, $needle, $before_needle);
} }
/** /**
@ -693,14 +829,14 @@ function api_strtolower($string, $encoding = null) {
if (api_mb_supports($encoding)) { if (api_mb_supports($encoding)) {
return @mb_strtolower($string, $encoding); return @mb_strtolower($string, $encoding);
} }
elseif (MBSTRING_INSTALLED && api_iconv_supports($encoding)) { elseif (api_is_encoding_supported($encoding)) {
if (MBSTRING_INSTALLED) {
return api_utf8_decode(@mb_strtolower(api_utf8_encode($string, $encoding), 'UTF-8'), $encoding); return api_utf8_decode(@mb_strtolower(api_utf8_encode($string, $encoding), 'UTF-8'), $encoding);
} }
elseif (api_iconv_supports($encoding) || _api_convert_encoding_supports($encoding)) { // This branch (this fragment of code) is an adaptation from the CakePHP(tm) Project, http://www.cakefoundation.org
if (!api_is_utf8($encoding)) { if (!api_is_utf8($encoding)) {
$string = api_utf8_encode($string, $encoding); $string = api_utf8_encode($string, $encoding);
} }
// This branch (this fragment of code) is an adaptation from the CakePHP(tm) Project, http://www.cakefoundation.org
$codepoints = _api_utf8_to_unicode($string); $codepoints = _api_utf8_to_unicode($string);
$length = count($codepoints); $length = count($codepoints);
$matched = false; $matched = false;
@ -757,14 +893,14 @@ function api_strtoupper($string, $encoding = null) {
if (api_mb_supports($encoding)) { if (api_mb_supports($encoding)) {
return @mb_strtoupper($string, $encoding); return @mb_strtoupper($string, $encoding);
} }
elseif (MBSTRING_INSTALLED && api_iconv_supports($encoding)) { elseif (api_is_encoding_supported($encoding)) {
if (MBSTRING_INSTALLED) {
return api_utf8_decode(@mb_strtoupper(api_utf8_encode($string, $encoding), 'UTF-8'), $encoding); return api_utf8_decode(@mb_strtoupper(api_utf8_encode($string, $encoding), 'UTF-8'), $encoding);
} }
elseif (api_iconv_supports($encoding) || _api_convert_encoding_supports($encoding)) { // This branch (this fragment of code) is an adaptation from the CakePHP(tm) Project, http://www.cakefoundation.org
if (!api_is_utf8($encoding)) { if (!api_is_utf8($encoding)) {
$string = api_utf8_encode($string, $encoding); $string = api_utf8_encode($string, $encoding);
} }
// This branch (this fragment of code) is an adaptation from the CakePHP(tm) Project, http://www.cakefoundation.org
$codepoints = _api_utf8_to_unicode($string); $codepoints = _api_utf8_to_unicode($string);
$length = count($codepoints); $length = count($codepoints);
$matched = false; $matched = false;
@ -917,14 +1053,20 @@ function api_substr($string, $start, $length = null, $encoding = null) {
if (is_null($length)) { if (is_null($length)) {
$length = api_strlen($string, $encoding); $length = api_strlen($string, $encoding);
} }
if (_api_is_single_byte_encoding($encoding)) {
return substr($string, $start, $length);
}
if (api_mb_supports($encoding)) { if (api_mb_supports($encoding)) {
return @mb_substr($string, $start, $length, $encoding); return @mb_substr($string, $start, $length, $encoding);
} }
elseif (MBSTRING_INSTALLED && api_iconv_supports($encoding)) { elseif (api_is_encoding_supported($encoding)) {
if (MBSTRING_INSTALLED) {
return api_utf8_decode(@mb_substr(api_utf8_encode($string, $encoding), $start, $length, 'UTF-8'), $encoding); return api_utf8_decode(@mb_substr(api_utf8_encode($string, $encoding), $start, $length, 'UTF-8'), $encoding);
} }
elseif (api_is_utf8($encoding)) {
// The following branch of code is from the Drupal CMS, see the function drupal_substr(). // The following branch of code is from the Drupal CMS, see the function drupal_substr().
if (!api_is_utf8($encoding)) {
$string = api_utf8_encode($string, $encoding);
}
$strlen = api_byte_count($string); $strlen = api_byte_count($string);
// Find the starting byte offset // Find the starting byte offset
$bytes = 0; $bytes = 0;
@ -985,7 +1127,11 @@ function api_substr($string, $start, $length = null, $encoding = null) {
} }
} }
$iend = $bytes; $iend = $bytes;
return substr($string, $istart, max(0, $iend - $istart + 1)); $string = substr($string, $istart, max(0, $iend - $istart + 1));
if (!api_is_utf8($encoding)) {
$string = api_utf8_decode($string, $encoding);
}
return $string;
} }
return substr($string, $start, $length); return substr($string, $start, $length);
} }
@ -1013,16 +1159,29 @@ function api_substr_replace($string, $replacement, $start, $length = null, $enco
if (empty($encoding)) { if (empty($encoding)) {
$encoding = api_mb_internal_encoding(); $encoding = api_mb_internal_encoding();
} }
if ($length == null) { if (api_is_encoding_supported($encoding) && !_api_is_single_byte_encoding($encoding)) {
return api_substr($string, 0, $start, $encoding) . $replacement; $string_length = api_strlen($string, $encoding);
} else { if ($start < 0) {
$start = max(0, $string_length + $start);
}
else if ($start > $string_length) {
$start = $string_length;
}
if ($length < 0) { if ($length < 0) {
$length = api_strlen($string, $encoding) - $start + $length; $length = max(0, $string_length - $start + $length);
} }
return else if (is_null($length) || ($length > $string_length)) {
api_substr($string, 0, $start, $encoding) . $replacement . $length = $string_length;
api_substr($string, $start + $length, api_strlen($string, $encoding), $encoding); }
if (($start + $length) > $string_length) {
$length = $string_length - $start;
}
return api_substr($string, 0, $start, $encoding) . $replacement . api_substr($string, $start + $length, $string_length - $start - $length, $encoding);
}
if (is_null($length)) {
return substr_replace($string, $replacement, $start);
} }
return substr_replace($string, $replacement, $start, $length);
} }
/** /**
@ -1091,12 +1250,14 @@ function api_ereg($pattern, $string, & $regs = null) {
} }
} }
elseif (MBSTRING_INSTALLED && api_iconv_supports($encoding)) { elseif (MBSTRING_INSTALLED && api_iconv_supports($encoding)) {
global $_api_encoding;
$_api_encoding = $encoding;
api_mb_regex_encoding('UTF-8'); api_mb_regex_encoding('UTF-8');
if ($count < 3) { if ($count < 3) {
$result = @mb_ereg(api_utf8_encode($pattern, $encoding), api_utf8_encode($string, $encoding)); $result = @mb_ereg(api_utf8_encode($pattern, $encoding), api_utf8_encode($string, $encoding));
} else { } else {
$result = @mb_ereg(api_utf8_encode($pattern, $encoding), api_utf8_encode($string, $encoding), $regs); $result = @mb_ereg(api_utf8_encode($pattern, $encoding), api_utf8_encode($string, $encoding), $regs);
$regs = _api_array_utf8_decode($regs, $encoding); $regs = _api_array_utf8_decode($regs);
} }
api_mb_regex_encoding($encoding); api_mb_regex_encoding($encoding);
return $result; return $result;
@ -1172,13 +1333,14 @@ function api_eregi($pattern, $string, & $regs = null) {
} }
} }
elseif (MBSTRING_INSTALLED && api_iconv_supports($encoding)) { elseif (MBSTRING_INSTALLED && api_iconv_supports($encoding)) {
global $_api_encoding;
$_api_encoding = $encoding;
api_mb_regex_encoding('UTF-8'); api_mb_regex_encoding('UTF-8');
if ($count < 3) { if ($count < 3) {
$result = @mb_eregi(api_utf8_encode($pattern, $encoding), api_utf8_encode($string, $encoding)); $result = @mb_eregi(api_utf8_encode($pattern, $encoding), api_utf8_encode($string, $encoding));
} else { } else {
$result = @mb_eregi(api_utf8_encode($pattern, $encoding), api_utf8_encode($string, $encoding), $regs); $result = @mb_eregi(api_utf8_encode($pattern, $encoding), api_utf8_encode($string, $encoding), $regs);
$regs = _api_array_utf8_decode($regs, $encoding); $regs = _api_array_utf8_decode($regs);
} }
api_mb_regex_encoding($encoding); api_mb_regex_encoding($encoding);
return $result; return $result;
@ -1367,13 +1529,15 @@ function api_split($pattern, $string, $limit = null) {
} }
} }
elseif (MBSTRING_INSTALLED && api_iconv_supports($encoding)) { elseif (MBSTRING_INSTALLED && api_iconv_supports($encoding)) {
global $_api_encoding;
$_api_encoding = $encoding;
api_mb_regex_encoding('UTF-8'); api_mb_regex_encoding('UTF-8');
if (is_null($limit)) { if (is_null($limit)) {
$result = @mb_split(api_utf8_encode($pattern, $encoding), api_utf8_encode($string, $encoding)); $result = @mb_split(api_utf8_encode($pattern, $encoding), api_utf8_encode($string, $encoding));
} else { } else {
$result = @mb_split(api_utf8_encode($pattern, $encoding), api_utf8_encode($string, $encoding), $limit); $result = @mb_split(api_utf8_encode($pattern, $encoding), api_utf8_encode($string, $encoding), $limit);
} }
$result = _api_array_utf8_decode($result, $encoding); $result = _api_array_utf8_decode($result);
api_mb_regex_encoding($encoding); api_mb_regex_encoding($encoding);
return $result; return $result;
} else { } else {
@ -1916,7 +2080,7 @@ function api_rsort(&$array, $sort_flag = SORT_REGULAR, $language = null, $encodi
* '&#1060;&#1105;&#1076;&#1086;&#1088; '. * '&#1060;&#1105;&#1076;&#1086;&#1088; '.
* '&#1052;&#1080;&#1093;&#1072;&#1081;&#1083;&#1086;&#1074;&#1080;&#1095; '. * '&#1052;&#1080;&#1093;&#1072;&#1081;&#1083;&#1086;&#1074;&#1080;&#1095; '.
* '&#1044;&#1086;&#1089;&#1090;&#1086;&#1077;&#1074;&#1082;&#1080;&#1081;', * '&#1044;&#1086;&#1089;&#1090;&#1086;&#1077;&#1074;&#1082;&#1080;&#1081;',
* ENT_QUOTES, 'UTF-8'), 'UTF-8'); * ENT_QUOTES, 'UTF-8'), 'X', 'UTF-8');
* The output should be: Fyodor Mihaylovich Dostoevkiy * The output should be: Fyodor Mihaylovich Dostoevkiy
* *
* @param string $string The input string. * @param string $string The input string.
@ -2194,24 +2358,34 @@ yoruba: ISO-8859-15, WINDOWS-1252, ISO-8859-1;
/** /**
* This function unifies the encoding identifiers, so that they can be compared. * This function unifies the encoding identifiers, so that they can be compared.
* @param string $encoding The specified encoding. * @param string/array $encoding The specified encoding.
* @return string Returns the encoding identifier, modified into a form suitable for comparison. * @return string Returns the encoding identifier, modified into a form suitable for comparison.
*/ */
function api_refine_encoding_id($encoding) { function api_refine_encoding_id($encoding) {
if (is_array($encoding)){
return array_map('strtoupper', $encoding);
}
return strtoupper($encoding); return strtoupper($encoding);
} }
/** /**
* This function checks whether two encodings are equal (same, equivalent). * This function checks whether two encodings are equal (same, equivalent).
* @param string $encoding1 The first encoding * @param string/array $encoding1 The first encoding
* @param string $encoding2 The second encoding * @param string/array $encoding2 The second encoding
* @return bool Returns TRUE if the encodings are equal, FALSE otherwise. * @return bool Returns TRUE if the encodings are equal, FALSE otherwise.
*/ */
function api_equal_encodings($encoding1, $encoding2) { function api_equal_encodings($encoding1, $encoding2) {
// We have to deal with aliases. This function alone does not solve $is_array_encoding1 = is_array($encoding1);
// the problem entirely. And there is no time for this kind of research. $is_array_encoding2 = is_array($encoding2);
// At the momemnt, the quick proposition could be: $encoding1 = api_refine_encoding_id($encoding1);
return strcmp(api_refine_encoding_id($encoding1), api_refine_encoding_id($encoding2)) == 0 ? true : false; $encoding2 = api_refine_encoding_id($encoding2);
if (!$is_array_encoding1 && !$is_array_encoding2) {
return $encoding1 == $encoding2;
}
if ($is_array_encoding2) {
return in_array($encoding1, $encoding2);
}
return in_array($encoding2, $encoding1);
} }
/** /**
@ -2222,27 +2396,33 @@ function api_equal_encodings($encoding1, $encoding2) {
function api_is_utf8($encoding) {
    // Static cache: encoding identifier as passed by the caller => boolean result.
    static $result = array();
    // Only a string can be used as a cache key here; other types would be an illegal array offset.
    if (!is_string($encoding)) {
        return false;
    }
    if (!isset($result[$encoding])) {
        // 'CP65001' and 'WINDOWS-65001' are accepted as aliases of UTF-8.
        $result[$encoding] = api_equal_encodings($encoding, array('UTF-8', 'CP65001', 'WINDOWS-65001'));
    }
    return $result[$encoding];
}
/**
 * Checks whether a given encoding represents (is an alias of) the ISO Latin 1 character set.
 * @param string $encoding		The tested encoding.
 * @param bool $strict			(optional) When TRUE, only the aliases of ISO-8859-1 itself are accepted.
 * 								When FALSE (default), the similar encodings ISO-8859-15 and WINDOWS-1252
 * 								(with their aliases) are accepted too.
 * @return bool					Returns TRUE if the given encoding id means Latin 1 character set, otherwise returns FALSE.
 */
function api_is_latin1($encoding, $strict = false) {
    // Separate static caches for the two modes: encoding identifier => boolean result.
    static $latin1 = array();
    static $latin1_strict = array();
    // Only a string can be used as a cache key here; other types would be an illegal array offset.
    if (!is_string($encoding)) {
        return false;
    }
    if ($strict) {
        if (!isset($latin1_strict[$encoding])) {
            $latin1_strict[$encoding] = api_equal_encodings($encoding, array('ISO-8859-1', 'ISO8859-1', 'CP819', 'LATIN1'));
        }
        return $latin1_strict[$encoding];
    }
    if (!isset($latin1[$encoding])) {
        $latin1[$encoding] = api_equal_encodings($encoding, array(
            'ISO-8859-1', 'ISO8859-1', 'CP819', 'LATIN1',
            'ISO-8859-15', 'ISO8859-15', 'CP923', 'LATIN0', 'LATIN-9',
            'WINDOWS-1252', 'CP1252', 'WIN-1252', 'WIN1252'
        ));
    }
    return $latin1[$encoding];
}
/** /**
@ -2461,7 +2641,11 @@ function api_iconv_set_encoding($type, $encoding = null) {
* @return bool Returns TRUE when the specified encoding is supported, FALSE othewise. * @return bool Returns TRUE when the specified encoding is supported, FALSE othewise.
*/ */
function api_is_encoding_supported($encoding) {
    // Static cache: encoding identifier => boolean result, so that the three
    // support checks below run at most once per distinct identifier.
    static $supported = array();
    if (!isset($supported[$encoding])) {
        // The encoding counts as supported when any of the three checks accepts it.
        $supported[$encoding] = api_mb_supports($encoding) || api_iconv_supports($encoding) || _api_convert_encoding_supports($encoding);
    }
    return $supported[$encoding];
}
/** /**

@ -10,6 +10,10 @@
* ============================================================================== * ==============================================================================
*/ */
// Global variables used by some callback functions.
$_api_encoding = null;
$_api_collator = null;
/** /**
* ---------------------------------------------------------------------------- * ----------------------------------------------------------------------------
@ -21,7 +25,6 @@
function _api_convert_encoding($string, $to_encoding, $from_encoding) { function _api_convert_encoding($string, $to_encoding, $from_encoding) {
static $character_map = array(); static $character_map = array();
static $utf8_like = array('UTF-8', 'US-ASCII'); static $utf8_like = array('UTF-8', 'US-ASCII');
static $unknown = 63; // '?'
if (empty($string)) { if (empty($string)) {
return $string; return $string;
} }
@ -56,7 +59,7 @@ function _api_convert_encoding($string, $to_encoding, $from_encoding) {
if (isset($character_map[$from]['local'][$ord])) { if (isset($character_map[$from]['local'][$ord])) {
$codepoints[] = $character_map[$from]['local'][$ord]; $codepoints[] = $character_map[$from]['local'][$ord];
} else { } else {
$codepoints[] = $unknown; $codepoints[] = 0xFFFD; // U+FFFD REPLACEMENT CHARACTER is the general substitute character in the Unicode Standard.
} }
} else { } else {
$codepoints[] = $ord; $codepoints[] = $ord;
@ -66,13 +69,12 @@ function _api_convert_encoding($string, $to_encoding, $from_encoding) {
$codepoints = _api_utf8_to_unicode($string); $codepoints = _api_utf8_to_unicode($string);
} }
if ($to != 'UTF-8') { if ($to != 'UTF-8') {
$unknown_char = chr($unknown);
foreach ($codepoints as $i => &$codepoint) { foreach ($codepoints as $i => &$codepoint) {
if ($codepoint > 127) { if ($codepoint > 127) {
if (isset($character_map[$from]['local'][$codepoint])) { if (isset($character_map[$from]['local'][$codepoint])) {
$codepoint = chr($character_map[$from]['local'][$codepoint]); $codepoint = chr($character_map[$from]['local'][$codepoint]);
} else { } else {
$codepoint = $unknown_char; $codepoint = '?'; // Unknown character.
} }
} else { } else {
$codepoint = chr($codepoint); $codepoint = chr($codepoint);
@ -138,16 +140,12 @@ function &_api_parse_character_map($name) {
* output can be > 0xFFFF. Occurrances of the BOM are ignored. Surrogates * output can be > 0xFFFF. Occurrances of the BOM are ignored. Surrogates
* are not allowed. * are not allowed.
* @param string $string The UTF-8 encoded string. * @param string $string The UTF-8 encoded string.
* @param string $unknown (optional) A US-ASCII character to represent invalid bytes.
* @return array Returns an array of unicode code points. * @return array Returns an array of unicode code points.
* @author Henri Sivonen, mailto:hsivonen@iki.fi * @author Henri Sivonen, mailto:hsivonen@iki.fi
* @link http://hsivonen.iki.fi/php-utf8/ * @link http://hsivonen.iki.fi/php-utf8/
* @author Ivan Tcholakov, 2009, modifications for the Dokeos LMS. * @author Ivan Tcholakov, 2009, modifications for the Dokeos LMS.
*/ */
function _api_utf8_to_unicode($string, $unknown = '?') { function _api_utf8_to_unicode($string) {
if (!empty($unknown)) {
$unknown = ord($unknown[0]);
}
$state = 0; // cached expected number of octets after the current octet $state = 0; // cached expected number of octets after the current octet
// until the beginning of the next UTF8 character sequence // until the beginning of the next UTF8 character sequence
$codepoint = 0; // cached Unicode character $codepoint = 0; // cached Unicode character
@ -204,9 +202,7 @@ function _api_utf8_to_unicode($string, $unknown = '?') {
$state = 0; $state = 0;
$codepoint = 0; $codepoint = 0;
$bytes = 1; $bytes = 1;
if (!empty($unknown)) { $result[] = 0xFFFD; // U+FFFD REPLACEMENT CHARACTER is the general substitute character in the Unicode Standard.
$result[] = $unknown;
}
continue ; continue ;
} }
} else { } else {
@ -234,9 +230,7 @@ function _api_utf8_to_unicode($string, $unknown = '?') {
$state = 0; $state = 0;
$codepoint = 0; $codepoint = 0;
$bytes = 1; $bytes = 1;
if (!empty($unknown)) { $result[] = 0xFFFD;
$result[] = $unknown;
}
continue ; continue ;
} }
if (0xFEFF != $codepoint) { if (0xFEFF != $codepoint) {
@ -254,9 +248,7 @@ function _api_utf8_to_unicode($string, $unknown = '?') {
$state = 0; $state = 0;
$codepoint = 0; $codepoint = 0;
$bytes = 1; $bytes = 1;
if (!empty($unknown)) { $result[] = 0xFFFD;
$result[] = $unknown;
}
} }
} }
} }
@ -264,33 +256,28 @@ function _api_utf8_to_unicode($string, $unknown = '?') {
} }
/**
 * Takes an array of ints representing the Unicode characters and returns a UTF-8 string.
 * Every code point is converted by _api_utf8_chr() and the pieces are concatenated in order.
 * @param array $codepoints		An array of unicode code points representing a string.
 * @return string				Returns a UTF-8 string constructed using the given code points.
 * @see _api_utf8_chr()
 */
function _api_utf8_from_unicode($codepoints) {
    return implode('', array_map('_api_utf8_chr', $codepoints));
}
/** /**
* Takes an integer value and returns its correspondent representing the Unicode character. * Takes an integer value (codepoint) and returns its correspondent representing the Unicode character.
* Astral planes are supported, i.e. the integer input can be > 0xFFFF. Occurrences of the BOM are ignored.
* Surrogates are not allowed.
* @param int $codepoint The Unicode code point to be converted.
* @param string $unknown (optional) A US-ASCII character to represent invalid bytes.
* @return string Returns the corresponding UTF-8 character. * @return string Returns the corresponding UTF-8 character.
* @author Henri Sivonen, mailto:hsivonen@iki.fi
* @link http://hsivonen.iki.fi/php-utf8/
* @author Ivan Tcholakov, 2009, modifications for the Dokeos LMS.
* @see _api_utf8_from_unicode() * @see _api_utf8_from_unicode()
* This is a UTF-8 aware version of the function chr().
* @link http://php.net/manual/en/function.chr.php
*/ */
function _api_utf8_from_unicodepoint($codepoint, $unknown = '?') { function _api_utf8_chr($codepoint) {
// ASCII range (including control chars) // ASCII range (including control chars)
if ( ($codepoint >= 0) && ($codepoint <= 0x007f) ) { if ( ($codepoint >= 0) && ($codepoint <= 0x007f) ) {
$result = chr($codepoint); $result = chr($codepoint);
@ -304,7 +291,7 @@ function _api_utf8_from_unicodepoint($codepoint, $unknown = '?') {
// Test for illegal surrogates // Test for illegal surrogates
} else if ($codepoint >= 0xD800 && $codepoint <= 0xDFFF) { } else if ($codepoint >= 0xD800 && $codepoint <= 0xDFFF) {
// found a surrogate // found a surrogate
$result = $unknown; $result = _api_utf8_chr(0xFFFD); // U+FFFD REPLACEMENT CHARACTER is the general substitute character in the Unicode Standard.
// 3 byte sequence // 3 byte sequence
} else if ($codepoint <= 0xffff) { } else if ($codepoint <= 0xffff) {
$result = chr(0xe0 | ($codepoint >> 12)) . chr(0x80 | (($codepoint >> 6) & 0x003f)) . chr(0x80 | ($codepoint & 0x003f)); $result = chr(0xe0 | ($codepoint >> 12)) . chr(0x80 | (($codepoint >> 6) & 0x003f)) . chr(0x80 | ($codepoint & 0x003f));
@ -313,11 +300,27 @@ function _api_utf8_from_unicodepoint($codepoint, $unknown = '?') {
$result = chr(0xf0 | ($codepoint >> 18)) . chr(0x80 | (($codepoint >> 12) & 0x3f)) . chr(0x80 | (($codepoint >> 6) & 0x3f)) . chr(0x80 | ($codepoint & 0x3f)); $result = chr(0xf0 | ($codepoint >> 18)) . chr(0x80 | (($codepoint >> 12) & 0x3f)) . chr(0x80 | (($codepoint >> 6) & 0x3f)) . chr(0x80 | ($codepoint & 0x3f));
} else { } else {
// out of range // out of range
$result = $unknown; $result = _api_utf8_chr(0xFFFD);
} }
return $result; return $result;
} }
/**
 * Takes the first UTF-8 character in a string and returns its codepoint (integer).
 * @param string $utf8_character	The UTF-8 encoded character.
 * @return int						Returns: the codepoint; or 0xFFFD (unknown character) when the input string is empty
 * 									or when no code point could be extracted from it.
 * This is a UTF-8 aware version of the function ord().
 * @link http://php.net/manual/en/function.ord.php
 * Note about a difference with the original function ord(): ord('') returns 0.
 */
function _api_utf8_ord($utf8_character) {
    // Do not use empty() here: empty('0') is TRUE, and the character "0" is a valid input (code point 48).
    if (!is_scalar($utf8_character) || (string) $utf8_character === '') {
        return 0xFFFD;
    }
    $codepoints = _api_utf8_to_unicode((string) $utf8_character);
    // The decoder may yield no code points at all (presumably for an input that
    // consists of a BOM only - its documentation says that BOMs are ignored).
    return isset($codepoints[0]) ? $codepoints[0] : 0xFFFD;
}
/** /**
* ---------------------------------------------------------------------------- * ----------------------------------------------------------------------------
@ -329,7 +332,6 @@ function _api_utf8_from_unicodepoint($codepoint, $unknown = '?') {
function _api_utf8_get_letter_case_properties($codepoint, $type = 'lower') { function _api_utf8_get_letter_case_properties($codepoint, $type = 'lower') {
static $config = array(); static $config = array();
static $range = array(); static $range = array();
if (!isset($range[$codepoint])) { if (!isset($range[$codepoint])) {
if ($codepoint > 128 && $codepoint < 256) { if ($codepoint > 128 && $codepoint < 256) {
$range[$codepoint] = '0080_00ff'; // Latin-1 Supplement $range[$codepoint] = '0080_00ff'; // Latin-1 Supplement
@ -368,7 +370,6 @@ function _api_utf8_get_letter_case_properties($codepoint, $type = 'lower') {
} else { } else {
$range[$codepoint] = false; $range[$codepoint] = false;
} }
if ($range[$codepoint] === false) { if ($range[$codepoint] === false) {
return null; return null;
} }
@ -379,14 +380,11 @@ function _api_utf8_get_letter_case_properties($codepoint, $type = 'lower') {
} }
} }
} }
if ($range[$codepoint] === false || !isset($config[$range[$codepoint]])) { if ($range[$codepoint] === false || !isset($config[$range[$codepoint]])) {
return null; return null;
} }
$result = array(); $result = array();
$count = count($config[$range[$codepoint]]); $count = count($config[$range[$codepoint]]);
for ($i = 0; $i < $count; $i++) { for ($i = 0; $i < $count; $i++) {
if ($type === 'lower' && $config[$range[$codepoint]][$i][$type][0] === $codepoint) { if ($type === 'lower' && $config[$range[$codepoint]][$i][$type][0] === $codepoint) {
$result[] = $config[$range[$codepoint]][$i]; $result[] = $config[$range[$codepoint]][$i];
@ -406,12 +404,13 @@ function _api_utf8_get_letter_case_properties($codepoint, $type = 'lower') {
// This (callback) function converts from UTF-8 to other encoding.
// It works with arrays of strings too (nested arrays are processed recursively).
// The target encoding is read from the global variable $_api_encoding,
// because a callback for array_map() receives the array values only.
// Values that are neither arrays nor strings are returned unchanged.
function _api_array_utf8_decode($variable) {
    global $_api_encoding;
    if (is_array($variable)) {
        return array_map('_api_array_utf8_decode', $variable);
    }
    // The tested variable must be $variable; testing an undefined variable
    // here would skip the conversion for every string.
    if (is_string($variable)) {
        return api_utf8_decode($variable, $_api_encoding);
    }
    return $variable;
}
@ -451,10 +450,6 @@ function _api_get_alpha_numerical_collator($language = null) {
return $collator[$language]; return $collator[$language];
} }
// Global variables used by the sorting functions.
$_api_collator = null;
$_api_encoding = null;
// A string comparison function that serves sorting functions. // A string comparison function that serves sorting functions.
function _api_cmp($string1, $string2) { function _api_cmp($string1, $string2) {
global $_api_collator, $_api_encoding; global $_api_collator, $_api_encoding;

Loading…
Cancel
Save