Issue #306 - The multibyte string library: Logic upgrades, optimizations for speed, part 2.

skala
Ivan Tcholakov 16 years ago
parent 314c0d4484
commit 9376bb9d20
  1. 271
      main/inc/lib/multibyte_string_functions.lib.php
  2. 39
      main/inc/lib/multibyte_string_functions_internal.lib.php

@ -66,7 +66,6 @@ function api_byte_count($string) {
return mb_strlen($string, '8bit'); return mb_strlen($string, '8bit');
} }
return strlen($string); return strlen($string);
// For PHP6 this function probably will contain: // For PHP6 this function probably will contain:
//return strlen((binary)$string); //return strlen((binary)$string);
} }
@ -88,30 +87,32 @@ function api_byte_count($string) {
* @link http://php.net/manual/en/function.mb-convert-encoding * @link http://php.net/manual/en/function.mb-convert-encoding
*/ */
function api_convert_encoding($string, $to_encoding, $from_encoding = null) { function api_convert_encoding($string, $to_encoding, $from_encoding = null) {
static $equal_encodings = array();
if (empty($from_encoding)) { if (empty($from_encoding)) {
$from_encoding = api_mb_internal_encoding(); $from_encoding = api_mb_internal_encoding();
} }
if (api_equal_encodings($to_encoding, $from_encoding)) { if (!isset($equal_encodings[$to_encoding][$from_encoding])) {
// When conversion is not needed, the string is returned directly, without validation. $equal_encodings[$to_encoding][$from_encoding] = api_equal_encodings($to_encoding, $from_encoding);
return $string; }
if ($equal_encodings[$to_encoding][$from_encoding]) {
return $string; // When conversion is not needed, the string is returned directly, without validation.
} }
elseif (api_mb_supports($to_encoding) && api_mb_supports($from_encoding)) { if (api_mb_supports($to_encoding) && api_mb_supports($from_encoding)) {
return @mb_convert_encoding($string, $to_encoding, $from_encoding); return @mb_convert_encoding($string, $to_encoding, $from_encoding);
} }
elseif (api_iconv_supports($to_encoding) && api_iconv_supports($from_encoding)) { if (api_iconv_supports($to_encoding) && api_iconv_supports($from_encoding)) {
return @iconv($from_encoding, $to_encoding, $string); return @iconv($from_encoding, $to_encoding, $string);
} }
elseif (api_is_utf8($to_encoding) && api_is_latin1($from_encoding, true)) { if (api_is_utf8($to_encoding) && api_is_latin1($from_encoding, true)) {
return utf8_encode($string); return utf8_encode($string);
} }
elseif (api_is_latin1($to_encoding, true) && api_is_utf8($from_encoding)) { if (api_is_latin1($to_encoding, true) && api_is_utf8($from_encoding)) {
return utf8_decode($string); return utf8_decode($string);
} }
elseif (_api_convert_encoding_supports($to_encoding) && _api_convert_encoding_supports($from_encoding)) { if (_api_convert_encoding_supports($to_encoding) && _api_convert_encoding_supports($from_encoding)) {
return _api_convert_encoding($string, $to_encoding, $from_encoding); return _api_convert_encoding($string, $to_encoding, $from_encoding);
} }
// Here the function gives up. return $string; // Here the function gives up.
return $string;
} }
/** /**
@ -127,23 +128,21 @@ function api_utf8_encode($string, $from_encoding = null) {
$from_encoding = api_mb_internal_encoding(); $from_encoding = api_mb_internal_encoding();
} }
if (api_is_utf8($from_encoding)) { if (api_is_utf8($from_encoding)) {
// When conversion is not needed, the string is returned directly, without validation. return $string; // When conversion is not needed, the string is returned directly, without validation.
return $string;
} }
elseif (api_mb_supports($from_encoding)) { if (api_mb_supports($from_encoding)) {
return @mb_convert_encoding($string, 'UTF-8', $from_encoding); return @mb_convert_encoding($string, 'UTF-8', $from_encoding);
} }
elseif (api_iconv_supports($from_encoding)) { if (api_iconv_supports($from_encoding)) {
return @iconv($from_encoding, 'UTF-8', $string); return @iconv($from_encoding, 'UTF-8', $string);
} }
elseif (api_is_latin1($from_encoding, true)) { if (api_is_latin1($from_encoding, true)) {
return utf8_encode($string); return utf8_encode($string);
} }
elseif (_api_convert_encoding_supports($from_encoding)) { if (_api_convert_encoding_supports($from_encoding)) {
return _api_convert_encoding($string, 'UTF-8', $from_encoding); return _api_convert_encoding($string, 'UTF-8', $from_encoding);
} }
// Here the function gives up. return $string; // Here the function gives up.
return $string;
} }
/** /**
@ -159,23 +158,21 @@ function api_utf8_decode($string, $to_encoding = null) {
$to_encoding = api_mb_internal_encoding(); $to_encoding = api_mb_internal_encoding();
} }
if (api_is_utf8($to_encoding)) { if (api_is_utf8($to_encoding)) {
// When conversion is not needed, the string is returned directly, without validation. return $string; // When conversion is not needed, the string is returned directly, without validation.
return $string;
} }
elseif (api_mb_supports($to_encoding)) { if (api_mb_supports($to_encoding)) {
return @mb_convert_encoding($string, $to_encoding, 'UTF-8'); return @mb_convert_encoding($string, $to_encoding, 'UTF-8');
} }
elseif (api_iconv_supports($to_encoding)) { if (api_iconv_supports($to_encoding)) {
return @iconv('UTF-8', $to_encoding, $string); return @iconv('UTF-8', $to_encoding, $string);
} }
elseif (api_is_latin1($to_encoding, true)) { if (api_is_latin1($to_encoding, true)) {
return utf8_decode($string); return utf8_decode($string);
} }
elseif (_api_convert_encoding_supports($to_encoding)) { if (_api_convert_encoding_supports($to_encoding)) {
return _api_convert_encoding($string, $to_encoding, 'UTF-8'); return _api_convert_encoding($string, $to_encoding, 'UTF-8');
} }
// Here the function gives up. return $string; // Here the function gives up.
return $string;
} }
/** /**
@ -288,10 +285,17 @@ function api_html_entity_decode($string, $quote_style = ENT_COMPAT, $encoding =
return html_entity_decode($string, $quote_style, $encoding); return html_entity_decode($string, $quote_style, $encoding);
} }
if (api_is_encoding_supported($encoding)) { if (api_is_encoding_supported($encoding)) {
return api_utf8_decode(html_entity_decode(api_convert_encoding($string, 'UTF-8', $encoding), $quote_style, 'UTF-8'), $encoding); if (!api_is_utf8($encoding)) {
$string = api_utf8_encode($string, $encoding);
}
$string = html_entity_decode($string, $quote_style, 'UTF-8');
if (!api_is_utf8($encoding)) {
return api_utf8_decode($string, $encoding);
} }
return $string; return $string;
} }
return $string; // Here the function guves up.
}
/** /**
* This function encodes (conditionally) a given string to UTF-8 if XmlHttp-request has been detected. * This function encodes (conditionally) a given string to UTF-8 if XmlHttp-request has been detected.
@ -301,7 +305,12 @@ function api_html_entity_decode($string, $quote_style = ENT_COMPAT, $encoding =
*/ */
function api_xml_http_response_encode($string, $from_encoding = null) { function api_xml_http_response_encode($string, $from_encoding = null) {
if (isset($_SERVER['HTTP_X_REQUESTED_WITH']) && strtolower($_SERVER['HTTP_X_REQUESTED_WITH']) == 'xmlhttprequest') { if (isset($_SERVER['HTTP_X_REQUESTED_WITH']) && strtolower($_SERVER['HTTP_X_REQUESTED_WITH']) == 'xmlhttprequest') {
return api_convert_encoding($string, 'UTF-8', $from_encoding); if (empty($from_encoding)) {
$from_encoding = api_mb_internal_encoding();
}
if (!api_is_utf8($from_encoding)) {
return api_utf8_encode($string, $from_encoding);
}
} }
return $string; return $string;
} }
@ -464,7 +473,11 @@ function api_stripos($haystack, $needle, $offset = 0, $encoding = null) {
} }
elseif (api_is_encoding_supported($encoding)) { elseif (api_is_encoding_supported($encoding)) {
if (MBSTRING_INSTALLED) { if (MBSTRING_INSTALLED) {
return @mb_stripos(api_utf8_encode($haystack, $encoding), api_utf8_encode($needle, $encoding), $offset, 'UTF-8'); if (!api_is_utf8($encoding)) {
$haystack = api_utf8_encode($haystack, $encoding);
$needle = api_utf8_encode($needle, $encoding);
}
return @mb_stripos($haystack, $needle, $offset, 'UTF-8');
} }
return api_strpos(api_strtolower($haystack, $encoding), api_strtolower($needle, $encoding), $offset, $encoding); return api_strpos(api_strtolower($haystack, $encoding), api_strtolower($needle, $encoding), $offset, $encoding);
} }
@ -503,12 +516,19 @@ function api_stristr($haystack, $needle, $before_needle = false, $encoding = nul
} }
elseif (api_is_encoding_supported($encoding)) { elseif (api_is_encoding_supported($encoding)) {
if (MBSTRING_INSTALLED) { if (MBSTRING_INSTALLED) {
$result = @mb_stristr(api_utf8_encode($haystack, $encoding), api_utf8_encode($needle, $encoding), $before_needle, 'UTF-8'); if (!api_is_utf8($encoding)) {
$haystack = api_utf8_encode($haystack, $encoding);
$needle = api_utf8_encode($needle, $encoding);
}
$result = @mb_stristr($haystack, $needle, $before_needle, 'UTF-8');
if ($result === false) { if ($result === false) {
return false; return false;
} }
if (!api_is_utf8($encoding)) {
return api_utf8_decode($result, $encoding); return api_utf8_decode($result, $encoding);
} }
return $result;
}
$result = api_strstr(api_strtolower($haystack, $encoding), api_strtolower($needle, $encoding), $before_needle, $encoding); $result = api_strstr(api_strtolower($haystack, $encoding), api_strtolower($needle, $encoding), $before_needle, $encoding);
if ($result === false) { if ($result === false) {
return false; return false;
@ -545,13 +565,13 @@ function api_strlen($string, $encoding = null) {
if (_api_is_single_byte_encoding($encoding)) { if (_api_is_single_byte_encoding($encoding)) {
return strlen($string); return strlen($string);
} }
elseif (api_mb_supports($encoding)) { if (api_mb_supports($encoding)) {
return @mb_strlen($string, $encoding); return @mb_strlen($string, $encoding);
} }
elseif (api_iconv_supports($encoding)) { if (api_iconv_supports($encoding)) {
return @iconv_strlen($string, $encoding); return @iconv_strlen($string, $encoding);
} }
elseif (api_is_utf8($encoding)) { if (api_is_utf8($encoding)) {
return api_byte_count(preg_replace("/[\x80-\xBF]/", '', $string)); return api_byte_count(preg_replace("/[\x80-\xBF]/", '', $string));
} }
return strlen($string); return strlen($string);
@ -580,13 +600,13 @@ function api_strpos($haystack, $needle, $offset = 0, $encoding = null) {
return @mb_strpos($haystack, $needle, $offset, $encoding); return @mb_strpos($haystack, $needle, $offset, $encoding);
} }
elseif (api_is_encoding_supported($encoding)) { elseif (api_is_encoding_supported($encoding)) {
if (MBSTRING_INSTALLED) {
return @mb_strpos(api_utf8_encode($haystack, $encoding), api_utf8_encode($needle, $encoding), $offset, 'UTF-8');
}
if (!api_is_utf8($encoding)) { if (!api_is_utf8($encoding)) {
$haystack = api_utf8_encode($haystack, $encoding); $haystack = api_utf8_encode($haystack, $encoding);
$needle = api_utf8_encode($needle, $encoding); $needle = api_utf8_encode($needle, $encoding);
} }
if (MBSTRING_INSTALLED) {
return @mb_strpos($haystack, $needle, $offset, 'UTF-8');
}
if (empty($offset)) { if (empty($offset)) {
$haystack = explode($needle, $haystack, 2); $haystack = explode($needle, $haystack, 2);
if (count($haystack) > 1) { if (count($haystack) > 1) {
@ -644,12 +664,19 @@ function api_strrchr($haystack, $needle, $before_needle = false, $encoding = nul
return @mb_strrchr($haystack, $needle, $before_needle, $encoding); return @mb_strrchr($haystack, $needle, $before_needle, $encoding);
} }
elseif (MBSTRING_INSTALLED && api_is_encoding_supported($encoding)) { elseif (MBSTRING_INSTALLED && api_is_encoding_supported($encoding)) {
$result = @mb_strrchr(api_utf8_encode($haystack, $encoding), api_utf8_encode($needle, $encoding), $before_needle, 'UTF-8'); if (!api_is_utf8($encoding)) {
$haystack = api_utf8_encode($haystack, $encoding);
$needle = api_utf8_encode($needle, $encoding);
}
$result = @mb_strrchr($haystack, $needle, $before_needle, 'UTF-8');
if ($result === false) { if ($result === false) {
return false; return false;
} }
if (!api_is_utf8($encoding)) {
return api_utf8_decode($result, $encoding); return api_utf8_decode($result, $encoding);
} }
return $result;
}
if (!$before_needle) { if (!$before_needle) {
return strrchr($haystack, $needle); return strrchr($haystack, $needle);
} }
@ -707,14 +734,14 @@ function api_strrpos($haystack, $needle, $offset = 0, $encoding = null) {
return @mb_strrpos($haystack, $needle, $offset, $encoding); return @mb_strrpos($haystack, $needle, $offset, $encoding);
} }
elseif (api_is_encoding_supported($encoding)) { elseif (api_is_encoding_supported($encoding)) {
if (MBSTRING_INSTALLED) {
return @mb_strrpos(api_utf8_encode($haystack, $encoding), api_utf8_encode($needle, $encoding), $offset, 'UTF-8');
}
// This branch (this fragment of code) is an adaptation from the CakePHP(tm) Project, http://www.cakefoundation.org
if (!api_is_utf8($encoding)) { if (!api_is_utf8($encoding)) {
$haystack = api_utf8_encode($haystack, $encoding); $haystack = api_utf8_encode($haystack, $encoding);
$needle = api_utf8_encode($needle, $encoding); $needle = api_utf8_encode($needle, $encoding);
} }
if (MBSTRING_INSTALLED) {
return @mb_strrpos($haystack, $needle, $offset, 'UTF-8');
}
// This branch (this fragment of code) is an adaptation from the CakePHP(tm) Project, http://www.cakefoundation.org
$found = false; $found = false;
$haystack = _api_utf8_to_unicode($haystack); $haystack = _api_utf8_to_unicode($haystack);
$haystack_count = count($haystack); $haystack_count = count($haystack);
@ -792,12 +819,18 @@ function api_strstr($haystack, $needle, $before_needle = false, $encoding = null
return @mb_strstr($haystack, $needle, $before_needle, $encoding); return @mb_strstr($haystack, $needle, $before_needle, $encoding);
} }
elseif (MBSTRING_INSTALLED && api_is_encoding_supported($encoding)) { elseif (MBSTRING_INSTALLED && api_is_encoding_supported($encoding)) {
$result = @mb_strstr(api_utf8_encode($haystack, $encoding), api_utf8_encode($needle, $encoding), $before_needle, 'UTF-8'); if (!api_is_utf8($encoding)) {
$haystack = api_utf8_encode($haystack, $encoding);
$needle = api_utf8_encode($needle, $encoding);
}
$result = @mb_strstr($haystack, $needle, $before_needle, 'UTF-8');
if ($result !== false) { if ($result !== false) {
if (!api_is_utf8($encoding)) {
return api_utf8_decode($result, $encoding); return api_utf8_decode($result, $encoding);
} else {
return false;
} }
return $result;
}
return false;
} }
// Adding the missing parameter $before_needle to the original function strstr(), PHP_VERSION < 5.3 // Adding the missing parameter $before_needle to the original function strstr(), PHP_VERSION < 5.3
if (!$before_needle) { if (!$before_needle) {
@ -830,13 +863,13 @@ function api_strtolower($string, $encoding = null) {
return @mb_strtolower($string, $encoding); return @mb_strtolower($string, $encoding);
} }
elseif (api_is_encoding_supported($encoding)) { elseif (api_is_encoding_supported($encoding)) {
if (MBSTRING_INSTALLED) {
return api_utf8_decode(@mb_strtolower(api_utf8_encode($string, $encoding), 'UTF-8'), $encoding);
}
// This branch (this fragment of code) is an adaptation from the CakePHP(tm) Project, http://www.cakefoundation.org
if (!api_is_utf8($encoding)) { if (!api_is_utf8($encoding)) {
$string = api_utf8_encode($string, $encoding); $string = api_utf8_encode($string, $encoding);
} }
if (MBSTRING_INSTALLED) {
$string = @mb_strtolower($string, 'UTF-8');
} else {
// This branch (this fragment of code) is an adaptation from the CakePHP(tm) Project, http://www.cakefoundation.org
$codepoints = _api_utf8_to_unicode($string); $codepoints = _api_utf8_to_unicode($string);
$length = count($codepoints); $length = count($codepoints);
$matched = false; $matched = false;
@ -869,8 +902,9 @@ function api_strtolower($string, $encoding = null) {
} }
} }
$string = _api_utf8_from_unicode($result); $string = _api_utf8_from_unicode($result);
}
if (!api_is_utf8($encoding)) { if (!api_is_utf8($encoding)) {
$string = api_utf8_decode($string, $encoding); return api_utf8_decode($string, $encoding);
} }
return $string; return $string;
} }
@ -894,13 +928,13 @@ function api_strtoupper($string, $encoding = null) {
return @mb_strtoupper($string, $encoding); return @mb_strtoupper($string, $encoding);
} }
elseif (api_is_encoding_supported($encoding)) { elseif (api_is_encoding_supported($encoding)) {
if (MBSTRING_INSTALLED) {
return api_utf8_decode(@mb_strtoupper(api_utf8_encode($string, $encoding), 'UTF-8'), $encoding);
}
// This branch (this fragment of code) is an adaptation from the CakePHP(tm) Project, http://www.cakefoundation.org
if (!api_is_utf8($encoding)) { if (!api_is_utf8($encoding)) {
$string = api_utf8_encode($string, $encoding); $string = api_utf8_encode($string, $encoding);
} }
if (MBSTRING_INSTALLED) {
$string = @mb_strtoupper($string, 'UTF-8');
} else {
// This branch (this fragment of code) is an adaptation from the CakePHP(tm) Project, http://www.cakefoundation.org
$codepoints = _api_utf8_to_unicode($string); $codepoints = _api_utf8_to_unicode($string);
$length = count($codepoints); $length = count($codepoints);
$matched = false; $matched = false;
@ -971,8 +1005,9 @@ function api_strtoupper($string, $encoding = null) {
} }
} }
$string = _api_utf8_from_unicode($result); $string = _api_utf8_from_unicode($result);
}
if (!api_is_utf8($encoding)) { if (!api_is_utf8($encoding)) {
$string = api_utf8_decode($string, $encoding); return api_utf8_decode($string, $encoding);
} }
return $string; return $string;
} }
@ -989,6 +1024,7 @@ function api_strtoupper($string, $encoding = null) {
* This function is aimed at replacing the function strtr() for human-language strings. * This function is aimed at replacing the function strtr() for human-language strings.
* @link http://php.net/manual/en/function.strtr * @link http://php.net/manual/en/function.strtr
* TODO: To be revised and tested. Probably this function will not be needed. * TODO: To be revised and tested. Probably this function will not be needed.
* TODO: This function will be removed. It is not needed. 21-AUG-2009.
*/ */
function api_strtr($string, $from, $to = null, $encoding = null) { function api_strtr($string, $from, $to = null, $encoding = null) {
if (empty($string)) { if (empty($string)) {
@ -1060,13 +1096,13 @@ function api_substr($string, $start, $length = null, $encoding = null) {
return @mb_substr($string, $start, $length, $encoding); return @mb_substr($string, $start, $length, $encoding);
} }
elseif (api_is_encoding_supported($encoding)) { elseif (api_is_encoding_supported($encoding)) {
if (MBSTRING_INSTALLED) {
return api_utf8_decode(@mb_substr(api_utf8_encode($string, $encoding), $start, $length, 'UTF-8'), $encoding);
}
// The following branch of code is from the Drupal CMS, see the function drupal_substr().
if (!api_is_utf8($encoding)) { if (!api_is_utf8($encoding)) {
$string = api_utf8_encode($string, $encoding); $string = api_utf8_encode($string, $encoding);
} }
if (MBSTRING_INSTALLED) {
$string = @mb_substr($string, $start, $length, 'UTF-8');
} else {
// The following branch of code is from the Drupal CMS, see the function drupal_substr().
$strlen = api_byte_count($string); $strlen = api_byte_count($string);
// Find the starting byte offset // Find the starting byte offset
$bytes = 0; $bytes = 0;
@ -1128,6 +1164,7 @@ function api_substr($string, $start, $length = null, $encoding = null) {
} }
$iend = $bytes; $iend = $bytes;
$string = substr($string, $istart, max(0, $iend - $istart + 1)); $string = substr($string, $istart, max(0, $iend - $istart + 1));
}
if (!api_is_utf8($encoding)) { if (!api_is_utf8($encoding)) {
$string = api_utf8_decode($string, $encoding); $string = api_utf8_decode($string, $encoding);
} }
@ -1159,7 +1196,12 @@ function api_substr_replace($string, $replacement, $start, $length = null, $enco
if (empty($encoding)) { if (empty($encoding)) {
$encoding = api_mb_internal_encoding(); $encoding = api_mb_internal_encoding();
} }
if (api_is_encoding_supported($encoding) && !_api_is_single_byte_encoding($encoding)) { if (_api_is_single_byte_encoding($encoding)) {
return substr_replace($string, $replacement, $start, $length);
}
if (api_is_encoding_supported($encoding)) {
// This fragment (branch) of code is adaptation of a published proposition:
// http://php.net/manual/en/function.substr-replace.php#90146
$string_length = api_strlen($string, $encoding); $string_length = api_strlen($string, $encoding);
if ($start < 0) { if ($start < 0) {
$start = max(0, $string_length + $start); $start = max(0, $string_length + $start);
@ -1214,8 +1256,24 @@ function api_ucwords($string, $encoding = null) {
if (api_mb_supports($encoding)) { if (api_mb_supports($encoding)) {
return @mb_convert_case($string, MB_CASE_TITLE, $encoding); return @mb_convert_case($string, MB_CASE_TITLE, $encoding);
} }
elseif (MBSTRING_INSTALLED && api_iconv_supports($encoding)) { if (api_is_encoding_supported($encoding)) {
return api_utf8_decode(@mb_convert_case(api_utf8_encode($string, $encoding), MB_CASE_TITLE, 'UTF-8'), $encoding); if (!api_is_utf8($encoding)) {
$string = api_utf8_encode($string, $encoding);
}
if (MBSTRING_INSTALLED) {
$string = @mb_convert_case($string, MB_CASE_TITLE, 'UTF-8');
} else {
// The following fragment (branch) of code is based on the function utf8_ucwords() by Harry Fuecks
// See http://dev.splitbrain.org/view/darcs/dokuwiki/inc/utf8.php
// Note: [\x0c\x09\x0b\x0a\x0d\x20] matches - form feeds, horizontal tabs, vertical tabs, linefeeds and carriage returns.
// This corresponds to the definition of a "word" defined at http://www.php.net/ucwords
$pattern = '/(^|([\x0c\x09\x0b\x0a\x0d\x20]+))([^\x0c\x09\x0b\x0a\x0d\x20]{1})[^\x0c\x09\x0b\x0a\x0d\x20]*/u';
$string = preg_replace_callback($pattern, '_api_utf8_ucwords_callback', $string);
}
if (!api_is_utf8($encoding)) {
return api_utf8_decode($string, $encoding);
}
return $string;
} }
return ucwords($string); return ucwords($string);
} }
@ -1228,6 +1286,8 @@ function api_ucwords($string, $encoding = null) {
*/ */
/** /**
* Note: Try to avoid using this function. Use api_preg_match() with Perl-compatible regular expression syntax.
*
* Executes a regular expression match with extended multibyte support. * Executes a regular expression match with extended multibyte support.
* By default this function uses the platform character set. * By default this function uses the platform character set.
* @param string $pattern The regular expression pattern. * @param string $pattern The regular expression pattern.
@ -1244,12 +1304,10 @@ function api_ereg($pattern, $string, & $regs = null) {
if (api_mb_supports($encoding)) { if (api_mb_supports($encoding)) {
if ($count < 3) { if ($count < 3) {
return @mb_ereg($pattern, $string); return @mb_ereg($pattern, $string);
} else {
$result = @mb_ereg($pattern, $string, $regs);
return $result;
} }
return @mb_ereg($pattern, $string, $regs);
} }
elseif (MBSTRING_INSTALLED && api_iconv_supports($encoding)) { if (MBSTRING_INSTALLED && api_is_encoding_supported($encoding)) {
global $_api_encoding; global $_api_encoding;
$_api_encoding = $encoding; $_api_encoding = $encoding;
api_mb_regex_encoding('UTF-8'); api_mb_regex_encoding('UTF-8');
@ -1261,16 +1319,16 @@ function api_ereg($pattern, $string, & $regs = null) {
} }
api_mb_regex_encoding($encoding); api_mb_regex_encoding($encoding);
return $result; return $result;
} else { }
if ($count < 3) { if ($count < 3) {
return ereg($pattern, $string); return ereg($pattern, $string);
} else {
return ereg($pattern, $string, $regs);
}
} }
return ereg($pattern, $string, $regs);
} }
/** /**
* Note: Try to avoid using this function. Use api_preg_replace() with Perl-compatible regular expression syntax.
*
* Scans string for matches to pattern, then replaces the matched text with replacement, with extended multibyte support. * Scans string for matches to pattern, then replaces the matched text with replacement, with extended multibyte support.
* By default this function uses the platform character set. * By default this function uses the platform character set.
* @param string $pattern The regular expression pattern. * @param string $pattern The regular expression pattern.
@ -1292,13 +1350,11 @@ function api_ereg_replace($pattern, $replacement, $string, $option = null) {
if (api_mb_supports($encoding)) { if (api_mb_supports($encoding)) {
if (is_null($option)) { if (is_null($option)) {
return @mb_ereg_replace($pattern, $replacement, $string); return @mb_ereg_replace($pattern, $replacement, $string);
} else {
return @mb_ereg_replace($pattern, $replacement, $string, $option);
} }
return @mb_ereg_replace($pattern, $replacement, $string, $option);
} }
elseif (MBSTRING_INSTALLED && api_iconv_supports($encoding)) { if (MBSTRING_INSTALLED && api_is_encoding_supported($encoding)) {
api_mb_regex_encoding('UTF-8'); api_mb_regex_encoding('UTF-8');
if (is_null($option)) { if (is_null($option)) {
$result = api_utf8_decode(@mb_ereg_replace(api_utf8_encode($pattern, $encoding), api_utf8_encode($replacement, $encoding), api_utf8_encode($string, $encoding)), $encoding); $result = api_utf8_decode(@mb_ereg_replace(api_utf8_encode($pattern, $encoding), api_utf8_encode($replacement, $encoding), api_utf8_encode($string, $encoding)), $encoding);
} else { } else {
@ -1306,12 +1362,13 @@ function api_ereg_replace($pattern, $replacement, $string, $option = null) {
} }
api_mb_regex_encoding($encoding); api_mb_regex_encoding($encoding);
return $result; return $result;
} else {
return ereg_replace($pattern, $replacement, $string);
} }
return ereg_replace($pattern, $replacement, $string);
} }
/** /**
* Note: Try to avoid using this function. Use api_preg_match() with Perl-compatible regular expression syntax.
*
* Executes a regular expression match, ignoring case, with extended multibyte support. * Executes a regular expression match, ignoring case, with extended multibyte support.
* By default this function uses the platform character set. * By default this function uses the platform character set.
* @param string $pattern The regular expression pattern. * @param string $pattern The regular expression pattern.
@ -1328,11 +1385,10 @@ function api_eregi($pattern, $string, & $regs = null) {
if (api_mb_supports($encoding)) { if (api_mb_supports($encoding)) {
if ($count < 3) { if ($count < 3) {
return @mb_eregi($pattern, $string); return @mb_eregi($pattern, $string);
} else {
return @mb_eregi($pattern, $string, $regs);
} }
return @mb_eregi($pattern, $string, $regs);
} }
elseif (MBSTRING_INSTALLED && api_iconv_supports($encoding)) { if (MBSTRING_INSTALLED && api_is_encoding_supported($encoding)) {
global $_api_encoding; global $_api_encoding;
$_api_encoding = $encoding; $_api_encoding = $encoding;
api_mb_regex_encoding('UTF-8'); api_mb_regex_encoding('UTF-8');
@ -1344,16 +1400,16 @@ function api_eregi($pattern, $string, & $regs = null) {
} }
api_mb_regex_encoding($encoding); api_mb_regex_encoding($encoding);
return $result; return $result;
} else { }
if ($count < 3) { if ($count < 3) {
return eregi($pattern, $string); return eregi($pattern, $string);
} else {
return eregi($pattern, $string, $regs);
}
} }
return eregi($pattern, $string, $regs);
} }
/** /**
* Note: Try to avoid using this function. Use api_preg_replace() with Perl-compatible regular expression syntax.
*
* Scans string for matches to pattern, then replaces the matched text with replacement, ignoring case, with extended multibyte support. * Scans string for matches to pattern, then replaces the matched text with replacement, ignoring case, with extended multibyte support.
* By default this function uses the platform character set. * By default this function uses the platform character set.
* @param string $pattern The regular expression pattern. * @param string $pattern The regular expression pattern.
@ -1375,11 +1431,10 @@ function api_eregi_replace($pattern, $replacement, $string, $option = null) {
if (api_mb_supports($encoding)) { if (api_mb_supports($encoding)) {
if (is_null($option)) { if (is_null($option)) {
return @mb_eregi_replace($pattern, $replacement, $string); return @mb_eregi_replace($pattern, $replacement, $string);
} else {
return @mb_eregi_replace($pattern, $replacement, $string, $option);
} }
return @mb_eregi_replace($pattern, $replacement, $string, $option);
} }
elseif (MBSTRING_INSTALLED && api_iconv_supports($encoding)) { if (MBSTRING_INSTALLED && api_is_encoding_supported($encoding)) {
api_mb_regex_encoding('UTF-8'); api_mb_regex_encoding('UTF-8');
if (is_null($option)) { if (is_null($option)) {
$result = api_utf8_decode(@mb_eregi_replace(api_utf8_encode($pattern, $encoding), api_utf8_encode($replacement, $encoding), api_utf8_encode($string, $encoding)), $encoding); $result = api_utf8_decode(@mb_eregi_replace(api_utf8_encode($pattern, $encoding), api_utf8_encode($replacement, $encoding), api_utf8_encode($string, $encoding)), $encoding);
@ -1388,9 +1443,8 @@ function api_eregi_replace($pattern, $replacement, $string, $option = null) {
} }
api_mb_regex_encoding($encoding); api_mb_regex_encoding($encoding);
return $result; return $result;
} else {
return eregi_replace($pattern, $replacement, $string);
} }
return eregi_replace($pattern, $replacement, $string);
} }
/** /**
@ -1452,12 +1506,13 @@ function api_preg_replace($pattern, $replacement, $subject, $limit= -1, &$count
if (empty($encoding)){ if (empty($encoding)){
$encoding = api_get_system_encoding(); $encoding = api_get_system_encoding();
} }
$is_utf8 = api_is_utf8($encoding);
if (is_array($pattern)) { if (is_array($pattern)) {
foreach ($pattern as &$p) { foreach ($pattern as &$p) {
$p = api_is_utf8($encoding) ? $p.'u' : $p; $p = $is_utf8 ? $p.'u' : $p;
} }
} else { } else {
$pattern = api_is_utf8($encoding) ? $pattern.'u' : $pattern; $pattern = $is_utf8 ? $pattern.'u' : $pattern;
} }
return preg_replace($pattern, $replacement, $subject, $limit, $count); return preg_replace($pattern, $replacement, $subject, $limit, $count);
} }
@ -1509,6 +1564,8 @@ function api_preg_split($pattern, $subject, $limit = -1, $flags = 0, $encoding =
} }
/** /**
* Note: Try to avoid using this function. Use api_preg_split() with Perl-compatible regular expression syntax.
*
* Splits a multibyte string using regular expression pattern and returns the result as an array. * Splits a multibyte string using regular expression pattern and returns the result as an array.
* By default this function uses the platform character set. * By default this function uses the platform character set.
* @param string $pattern The regular expression pattern. * @param string $pattern The regular expression pattern.
@ -1524,11 +1581,10 @@ function api_split($pattern, $string, $limit = null) {
if (api_mb_supports($encoding)) { if (api_mb_supports($encoding)) {
if (is_null($limit)) { if (is_null($limit)) {
return @mb_split($pattern, $string); return @mb_split($pattern, $string);
} else {
return @mb_split($pattern, $string, $limit);
} }
return @mb_split($pattern, $string, $limit);
} }
elseif (MBSTRING_INSTALLED && api_iconv_supports($encoding)) { if (MBSTRING_INSTALLED && api_is_encoding_supported($encoding)) {
global $_api_encoding; global $_api_encoding;
$_api_encoding = $encoding; $_api_encoding = $encoding;
api_mb_regex_encoding('UTF-8'); api_mb_regex_encoding('UTF-8');
@ -1540,13 +1596,11 @@ function api_split($pattern, $string, $limit = null) {
$result = _api_array_utf8_decode($result); $result = _api_array_utf8_decode($result);
api_mb_regex_encoding($encoding); api_mb_regex_encoding($encoding);
return $result; return $result;
} else { }
if (is_null($limit)) { if (is_null($limit)) {
return split($pattern, $string); return split($pattern, $string);
} else {
return split($pattern, $string, $limit);
}
} }
return split($pattern, $string, $limit);
} }
/** /**
@ -2265,12 +2319,10 @@ function api_get_non_utf8_encoding($language = null) {
if (is_array($encodings[$language])) { if (is_array($encodings[$language])) {
if (!empty($encodings[$language][0])) { if (!empty($encodings[$language][0])) {
return $encodings[$language][0]; return $encodings[$language][0];
} else {
return 'ISO-8859-15';
} }
} else {
return 'ISO-8859-15'; return 'ISO-8859-15';
} }
return 'ISO-8859-15';
} }
/** /**
@ -2375,11 +2427,10 @@ function api_refine_encoding_id($encoding) {
* @return bool Returns TRUE if the encodings are equal, FALSE otherwise. * @return bool Returns TRUE if the encodings are equal, FALSE otherwise.
*/ */
function api_equal_encodings($encoding1, $encoding2) { function api_equal_encodings($encoding1, $encoding2) {
$is_array_encoding1 = is_array($encoding1);
$is_array_encoding2 = is_array($encoding2); $is_array_encoding2 = is_array($encoding2);
$encoding1 = api_refine_encoding_id($encoding1); $encoding1 = api_refine_encoding_id($encoding1);
$encoding2 = api_refine_encoding_id($encoding2); $encoding2 = api_refine_encoding_id($encoding2);
if (!$is_array_encoding1 && !$is_array_encoding2) { if (!is_array($encoding1) && !$is_array_encoding2) {
return $encoding1 == $encoding2; return $encoding1 == $encoding2;
} }
if ($is_array_encoding2) { if ($is_array_encoding2) {
@ -2587,13 +2638,10 @@ function api_iconv_set_encoding($type, $encoding = null) {
if(@iconv_set_encoding($type, $encoding)) { if(@iconv_set_encoding($type, $encoding)) {
$iconv_internal_encoding = $encoding; $iconv_internal_encoding = $encoding;
return true; return true;
} else {
return false;
} }
} else {
return false; return false;
} }
break; return false;
case 'iconv_input_encoding': case 'iconv_input_encoding':
if (empty($encoding)) { if (empty($encoding)) {
if (is_null($iconv_input_encoding)) { if (is_null($iconv_input_encoding)) {
@ -2605,13 +2653,10 @@ function api_iconv_set_encoding($type, $encoding = null) {
if(@iconv_set_encoding($type, $encoding)) { if(@iconv_set_encoding($type, $encoding)) {
$iconv_input_encoding = $encoding; $iconv_input_encoding = $encoding;
return true; return true;
} else {
return false;
} }
} else {
return false; return false;
} }
break; return false;
case 'iconv_output_encoding': case 'iconv_output_encoding':
if (empty($encoding)) { if (empty($encoding)) {
if (is_null($iconv_output_encoding)) { if (is_null($iconv_output_encoding)) {
@ -2623,16 +2668,12 @@ function api_iconv_set_encoding($type, $encoding = null) {
if(@iconv_set_encoding($type, $encoding)) { if(@iconv_set_encoding($type, $encoding)) {
$iconv_output_encoding = $encoding; $iconv_output_encoding = $encoding;
return true; return true;
} else {
return false;
} }
} else {
return false; return false;
} }
break;
default:
return false; return false;
} }
return false;
} }
/** /**

@ -8,6 +8,10 @@
* @author: Ivan Tcholakov, ivantcholakov@gmail.com, 2009 * @author: Ivan Tcholakov, ivantcholakov@gmail.com, 2009
* @package dokeos.library * @package dokeos.library
* ============================================================================== * ==============================================================================
*
* Note: None of the functions and data structures here are meant to be used directly.
* See the file multibyte_string_functions.lib.php which contains the "public" API.
*
*/ */
// Global variables used by some callback functions. // Global variables used by some callback functions.
@ -24,7 +28,7 @@ $_api_collator = null;
// This is a php-implementation of the function api_convert_encoding(). // This is a php-implementation of the function api_convert_encoding().
function _api_convert_encoding($string, $to_encoding, $from_encoding) { function _api_convert_encoding($string, $to_encoding, $from_encoding) {
static $character_map = array(); static $character_map = array();
static $utf8_like = array('UTF-8', 'US-ASCII'); static $utf8_compatible = array('UTF-8', 'US-ASCII');
if (empty($string)) { if (empty($string)) {
return $string; return $string;
} }
@ -35,7 +39,7 @@ function _api_convert_encoding($string, $to_encoding, $from_encoding) {
} }
$to = _api_get_character_map_name($to_encoding); $to = _api_get_character_map_name($to_encoding);
$from = _api_get_character_map_name($from_encoding); $from = _api_get_character_map_name($from_encoding);
if (empty($to) || empty($from) || $to == $from || (in_array($to, $utf8_like) && in_array($from, $utf8_like))) { if (empty($to) || empty($from) || $to == $from || (in_array($to, $utf8_compatible) && in_array($from, $utf8_compatible))) {
return $string; return $string;
} }
if (!isset($character_map[$to])) { if (!isset($character_map[$to])) {
@ -395,6 +399,20 @@ function _api_utf8_get_letter_case_properties($codepoint, $type = 'lower') {
return $result; return $result;
} }
/**
 * Callback used by api_ucwords(): rebuilds one matched word with its first
 * character replaced by an upper-case version.
 *
 * The match array is consumed as follows:
 *  - $matches[0] is the whole match; it is left-trimmed before the replacement;
 *  - $matches[2] is put back unchanged in front of the result (the original code
 *    names it "leading whitespace" - the pattern itself lives in the caller);
 *  - $matches[3] is converted with api_strtoupper() and substituted at offset 0,
 *    length 1 of the trimmed word via api_substr_replace().
 * Both helpers are called with the 'UTF-8' encoding.
 *
 * @author Harry Fuecks
 * @link http://dev.splitbrain.org/view/darcs/dokuwiki/inc/utf8.php
 * @author Ivan Tcholakov, adaptation for the Dokeos LMS, 2009
 * @param array $matches Input array of matches corresponding to a single word
 * @return string Returns the word with its first character in upper case, preceded by the captured prefix
 */
function _api_utf8_ucwords_callback($matches) {
	// Captured prefix that has to survive the ltrim() below.
	$prefix = $matches[2];
	// Upper-case replacement for the first character of the word.
	$capital = api_strtoupper($matches[3], 'UTF-8');
	// The word itself, without whatever precedes it in the full match.
	$word = ltrim($matches[0]);
	return $prefix . api_substr_replace($word, $capital, 0, 1, 'UTF-8');
}
/** /**
* ---------------------------------------------------------------------------- * ----------------------------------------------------------------------------
@ -571,11 +589,10 @@ if (MBSTRING_INSTALLED && !function_exists('mb_stristr')) {
if ($pos === false) { if ($pos === false) {
return false; return false;
} }
elseif($part == true) { if($part == true) {
return mb_substr($haystack, 0, $pos + 1, $encoding); return mb_substr($haystack, 0, $pos + 1, $encoding);
} else {
return mb_substr($haystack, $pos, mb_strlen($haystack, $encoding), $encoding);
} }
return mb_substr($haystack, $pos, mb_strlen($haystack, $encoding), $encoding);
} }
} }
@ -591,11 +608,11 @@ if (MBSTRING_INSTALLED && !function_exists('mb_strrchr')) {
$pos = mb_strrpos($haystack, $needle, mb_strlen($haystack, $encoding) - 1, $encoding); $pos = mb_strrpos($haystack, $needle, mb_strlen($haystack, $encoding) - 1, $encoding);
if ($pos === false) { if ($pos === false) {
return false; return false;
} elseif($part == true) { }
if($part == true) {
return mb_substr($haystack, 0, $pos + 1, $encoding); return mb_substr($haystack, 0, $pos + 1, $encoding);
} else {
return mb_substr($haystack, $pos, mb_strlen($haystack, $encoding), $encoding);
} }
return mb_substr($haystack, $pos, mb_strlen($haystack, $encoding), $encoding);
} }
} }
@ -610,10 +627,10 @@ if (MBSTRING_INSTALLED && !function_exists('mb_strstr')) {
$pos = mb_strpos($haystack, $needle, 0, $encoding); $pos = mb_strpos($haystack, $needle, 0, $encoding);
if ($pos === false) { if ($pos === false) {
return false; return false;
} elseif($part == true) { }
if($part == true) {
return mb_substr($haystack, 0, $pos + 1, $encoding); return mb_substr($haystack, 0, $pos + 1, $encoding);
} else {
return mb_substr($haystack, $pos, mb_strlen($haystack, $encoding), $encoding);
} }
return mb_substr($haystack, $pos, mb_strlen($haystack, $encoding), $encoding);
} }
} }

Loading…
Cancel
Save