From 595b3b57ee430845cf297f13def83f84195ee976 Mon Sep 17 00:00:00 2001 From: Ivan Tcholakov Date: Sat, 15 Aug 2009 03:56:25 +0300 Subject: [PATCH] Task #306 - The multibyte string library: Reordering some functions for better categorization. Fixing a minor string-sorting related bug. --- .../lib/multibyte_string_functions.lib.php | 502 +++++++++--------- 1 file changed, 257 insertions(+), 245 deletions(-) diff --git a/main/inc/lib/multibyte_string_functions.lib.php b/main/inc/lib/multibyte_string_functions.lib.php index c6cc002a7d..44c35d5c83 100644 --- a/main/inc/lib/multibyte_string_functions.lib.php +++ b/main/inc/lib/multibyte_string_functions.lib.php @@ -290,219 +290,6 @@ function api_file_system_decode($string, $to_encoding = null) { * ---------------------------------------------------------------------------- */ -/** - * Executes a regular expression match with extended multibyte support. - * By default this function uses the platform character set. - * @param string $pattern The regular expression pattern. - * @param string $string The searched string. - * @param array $regs (optional) If specified, by this passed by reference parameter an array containing found match and its substrings is returned. - * @return mixed 1 if match is found, FALSE if not. If $regs has been specified, byte-length of the found match is returned, or FALSE if no match has been found. - * This function is aimed at replacing the functions ereg() and mb_ereg() for human-language strings. 
- * @link http://php.net/manual/en/function.ereg - * @link http://php.net/manual/en/function.mb-ereg - */ -function api_ereg($pattern, $string, & $regs = null) { - $count = func_num_args(); - $encoding = api_mb_regex_encoding(); - if (api_mb_supports($encoding)) { - if ($count < 3) { - return @mb_ereg($pattern, $string); - } else { - $result = @mb_ereg($pattern, $string, $regs); - return $result; - } - } - elseif (api_iconv_supports($encoding)) { - api_mb_regex_encoding('UTF-8'); - if ($count < 3) { - $result = @mb_ereg(api_utf8_encode($pattern, $encoding), api_utf8_encode($string, $encoding)); - } else { - $result = @mb_ereg(api_utf8_encode($pattern, $encoding), api_utf8_encode($string, $encoding), $regs); - $regs = _api_array_utf8_decode($regs, $encoding); - } - api_mb_regex_encoding($encoding); - return $result; - } else { - if ($count < 3) { - return ereg($pattern, $string); - } else { - return ereg($pattern, $string, $regs); - } - } -} - -/** - * Scans string for matches to pattern, then replaces the matched text with replacement, with extended multibyte support. - * By default this function uses the platform character set. - * @param string $pattern The regular expression pattern. - * @param string $replacement The replacement text. - * @param string $string The searched string. - * @param string $option (optional) Matching condition. - * If i is specified for the matching condition parameter, the case will be ignored. - * If x is specified, white space will be ignored. - * If m is specified, match will be executed in multiline mode and line break will be included in '.'. - * If p is specified, match will be executed in POSIX mode, line break will be considered as normal character. - * If e is specified, replacement string will be evaluated as PHP expression. - * @return mixed The modified string is returned. If no matches are found within the string, then it will be returned unchanged. FALSE will be returned on error. 
- * This function is aimed at replacing the functions ereg_replace() and mb_ereg_replace() for human-language strings. - * @link http://php.net/manual/en/function.ereg-replace - * @link http://php.net/manual/en/function.mb-ereg-replace - */ -function api_ereg_replace($pattern, $replacement, $string, $option = null) { - $encoding = api_mb_regex_encoding(); - if (api_mb_supports($encoding)) { - if (is_null($option)) { - return @mb_ereg_replace($pattern, $replacement, $string); - } else { - return @mb_ereg_replace($pattern, $replacement, $string, $option); - } - } - elseif (api_iconv_supports($encoding)) { - api_mb_regex_encoding('UTF-8'); - - if (is_null($option)) { - $result = api_utf8_decode(@mb_ereg_replace(api_utf8_encode($pattern, $encoding), api_utf8_encode($replacement, $encoding), api_utf8_encode($string, $encoding)), $encoding); - } else { - $result = api_utf8_decode(@mb_ereg_replace(api_utf8_encode($pattern, $encoding), api_utf8_encode($replacement, $encoding), api_utf8_encode($string, $encoding), $option), $encoding); - } - api_mb_regex_encoding($encoding); - return $result; - } else { - return ereg_replace($pattern, $replacement, $string); - } -} - -// This is a helper callback function for internal purposes. -function _api_array_utf8_decode($variable, $encoding) { - if (is_array($variable)) { - return array_map('_api_array_utf8_decode', $variable, $encoding); - } - if (is_string($var)) { - return api_utf8_decode($variable, $encoding); - } - return $variable; -} - -/** - * Executes a regular expression match, ignoring case, with extended multibyte support. - * By default this function uses the platform character set. - * @param string $pattern The regular expression pattern. - * @param string $string The searched string. - * @param array $regs (optional) If specified, by this passed by reference parameter an array containing found match and its substrings is returned. - * @return mixed 1 if match is found, FALSE if not. 
If $regs has been specified, byte-length of the found match is returned, or FALSE if no match has been found. - * This function is aimed at replacing the functions eregi() and mb_eregi() for human-language strings. - * @link http://php.net/manual/en/function.eregi - * @link http://php.net/manual/en/function.mb-eregi - */ -function api_eregi($pattern, $string, & $regs = null) { - $count = func_num_args(); - $encoding = api_mb_regex_encoding(); - if (api_mb_supports($encoding)) { - if ($count < 3) { - return @mb_eregi($pattern, $string); - } else { - return @mb_eregi($pattern, $string, $regs); - } - } - elseif (api_iconv_supports($encoding)) { - api_mb_regex_encoding('UTF-8'); - - if ($count < 3) { - $result = @mb_eregi(api_utf8_encode($pattern, $encoding), api_utf8_encode($string, $encoding)); - } else { - $result = @mb_eregi(api_utf8_encode($pattern, $encoding), api_utf8_encode($string, $encoding), $regs); - $regs = _api_array_utf8_decode($regs, $encoding); - } - api_mb_regex_encoding($encoding); - return $result; - } else { - if ($count < 3) { - return eregi($pattern, $string); - } else { - return eregi($pattern, $string, $regs); - } - } -} - -/** - * Scans string for matches to pattern, then replaces the matched text with replacement, ignoring case, with extended multibyte support. - * By default this function uses the platform character set. - * @param string $pattern The regular expression pattern. - * @param string $replacement The replacement text. - * @param string $string The searched string. - * @param string $option (optional) Matching condition. - * If i is specified for the matching condition parameter, the case will be ignored. - * If x is specified, white space will be ignored. - * If m is specified, match will be executed in multiline mode and line break will be included in '.'. - * If p is specified, match will be executed in POSIX mode, line break will be considered as normal character. 
- * If e is specified, replacement string will be evaluated as PHP expression. - * @return mixed The modified string is returned. If no matches are found within the string, then it will be returned unchanged. FALSE will be returned on error. - * This function is aimed at replacing the functions eregi_replace() and mb_eregi_replace() for human-language strings. - * @link http://php.net/manual/en/function.eregi-replace - * @link http://php.net/manual/en/function.mb-eregi-replace - */ -function api_eregi_replace($pattern, $replacement, $string, $option = null) { - $encoding = api_mb_regex_encoding(); - if (api_mb_supports($encoding)) { - if (is_null($option)) { - return @mb_eregi_replace($pattern, $replacement, $string); - } else { - return @mb_eregi_replace($pattern, $replacement, $string, $option); - } - } - elseif (api_iconv_supports($encoding)) { - api_mb_regex_encoding('UTF-8'); - if (is_null($option)) { - $result = api_utf8_decode(@mb_eregi_replace(api_utf8_encode($pattern, $encoding), api_utf8_encode($replacement, $encoding), api_utf8_encode($string, $encoding)), $encoding); - } else { - $result = api_utf8_decode(@mb_eregi_replace(api_utf8_encode($pattern, $encoding), api_utf8_encode($replacement, $encoding), api_utf8_encode($string, $encoding), $option), $encoding); - } - api_mb_regex_encoding($encoding); - return $result; - } else { - return eregi_replace($pattern, $replacement, $string); - } -} - -/** - * Splits a multibyte string using regular expression pattern and returns the result as an array. - * By default this function uses the platform character set. - * @param string $pattern The regular expression pattern. - * @param string $string The string being split. - * @param int $limit (optional) If this optional parameter $limit is specified, the string will be split in $limit elements as maximum. - * @return array The result as an array. - * This function is aimed at replacing the functions split() and mb_split() for human-language strings. 
- * @link http://php.net/manual/en/function.split - * @link http://php.net/manual/en/function.mb-split - */ -function api_split($pattern, $string, $limit = null) { - $encoding = api_mb_regex_encoding(); - if (api_mb_supports($encoding)) { - if (is_null($limit)) { - return @mb_split($pattern, $string); - } else { - return @mb_split($pattern, $string, $limit); - } - } - elseif (api_iconv_supports($encoding)) { - api_mb_regex_encoding('UTF-8'); - if (is_null($limit)) { - $result = @mb_split(api_utf8_encode($pattern, $encoding), api_utf8_encode($string, $encoding)); - } else { - $result = @mb_split(api_utf8_encode($pattern, $encoding), api_utf8_encode($string, $encoding), $limit); - } - $result = _api_array_utf8_decode($result, $encoding); - api_mb_regex_encoding($encoding); - return $result; - } else { - if (is_null($limit)) { - return split($pattern, $string); - } else { - return split($pattern, $string, $limit); - } - } -} - /** * This function returns a string or an array with all occurrences of search in subject (ignoring case) replaced with the given replace value. * @param mixed $search String or array of strings to be found. @@ -1001,6 +788,214 @@ function api_ucwords($string, $encoding = null) { return ucwords($string); } +/** + * ---------------------------------------------------------------------------- + * String operations using regular expressions + * ---------------------------------------------------------------------------- + */ + +/** + * Executes a regular expression match with extended multibyte support. + * By default this function uses the platform character set. + * @param string $pattern The regular expression pattern. + * @param string $string The searched string. + * @param array $regs (optional) If specified, by this passed by reference parameter an array containing found match and its substrings is returned. + * @return mixed 1 if match is found, FALSE if not. 
If $regs has been specified, byte-length of the found match is returned, or FALSE if no match has been found. + * This function is aimed at replacing the functions ereg() and mb_ereg() for human-language strings. + * @link http://php.net/manual/en/function.ereg + * @link http://php.net/manual/en/function.mb-ereg + */ +function api_ereg($pattern, $string, & $regs = null) { + $count = func_num_args(); + $encoding = api_mb_regex_encoding(); + if (api_mb_supports($encoding)) { + if ($count < 3) { + return @mb_ereg($pattern, $string); + } else { + $result = @mb_ereg($pattern, $string, $regs); + return $result; + } + } + elseif (api_iconv_supports($encoding)) { + api_mb_regex_encoding('UTF-8'); + if ($count < 3) { + $result = @mb_ereg(api_utf8_encode($pattern, $encoding), api_utf8_encode($string, $encoding)); + } else { + $result = @mb_ereg(api_utf8_encode($pattern, $encoding), api_utf8_encode($string, $encoding), $regs); + $regs = _api_array_utf8_decode($regs, $encoding); + } + api_mb_regex_encoding($encoding); + return $result; + } else { + if ($count < 3) { + return ereg($pattern, $string); + } else { + return ereg($pattern, $string, $regs); + } + } +} + +/** + * Scans string for matches to pattern, then replaces the matched text with replacement, with extended multibyte support. + * By default this function uses the platform character set. + * @param string $pattern The regular expression pattern. + * @param string $replacement The replacement text. + * @param string $string The searched string. + * @param string $option (optional) Matching condition. + * If i is specified for the matching condition parameter, the case will be ignored. + * If x is specified, white space will be ignored. + * If m is specified, match will be executed in multiline mode and line break will be included in '.'. + * If p is specified, match will be executed in POSIX mode, line break will be considered as normal character. 
+ * If e is specified, replacement string will be evaluated as PHP expression. + * @return mixed The modified string is returned. If no matches are found within the string, then it will be returned unchanged. FALSE will be returned on error. + * This function is aimed at replacing the functions ereg_replace() and mb_ereg_replace() for human-language strings. + * @link http://php.net/manual/en/function.ereg-replace + * @link http://php.net/manual/en/function.mb-ereg-replace + */ +function api_ereg_replace($pattern, $replacement, $string, $option = null) { + $encoding = api_mb_regex_encoding(); + if (api_mb_supports($encoding)) { + if (is_null($option)) { + return @mb_ereg_replace($pattern, $replacement, $string); + } else { + return @mb_ereg_replace($pattern, $replacement, $string, $option); + } + } + elseif (api_iconv_supports($encoding)) { + api_mb_regex_encoding('UTF-8'); + + if (is_null($option)) { + $result = api_utf8_decode(@mb_ereg_replace(api_utf8_encode($pattern, $encoding), api_utf8_encode($replacement, $encoding), api_utf8_encode($string, $encoding)), $encoding); + } else { + $result = api_utf8_decode(@mb_ereg_replace(api_utf8_encode($pattern, $encoding), api_utf8_encode($replacement, $encoding), api_utf8_encode($string, $encoding), $option), $encoding); + } + api_mb_regex_encoding($encoding); + return $result; + } else { + return ereg_replace($pattern, $replacement, $string); + } +} + +/** + * Executes a regular expression match, ignoring case, with extended multibyte support. + * By default this function uses the platform character set. + * @param string $pattern The regular expression pattern. + * @param string $string The searched string. + * @param array $regs (optional) If specified, by this passed by reference parameter an array containing found match and its substrings is returned. + * @return mixed 1 if match is found, FALSE if not. If $regs has been specified, byte-length of the found match is returned, or FALSE if no match has been found. 
+ * This function is aimed at replacing the functions eregi() and mb_eregi() for human-language strings. + * @link http://php.net/manual/en/function.eregi + * @link http://php.net/manual/en/function.mb-eregi + */ +function api_eregi($pattern, $string, & $regs = null) { + $count = func_num_args(); + $encoding = api_mb_regex_encoding(); + if (api_mb_supports($encoding)) { + if ($count < 3) { + return @mb_eregi($pattern, $string); + } else { + return @mb_eregi($pattern, $string, $regs); + } + } + elseif (api_iconv_supports($encoding)) { + api_mb_regex_encoding('UTF-8'); + + if ($count < 3) { + $result = @mb_eregi(api_utf8_encode($pattern, $encoding), api_utf8_encode($string, $encoding)); + } else { + $result = @mb_eregi(api_utf8_encode($pattern, $encoding), api_utf8_encode($string, $encoding), $regs); + $regs = _api_array_utf8_decode($regs, $encoding); + } + api_mb_regex_encoding($encoding); + return $result; + } else { + if ($count < 3) { + return eregi($pattern, $string); + } else { + return eregi($pattern, $string, $regs); + } + } +} + +/** + * Scans string for matches to pattern, then replaces the matched text with replacement, ignoring case, with extended multibyte support. + * By default this function uses the platform character set. + * @param string $pattern The regular expression pattern. + * @param string $replacement The replacement text. + * @param string $string The searched string. + * @param string $option (optional) Matching condition. + * If i is specified for the matching condition parameter, the case will be ignored. + * If x is specified, white space will be ignored. + * If m is specified, match will be executed in multiline mode and line break will be included in '.'. + * If p is specified, match will be executed in POSIX mode, line break will be considered as normal character. + * If e is specified, replacement string will be evaluated as PHP expression. + * @return mixed The modified string is returned. 
If no matches are found within the string, then it will be returned unchanged. FALSE will be returned on error. + * This function is aimed at replacing the functions eregi_replace() and mb_eregi_replace() for human-language strings. + * @link http://php.net/manual/en/function.eregi-replace + * @link http://php.net/manual/en/function.mb-eregi-replace + */ +function api_eregi_replace($pattern, $replacement, $string, $option = null) { + $encoding = api_mb_regex_encoding(); + if (api_mb_supports($encoding)) { + if (is_null($option)) { + return @mb_eregi_replace($pattern, $replacement, $string); + } else { + return @mb_eregi_replace($pattern, $replacement, $string, $option); + } + } + elseif (api_iconv_supports($encoding)) { + api_mb_regex_encoding('UTF-8'); + if (is_null($option)) { + $result = api_utf8_decode(@mb_eregi_replace(api_utf8_encode($pattern, $encoding), api_utf8_encode($replacement, $encoding), api_utf8_encode($string, $encoding)), $encoding); + } else { + $result = api_utf8_decode(@mb_eregi_replace(api_utf8_encode($pattern, $encoding), api_utf8_encode($replacement, $encoding), api_utf8_encode($string, $encoding), $option), $encoding); + } + api_mb_regex_encoding($encoding); + return $result; + } else { + return eregi_replace($pattern, $replacement, $string); + } +} + +/** + * Splits a multibyte string using regular expression pattern and returns the result as an array. + * By default this function uses the platform character set. + * @param string $pattern The regular expression pattern. + * @param string $string The string being split. + * @param int $limit (optional) If this optional parameter $limit is specified, the string will be split in $limit elements as maximum. + * @return array The result as an array. + * This function is aimed at replacing the functions split() and mb_split() for human-language strings. 
+ * @link http://php.net/manual/en/function.split + * @link http://php.net/manual/en/function.mb-split + */ +function api_split($pattern, $string, $limit = null) { + $encoding = api_mb_regex_encoding(); + if (api_mb_supports($encoding)) { + if (is_null($limit)) { + return @mb_split($pattern, $string); + } else { + return @mb_split($pattern, $string, $limit); + } + } + elseif (api_iconv_supports($encoding)) { + api_mb_regex_encoding('UTF-8'); + if (is_null($limit)) { + $result = @mb_split(api_utf8_encode($pattern, $encoding), api_utf8_encode($string, $encoding)); + } else { + $result = @mb_split(api_utf8_encode($pattern, $encoding), api_utf8_encode($string, $encoding), $limit); + } + $result = _api_array_utf8_decode($result, $encoding); + api_mb_regex_encoding($encoding); + return $result; + } else { + if (is_null($limit)) { + return split($pattern, $string); + } else { + return split($pattern, $string, $limit); + } + } +} + /** * This function adds a unicode modifier (u suffix) to a Perl-compatible regular expression depending on the specified encoding. * @param string $pcre The Perl-compatible regular expression. @@ -1014,6 +1009,54 @@ function api_add_pcre_unicode_modifier($pcre, $encoding = null) { return api_is_utf8($encoding) ? $pcre.'u' : $pcre; } +/** + * ---------------------------------------------------------------------------- + * Common string operations with arrays + * ---------------------------------------------------------------------------- + */ + +/** + * Checks if a value exists in an array, a case insensitive version of in_array() function with extended multibyte support. + * @param mixed $needle The searched value. If needle is a string, the comparison is done in a case-insensitive manner. + * @param array $haystack The array. + * @param bool $strict (optional) If is set to TRUE then the function will also check the types of the $needle in the $haystack. The default value is FALSE. 
+ * @param string $encoding (optional) The used internally by this function character encoding. If it is omitted, the platform character set will be used by default. + * @return bool Returns TRUE if $needle is found in the array, FALSE otherwise. + * @link http://php.net/manual/en/function.in-array.php + */ +function api_in_array_nocase($needle, $haystack, $strict = false, $encoding = null) { + if (is_array($needle)) { + foreach ($needle as $item) { + if (api_in_array_nocase($item, $haystack, $strict, $encoding)) return true; + } + return false; + } + if (!is_string($needle)) { + return in_array($needle, $haystack, $strict); + } + $needle = api_strtolower($needle, $encoding); + foreach ($haystack as $item) { + if ($strict && !is_string($item)) { + continue; + } + if (api_strtolower($item, $encoding) == $needle) { + return true; + } + } + return false; +} + +// This is a helper callback function for internal purposes. +function _api_array_utf8_decode($variable, $encoding) { + if (is_array($variable)) { + return array_map('_api_array_utf8_decode', $variable, $encoding); + } + if (is_string($var)) { + return api_utf8_decode($variable, $encoding); + } + return $variable; +} + /** * ---------------------------------------------------------------------------- * String comparison @@ -1092,37 +1135,6 @@ function api_strnatcmp($string1, $string2, $language = null, $encoding = null) { return strnatcmp($string1, $string2); } -/** - * Checks if a value exists in an array, a case insensitive version of in_array() function with extended multibyte support. - * @param mixed $needle The searched value. If needle is a string, the comparison is done in a case-insensitive manner. - * @param array $haystack The array. - * @param bool $strict (optional) If is set to TRUE then the function will also check the types of the $needle in the $haystack. The default value if FALSE. - * @param string $encoding (optional) The used internally by this function character encoding. 
If it is omitted, the platform character set will be used by default. - * @return bool Returns TRUE if $needle is found in the array, FALSE otherwise. - * @link http://php.net/manual/en/function.in-array.php - */ -function api_in_array_nocase($needle, $haystack, $strict = false, $encoding = null) { - if (is_array($needle)) { - foreach ($needle as $item) { - if (api_in_array_nocase($item, $haystack, $strict, $encoding)) return true; - } - return false; - } - if (!is_string($needle)) { - return in_array($needle, $haystack, $strict); - } - $needle = api_strtolower($needle, $encoding); - foreach ($haystack as $item) { - if ($strict && !is_string($item)) { - continue; - } - if (api_strtolower($item, $encoding) == $needle) { - return true; - } - } - return false; -} - // Returns an instance of Collator class (ICU) created for a specified language, for internal use. function _api_get_collator($language = null) { static $collator = array(); @@ -2531,7 +2543,7 @@ function api_get_locale_from_language($language = null) { */ function api_set_default_locale($locale = null) { static $default_locale = 'en'; - if (!empty($language)) { + if (!empty($locale)) { $default_locale = $locale; if (INTL_INSTALLED) { return @locale_set_default($locale);