diff --git a/main/inc/lib/multibyte_string_functions.lib.php b/main/inc/lib/multibyte_string_functions.lib.php index 3997927762..07aaffff60 100644 --- a/main/inc/lib/multibyte_string_functions.lib.php +++ b/main/inc/lib/multibyte_string_functions.lib.php @@ -16,9 +16,9 @@ * Notes: * * 1. For all the functions from this library witn optional encoding - * parameters the system's encoding is assumed, i.e. the value that is - * returned by api_get_setting('platform_charset') or the value of the - * global variable $charset. + * parameters, the system's encoding is assumed by default, i.e. the + * value that is returned by api_get_setting('platform_charset') or + * the value of the global variable $charset. * * 2. In other aspects, most of the functions in this library try to copy * behaviour of some core PHP functions and some functions from the @@ -37,6 +37,8 @@ * When encodings to be used are not supported by mbstring, this library * is able to exploit the PHP iconv extesion, which in this case should * be activated too. + * + * 5. TODO: Functions that were not used may be removed from this library. */ /** @@ -51,7 +53,7 @@ * @param string $to_encoding The encoding that $string is being converted to. * @param string $from_encoding The encoding that $string is being converted from. If it is omited, the platform character set is assumed. * @return string Returns the converted string. - * This function is aimed to replace mb_convert_encoding() for human-language strings. + * This function is aimed at replacing the function mb_convert_encoding() for human-language strings. * @link http://php.net/manual/en/function.mb-convert-encoding */ function api_convert_encoding($string, $to_encoding, $from_encoding = null) { @@ -77,7 +79,7 @@ function api_convert_encoding($string, $to_encoding, $from_encoding = null) { * @param string $string The string being converted. * @param string $from_encoding The encoding that $string is being converted from. 
If it is omited, the platform character set is assumed. * @return string Returns the converted string. - * This function is aimed to replace utf8_encode() for human-language strings. + * This function is aimed at replacing the function utf8_encode() for human-language strings. * @link http://php.net/manual/en/function.utf8-encode */ function api_utf8_encode($string, $from_encoding = null) { @@ -103,7 +105,7 @@ function api_utf8_encode($string, $from_encoding = null) { * @param string $string The string being converted. * @param string $to_encoding The encoding that $string is being converted to. If it is omited, the platform character set is assumed. * @return string Returns the converted string. - * This function is aimed to replace utf8_decode() for human-language strings. + * This function is aimed at replacing the function utf8_decode() for human-language strings. * @link http://php.net/manual/en/function.utf8-decode */ function api_utf8_decode($string, $to_encoding = null) { @@ -169,7 +171,7 @@ function api_to_system_encoding($string, $from_encoding = null, $check_utf8_vali * @param int $quote_style The quote style - ENT_COMPAT (default), ENT_QUOTES, ENT_NOQUOTES. * @param string $encoding The encoding (of the input string) used in conversion. If it is omited, the platform character set is assumed. * @return string Returns the converted string. - * This function is aimed to replace htmlentities() for human-language strings. + * This function is aimed at replacing the function htmlentities() for human-language strings. * @link http://php.net/manual/en/function.htmlentities */ function api_htmlentities($string, $quote_style = ENT_COMPAT, $encoding = null) { @@ -200,7 +202,7 @@ function api_htmlentities($string, $quote_style = ENT_COMPAT, $encoding = null) * @param int $quote_style The quote style - ENT_COMPAT (default), ENT_QUOTES, ENT_NOQUOTES. * @param string $encoding The encoding (of the result) used in conversion. 
If it is omited, the platform character set is assumed. * @return string Returns the converted string. - * This function is aimed to replace html_entity_decode() for human-language strings. + * This function is aimed at replacing the function html_entity_decode() for human-language strings. * @link http://php.net/html_entity_decode */ function api_html_entity_decode($string, $quote_style = ENT_COMPAT, $encoding = null) { @@ -229,17 +231,26 @@ function api_xml_http_response_encode($string, $from_encoding = null) { return $string; } +/** + * ---------------------------------------------------------------------------- + * Common multibyte string functions + * ---------------------------------------------------------------------------- + */ -//---------------------------------------------------------------------------- -// Common multibyte string functions -//---------------------------------------------------------------------------- - -// Regular expression match with multibyte support. -// See http://php.net/manual/en/function.mb-ereg +/** + * Executes a regular expression match with extended multibyte support. + * By default this function uses the platform character set. + * @param string $pattern The regular expression pattern. + * @param string $string The searched string. + * @param array $regs If specified, by this passed by reference parameter an array containing found match and its substrings is returned. + * @return mixed 1 if match is found, FALSE if not. If $regs has been specified, byte-length of the found match is returned, or FALSE if no match has been found. + * This function is aimed at replacing the functions ereg() and mb_ereg() for human-language strings. 
+ * @link http://php.net/manual/en/function.ereg + * @link http://php.net/manual/en/function.mb-ereg + */ function api_ereg($pattern, $string, & $regs = null) { $count = func_num_args(); $encoding = api_mb_regex_encoding(); - if (api_mb_supports($encoding)) { if ($count < 3) { return @mb_ereg($pattern, $string); @@ -254,7 +265,7 @@ function api_ereg($pattern, $string, & $regs = null) { $result = @mb_ereg(api_utf8_encode($pattern, $encoding), api_utf8_encode($string, $encoding)); } else { $result = @mb_ereg(api_utf8_encode($pattern, $encoding), api_utf8_encode($string, $encoding), $regs); - $regs = api_array_utf8_decode($regs, $encoding); + $regs = _api_array_utf8_decode($regs, $encoding); } api_mb_regex_encoding($encoding); return $result; @@ -267,11 +278,25 @@ function api_ereg($pattern, $string, & $regs = null) { } } -// Replace regular expression with multibyte support. -// See http://php.net/manual/en/function.mb-ereg-replace +/** + * Scans string for matches to pattern, then replaces the matched text with replacement, with extended multibyte support. + * By default this function uses the platform character set. + * @param string $pattern The regular expression pattern. + * @param string $replacement The replacement text. + * @param string $string The searched string. + * @param string $option Matching condition. + * If i is specified for the matching condition parameter, the case will be ignored. + * If x is specified, white space will be ignored. + * If m is specified, match will be executed in multiline mode and line break will be included in '.'. + * If p is specified, match will be executed in POSIX mode, line break will be considered as normal character. + * If e is specified, replacement string will be evaluated as PHP expression. + * @return mixed The modified string is returned. If no matches are found within the string, then it will be returned unchanged. FALSE will be returned on error. 
+ * This function is aimed at replacing the functions ereg_replace() and mb_ereg_replace() for human-language strings. + * @link http://php.net/manual/en/function.ereg-replace + * @link http://php.net/manual/en/function.mb-ereg-replace + */ function api_ereg_replace($pattern, $replacement, $string, $option = null) { $encoding = api_mb_regex_encoding(); - if (api_mb_supports($encoding)) { if (is_null($option)) { return @mb_ereg_replace($pattern, $replacement, $string); @@ -295,9 +320,9 @@ function api_ereg_replace($pattern, $replacement, $string, $option = null) { } // This is a helper callback function for internal purposes. -function api_array_utf8_decode($variable, $encoding) { +function _api_array_utf8_decode($variable, $encoding) { if (is_array($variable)) { - return array_map('api_array_utf8_decode', $variable, $encoding); + return array_map('_api_array_utf8_decode', $variable, $encoding); } if (is_string($var)) { return api_utf8_decode($variable, $encoding); @@ -305,12 +330,20 @@ function api_array_utf8_decode($variable, $encoding) { return $variable; } -// Regular expression match ignoring case with multibyte support. -// See http://php.net/manual/en/function.mb-eregi +/** + * Executes a regular expression match, ignoring case, with extended multibyte support. + * By default this function uses the platform character set. + * @param string $pattern The regular expression pattern. + * @param string $string The searched string. + * @param array $regs If specified, by this passed by reference parameter an array containing found match and its substrings is returned. + * @return mixed 1 if match is found, FALSE if not. If $regs has been specified, byte-length of the found match is returned, or FALSE if no match has been found. + * This function is aimed at replacing the functions eregi() and mb_eregi() for human-language strings. 
+ * @link http://php.net/manual/en/function.eregi + * @link http://php.net/manual/en/function.mb-eregi + */ function api_eregi($pattern, $string, & $regs = null) { $count = func_num_args(); $encoding = api_mb_regex_encoding(); - if (api_mb_supports($encoding)) { if ($count < 3) { return @mb_eregi($pattern, $string); @@ -325,7 +358,7 @@ function api_eregi($pattern, $string, & $regs = null) { $result = @mb_eregi(api_utf8_encode($pattern, $encoding), api_utf8_encode($string, $encoding)); } else { $result = @mb_eregi(api_utf8_encode($pattern, $encoding), api_utf8_encode($string, $encoding), $regs); - $regs = api_array_utf8_decode($regs, $encoding); + $regs = _api_array_utf8_decode($regs, $encoding); } api_mb_regex_encoding($encoding); return $result; @@ -338,11 +371,25 @@ function api_eregi($pattern, $string, & $regs = null) { } } -// Replace regular expression with multibyte support ignoring case. -// See http://php.net/manual/en/function.mb-eregi-replace +/** + * Scans string for matches to pattern, then replaces the matched text with replacement, ignoring case, with extended multibyte support. + * By default this function uses the platform character set. + * @param string $pattern The regular expression pattern. + * @param string $replacement The replacement text. + * @param string $string The searched string. + * @param string $option Matching condition. + * If i is specified for the matching condition parameter, the case will be ignored. + * If x is specified, white space will be ignored. + * If m is specified, match will be executed in multiline mode and line break will be included in '.'. + * If p is specified, match will be executed in POSIX mode, line break will be considered as normal character. + * If e is specified, replacement string will be evaluated as PHP expression. + * @return mixed The modified string is returned. If no matches are found within the string, then it will be returned unchanged. FALSE will be returned on error. 
+ * This function is aimed at replacing the functions eregi_replace() and mb_eregi_replace() for human-language strings. + * @link http://php.net/manual/en/function.eregi-replace + * @link http://php.net/manual/en/function.mb-eregi-replace + */ function api_eregi_replace($pattern, $replacement, $string, $option = null) { $encoding = api_mb_regex_encoding(); - if (api_mb_supports($encoding)) { if (is_null($option)) { return @mb_eregi_replace($pattern, $replacement, $string); @@ -364,24 +411,14 @@ function api_eregi_replace($pattern, $replacement, $string, $option = null) { } } -// This function returns a selected by position character of a string. -function api_get_character($string, $position, $encoding = null) { - if (empty($encoding)) { - $encoding = api_mb_internal_encoding(); - } - return api_substr($string, $position, 1, $encoding); -} - -// This function returns an array containing all characters of a string. -function api_get_characters($string, $encoding = null) { - if (empty($encoding)) { - $encoding = api_mb_internal_encoding(); - } - return api_str_split($string, 1, $encoding); -} - -// Makes a string's first character lowercase. -// See http://php.net/manual/en/function.lcfirst +/** + * Returns a string with the first character lowercased if that character is alphabetic. + * @param string $string The input string. + * @param string $encoding The used internally by this function character encoding. If it is omitted, the platform character set will be used by default. + * @return string The result string. + * This function is aimed at replacing the function lcfirst() for human-language strings. + * @link http://php.net/manual/en/function.lcfirst + */ function api_lcfirst($string, $encoding = null) { if (empty($encoding)) { $encoding = api_mb_internal_encoding(); @@ -389,27 +426,19 @@ function api_lcfirst($string, $encoding = null) { return api_strtolower(api_substr($string, 0, 1, $encoding), $encoding) . 
api_substr($string, 1, api_strlen($string, $encoding), $encoding); } -// Puts a prefix into a string. -// The input variables could be arrays too. -function api_prefix($string, $prefix) { - if (is_array($string)) { - if (is_array($prefix)) { - return array_map('api_prefix', $string, $prefix); - } else { - return array_map('api_prefix', $string, array_fill(0 , count($string) , $prefix)); - } - } - if (is_array($prefix)) { - $prefix = implode('', $prefix); - } - return $prefix.$string; -} - -// Splits string into array by regular expression. -// See http://php.net/manual/en/function.mb-split +/** + * Splits a multibyte string using regular expression pattern and returns the result as an array. + * By default this function uses the platform character set. + * @param string $pattern The regular expression pattern. + * @param string $string The string being split. + * @param int $limit If this optional parameter $limit is specified, the string will be split in $limit elements as maximum. + * @return array The result as an array. + * This function is aimed at replacing the functions split() and mb_split() for human-language strings. 
+ * @link http://php.net/manual/en/function.split + * @link http://php.net/manual/en/function.mb-split + */ function api_split($pattern, $string, $limit = null) { $encoding = api_mb_regex_encoding(); - if (api_mb_supports($encoding)) { if (is_null($limit)) { return @mb_split($pattern, $string); @@ -419,13 +448,12 @@ function api_split($pattern, $string, $limit = null) { } elseif (api_iconv_supports($encoding)) { api_mb_regex_encoding('UTF-8'); - if (is_null($limit)) { $result = @mb_split(api_utf8_encode($pattern, $encoding), api_utf8_encode($string, $encoding)); } else { $result = @mb_split(api_utf8_encode($pattern, $encoding), api_utf8_encode($string, $encoding), $limit); } - $result = api_array_utf8_decode($result, $encoding); + $result = _api_array_utf8_decode($result, $encoding); api_mb_regex_encoding($encoding); return $result; } else { @@ -437,21 +465,33 @@ function api_split($pattern, $string, $limit = null) { } } -// This is a multibyte replacement of str_ireplace(). -// See http://php.net/manual/en/function.str-ireplace -// TODO: To be revised an to be checked. +/** + * This function returns a string or an array with all occurrences of search in subject (ignoring case) replaced with the given replace value. + * @param mixed $search String or array of strings to be found. + * @param mixed $replace String or array of strings used for replacement. + * @param mixed $subject String or array of strings being searched. + * @param int $count The number of matched and replaced needles will be returned in count, which is passed by reference. + * @param string $encoding The used internally by this function character encoding. If it is omitted, the platform character set will be used by default. + * @return mixed String or array as a result. + * Notes: + * If $subject is an array, then the search and replace is performed with every entry of subject, the return value is an array. 
+ * If $search and $replace are arrays, then the function takes a value from each array and uses it to do search and replace on subject. + * If $replace has fewer values than search, then an empty string is used for the rest of replacement values. + * If $search is an array and $replace is a string, then this replacement string is used for every value of search. + * This function is aimed at replacing the function str_ireplace() for human-language strings. + * @link http://php.net/manual/en/function.str-ireplace + * TODO: To be revised and to be checked. + */ function api_str_ireplace($search, $replace, $subject, & $count = null, $encoding = null) { if (empty($encoding)) { $encoding = api_mb_internal_encoding(); } - if (is_array($subject)) { foreach ($subject as $key => $val) { $subject[$key] = api_str_ireplace($search, $replace, $val, $count, $encoding); } return $subject; } - if (is_array($search)) { foreach (array_keys($search) as $key) { if (is_array($replace)) { @@ -466,30 +506,36 @@ function api_str_ireplace($search, $replace, $subject, & $count = null, $encodin } return $subject; } - $search = api_strtolower($search, $encoding); $subject_lower = api_strtolower($subject, $encoding); - $total_matched_strlen = 0; $i = 0; - while (preg_match(api_add_pcre_unicode_modifier('/(.*?)'.preg_quote($search, '/').'/s', $encoding), $subject_lower, $matches)) { $matched_strlen = api_strlen($matches[0], $encoding); $subject_lower = api_substr($subject_lower, $matched_strlen, api_strlen($subject_lower, $encoding), $encoding); - $offset = $total_matched_strlen + api_strlen($matches[1], $encoding) + ($i * (api_strlen($replace, $encoding) - 1)); $subject = api_substr_replace($subject, $replace, $offset, api_strlen($search), $encoding); - $total_matched_strlen += $matched_strlen; $i++; } - $count += $i; return $subject; } -// This is a multibyte replacement of str_split(). -// See http://php.net/str_split +/** + * Converts a string to an array. 
+ * @param string $string The input string. + * @param int $split_length Maximum character-length of the chunk, one character by default. + * @param string $encoding The used internally by this function character encoding. If it is omitted, the platform character set will be used by default. + * @return array The result array of chunks with the specified length. + * Notes: + * If the optional split_length parameter is specified, the returned array will be broken down into chunks + * with each being split_length in length, otherwise each chunk will be one character in length. + * FALSE is returned if split_length is less than 1. + * If the split_length length exceeds the length of string, the entire string is returned as the first (and only) array element. + * This function is aimed at replacing the function str_split() for human-language strings. + * @link http://php.net/str_split + */ function api_str_split($string, $split_length = 1, $encoding = null) { if ($split_length < 1) { return false; @@ -497,9 +543,7 @@ function api_str_split($string, $split_length = 1, $encoding = null) { if (empty($encoding)) { $encoding = api_mb_internal_encoding(); } - $result = array(); - if (api_mb_supports($encoding)) { for ($i = 0, $length = @mb_strlen($string, $encoding); $i < $length; $i += $split_length) { $result[] = @mb_substr($string, $i, $split_length, $encoding); @@ -514,21 +558,37 @@ function api_str_split($string, $split_length = 1, $encoding = null) { $result[] = substr($string, $i, $split_length); } } - return $result; } -// This is a multibyte replacement of strcasecmp(). -// See http://php.net/manual/en/function.strcasecmp -function api_strcasecmp($str1, $str2, $encoding = null) { +/** + * Case-insensitive string comparison with extended multibyte support. + * @param string $string1 The first string. + * @param string $string2 The second string. + * @param string $encoding The used internally by this function character encoding. 
If it is omitted, the platform character set will be used by default. + * @return int Returns < 0 if $string1 is less than $string2; > 0 if $string1 is greater than $string2; and 0 if the strings are equal. + * This function is aimed at replacing the function strcasecmp() for human-language strings. + * @link http://php.net/manual/en/function.strcasecmp + */ +function api_strcasecmp($string1, $string2, $encoding = null) { if (empty($encoding)) { $encoding = api_mb_internal_encoding(); } - return strcmp(api_strtolower($str1, $encoding), api_strtolower($str2, $encoding)); + return strcmp(api_strtolower($string1, $encoding), api_strtolower($string2, $encoding)); } -// This is a multibyte replacement of stripos(). -// See http://php.net/manual/en/function.mb-stripos +/** + * Finds position of first occurrence of a string within another, case insensitive. + * @param string $haystack The string from which to get the position of the first occurrence. + * @param string $needle The string to be found. + * @param int $offset The position in $haystack to start searching from. If it is omitted, searching starts from the beginning. + * @param string $encoding The used internally by this function character encoding. If it is omitted, the platform character set will be used by default. + * @return mixed Return the numeric position of the first occurrence of $needle in the $haystack, or FALSE if $needle is not found. + * Note: The first character's position is 0, the second character position is 1, and so on. + * This function is aimed at replacing the functions stripos() and mb_stripos() for human-language strings. 
+ * @link http://php.net/manual/en/function.stripos + * @link http://php.net/manual/en/function.mb-stripos + */ function api_stripos($haystack, $needle, $offset = 0, $encoding = null) { if (empty($encoding)){ $encoding = api_mb_internal_encoding(); @@ -541,8 +601,20 @@ function api_stripos($haystack, $needle, $offset = 0, $encoding = null) { return stripos($haystack, $needle, $offset); } -// This is a multibyte replacement of stristr(). -// See http://php.net/manual/en/function.mb-stristr +/** + * Finds first occurrence of a string within another, case insensitive. + * @param string $haystack The string from which to get the first occurrence. + * @param string $needle The string to be found. + * @param bool $part Determines which portion of $haystack this function returns. The default value is FALSE. + * @param string $encoding The used internally by this function character encoding. If it is omitted, the platform character set will be used by default. + * @return mixed Returns the portion of $haystack, or FALSE if $needle is not found. + * Notes: + * If $part is set to TRUE, the function returns all of $haystack from the beginning to the first occurrence of $needle. + * If $part is set to FALSE, the function returns all of $haystack from the first occurrence of $needle to the end. + * This function is aimed at replacing the functions stristr() and mb_stristr() for human-language strings. + * @link http://php.net/manual/en/function.stristr + * @link http://php.net/manual/en/function.mb-stristr + */ function api_stristr($haystack, $needle, $part = false, $encoding = null) { if (empty($encoding)) { $encoding = api_mb_internal_encoding(); @@ -556,8 +628,15 @@ function api_stristr($haystack, $needle, $part = false, $encoding = null) { return stristr($haystack, $needle, $part); } -// Returns length of the input string. -// See http://php.net/manual/en/function.mb-strlen +/** + * Returns length of the input string. 
+ * @param string $string The string which length is to be calculated. + * @param string $encoding The used internally by this function character encoding. If it is omitted, the platform character set will be used by default. + * @return int Returns the number of characters within the string. A multi-byte character is counted as 1. + * This function is aimed at replacing the functions strlen() and mb_strlen() for human-language strings. + * @link http://php.net/manual/en/function.strlen + * @link http://php.net/manual/en/function.mb-strlen + */ function api_strlen($string, $encoding = null) { if (empty($encoding)) { $encoding = api_mb_internal_encoding(); @@ -571,8 +650,18 @@ function api_strlen($string, $encoding = null) { return strlen($string); } -// This is a multibyte replacement of strpos(). -// See http://php.net/manual/en/function.mb-strpos +/** + * Finds position of first occurrence of a string within another. + * @param string $haystack The string from which to get the position of the first occurrence. + * @param string $needle The string to be found. + * @param int $offset The position in $haystack to start searching from. If it is omitted, searching starts from the beginning. + * @param string $encoding The used internally by this function character encoding. If it is omitted, the platform character set will be used by default. + * @return mixed Return the numeric position of the first occurrence of $needle in the $haystack, or FALSE if $needle is not found. + * Note: The first character's position is 0, the second character position is 1, and so on. + * This function is aimed at replacing the functions strpos() and mb_strpos() for human-language strings. 
+ * @link http://php.net/manual/en/function.strpos + * @link http://php.net/manual/en/function.mb-strpos + */ function api_strpos($haystack, $needle, $offset = 0, $encoding = null) { if (empty($encoding)) { $encoding = api_mb_internal_encoding(); @@ -586,8 +675,20 @@ function api_strpos($haystack, $needle, $offset = 0, $encoding = null) { return strpos($haystack, $needle, $offset); } -// This is a multibyte replacement of strrchr(). -// See http://php.net/manual/en/function.mb-strrchr +/** + * Finds the last occurrence of a character in a string. + * @param string $haystack The string from which to get the last occurrence. + * @param string $needle The string whose first character is to be found. + * @param bool $part Determines which portion of $haystack this function returns. The default value is FALSE. + * @param string $encoding The used internally by this function character encoding. If it is omitted, the platform character set will be used by default. + * @return mixed Returns the portion of $haystack, or FALSE if the first character from $needle is not found. + * Notes: + * If $part is set to TRUE, the function returns all of $haystack from the beginning to the last occurrence. + * If $part is set to FALSE, the function returns all of $haystack from the last occurrence to the end. + * This function is aimed at replacing the functions strrchr() and mb_strrchr() for human-language strings. + * @link http://php.net/manual/en/function.strrchr + * @link http://php.net/manual/en/function.mb-strrchr + */ function api_strrchr($haystack, $needle, $part = false, $encoding = null) { if (empty($encoding)) { $encoding = api_mb_internal_encoding(); @@ -601,8 +702,14 @@ function api_strrchr($haystack, $needle, $part = false, $encoding = null) { return strrchr($haystack, $needle); } -// This is a multibyte replacement of strrev(). -// See http://php.net/manual/en/function.strrev +/** + * Reverses a string. + * @param string $string The string to be reversed. 
+ * @param string $encoding The used internally by this function character encoding. If it is omitted, the platform character set will be used by default. + * @return string Returns the reversed string. + * This function is aimed at replacing the function strrev() for human-language strings. + * @link http://php.net/manual/en/function.strrev + */ function api_strrev($string, $encoding = null) { if (empty($string)) { return ''; @@ -617,8 +724,18 @@ function api_strrev($string, $encoding = null) { return $result; } -// This is a multibyte replacement of strrpos(). -// See http://php.net/manual/en/function.mb-strrpos +/** + * Finds the position of last occurrence of a string in a string. + * @param string $haystack The string from which to get the position of the last occurrence. + * @param string $needle The string to be found. + * @param int $offset $offset may be specified to begin searching an arbitrary position. Negative values will stop searching at an arbitrary point prior to the end of the string. + * @param string $encoding The used internally by this function character encoding. If it is omitted, the platform character set will be used by default. + * @return mixed Return the numeric position of the last occurrence of $needle in the $haystack, or FALSE if $needle is not found. + * Note: The first character's position is 0, the second character position is 1, and so on. + * This function is aimed at replacing the functions strrpos() and mb_strrpos() for human-language strings. + * @link http://php.net/manual/en/function.strrpos + * @link http://php.net/manual/en/function.mb-strrpos + */ function api_strrpos($haystack, $needle, $offset = 0, $encoding = null) { if (empty($encoding)) { $encoding = api_mb_internal_encoding(); @@ -632,8 +749,20 @@ function api_strrpos($haystack, $needle, $offset = 0, $encoding = null) { return strrpos($haystack, $needle, $offset); } -// This is a multibyte replacement of strstr(). 
-// See http://php.net/manual/en/function.mb-strstr +/** + * Finds first occurrence of a string within another. + * @param string $haystack The string from which to get the first occurrence. + * @param string $needle The string to be found. + * @param bool $part Determines which portion of $haystack this function returns. The default value is FALSE. + * @param string $encoding The used internally by this function character encoding. If it is omitted, the platform character set will be used by default. + * @return mixed Returns the portion of $haystack, or FALSE if $needle is not found. + * Notes: + * If $part is set to TRUE, the function returns all of $haystack from the beginning to the first occurrence of $needle. + * If $part is set to FALSE, the function returns all of $haystack from the first occurrence of $needle to the end. + * This function is aimed at replacing the functions strstr() and mb_strstr() for human-language strings. + * @link http://php.net/manual/en/function.strstr + * @link http://php.net/manual/en/function.mb-strstr + */ function api_strstr($haystack, $needle, $part = false, $encoding = null) { if (empty($encoding)) { $encoding = api_mb_internal_encoding(); @@ -647,8 +776,15 @@ function api_strstr($haystack, $needle, $part = false, $encoding = null) { return strstr($haystack, $needle, $part); } -// Makes a string lowercase. -// See http://php.net/manual/en/function.mb-strtolower +/** + * Makes a string lowercase. + * @param string $string The string being lowercased. + * @param string $encoding The used internally by this function character encoding. If it is omitted, the platform character set will be used by default. + * @return string Returns the string with all alphabetic characters converted to lowercase. + * This function is aimed at replacing the functions strtolower() and mb_strtolower() for human-language strings. 
+ * @link http://php.net/manual/en/function.strtolower + * @link http://php.net/manual/en/function.mb-strtolower + */ function api_strtolower($string, $encoding = null) { if (empty($encoding)) { $encoding = api_mb_internal_encoding(); @@ -662,8 +798,15 @@ function api_strtolower($string, $encoding = null) { return strtolower($string); } -// Makes a string uppercase. -// See http://php.net/manual/en/function.mb-strtoupper +/** + * Makes a string uppercase. + * @param string $string The string being uppercased. + * @param string $encoding The used internally by this function character encoding. If it is omitted, the platform character set will be used by default. + * @return string Returns the string with all alphabetic characters converted to uppercase. + * This function is aimed at replacing the functions strtoupper() and mb_strtoupper() for human-language strings. + * @link http://php.net/manual/en/function.strtoupper + * @link http://php.net/manual/en/function.mb-strtoupper + */ function api_strtoupper($string, $encoding = null) { if (empty($encoding)) { $encoding = api_mb_internal_encoding(); @@ -677,10 +820,17 @@ function api_strtoupper($string, $encoding = null) { return strtoupper($string); } -// Translates certain characters. -// See http://php.net/manual/en/function.strtr -// TODO: To be revised and tested. -// It would be good tihs function to be removed. I hesitate to do it right now. +/** + * Translates certain characters. + * @param string $string The string being translated. + * @param mixed $from A string that contains the character to be replaced. This parameter can be also an array with pairs of characters 'from' => 'to'. + * @param string $to A string that contains the replacing characters. + * @param string $encoding The used internally by this function character encoding. If it is omitted, the platform character set will be used by default. 
+ * @return string This function returns a copy of $string, translating all occurrences of each character in $from to the corresponding character in $to. + * This function is aimed at replacing the function strtr() for human-language strings. + * @link http://php.net/manual/en/function.strtr + * TODO: To be revised and tested. Probably this function will not be needed. + */ function api_strtr($string, $from, $to = null, $encoding = null) { if (empty($string)) { return ''; @@ -702,8 +852,8 @@ function api_strtr($string, $from, $to = null, $encoding = null) { $encoding = api_mb_internal_encoding(); } $translator = array(); - $arr_from = api_get_characters($from, $encoding); - $arr_to = api_get_characters($to, $encoding); + $arr_from = api_str_split($from, 1, $encoding); + $arr_to = api_str_split($to, 1, $encoding); $n = count($arr_from); $n2 = count($arr_to); if ($n > $n2) $n = $n2; @@ -711,7 +861,7 @@ function api_strtr($string, $from, $to = null, $encoding = null) { $translator[$arr_from[$i]] = $arr_to[$i]; } } - $arr_string = api_get_characters($string, $encoding); + $arr_string = api_str_split($string, 1, $encoding); $n = count($arr_string); $result = ''; for ($i = 0; $i < $n; $i++) { @@ -724,13 +874,23 @@ function api_strtr($string, $from, $to = null, $encoding = null) { return $result; } -// Performs a multi-byte safe substr() operation based on number of characters. -// See http://bg.php.net/manual/en/function.mb-substr +/** + * Gets part of a string. + * @param string $string The input string. + * @param int $start The first position from which the extracted part begins. + * @param int $length The length in characters of the extracted part. + * @param string $encoding The used internally by this function character encoding. If it is omitted, the platform character set will be used by default. + * @return string Returns the part of the string specified by the start and length parameters. + * Note: First character's position is 0.
Second character position is 1, and so on. + * This function is aimed at replacing the functions substr() and mb_substr() for human-language strings. + * @link http://php.net/manual/en/function.substr + * @link http://php.net/manual/en/function.mb-substr + */ function api_substr($string, $start, $length = null, $encoding = null) { if (empty($encoding)) { $encoding = api_mb_internal_encoding(); } - // Passing null as $length will mean 0. This behaviour have to be corrected. + // Passing null as $length would mean 0. This behaviour has been corrected here. if (is_null($length)) { $length = api_strlen($string, $encoding); } @@ -743,24 +903,25 @@ function api_substr($string, $start, $length = null, $encoding = null) { return substr($string, $start, $length); } -// Puts a suffix into a string. -// The input variables could be arrays too. -function api_suffix($string, $suffix) { - if (is_array($string)) { - if (is_array($suffix)) { - return array_map('api_suffix', $string, $suffix); - } else { - return array_map('api_suffix', $string, array_fill(0 , count($string) , $suffix)); - } - } - if (is_array($suffix)) { - $suffix = implode('', $suffix); - } - return $string.$suffix; -} - -// This is a multibyte replacement of substr_replace(). -// See http://php.net/manual/function.substr-replace +/** + * Replaces text within a portion of a string. + * @param string $string The input string. + * @param string $replacement The replacement string. + * @param int $start The position from which replacing will begin. + * Notes: + * If $start is positive, the replacing will begin at the $start'th offset into the string. + * If $start is negative, the replacing will begin at the $start'th character from the end of the string. + * @param int $length The position where replacing will end. + * Notes: + * If given and is positive, it represents the length of the portion of the string which is to be replaced. 
+ * If it is negative, it represents the number of characters from the end of string at which to stop replacing. + * If it is not given, then it will default to api_strlen($string); i.e. end the replacing at the end of string. + * If $length is zero, then this function will have the effect of inserting replacement into the string at the given start offset. + * @param string $encoding The used internally by this function character encoding. If it is omitted, the platform character set will be used by default. + * @return string The result string is returned. + * This function is aimed at replacing the function substr_replace() for human-language strings. + * @link http://php.net/manual/function.substr-replace + */ function api_substr_replace($string, $replacement, $start, $length = null, $encoding = null) { if (empty($encoding)) { $encoding = api_mb_internal_encoding(); @@ -772,13 +933,19 @@ function api_substr_replace($string, $replacement, $start, $length = null, $enco $length = api_strlen($string, $encoding) - $start + $length; } return - api_substr($string, 0, $start, $encoding) .$replacement . + api_substr($string, 0, $start, $encoding) . $replacement . api_substr($string, $start + $length, api_strlen($string, $encoding), $encoding); } } -// Returns a string with the first character capitalized, if that character is alphabetic. -// See http://php.net/manual/en/function.ucfirst +/** + * Makes a string's first character uppercase. + * @param string $string The input string. + * @param string $encoding The used internally by this function character encoding. If it is omitted, the platform character set will be used by default. + * @return string Returns a string with the first character capitalized, if that character is alphabetic. + * This function is aimed at replacing the function ucfirst() for human-language strings. 
+ * @link http://php.net/manual/en/function.ucfirst + */ function api_ucfirst($string, $encoding = null) { if (empty($encoding)) { $encoding = api_mb_internal_encoding(); @@ -786,8 +953,14 @@ function api_ucfirst($string, $encoding = null) { return api_strtoupper(api_substr($string, 0, 1, $encoding), $encoding) . api_substr($string, 1, api_strlen($string, $encoding), $encoding); } -// Uppercases the first character of each word in a string. -// See http://php.net/manual/en/function.ucwords +/** + * Uppercases the first character of each word in a string. + * @param string $string The input string. + * @param string $encoding The used internally by this function character encoding. If it is omitted, the platform character set will be used by default. + * @return string Returns the modified string. + * This function is aimed at replacing the function ucwords() for human-language strings. + * @link http://php.net/manual/en/function.ucwords + */ function api_ucwords($string, $encoding = null) { if (empty($encoding)) { $encoding = api_mb_internal_encoding(); @@ -801,8 +974,12 @@ function api_ucwords($string, $encoding = null) { return ucwords($string); } -// This function adds a unicode modifier to a -// Perl-compatible regular expression when it is necessary. +/** + * This function adds a unicode modifier (u suffix) to a Perl-compatible regular expression depending on the specified encoding. + * @param string $pcre The Perl-compatible regular expression. + * @param string $encoding The used internally by this function character encoding. If it is omitted, the platform character set will be used by default. + * @return string Returns the same regular expression with a suffix 'u' if $encoding is 'UTF-8'. + */ function api_add_pcre_unicode_modifier($pcre, $encoding = null) { if (empty($encoding)){ $encoding = api_get_system_encoding(); @@ -810,18 +987,18 @@ function api_add_pcre_unicode_modifier($pcre, $encoding = null) { return api_is_utf8($encoding) ?
$pcre.'u' : $pcre; } +/** + * ---------------------------------------------------------------------------- + * Encoding management functions + * ---------------------------------------------------------------------------- + */ -//---------------------------------------------------------------------------- -// Encoding management functions -//---------------------------------------------------------------------------- - - -// Returns the most-probably used non-UTF-8 encoding for the given language. -// The $language parameter must be cleaned by api_refine_language_id() if it -// is necessary. -// If the returned value is not as you expect, you may do the following: -// In the table $encodings below, correct the order of the encodings for your -// language, or if it is necessary - insert at the first place a new encoding. +/** + * Returns the most-probably used non-UTF-8 encoding for the given language. + * @param string $language The specified language, the default value is the language of the user interface. + * Note: The $language parameter must be cleaned by api_refine_language_id() if it is necessary. + * @return string The encoding that corresponds to the specified language. + */ function api_get_non_utf8_encoding($language = null) { if (empty($language)) { $language = api_refine_language_id(api_get_interface_language()); @@ -838,7 +1015,11 @@ function api_get_non_utf8_encoding($language = null) { } } -// Returns a two-dimensional array of non-UTF-8 encodings for all system languages. +/** + * Returns a table with non-UTF-8 encodings for all system languages. + * @return array Returns an array in the form array('language1' => array('encoding1', 'encoding2', ...), ...) + * Note: The function api_get_non_utf8_encoding() returns the first encoding from this array that corresponds to the given language. + */ function & api_non_utf8_encodings() { // The following list may have some inconsistencies.
// Place the most used for your language encoding at the first place. @@ -904,26 +1085,44 @@ vietnamese: WINDOWS-1258; return $encodings; } -// This function unifies internally the encoding identificators. -// It is to be adjusted in case of id comparison problems. +/** + * This function unifies the encoding identifiers, so they could be compared. + * @param string $encoding The specified encoding. + * @return string Returns the encoding identifier modified into a form suitable for comparison. + */ function api_refine_encoding_id($encoding) { return strtoupper($encoding); } -// This function checks whether two $encoding are equal (same, equvalent). -function api_equal_encodings($encoding_1, $encoding_2) { +/** + * This function checks whether two encodings are equal (same, equivalent). + * @param string $encoding1 The first encoding + * @param string $encoding2 The second encoding + * @return bool Returns TRUE if the encodings are equal, FALSE otherwise. + */ +function api_equal_encodings($encoding1, $encoding2) { // We have to deal with aliases. This function alone does not solve // the problem entirely. And there is no time for this kind of research. // At the momemnt, the quick proposition could be: - return strcmp(api_refine_encoding_id($encoding_1), api_refine_encoding_id($encoding_2)) == 0 ? true : false; + return strcmp(api_refine_encoding_id($encoding1), api_refine_encoding_id($encoding2)) == 0 ? true : false; } -// Returns true if the given encoding id means UTF-8, otherwise returns false. +/** + * This function checks whether a given encoding is UTF-8. + * @param string $encoding The tested encoding. + * @return bool Returns TRUE if the given encoding id means UTF-8, otherwise returns false. + */ function api_is_utf8($encoding) { return api_equal_encodings($encoding, 'UTF-8'); } -// Returns the encoding currently used by the system. +/** + * This function returns the encoding currently used by the system. + * @return string The system's encoding.
+ * Note: The value of api_get_setting('platform_charset') is tried to be returned first, + * on the second place the global variable $charset is tried to be returned. If for some + * reason both attempts fail, 'ISO-8859-15' will be returned. + */ function api_get_system_encoding() { $system_encoding = api_get_setting('platform_charset'); if (!empty($system_encoding)) { @@ -933,8 +1132,13 @@ function api_get_system_encoding() { return empty($charset) ? 'ISO-8859-15' : $charset; } -// Sets/Gets internal character encoding. -// See http://php.net/manual/en/function.mb-internal-encoding +/** + * Sets/Gets internal character encoding of the common string functions within the PHP mbstring extension. + * @param string $encoding When this parameter is given, the function sets the internal encoding. + * @return string When $encoding parameter is not given, the function returns the internal encoding. + * Note: This function is used in the global initialization script for setting the internal encoding to the platform's character set. + * @link http://php.net/manual/en/function.mb-internal-encoding + */ function api_mb_internal_encoding($encoding = null) { static $mb_internal_encoding = null; if (empty($encoding)) { @@ -950,8 +1154,13 @@ function api_mb_internal_encoding($encoding = null) { return false; } -// Sets/Gets current encoding for multibyte regex. -// See http://php.net/manual/en/function.mb-regex-encoding +/** + * Sets/Gets internal character encoding of the regular expression functions (ereg-like) within the PHP mbstring extension. + * @param string $encoding When this parameter is given, the function sets the internal encoding. + * @return string When $encoding parameter is not given, the function returns the internal encoding. + * Note: This function is used in the global initialization script for setting the internal encoding to the platform's character set. 
+ * @link http://php.net/manual/en/function.mb-regex-encoding + */ function api_mb_regex_encoding($encoding = null) { static $mb_regex_encoding = null; if (empty($encoding)) { @@ -967,13 +1176,24 @@ function api_mb_regex_encoding($encoding = null) { return false; } -// Retrieves internal configuration variables of iconv extension. -// The parameter $type could be: 'iconv_internal_encoding', 'iconv_input_encoding', or 'iconv_output_encoding'. -// See http://php.net/manual/en/function.iconv-get-encoding +/** + * Retrieves specified internal encoding configuration variable within the PHP iconv extension. + * @param string $type The parameter $type could be: 'iconv_internal_encoding', 'iconv_input_encoding', or 'iconv_output_encoding'. + * @return mixed The function returns the requested encoding or FALSE on error. + * @link http://php.net/manual/en/function.iconv-get-encoding + */ function api_iconv_get_encoding($type) { return api_iconv_set_encoding($type); } +/** + * Sets specified internal encoding configuration variables within the PHP iconv extension. + * @param string $type The parameter $type could be: 'iconv_internal_encoding', 'iconv_input_encoding', or 'iconv_output_encoding'. + * @param string $encoding The desired encoding to be set. + * @return bool Returns TRUE on success, FALSE on error. + * Note: This function is used in the global initialization script for setting these three internal encodings to the platform's character set. + * @link http://php.net/manual/en/function.iconv-set-encoding + */ // Sets current setting for character encoding conversion. // The parameter $type could be: 'iconv_internal_encoding', 'iconv_input_encoding', or 'iconv_output_encoding'. 
function api_iconv_set_encoding($type, $encoding = null) { @@ -983,7 +1203,6 @@ function api_iconv_set_encoding($type, $encoding = null) { if (!api_iconv_present()) { return false; } - switch ($type) { case 'iconv_internal_encoding': if (empty($encoding)) { @@ -1003,7 +1222,6 @@ function api_iconv_set_encoding($type, $encoding = null) { return false; } break; - case 'iconv_input_encoding': if (empty($encoding)) { if (is_null($iconv_input_encoding)) { @@ -1022,7 +1240,6 @@ function api_iconv_set_encoding($type, $encoding = null) { return false; } break; - case 'iconv_output_encoding': if (empty($encoding)) { if (is_null($iconv_output_encoding)) { @@ -1041,18 +1258,25 @@ function api_iconv_set_encoding($type, $encoding = null) { return false; } break; - default: return false; } } -// Checks whether the specified encoding is supported by this API. +/** + * Checks whether a specified encoding is supported by this API. + * @param string $encoding The specified encoding. + * @return bool Returns TRUE when the specified encoding is supported, FALSE otherwise. + */ function api_is_encoding_supported($encoding) { return api_mb_supports($encoding) || api_iconv_supports($encoding); } -// Checks whether the specified encoding is supported by mbstring library. +/** + * Checks whether the specified encoding is supported by the PHP mbstring extension. + * @param string $encoding The specified encoding. + * @return bool Returns TRUE when the specified encoding is supported, FALSE otherwise. + */ function api_mb_supports($encoding) { static $supported = array(); $encoding = api_refine_encoding_id($encoding); @@ -1064,7 +1288,11 @@ function api_mb_supports($encoding) { return $supported[$encoding] ? true : false; } -// Checks whether the specified non-UTF-8 encoding is supported by iconv library. +/** + * Checks whether the specified encoding is supported by the PHP iconv extension. + * @param string $encoding The specified encoding.
+ * @return bool Returns TRUE when the specified encoding is supported, FALSE otherwise. + */ function api_iconv_supports($encoding) { static $supported = array(); $encoding = api_refine_encoding_id($encoding); @@ -1083,7 +1311,10 @@ function api_iconv_supports($encoding) { return $supported[$encoding]; } -// Checks whether the iconv library is installed and works. +/** + * Checks whether the PHP iconv extension is installed and works. + * @return bool Returns TRUE when the iconv extension is detected, FALSE otherwise. + */ function api_iconv_present() { static $iconv_present = null; if (!is_null($iconv_present)) { @@ -1100,7 +1331,11 @@ function api_iconv_present() { return $iconv_present; } -// Checks whether the specified encoding is supported by html-entities operations. +/** + * Checks whether the specified encoding is supported by the html-entity related functions. + * @param string $encoding The specified encoding. + * @return bool Returns TRUE when the specified encoding is supported, FALSE otherwise. + */ function api_html_entity_supports($encoding) { static $supported = array(); $encoding = api_refine_encoding_id($encoding); @@ -1128,13 +1363,18 @@ EUC-JP, EUCJP return $supported[$encoding] ? true : false; } +/** + * ---------------------------------------------------------------------------- + * String validation functions concerning some encodings + * ---------------------------------------------------------------------------- + */ -//---------------------------------------------------------------------------- -// String validation functions concerning some encodings -//---------------------------------------------------------------------------- - - -// Returns true if the specified string is a valid UTF-8 one and false otherwise. +/** + * Checks a string for UTF-8 validity. + * @param string $string The string to be tested/validated. + * @return bool Returns TRUE when the tested string is a valid UTF-8 one, FALSE otherwise.
+ * @link http://en.wikipedia.org/wiki/UTF-8 + */ function api_is_valid_utf8($string) { //return @mb_detect_encoding($string, 'UTF-8', true) == 'UTF-8' ? true : false; @@ -1142,8 +1382,7 @@ function api_is_valid_utf8($string) { // found a string with a single cyrillic letter (single byte), that is // wrongly detected as UTF-8. Possibly, there would be problems with other // languages too. - // - // To understand the following algorithm see http://en.wikipedia.org/wiki/UTF-8 + // An alternative implementation will be used: $len = strlen($string); $i = 0; @@ -1324,19 +1563,26 @@ function api_is_valid_utf8($string) { return true; // Empty strings are valid too. } -// Checks whether a string contains 7bit ASCII characters only. +/** + * Checks whether a string contains 7-bit ASCII characters only. + * @param string $string The string to be tested/validated. + * @return bool Returns TRUE when the tested string contains 7-bit ASCII characters only, FALSE otherwise. + */ function api_is_valid_ascii($string) { return @mb_detect_encoding($string, 'ASCII', true) == 'ASCII' ? true : false; } +/** + * ---------------------------------------------------------------------------- + * Language management functions + * ---------------------------------------------------------------------------- + */ -//---------------------------------------------------------------------------- -// Language management functions -//---------------------------------------------------------------------------- - - -// Returns a pure language id, without possible suffixes -// that will disturb language identification in certain cases. +/** + * Returns a purified language id, without possible suffixes that will disturb language identification in certain cases. + * @param string $language The input language identifier, for example 'french_unicode'. + * @return string The same language identifier, purified or filtered, for example 'french'.
+ */ function api_refine_language_id($language) { return ( str_replace('_unicode', '', strtolower( @@ -1346,14 +1592,20 @@ function api_refine_language_id($language) { str_replace('_KM', '', $language))))))); } +/** + * ---------------------------------------------------------------------------- + * Array functions + * ---------------------------------------------------------------------------- + */ -//---------------------------------------------------------------------------- -// Array functions -//---------------------------------------------------------------------------- - - -// A case insensitive version of in_array() function. -// See http://php.net/manual/en/function.in-array.php +/** + * Checks if a value exists in an array, a case-insensitive version of the in_array() function with extended multibyte support. + * @param mixed $needle The searched value. If needle is a string, the comparison is done in a case-insensitive manner. + * @param array $haystack The array. + * @param bool $strict If it is set to TRUE then the function will also check the types of the $needle in the $haystack. The default value is FALSE. + * @return bool Returns TRUE if $needle is found in the array, FALSE otherwise. + * @link http://php.net/manual/en/function.in-array.php + */ function api_in_array_nocase($needle, $haystack, $strict = false, $encoding = null) { if (is_array($needle)) { foreach ($needle as $item) { @@ -1361,7 +1613,6 @@ function api_in_array_nocase($needle, $haystack, $strict = false, $encoding = nu } return false; } - if (!is_string($needle)) { return in_array($needle, $haystack, $strict); } @@ -1465,4 +1716,4 @@ if (!function_exists('mb_strstr')) { } } -?> +?> \ No newline at end of file