$val) { $subject[$key] = api_str_ireplace($search, $replace, $val, $count, $encoding); } return $subject; } if (is_array($search)) { foreach (array_keys($search) as $key) { if (is_array($replace)) { if (array_key_exists($key, $replace)) { $subject = api_str_ireplace($search[$key], $replace[$key], $subject, $count, $encoding); } else { $subject = api_str_ireplace($search[$key], '', $subject, $count, $encoding); } } else { $subject = api_str_ireplace($search[$key], $replace, $subject, $count, $encoding); } } return $subject; } $search = api_strtolower($search, $encoding); $subject_lower = api_strtolower($subject, $encoding); $total_matched_strlen = 0; $i = 0; while (preg_match(api_add_pcre_unicode_modifier('/(.*?)'.preg_quote($search, '/').'/s', $encoding), $subject_lower, $matches)) { $matched_strlen = api_strlen($matches[0], $encoding); $subject_lower = api_substr($subject_lower, $matched_strlen, api_strlen($subject_lower, $encoding), $encoding); $offset = $total_matched_strlen + api_strlen($matches[1], $encoding) + ($i * (api_strlen($replace, $encoding) - 1)); $subject = api_substr_replace($subject, $replace, $offset, api_strlen($search), $encoding); $total_matched_strlen += $matched_strlen; $i++; } $count += $i; return $subject; } /** * Converts a string to an array. * @param string $string The input string. * @param int $split_length Maximum character-length of the chunk, one character by default. * @param string $encoding The used internally by this function character encoding. If it is omitted, the platform character set will be used by default. * @return array The result array of chunks with the spcified length. * Notes: * If the optional split_length parameter is specified, the returned array will be broken down into chunks * with each being split_length in length, otherwise each chunk will be one character in length. * FALSE is returned if split_length is less than 1. * If the split_length length exceeds the length of string, the entire string is returned as the first (and only) array element. * This function is aimed at replacing the function str_split() for human-language strings. * @link http://php.net/str_split */ function api_str_split($string, $split_length = 1, $encoding = null) { if ($split_length < 1) { return false; } if (empty($encoding)) { $encoding = api_mb_internal_encoding(); } $result = array(); if (api_mb_supports($encoding)) { for ($i = 0, $length = @mb_strlen($string, $encoding); $i < $length; $i += $split_length) { $result[] = @mb_substr($string, $i, $split_length, $encoding); } } elseif (api_iconv_supports($encoding)) { for ($i = 0, $length = api_strlen($string, $encoding); $i < $length; $i += $split_length) { $result[] = api_substr($string, $i, $split_length, $encoding); } } else { for ($i = 0, $length = strlen($string); $i < $length; $i += $split_length) { $result[] = substr($string, $i, $split_length); } } return $result; } /** * Case-insensitive string comparison wuth extended multibyte support. * @param string $string1 The first string. * @param string $string2 The second string. * @param string $encoding The used internally by this function character encoding. If it is omitted, the platform character set will be used by default. * @return int Returns < 0 if $string1 is less than $string2; > 0 if $string1 is greater than $string2; and 0 if the strings are equal. * This function is aimed at replacing the function strcasecmp() for human-language strings. * @link http://php.net/manual/en/function.strcasecmp */ function api_strcasecmp($string1, $string2, $encoding = null) { if (empty($encoding)) { $encoding = api_mb_internal_encoding(); } return strcmp(api_strtolower($string1, $encoding), api_strtolower($string2, $encoding)); } /** * Finds position of first occurrence of a string within another, case insensitive. * @param string $haystack The string from which to get the position of the first occurrence. * @param string $needle The string to be found. * @param int $offset The position in $haystack to start searching from. If it is omitted, searching starts from the beginning. * @param string $encoding The used internally by this function character encoding. If it is omitted, the platform character set will be used by default. * @return mixed Return the numeric position of the first occurrence of $needle in the $haystack, or FALSE if $needle is not found. * Note: The first character's position is 0, the second character position is 1, and so on. * This function is aimed at replacing the functions stripos() and mb_stripos() for human-language strings. * @link http://php.net/manual/en/function.stripos * @link http://php.net/manual/en/function.mb-stripos */ function api_stripos($haystack, $needle, $offset = 0, $encoding = null) { if (empty($encoding)){ $encoding = api_mb_internal_encoding(); } if (api_mb_supports($encoding)) { return @mb_stripos($haystack, $needle, $offset, $encoding); } elseif (api_iconv_supports($encoding)) { return api_utf8_decode(@mb_stripos(api_utf8_encode($haystack, $encoding), api_utf8_encode($needle, $encoding), $offset, 'UTF-8'), $encoding); } return stripos($haystack, $needle, $offset); } /** * Finds first occurrence of a string within another, case insensitive. * @param string $haystack The string from which to get the first occurrence. * @param string @needle The string to be found. * @param bool $part Determines which portion of $haystack this function returns. The default value is FALSE. * @param string $encoding The used internally by this function character encoding. If it is omitted, the platform character set will be used by default. * @return mixed Returns the portion of $haystack, or FALSE if $needle is not found. * Notes: * If $part is set to TRUE, the function returns all of $haystack from the beginning to the first occurrence of $needle. * If $part is set to FALSE, the function returns all of $haystack from the first occurrence of $needle to the end. * This function is aimed at replacing the functions stristr() and mb_stristr() for human-language strings. * @link http://php.net/manual/en/function.stristr * @link http://php.net/manual/en/function.mb-stristr */ function api_stristr($haystack, $needle, $part = false, $encoding = null) { if (empty($encoding)) { $encoding = api_mb_internal_encoding(); } if (api_mb_supports($encoding)) { return @mb_stristr($haystack, $needle, $part, $encoding); } elseif (api_iconv_supports($encoding)) { return api_utf8_decode(@mb_stristr(api_utf8_encode($haystack, $encoding), api_utf8_encode($needle, $encoding), $part, 'UTF-8')); } return stristr($haystack, $needle, $part); } /** * Returns length of the input string. * @param string $string The string which length is to be calculated. * @param string $encoding The used internally by this function character encoding. If it is omitted, the platform character set will be used by default. * @return int Returns the number of characters within the string. A multi-byte character is counted as 1. * This function is aimed at replacing the functions strlen() and mb_strlen() for human-language strings. * @link http://php.net/manual/en/function.strlen * @link http://php.net/manual/en/function.mb-strlen * Note: When you use strlen() to test for an empty string, you needn't change it to api_strlen(). * For example, in lines like the following: * if (strlen($string) > 0) * if (strlen($string) != 0) * there is no need the original function strlen() to be changed, it works correctly and faster for these cases. */ function api_strlen($string, $encoding = null) { if (empty($encoding)) { $encoding = api_mb_internal_encoding(); } if (api_mb_supports($encoding)) { return @mb_strlen($string, $encoding); } elseif (api_iconv_supports($encoding)) { return @iconv_strlen($string, $encoding); } return strlen($string); } /** * Finds position of first occurrence of a string within another. * @param string $haystack The string from which to get the position of the first occurrence. * @param string $needle The string to be found. * @param int $offset The position in $haystack to start searching from. If it is omitted, searching starts from the beginning. * @param string $encoding The used internally by this function character encoding. If it is omitted, the platform character set will be used by default. * @return mixed Return the numeric position of the first occurrence of $needle in the $haystack, or FALSE if $needle is not found. * Note: The first character's position is 0, the second character position is 1, and so on. * This function is aimed at replacing the functions strpos() and mb_strpos() for human-language strings. * @link http://php.net/manual/en/function.strpos * @link http://php.net/manual/en/function.mb-strpos */ function api_strpos($haystack, $needle, $offset = 0, $encoding = null) { if (empty($encoding)) { $encoding = api_mb_internal_encoding(); } if (api_mb_supports($encoding)) { return @mb_strpos($haystack, $needle, $offset, $encoding); } elseif (api_iconv_supports($encoding)) { return api_utf8_decode(@mb_strpos(api_utf8_encode($haystack, $encoding), api_utf8_encode($needle, $encoding), $offset, 'UTF-8'), $encoding); } return strpos($haystack, $needle, $offset); } /** * Finds the last occurrence of a character in a string. * @param string $haystack The string from which to get the last occurrence. * @param string $needle The string which first character is to be found. * @param bool $part Determines which portion of $haystack this function returns. The default value is FALSE. * @param string $encoding The used internally by this function character encoding. If it is omitted, the platform character set will be used by default. * @return mixed Returns the portion of $haystack, or FALSE if the first character from $needle is not found. * Notes: * If $part is set to TRUE, the function returns all of $haystack from the beginning to the first occurrence. * If $part is set to FALSE, the function returns all of $haystack from the first occurrence to the end. * This function is aimed at replacing the functions strrchr() and mb_strrchr() for human-language strings. * @link http://php.net/manual/en/function.strrchr * @link http://php.net/manual/en/function.mb-strrchr */ function api_strrchr($haystack, $needle, $part = false, $encoding = null) { if (empty($encoding)) { $encoding = api_mb_internal_encoding(); } if (api_mb_supports($encoding)) { return @mb_strrchr($haystack, $needle, $part, $encoding); } elseif (api_iconv_supports($encoding)) { return api_utf8_decode(@mb_strrchr(api_utf8_encode($haystack, $encoding), api_utf8_encode($needle, $encoding), $part, 'UTF-8'), $encoding); } return strrchr($haystack, $needle); } /** * Reverses a string. * @param string $string The string to be reversed. * @param string $encoding The used internally by this function character encoding. If it is omitted, the platform character set will be used by default. * @return string Returns the reversed string. * This function is aimed at replacing the function strrev() for human-language strings. * @link http://php.net/manual/en/function.strrev */ function api_strrev($string, $encoding = null) { if (empty($string)) { return ''; } if (empty($encoding)) { $encoding = api_mb_internal_encoding(); } $result = ''; for ($i = api_strlen($string, $encoding) - 1; $i > -1; $i--) { $result .= api_substr($string, $i, 1, $encoding); } return $result; } /** * Finds the position of last occurrence of a string in a string. * @param string $haystack The string from which to get the position of the last occurrence. * @param string $needle The string to be found. * @param int $offset $offset may be specified to begin searching an arbitrary position. Negative values will stop searching at an arbitrary point prior to the end of the string. * @param string $encoding The used internally by this function character encoding. If it is omitted, the platform character set will be used by default. * @return mixed Return the numeric position of the first occurrence of $needle in the $haystack, or FALSE if $needle is not found. * Note: The first character's position is 0, the second character position is 1, and so on. * This function is aimed at replacing the functions strrpos() and mb_strrpos() for human-language strings. * @link http://php.net/manual/en/function.strrpos * @link http://php.net/manual/en/function.mb-strrpos */ function api_strrpos($haystack, $needle, $offset = 0, $encoding = null) { if (empty($encoding)) { $encoding = api_mb_internal_encoding(); } if (api_mb_supports($encoding)) { return @mb_strrpos($haystack, $needle, $offset, $encoding); } elseif (api_iconv_supports($encoding)) { return api_utf8_decode(@mb_strrpos(api_utf8_encode($haystack, $encoding), api_utf8_encode($needle, $encoding), $offset, 'UTF-8'), $encoding); } return strrpos($haystack, $needle, $offset); } /** * Finds first occurrence of a string within another. * @param string $haystack The string from which to get the first occurrence. * @param string @needle The string to be found. * @param bool $part Determines which portion of $haystack this function returns. The default value is FALSE. * @param string $encoding The used internally by this function character encoding. If it is omitted, the platform character set will be used by default. * @return mixed Returns the portion of $haystack, or FALSE if $needle is not found. * Notes: * If $part is set to TRUE, the function returns all of $haystack from the beginning to the first occurrence of $needle. * If $part is set to FALSE, the function returns all of $haystack from the first occurrence of $needle to the end. * This function is aimed at replacing the functions strstr() and mb_strstr() for human-language strings. * @link http://php.net/manual/en/function.strstr * @link http://php.net/manual/en/function.mb-strstr */ function api_strstr($haystack, $needle, $part = false, $encoding = null) { if (empty($encoding)) { $encoding = api_mb_internal_encoding(); } if (api_mb_supports($encoding)) { return @mb_strstr($haystack, $needle, $part, $encoding); } elseif (api_iconv_supports($encoding)) { return api_utf8_decode(@mb_strstr(api_utf8_encode($haystack, $encoding), api_utf8_encode($needle, $encoding), $part, 'UTF-8'), $encoding); } return strstr($haystack, $needle, $part); } /** * Makes a string lowercase. * @param string $string The string being lowercased. * @param string $encoding The used internally by this function character encoding. If it is omitted, the platform character set will be used by default. * @return string Returns the string with all alphabetic characters converted to lowercase. * This function is aimed at replacing the functions strtolower() and mb_strtolower() for human-language strings. * @link http://php.net/manual/en/function.strtolower * @link http://php.net/manual/en/function.mb-strtolower */ function api_strtolower($string, $encoding = null) { if (empty($encoding)) { $encoding = api_mb_internal_encoding(); } if (api_mb_supports($encoding)) { return @mb_strtolower($string, $encoding); } elseif (api_iconv_supports($encoding)) { return api_utf8_decode(@mb_strtolower(api_utf8_encode($string, $encoding), 'UTF-8'), $encoding); } return strtolower($string); } /** * Makes a string uppercase. * @param string $string The string being uppercased. * @param string $encoding The used internally by this function character encoding. If it is omitted, the platform character set will be used by default. * @return string Returns the string with all alphabetic characters converted to uppercase. * This function is aimed at replacing the functions strtoupper() and mb_strtoupper() for human-language strings. * @link http://php.net/manual/en/function.strtoupper * @link http://php.net/manual/en/function.mb-strtoupper */ function api_strtoupper($string, $encoding = null) { if (empty($encoding)) { $encoding = api_mb_internal_encoding(); } if (api_mb_supports($encoding)) { return @mb_strtoupper($string, $encoding); } elseif (api_iconv_supports($encoding)) { return api_utf8_decode(@mb_strtoupper(api_utf8_encode($string, $encoding), 'UTF-8'), $encoding); } return strtoupper($string); } /** * Translates certain characters. * @param string $string The string being translated. * @param mixed $from A string that contains the character to be replaced. This parameter can be also an array with pairs of characters 'from' => 'to'. * @param string $to A string that contains the replacing characters. * @param string $encoding The used internally by this function character encoding. If it is omitted, the platform character set will be used by default. * @return string This function returns a copy of $string, translating all occurrences of each character in $from to the corresponding character in $to. * This function is aimed at replacing the function strtr() for human-language strings. * @link http://php.net/manual/en/function.strtr * TODO: To be revised and tested. Probably this function will be not needed. */ function api_strtr($string, $from, $to = null, $encoding = null) { if (empty($string)) { return ''; } if (is_array($from)) { if (empty($from)) { return $string; } $encoding = $to; if (empty($encoding)){ $encoding = api_mb_internal_encoding(); } $translator = $from; } else { if (empty($from) || empty($to)) { return $string; } if (empty($encoding)) { $encoding = api_mb_internal_encoding(); } $translator = array(); $arr_from = api_str_split($from, 1, $encoding); $arr_to = api_str_split($to, 1, $encoding); $n = count($arr_from); $n2 = count($arr_to); if ($n > $n2) $n = $n2; for ($i = 0; $i < $n; $i++) { $translator[$arr_from[$i]] = $arr_to[$i]; } } $arr_string = api_str_split($string, 1, $encoding); $n = count($arr_string); $result = ''; for ($i = 0; $i < $n; $i++) { if (is_set($translator[$arr_string[$i]])) { $result .= $translator[$arr_string[$i]]; } else { $result .= $arr_string[$i]; } } return $result; } /** // Gets part of a string. * @param string $string The input string. * @param int $start The first position from which the extracted part begins. * @param int $length The length in character of the extracted part. * @param string $encoding The used internally by this function character encoding. If it is omitted, the platform character set will be used by default. * @return string Returns the part of the string specified by the start and length parameters. * Note: First character's position is 0. Second character position is 1, and so on. * This function is aimed at replacing the functions substr() and mb_substr() for human-language strings. * @link http://php.net/manual/en/function.substr * @link http://php.net/manual/en/function.mb-substr */ function api_substr($string, $start, $length = null, $encoding = null) { if (empty($encoding)) { $encoding = api_mb_internal_encoding(); } // Passing null as $length would mean 0. This behaviour has been corrected here. if (is_null($length)) { $length = api_strlen($string, $encoding); } if (api_mb_supports($encoding)) { return @mb_substr($string, $start, $length, $encoding); } elseif (api_iconv_supports($encoding)) { return api_utf8_decode(@mb_substr(api_utf8_encode($string, $encoding), $start, $length, 'UTF-8'), $encoding); } return substr($string, $start, $length); } /** * Replaces text within a portion of a string. * @param string $string The input string. * @param string $replacement The replacement string. * @param int $start The position from which replacing will begin. * Notes: * If $start is positive, the replacing will begin at the $start'th offset into the string. * If $start is negative, the replacing will begin at the $start'th character from the end of the string. * @param int $length The position where replacing will end. * Notes: * If given and is positive, it represents the length of the portion of the string which is to be replaced. * If it is negative, it represents the number of characters from the end of string at which to stop replacing. * If it is not given, then it will default to api_strlen($string); i.e. end the replacing at the end of string. * If $length is zero, then this function will have the effect of inserting replacement into the string at the given start offset. * @param string $encoding The used internally by this function character encoding. If it is omitted, the platform character set will be used by default. * @return string The result string is returned. * This function is aimed at replacing the function substr_replace() for human-language strings. * @link http://php.net/manual/function.substr-replace */ function api_substr_replace($string, $replacement, $start, $length = null, $encoding = null) { if (empty($encoding)) { $encoding = api_mb_internal_encoding(); } if ($length == null) { return api_substr($string, 0, $start, $encoding) . $replacement; } else { if ($length < 0) { $length = api_strlen($string, $encoding) - $start + $length; } return api_substr($string, 0, $start, $encoding) . $replacement . api_substr($string, $start + $length, api_strlen($string, $encoding), $encoding); } } /** * Makes a string's first character uppercase. * @param string $string The input string. * @param string $encoding The used internally by this function character encoding. If it is omitted, the platform character set will be used by default. * @return string Returns a string with the first character capitalized, if that character is alphabetic. * This function is aimed at replacing the function ucfirst() for human-language strings. * @link http://php.net/manual/en/function.ucfirst */ function api_ucfirst($string, $encoding = null) { if (empty($encoding)) { $encoding = api_mb_internal_encoding(); } return api_strtoupper(api_substr($string, 0, 1, $encoding), $encoding) . api_substr($string, 1, api_strlen($string, $encoding), $encoding); } /** * Uppercases the first character of each word in a string. * @param string $string The input string. * @param string $encoding The used internally by this function character encoding. If it is omitted, the platform character set will be used by default. * @return string Returns the modified string. * This function is aimed at replacing the function ucwords() for human-language strings. * @link http://php.net/manual/en/function.ucwords */ function api_ucwords($string, $encoding = null) { if (empty($encoding)) { $encoding = api_mb_internal_encoding(); } if (api_mb_supports($encoding)) { return @mb_convert_case($string, MB_CASE_TITLE, $encoding); } elseif (api_iconv_supports($encoding)) { return api_utf8_decode(@mb_convert_case(api_utf8_encode($string, $encoding), MB_CASE_TITLE, 'UTF-8'), $encoding); } return ucwords($string); } /** * This function adds a unicode modifier (u suffix) to a Perl-compatible regular expression depending on the specified encoding. * @param string $pcre The Perl-compatible regular expression. * @param string $encoding The used internally by this function character encoding. If it is omitted, the platform character set will be used by default. * @return string Returns the same regular expression wit a suffix 'u' if $encoding is 'UTF-8'. */ function api_add_pcre_unicode_modifier($pcre, $encoding = null) { if (empty($encoding)){ $encoding = api_get_system_encoding(); } return api_is_utf8($encoding) ? $pcre.'u' : $pcre; } /** * ---------------------------------------------------------------------------- * Encoding management functions * ---------------------------------------------------------------------------- */ /** * Returns the most-probably used non-UTF-8 encoding for the given language. * @param string $language The specified language, the default value is the language of the user interface. * Note: The $language parameter must be cleaned by api_refine_language_id() if it is necessary. * @return string The correspondent encoding to the specified language. */ function api_get_non_utf8_encoding($language = null) { if (empty($language)) { $language = api_get_interface_language(); } $language = api_refine_language_id($language); $encodings = & api_non_utf8_encodings(); if (is_array($encodings[$language])) { if (!empty($encodings[$language][0])) { return $encodings[$language][0]; } else { return 'ISO-8859-15'; } } else { return 'ISO-8859-15'; } } /** * Returns a table with non-UTF-8 encodings for all system languages. * @return array Returns an array in the form array('language1' => array('encoding1', encoding2', ...), ...) * Note: The function api_get_non_utf8_encoding() returns the first encoding from this array that is correspondent to the given language. */ function & api_non_utf8_encodings() { // The following list may have some inconsistencies. // Place the most used for your language encoding at the first place. // If you are adding an encoding, check whether it is supported either by // mbstring library, either by iconv library. // If you modify this list, please, follow the given syntax exactly. // The language names must be stripped of any suffixes, such as _unicode, _corporate, _org, etc. static $encodings = ' arabic: WINDOWS-1256, ISO-8859-6; asturian: ISO-8859-15, WINDOWS-1252, ISO-8859-1; bosnian: WINDOWS-1250; brazilian: ISO-8859-15, WINDOWS-1252, ISO-8859-1; bulgarian: WINDOWS-1251; catalan: ISO-8859-15, WINDOWS-1252, ISO-8859-1; croatian: WINDOWS-1250; czech: WINDOWS-1250, ISO-8859-2; danish: ISO-8859-15, WINDOWS-1252, ISO-8859-1; dari: WINDOWS-1256; dutch: ISO-8859-15, WINDOWS-1252, ISO-8859-1; english: ISO-8859-15, WINDOWS-1252, ISO-8859-1; euskera: ISO-8859-15, WINDOWS-1252, ISO-8859-1; esperanto: ISO-8859-15, WINDOWS-1252, ISO-8859-1; finnish: ISO-8859-15, WINDOWS-1252, ISO-8859-1; french: ISO-8859-15, WINDOWS-1252, ISO-8859-1; friulian: ISO-8859-15, WINDOWS-1252, ISO-8859-1; galician: ISO-8859-15, WINDOWS-1252, ISO-8859-1; georgian: GEORGIAN-ACADEMY, GEORGIAN-PS; german: ISO-8859-15, WINDOWS-1252, ISO-8859-1; greek: WINDOWS-1253, ISO-8859-7; hebrew: ISO-8859-8, WINDOWS-1255; hungarian: WINDOWS-1250, ISO-8859-2; indonesian: ISO-8859-15, WINDOWS-1252, ISO-8859-1; italian: ISO-8859-15, WINDOWS-1252, ISO-8859-1; japanese: EUC-JP, ISO-2022-JP, Shift-JIS; korean: EUC-KR, ISO-2022-KR, CP949; latvian: WINDOWS-1257, ISO-8859-13; lithuanian: WINDOWS-1257, ISO-8859-13; macedonian: WINDOWS-1251; malay: ISO-8859-15, WINDOWS-1252, ISO-8859-1; norwegian: ISO-8859-15, WINDOWS-1252, ISO-8859-1; occitan: ISO-8859-15, WINDOWS-1252, ISO-8859-1; pashto: WINDOWS-1256; persian: WINDOWS-1256; polish: WINDOWS-1250, ISO-8859-2; portuguese: ISO-8859-15, WINDOWS-1252, ISO-8859-1; quechua_cusco: ISO-8859-15, WINDOWS-1252, ISO-8859-1; romanian: WINDOWS-1250, ISO-8859-2; russian: KOI8-R, WINDOWS-1251; serbian: ISO-8859-15, WINDOWS-1252, ISO-8859-1, WINDOWS-1251; simpl_chinese: GB2312, WINDOWS-936; slovak: WINDOWS-1250, ISO-8859-2; slovenian: WINDOWS-1250, ISO-8859-2; spanish: ISO-8859-15, WINDOWS-1252, ISO-8859-1; swahili: ISO-8859-1; swedish: ISO-8859-15, WINDOWS-1252, ISO-8859-1; thai: WINDOWS-874, ISO-8859-11; trad_chinese: BIG-5, EUC-TW; turkce: WINDOWS-1254, ISO-8859-9; ukrainian: KOI8-U; vietnamese: WINDOWS-1258, VISCII, TCVN; yoruba: ISO-8859-15, WINDOWS-1252, ISO-8859-1; '; if (!is_array($encodings)) { $table = explode(';', str_replace(' ', '', $encodings)); $encodings = array(); foreach ($table as & $row) { $row = trim($row); if (!empty($row)) { $row = explode(':', $row); $encodings[$row[0]] = explode(',', strtoupper($row[1])); } } } return $encodings; } /** * This function unifies the encoding identificators, so they could be compared. * @param string $encoding The specified encoding. * @return string Returns the encoding identificator modified in suitable for comparison way. */ function api_refine_encoding_id($encoding) { return strtoupper($encoding); } /** * This function checks whether two $encoding are equal (same, equvalent). * @param string $encoding1 The first encoding * @param string $encoding2 The second encoding * @return bool Returns TRUE if the encodings are equal, FALSE otherwise. */ function api_equal_encodings($encoding1, $encoding2) { // We have to deal with aliases. This function alone does not solve // the problem entirely. And there is no time for this kind of research. // At the momemnt, the quick proposition could be: return strcmp(api_refine_encoding_id($encoding1), api_refine_encoding_id($encoding2)) == 0 ? true : false; } /** * This function checks whether a given encoding is UTF-8. * @param string $encoding The tested encoding. * @return bool Returns TRUE if the given encoding id means UTF-8, otherwise returns false. */ function api_is_utf8($encoding) { return api_equal_encodings($encoding, 'UTF-8'); } /** * This function returns the encoding, currently used by the system. * @return string The system's encoding. * Note: The value of api_get_setting('platform_charset') is tried to be returned first, * on the second place the global variable $charset is tried to be returned. If for some * reason both attempts fail, 'ISO-8859-15' will be returned. */ function api_get_system_encoding() { $system_encoding = api_get_setting('platform_charset'); if (!empty($system_encoding)) { return $system_encoding; } global $charset; return empty($charset) ? 'ISO-8859-15' : $charset; } /** * Sets/Gets internal character encoding of the common string functions within the PHP mbstring extension. * @param string $encoding When this parameter is given, the function sets the internal encoding. * @return string When $encoding parameter is not given, the function returns the internal encoding. * Note: This function is used in the global initialization script for setting the internal encoding to the platform's character set. * @link http://php.net/manual/en/function.mb-internal-encoding */ function api_mb_internal_encoding($encoding = null) { static $mb_internal_encoding = null; if (empty($encoding)) { if (is_null($mb_internal_encoding)) { $mb_internal_encoding = @mb_internal_encoding(); } return $mb_internal_encoding; } $mb_internal_encoding = $encoding; if (api_mb_supports($encoding)) { return @mb_internal_encoding($encoding); } return false; } /** * Sets/Gets internal character encoding of the regular expression functions (ereg-like) within the PHP mbstring extension. * @param string $encoding When this parameter is given, the function sets the internal encoding. * @return string When $encoding parameter is not given, the function returns the internal encoding. * Note: This function is used in the global initialization script for setting the internal encoding to the platform's character set. * @link http://php.net/manual/en/function.mb-regex-encoding */ function api_mb_regex_encoding($encoding = null) { static $mb_regex_encoding = null; if (empty($encoding)) { if (is_null($mb_regex_encoding)) { $mb_regex_encoding = @mb_regex_encoding(); } return $mb_regex_encoding; } $mb_regex_encoding = $encoding; if (api_mb_supports($encoding)) { return @mb_regex_encoding($encoding); } return false; } /** * Retrieves specified internal encoding configuration variable within the PHP iconv extension. * @param string $type The parameter $type could be: 'iconv_internal_encoding', 'iconv_input_encoding', or 'iconv_output_encoding'. * @return mixed The function returns the requested encoding or FALSE on error. * @link http://php.net/manual/en/function.iconv-get-encoding */ function api_iconv_get_encoding($type) { return api_iconv_set_encoding($type); } /** * Sets specified internal encoding configuration variables within the PHP iconv extension. * @param string $type The parameter $type could be: 'iconv_internal_encoding', 'iconv_input_encoding', or 'iconv_output_encoding'. * @param string $encoding The desired encoding to be set. * @return bool Returns TRUE on success, FALSE on error. * Note: This function is used in the global initialization script for setting these three internal encodings to the platform's character set. * @link http://php.net/manual/en/function.iconv-set-encoding */ // Sets current setting for character encoding conversion. // The parameter $type could be: 'iconv_internal_encoding', 'iconv_input_encoding', or 'iconv_output_encoding'. function api_iconv_set_encoding($type, $encoding = null) { static $iconv_internal_encoding = null; static $iconv_input_encoding = null; static $iconv_output_encoding = null; if (!api_iconv_present()) { return false; } switch ($type) { case 'iconv_internal_encoding': if (empty($encoding)) { if (is_null($iconv_internal_encoding)) { $iconv_internal_encoding = @iconv_get_encoding($type); } return $iconv_internal_encoding; } if (api_iconv_supports($encoding)) { if(@iconv_set_encoding($type, $encoding)) { $iconv_internal_encoding = $encoding; return true; } else { return false; } } else { return false; } break; case 'iconv_input_encoding': if (empty($encoding)) { if (is_null($iconv_input_encoding)) { $iconv_input_encoding = @iconv_get_encoding($type); } return $iconv_input_encoding; } if (api_iconv_supports($encoding)) { if(@iconv_set_encoding($type, $encoding)) { $iconv_input_encoding = $encoding; return true; } else { return false; } } else { return false; } break; case 'iconv_output_encoding': if (empty($encoding)) { if (is_null($iconv_output_encoding)) { $iconv_output_encoding = @iconv_get_encoding($type); } return $iconv_output_encoding; } if (api_iconv_supports($encoding)) { if(@iconv_set_encoding($type, $encoding)) { $iconv_output_encoding = $encoding; return true; } else { return false; } } else { return false; } break; default: return false; } } /** * Checks whether a specified encoding is supported by this API. * @param string $encoding The specified encoding. * @return bool Returns TRUE when the specified encoding is supported, FALSE othewise. */ function api_is_encoding_supported($encoding) { return api_mb_supports($encoding) || api_iconv_supports($encoding); } /** * Checks whether the specified encoding is supported by the PHP mbstring extension. * @param string $encoding The specified encoding. * @return bool Returns TRUE when the specified encoding is supported, FALSE othewise. */ function api_mb_supports($encoding) { static $supported = array(); $encoding = api_refine_encoding_id($encoding); if (!isset($supported[$encoding])) { $mb_encodings = mb_list_encodings(); $mb_encodings = array_map('api_refine_encoding_id', $mb_encodings); $supported[$encoding] = in_array($encoding, $mb_encodings); } return $supported[$encoding] ? true : false; } /** * Checks whether the specified encoding is supported by the PHP iconv extension. * @param string $encoding The specified encoding. * @return bool Returns TRUE when the specified encoding is supported, FALSE othewise. */ function api_iconv_supports($encoding) { static $supported = array(); $encoding = api_refine_encoding_id($encoding); if (!isset($supported[$encoding])) { if (api_iconv_present()) { $test_string = ''; for ($i = 32; $i < 128; $i++) { $test_string .= chr($i); } $supported[$encoding] = (@iconv_strlen($test_string, $encoding)) ? true : false; } else { $supported[$encoding] = false; } } return $supported[$encoding]; } /** * Checks whether the PHP iconv extension is installed and it works. * @return bool Returns TRUE when the iconv extension is detected, FALSE othewise. */ function api_iconv_present() { static $iconv_present = null; if (!is_null($iconv_present)) { return $iconv_present; } $iconv_present = function_exists('iconv') ? true : false; return $iconv_present; } /** * Checks whether the specified encoding is supported by the html-entitiy related functions. * @param string $encoding The specified encoding. * @return bool Returns TRUE when the specified encoding is supported, FALSE othewise. */ function api_html_entity_supports($encoding) { static $supported = array(); $encoding = api_refine_encoding_id($encoding); if (!isset($supported[$encoding])) { // See http://php.net/manual/en/function.htmlentities.php $html_entity_encodings = array(explode(',', ' ISO-8859-1, ISO8859-1, ISO-8859-15, ISO8859-15, UTF-8, cp866, ibm866, 866, cp1251, Windows-1251, win-1251, 1251, cp1252, Windows-1252, 1252, KOI8-R, koi8-ru, koi8r, BIG5, 950, GB2312, 936, BIG5-HKSCS, Shift_JIS, SJIS, 932, EUC-JP, EUCJP ')); $html_entity_encodings = array_map('trim', $html_entity_encodings); $html_entity_encodings = array_map('api_refine_encoding_id', $html_entity_encodings); $supported[$encoding] = in_array($encoding, $html_entity_encodings); } return $supported[$encoding] ? true : false; } /** * ---------------------------------------------------------------------------- * String validation functions concerning some encodings * ---------------------------------------------------------------------------- */ /** * Checks a string for UTF-8 validity. * @param string $string The string to be tested/validated. * @return bool Returns TRUE when the tested string is valid UTF-8 one, FALSE othewise. * @link http://en.wikipedia.org/wiki/UTF-8 */ function api_is_valid_utf8($string) { //return @mb_detect_encoding($string, 'UTF-8', true) == 'UTF-8' ? true : false; // Ivan Tcholakov, 05-OCT-2008: I do not trust mb_detect_encoding(). I have // found a string with a single cyrillic letter (single byte), that is // wrongly detected as UTF-8. Possibly, there would be problems with other // languages too. // An alternative implementation will be used: $len = strlen($string); $i = 0; while ($i < $len) { $byte1 = ord($string[$i++]); // Here the current character begins. Its size is // determined by the senior bits in the first byte. if (($byte1 & 0x80) == 0x00) { // 0xxxxxxx // & // 10000000 // -------- // 00000000 // This is s valid character and it contains a single byte. } elseif (($byte1 & 0xE0) == 0xC0) { // 110xxxxx 10xxxxxx // & & // 11100000 11000000 // -------- -------- // 11000000 10000000 // The character contains two bytes. if ($i == $len) return false; // Here the string ends unexpectedly. if (!((ord($string[$i++]) & 0xC0) == 0x80)) return false; // Invalid second byte, invalid string. } elseif(($byte1 & 0xF0) == 0xE0) { // 1110xxxx 10xxxxxx 10xxxxxx // & & & // 11110000 11000000 11000000 // -------- -------- -------- // 11100000 10000000 10000000 // This is a character of three bytes. if ($i == $len) { return false; // Unexpected end of the string. } if (!((ord($string[$i++]) & 0xC0) == 0x80)) { return false; // Invalid second byte. } if ($i == $len) { return false; // Unexpected end of the string. } if (!((ord($string[$i++]) & 0xC0) == 0x80)) { return false; // Invalid third byte, invalid string. } } elseif(($byte1 & 0xF8) == 0xF0) { // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx // & & & & // 11111000 11000000 11000000 11000000 // -------- -------- -------- -------- // 11110000 10000000 10000000 10000000 // This is a character of four bytes. if ($i == $len) { return false; } if (!((ord($string[$i++]) & 0xC0) == 0x80)) { return false; } if ($i == $len) { return false; } if (!((ord($string[$i++]) & 0xC0) == 0x80)) { return false; } if ($i == $len) { return false; } if (!((ord($string[$i++]) & 0xC0) == 0x80)) { return false; } } elseif(($byte1 & 0xFC) == 0xF8) { // 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx // & & & & & // 11111100 11000000 11000000 11000000 11000000 // -------- -------- -------- -------- -------- // 11111000 10000000 10000000 10000000 10000000 // This is a character of five bytes. if ($i == $len) { return false; } if (!((ord($string[$i++]) & 0xC0) == 0x80)) { return false; } if ($i == $len) { return false; } if (!((ord($string[$i++]) & 0xC0) == 0x80)) { return false; } if ($i == $len) { return false; } if (!((ord($string[$i++]) & 0xC0) == 0x80)) { return false; } if ($i == $len) { return false; } if (!((ord($string[$i++]) & 0xC0) == 0x80)) { return false; } } elseif(($byte1 & 0xFE) == 0xFC) { // 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx // & & & & & & // 11111110 11000000 11000000 11000000 11000000 11000000 // -------- -------- -------- -------- -------- -------- // 11111100 10000000 10000000 10000000 10000000 10000000 // This is a character of six bytes. if ($i == $len) { return false; } if (!((ord($string[$i++]) & 0xC0) == 0x80)) { return false; } if ($i == $len) { return false; } if (!((ord($string[$i++]) & 0xC0) == 0x80)) { return false; } if ($i == $len) { return false; } if (!((ord($string[$i++]) & 0xC0) == 0x80)) { return false; } if ($i == $len) { return false; } if (!((ord($string[$i++]) & 0xC0) == 0x80)) { return false; } if ($i == $len) { return false; } if (!((ord($string[$i++]) & 0xC0) == 0x80)) { return false; } } else { return false; // In any other case the character is invalid. } // Here the current character is valid, it // matches to some of the cases above. // The next character is to be examinated. } return true; // Empty strings are valid too. } /** * Checks whether a string contains 7-bit ASCII characters only. * @param string $string The string to be tested/validated. * @return bool Returns TRUE when the tested string contains 7-bit ASCII characters only, FALSE othewise. */ function api_is_valid_ascii($string) { return @mb_detect_encoding($string, 'ASCII', true) == 'ASCII' ? true : false; } /** * ---------------------------------------------------------------------------- * Language management functions * ---------------------------------------------------------------------------- */ /** * Returns a purified language id, without possible suffixes that will disturb language identification in certain cases. * @param string $language The input language identificator, for example 'french_unicode'. * @param string The same purified or filtered language identificator, for example 'french'. */ function api_refine_language_id($language) { static $search = array('_unicode', '_latin', '_corporate', '_org', '_KM'); return str_replace($search, '', $language); } /** * ---------------------------------------------------------------------------- * Array functions * ---------------------------------------------------------------------------- */ /** * Checks if a value exists in an array, a case insensitive version of in_array() function with extended multibyte support. * @param mixed $needle The searched value. If needle is a string, the comparison is done in a case-insensitive manner. * @param array $haystack The array. * @param bool $strict If is set to TRUE then the function will also check the types of the $needle in the $haystack. The default value if FALSE. * @return bool Returns TRUE if $needle is found in the array, FALSE otherwise. * @link http://php.net/manual/en/function.in-array.php */ function api_in_array_nocase($needle, $haystack, $strict = false, $encoding = null) { if (is_array($needle)) { foreach ($needle as $item) { if (api_in_array_nocase($item, $haystack, $strict, $encoding)) return true; } return false; } if (!is_string($needle)) { return in_array($needle, $haystack, $strict); } $needle = api_strtolower($needle, $encoding); foreach ($haystack as $item) { if (api_strtolower($item, $encoding) == $needle) { return true; } } return false; } //---------------------------------------------------------------------------- // Multibyte string functions designed to upgrade the PHP5 mbstring extension //---------------------------------------------------------------------------- // ---------- Multibyte string functions implemented in PHP 5.2.0+ ----------- // This is a multibyte replacement of strchr(). // This function exists in PHP 5 >= 5.2.0 // See http://php.net/manual/en/function.mb-strrchr if (!function_exists('mb_strchr')) { function mb_strchr($haystack, $needle, $part = false, $encoding = null) { if (empty($encoding)) { $encoding = mb_internal_encoding(); } return mb_strstr($haystack, $needle, $part, $encoding); } } // This is a multibyte replacement of stripos(). // This function exists in PHP 5 >= 5.2.0 // See http://php.net/manual/en/function.mb-stripos if (!function_exists('mb_stripos')) { function mb_stripos($haystack, $needle, $offset = 0, $encoding = null) { if (empty($encoding)) { $encoding = mb_internal_encoding(); } return mb_strpos(mb_strtolower($haystack, $encoding), mb_strtolower($needle, $encoding), $offset, $encoding); } } // This is a multibyte replacement of stristr(). // This function exists in PHP 5 >= 5.2.0 // See http://php.net/manual/en/function.mb-stristr if (!function_exists('mb_stristr')) { function mb_stristr($haystack, $needle, $part = false, $encoding = null) { if (empty($encoding)) { $encoding = mb_internal_encoding(); } $pos = mb_strpos(mb_strtolower($haystack, $encoding), mb_strtolower($needle, $encoding), 0, $encoding); if ($pos === false) { return false; } elseif($part == true) { return mb_substr($haystack, 0, $pos + 1, $encoding); } else { return mb_substr($haystack, $pos, mb_strlen($haystack, $encoding), $encoding); } } } // This is a multibyte replacement of strrchr(). // This function exists in PHP 5 >= 5.2.0 // See http://php.net/manual/en/function.mb-strrchr if (!function_exists('mb_strrchr')) { function mb_strrchr($haystack, $needle, $part = false, $encoding = null) { if (empty($encoding)) { $encoding = mb_internal_encoding(); } $needle = mb_substr($needle, 0, 1, $encoding); $pos = mb_strrpos($haystack, $needle, mb_strlen($haystack, $encoding) - 1, $encoding); if ($pos === false) { return false; } elseif($part == true) { return mb_substr($haystack, 0, $pos + 1, $encoding); } else { return mb_substr($haystack, $pos, mb_strlen($haystack, $encoding), $encoding); } } } // This is a multibyte replacement of strstr(). // This function exists in PHP 5 >= 5.2.0 // See http://php.net/manual/en/function.mb-strstr if (!function_exists('mb_strstr')) { function mb_strstr($haystack, $needle, $part = false, $encoding = null) { if (empty($encoding)) { $encoding = mb_internal_encoding(); } $pos = mb_strpos($haystack, $needle, 0, $encoding); if ($pos === false) { return false; } elseif($part == true) { return mb_substr($haystack, 0, $pos + 1, $encoding); } else { return mb_substr($haystack, $pos, mb_strlen($haystack, $encoding), $encoding); } } } ?>