[svn r20631] FS#306 - The multibyte string library: Reverting back to the previous implementations of api_utf8_encode() and api_utf8_decode() - they work faster. Starting to comment the library in PHPDoc style.

skala
Ivan Tcholakov 17 years ago
parent ee7f7df624
commit 8ec6c7eade
  1. 202
      main/inc/lib/multibyte_string_functions.lib.php

@ -1,49 +1,65 @@
<?php <?php
/**
// File: multibyte_string_functions.lib.php * ==============================================================================
// Main API extension for Dokeos 1.8.6 LMS * File: multibyte_string_functions.lib.php
// A common purpose library for supporting multibyte string aware functions. * Main API extension library for Dokeos 1.8.6+ LMS
// License: GNU/GPL version 2 or later (Free Software Foundation) * A common purpose library for supporting multibyte string aware functions.
// Author: Ivan Tcholakov, ivantcholakov@gmail.com * License: GNU/GPL version 2 or later (Free Software Foundation)
// October 2008. * @author: Ivan Tcholakov, ivantcholakov@gmail.com
// May 2009 - refactoring and minor fixes have been implemented. * October 2008 - initial implementation.
* May 2009 - refactoring and minor corrections have been implemented.
* @package dokeos.library
// Notes: * ==============================================================================
// */
// 1. For all the functions from this library witn optional encoding
// parameters the system's encoding is assumed, i.e. the value that is /**
// returned by api_get_setting('platform_charset') or the value of the * Notes:
// global variable $charset. *
// * 1. For all the functions from this library with optional encoding
// 2. In other aspets, most of the functions in this library try to copy * parameters the system's encoding is assumed, i.e. the value that is
// behaviour of some core PHP functions and some functions from the * returned by api_get_setting('platform_charset') or the value of the
// mbstring extension. Mostly they have similar names prefixed with "api_". * global variable $charset.
// For your convenience, links have been given to the documentation of the *
// original PHP functions. Thus, you may exploit on your previous habits. * 2. In other aspects, most of the functions in this library try to copy
// * behaviour of some core PHP functions and some functions from the
// 3. Why these function have been introduced? Because they are able to * mbstring extension. Mostly they have similar names prefixed with "api_".
// support more encodings than the original ones. And which is more * For your convenience, links have been given to the documentation of the
// important - they are UTF-8 aware. So, they should be used for strings * original PHP functions. Thus, you may exploit on your previous habits.
// in natural language. For internal system identificators of file names *
// which are supposed to contain only English letters you may use the * 3. Why have these functions been introduced? Because they are able to
// original PHP string functions. * support more encodings than the original ones. And which is more
// * important - they are UTF-8 aware. So, they should be used for strings
// 4. This library requires PHP mbstring extension to be activated. * in natural language. For internal system identificators of file names
// When encodings to be used are not supported by mbstring, this library * which are supposed to contain only English letters you may use the
// is able to exploit the PHP iconv extesion, which in this case should * original PHP string functions.
// be activated too. *
* 4. This library requires PHP mbstring extension to be activated.
* When encodings to be used are not supported by mbstring, this library
//---------------------------------------------------------------------------- * is able to exploit the PHP iconv extension, which in this case should
// Multibyte string conversion functions * be activated too.
//---------------------------------------------------------------------------- */
/**
// Converts character encoding of a given string. * ----------------------------------------------------------------------------
// See http://php.net/manual/en/function.mb-convert-encoding * Multibyte string conversion functions
function api_convert_encoding($string, $to_encoding, $from_encoding) { * ----------------------------------------------------------------------------
*/
/**
* Converts character encoding of a given string.
* @param string $string The string being converted.
* @param string $to_encoding The encoding that $string is being converted to.
* @param string $from_encoding The encoding that $string is being converted from. If it is omitted, the platform character set is assumed.
* @return string Returns the converted string.
* This function is aimed to replace mb_convert_encoding() for human-language strings.
* @link http://php.net/manual/en/function.mb-convert-encoding
*/
function api_convert_encoding($string, $to_encoding, $from_encoding = null) {
if (empty($from_encoding)) {
$from_encoding = api_mb_internal_encoding();
}
if (api_equal_encodings($to_encoding, $from_encoding)) { if (api_equal_encodings($to_encoding, $from_encoding)) {
// When conversion is not needed, the string is returned directly, without validation.
return $string; return $string;
} }
if (api_mb_supports($to_encoding) && api_mb_supports($from_encoding)) { if (api_mb_supports($to_encoding) && api_mb_supports($from_encoding)) {
@ -52,22 +68,74 @@ function api_convert_encoding($string, $to_encoding, $from_encoding) {
elseif (api_iconv_supports($to_encoding) && api_iconv_supports($from_encoding)) { elseif (api_iconv_supports($to_encoding) && api_iconv_supports($from_encoding)) {
return @iconv($from_encoding, $to_encoding, $string); return @iconv($from_encoding, $to_encoding, $string);
} }
// Here the function gives up.
return $string; return $string;
} }
// Converts a given string into UTF-8 encoded string. /**
// See http://php.net/manual/en/function.utf8-encode * Converts a given string into UTF-8 encoded string.
* @param string $string The string being converted.
* @param string $from_encoding The encoding that $string is being converted from. If it is omitted, the platform character set is assumed.
* @return string Returns the converted string.
* This function is aimed to replace utf8_encode() for human-language strings.
* @link http://php.net/manual/en/function.utf8-encode
*/
function api_utf8_encode($string, $from_encoding = null) { function api_utf8_encode($string, $from_encoding = null) {
return api_convert_encoding($string, 'UTF-8', $from_encoding); if (empty($from_encoding)) {
$from_encoding = api_mb_internal_encoding();
}
if (api_is_utf8($from_encoding)) {
// When conversion is not needed, the string is returned directly, without validation.
return $string;
}
if (api_mb_supports($from_encoding)) {
return @mb_convert_encoding($string, 'UTF-8', $from_encoding);
}
elseif (api_iconv_supports($from_encoding)) {
return @iconv($from_encoding, 'UTF-8', $string);
}
// Here the function gives up.
return $string;
} }
// Converts a given string, from UTF-8 encoding to a specified encoding. /**
// See http://php.net/manual/en/function.utf8-decode * Converts a given string from UTF-8 encoding to a specified encoding.
* @param string $string The string being converted.
* @param string $to_encoding The encoding that $string is being converted to. If it is omitted, the platform character set is assumed.
* @return string Returns the converted string.
* This function is aimed to replace utf8_decode() for human-language strings.
* @link http://php.net/manual/en/function.utf8-decode
*/
function api_utf8_decode($string, $to_encoding = null) { function api_utf8_decode($string, $to_encoding = null) {
return api_convert_encoding($string, $to_encoding, 'UTF-8'); if (empty($to_encoding)) {
$to_encoding = api_mb_internal_encoding();
}
if (api_is_utf8($to_encoding)) {
// When conversion is not needed, the string is returned directly, without validation.
return $string;
}
if (api_mb_supports($to_encoding)) {
return @mb_convert_encoding($string, $to_encoding, 'UTF-8');
}
elseif (api_iconv_supports($to_encoding)) {
return @iconv('UTF-8', $to_encoding, $string);
}
// Here the function gives up.
return $string;
} }
// Encodes a given string into the system ecoding if this conversion has been detected as necessary. /**
* Converts a given string into the system encoding (or platform character set).
* When $from_encoding is omitted on UTF-8 platforms then language dependent encoding
* is guessed/assumed. On non-UTF-8 platforms an omitted $from_encoding is assumed to be UTF-8.
* When the parameter $check_utf8_validity is true the function checks string's
* UTF-8 validity and decides whether to try to convert it or not.
* This function is useful for problem detection or making workarounds.
* @param string $string The string being converted.
* @param string $from_encoding The encoding that $string is being converted from. It is guessed when it is omitted.
* @param bool $check_utf8_validity A flag for UTF-8 validity check as condition for making conversion.
* @return string Returns the converted string.
*/
function api_to_system_encoding($string, $from_encoding = null, $check_utf8_validity = false) { function api_to_system_encoding($string, $from_encoding = null, $check_utf8_validity = false) {
$charset = api_get_system_encoding(); $charset = api_get_system_encoding();
if (empty($from_encoding)) { if (empty($from_encoding)) {
@ -95,8 +163,15 @@ function api_to_system_encoding($string, $from_encoding = null, $check_utf8_vali
return api_convert_encoding($string, $charset, $from_encoding); return api_convert_encoding($string, $charset, $from_encoding);
} }
// Converts all applicable characters to HTML entities. /**
// See http://php.net/manual/en/function.htmlentities * Converts all applicable characters to HTML entities.
* @param string $string The input string.
* @param int $quote_style The quote style - ENT_COMPAT (default), ENT_QUOTES, ENT_NOQUOTES.
* @param string $encoding The encoding (of the input string) used in conversion. If it is omitted, the platform character set is assumed.
* @return string Returns the converted string.
* This function is aimed to replace htmlentities() for human-language strings.
* @link http://php.net/manual/en/function.htmlentities
*/
function api_htmlentities($string, $quote_style = ENT_COMPAT, $encoding = null) { function api_htmlentities($string, $quote_style = ENT_COMPAT, $encoding = null) {
if (empty($encoding)) { if (empty($encoding)) {
$encoding = api_mb_internal_encoding(); $encoding = api_mb_internal_encoding();
@ -119,8 +194,15 @@ function api_htmlentities($string, $quote_style = ENT_COMPAT, $encoding = null)
return $string; return $string;
} }
// Decodes HTML entities into normal characters. /**
// See http://php.net/html_entity_decode * Converts HTML entities into normal characters.
* @param string $string The input string.
* @param int $quote_style The quote style - ENT_COMPAT (default), ENT_QUOTES, ENT_NOQUOTES.
* @param string $encoding The encoding (of the result) used in conversion. If it is omitted, the platform character set is assumed.
* @return string Returns the converted string.
* This function is aimed to replace html_entity_decode() for human-language strings.
* @link http://php.net/html_entity_decode
*/
function api_html_entity_decode($string, $quote_style = ENT_COMPAT, $encoding = null) { function api_html_entity_decode($string, $quote_style = ENT_COMPAT, $encoding = null) {
if (empty($encoding)) { if (empty($encoding)) {
$encoding = api_mb_internal_encoding(); $encoding = api_mb_internal_encoding();
@ -134,12 +216,14 @@ function api_html_entity_decode($string, $quote_style = ENT_COMPAT, $encoding =
return api_utf8_decode(html_entity_decode(api_convert_encoding($string, 'UTF-8', $encoding), $quote_style, 'UTF-8'), $encoding); return api_utf8_decode(html_entity_decode(api_convert_encoding($string, 'UTF-8', $encoding), $quote_style, 'UTF-8'), $encoding);
} }
// This function encodes (conditionally) to UTF-8 a given string if XmlHttp-request has been detected. /**
* This function encodes (conditionally) a given string to UTF-8 if XmlHttp-request has been detected.
* @param string $string The string being converted.
* @param string $from_encoding The encoding that $string is being converted from. If it is omitted, the platform character set is assumed.
* @return string Returns the converted string.
*/
function api_xml_http_response_encode($string, $from_encoding = null) { function api_xml_http_response_encode($string, $from_encoding = null) {
if (isset($_SERVER['HTTP_X_REQUESTED_WITH']) && strtolower($_SERVER['HTTP_X_REQUESTED_WITH']) == 'xmlhttprequest') { if (isset($_SERVER['HTTP_X_REQUESTED_WITH']) && strtolower($_SERVER['HTTP_X_REQUESTED_WITH']) == 'xmlhttprequest') {
if (empty($from_encoding)) {
$from_encoding = api_mb_internal_encoding();
}
return api_convert_encoding($string, 'UTF-8', $from_encoding); return api_convert_encoding($string, 'UTF-8', $from_encoding);
} }
return $string; return $string;

Loading…
Cancel
Save