Remove unused functions. Use Patchwork\Utf8 to load common mb* functions

Use URLify::transliterate() instead of custom functions.
1.10.x
Julio Montoya 11 years ago
parent 9d5987911a
commit cbb28b7282
  1. 8
      main/inc/global.inc.php
  2. 476
      main/inc/lib/internationalization.lib.php
  3. 128
      main/inc/lib/internationalization_internal.lib.php
  4. 25
      main/inc/lib/usermanager.lib.php

@ -195,13 +195,13 @@ if (empty($charset)) {
$charset_initial_value = $charset; $charset_initial_value = $charset;
// Enables the portablity layer and configures PHP for UTF-8 // Enables the portablity layer and configures PHP for UTF-8
//\Patchwork\Utf8\Bootup::initAll(); \Patchwork\Utf8\Bootup::initAll();
// Initialization of the internationalization library. // Initialization of the internationalization library.
api_initialize_internationalization(); //api_initialize_internationalization();
// Initialization of the default encoding that will be used by the multibyte string routines in the internationalization library. // Initialization of the default encoding that will be used by the multibyte string routines in the internationalization library.
api_set_internationalization_default_encoding($charset); //api_set_internationalization_default_encoding($charset);
// Start session after the internationalization library has been initialized. // Start session after the internationalization library has been initialized.
Chamilo::session()->start($already_installed); Chamilo::session()->start($already_installed);
@ -558,7 +558,7 @@ if (!isset($_SESSION['login_as']) && isset($_user)) {
if ($res_logout_date < time() - $_configuration['session_lifetime']) { if ($res_logout_date < time() - $_configuration['session_lifetime']) {
// it isn't, we should create a fresh entry // it isn't, we should create a fresh entry
Event::event_login(); Event::event_login($_user['user_id']);
// now that it's created, we can get its ID and carry on // now that it's created, we can get its ID and carry on
$q_last_connection = Database::query($sql_last_connection); $q_last_connection = Database::query($sql_last_connection);
$i_id_last_connection = Database::result($q_last_connection, 0, 'login_id'); $i_id_last_connection = Database::result($q_last_connection, 0, 'login_id');

@ -5,7 +5,7 @@
* File: internationalization.lib.php * File: internationalization.lib.php
* Internationalization library for Chamilo 1.8.7 LMS * Internationalization library for Chamilo 1.8.7 LMS
* A library implementing internationalization related functions. * A library implementing internationalization related functions.
* License: GNU General Public License Version 3 (Free Software Foundation) * License: GNU General Public License Version 3 (Free Software Foundation)ww
* @todo use Patchwork-UTF8 instead of custom changes. * @todo use Patchwork-UTF8 instead of custom changes.
* @author Ivan Tcholakov, <ivantcholakov@gmail.com>, 2009, 2010 * @author Ivan Tcholakov, <ivantcholakov@gmail.com>, 2009, 2010
* @author More authors, mentioned in the correpsonding fragments of this source. * @author More authors, mentioned in the correpsonding fragments of this source.
@ -270,21 +270,6 @@ function get_lang($variable, $reserved = null, $language = null) {
return $ret; return $ret;
} }
/**
 * Checks whether a translated (localized) string exists.
 *
 * The check is delegated to get_lang(): a global flag is raised for the
 * duration of the call, and the outcome is read back from a second global.
 * (Presumably get_lang() fills $_api_is_translated while the flag is up;
 * that happens outside this function.)
 *
 * @param string $variable The identifier (name) of the translated string to be checked.
 * @param string $language (optional) Language identifier. If it is omitted, get_lang() receives null.
 * @return bool The value left in the global $_api_is_translated after the lookup.
 * @author Ivan Tcholakov, 2010.
 */
function api_is_translated($variable, $language = null)
{
    global $_api_is_translated, $_api_is_translated_call;

    // Tell get_lang() that this call is only an existence probe.
    $_api_is_translated_call = true;
    // get_lang() is declared as get_lang($variable, $reserved = null, $language = null):
    // the language is the THIRD argument, so the reserved slot must be filled explicitly.
    get_lang($variable, null, $language);
    $_api_is_translated_call = false;

    return $_api_is_translated;
}
/** /**
* Gets the current interface language. * Gets the current interface language.
* @param bool $purified (optional) When it is true, a purified (refined) language value will be returned, for example 'french' instead of 'french_unicode'. * @param bool $purified (optional) When it is true, a purified (refined) language value will be returned, for example 'french' instead of 'french_unicode'.
@ -465,44 +450,6 @@ function api_get_text_direction($language = null) {
return $text_direction[$language]; return $text_direction[$language];
} }
/**
 * Tells whether a given language appears in the list of Latin 1 compatible languages.
 *
 * The list is fetched once per request from _api_get_latin1_compatible_languages()
 * and kept in a static variable; the language identifier is normalized with
 * api_purify_language_id() before the lookup.
 *
 * @param string $language The language to be checked.
 * @return bool TRUE if the purified language identifier is in the list, FALSE otherwise.
 */
function api_is_latin1_compatible($language)
{
    static $compatible_languages = null;

    // Lazy, one-time load of the language list.
    if ($compatible_languages === null) {
        $compatible_languages = _api_get_latin1_compatible_languages();
    }

    return in_array(api_purify_language_id($language), $compatible_languages);
}
/**
 * Language recognition.
 *
 * Based on the publication:
 * W. B. Cavnar and J. M. Trenkle. N-gram-based text categorization.
 * Proceedings of SDAIR-94, 3rd Annual Symposium on Document Analysis
 * and Information Retrieval, 1994.
 * @link http://citeseer.ist.psu.edu/cache/papers/cs/810/http:zSzzSzwww.info.unicaen.frzSz~giguetzSzclassifzSzcavnar_trenkle_ngram.pdf/n-gram-based-text.pdf
 *
 * At most LANGUAGE_DETECT_MAX_LENGTH characters from the start of the string are
 * analyzed. N-grams are generated from that sample and compared by
 * _api_compare_n_grams(); the first key of the comparison result is taken as the match.
 *
 * @param string $string The text to be analyzed (taken by reference, not modified here).
 * @param string $encoding (optional) The encoding of the text. If it is omitted, the value of _api_mb_internal_encoding() is used.
 * @return string|bool The part of the first result key that precedes the first ':'
 * (NOTE(review): presumably the language identifier; confirm the key format against
 * _api_compare_n_grams()), or FALSE when the text is empty, nothing matched, or the key contains no ':'.
 */
function api_detect_language(&$string, $encoding = null)
{
    if (empty($encoding)) {
        $encoding = _api_mb_internal_encoding();
    }
    if (empty($string)) {
        return false;
    }

    $sample = api_substr($string, 0, LANGUAGE_DETECT_MAX_LENGTH, $encoding);
    // Plain assignment: binding a function result by reference raises a notice
    // unless the callee returns by reference, and the result is only read here.
    $result_array = _api_compare_n_grams(_api_generate_n_grams($sample, $encoding), $encoding);
    if (empty($result_array)) {
        return false;
    }

    // each() is deprecated since PHP 7.2 and removed in PHP 8.0; reset()/key()
    // fetch the first key without it.
    reset($result_array);
    $key = key($result_array);

    return strstr($key, ':', true);
}
/** /**
* Date and time conversions and formats * Date and time conversions and formats
@ -533,8 +480,8 @@ function api_get_timezones()
* *
* @return string The timezone chosen * @return string The timezone chosen
*/ */
function _api_get_timezone() { function _api_get_timezone()
global $_user; {
// First, get the default timezone of the server // First, get the default timezone of the server
$to_timezone = date_default_timezone_get(); $to_timezone = date_default_timezone_get();
// Second, see if a timezone has been chosen for the platform // Second, see if a timezone has been chosen for the platform
@ -544,13 +491,16 @@ function _api_get_timezone() {
} }
// If allowed by the administrator // If allowed by the administrator
$use_users_timezone = api_get_setting('use_users_timezone', 'timezones'); $use_users_timezone = api_get_setting('use_users_timezone', 'timezones');
if ($use_users_timezone == 'true') { if ($use_users_timezone == 'true') {
$userId = api_get_user_id();
// Get the timezone based on user preference, if it exists // Get the timezone based on user preference, if it exists
$timezone_user = UserManager::get_extra_user_data_by_field($_user['user_id'],'timezone'); $timezone_user = UserManager::get_extra_user_data_by_field($userId,'timezone');
if (isset($timezone_user['timezone']) && $timezone_user['timezone'] != null) { if (isset($timezone_user['timezone']) && $timezone_user['timezone'] != null) {
$to_timezone = $timezone_user['timezone']; $to_timezone = $timezone_user['timezone'];
} }
} }
return $to_timezone; return $to_timezone;
} }
@ -605,10 +555,12 @@ function api_get_local_time($time = null, $to_timezone = null, $from_timezone =
if (is_null($from_timezone)) { if (is_null($from_timezone)) {
$from_timezone = 'UTC'; $from_timezone = 'UTC';
} }
// Determining the timezone to be converted to // Determining the timezone to be converted to
if (is_null($to_timezone)) { if (is_null($to_timezone)) {
$to_timezone = _api_get_timezone(); $to_timezone = _api_get_timezone();
} }
// If time is a timestamp, convert it to a string // If time is a timestamp, convert it to a string
if (is_null($time) || empty($time) || $time == '0000-00-00 00:00:00') { if (is_null($time) || empty($time) || $time == '0000-00-00 00:00:00') {
if ($return_null_if_invalid_date) { if ($return_null_if_invalid_date) {
@ -662,7 +614,7 @@ function api_strtotime($time, $timezone = null) {
* *
* @param mixed Timestamp or datetime string * @param mixed Timestamp or datetime string
* @param mixed Date format (string or int; see date formats in the Chamilo system: TIME_NO_SEC_FORMAT, DATE_FORMAT_SHORT, DATE_FORMAT_LONG, DATE_TIME_FORMAT_LONG) * @param mixed Date format (string or int; see date formats in the Chamilo system: TIME_NO_SEC_FORMAT, DATE_FORMAT_SHORT, DATE_FORMAT_LONG, DATE_TIME_FORMAT_LONG)
* @param string $language (optional) Language indentificator. If it is omited, the current interface language is assumed. * @param string $language (optional) Language identificator. If it is omited, the current interface language is assumed.
* @return string Returns the formatted date. * @return string Returns the formatted date.
* *
* @link http://php.net/manual/en/function.strftime.php * @link http://php.net/manual/en/function.strftime.php
@ -1171,7 +1123,10 @@ function api_byte_count(& $string) {
* This function is aimed at replacing the function mb_convert_encoding() for human-language strings. * This function is aimed at replacing the function mb_convert_encoding() for human-language strings.
* @link http://php.net/manual/en/function.mb-convert-encoding * @link http://php.net/manual/en/function.mb-convert-encoding
*/ */
function api_convert_encoding($string, $to_encoding, $from_encoding = null) { function api_convert_encoding($string, $to_encoding, $from_encoding = null)
{
return mb_convert_encoding($string, $to_encoding, $from_encoding);
/*
if (empty($from_encoding)) { if (empty($from_encoding)) {
$from_encoding = _api_mb_internal_encoding(); $from_encoding = _api_mb_internal_encoding();
} }
@ -1193,7 +1148,7 @@ function api_convert_encoding($string, $to_encoding, $from_encoding = null) {
if (_api_convert_encoding_supports($to_encoding) && _api_convert_encoding_supports($from_encoding)) { if (_api_convert_encoding_supports($to_encoding) && _api_convert_encoding_supports($from_encoding)) {
return _api_convert_encoding($string, $to_encoding, $from_encoding); return _api_convert_encoding($string, $to_encoding, $from_encoding);
} }
return $string; // Here the function gives up. return $string; // Here the function gives up.*/
} }
/** /**
@ -1205,7 +1160,9 @@ function api_convert_encoding($string, $to_encoding, $from_encoding = null) {
* @link http://php.net/manual/en/function.utf8-encode * @link http://php.net/manual/en/function.utf8-encode
*/ */
function api_utf8_encode($string, $from_encoding = null) { function api_utf8_encode($string, $from_encoding = null) {
if (empty($from_encoding)) { return mb_convert_encoding($string, 'UTF-8', $from_encoding);
/*if (empty($from_encoding)) {
$from_encoding = _api_mb_internal_encoding(); $from_encoding = _api_mb_internal_encoding();
} }
if (api_is_utf8($from_encoding)) { if (api_is_utf8($from_encoding)) {
@ -1223,7 +1180,7 @@ function api_utf8_encode($string, $from_encoding = null) {
if (_api_convert_encoding_supports($from_encoding)) { if (_api_convert_encoding_supports($from_encoding)) {
return _api_convert_encoding($string, 'UTF-8', $from_encoding); return _api_convert_encoding($string, 'UTF-8', $from_encoding);
} }
return $string; // Here the function gives up. return $string; // Here the function gives up.*/
} }
/** /**
@ -1235,6 +1192,10 @@ function api_utf8_encode($string, $from_encoding = null) {
* @link http://php.net/manual/en/function.utf8-decode * @link http://php.net/manual/en/function.utf8-decode
*/ */
function api_utf8_decode($string, $to_encoding = null) { function api_utf8_decode($string, $to_encoding = null) {
return mb_convert_encoding($string, $to_encoding, 'UTF-8');
/*
if (empty($to_encoding)) { if (empty($to_encoding)) {
$to_encoding = _api_mb_internal_encoding(); $to_encoding = _api_mb_internal_encoding();
} }
@ -1253,7 +1214,7 @@ function api_utf8_decode($string, $to_encoding = null) {
if (_api_convert_encoding_supports($to_encoding)) { if (_api_convert_encoding_supports($to_encoding)) {
return _api_convert_encoding($string, $to_encoding, 'UTF-8'); return _api_convert_encoding($string, $to_encoding, 'UTF-8');
} }
return $string; // Here the function gives up. return $string; // Here the function gives up.*/
} }
/** /**
@ -1386,31 +1347,6 @@ function api_xml_http_response_encode($string, $from_encoding = null) {
return $string; return $string;
} }
/**
 * Converts a string to the encoding reported by api_get_file_system_encoding(),
 * i.e. the one used for representing file/folder names.
 *
 * @param string $string The string being converted.
 * @param string $from_encoding (optional) The encoding that $string is being converted from. If it is omitted, the value of _api_mb_internal_encoding() is used.
 * @return string Returns the result of api_convert_encoding().
 */
function api_file_system_encode($string, $from_encoding = null)
{
    $source_encoding = empty($from_encoding) ? _api_mb_internal_encoding() : $from_encoding;
    $target_encoding = api_get_file_system_encoding();

    return api_convert_encoding($string, $target_encoding, $source_encoding);
}
/**
 * Converts a string from the encoding reported by api_get_file_system_encoding(),
 * i.e. the one used for representing file/folder names.
 *
 * @param string $string The string being converted.
 * @param string $to_encoding (optional) The encoding that $string is being converted to. If it is omitted, the value of _api_mb_internal_encoding() is used.
 * @return string Returns the result of api_convert_encoding().
 */
function api_file_system_decode($string, $to_encoding = null)
{
    $target_encoding = empty($to_encoding) ? _api_mb_internal_encoding() : $to_encoding;
    $source_encoding = api_get_file_system_encoding();

    return api_convert_encoding($string, $target_encoding, $source_encoding);
}
/** /**
* Transliterates a string with arbitrary encoding into a plain ASCII string. * Transliterates a string with arbitrary encoding into a plain ASCII string.
@ -1441,145 +1377,9 @@ function api_file_system_decode($string, $to_encoding = null) {
* Initial implementation for Dokeos 1.8.6.1, 12-JUN-2009 * Initial implementation for Dokeos 1.8.6.1, 12-JUN-2009
* @author Ivan Tcholakov * @author Ivan Tcholakov
*/ */
function api_transliterate($string, $unknown = '?', $from_encoding = null) { function api_transliterate($string, $unknown = '?', $from_encoding = null)
static $map = array(); {
$string = api_utf8_encode($string, $from_encoding); return URLify::transliterate($string);
// Screen out some characters that eg won't be allowed in XML.
$string = preg_replace('/[\x00-\x08\x0b\x0c\x0e-\x1f]/', $unknown, $string);
// ASCII is always valid NFC!
// If we're only ever given plain ASCII, we can avoid the overhead
// of initializing the decomposition tables by skipping out early.
if (api_is_valid_ascii($string)) {
return $string;
}
static $tail_bytes;
if (!isset($tail_bytes)) {
// Each UTF-8 head byte is followed by a certain
// number of tail bytes.
$tail_bytes = array();
for ($n = 0; $n < 256; $n++) {
if ($n < 0xc0) {
$remaining = 0;
}
elseif ($n < 0xe0) {
$remaining = 1;
}
elseif ($n < 0xf0) {
$remaining = 2;
}
elseif ($n < 0xf8) {
$remaining = 3;
}
elseif ($n < 0xfc) {
$remaining = 4;
}
elseif ($n < 0xfe) {
$remaining = 5;
} else {
$remaining = 0;
}
$tail_bytes[chr($n)] = $remaining;
}
}
// Chop the text into pure-ASCII and non-ASCII areas;
// large ASCII parts can be handled much more quickly.
// Don't chop up Unicode areas for punctuation, though,
// that wastes energy.
preg_match_all('/[\x00-\x7f]+|[\x80-\xff][\x00-\x40\x5b-\x5f\x7b-\xff]*/', $string, $matches);
$result = '';
foreach ($matches[0] as $str) {
if ($str{0} < "\x80") {
// ASCII chunk: guaranteed to be valid UTF-8
// and in normal form C, so skip over it.
$result .= $str;
continue;
}
// We'll have to examine the chunk byte by byte to ensure
// that it consists of valid UTF-8 sequences, and to see
// if any of them might not be normalized.
//
// Since PHP is not the fastest language on earth, some of
// this code is a little ugly with inner loop optimizations.
$head = '';
$chunk = api_byte_count($str);
// Counting down is faster. I'm *so* sorry.
$len = $chunk + 1;
for ($i = -1; --$len; ) {
$c = $str{++$i};
if ($remaining = $tail_bytes[$c]) {
// UTF-8 head byte!
$sequence = $head = $c;
do {
// Look for the defined number of tail bytes...
if (--$len && ($c = $str{++$i}) >= "\x80" && $c < "\xc0") {
// Legal tail bytes are nice.
$sequence .= $c;
} else {
if ($len == 0) {
// Premature end of string!
// Drop a replacement character into output to
// represent the invalid UTF-8 sequence.
$result .= $unknown;
break 2;
} else {
// Illegal tail byte; abandon the sequence.
$result .= $unknown;
// Back up and reprocess this byte; it may itself
// be a legal ASCII or UTF-8 sequence head.
--$i;
++$len;
continue 2;
}
}
} while (--$remaining);
$n = ord($head);
if ($n <= 0xdf) {
$ord = ($n - 192) * 64 + (ord($sequence{1}) - 128);
}
else if ($n <= 0xef) {
$ord = ($n - 224) * 4096 + (ord($sequence{1}) - 128) * 64 + (ord($sequence{2}) - 128);
}
else if ($n <= 0xf7) {
$ord = ($n - 240) * 262144 + (ord($sequence{1}) - 128) * 4096 + (ord($sequence{2}) - 128) * 64 + (ord($sequence{3}) - 128);
}
else if ($n <= 0xfb) {
$ord = ($n - 248) * 16777216 + (ord($sequence{1}) - 128) * 262144 + (ord($sequence{2}) - 128) * 4096 + (ord($sequence{3}) - 128) * 64 + (ord($sequence{4}) - 128);
}
else if ($n <= 0xfd) {
$ord = ($n - 252) * 1073741824 + (ord($sequence{1}) - 128) * 16777216 + (ord($sequence{2}) - 128) * 262144 + (ord($sequence{3}) - 128) * 4096 + (ord($sequence{4}) - 128) * 64 + (ord($sequence{5}) - 128);
}
// Lookup and replace a character from the transliteration database.
$bank = $ord >> 8;
// Check if we need to load a new bank
if (!isset($map[$bank])) {
$file = dirname(__FILE__).'/internationalization_database/transliteration/' . sprintf('x%02x', $bank) . '.php';
if (file_exists($file)) {
$map[$bank] = include ($file);
} else {
$map[$bank] = array('en' => array());
}
}
$ord = $ord & 255;
$result .= isset($map[$bank]['en'][$ord]) ? $map[$bank]['en'][$ord] : $unknown;
$head = '';
} elseif ($c < "\x80") {
// ASCII byte.
$result .= $c;
$head = '';
} elseif ($c < "\xc0") {
// Illegal tail bytes.
if ($head == '') {
$result .= $unknown;
}
} else {
// Miscellaneous freaks.
$result .= $unknown;
$head = '';
}
}
}
return $result;
} }
/** /**
@ -1633,6 +1433,8 @@ function api_chr($codepoint, $encoding) {
* @author Ivan Tcholakov * @author Ivan Tcholakov
*/ */
function api_str_ireplace($search, $replace, $subject, & $count = null, $encoding = null) { function api_str_ireplace($search, $replace, $subject, & $count = null, $encoding = null) {
return str_ireplace($search, $replace, $subject, $count);
/*
if (empty($encoding)) { if (empty($encoding)) {
$encoding = _api_mb_internal_encoding(); $encoding = _api_mb_internal_encoding();
} }
@ -1687,7 +1489,7 @@ function api_str_ireplace($search, $replace, $subject, & $count = null, $encodin
if (is_null($count)) { if (is_null($count)) {
return str_ireplace($search, $replace, $subject); return str_ireplace($search, $replace, $subject);
} }
return str_ireplace($search, $replace, $subject, $count); return str_ireplace($search, $replace, $subject, $count);*/
} }
/** /**
@ -1705,6 +1507,8 @@ function api_str_ireplace($search, $replace, $subject, & $count = null, $encodin
* @link http://php.net/str_split * @link http://php.net/str_split
*/ */
function api_str_split($string, $split_length = 1, $encoding = null) { function api_str_split($string, $split_length = 1, $encoding = null) {
return str_split($string, $split_length);
/*
if (empty($encoding)) { if (empty($encoding)) {
$encoding = _api_mb_internal_encoding(); $encoding = _api_mb_internal_encoding();
} }
@ -1735,7 +1539,7 @@ function api_str_split($string, $split_length = 1, $encoding = null) {
} }
return $result[0]; return $result[0];
} }
return str_split($string, $split_length); return str_split($string, $split_length);*/
} }
/** /**
@ -1751,12 +1555,13 @@ function api_str_split($string, $split_length = 1, $encoding = null) {
* @link http://php.net/manual/en/function.mb-stripos * @link http://php.net/manual/en/function.mb-stripos
*/ */
function api_stripos($haystack, $needle, $offset = 0, $encoding = null) { function api_stripos($haystack, $needle, $offset = 0, $encoding = null) {
if (_api_mb_supports($encoding)) {
/*if (_api_mb_supports($encoding)) {
if (empty($encoding)) { if (empty($encoding)) {
$encoding = _api_mb_internal_encoding(); $encoding = _api_mb_internal_encoding();
} }
return mb_stripos($haystack, $needle, $offset, $encoding); return mb_stripos($haystack, $needle, $offset, $encoding);
} }*/
return stripos($haystack, $needle, $offset); return stripos($haystack, $needle, $offset);
} }
@ -1776,12 +1581,12 @@ function api_stripos($haystack, $needle, $offset = 0, $encoding = null) {
* @link http://php.net/manual/en/function.mb-stristr * @link http://php.net/manual/en/function.mb-stristr
*/ */
function api_stristr($haystack, $needle, $before_needle = false, $encoding = null) { function api_stristr($haystack, $needle, $before_needle = false, $encoding = null) {
if (_api_mb_supports($encoding)) { /*if (_api_mb_supports($encoding)) {
if (empty($encoding)) { if (empty($encoding)) {
$encoding = _api_mb_internal_encoding(); $encoding = _api_mb_internal_encoding();
} }
return mb_stristr($haystack, $needle, $before_needle, $encoding); return mb_stristr($haystack, $needle, $before_needle, $encoding);
} }*/
return stristr($haystack, $needle, $before_needle); return stristr($haystack, $needle, $before_needle);
} }
@ -1800,7 +1605,7 @@ function api_stristr($haystack, $needle, $before_needle = false, $encoding = nul
* there is no need the original function strlen() to be changed, it works correctly and faster for these cases. * there is no need the original function strlen() to be changed, it works correctly and faster for these cases.
*/ */
function api_strlen($string, $encoding = null) { function api_strlen($string, $encoding = null) {
if (empty($encoding)) { /*if (empty($encoding)) {
$encoding = _api_mb_internal_encoding(); $encoding = _api_mb_internal_encoding();
} }
if (_api_is_single_byte_encoding($encoding)) { if (_api_is_single_byte_encoding($encoding)) {
@ -1811,7 +1616,7 @@ function api_strlen($string, $encoding = null) {
} }
if (_api_iconv_supports($encoding)) { if (_api_iconv_supports($encoding)) {
return @iconv_strlen($string, $encoding); return @iconv_strlen($string, $encoding);
} }*/
return strlen($string); return strlen($string);
} }
@ -1829,14 +1634,14 @@ function api_strlen($string, $encoding = null) {
* @link http://php.net/manual/en/function.mb-strpos * @link http://php.net/manual/en/function.mb-strpos
*/ */
function api_strpos($haystack, $needle, $offset = 0, $encoding = null) { function api_strpos($haystack, $needle, $offset = 0, $encoding = null) {
if (empty($encoding)) { /*if (empty($encoding)) {
$encoding = _api_mb_internal_encoding(); $encoding = _api_mb_internal_encoding();
} }
if (_api_is_single_byte_encoding($encoding)) { if (_api_is_single_byte_encoding($encoding)) {
return strpos($haystack, $needle, $offset); return strpos($haystack, $needle, $offset);
} elseif (_api_mb_supports($encoding)) { } elseif (_api_mb_supports($encoding)) {
return mb_strpos($haystack, $needle, $offset, $encoding); return mb_strpos($haystack, $needle, $offset, $encoding);
} }*/
return strpos($haystack, $needle, $offset); return strpos($haystack, $needle, $offset);
} }
@ -1856,7 +1661,7 @@ function api_strpos($haystack, $needle, $offset = 0, $encoding = null) {
* @link http://php.net/manual/en/function.mb-strrchr * @link http://php.net/manual/en/function.mb-strrchr
*/ */
function api_strrchr($haystack, $needle, $before_needle = false, $encoding = null) { function api_strrchr($haystack, $needle, $before_needle = false, $encoding = null) {
if (empty($encoding)) { /*if (empty($encoding)) {
$encoding = _api_mb_internal_encoding(); $encoding = _api_mb_internal_encoding();
} }
if (_api_is_single_byte_encoding($encoding)) { if (_api_is_single_byte_encoding($encoding)) {
@ -1875,7 +1680,8 @@ function api_strrchr($haystack, $needle, $before_needle = false, $encoding = nul
if ($result === false) { if ($result === false) {
return false; return false;
} }
return api_substr($haystack, 0, api_strlen($haystack, $encoding) - api_strlen($result, $encoding), $encoding); return api_substr($haystack, 0, api_strlen($haystack, $encoding) - api_strlen($result, $encoding), $encoding);*/
return strrchr($haystack, $needle);
} }
/** /**
@ -1887,7 +1693,7 @@ function api_strrchr($haystack, $needle, $before_needle = false, $encoding = nul
* @link http://php.net/manual/en/function.strrev * @link http://php.net/manual/en/function.strrev
*/ */
function api_strrev($string, $encoding = null) { function api_strrev($string, $encoding = null) {
if (empty($encoding)) { /*if (empty($encoding)) {
$encoding = _api_mb_internal_encoding(); $encoding = _api_mb_internal_encoding();
} }
if (empty($string)) { if (empty($string)) {
@ -1898,7 +1704,7 @@ function api_strrev($string, $encoding = null) {
} }
if (api_is_encoding_supported($encoding)) { if (api_is_encoding_supported($encoding)) {
return implode(array_reverse(api_str_split($string, 1, $encoding))); return implode(array_reverse(api_str_split($string, 1, $encoding)));
} }*/
return strrev($string); return strrev($string);
} }
@ -1916,7 +1722,8 @@ function api_strrev($string, $encoding = null) {
*/ */
function api_strripos($haystack, $needle, $offset = 0, $encoding = null) function api_strripos($haystack, $needle, $offset = 0, $encoding = null)
{ {
return api_strrpos(api_strtolower($haystack, $encoding), api_strtolower($needle, $encoding), $offset, $encoding); return strripos($haystack, $needle, $offset);
//return api_strrpos(api_strtolower($haystack, $encoding), api_strtolower($needle, $encoding), $offset, $encoding);
} }
/** /**
@ -1933,12 +1740,12 @@ function api_strripos($haystack, $needle, $offset = 0, $encoding = null)
*/ */
function api_strrpos($haystack, $needle, $offset = 0, $encoding = null) function api_strrpos($haystack, $needle, $offset = 0, $encoding = null)
{ {
if (_api_mb_supports($encoding)) { /*if (_api_mb_supports($encoding)) {
if (empty($encoding)) { if (empty($encoding)) {
$encoding = _api_mb_internal_encoding(); $encoding = _api_mb_internal_encoding();
} }
return mb_strrpos($haystack, $needle, $offset, $encoding); return mb_strrpos($haystack, $needle, $offset, $encoding);
} }*/
return strrpos($haystack, $needle, $offset); return strrpos($haystack, $needle, $offset);
} }
@ -1958,7 +1765,7 @@ function api_strrpos($haystack, $needle, $offset = 0, $encoding = null)
* @link http://php.net/manual/en/function.mb-strstr * @link http://php.net/manual/en/function.mb-strstr
*/ */
function api_strstr($haystack, $needle, $before_needle = false, $encoding = null) { function api_strstr($haystack, $needle, $before_needle = false, $encoding = null) {
if (empty($encoding)) { /*if (empty($encoding)) {
$encoding = _api_mb_internal_encoding(); $encoding = _api_mb_internal_encoding();
} }
if (!is_string($needle)) { if (!is_string($needle)) {
@ -1977,7 +1784,7 @@ function api_strstr($haystack, $needle, $before_needle = false, $encoding = null
} }
if (_api_mb_supports($encoding)) { if (_api_mb_supports($encoding)) {
return mb_strstr($haystack, $needle, $before_needle, $encoding); return mb_strstr($haystack, $needle, $before_needle, $encoding);
} }*/
return strstr($haystack, $needle, $before_needle); return strstr($haystack, $needle, $before_needle);
} }
@ -1991,12 +1798,12 @@ function api_strstr($haystack, $needle, $before_needle = false, $encoding = null
* @link http://php.net/manual/en/function.mb-strtolower * @link http://php.net/manual/en/function.mb-strtolower
*/ */
function api_strtolower($string, $encoding = null) { function api_strtolower($string, $encoding = null) {
if (empty($encoding)) { /*if (empty($encoding)) {
$encoding = _api_mb_internal_encoding(); $encoding = _api_mb_internal_encoding();
} }
if (_api_mb_supports($encoding)) { if (_api_mb_supports($encoding)) {
return mb_strtolower($string, $encoding); return mb_strtolower($string, $encoding);
} }*/
return strtolower($string); return strtolower($string);
} }
@ -2010,12 +1817,12 @@ function api_strtolower($string, $encoding = null) {
* @link http://php.net/manual/en/function.mb-strtoupper * @link http://php.net/manual/en/function.mb-strtoupper
*/ */
function api_strtoupper($string, $encoding = null) { function api_strtoupper($string, $encoding = null) {
if (empty($encoding)) { /*if (empty($encoding)) {
$encoding = _api_mb_internal_encoding(); $encoding = _api_mb_internal_encoding();
} }
if (_api_mb_supports($encoding)) { if (_api_mb_supports($encoding)) {
return mb_strtoupper($string, $encoding); return mb_strtoupper($string, $encoding);
} }*/
return strtoupper($string); return strtoupper($string);
} }
@ -2032,7 +1839,7 @@ function api_strtoupper($string, $encoding = null) {
* @link http://php.net/manual/en/function.mb-substr * @link http://php.net/manual/en/function.mb-substr
*/ */
function api_substr($string, $start, $length = null, $encoding = null) { function api_substr($string, $start, $length = null, $encoding = null) {
if (empty($encoding)) { /*if (empty($encoding)) {
$encoding = _api_mb_internal_encoding(); $encoding = _api_mb_internal_encoding();
} }
// Passing null as $length would mean 0. This behaviour has been corrected here. // Passing null as $length would mean 0. This behaviour has been corrected here.
@ -2044,7 +1851,7 @@ function api_substr($string, $start, $length = null, $encoding = null) {
} }
if (_api_mb_supports($encoding)) { if (_api_mb_supports($encoding)) {
return mb_substr($string, $start, $length, $encoding); return mb_substr($string, $start, $length, $encoding);
} }*/
return substr($string, $start, $length); return substr($string, $start, $length);
} }
@ -2057,12 +1864,12 @@ function api_substr($string, $start, $length = null, $encoding = null) {
* @link http://php.net/manual/en/function.mb-substr-count.php * @link http://php.net/manual/en/function.mb-substr-count.php
*/ */
function api_substr_count($haystack, $needle, $encoding = null) { function api_substr_count($haystack, $needle, $encoding = null) {
if (empty($encoding)) { /*if (empty($encoding)) {
$encoding = _api_mb_internal_encoding(); $encoding = _api_mb_internal_encoding();
} }
if (_api_mb_supports($encoding)) { if (_api_mb_supports($encoding)) {
return mb_substr_count($haystack, $needle, $encoding); return mb_substr_count($haystack, $needle, $encoding);
} }*/
return substr_count($haystack, $needle); return substr_count($haystack, $needle);
} }
@ -2086,7 +1893,7 @@ function api_substr_count($haystack, $needle, $encoding = null) {
* @link http://php.net/manual/function.substr-replace * @link http://php.net/manual/function.substr-replace
*/ */
function api_substr_replace($string, $replacement, $start, $length = null, $encoding = null) { function api_substr_replace($string, $replacement, $start, $length = null, $encoding = null) {
if (empty($encoding)) { /*if (empty($encoding)) {
$encoding = _api_mb_internal_encoding(); $encoding = _api_mb_internal_encoding();
} }
if (_api_is_single_byte_encoding($encoding)) { if (_api_is_single_byte_encoding($encoding)) {
@ -2113,7 +1920,7 @@ function api_substr_replace($string, $replacement, $start, $length = null, $enco
} }
if (is_null($length)) { if (is_null($length)) {
return substr_replace($string, $replacement, $start); return substr_replace($string, $replacement, $start);
} }*/
return substr_replace($string, $replacement, $start, $length); return substr_replace($string, $replacement, $start, $length);
} }
@ -2126,10 +1933,11 @@ function api_substr_replace($string, $replacement, $start, $length = null, $enco
* @link http://php.net/manual/en/function.ucfirst * @link http://php.net/manual/en/function.ucfirst
*/ */
function api_ucfirst($string, $encoding = null) { function api_ucfirst($string, $encoding = null) {
if (empty($encoding)) { /*if (empty($encoding)) {
$encoding = _api_mb_internal_encoding(); $encoding = _api_mb_internal_encoding();
} }
return api_strtoupper(api_substr($string, 0, 1, $encoding), $encoding) . api_substr($string, 1, api_strlen($string, $encoding), $encoding); return api_strtoupper(api_substr($string, 0, 1, $encoding), $encoding) . api_substr($string, 1, api_strlen($string, $encoding), $encoding);*/
return ucfirst($string);
} }
/** /**
@ -2141,12 +1949,12 @@ function api_ucfirst($string, $encoding = null) {
* @link http://php.net/manual/en/function.ucwords * @link http://php.net/manual/en/function.ucwords
*/ */
function api_ucwords($string, $encoding = null) { function api_ucwords($string, $encoding = null) {
if (_api_mb_supports($encoding)) { /*if (_api_mb_supports($encoding)) {
if (empty($encoding)) { if (empty($encoding)) {
$encoding = _api_mb_internal_encoding(); $encoding = _api_mb_internal_encoding();
} }
return mb_convert_case($string, MB_CASE_TITLE, $encoding); return mb_convert_case($string, MB_CASE_TITLE, $encoding);
} }*/
return ucwords($string); return ucwords($string);
} }
@ -2227,30 +2035,6 @@ function api_preg_replace($pattern, $replacement, $subject, $limit = -1, &$count
return preg_replace($pattern, $replacement, $subject, $limit, $count); return preg_replace($pattern, $replacement, $subject, $limit, $count);
} }
/**
 * Performs a regular expression search and replace using a callback function, UTF-8 aware when it is applicable.
 *
 * When api_is_utf8() accepts the working encoding, the PCRE "u" modifier is appended to the pattern
 * (to every pattern, when an array is given) before the call is delegated to preg_replace_callback().
 *
 * @param string|array $pattern The pattern to search for. It can be either a string or an array with strings.
 * @param callable $callback A callback that will be called and passed an array of matched elements in the $subject string. The callback should return the replacement string.
 * @param string|array $subject The string or an array with strings to search and replace.
 * @param int $limit (optional) The maximum possible replacements for each pattern in each subject string. Defaults to -1 (no limit).
 * @param int &$count (optional) If specified, this variable will be filled with the number of replacements done.
 * @param string $encoding (optional) The character encoding used internally by this function. If it is omitted, the value returned by _api_mb_internal_encoding() is used.
 * @return array|string Returns an array if the subject parameter is an array, or a string otherwise.
 * @link http://php.net/preg_replace_callback
 */
function api_preg_replace_callback($pattern, $callback, $subject, $limit = -1, &$count = 0, $encoding = null) {
    if (empty($encoding)) {
        $encoding = _api_mb_internal_encoding();
    }
    // The encoding cannot change while the patterns are being prepared, so it is tested only once.
    if (api_is_utf8($encoding)) {
        if (is_array($pattern)) {
            // Modify by key rather than by reference: a by-reference foreach would leave
            // a variable aliased to the last element of the array after the loop.
            foreach (array_keys($pattern) as $key) {
                $pattern[$key] .= 'u';
            }
        } else {
            $pattern .= 'u';
        }
    }
    return preg_replace_callback($pattern, $callback, $subject, $limit, $count);
}
/** /**
* Splits a string by a regular expression, UTF-8 aware when it is applicable. * Splits a string by a regular expression, UTF-8 aware when it is applicable.
@ -2439,46 +2223,6 @@ function api_eregi_replace($pattern, $replacement, $string, $option = null) {
return eregi_replace($pattern, $replacement, $string); return eregi_replace($pattern, $replacement, $string);
} }
/**
 * Note: Try to avoid using this function. Use api_preg_split() with Perl-compatible regular expression syntax.
 *
 * Splits a string by a regular expression pattern and returns the pieces as an array.
 * The encoding is taken from _api_mb_regex_encoding(). Three strategies are tried in order:
 *  1. mb_split() directly, when _api_mb_supports() accepts the encoding;
 *  2. mb_split() on UTF-8 converted copies of the pattern and the string, with the result converted back,
 *     when MBSTRING_INSTALLED is set and api_is_encoding_supported() accepts the encoding;
 *  3. the legacy split() function otherwise.
 *
 * @param string $pattern The regular expression pattern.
 * @param string $string The string being split.
 * @param int $limit (optional) When given, the string is split into at most $limit elements.
 * @return array The result as an array.
 * This function is aimed at replacing the functions split() and mb_split() for human-language strings.
 * @link http://php.net/manual/en/function.split
 * @link http://php.net/manual/en/function.mb-split
 */
function api_split($pattern, $string, $limit = null) {
    // The limit is only forwarded when the caller actually supplied one.
    $unlimited = is_null($limit);
    $encoding = _api_mb_regex_encoding();

    if (_api_mb_supports($encoding)) {
        return $unlimited ? @mb_split($pattern, $string) : @mb_split($pattern, $string, $limit);
    }

    if (MBSTRING_INSTALLED && api_is_encoding_supported($encoding)) {
        global $_api_encoding;
        $_api_encoding = $encoding;
        // Temporarily switch the regex encoding to UTF-8; it is restored below.
        _api_mb_regex_encoding('UTF-8');
        $utf8_pattern = api_utf8_encode($pattern, $encoding);
        $utf8_string = api_utf8_encode($string, $encoding);
        if ($unlimited) {
            $pieces = @mb_split($utf8_pattern, $utf8_string);
        } else {
            $pieces = @mb_split($utf8_pattern, $utf8_string, $limit);
        }
        $pieces = _api_array_utf8_decode($pieces);
        _api_mb_regex_encoding($encoding);
        return $pieces;
    }

    if ($unlimited) {
        return split($pattern, $string);
    }
    return split($pattern, $string, $limit);
}
/** /**
* String comparison * String comparison
*/ */
@ -2862,29 +2606,6 @@ function api_knatcasesort(&$array, $language = null, $encoding = null) {
return uksort($array, 'strnatcasecmp'); return uksort($array, 'strnatcasecmp');
} }
/**
 * Sorts an array by keys using natural order algorithm, case insensitive, reverse order.
 *
 * The keys are compared with '_api_casercmp' when INTL_INSTALLED is set and
 * _api_get_alpha_numerical_collator() returns an object for the language; in that case the collator
 * and the encoding are published through the globals $_api_collator and $_api_encoding.
 * Otherwise '_api_strnatcasercmp' is used as the comparison callback.
 *
 * @param array $array The input array; it is sorted in place.
 * @param string $language (optional) The language in which comparison is to be made. If language is omitted, interface language is assumed then.
 * @param string $encoding (optional) The character encoding used internally by this function. If it is omitted, the value returned by _api_mb_internal_encoding() is used.
 * @return bool Returns TRUE on success, FALSE on error.
 */
function api_knatcasersort(&$array, $language = null, $encoding = null) {
    // Comparison callback used unless a collator turns out to be available.
    $comparator = '_api_strnatcasercmp';

    if (INTL_INSTALLED) {
        if (empty($encoding)) {
            $encoding = _api_mb_internal_encoding();
        }
        $collator = _api_get_alpha_numerical_collator($language);
        if (is_object($collator)) {
            global $_api_collator, $_api_encoding;
            $_api_collator = $collator;
            $_api_encoding = $encoding;
            $comparator = '_api_casercmp';
        }
    }

    return uksort($array, $comparator);
}
/** /**
* Sorts an array, elements will be arranged from the lowest to the highest. * Sorts an array, elements will be arranged from the lowest to the highest.
* @param array $array The input array. * @param array $array The input array.
@ -3237,59 +2958,8 @@ function api_detect_encoding($string, $language = null) {
if (api_is_valid_utf8($string)) { if (api_is_valid_utf8($string)) {
return 'UTF-8'; return 'UTF-8';
} }
$result = null;
$delta_points_min = LANGUAGE_DETECT_MAX_DELTA;
// Testing non-UTF-8 encodings.
$encodings = api_get_valid_encodings();
foreach ($encodings as & $encoding) {
if (api_is_encoding_supported($encoding) && !api_is_utf8($encoding)) {
$stringToParse = api_substr($string, 0, LANGUAGE_DETECT_MAX_LENGTH, $encoding);
$strintToParse2 = _api_generate_n_grams(
$stringToParse,
$encoding
);
$result_array = _api_compare_n_grams(
$strintToParse2,
$encoding
);
if (!empty($result_array)) { return mb_detect_encoding($string);
list($key, $delta_points) = each($result_array);
if ($delta_points < $delta_points_min) {
$pos = strpos($key, ':');
$result_encoding = api_refine_encoding_id(substr($key, $pos + 1));
if (api_equal_encodings($encoding, $result_encoding)) {
if ($string == api_utf8_decode(api_utf8_encode($string, $encoding), $encoding)) {
$delta_points_min = $delta_points;
$result = $encoding;
}
}
}
}
}
}
// "Broken" UTF-8 texts are to be detected as UTF-8.
// This functionality is enabled when language of the text is known.
$language = api_purify_language_id((string)$language);
if (!empty($language)) {
$encoding = 'UTF-8';
$result_array = & _api_compare_n_grams(_api_generate_n_grams(api_substr($string, 0, LANGUAGE_DETECT_MAX_LENGTH, $encoding), $encoding), $encoding);
if (!empty($result_array)) {
list($key, $delta_points) = each($result_array);
if ($delta_points < $delta_points_min) {
$pos = strpos($key, ':');
$result_encoding = api_refine_encoding_id(substr($key, $pos + 1));
$result_language = substr($key, 0, $pos);
if ($language == $result_language && api_is_utf8($result_encoding)) {
$delta_points_min = $delta_points;
$result = $encoding;
}
}
}
}
return $result;
} }
/** /**
@ -3311,11 +2981,9 @@ function api_is_valid_utf8(&$string)
* @param string $string The string to be tested/validated. * @param string $string The string to be tested/validated.
* @return bool Returns TRUE when the tested string contains 7-bit ASCII characters only, FALSE otherwise. * @return bool Returns TRUE when the tested string contains 7-bit ASCII characters only, FALSE otherwise.
*/ */
function api_is_valid_ascii(&$string) { function api_is_valid_ascii(&$string)
if (MBSTRING_INSTALLED) { {
return @mb_detect_encoding($string, 'ASCII', true) == 'ASCII' ? true : false; return mb_detect_encoding($string, 'ASCII', true) == 'ASCII' ? true : false;
}
return !preg_match('/[^\x00-\x7F]/S', $string);
} }
/** /**

@ -46,127 +46,6 @@ function _api_get_latin1_compatible_languages() {
* and Information Retrieval, 1994. * and Information Retrieval, 1994.
* @link http://citeseer.ist.psu.edu/cache/papers/cs/810/http:zSzzSzwww.info.unicaen.frzSz~giguetzSzclassifzSzcavnar_trenkle_ngram.pdf/n-gram-based-text.pdf * @link http://citeseer.ist.psu.edu/cache/papers/cs/810/http:zSzzSzwww.info.unicaen.frzSz~giguetzSzclassifzSzcavnar_trenkle_ngram.pdf/n-gram-based-text.pdf
*/ */
/**
 * Generates a statistical, n-gram based language profile from the given text.
 *
 * The text is converted with api_utf8_encode($string, $encoding), lowercased, and every control,
 * punctuation and digit character matched by the purifying pattern is turned into a word separator
 * (apostrophes stay). Each word is then padded with '_' markers and cut into n-grams of 1 to $n_max
 * characters, which are converted back with api_utf8_decode(..., $encoding) and counted.
 *
 * NOTE(review): earlier documentation stated that the input "should be UTF-8 encoded", yet the text is
 * passed through api_utf8_encode() with $encoding - confirm the expected input encoding against callers.
 *
 * @param string $string The input text. Practically it should be at least 3000 characters long, 40000 characters size is for increased accuracy.
 * @param string $encoding The encoding handed to api_utf8_encode()/api_utf8_decode() for the conversions described above.
 * @param int $n_grams_max (optional) The size of the array of the generated n-grams.
 * @param int $n_max (optional) The limit of the number of characters that an n-gram may contain.
 * @return array An array that contains the constructed n-grams, sorted in reverse order by their frequencies. Frequencies are not stored in the array.
 */
function _api_generate_n_grams(&$string, $encoding, $n_grams_max = 350, $n_max = 4) {
    if (empty($string)) {
        return array();
    }

    // Lowercase UTF-8 copy of the text, wrapped in spaces.
    $normalized = ' '.api_strtolower(api_utf8_encode($string, $encoding), 'UTF-8').' ';
    // Every non-letter character (apostrophe excepted) becomes a '_' separator ...
    $separated = preg_replace('/[\x00-\x1F\x20-\x26\x28-\x3E\?@\x5B-\x60{|}~\x7F]/u', '_', $normalized);
    // ... and the text is split into separate words on those separators.
    $words = preg_split('/_/u', $separated, -1, PREG_SPLIT_NO_EMPTY);

    $word_start = '_'; // Marks the beginning of a word.
    $word_end = str_repeat('_', $n_max); // Padding that marks the end of a word.
    $frequencies = array(); // n-gram => number of occurrences.

    foreach ($words as $word) {
        // Number of start positions examined per n-gram size: the leading marker plus every character.
        $positions = api_strlen($word, 'UTF-8') + 1;
        $padded = $word_start.$word.$word_end;
        for ($size = 1; $size <= $n_max; $size++) {
            for ($offset = 0; $offset < $positions; $offset++) {
                $n_gram = api_utf8_decode(api_substr($padded, $offset, $size, 'UTF-8'), $encoding);
                $frequencies[$n_gram] = isset($frequencies[$n_gram]) ? $frequencies[$n_gram] + 1 : 1;
            }
        }
    }

    // Most frequent n-grams first.
    arsort($frequencies);
    // Only the first $n_grams_max n-grams are kept; the counters themselves are dropped.
    return array_keys(array_slice($frequencies, 0, $n_grams_max));
}
/**
 * Compares a list of n-grams with the stored language profiles and ranks the profiles that match.
 *
 * On the first call the "*.txt" profile files are read from the
 * internationalization_database/language_detection/language_profiles/ directory next to this file
 * and cached in a static variable, one entry per "language:encoding" combination.
 *
 * The value $max_delta = 80000 is good enough for speed and detection accuracy.
 * If you set the value of $max_delta too low, no language will be recognized.
 * $max_delta = 400 * 350 = 140000 is the best detection with lowest speed.
 *
 * @param array $n_grams The n-grams of the examined text; the array index of each n-gram is used as its rank.
 * @param string $encoding Only profiles whose encoding equals this one (per api_equal_encodings()) are compared.
 * @param int $max_delta (optional) The comparison with a profile is aborted once its distance exceeds this value.
 * @return array Distances keyed by "language:encoding", sorted from the smallest (best match) to the largest. An empty array is returned when nothing matches.
 */
function _api_compare_n_grams(&$n_grams, $encoding, $max_delta = LANGUAGE_DETECT_MAX_DELTA) {
    static $language_profiles;
    if (!isset($language_profiles)) {
        // Start from an empty array, so that ksort() and the comparison loop below stay valid
        // even when the profile directory is missing or unreadable.
        $language_profiles = array();
        // Reading the language profile files from the internationalization database.
        $exceptions = array('.', '..', 'CVS', '.htaccess', '.svn', '_svn', 'index.html');
        $path = str_replace("\\", '/', dirname(__FILE__).'/internationalization_database/language_detection/language_profiles/');
        $non_utf8_encodings = & _api_non_utf8_encodings();
        if (is_dir($path) && ($handle = @opendir($path))) {
            while (($dir_entry = @readdir($handle)) !== false) {
                if (api_in_array_nocase($dir_entry, $exceptions)) {
                    continue;
                }
                if (strpos($dir_entry, '.txt') === false) {
                    continue;
                }
                // $path already ends with a slash.
                $dir_entry_full_path = $path.$dir_entry;
                if (@filetype($dir_entry_full_path) == 'dir') {
                    continue;
                }
                $data = @file_get_contents($dir_entry_full_path);
                if ($data === false) {
                    continue;
                }
                $language = basename($dir_entry_full_path, '.txt');
                $encodings = array('UTF-8');
                if (!empty($non_utf8_encodings[$language])) {
                    $encodings = array_merge($encodings, $non_utf8_encodings[$language]);
                }
                foreach ($encodings as $enc) {
                    $data_enc = api_utf8_decode($data, $enc);
                    if (empty($data_enc)) {
                        continue;
                    }
                    $key = $language.':'.$enc;
                    // After array_flip() every n-gram maps to its line number, i.e. its rank in the profile.
                    $language_profiles[$key]['data'] = array_flip(explode("\n", $data_enc));
                    $language_profiles[$key]['language'] = $language;
                    $language_profiles[$key]['encoding'] = $enc;
                }
            }
            // The handle is closed only here, where it is known to have been opened successfully.
            closedir($handle);
        }
        ksort($language_profiles);
    }
    if (!is_array($n_grams) || empty($n_grams)) {
        return array();
    }
    $result = array();
    // Comparison between the input n-grams and the language profiles.
    foreach ($language_profiles as $key => &$language_profile) {
        if (!api_is_language_supported($language_profile['language']) || !api_equal_encodings($encoding, $language_profile['encoding'])) {
            continue;
        }
        $delta = 0; // This is a summary measurement for matching between the input text and the current language profile.
        // Searching each n-gram from the input text into the language profile.
        foreach ($n_grams as $rank => &$n_gram) {
            if (isset($language_profile['data'][$n_gram])) {
                // The n-gram has been found, the difference between places in both
                // arrays is calculated (so called delta-points are adopted for
                // measuring distances between n-gram ranks).
                $delta += abs($rank - $language_profile['data'][$n_gram]);
            } else {
                // The n-gram has not been found in the profile. We add then
                // a large enough "distance" in delta-points.
                $delta += 400;
            }
            // Abort: This language already differs too much.
            if ($delta > $max_delta) {
                break;
            }
        }
        // Drop the reference, so that $n_gram no longer aliases an element of the caller's array.
        unset($n_gram);
        // Include only non-aborted languages in result array.
        if ($delta < ($max_delta - 400)) {
            $result[$key] = $delta;
        }
    }
    // Drop the reference into the static profile cache.
    unset($language_profile);
    asort($result);
    return $result;
}
/** /**
* Appendix to "Date and time formats" * Appendix to "Date and time formats"
*/ */
@ -282,7 +161,10 @@ function _api_clean_person_name($person_name) {
* @param string $from_encoding The encoding that $string is being converted from. * @param string $from_encoding The encoding that $string is being converted from.
* @return string Returns the converted string. * @return string Returns the converted string.
*/ */
function _api_convert_encoding(&$string, $to_encoding, $from_encoding) { function _api_convert_encoding(&$string, $to_encoding, $from_encoding)
{
return mb_convert_encoding($string, $to_encoding, $from_encoding);
/*
$str = (string)$string; $str = (string)$string;
static $character_map = array(); static $character_map = array();
static $utf8_compatible = array('UTF-8', 'US-ASCII'); static $utf8_compatible = array('UTF-8', 'US-ASCII');
@ -351,7 +233,7 @@ function _api_convert_encoding(&$string, $to_encoding, $from_encoding) {
} else { } else {
$str = _api_utf8_from_unicode($codepoints); $str = _api_utf8_from_unicode($codepoints);
} }
return $str; return $str;*/
} }
/** /**

@ -900,28 +900,25 @@ class UserManager
*/ */
public static function create_username($firstname, $lastname, $language = null, $encoding = null) public static function create_username($firstname, $lastname, $language = null, $encoding = null)
{ {
if (is_null($encoding)) {
$encoding = api_get_system_encoding();
}
if (is_null($language)) {
$language = api_get_interface_language();
}
if (empty($firstname) && empty($lastname)) { if (empty($firstname) && empty($lastname)) {
return false; return false;
} }
$firstname = api_substr(preg_replace(USERNAME_PURIFIER, '', api_transliterate($firstname, '', $encoding)), 0, 1); // The first letter only.
$firstname = api_substr(preg_replace(USERNAME_PURIFIER, '', $firstname), 0, 1); // The first letter only.
//Looking for a space in the lastname //Looking for a space in the lastname
$pos = api_strpos($lastname, ' '); $pos = api_strpos($lastname, ' ');
if ($pos !== false) { if ($pos !== false) {
$lastname = api_substr($lastname, 0, $pos); $lastname = api_substr($lastname, 0, $pos);
} }
$lastname = preg_replace(USERNAME_PURIFIER, '', api_transliterate($lastname, '', $encoding)); $lastname = preg_replace(USERNAME_PURIFIER, '', $lastname);
//$username = api_is_western_name_order(null, $language) ? $firstname.$lastname : $lastname.$firstname;
$username = $firstname.$lastname; $username = $firstname.$lastname;
if (empty($username)) { if (empty($username)) {
$username = 'user'; $username = 'user';
} }
$username = URLify::transliterate($username);
return strtolower(substr($username, 0, USERNAME_MAX_LENGTH - 3)); return strtolower(substr($username, 0, USERNAME_MAX_LENGTH - 3));
} }
@ -944,7 +941,7 @@ class UserManager
// In this case the actual input parameter $firstname should contain ASCII-letters and digits only. // In this case the actual input parameter $firstname should contain ASCII-letters and digits only.
// For making this method tolerant of mistakes, let us transliterate and purify the suggested input username anyway. // For making this method tolerant of mistakes, let us transliterate and purify the suggested input username anyway.
// So, instead of the sentence $username = $firstname; we place the following: // So, instead of the sentence $username = $firstname; we place the following:
$username = strtolower(preg_replace(USERNAME_PURIFIER, '', api_transliterate($firstname, '', $encoding))); $username = strtolower(preg_replace(USERNAME_PURIFIER, '', $firstname));
} else { } else {
$username = self::create_username($firstname, $lastname, $language, $encoding); $username = self::create_username($firstname, $lastname, $language, $encoding);
} }
@ -957,6 +954,9 @@ class UserManager
} }
$username = $temp_username; $username = $temp_username;
} }
$username = URLify::transliterate($username);
return $username; return $username;
} }
@ -973,8 +973,9 @@ class UserManager
// 1. Conversion of unacceptable letters (Latin letters with accents, for example) into ASCII letters, so that they are not removed entirely. // 1. Conversion of unacceptable letters (Latin letters with accents, for example) into ASCII letters, so that they are not removed entirely.
// 2. Applying the strict purifier. // 2. Applying the strict purifier.
// 3. Length limitation. // 3. Length limitation.
$toreturn = api_get_setting('login_is_email') == 'true' ? substr(preg_replace(USERNAME_PURIFIER_MAIL, '', api_transliterate($username, '', $encoding)), 0, USERNAME_MAX_LENGTH) : substr(preg_replace(USERNAME_PURIFIER, '', api_transliterate($username, '', $encoding)), 0, USERNAME_MAX_LENGTH); $return = api_get_setting('login_is_email') == 'true' ? substr(preg_replace(USERNAME_PURIFIER_MAIL, '', $username), 0, USERNAME_MAX_LENGTH) : substr(preg_replace(USERNAME_PURIFIER, '', $username), 0, USERNAME_MAX_LENGTH);
return $toreturn; $return = URLify::transliterate($return);
return $return;
} }
// 1. Applying the shallow purifier. // 1. Applying the shallow purifier.
// 2. Length limitation. // 2. Length limitation.

Loading…
Cancel
Save