Feature #272 - Internationalization library: A change for correct detection of "broken" UTF-8 texts as UTF-8.

skala
Ivan Tcholakov 16 years ago
parent 298a83b9f3
commit 6df851bf20
  1. 5
      main/inc/lib/internationalization.lib.php

@@ -3485,11 +3485,12 @@ function api_detect_encoding($string) {
if (api_is_valid_utf8($string)) {
return 'UTF-8';
}
// "Broken" UTF-8 texts are to be detected as UTF-8.
$result = null;
$delta_points_min = LANGUAGE_DETECT_MAX_DELTA;
$encodings = api_get_valid_encodings();
foreach ($encodings as $encoding) {
if (api_is_encoding_supported($encoding) & !api_is_utf8($encoding)) {
foreach ($encodings as & $encoding) {
if (api_is_encoding_supported($encoding)) {
$result_array = & _api_compare_n_grams(_api_generate_n_grams(api_substr($string, 0, LANGUAGE_DETECT_MAX_LENGTH, $encoding), $encoding), $encoding);
if (!empty($result_array)) {
list($key, $delta_points) = each($result_array);

Loading…
Cancel
Save