From 6df851bf20624bdf3b040455de0df7161fba30ea Mon Sep 17 00:00:00 2001 From: Ivan Tcholakov Date: Fri, 26 Mar 2010 12:03:47 +0200 Subject: [PATCH] Feature #272 - Internationalization library: A change for correct detection of "broken" UTF-8 texts as UTF-8. --- main/inc/lib/internationalization.lib.php | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/main/inc/lib/internationalization.lib.php b/main/inc/lib/internationalization.lib.php index 654c674a01..62823c96dc 100755 --- a/main/inc/lib/internationalization.lib.php +++ b/main/inc/lib/internationalization.lib.php @@ -3485,11 +3485,12 @@ function api_detect_encoding($string) { if (api_is_valid_utf8($string)) { return 'UTF-8'; } + // "Broken" UTF-8 texts are to be detected as UTF-8. $result = null; $delta_points_min = LANGUAGE_DETECT_MAX_DELTA; $encodings = api_get_valid_encodings(); - foreach ($encodings as $encoding) { - if (api_is_encoding_supported($encoding) & !api_is_utf8($encoding)) { + foreach ($encodings as & $encoding) { + if (api_is_encoding_supported($encoding)) { $result_array = & _api_compare_n_grams(_api_generate_n_grams(api_substr($string, 0, LANGUAGE_DETECT_MAX_LENGTH, $encoding), $encoding), $encoding); if (!empty($result_array)) { list($key, $delta_points) = each($result_array);