Feature #272 - The PHP function fgetcsv() depends on the OS locale setting. When the locale's encoding differs from the encoding of the CSV file, the imported data gets corrupted. To solve this problem, two new locale-independent functions have been added to the internationalization library: api_str_getcsv() and api_fgetcsv().

skala
Ivan Tcholakov 15 years ago
parent 1d69ad42dc
commit 85f1a4f977
  1. 88
      main/inc/lib/internationalization.lib.php
  2. 27
      tests/main/inc/lib/internationalization.lib.test.php

@ -604,7 +604,7 @@ function api_sort_by_first_name($language = null) {
* @param string $string The input string.
* @return int Returns the length of the input string (or binary data) as number of bytes.
*/
function api_byte_count($string) {
function api_byte_count(& $string) {
static $use_mb_strlen;
if (!isset($use_mb_strlen)) {
$use_mb_strlen = MBSTRING_INSTALLED && ((int) ini_get('mbstring.func_overload') & 2);
@ -3402,6 +3402,92 @@ function api_is_valid_ascii(&$string) {
}
/**
* ----------------------------------------------------------------------------
* Parsing CSV-data.
* ----------------------------------------------------------------------------
*/
/**
 * Parses CSV data (one line) into an array. This function is not affected by the OS-locale settings.
 *
 * The string is scanned byte by byte:
 * - the escape character is removed from the output and the byte that follows it is taken literally;
 * - inside an enclosed field, a doubled enclosure character produces one literal enclosure character;
 * - a delimiter inside an enclosed field is part of the field value.
 *
 * @param string $string The input string.
 * @param string $delimiter (optional) The field delimiter, one character only. The default delimiter character is comma (,).
 * @param string $enclosure (optional) The field enclosure, one character only. The default enclosure character is quote (").
 * @param string $escape (optional) The escape character, one character only. The default escape character is backslash (\).
 * @return array Returns an array containing the fields read. An empty input string gives an empty array.
 * Note: For this function to work correctly with UTF-8, the parameters $delimiter, $enclosure and $escape
 * must be single ASCII characters. When a longer string is passed, only its first byte is used.
 * This limitation keeps the implementation simple and fast.
 * @link http://php.net/manual/en/function.str-getcsv.php (exists as of PHP 5 >= 5.3.0)
 */
function & api_str_getcsv(& $string, $delimiter = ',', $enclosure = '"', $escape = '\\') {
	// Only single-byte control characters are supported: keep the FIRST byte of anything longer.
	if (api_byte_count($delimiter) > 1) { $delimiter = $delimiter[0]; }
	if (api_byte_count($enclosure) > 1) { $enclosure = $enclosure[0]; }
	if (api_byte_count($escape) > 1) { $escape = $escape[0]; }
	$result = array();
	$len = api_byte_count($string);
	if ($len == 0) {
		// Nothing to parse, there are no fields at all.
		return $result;
	}
	// When the escape character equals the enclosure, escaping is expressed by doubling the
	// enclosure, so a separate escape pass would swallow every quote.
	$use_escape = $escape !== $enclosure;
	$enclosed = false;
	$escaped = false;
	$value = '';
	for ($i = 0; $i < $len; $i++) {
		$char = $string[$i];
		if ($escaped) {
			// The previous byte was the escape character: this byte loses any special meaning.
			$value .= $char;
			$escaped = false;
			continue;
		}
		if ($use_escape && $char === $escape) {
			$escaped = true;
			continue;
		}
		if ($char === $enclosure) {
			// The bounds check avoids reading past the end when the closing enclosure is the last byte.
			if ($enclosed && $i + 1 < $len && $string[$i + 1] === $enclosure) {
				$value .= $char;
				$i++;
			} else {
				$enclosed = !$enclosed;
			}
		} elseif ($char === $delimiter && !$enclosed) {
			$result[] = $value;
			$value = '';
		} else {
			$value .= $char;
		}
	}
	// The last field is always stored, even when it is empty or "0", so the number of columns stays correct.
	$result[] = $value;
	return $result;
}
/**
 * Reads a line from a file pointer and parses it for CSV fields. This function is not affected by the OS-locale settings.
 * The line ending (CR and/or LF) is removed before parsing, so it does not become part of the last field.
 * @param resource $handle The file pointer, it must be valid and must point to a file successfully opened by fopen().
 * @param int $length (optional) Reading ends when length - 1 bytes have been read, on a newline, or on EOF (whichever comes first).
 * If no length is specified (null or 0), it will keep reading from the stream until it reaches the end of the line.
 * @param string $delimiter (optional) The field delimiter, one character only. The default delimiter character is comma (,).
 * @param string $enclosure (optional) The field enclosure, one character only. The default enclosure character is quote (").
 * @param string $escape (optional) The escape character, one character only. The default escape character is backslash (\).
 * @return array|bool Returns an array containing the fields read, or false when no more lines can be read.
 * Note: For this function to work correctly with UTF-8, the parameters $delimiter, $enclosure and $escape
 * must be single ASCII characters.
 * @link http://php.net/manual/en/function.fgetcsv.php
 */
function api_fgetcsv($handle, $length = null, $delimiter = ',', $enclosure = '"', $escape = '\\') {
	// A length of 0 means "no limit" for fgetcsv(); fgets() does not accept it, so treat it like null.
	$line = empty($length) ? fgets($handle) : fgets($handle, $length);
	if ($line === false) {
		return false;
	}
	// fgets() keeps the line ending; strip it so that it is not glued to the last field.
	$line = rtrim($line, "\r\n");
	return api_str_getcsv($line, $delimiter, $enclosure, $escape);
}
/**
* ----------------------------------------------------------------------------
* Functions for internal use behind this API.

@ -1322,6 +1322,33 @@ class TestInternationalization extends UnitTestCase {
}
*/
public function test_api_str_getcsv() {
	// Semicolon-separated input lines: plain ASCII, and one with an enclosed field,
	// an escape character and non-ASCII (Cyrillic) text.
	$csv_lines = array(
		'FirstName;LastName;Email',
		'John;Doe;john.doe@mail.com',
		'"Иван";\\Чолаков;ivan@mail.com'
	);
	// The fields expected for each input line, in the same order.
	$expected_fields = array(
		array('FirstName', 'LastName', 'Email'),
		array('John', 'Doe', 'john.doe@mail.com'),
		array('Иван', 'Чолаков', 'ivan@mail.com')
	);
	$parsed = array();
	for ($i = 0, $count = count($csv_lines); $i < $count; $i++) {
		$csv_line = $csv_lines[$i];
		$parsed[] = api_str_getcsv($csv_line, ';');
	}
	// Strict comparison: same fields, same order, same types.
	$this->assertTrue($parsed === $expected_fields);
}
public function test_api_fgetcsv() {
	// Reads the sample CSV file line by line and checks that at least one line has been parsed.
	$filename = api_get_path(SYS_CODE_PATH).'admin/exemple.csv';
	$res = array();
	$handle = @fopen($filename, 'r');
	if ($handle !== false) {
		while (($line = @api_fgetcsv($handle, null, ';')) !== false) {
			$res[] = $line;
		}
		@fclose($handle);
		$this->assertTrue(is_array($res) && count($res) > 0);
	} else {
		// The file is missing, skip this test. There is nothing to compare against here,
		// so record a trivially passing assertion instead of referencing an undefined variable.
		$this->assertTrue(true);
	}
}
}
?>
Loading…
Cancel
Save