Feature #272 - The PHP function fgetcsv() depends on the OS locale setting. When the file system encoding is different than the encoding of the CSV-file, then the imported data is damaged. For solving this problem, two new functions have been introduced in the internationalization library - api_str_getcsv() and api_fgetcsv().
if (api_byte_count($delimiter) > 1) { $delimiter = $delimiter[1]; }
if (api_byte_count($enclosure) > 1) { $enclosure = $enclosure[1]; }
if (api_byte_count($escape) > 1) { $escape = $escape[1]; }
$len = api_byte_count($string);
$enclosed = false;
$escaped = false;
$value = '';
$result = array();
for ($i = 0; $i < $len; $i++) {
$char = $string[$i];
if ($char == $escape) {
if (!$escaped) {
$escaped = true;
continue;
}
}
$escaped = false;
switch ($char) {
case $enclosure:
if ($enclosed && $string[$i + 1] == $enclosure) {
$value .= $char;
$i++;
} else {
$enclosed = !$enclosed;
}
break;
case $delimiter:
if (!$enclosed) {
$result[] = $value;
$value = '';
} else {
$value .= $char;
}
break;
default:
$value .= $char;
break;
}
}
if (!empty($value)) {
$result[] = $value;
}
return $result;
}
/**
* Reads a line from a file pointer and parses it for CSV fields. This function is not affected by the OS-locale settings.
* @param resource $handle The file pointer, it must be valid and must point to a file successfully opened by fopen().
* @param int $length (optional) Reading ends when length - 1 bytes have been read, on a newline (which is included in the return value), or on EOF (whichever comes first).
* If no length is specified, it will keep reading from the stream until it reaches the end of the line.
* @param string $delimiter (optional) The field delimiter, one character only. The default delimiter character is comma {,).
* @param string $enclosure (optional) The field enclosure, one character only. The default enclosure character is quote (").
* @param string $escape (optional) The escape character, one character only. The default escape character is backslash (\).
* @return array Returns an array containing the fields read.
* Note: In order this function to work correctly with UTF-8, limitation for the parameters $delimiter, $enclosure and $escape
* should be kept. These parameters should be single ASCII characters only.