#4758 glossaire csv export with international chars do not work

14 years ago · f2c93cdd38
parent 7b2e9fbbd7
commit f2c93cdd38
16 changed files with 1265 additions and 184 deletions
--- a/main/glossary/index.php
+++ b/main/glossary/index.php
@ -80,7 +80,7 @@ if (isset($_GET['action']) && $_GET['action'] == 'export') {
        $list[] = array ($line[0], $line[1]);
    }    
    $filename = 'glossary_course_'.api_get_course_id();
-	Export::export_table_csv($list,$filename);
+	Export::export_table_csv_utf8($list, $filename);
 }

 Display::display_header($tool_name);
@ -214,18 +214,17 @@ if (api_is_allowed_to_edit(null, true)) {
                        }
                    }
                }  
-                $data = Import::csv_to_array($_FILES['file']['tmp_name']);                
-                
-                if (!empty($data)) {
-                    $good = 0;
-                    $bad = 0;                    
-                    foreach($data as $item) {                          
-                        if (GlossaryManager::save_glossary(array('glossary_title' => $item['term'], 'glossary_comment' => $item['definition']), false)) 
-                            $good++;
-                        else
-                            $bad++;
-                    }
-                }                
+                //$data = Import::csv_to_array($_FILES['file']['tmp_name']);     
+                $data = Import::csv_reader($_FILES['file']['tmp_name']);
+                $good = 0;
+                $bad = 0;                    
+                foreach($data as $item) {                          
+                    if (GlossaryManager::save_glossary(array('glossary_title' => $item['term'], 'glossary_comment' => $item['definition']), false)) 
+                        $good++;
+                    else
+                        $bad++;
+                }
+                      
                Display::display_confirmation_message (get_lang ("TermsImported") . ':' . $good);
                
                if ($bad)
--- a/main/inc/lib/autoload.class.php
+++ b/main/inc/lib/autoload.class.php
@ -102,6 +102,7 @@ class Autoload
        $result['ClosureCompiler'] = '/main/inc/lib/closure_compiler.class.php';
        $result['CodeUtilities'] = '/main/inc/lib/code_utilities.class.php';
        $result['ConditionalLogin'] = '/main/inc/lib/conditional_login.class.php';
+        $result['Converter'] = '/main/inc/lib/system/text/converter.class.php';
        $result['Course'] = '/main/coursecopy/classes/Course.class.php';
        $result['CourseArchiver'] = '/main/coursecopy/classes/CourseArchiver.class.php';
        $result['CourseBuilder'] = '/main/coursecopy/classes/CourseBuilder.class.php';
@ -116,6 +117,8 @@ class Autoload
        $result['CourseRestorer'] = '/main/coursecopy/classes/CourseRestorer.class.php';
        $result['CourseSelectForm'] = '/main/coursecopy/classes/CourseSelectForm.class.php';
        $result['CourseSession'] = '/main/coursecopy/classes/CourseSession.class.php';
+        $result['CsvReader'] = '/main/inc/lib/system/io/csv_reader.class.php';
+        $result['CsvWriter'] = '/main/inc/lib/system/io/csv_writer.class.php';
        $result['CustomPages'] = '/main/inc/lib/custompages.lib.php';
        $result['DashboardManager'] = '/main/inc/lib/dashboard.lib.php';
        $result['DataForm'] = '/main/gradebook/lib/fe/dataform.class.php';
@ -128,6 +131,8 @@ class Autoload
        $result['DokeosIndexer'] = '/main/inc/lib/search/DokeosIndexer.class.php';
        $result['DropboxLink'] = '/main/gradebook/lib/be/dropboxlink.class.php';
        $result['DummyCourseCreator'] = '/main/coursecopy/classes/DummyCourseCreator.class.php';
+        $result['Encoding'] = '/main/inc/lib/system/text/encoding.class.php';
+        $result['EncodingConverter'] = '/main/inc/lib/system/text/encoding_converter.class.php';
        $result['EntityGenerator'] = '/main/inc/lib/tools/entity_generator.class.php';
        $result['EvalForm'] = '/main/gradebook/lib/fe/evalform.class.php';
        $result['EvalLink'] = '/main/gradebook/lib/be/evallink.class.php';
@ -138,6 +143,8 @@ class Autoload
        $result['ExerciseResult'] = '/main/exercice/exercise_result.class.php';
        $result['ExerciseShowFunctions'] = '/main/inc/lib/exercise_show_functions.lib.php';
        $result['FileManager'] = '/main/inc/lib/fileManage.lib.php';
+        $result['FileReader'] = '/main/inc/lib/system/io/file_reader.class.php';
+        $result['FileWriter'] = '/main/inc/lib/system/io/file_writer.class.php';
        $result['FillBlanks'] = '/main/exercice/fill_blanks.class.php';
        $result['FlatViewDataGenerator'] = '/main/gradebook/lib/flatview_data_generator.class.php';
        $result['FlatViewTable'] = '/main/gradebook/lib/fe/flatviewtable.class.php';
@ -260,6 +267,7 @@ class Autoload
        $result['MyHorBar'] = '/main/inc/lib/pchart/MyHorBar.class.php';
        $result['MySpace'] = '/main/mySpace/myspace.lib.php';
        $result['Nanogong'] = '/main/inc/lib/nanogong.lib.php';
+        $result['NewMediaForm'] = '/main/media/lib/new_media_form.class.php';
        $result['NotebookManager'] = '/main/inc/lib/notebook.lib.php';
        $result['Notification'] = '/main/inc/lib/notification.lib.php';
        $result['OLE'] = '/main/inc/lib/pear/OLE/OLE.php';
@ -390,6 +398,9 @@ class Autoload
        $result['UserManager'] = '/main/inc/lib/usermanager.lib.php';
        $result['UserStore'] = '/main/auth/shibboleth/app/model/user.class.php';
        $result['UserTable'] = '/main/gradebook/lib/fe/usertable.class.php';
+        $result['Utf8'] = '/main/inc/lib/system/text/utf8.class.php';
+        $result['Utf8Decoder'] = '/main/inc/lib/system/text/utf8_decoder.class.php';
+        $result['Utf8Encoder'] = '/main/inc/lib/system/text/utf8_encoder.class.php';
        $result['Wiki'] = '/main/coursecopy/classes/wiki.class.php';
        $result['XapianIndexer'] = '/main/inc/lib/search/xapian/XapianIndexer.class.php';
        $result['ZombieManager'] = '/main/inc/lib/zombie/zombie_manager.class.php';
@ -449,6 +460,7 @@ class Autoload
        $result['xhtdoc'] = '/main/inc/lib/xht.lib.php';
        $result['xmddoc'] = '/main/inc/lib/xmd.lib.php';

+
        return $result;
    }

--- a/main/inc/lib/chamilo.class.php
+++ b/main/inc/lib/chamilo.class.php
@ -9,7 +9,7 @@
 */
 class Chamilo
 {
-    
+
    public static function name()
    {
        //@todo: add version
@ -25,7 +25,6 @@ class Chamilo
    {
        return api_get_setting('server_type') == 'production';
    }
-        

    /**
     * Returns a full url from local/absolute path and parameters.
@ -39,12 +38,12 @@ class Chamilo
    {
        return Uri::url($path, $params, $html);
    }
-    
+
    public static function here($params = array(), $html = true)
    {
        return Uri::here($params, $html);
    }
-        
+
    /**
     * Application web root
     */
@ -62,12 +61,18 @@ class Chamilo
    {
        return api_get_path(SYS_PATH);
    }
-    
+
    public static function root_courses()
    {
        return api_get_path(SYS_COURSE_PATH);
    }

+    /**
+     * Builds a unique path below api_get_path(SYS_ARCHIVE_PATH).
+     *
+     * Only the path string is generated; this method does not create the file.
+     *
+     * @param string $ext optional extension, without the leading dot
+     * @return string
+     */
+    public static function temp($ext = '')
+    {
+        $suffix = '';
+        if ($ext) {
+            $suffix = '.' . $ext;
+        }
+        return api_get_path(SYS_ARCHIVE_PATH) . uniqid() . $suffix;
+    }
+
    public static function path($path = '')
    {
        $root = self::root();
--- a/main/inc/lib/export.lib.inc.php
+++ b/main/inc/lib/export.lib.inc.php
@ -25,9 +25,8 @@ class Export {
 	}

 	/**
-	 * Export tabular data to CSV-file
-	 * @param array $data
-	 * @param string $filename
+     * 
+     * @deprecated use export_table_csv_utf8 instead
 	 */
 	public static function export_table_csv ($data, $filename = 'export') {
 		$file = api_get_path(SYS_ARCHIVE_PATH).uniqid('').'.csv';
@ -48,6 +47,28 @@ class Export {
 		DocumentManager :: file_send_for_download($file, true, $filename.'.csv');
 		return false;
 	}
+    
+	/**
+	 * Writes tabular data to a temporary CSV file and sends it for download.
+	 *
+	 * Rows are written through a CsvWriter stacked on a FileWriter that uses a
+	 * Utf8Encoder as its converter. The temporary file is deleted once
+	 * DocumentManager::file_send_for_download() has returned.
+	 *
+	 * @param array $data      rows to export, each row being an array of cells
+	 * @param string $filename download name, without the ".csv" extension
+	 * @return bool            always false (also when $data is empty, in which
+	 *                         case nothing is written or sent)
+	 */
+	public static function export_table_csv_utf8 ($data, $filename = 'export') {
+		if (empty($data)) {
+			return false;
+		}
+		$temp_path = Chamilo::temp();
+		$csv = CsvWriter::create(FileWriter::create($temp_path, new Utf8Encoder(null, true)));
+		foreach ($data as $cells) {
+			$csv->put($cells);
+		}
+		$csv->close();
+		DocumentManager :: file_send_for_download($temp_path, true, $filename.'.csv');
+		unlink($temp_path);
+		return false;
+	}

 	/**
 	 * Export tabular data to XLS-file
--- a/main/inc/lib/import.lib.php
+++ b/main/inc/lib/import.lib.php
@ -10,6 +10,11 @@
 * @package	 chamilo.library
 */
 class Import {
+    
+    /**
+     * Returns a CsvReader for the CSV file at $path.
+     *
+     * The reader is built with CsvReader's default delimiter and enclosure.
+     *
+     * @param string $path path to the CSV file
+     * @return CsvReader
+     */
+    static function csv_reader($path)
+    {
+        $reader = CsvReader::create($path);
+        return $reader;
+    }

 	/**
 	 * Reads a CSV-file into an array. The first line of the CSV-file should contain the array-keys.
@ -27,6 +32,9 @@ class Import {
 	 *   ...
 	 * @param string $filename	The path to the CSV-file which should be imported.
 	 * @return array			Returns an array (in the system encoding) that contains all data from the CSV-file.
+     * 
+     * 
+     * @deprecated use csv_reader instead
 	 */
 	function csv_to_array($filename) {
 		$result = array();
--- a/main/inc/lib/internationalization.lib.php
+++ b/main/inc/lib/internationalization.lib.php
@ -3676,171 +3676,11 @@ function api_detect_encoding($string, $language = null) {

 /**
 * Checks a string for UTF-8 validity.
- * @param string $string	The string to be tested/validated.
- * @return bool				Returns TRUE when the tested string is valid UTF-8 one, FALSE othewise.
- * @link http://en.wikipedia.org/wiki/UTF-8
+ * 
+ * @deprecated Use Encoding::utf8()->is_valid() instead
 */
 function api_is_valid_utf8(&$string) {
-
-    //return @mb_detect_encoding($string, 'UTF-8', true) == 'UTF-8' ? true : false;
-    // Ivan Tcholakov, 05-OCT-2008: I do not trust mb_detect_encoding(). I have
-    // found a string with a single cyrillic letter (single byte), that is
-    // wrongly detected as UTF-8. Possibly, there would be problems with other
-    // languages too. An alternative implementation will be used.
-
-    $str = (string)$string;
-    $len = api_byte_count($str);
-    $i = 0;
-    while ($i < $len) {
-        $byte1 = ord($str[$i++]);		// Here the current character begins. Its size is
-                                            // determined by the senior bits in the first byte.
-
-        if (($byte1 & 0x80) == 0x00) {		// 0xxxxxxx
-                                            //    &
-                                            // 10000000
-                                            // --------
-                                            // 00000000
-                                            // This is s valid character and it contains a single byte.
-        }
-
-        elseif (($byte1 & 0xE0) == 0xC0) {	// 110xxxxx 10xxxxxx
-                                            //    &        &
-                                            // 11100000 11000000
-                                            // -------- --------
-                                            // 11000000 10000000
-                                            // The character contains two bytes.
-            if ($i == $len) {
-                return false;				// Here the string ends unexpectedly.
-            }
-
-            if (!((ord($str[$i++]) & 0xC0) == 0x80))
-                return false;				// Invalid second byte, invalid string.
-        }
-
-        elseif(($byte1 & 0xF0) == 0xE0) {	// 1110xxxx 10xxxxxx 10xxxxxx
-                                            //    &        &        &
-                                            // 11110000 11000000 11000000
-                                            // -------- -------- --------
-                                            // 11100000 10000000 10000000
-                                            // This is a character of three bytes.
-            if ($i == $len) {
-                return false;				// Unexpected end of the string.
-            }
-            if (!((ord($str[$i++]) & 0xC0) == 0x80)) {
-                return false;				// Invalid second byte.
-            }
-            if ($i == $len) {
-                return false;				// Unexpected end of the string.
-            }
-            if (!((ord($str[$i++]) & 0xC0) == 0x80)) {
-                return false;				// Invalid third byte, invalid string.
-            }
-        }
-
-        elseif(($byte1 & 0xF8) == 0xF0) {	// 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
-                                            //    &        &        &        &
-                                            // 11111000 11000000 11000000 11000000
-                                            // -------- -------- -------- --------
-                                            // 11110000 10000000 10000000 10000000
-                                            // This is a character of four bytes.
-            if ($i == $len) {
-                return false;
-            }
-            if (!((ord($str[$i++]) & 0xC0) == 0x80)) {
-                return false;
-            }
-            if ($i == $len) {
-                return false;
-            }
-            if (!((ord($str[$i++]) & 0xC0) == 0x80)) {
-                return false;
-            }
-            if ($i == $len) {
-                return false;
-            }
-            if (!((ord($str[$i++]) & 0xC0) == 0x80)) {
-                return false;
-            }
-        }
-
-        elseif(($byte1 & 0xFC) == 0xF8) {	// 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
-                                            //    &        &        &        &        &
-                                            // 11111100 11000000 11000000 11000000 11000000
-                                            // -------- -------- -------- -------- --------
-                                            // 11111000 10000000 10000000 10000000 10000000
-                                            // This is a character of five bytes.
-            if ($i == $len) {
-                return false;
-            }
-            if (!((ord($str[$i++]) & 0xC0) == 0x80)) {
-                return false;
-            }
-            if ($i == $len) {
-                return false;
-            }
-            if (!((ord($str[$i++]) & 0xC0) == 0x80)) {
-                return false;
-            }
-            if ($i == $len) {
-                return false;
-            }
-            if (!((ord($str[$i++]) & 0xC0) == 0x80)) {
-                return false;
-            }
-            if ($i == $len) {
-                return false;
-            }
-            if (!((ord($str[$i++]) & 0xC0) == 0x80)) {
-                return false;
-            }
-        }
-
-        elseif(($byte1 & 0xFE) == 0xFC) {	// 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
-                                            //    &        &        &        &        &        &
-                                            // 11111110 11000000 11000000 11000000 11000000 11000000
-                                            // -------- -------- -------- -------- -------- --------
-                                            // 11111100 10000000 10000000 10000000 10000000 10000000
-                                            // This is a character of six bytes.
-            if ($i == $len) {
-                return false;
-            }
-            if (!((ord($str[$i++]) & 0xC0) == 0x80)) {
-                return false;
-            }
-            if ($i == $len) {
-                return false;
-            }
-            if (!((ord($str[$i++]) & 0xC0) == 0x80)) {
-                return false;
-            }
-            if ($i == $len) {
-                return false;
-            }
-            if (!((ord($str[$i++]) & 0xC0) == 0x80)) {
-                return false;
-            }
-            if ($i == $len) {
-                return false;
-            }
-            if (!((ord($str[$i++]) & 0xC0) == 0x80)) {
-                return false;
-            }
-            if ($i == $len) {
-                return false;
-            }
-            if (!((ord($str[$i++]) & 0xC0) == 0x80)) {
-                return false;
-            }
-        }
-
-        else {
-            return false;					// In any other case the character is invalid.
-        }
-                                            // Here the current character is valid, it
-                                            // matches to some of the cases above.
-                                             // The next character is to be examinated.
-    }
-    return true;							// Empty strings are valid too.
+    return Encoding::utf8()->is_valid($string);
 }

 /**
--- a/main/inc/lib/system/io/csv_reader.class.php
+++ b/main/inc/lib/system/io/csv_reader.class.php
@ -0,0 +1,170 @@
+<?php
+
+/**
+ * Read CSV data from a stream - string/FileReader.
+ * 
+ * Returns data as associative arrays (headers are the keys of the array).
+ * Skip blank lines ?? is it such a good idea?
+ * 
+ * Usage:
+ * 
+ *      $reader = CsvReader::create('path');
+ *      foreach($reader as $items){
+ *          foreach($items as $key=>$value){
+ *              echo "$key : $value";
+ *          }
+ *      }
+ * 
+ * 
+ *
+ * @copyright (c) 2012 University of Geneva
+ * @license GNU General Public License - http://www.gnu.org/copyleft/gpl.html
+ * @author Laurent Opprecht <laurent@opprecht.info>
+ */
+class CsvReader implements Iterator
+{
+
+    /**
+     * Factory method. Takes the same arguments as the constructor.
+     *
+     * @param string|FileReader $stream path of the file to read, or a reader object
+     * @param string $delimiter field delimiter; only its first character is used
+     * @param string $enclosure field enclosure; only its first character is used
+     * @return CsvReader
+     */
+    static function create($stream, $delimiter = ';', $enclosure = '"')
+    {
+        return new self($stream, $delimiter, $enclosure);
+    }
+
+    protected $stream = null;
+    protected $headers = array();
+    protected $delimiter = '';
+    protected $enclosure = '';
+    protected $current = false;
+    protected $index = -1;
+
+    /**
+     * @param string|FileReader $stream path of the file to read, or a reader object
+     * @param string $delimiter field delimiter; falls back to ';' when empty
+     * @param string $enclosure field enclosure; falls back to '"' when empty
+     */
+    function __construct($stream, $delimiter = ';', $enclosure = '"')
+    {
+        $this->stream = $stream;
+        $this->delimiter = $delimiter ? substr($delimiter, 0, 1) : ';';
+        $this->enclosure = $enclosure ? substr($enclosure, 0, 1) : '"';
+    }
+
+    function get_delimiter()
+    {
+        return $this->delimiter;
+    }
+
+    function get_enclosure()
+    {
+        return $this->enclosure;
+    }
+
+    /**
+     * Returns the column names read from the first non-empty line.
+     * Empty until iteration has started.
+     *
+     * @return array
+     */
+    function headers()
+    {
+        return $this->headers;
+    }
+
+    /**
+     * Returns the underlying reader. A string stream is treated as a path and
+     * replaced by a FileReader on first access.
+     *
+     * @return FileReader
+     */
+    function stream()
+    {
+        if (is_string($this->stream)) {
+            $this->stream = new FileReader($this->stream);
+        }
+        return $this->stream;
+    }
+
+    /**
+     * Tells whether $line carries no data at all.
+     *
+     * A strict comparison is used on purpose: a line made of the single
+     * character "0" is data, although PHP considers it empty/falsy.
+     *
+     * @param mixed $line
+     * @return boolean
+     */
+    protected function is_blank($line)
+    {
+        return $line === false || $line === null || $line === '';
+    }
+
+    /**
+     * Splits a raw line into cells.
+     *
+     * When headers are known, each cell is stored under the header found at
+     * the same position; cells without a (non-empty) header are appended with
+     * a numeric key. Without headers the cells are returned as parsed.
+     *
+     * @param string|boolean $line
+     * @return array
+     */
+    protected function decode($line)
+    {
+        if ($this->is_blank($line)) {
+            return array();
+        }
+        $data = api_str_getcsv($line, $this->get_delimiter(), $this->get_enclosure());
+        if (empty($this->headers)) {
+            return $data;
+        }
+        $result = array();
+        foreach ($data as $index => $value) {
+            $key = isset($this->headers[$index]) ? $this->headers[$index] : false;
+            if ($key) {
+                $result[$key] = $value;
+            } else {
+                $result[] = $value;
+            }
+        }
+        return $result;
+    }
+
+    /**
+     * Returns the next non empty line, or false when the stream is exhausted.
+     *
+     * @return boolean|string
+     */
+    protected function next_line()
+    {
+        while (true) {
+            $line = $this->stream()->next();
+            if ($line === false) {
+                return false;
+            }
+            if (!$this->is_blank($line)) {
+                return $line;
+            }
+        }
+    }
+
+    public function current()
+    {
+        return $this->current;
+    }
+
+    public function key()
+    {
+        return $this->index;
+    }
+
+    /**
+     * Moves to the next data row. If no headers are known yet, the next
+     * non-empty line is consumed as the header line first.
+     *
+     * @return array|boolean the new current row, or false at the end of the data
+     */
+    public function next()
+    {
+        if (empty($this->headers)) {
+            $this->headers = $this->decode($this->next_line());
+        }
+        $line = $this->next_line();
+        if ($line !== false) {
+            $this->current = $this->decode($line);
+            $this->index++;
+        } else {
+            $this->current = false;
+        }
+        return $this->current;
+    }
+
+    /**
+     * Restarts the iteration: rewinds the stream, re-reads the header line and
+     * positions the reader on the first data row.
+     */
+    public function rewind()
+    {
+        $this->stream()->rewind();
+        $line = $this->stream()->current();
+        if ($this->is_blank($line)) {
+            $line = $this->next_line();
+        }
+        // decode() maps cells onto the headers already known. Forget the
+        // headers of a previous pass first, otherwise the header line itself
+        // would be keyed by the old headers and later rows would lose their keys.
+        $this->headers = array();
+        $this->headers = $this->decode($line);
+        $this->index = -1;
+        $this->next();
+    }
+
+    public function valid()
+    {
+        return $this->current !== false;
+    }
+
+    /**
+     * Gives the copy its own stream and resets its iteration state.
+     *
+     * Cloning is shallow, so without this the copy would share the stream
+     * object with the original reader and moving one would move the other.
+     * A stream still held as a path string needs no copy: it is opened lazily.
+     */
+    function __clone()
+    {
+        if (is_object($this->stream)) {
+            $this->stream = clone $this->stream;
+        }
+        $this->current = false;
+        $this->index = -1;
+        $this->headers = array();
+    }
+
+}
--- a/main/inc/lib/system/io/csv_writer.class.php
+++ b/main/inc/lib/system/io/csv_writer.class.php
@ -0,0 +1,93 @@
+<?php
+
+/**
+ * Write array data to a stream in CSV format. Usage:
+ * 
+ *      $writer = CsvWriter::create('path');
+ * 
+ *      $writer->put($headers);
+ *      $writer->put($line_1);
+ *      $writer->put($line_2);
+ * 
+ * @copyright (c) 2012 University of Geneva
+ * @license GNU General Public License - http://www.gnu.org/copyleft/gpl.html
+ * @author Laurent Opprecht <laurent@opprecht.info>
+ */
+class CsvWriter
+{
+
+    /**
+     * Factory method. Takes the same arguments as the constructor.
+     *
+     * @param string|object $stream path of the file to write, or a writer object
+     * @param string $delimiter field delimiter; only its first character is used
+     * @param string $enclosure field enclosure; only its first character is used
+     * @return CsvWriter
+     */
+    static function create($stream, $delimiter = ';', $enclosure = '"')
+    {
+        $writer = new self($stream, $delimiter, $enclosure);
+        return $writer;
+    }
+
+    protected $stream = null;
+    protected $delimiter = '';
+    protected $enclosure = '';
+
+    /**
+     * @param string|object $stream path of the file to write, or a writer object
+     * @param string $delimiter field delimiter; falls back to ';' when empty
+     * @param string $enclosure field enclosure; falls back to '"' when empty
+     */
+    function __construct($stream, $delimiter = ';', $enclosure = '"')
+    {
+        $this->stream = $stream;
+
+        $this->delimiter = ';';
+        if ($delimiter) {
+            $this->delimiter = substr($delimiter, 0, 1);
+        }
+
+        $this->enclosure = '"';
+        if ($enclosure) {
+            $this->enclosure = substr($enclosure, 0, 1);
+        }
+    }
+
+    function get_delimiter()
+    {
+        return $this->delimiter;
+    }
+
+    function get_enclosure()
+    {
+        return $this->enclosure;
+    }
+
+    /**
+     * Returns the underlying writer. A string stream is treated as a path and
+     * replaced by a FileWriter on first access.
+     *
+     * @return FileWriter
+     */
+    protected function stream()
+    {
+        if (!is_string($this->stream)) {
+            return $this->stream;
+        }
+        $this->stream = new FileWriter($this->stream);
+        return $this->stream;
+    }
+
+    /**
+     * Alias of put().
+     *
+     * @param array $items
+     */
+    function write($items)
+    {
+        $this->put($items);
+    }
+
+    /**
+     * Alias of put().
+     *
+     * @param array $items
+     */
+    function writeln($items)
+    {
+        $this->put($items);
+    }
+
+    /**
+     * Writes one CSV line. Every cell is wrapped in the enclosure character
+     * and enclosure characters inside a cell are doubled.
+     *
+     * @param array $items cells of the line
+     */
+    function put($items)
+    {
+        $quote = $this->enclosure;
+        $doubled_quote = $quote . $quote;
+
+        $cells = array();
+        foreach ($items as $value) {
+            $cells[] = $quote . str_replace($quote, $doubled_quote, $value) . $quote;
+        }
+
+        $this->stream()->writeln(implode($this->delimiter, $cells));
+    }
+
+    /**
+     * Closes the underlying writer, if one was created or supplied, and
+     * drops the reference to it.
+     */
+    function close()
+    {
+        $stream = $this->stream;
+        if (is_object($stream)) {
+            $stream->close();
+        }
+        $this->stream = null;
+    }
+
+}
--- a/main/inc/lib/system/io/file_reader.class.php
+++ b/main/inc/lib/system/io/file_reader.class.php
@ -0,0 +1,182 @@
+<?php
+
+/**
+ * Read text from a file. Reader is line oriented and not char oriented. 
+ * The default converter converts from the file encoding - auto-detected - to 
+ * system encoding.
+ * 
+ * Usage:
+ * 
+ *  $file = FileReader::create('path');
+ *  foreach($file as $line)
+ * {
+ *      ...
+ * }
+ *
+ * @copyright (c) 2012 University of Geneva
+ * @license GNU General Public License - http://www.gnu.org/copyleft/gpl.html
+ * @author Laurent Opprecht <laurent@opprecht.info>
+ */
+class FileReader implements Iterator
+{
+
+    const EOL = "\n";
+
+    /**
+     * Factory method. Takes the same arguments as the constructor.
+     *
+     * @param string $path
+     * @param Converter $converter
+     * @return FileReader
+     */
+    static function create($path, $converter = null)
+    {
+        return new self($path, $converter);
+    }
+
+    /**
+     * Returns the file encoding, as detected by Encoding::detect_encoding()
+     * on the first lines of the file.
+     *
+     * If the file cannot be opened, detection runs on an empty string.
+     *
+     * @param string $path
+     * @return Encoding
+     */
+    static function detect_encoding($path)
+    {
+        $abstract = array();
+        // We assume that 200 lines are enough for encoding detection.
+        // Here we must get at the raw data, so we don't use other functions.
+        // It's not possible to read x chars as this would not be safe with utf
+        // (chars may be split in the middle).
+        $handle = fopen($path, 'r');
+        if ($handle !== false) {
+            $i = 0;
+            while ($i < 200 && ($line = fgets($handle)) !== false) {
+                $abstract[] = $line;
+                $i++;
+            }
+            fclose($handle);
+        }
+        return Encoding::detect_encoding(implode($abstract));
+    }
+
+    protected $path = '';
+    protected $handle = null;
+    protected $current = false;
+    protected $index = -1;
+    protected $converter = null;
+
+    /**
+     * @param string $path path of the file to read
+     * @param Converter $converter applied to every line read; when empty, the
+     *                             decoder of the detected file encoding is used
+     */
+    function __construct($path, $converter = null)
+    {
+        if (empty($converter)) {
+            $encoding = self::detect_encoding($path);
+            $converter = $encoding->decoder();
+        }
+        $this->path = $path;
+        $this->converter = $converter;
+    }
+
+    /**
+     *
+     * @return Converter
+     */
+    function get_converter()
+    {
+        return $this->converter;
+    }
+
+    /**
+     * Returns the file handle, opening the file for reading on first use.
+     *
+     * @return resource|boolean false when the file could not be opened
+     */
+    function handle()
+    {
+        if (is_null($this->handle)) {
+            $this->handle = fopen($this->path, 'r');
+        }
+        return $this->handle;
+    }
+
+    /**
+     * Read at most $count lines.
+     *
+     * The lines are collected with foreach, which rewinds the reader, so
+     * reading always starts at the beginning of the file.
+     *
+     * @param int $count
+     * @return array the lines read; an empty array when there is none
+     */
+    function read_lines($count)
+    {
+        $result = array();
+        $i = 0;
+        foreach ($this as $line) {
+            if ($i >= $count) {
+                return $result;
+            }
+            $i++;
+            $result[] = $line;
+        }
+        return $result;
+    }
+
+    /**
+     * Alias of next().
+     *
+     * @return string|boolean
+     */
+    function read_line()
+    {
+        return $this->next();
+    }
+
+    function close()
+    {
+        if (is_resource($this->handle)) {
+            fclose($this->handle);
+        }
+        $this->handle = null;
+    }
+
+    protected function convert($text)
+    {
+        return $this->converter->convert($text);
+    }
+
+    public function current()
+    {
+        return $this->current;
+    }
+
+    public function key()
+    {
+        return $this->index;
+    }
+
+    /**
+     * Reads the next line, strips its trailing line break and passes it
+     * through the converter.
+     *
+     * @return string|boolean the line, or false at the end of the file or when
+     *                        the file could not be opened
+     */
+    public function next()
+    {
+        $handle = $this->handle();
+        if ($handle === false) {
+            $this->current = false;
+            return false;
+        }
+        $line = fgets($handle);
+        if ($line !== false) {
+            $line = rtrim($line, "\r\n");
+            $line = $this->convert($line);
+            $this->index++;
+        }
+        $this->current = $line;
+        return $this->current;
+    }
+
+    /**
+     * Goes back to the start of the file, resets the converter and reads the
+     * first line.
+     */
+    public function rewind()
+    {
+        $this->converter->reset();
+        if ($handle = $this->handle()) {
+            rewind($handle);
+        }
+        $this->current = false;
+        $this->index = -1;
+        $this->next();
+    }
+
+    public function valid()
+    {
+        return $this->current !== false;
+    }
+
+    /**
+     * Detaches the copy from the original's file handle and resets its
+     * position. The converter object is not copied, only reset.
+     */
+    function __clone()
+    {
+        $this->handle = null;
+        $this->current = false;
+        $this->index = -1;
+        $this->converter->reset();
+    }
+
+}
--- a/main/inc/lib/system/io/file_writer.class.php
+++ b/main/inc/lib/system/io/file_writer.class.php
@ -0,0 +1,81 @@
+<?php
+
+/**
+ * Write data to file. Default to UTF8 encoding.
+ *
+ * @copyright (c) 2012 University of Geneva
+ * @license GNU General Public License - http://www.gnu.org/copyleft/gpl.html
+ * @author Laurent Opprecht <laurent@opprecht.info>
+ */
+class FileWriter
+{
+
+    /**
+     * Factory method. Takes the same arguments as the constructor.
+     *
+     * @param string $path
+     * @param Converter $converter
+     * @return FileWriter
+     */
+    static function create($path, $converter = null)
+    {
+        $writer = new self($path, $converter);
+        return $writer;
+    }
+
+    const EOL = "\n";
+
+    protected $path = '';
+    protected $handle = null;
+    protected $converter = null;
+
+    /**
+     * @param string $path path of the file to write to
+     * @param Converter $converter applied to every text written; when empty,
+     *                             Encoding::utf8()->encoder() is used
+     */
+    function __construct($path, $converter = null)
+    {
+        $this->path = $path;
+        if (!$converter) {
+            $converter = Encoding::utf8()->encoder();
+        }
+        $this->converter = $converter;
+    }
+
+    /**
+     *
+     * @return Converter
+     */
+    function get_converter()
+    {
+        return $this->converter;
+    }
+
+    /**
+     * Returns the file handle, opening the file in 'a+' mode on first use.
+     */
+    protected function handle()
+    {
+        if ($this->handle === null) {
+            $this->handle = fopen($this->path, 'a+');
+        }
+        return $this->handle;
+    }
+
+    /**
+     * Writes the converted text, without adding a line break.
+     *
+     * @param string $text
+     */
+    function write($text)
+    {
+        $handle = $this->handle();
+        fwrite($handle, $this->convert($text));
+    }
+
+    /**
+     * Writes the converted text followed by self::EOL.
+     *
+     * @param string $text
+     */
+    function writeln($text)
+    {
+        $handle = $this->handle();
+        $encoded = $this->convert($text);
+        fwrite($handle, $encoded . self::EOL);
+    }
+
+    /**
+     * Closes the file if it was opened and forgets the handle.
+     */
+    function close()
+    {
+        $handle = $this->handle;
+        if (is_resource($handle)) {
+            fclose($handle);
+        }
+        $this->handle = null;
+    }
+
+    protected function convert($text)
+    {
+        return $this->converter->convert($text);
+    }
+
+}
--- a/main/inc/lib/system/text/converter.class.php
+++ b/main/inc/lib/system/text/converter.class.php
@ -0,0 +1,33 @@
+<?php
+
+/**
+ * Convert text. Used mostly to convert from one encoding to another.
+ *
+ * @copyright (c) 2012 University of Geneva
+ * @license GNU General Public License - http://www.gnu.org/copyleft/gpl.html
+ * @author Laurent Opprecht <laurent@opprecht.info>
+ */
+class Converter
+{
+
+    /**
+     * Identity converter. Returns the string with no transformations.
+     * The same instance is returned on every call.
+     *
+     * @return Converter
+     */
+    public static function identity()
+    {
+        static $result = null;
+        if (empty($result)) {
+            $result = new self();
+        }
+        return $result;
+    }
+
+    /**
+     * Converts $string. The base implementation returns it unchanged.
+     *
+     * @param string $string
+     * @return string
+     */
+    function convert($string)
+    {
+        return $string;
+    }
+
+    /**
+     * Resets the converter before a new pass over the input.
+     *
+     * FileReader calls reset() on its converter when it rewinds or is cloned,
+     * so every converter has to provide it. The base converter keeps no
+     * state, hence there is nothing to do here.
+     */
+    function reset()
+    {
+    }
+}
--- a/main/inc/lib/system/text/encoding.class.php
+++ b/main/inc/lib/system/text/encoding.class.php
@ -0,0 +1,158 @@
+<?php
+
+/**
+ * Set the system encoding to the platform encoding.
+ *
+ * @todo:
+ * Note: these lines are here for ease of use only. They should be moved elsewhere:
+ *
+ *      1 first, autodetection should be done inside the Encoding class
+ *      2 this library should not call a Chamilo-specific function (it should
+ *        be the other way around, Chamilo calling the encoding functions)
+ */
+
+$plateform_encoding =  api_get_system_encoding();
+Encoding::system($plateform_encoding);
+
+/**
+ * Encoding class. Handles text encoding. Usage:
+ * 
+ *      $encoding = Encoding::get('name');
+ *      $decoder = $encoding->decoder();
+ *      $decoder->convert('text');
+ * 
+ * The system encoding is the platform/system/default encoding. This defaults to
+ * UTF8 but can be changed:
+ * 
+ *      Encoding::system('name');
+ * 
+ * Note that an Encoding evaluates to its name when converted to a string. As such it
+ * can be used in places where a string is expected:
+ * 
+ *      $utf8 = Encoding::Utf8();
+ *      echo $utf8;
+ *
+ * @copyright (c) 2012 University of Geneva
+ * @license GNU General Public License - http://www.gnu.org/copyleft/gpl.html
+ * @author Laurent Opprecht <laurent@opprecht.info>
+ */
+class Encoding
+{
+
+    /**
+     * The system/default encoding, or null as long as none has been set.
+     */
+    private static $system = null;
+
+    /**
+     * Resolves $name to an Encoding object.
+     *
+     * Objects are handed back as they are. A name recognised by Utf8::is()
+     * yields the shared Utf8 instance; any other name yields a new Encoding
+     * carrying that name.
+     *
+     * @param string|object $name Encoding name, or an encoding object.
+     * @return Encoding
+     */
+    public static function get($name)
+    {
+        if (is_object($name)) {
+            return $name;
+        }
+        if (Encoding::utf8()->is($name)) {
+            return self::utf8();
+        }
+        return new self($name);
+    }
+
+    /**
+     * Shortcut for the Utf8 encoding instance.
+     *
+     * @return Utf8
+     */
+    public static function utf8()
+    {
+        return Utf8::instance();
+    }
+
+    /**
+     * Gets and optionally sets the system/default encoding.
+     *
+     * @param string|object|null $value An encoding object or an encoding name to
+     *                                  install as the system encoding. Any other
+     *                                  value leaves the current setting untouched.
+     * @return Encoding The system encoding, or the Utf8 encoding when none is set.
+     */
+    public static function system($value = null)
+    {
+        if (is_string($value)) {
+            self::$system = self::get($value);
+        } elseif (is_object($value)) {
+            self::$system = $value;
+        }
+
+        if (self::$system) {
+            return self::$system;
+        }
+        return self::utf8();
+    }
+
+    /**
+     * Detects the encoding of a text sample.
+     *
+     * Detection itself is delegated to api_detect_encoding(); the resulting
+     * name is resolved through get().
+     *
+     * @param string $abstract Text sample to inspect.
+     * @return Encoding
+     */
+    public static function detect_encoding($abstract)
+    {
+        return self::get(api_detect_encoding($abstract));
+    }
+
+    protected $name = '';
+
+    /**
+     * Not public: instances are obtained through get(), utf8() or system().
+     *
+     * @param string $name Name of the encoding.
+     */
+    protected function __construct($name = '')
+    {
+        $this->name = $name;
+    }
+
+    /**
+     * Name of this encoding.
+     *
+     * @return string
+     */
+    function name()
+    {
+        return $this->name;
+    }
+
+    /**
+     * Byte Order Mark of this encoding. The base implementation returns an
+     * empty string.
+     *
+     * @see http://en.wikipedia.org/wiki/Byte_order_mark
+     * @return string
+     */
+    function bom()
+    {
+        return '';
+    }
+
+    /**
+     * Returns a converter from this encoding to $to.
+     *
+     * @param string|Encoding $to Encoding to convert to, defaults to the system encoding.
+     * @return Converter
+     */
+    public function decoder($to = null)
+    {
+        $target = $to ? $to : Encoding::system();
+        return EncodingConverter::create($this, $target);
+    }
+
+    /**
+     * Returns a converter from $from to this encoding.
+     *
+     * @param string|Encoding $from Encoding to convert from, defaults to the system encoding.
+     * @return Converter
+     */
+    public function encoder($from = null)
+    {
+        $source = $from ? $from : Encoding::system();
+        return EncodingConverter::create($source, $this);
+    }
+
+    /**
+     * String form of an encoding: its name.
+     *
+     * @return string
+     */
+    function __toString()
+    {
+        return $this->name();
+    }
+
+}
--- a/main/inc/lib/system/text/encoding_converter.class.php
+++ b/main/inc/lib/system/text/encoding_converter.class.php
@ -0,0 +1,66 @@
+<?php
+
+/**
+ * Convert text from one encoding to another. Usage:
+ * 
+ *      $converter = EncodingConverter::create($from, $to);
+ *      $converter->convert($text);
+ * 
+ * Note that the create function returns an identity converter if the from and to
+ * encodings are the same. This is why the constructor is not public.
+ *
+ * @copyright (c) 2012 University of Geneva
+ * @license GNU General Public License - http://www.gnu.org/copyleft/gpl.html
+ * @author Laurent Opprecht <laurent@opprecht.info>
+ */
+class EncodingConverter extends Converter
+{
+
+    /**
+     * Builds a converter from one encoding to another.
+     *
+     * Both arguments are cast to strings, so Encoding objects may be passed
+     * as well as names. When the two names are equal (case-insensitively)
+     * the identity converter is returned instead of a new instance.
+     *
+     * @param string|Encoding $from_encoding Encoding of the incoming text.
+     * @param string|Encoding $to_encoding   Encoding of the produced text.
+     *
+     * @return Converter An EncodingConverter, or the identity converter when
+     *                   no conversion is needed.
+     */
+    public static function create($from_encoding, $to_encoding)
+    {
+        $from_encoding = (string) $from_encoding;
+        $to_encoding = (string) $to_encoding;
+        if (strtolower($from_encoding) == strtolower($to_encoding)) {
+            return Converter::identity();
+        }
+        // The instance must be returned: without it callers receive null and
+        // fail as soon as they call convert() on the result.
+        return new self($from_encoding, $to_encoding);
+    }
+
+    protected $from_encoding;
+    protected $to_encoding;
+
+    /**
+     * Not public: use create().
+     *
+     * @param string $from_encoding Name of the source encoding.
+     * @param string $to_encoding   Name of the target encoding.
+     */
+    protected function __construct($from_encoding, $to_encoding)
+    {
+        $this->from_encoding = $from_encoding;
+        $this->to_encoding = $to_encoding;
+    }
+
+    /**
+     * @return string Name of the source encoding.
+     */
+    function from_encoding()
+    {
+        return $this->from_encoding;
+    }
+
+    /**
+     * @return string Name of the target encoding.
+     */
+    function to_encoding()
+    {
+        return $this->to_encoding;
+    }
+
+    /**
+     * Converts $string from the source encoding to the target encoding.
+     *
+     * @param string $string Text in the source encoding.
+     * @return string The result of api_convert_encoding(), or $string itself
+     *                when both encodings are equal.
+     */
+    function convert($string)
+    {
+        $from = $this->from_encoding;
+        $to = $this->to_encoding;
+        if ($from == $to) {
+            return $string;
+        }
+        // Return the converted text; previously the result was discarded and
+        // the method returned null.
+        return api_convert_encoding($string, $to, $from);
+    }
+
+}
--- a/main/inc/lib/system/text/utf8.class.php
+++ b/main/inc/lib/system/text/utf8.class.php
@ -0,0 +1,287 @@
+<?php
+
+/**
+ * Utf8 encoding class. Provides utility function to deal with UTF8 encoding.
+ *
+ * @license see /license.txt
+ * @author Laurent Opprecht <laurent@opprecht.info> for the University of Geneva
+ * @author More authors, mentioned in the corresponding fragments of this source.
+ */
+class Utf8 extends Encoding
+{
+
+    // Matches any single character that is neither printable (visible
+    // characters and the space character) nor a hyphen.
+    const PATTERN_NOT_VISIBLE_CHARS = '/[^[:print:]-]/';
+
+    /**
+     * @see http://en.wikipedia.org/wiki/Byte_order_mark
+     */
+    const BOM = "\xEF\xBB\xBF";
+    const NAME = 'UTF-8';
+
+    /**
+     * Returns the shared Utf8 instance, creating it on first call.
+     *
+     * @return Utf8
+     */
+    public static function instance()
+    {
+        static $result = null;
+        if (empty($result)) {
+            $result = new self();
+        }
+        return $result;
+    }
+
+    /**
+     * Returns true if encoding is UTF8. The comparison against self::NAME is
+     * case-insensitive.
+     *
+     * @param string|Encoding $encoding
+     * @return bool
+     */
+    function is($encoding)
+    {
+        $encoding = (string) $encoding;
+        return strtolower($encoding) == strtolower(self::NAME);
+    }
+
+    protected function __construct()
+    {
+        parent::__construct(self::NAME);
+    }
+
+    /**
+     * @return string Always self::NAME.
+     */
+    function name()
+    {
+        return self::NAME;
+    }
+
+    /**
+     * @return string The UTF-8 Byte Order Mark.
+     */
+    function bom()
+    {
+        return self::BOM;
+    }
+
+    /**
+     * Returns the hexadecimal representation of a UTF-8 string. Useful to
+     * understand what is going on - non printable chars, rare patterns such
+     * as e' for é, etc.
+     *
+     * Each character is rendered as the two-digit hex value of each of its
+     * bytes, followed by a space.
+     *
+     * @param string $text
+     * @return string
+     */
+    function to_hex($text)
+    {
+        $result = '';
+
+        // The encoding is passed explicitly to the mb_* functions instead of
+        // calling mb_internal_encoding(), which would change the internal
+        // encoding for the rest of the request.
+        for ($i = 0, $n = mb_strlen($text, self::NAME); $i < $n; $i++) {
+            $char = mb_substr($text, $i, 1, self::NAME);
+            $num = strlen($char);
+            for ($j = 0; $j < $num; $j++) {
+                $result .= sprintf('%02x', ord($char[$j]));
+            }
+            $result .= ' ';
+        }
+        return $result;
+    }
+
+    /**
+     * Trim the BOM from the start of a UTF-8 string.
+     *
+     * @param string $text
+     * @return string $text without a leading BOM; unchanged if it has none.
+     */
+    function trim($text)
+    {
+        $bom = self::BOM;
+        $bom_length = strlen($bom);
+        if (strlen($text) < $bom_length) {
+            return $text;
+        }
+
+        if (substr($text, 0, $bom_length) === $bom) {
+            return substr($text, $bom_length);
+        }
+        return $text;
+    }
+
+    /**
+     * Checks a string for UTF-8 validity.
+     *
+     * The check is structural: every lead byte must announce a sequence of
+     * one to six bytes and be followed by the matching number of continuation
+     * bytes (10xxxxxx). Five and six byte sequences are accepted.
+     *
+     * @param string $string	The string to be tested.
+     * @return bool				Returns TRUE when the tested string is valid UTF-8, FALSE otherwise.
+     * @link http://en.wikipedia.org/wiki/UTF-8
+     * @author see internationalization.lib.php
+     */
+    static function is_valid(&$string)
+    {
+
+        //return @mb_detect_encoding($string, 'UTF-8', true) == 'UTF-8' ? true : false;
+        // Ivan Tcholakov, 05-OCT-2008: I do not trust mb_detect_encoding(). I have
+        // found a string with a single cyrillic letter (single byte), that is
+        // wrongly detected as UTF-8. Possibly, there would be problems with other
+        // languages too. An alternative implementation will be used.
+
+        $str = (string) $string;
+        $len = api_byte_count($str);
+        $i = 0;
+        while ($i < $len) {
+            // Here the current character begins. Its size is determined by
+            // the senior bits of its first byte.
+            $byte1 = ord($str[$i++]);
+
+            if (($byte1 & 0x80) == 0x00) {          // 0xxxxxxx - one byte
+                $continuation_bytes = 0;
+            } elseif (($byte1 & 0xE0) == 0xC0) {    // 110xxxxx - two bytes
+                $continuation_bytes = 1;
+            } elseif (($byte1 & 0xF0) == 0xE0) {    // 1110xxxx - three bytes
+                $continuation_bytes = 2;
+            } elseif (($byte1 & 0xF8) == 0xF0) {    // 11110xxx - four bytes
+                $continuation_bytes = 3;
+            } elseif (($byte1 & 0xFC) == 0xF8) {    // 111110xx - five bytes
+                $continuation_bytes = 4;
+            } elseif (($byte1 & 0xFE) == 0xFC) {    // 1111110x - six bytes
+                $continuation_bytes = 5;
+            } else {
+                return false;     // In any other case the character is invalid.
+            }
+
+            // Every following byte of the character must look like 10xxxxxx.
+            for ($k = 0; $k < $continuation_bytes; $k++) {
+                if ($i == $len) {
+                    return false;    // Here the string ends unexpectedly.
+                }
+                if ((ord($str[$i++]) & 0xC0) != 0x80) {
+                    return false;    // Invalid continuation byte, invalid string.
+                }
+            }
+            // Here the current character is valid.
+            // The next character is to be examined.
+        }
+        return true;       // Empty strings are valid too.
+    }
+
+    /**
+     * Returns a decoder converting from UTF-8 to $to.
+     *
+     * @param string|Encoding $to Encoding to convert to, defaults to the system encoding.
+     * @return Utf8Decoder
+     */
+    public function decoder($to = null)
+    {
+        $to = $to ? $to : Encoding::system();
+        return new Utf8Decoder($to);
+    }
+
+    /**
+     * Returns an encoder converting from $from to UTF-8.
+     *
+     * @param string|Encoding $from Encoding to convert from, defaults to the system encoding.
+     * @return Utf8Encoder
+     */
+    public function encoder($from = null)
+    {
+        $from = $from ? $from : Encoding::system();
+        return new Utf8Encoder($from);
+    }
+
+}
--- a/main/inc/lib/system/text/utf8_decoder.class.php
+++ b/main/inc/lib/system/text/utf8_decoder.class.php
@ -0,0 +1,54 @@
+<?php
+
+/**
+ * Convert from Utf8 to another encoding: 
+ * 
+ *      - remove BOM
+ *      - change encoding
+ *
+ * @copyright (c) 2012 University of Geneva
+ * @license GNU General Public License - http://www.gnu.org/copyleft/gpl.html
+ * @author Laurent Opprecht <laurent@opprecht.info>
+ */
+class Utf8Decoder extends Converter
+{
+
+    // False until the first chunk has been converted; see convert().
+    protected $started = false;
+    protected $to_encoding;
+    protected $encoding_converter;
+
+    /**
+     * @param string|Encoding|null $to_encoding Encoding to convert to, defaults
+     *                                          to the system encoding.
+     */
+    function __construct($to_encoding = null)
+    {
+        $this->to_encoding = $to_encoding ? $to_encoding : Encoding::system();
+        $this->encoding_converter = EncodingConverter::create(Utf8::NAME, $this->to_encoding);
+        $this->reset();
+    }
+
+    /**
+     * @return string Always Utf8::NAME.
+     */
+    function from_encoding()
+    {
+        return Utf8::NAME;
+    }
+
+    /**
+     * @return string|Encoding The target encoding given to the constructor,
+     *                         or the system encoding.
+     */
+    function to_encoding()
+    {
+        return $this->to_encoding;
+    }
+
+    /**
+     * Marks the decoder as not started, so that the next call to convert()
+     * is treated as the first chunk again.
+     */
+    function reset()
+    {
+        $this->started = false;
+    }
+
+    /**
+     * Converts a chunk of UTF-8 text to the target encoding.
+     *
+     * A leading BOM is stripped from the first chunk converted after
+     * construction or reset(); later chunks are converted as they are.
+     *
+     * @param string $string UTF-8 text.
+     * @return string Text in the target encoding.
+     */
+    function convert($string)
+    {
+        if (!$this->started) {
+            $this->started = true;
+            $string = Utf8::instance()->trim($string);
+        }
+        return $this->encoding_converter->convert($string);
+    }
+
+}
--- a/main/inc/lib/system/text/utf8_encoder.class.php
+++ b/main/inc/lib/system/text/utf8_encoder.class.php
@ -0,0 +1,72 @@
+<?php
+
+/**
+ * Encode from another encoding to UTF8:
+ * 
+ *      - add BOM
+ *      - change encoding
+ *      - convert html entities if turned on
+ * 
+ * Note: 
+ * 
+ * Convert_html_entities cannot be turned on by default. This would be bad
+ * for performance but, more than anything else, it may be perfectly valid to write
+ * html entities without transformation - i.e. when writing html content.
+ * 
+ * It may be better to move convert_html_entities to its own converter and to chain
+ * converters together to achieve the same result.
+ *
+ * @copyright (c) 2012 University of Geneva
+ * @license GNU General Public License - http://www.gnu.org/copyleft/gpl.html
+ * @author Laurent Opprecht <laurent@opprecht.info>
+ */
+class Utf8Encoder extends Converter
+{
+
+    // False until the first chunk has been converted; see convert().
+    protected $started = false;
+    protected $from_encoding;
+    protected $encoding_converter;
+    protected $convert_html_entities = false;
+
+    /**
+     * @param string|Encoding|null $from_encoding         Encoding to convert from,
+     *                                                    defaults to the system encoding.
+     * @param bool                 $convert_html_entities When true, html entities are
+     *                                                    decoded to their UTF-8 characters.
+     */
+    function __construct($from_encoding = null , $convert_html_entities = false)
+    {
+        $this->from_encoding = $from_encoding ? $from_encoding : Encoding::system();
+        $this->encoding_converter = EncodingConverter::create($this->from_encoding, Utf8::NAME);
+        $this->convert_html_entities = $convert_html_entities;
+        $this->reset();
+    }
+
+    /**
+     * @return string|Encoding The source encoding given to the constructor,
+     *                         or the system encoding.
+     */
+    function from_encoding()
+    {
+        return $this->from_encoding;
+    }
+
+    /**
+     * @return string Always Utf8::NAME.
+     */
+    function to_encoding()
+    {
+        return Utf8::NAME;
+    }
+
+    /**
+     * @return bool Whether html entities are decoded by convert().
+     */
+    function get_convert_html_entities()
+    {
+        return $this->convert_html_entities;
+    }
+
+    /**
+     * Marks the encoder as not started, so that the next call to convert()
+     * is prefixed with the BOM again.
+     */
+    function reset()
+    {
+        $this->started = false;
+    }
+
+    /**
+     * Converts a chunk of text from the source encoding to UTF-8.
+     *
+     * The first chunk converted after construction or reset() is prefixed
+     * with the UTF-8 BOM.
+     *
+     * @param string $string Text in the source encoding.
+     * @return string UTF-8 text.
+     */
+    function convert($string)
+    {
+        // Change the encoding first: entities are decoded into UTF-8 bytes,
+        // so decoding them before the conversion would have those bytes
+        // converted a second time when the source encoding is not UTF-8.
+        $string = $this->encoding_converter->convert($string);
+        if ($this->convert_html_entities) {
+            $string = html_entity_decode($string, ENT_COMPAT, Utf8::NAME);
+        }
+        if (!$this->started) {
+            $this->started = true;
+            $string = Utf8::BOM . $string;
+        }
+        return $string;
+    }
+
+}