Project

General

Profile

Feature #4694 » file_doctor.class.php

Laurent Opprecht, 30/05/2012 09:45

 
1
<?php
2

    
3
/**
4
 * Diagnoses and repairs course files and document entries whose names contain non standard characters.
5
 *
6
 * @copyright (c) 2012 University of Geneva
7
 * @license GNU General Public License - http://www.gnu.org/copyleft/gpl.html
8
 * @author Laurent Opprecht <laurent@opprecht.info>
9
 */
10
class FileDoctor
{

    /**
     * Matches every byte that is neither printable (POSIX [:print:], i.e. the
     * visible characters and the space character) nor a hyphen. The pattern
     * has no "u" modifier, so multi-byte UTF-8 sequences are matched byte by
     * byte.
     */
    const PATTERN_NOT_VISIBLE_CHARS = '/[^[:print:]-]/';

    /**
     * Factory shortcut, allows FileDoctor::call()->cure() style chaining.
     *
     * @return FileDoctor
     */
    public static function call()
    {
        return new self();
    }

    /**
     * Prints a line of the report followed by an HTML line break.
     *
     * @param string $text text to print, defaults to an empty line
     */
    public function writeln($text = '')
    {
        echo $text . '<br/>';
    }

    /**
     * Runs every repair step.
     *
     * Files are renamed on disk first because renaming may break the link
     * between a document entry and its file; the document entries are
     * relinked right after. Folders that are still missing are created last.
     */
    public function cure()
    {
        $this->cure_rename_system_files_with_non_standard_chars();
        $this->cure_document_entries_with_non_standard_chars();

        $this->cure_create_missing_folders();
        // Last resort only, it deletes database entries: deliberately not run by default.
        //$this->cure_remove_lost_document_entries();
    }

    /**
     * Returns $name where non standard chars have been replaced or removed.
     *
     * Known accented letters, typographic quotes and their url-encoded forms
     * are mapped to an ASCII replacement first. Every remaining byte matching
     * PATTERN_NOT_VISIBLE_CHARS is then removed.
     *
     * @param string $name
     * @return string
     */
    public function rename($name)
    {
        // Applied one pair after the other, in this order.
        $map = array(
            // Not the standard é: "e" followed by the combining acute accent,
            // written as bytes so that it does not depend on the file encoding.
            "e\xCC\x81" => 'e',
            // url-encoded forms
            '%E9' => 'e', //é
            '%E8' => 'e', //è
            '%92' => '_', //’
            '%93' => '_', //“
            '%94' => '_', //”
            '%E0' => 'a', //à
            '%82' => '_', //‚ this is not the standard ,
            '%E4' => 'a', //ä
            // literal forms
            'é' => 'e',
            'è' => 'e',
            '’' => '_',
            '“' => '_',
            '”' => '_',
            '"' => '',
            'à' => 'a',
            '‚' => '_', //this is not the standard ,
            'ä' => 'a',
            '&' => 'and',
        );
        $result = str_replace(array_keys($map), array_values($map), $name);

        /*
         * Remove all non standard characters remaining.
         * Note that it is important to replace by nothing: if we replaced by _
         * some invisible chars would be replaced as well, resulting in __
         * instead of _ as expected.
         */
        return preg_replace(self::PATTERN_NOT_VISIBLE_CHARS, '', $result);
    }

    /**
     * Lists system files containing non standard chars, i.e. every path that
     * is modified by rename(). Directories are scanned recursively.
     *
     * @param string $dir directory to scan or defaults to root_courses
     * @return array paths of the files and directories found
     */
    public function system_files_with_non_standard_chars($dir = '')
    {
        $result = array();
        $dir = $dir ? $dir : Chamilo::root_courses();

        $files = scandir($dir);
        if ($files === false) {
            // The directory cannot be listed (unreadable or removed meanwhile): nothing to report.
            return $result;
        }

        foreach (array_diff($files, array('.', '..')) as $file) {
            $path = "$dir/$file";
            if ($this->rename($path) !== $path) {
                $result[] = $path;
            }
            if (is_dir($path)) {
                $children = $this->system_files_with_non_standard_chars($path);
                $result = array_merge($result, $children);
            }
        }
        return $result;
    }

    /**
     * Renames files and directories by calling rename on the path below the
     * courses root.
     *
     * Must be called before cure_document_entries_with_non_standard_chars as
     * there is a slight chance that renaming files will break the link - this
     * is corrected there.
     *
     * @param string $dir directory to scan or defaults to root_courses
     */
    public function cure_rename_system_files_with_non_standard_chars($dir = '')
    {
        $this->writeln(__FUNCTION__);

        /*
         * First the directories. Renaming a parent directory moves its
         * children as well, so the paths collected for those children are
         * stale and several passes may be needed. A new pass is only started
         * when the previous one actually renamed something, otherwise a
         * directory that cannot be renamed would be retried forever.
         */
        do {
            $renamed = false;
            foreach ($this->system_files_with_non_standard_chars($dir) as $path) {
                if (is_dir($path) && $this->rename_system_path($path)) {
                    $renamed = true;
                }
            }
        } while ($renamed);

        // Then the files, a single pass is enough as they have no children.
        foreach ($this->system_files_with_non_standard_chars($dir) as $path) {
            if (is_file($path)) {
                $this->rename_system_path($path);
            }
        }
    }

    /**
     * Renames one file or directory to its cleaned up name and reports it.
     *
     * Nothing is done when the path does not exist anymore, when the name is
     * already clean or when the target already exists.
     *
     * @param string $path
     * @return bool true if the entry has been renamed on disk
     */
    private function rename_system_path($path)
    {
        $root = realpath(Chamilo::root_courses());
        $path = realpath($path);
        if ($root === false || $path === false) {
            return false;
        }

        $base = str_replace($root, '', $path);
        $new_base = $this->rename($base);
        // $base starts with a slash: avoid building "root//base".
        $new_path = $root . '/' . ltrim($new_base, '/');
        if ($new_base == $base || file_exists($new_path)) {
            return false;
        }

        if (!rename($path, $new_path)) {
            $this->writeln("failed to rename: $path");
            $this->writeln();
            return false;
        }

        $this->writeln("old file name: $path");
        $this->writeln("new file name: $new_path");
        $this->writeln();
        return true;
    }

    /**
     * Returns the hexadecimal representation of the bytes of an utf8 string,
     * one space terminated group per character. Useful to understand what is
     * going on - not printable chars, rare patterns such as e' for é, etc.
     *
     * @param string $text
     * @return string
     */
    public function sprint_utf8($text)
    {
        $result = '';
        // The encoding is passed explicitly rather than through
        // mb_internal_encoding() so that the global setting is left untouched.
        $encoding = 'UTF-8';

        for ($i = 0, $n = mb_strlen($text, $encoding); $i < $n; $i++) {
            $char = mb_substr($text, $i, 1, $encoding);
            for ($j = 0, $num = strlen($char); $j < $num; $j++) {
                $result .= sprintf('%02x', ord($char[$j]));
            }
            $result .= ' ';
        }
        return $result;
    }

    /**
     * Renames lost document entries if renaming matches an actual file.
     */
    public function cure_document_entries_with_non_standard_chars()
    {
        $this->writeln(__FUNCTION__);
        $table_document = Database::get_course_table(TABLE_DOCUMENT);

        $entries = array_merge($this->documents_not_readable(), $this->folders_not_readable());
        foreach ($entries as $doc) {
            $dir = $this->document_root($doc);
            $path = $dir . $doc['path'];
            $new_path = $this->relink($path);
            if (!$new_path) {
                continue;
            }

            // The entry stores the path relative to the document directory.
            $new_path = str_replace($dir, '', $new_path);
            $sql_path = Database::escape_string($new_path);
            $id = (int) $doc['id'];
            $c_id = (int) $doc['c_id'];

            $update_statement = "UPDATE $table_document SET path ='$sql_path' WHERE id = $id AND c_id = $c_id";
            Database::query($update_statement);

            $this->writeln("old file name: $path");
            $this->writeln("new file name: $new_path");
        }
    }

    /**
     * Marks the item properties as deleted and removes the document entries
     * of files that do not exist on disk. To be called as last resort.
     */
    public function cure_remove_lost_document_entries()
    {
        $this->writeln(__FUNCTION__);
        $table_document = Database::get_course_table(TABLE_DOCUMENT);
        $table_item_property = Database::get_course_table(TABLE_ITEM_PROPERTY);

        foreach ($this->documents_not_readable() as $doc) {
            $path = $this->document_root($doc) . $doc['path'];
            // Checked again right before deleting: only files that are really gone are removed.
            if ($doc['filetype'] != 'file' || is_readable($path) || file_exists($path)) {
                continue;
            }

            $id = (int) $doc['id'];
            $c_id = (int) $doc['c_id'];

            $sql = "UPDATE $table_item_property SET lastedit_type = 'DocumentDeleted', visibility = 2 WHERE ref = $id AND c_id = $c_id";
            Database::query($sql);

            $sql = "DELETE FROM $table_document  WHERE c_id = $c_id AND id = $id";
            Database::query($sql);
            $this->writeln('Remove lost entry: ' . $path);
        }
    }

    /**
     * Creates folders that do not exist.
     *
     * When the name of a folder contains non standard chars the entry is
     * renamed to the cleaned up name and the folder is created under that
     * name, so that the entry and the disk agree.
     */
    public function cure_create_missing_folders()
    {
        $this->writeln(__FUNCTION__);
        $table_document = Database::get_course_table(TABLE_DOCUMENT);

        foreach ($this->folders_not_readable() as $folder) {
            $base = $folder['path'];
            $new_base = $this->rename($base);
            if ($new_base != $base) {
                $id = (int) $folder['id'];
                $c_id = (int) $folder['c_id'];
                $sql_path = Database::escape_string($new_base);
                $sql = "UPDATE $table_document SET path='$sql_path' WHERE c_id = $c_id AND id = $id";
                Database::query($sql);
            }

            $path = $this->document_root($folder) . $new_base;
            $this->ensure_directory($path);
            $this->writeln("ensure directory: $path");
        }
    }

    /**
     * Creates the directory, and its missing parents, unless something
     * already exists at that path.
     *
     * @param string $path
     */
    public function ensure_directory($path)
    {
        // Covers readable entries, plain files and unreadable directories alike.
        if (file_exists($path)) {
            return;
        }

        $permissions = api_get_permissions_for_new_directories();
        mkdir($path, $permissions, true);
    }

    /**
     * Prints the system files with non standard chars, then the document
     * entries and the folder entries that are not readable on disk.
     */
    public function diagnose()
    {
        foreach ($this->system_files_with_non_standard_chars() as $file) {
            $this->writeln($file);
        }

        $entries = array_merge($this->documents_not_readable(), $this->folders_not_readable());
        foreach ($entries as $entry) {
            $path = $this->document_root($entry) . $entry['path'];
            $id = $entry['id'];
            $c_id = $entry['c_id'];

            $this->writeln($path . " (c_id => $c_id, id => $id)");
        }
    }

    /**
     * Returns the folder entries whose path is not readable on disk.
     *
     * @return array document rows, each with the course directory added
     */
    public function folders_not_readable()
    {
        return $this->entries_not_readable('folder');
    }

    /**
     * Returns the file entries whose path is not readable on disk.
     *
     * @return array document rows, each with the course directory added
     */
    public function documents_not_readable()
    {
        return $this->entries_not_readable('file');
    }

    /**
     * Returns the document entries of the given type, with a visibility
     * other than 2, whose path is not readable on disk.
     *
     * @param string $filetype 'file' or 'folder', only called with these internal values
     * @return array document rows, each with the course directory added
     */
    private function entries_not_readable($filetype)
    {
        $result = array();
        $table_document = Database::get_course_table(TABLE_DOCUMENT);
        $table_course = Database::get_main_table(TABLE_MAIN_COURSE);

        $sql = "SELECT
                    c.directory,
                    d.*
                FROM
                    $table_document as d,
                    $table_course as c
                WHERE
                    d.c_id = c.id AND
                    d.filetype = '$filetype' AND
                    visibility <> 2";
        $documents = new ResultSet($sql);
        foreach ($documents as $document) {
            $path = $this->document_root($document) . $document['path'];
            if (!is_readable($path)) {
                $result[] = $document;
            }
        }
        return $result;
    }

    /**
     * Returns the document directory of the course the entry belongs to.
     *
     * @param array $entry document row containing the course 'directory'
     * @return string
     */
    private function document_root($entry)
    {
        return api_get_path(SYS_COURSE_PATH) . $entry['directory'] . '/document';
    }

    /**
     * Returns a readable path for $path: the path itself if it is readable,
     * else its cleaned up name if that one is readable.
     *
     * @param string $path
     * @return string|false false if neither is readable
     */
    public function relink($path)
    {
        if (is_readable($path)) {
            return $path;
        }
        $result = $this->rename($path);
        if (is_readable($result)) {
            return $result;
        }
        return false;
    }

}
(1-1/2)