src/strip.vala
[app.mailtrimmer] / src / strip.vala
1 /**
2
3  ** check left to do:  
4   - range scans on maildir
5   - see how replacing the links works in the resulting email via thunderbird etc..
6   - some checksum issues (see duplicates?? suspect 0byte issues?)  -- seems ok now?
7  
8
9
10
11   needs to scan 2 things
12   a) our mailfort email database
13        point it at the top directory, containing YEAR/MONTH/DAY.... directories.
14        scan each file (over a year old...)
15        extract out the attachment, and replace with HTML
16        DATABASE? - mysql or sqlite? - 
17            filesize / name / date / checksum / mimetype -- into mailfort should be OK.
18   b) the imap user emails
19            loop through user's directories
20            check age of email .. over 1 years..
21            ?? how to prevent 'repeat' scanning of emails?
22               ??? hidden '.' files containing last scan date?
23
24            check if file exists in our DB.. - replace the link...
25            otherwise generate a file. + add to DB...
26            
27    c) retrieval system
28      -> URL -> get file
29    d) redirect system.
30      -> URL -> redirect to correct server
31
32
33 More notes on our Mailfort DB sync:
34 * some of these attachments are already in the database...
35  - so we need to update the DB..
36  - probably worth putting the code in a stored procedure..
37  
38  -- key scenarios
39    * first scan (and extract)
40    * rescan (as I messed up the first time - fix the DB...)
41    * email scan - attachments might not have related messages.
42  
43  
44  - {id} attachment_init(
45                 {exim_msg_id}
46                 {chksum}
47                 {filename},
48         )
49         // creates or returns id (can look for existing messages?
50         // can do a merge?? - copy 'old' record data into 'new'....  "prefer checksummed"
51         
52         attachment_update(
53                 {id}
54                 {exim_msg_id}
55                 {mailfort_msg_sig}
56                 {file_size}
57                 {created} // message date..
58                 {chksum}
59                 {filename},
60         {mime_type}
61         )
62         attachment_update_store(
63                         {id}
64                         {stored_filename}
65         )
66
67
68 */ 
69
70 // valac --pkg gmime --vapi
71 /*
72
73 // http://www.fromdual.com/mysql-vala-program-example << check mysql if this does not work.
74
75  valac  -g --vapidir=. --thread  strip.vala   --vapidir=../vapi \
76      --pkg glib-2.0 --pkg mysql --pkg gio-2.0 --pkg posix --pkg gmime-2.6 \
77       --Xcc=-lmysqlclient  -v \
78        -o /tmp/strip
79 */ 
80  
/**
 * Command line entry point of the mail stripper.
 *
 * Parses the command line into the static ''opt_*'' fields, validates the
 * mandatory combinations, connects to MySQL and then hands the scan over to
 * {@link Strip}.
 */
public class StripApplication : GLib.Application {

    public static string? opt_path = null;
    public static string? opt_file = null;
    public static string? opt_target_path = null;
    // NOTE: the --host help text says "localhost", the effective default is this address.
    public static string? opt_db_host = "127.0.0.1";
    public static string? opt_db_name = null;
    public static string? opt_db_user = null;
    public static string? opt_db_pass = null;

    public static int opt_limit = -1;

    public static int opt_age_newest = 1;
    public static int opt_age_oldest = 6;

    public static bool opt_is_extracting = false;
    public static bool opt_is_replacing = false;
    public static bool opt_scan_maildir = false;
    public static bool opt_scan_mailfort = false;
    // Target of --scan-mailfort-only; the option table referenced this field
    // without it ever being declared.
    public static string? opt_scan_mailfort_only = null;
    public static bool opt_dump = false;
    public static bool opt_debug = false;

    public static bool opt_debug_sql = false;
    public static string? opt_replace_link = null;

    public const GLib.OptionEntry[] options = {

        { "debug", 0, 0, OptionArg.NONE, ref opt_debug, "show debug messages for components", null },
        { "debug-sql", 0, 0, OptionArg.NONE, ref opt_debug_sql, "debug the SQL statements", null },

        { "path", 0, 0, OptionArg.STRING, ref opt_path, "Directory where email to be parsed is", null },
        { "file", 0, 0, OptionArg.STRING, ref opt_file, "A specific file to be parsed", null },

        { "target-path", 0, 0, OptionArg.STRING, ref opt_target_path, "Directory where attachments are to be put", null },

        { "link", 0, 0, OptionArg.STRING, ref opt_replace_link, "url for the replement link: eg. http://www.mysite.com/xxxx/%s", null },

        { "host", 0, 0, OptionArg.STRING, ref opt_db_host, "Mysql host (default localhost)", null },
        { "name", 0, 0, OptionArg.STRING, ref opt_db_name, "Mysql database name REQUIRED", null },
        { "user", 0, 0, OptionArg.STRING, ref opt_db_user, "Mysql database user REQUIRED", null },
        { "pass", 0, 0, OptionArg.STRING, ref opt_db_pass, "Mysql database password (default empty)", null },

        { "extract", 0, 0, OptionArg.NONE, ref opt_is_extracting, "Should attachments be extracted (default NO)", null },
        { "replace", 0, 0, OptionArg.NONE, ref opt_is_replacing, "Should attachments be replaced (default NO)", null },
        { "dump", 0, 0, OptionArg.NONE, ref opt_dump, "Print the replaced mail contents to stdout", null },

        { "limit", 0, 0, OptionArg.INT, ref opt_limit, "stop after X number of messages with attachments have been processed", null },
        { "newest", 0, 0, OptionArg.INT, ref opt_age_newest, "do not replace messages newer that X months (default is 1 months)", null },
        { "oldest", 0, 0, OptionArg.INT, ref opt_age_oldest, "do not replace messages older than X (default is 6 months)", null },

        { "scan-maildir", 0, 0, OptionArg.NONE, ref opt_scan_maildir, "scan an maildir tree", null },
        { "scan-mailfort", 0, 0, OptionArg.NONE, ref opt_scan_mailfort, "scan a mailfort tree", null },
        { "scan-mailfort-only", 0, 0, OptionArg.STRING, ref opt_scan_mailfort_only, "scan a mailfort Year/month eg. 2010/08", null },
        { null }
    };

    /**
     * Prints //message// followed by the full option help on stdout and
     * terminates the process with a failure status.
     */
    private static void exit_with_usage(GLib.OptionContext context, string message)
    {
        stdout.printf("%s\n%s", message, context.get_help(true, null));
        GLib.Process.exit(Posix.EXIT_FAILURE);
    }

    /**
     * Parses and validates the command line.
     *
     * Any missing or contradictory option prints the help text and exits the
     * process; when the constructor returns, the ''opt_*'' fields are usable.
     *
     * @param args the process arguments as passed to main()
     */
    public StripApplication( string[] args )
    {
        Object(
            application_id: "org.roojs.mailstripper",
            flags: ApplicationFlags.FLAGS_NONE
        );

        var opt_context = new GLib.OptionContext("Mail Stripper");

        try {
            opt_context.set_help_enabled(true);
            opt_context.add_main_entries(options, null);
            opt_context.parse(ref args);

            // exactly one of the two tree layouts has to be selected
            if (opt_scan_mailfort == opt_scan_maildir) {
                exit_with_usage(opt_context,
                    "You must specify the type of directory tree to scan - either imap or mailfort");
            }

            if (opt_db_name == null || opt_db_name.length < 1
                || opt_db_user == null || opt_db_user.length < 1) {
                exit_with_usage(opt_context, "You must specify the database name / user ");
            }

            if (opt_path == null || opt_path.length < 1) {
                exit_with_usage(opt_context, "You must specify the scan start path");
            }

            if (opt_replace_link == null || opt_replace_link.length < 1) {
                exit_with_usage(opt_context, "You must specify the link to use in the replacement ");
            }

            // a target directory is only needed when files are actually written
            if ((opt_is_replacing || opt_is_extracting)
                && (opt_target_path == null || opt_target_path.length < 1)) {
                exit_with_usage(opt_context, "You must specify a target path to put attachments");
            }

        } catch (GLib.OptionError e) {
            stdout.printf("error: %s\n", e.message);
            stdout.printf("Run '%s --help' to see a full list of available command line options.\n%s",
                args[0], opt_context.get_help(true, null));
            GLib.Process.exit(Posix.EXIT_FAILURE);
        }
    }

    /**
     * Program entry point: parse options, connect to the database and scan
     * either the single file given with --file or the whole --path tree.
     *
     * @return 0 on success, 1 when the database connection fails
     */
    public static int main(string[] args)
    {
        // constructing the application performs option parsing and validation
        var application = new StripApplication(args);

        GLib.Log.set_always_fatal(LogLevelFlags.LEVEL_ERROR | LogLevelFlags.LEVEL_CRITICAL);

        if (opt_debug || opt_debug_sql) {
            GLib.Log.set_handler(null,
                GLib.LogLevelFlags.LEVEL_DEBUG | GLib.LogLevelFlags.LEVEL_WARNING | GLib.LogLevelFlags.LEVEL_INFO,
                (dom, lvl, msg) => {
                    print("%s\n", msg);
                }
            );
        }

        GMime.init(0);

        // replacing an attachment implies that it is extracted first
        if (StripApplication.opt_is_replacing) {
            StripApplication.opt_is_extracting = true;
        }

        GLib.debug("scanning folder: %s", opt_path);

        var strip = new Strip(opt_path);

        strip.mysql = new Mysql.Database();
        if (!strip.mysql.real_connect(
                opt_db_host,
                opt_db_user,
                opt_db_pass == null ? "" : opt_db_pass, // passwd
                opt_db_name, // DB
                3306, // not changable...?
                null
            )
        ) {
            stdout.printf("ERROR %u: Connection failed: %s\n",
                strip.mysql.errno(), strip.mysql.error()
            );
            return 1;
        }

        if (opt_file != null) {
            strip.base_dir = opt_path;
            strip.scan_file(GLib.Path.get_dirname(opt_file), GLib.Path.get_basename(opt_file));
            return 0;
        }

        strip.scan_dir(opt_path, "");

        return 0;
    }
}
249
250 public class Strip : GLib.Object {
251         
252  
253         
254         public string base_dir = "";
255         
256         public Mysql.Database mysql;
257         
258         int processed = 0;
259     
260     uint64 used_space_before = 0;
261     uint64 used_space_after = 0;
262     
263     
264     public Strip(string base_dir)
265     {
266         this.base_dir = base_dir;
267     }
268     
269     public void handle_part(GMime.Object parent, GMime.Object mime_obj)
270     {
271                 if (mime_obj is GMime.Part) {
272                    var  p = (GMime.Part)mime_obj;
273                         var ct = p.get_content_type();
274                         var cd = p.get_content_disposition();
275                         
276                         var sid = p.get_header("X-strip-id");
277                     if (sid != null && sid.length > 0) {
278                         this.update_attachment_db(p);
279                             GLib.debug("Skip attachment replace - it's already been done");
280                         return;
281                         }
282                         
283                         if (cd == null || cd.get_disposition().down() != "attachment") {
284                                 return;
285                         }
286                         if (ct.get_media_type() == "text") {
287                                 return;
288                         }
289                         if (ct.to_string() == "application/pgp-encrypted") {
290                                 return;
291                         }
292                         if (ct.to_string() == "application/pgp-keys") {
293                                 return;
294                         }
295                         if (p.get_filename() == null) {
296                                 return;
297                         }
298                          // print("got part %s\n", ct.to_string());
299                          if (parent is GMime.Multipart) {
300                                 
301                                 this.replace_attachment(((GMime.Multipart)parent), p);
302                                 // remove it !?
303
304                           }
305
306
307                         return;
308                 }
309                 if (mime_obj is GMime.Multipart) {
310                         
311
312                         var  mp = (GMime.Multipart)mime_obj;
313                         //var ct = mp.get_content_type();
314
315                         //print("got multi-part %s\n", ct.to_string());
316                         for (var i = 0; i< mp.get_count(); i++) { 
317                           var mo = mp.get_part(i);
318                           this.handle_part(mime_obj,mo);
319                         }
320                    // ((GMime.Multipart)mime_obj).foreach((sub_obj) => {
321                    //     Strip.handle_part(sub_obj);
322                 //
323                    // });
324
325
326                         return;
327                 }
328
329                 if (mime_obj is GMime.MessagePart) {
330                         var msg = ((GMime.MessagePart)mime_obj).get_message();
331                         msg.foreach((subobj) => {
332                          this.handle_part(msg,subobj);
333                     });
334                 
335                         //print("got message-part\n");
336                         return;
337                 }
338                 
339                 if (mime_obj is GMime.Message) {
340                         var mp = ((GMime.Message) mime_obj).get_mime_part();
341
342                         if (!(mp is GMime.Multipart)) {
343                                 //GLib.debug("get mimepart does not return a Multipart?");
344                                 return;
345                         }
346                         
347                         var mpc = ((GMime.Multipart)mp).get_count();
348                         
349                         //GLib.debug("Message has %d parts", mpc); 
350                         for (var i =0 ; i < mpc; i++) {
351                                 //GLib.debug("Getting part %d", i); 
352                                 var submime_obj = ((GMime.Multipart)mp).get_part(i);
353                         this.handle_part(mp,submime_obj);                       
354                     }
355                         print("got message??\n");
356                         return;
357                 }
358                 
359                 print("got something else\n");
360
361
362     }
363     public void update_attachment_db(GMime.Part attachment)
364     {
365         // only called when we have an sid...
366         var sid = attachment.get_header("X-strip-id");
367         if (sid == null || sid.length < 1) {
368                 GLib.debug("Strange - update attachment db called ?");
369                 return;
370         }
371         
372         // initialize it with known data..
373         // that should wipe out dupes.
374         var matches = this.query("SELECT count(id)   FROM Attachment WHERE id = %d".printf(
375                         int.parse(sid)));  
376
377                  
378                 if (matches == "0") {    
379                         // our old mailfort code deleted the crap out of old records...
380                         // if this occurs we will need to create the record again..
381                         this.fix_deleted_attachment_db(int.parse(sid),attachment);
382                         return;
383  
384                 }
385         
386         
387         // initialize it with known data..
388         // that should wipe out dupes.
389         var filesize = this.query("SELECT filesize FROM Attachment WHERE id = %d".printf(
390                         int.parse(sid)));  
391
392                 if (filesize=="") {      
393                    GLib.error("Ignoring record id (missing in database) :%s", sid);
394                    return;
395                 }
396                 if (int.parse(filesize) < 1) {
397                 GLib.debug("Could not get filesize from id :%s = %s", sid,filesize);
398                 Posix.exit(0);
399                 return;
400         }
401         
402         var chksum = this.query("SELECT  checksum FROM Attachment WHERE id = %d".printf(
403                         int.parse(sid)
404                 ));
405         var mime_filename = this.query("SELECT  mime_filename FROM Attachment WHERE id = %d".printf(
406                         int.parse(sid)));       
407                 
408         this.query("""
409              SELECT 
410                  attachment_init(
411                      '%s', '%s', '%s', %d
412                  ) as id 
413                  
414           """.printf(
415                           this.mysql_escape(this.active_message_exim_id),
416                           this.mysql_escape(chksum),
417                           this.mysql_escape(mime_filename),                       
418                           int.parse(filesize)
419                 ));
420         this.query("""
421                  SELECT attachment_update(
422                       %d, -- in_id INT(11),
423                       '%s', -- in_mime_type varchar(255),
424                       '%s', -- in_created DATETIME,
425                       '%s' -- in_mailfort_sig varchar(64)
426                  )
427               """.printf(
428                         int.parse(sid),
429                         "", // this will be ignored..
430                                 this.created_date,
431                                 this.mysql_escape(this.active_message_x_mailfort_sig)
432               
433               )
434                 );
435                 this.mysql.store_result();
436                 
437
438     
439     }
440     
441     
442     public void fix_deleted_attachment_db(int id, GMime.Part attachment)
443     {
444                 
445         var filename = attachment.get_header("X-strip-content-name");
446         var file_path  = attachment.get_header("X-strip-path");
447         var fn =  StripApplication.opt_target_path + "/" + file_path;
448         
449
450                 if (!FileUtils.test (fn, FileTest.EXISTS)) {
451                         GLib.debug("SKIP -- file does not exist");
452                         return;
453         }
454         
455         var chksum = this.md5_file(fn);
456                 var mime_type = attachment.get_header("X-strip-content-type");
457
458                 var fileinfo = File.new_for_path(fn)
459                                         .query_info(GLib.FileAttribute.STANDARD_SIZE+","+GLib.FileAttribute.TIME_MODIFIED
460                                                 ,GLib.FileQueryInfoFlags.NONE,null);
461         var file_size = (int) fileinfo.get_size();
462
463       
464                 this.real_query(-1, """
465                        
466                        
467                                 INSERT INTO Attachment  (  
468                                         id, 
469                                         
470                                     msgid ,
471                                     queue_id ,
472                                     mime_filename ,
473                                     mime_type,
474                                      
475                                     stored_filename ,
476                                     mime_charset ,
477                                     mime_cdisp ,
478                                     mime_is_cover ,
479                                     
480                                     mime_is_multi ,
481                                     mime_is_mail,
482                                     mime_size ,
483                                     filesize,
484                                     
485                                     checksum,
486                                     created
487
488                                 ) VALUES (
489                                         %d,  -- id
490                                         
491                                     '%s' , -- msgid
492                                     0,
493                                     '%s'  , -- filename
494                                     '%s',  -- mimetype
495                                     
496                                     '%s', -- stored file anme
497                                     '', -- charset
498                                     'attachment',
499                                     0,
500                                     0,
501                                     0,
502                                     %d, -- size
503                                     %d, -- size
504                                     
505                                     '%s', -- checkum
506                                         '%s' -- created:
507                                 )
508                        
509                        
510                       """.printf(
511                                 id,
512                                       this.mysql_escape(this.active_message_exim_id),
513                                       this.mysql_escape(filename),
514                                   this.mysql_escape(mime_type),
515                                   this.mysql_escape(file_path),
516                                         file_size,
517                                       file_size,
518                                       this.mysql_escape(chksum),
519                                 this.created_date
520                          ));
521               // this is done to fix the queue_id or maillog_id ??
522                  this.query("""
523                  SELECT attachment_update(
524                       %d, -- in_id INT(11),
525                                                 '', -- mime type
526                       '%s', -- in_created DATETIME,
527                       '%s' -- in_mailfort_sig varchar(64)
528                  )
529               """.printf(
530                                 id, 
531                                 this.created_date,
532                                 this.mysql_escape(this.active_message_x_mailfort_sig)
533               
534               )
535                 );
536                 // GLib.error("added attachment?");
537     }
538     
539     
540     public void replace_attachment(GMime.Multipart parent, GMime.Part attachment)
541     {
542         var sid = attachment.get_header("X-strip-id");
543         if (sid != null && sid.length > 0) {
544                 GLib.debug("Skip attachment replace - it's already been done");
545                 return;
546         }
547         
548         var c = attachment.get_content_object();
549         
550         var filename = attachment.get_filename().replace("/", "-").replace("\n", "").replace("\t", " ");
551         var fn = GLib.Environment.get_tmp_dir() +
552                         "/"+ this.active_name + "."+   GLib.Uri.escape_string(filename,"", false);
553
554             var outfile = new GMime.StreamFile.for_path(fn, "w");
555             outfile.set_owner(true);
556             var file_size = (int) c.write_to_stream(outfile);
557             var chksum = this.md5_file(fn);
558             outfile.flush();
559             outfile = null;
560         
561         if (file_size == 0) {
562
563                 GLib.debug("ERROR - file size of write to stream returned 0?");
564                 Posix.unlink(fn);               
565                 return;
566         }
567         
568         
569         
570  
571         var mime_type= attachment.get_content_type().to_string();
572         // at this point we have to do our database magic...
573         //filesize / name / date / checksum / mimetype -- into mailfort should be OK.
574         
575         var file_id = this.query("""
576                 SELECT 
577                 
578                 attachment_init(
579                                 '%s', -- in_msgid VARCHAR(32),
580                                 '%s', -- in_checksum VARCHAR(64),
581                                 '%s', -- in_mime_filename varchar(255)
582                                 %d -- filesize
583                         ) as id 
584                         
585           """.printf(
586                         this.mysql_escape(this.active_message_exim_id),
587                         chksum,
588                         this.mysql_escape( GLib.Uri.escape_string(attachment.get_filename(),"", false) ), // what is thsi is invalid?
589                          file_size)
590                 );
591                  
592                 
593                 if (file_id.length < 1) {
594                         GLib.debug("ERROR - CALL to attachment_init failed");
595                 Posix.unlink(fn);               
596                 return;
597                 
598                 }
599  
600                 if (int.parse(file_id) < 1) {
601                         GLib.debug("ERROR - CALL to attachment_init failed - returned 0?");
602                 Posix.unlink(fn);               
603                 return;
604                 
605                 }
606  
607         
608                 GLib.debug("fn = %s, m5=%s, id= %s", filename, mime_type, this.active_message_id);
609                 this.query("""
610                 
611                         SELECT attachment_update(
612                                 %d, -- in_id INT(11),
613                                 '%s', -- in_mime_type varchar(255),
614                                 '%s', -- in_created DATETIME,
615                                 '%s' -- in_mailfort_sig varchar(64)
616                                 
617                                 ) as result
618       """.printf(
619                 int.parse(file_id),
620                         this.mysql_escape(mime_type),
621                         this.created_date,
622                         this.mysql_escape(this.active_message_x_mailfort_sig)
623                 ));
624                  this.mysql.store_result();
625                                  
626  
627                 this.used_space_after += file_size;
628                         
629                 var target_fn = "";
630
631             if (StripApplication.opt_is_extracting) {
632                         target_fn = StripApplication.opt_target_path + "/" + this.created_dir +"/"+ file_id  + "-" + GLib.Uri.escape_string(filename,"", false);
633                 } 
634                     
635             var stored =  "/" + this.created_dir +"/"+ file_id  + "-" + GLib.Uri.escape_string(filename,"", false);
636                  this.query("""
637                 
638                         SELECT attachment_update_store(
639                                 %d, -- in_id INT(11),
640                                 '%s'  -- in_store_filename varchar(255),
641                          
642                                 
643                                 ) as result
644       """.printf(
645                 int.parse(file_id),
646                          this.mysql_escape( stored)
647                 ));   
648                          
649         var rep = new GMime.Part.with_type("text","html");
650         // we have to set up a redirect server - to redirect hpasite... to their internal service..
651         rep.set_filename(filename);
652         string txt = "<html><body>"+
653             "<a href=\"" + StripApplication.opt_replace_link + "/" +
654                         file_id + "/" + this.created_dir + "/"+chksum+"/"+ GLib.Uri.escape_string( filename, "", false) +"\">" + 
655             GLib.Uri.escape_string( filename, "", false) + // fixme needs html escaping...
656             "</a>" +
657             "</body></html>";
658
659         rep.get_content_type().set_parameter("charset", "utf-8");
660                 rep.set_header("X-strip-id", file_id);
661                 rep.set_header("X-strip-content-name",  filename);                              
662                 rep.set_header("X-strip-path", this.created_dir + "/" + file_id + "-" +  GLib.Uri.escape_string(filename,"", false));           
663                 rep.set_header("X-strip-content-type", mime_type);              
664         var stream =  new GMime.StreamMem.with_buffer(txt.data);
665         var con = new GMime.DataWrapper.with_stream(stream,GMime.ContentEncoding.DEFAULT);
666
667         rep.set_content_object(con);
668         GLib.debug("Replacing Attachment with HTML");
669         parent.replace(parent.index_of(attachment), rep);
670                 this.has_replaced = true;
671                  
672                 if (StripApplication.opt_is_extracting && target_fn.length > 0) {
673                         var dir = GLib.Path.get_dirname(target_fn);
674                         if (!FileUtils.test (dir, FileTest.IS_DIR)) {
675                                 GLib.DirUtils.create_with_parents(dir, 0755);
676                         }
677                         GLib.debug("Creating file %s", target_fn);
678                         if (!FileUtils.test (target_fn, FileTest.EXISTS)) {
679                                 var from = File.new_for_path (fn);
680                                 var to =  File.new_for_path (target_fn);
681                                 from.copy(to, 0, null);
682
683                         }
684                 } else { 
685                         GLib.debug("Skipping extraction %s", target_fn);
686                 }
687                 Posix.unlink(fn);
688                 
689
690
691     }
692     public string query(string str)
693     {
694             return this.real_query(1, str);
695     }
696     public string execute(string str)
697     {
698             return this.real_query(0, str);
699     }
700     /**
701     * need_return 
702     0 = no
703     1 = yes
704     -1 = don't try.
705     */
706     public string real_query(int need_return, string str)
707     {
708                 GLib.debug("Before Query : %u  : %s\n", this.mysql.errno(), this.mysql.error());
709
710
711         if (StripApplication.opt_debug_sql) {
712                 GLib.debug("SQL: %s\n", str);
713                 }
714                 
715                 
716         
717         var rc=  this.mysql.query(str); 
718         if ( rc != 0 ) {
719
720                     GLib.debug("ERROR %u: Query failed: %s\n", this.mysql.errno(), this.mysql.error());
721                                 Posix.exit(1);
722                 }
723         var rs = mysql.use_result();
724                 if (need_return == -1) {
725                         return "";
726                 }
727  
728         
729         //GLib.debug("got %d rows", (int) rs.num_rows());
730         
731         var got_row = false;
732                 string[] row;
733                 string ret = "";
734                 while( (row = rs.fetch_row()) != null) { 
735                         got_row = true;
736                         ret = row[0];
737                 
738                 }
739                 if (need_return == 0) {
740                 if (StripApplication.opt_debug_sql) {
741                                 GLib.debug("got %s", got_row ? "=Nothing=" : ret);
742                         }
743                         return got_row ? "" : ret;
744                 }
745                 if (!got_row) {
746
747                          GLib.debug("ERROR : no rows returned");
748                         Posix.exit(1);
749                         return "";
750                 }
751         if (StripApplication.opt_debug_sql) {
752                         GLib.debug("got %s", ret);
753                 }
754                 return ret;
755                 
756                  
757         }
758     
759     public string mysql_escape(string str)
760     {
761             unichar[] value_escaped = new unichar[str.length * 2 + 1];
762                 this.mysql.real_escape_string ((string) value_escaped, str, str.length);
763                 return (string) value_escaped;
764     }
765     
766     public string  md5_file(string fn) {
767               Checksum checksum = new Checksum (ChecksumType.MD5);
768
769               FileStream stream = FileStream.open (fn, "rb");
770               uint8 fbuf[100];
771               size_t size;
772
773               while ((size = stream.read (fbuf)) > 0) {
774                       checksum.update (fbuf, size);
775               }
776
777               unowned string digest = checksum.get_string ();
778               return digest;
779     }
780
        // Per-file scan state, (re)initialised by scan_file().

        // directory of the file currently being scanned (scan_file's `path` argument)
        string active_path = "";    
    // file name of the file currently being scanned (scan_file's `name` argument)
    string active_name = "";
    // message id of the current message, "" until the message has been parsed
    string active_message_id = "";
    // value of the "x-mailfort-sig" header of the current message
    string active_message_x_mailfort_sig = "";
    // token following "id" in the Received header (';' removed), "" when none was found
    string active_message_exim_id = "";
    // reset to false when a scan starts; set to true once an attachment part has been replaced
    bool has_replaced = false;
    // YYYY-mm-dd when derived from the mailfort directory,
    // YYYY-mm-dd HH:MM:SS when taken from the file mtime or the Received header
    string created_date = ""; // should be YYYY-mm-dd
    // date based storage directory of the current message
    string created_dir = ""; // should be YYYY/mm/dd
789     
790     public void scan_file(string path, string name)
791     {
792                 GLib.debug("Scan: %s/%s", path,name); 
793                 
794                 this.has_replaced = false; 
795         this.active_path = path;
796         this.active_name = name;
797         this.active_message_id = "";
798
799                 var mailtime = new DateTime.now_local();
800                 if (StripApplication.opt_scan_mailfort) {
801                     this.created_dir = this.active_path.substring(this.base_dir.length + 1 );
802                         this.created_date = this.created_dir.replace("/", "-");
803                         var bits = this.created_date.split("-");
804                         mailtime = new DateTime.local(int.parse(bits[0]),int.parse(bits[1]),int.parse(bits[2]),0,0,0);
805                         
806                         var oldest = new  DateTime.now_local();
807                         oldest = oldest.add_months(-1 * StripApplication.opt_age_oldest);
808                         var tspan = mailtime.difference(oldest) / GLib.TimeSpan.DAY;
809
810                         if (tspan < 0) {
811                                 GLib.debug("skip file is %d days older than %d months", (int)tspan, StripApplication.opt_age_oldest);
812                                 return;
813                         }
814                         
815                         var newest = new  DateTime.now_local();
816                         newest = newest.add_months(-1 * StripApplication.opt_age_newest);
817                         tspan = mailtime.difference(newest) / GLib.TimeSpan.DAY;
818                         if (tspan > 0) {
819                                 GLib.debug("skip file is %d days newer than %d months", (int)tspan, StripApplication.opt_age_newest);
820                                 return;
821                         }
822                         
823                 }
824         
825         
826                 var fileinfo = File.new_for_path(path +"/" + name)
827                                         .query_info(GLib.FileAttribute.STANDARD_SIZE+","+GLib.FileAttribute.TIME_MODIFIED
828                                                 ,GLib.FileQueryInfoFlags.NONE,null);
829         var file_size = (int) fileinfo.get_size();
830                 var mod_time = fileinfo.get_modification_time();
831                 
832                 
833                 
834                 if (!StripApplication.opt_scan_mailfort) {
835                    
836                 // it's a mail directory...
837                 // use the last modification time? as the default...
838                  mailtime = new DateTime.from_timeval_utc(mod_time);
839                  this.created_dir = mailtime.format("%Y/%m/%d");
840                          this.created_date =  mailtime.format("%Y-%m-%d %H:%M:%S");
841  
842         }
843                 // check on age of file...
844                 
845                 
846                 
847                 
848                 
849         this.used_space_before += file_size;
850         
851         var stream = new GMime.StreamFs.for_path (path +"/" + name,Posix.O_RDONLY, 0);
852         //stream.set_owner(true);
853         var parser = new GMime.Parser.with_stream(stream);
854         var message = parser.construct_message();
855  
856                 if (message == null) {
857                         GLib.debug("Could not parse file? %s/%s", path,name);
858                 this.used_space_after += file_size;                     
859                 return;
860                 }       
861
862
863                 // check : - is message over a year old?                
864                 // get various msg info..
865                 this.active_message_id = message.get_message_id();
866                 this.active_message_x_mailfort_sig = message.get_header("x-mailfort-sig");
867                 var recvd = message.get_header("received");
868                 this.active_message_exim_id = "";
869                 if (recvd != null && recvd.length > 1) {
870                         GLib.debug("RECV: %s", recvd);
871                         var lines = recvd.split("\t");
872                         for (var i = 0; i < lines.length;i++) {
873                                 var bits = lines[i].strip().split(" ");
874                                 if (bits[0] == "id") {
875                                         this.active_message_exim_id = bits[1].replace(";","");
876
877                                 }
878                                 
879                                 if (lines[i].contains(";")) {
880                                         var dbits = lines[i].strip().split(";");                                
881                                         GLib.debug("Reading time from : %s", dbits[1]);
882                                         var timez = GMime.utils_header_decode_date(dbits[1], null);
883                                         if (timez != 0) {
884                                                 mailtime = new DateTime.from_unix_utc(timez);
885                                                 this.created_date = mailtime.format("%Y-%m-%d %H:%M:%S");
886                                                 GLib.debug("Time is %s",this.created_date);
887                                                 // if it's not mailfort we can use that date to determine where to store it...
888                                                 if (!StripApplication.opt_scan_mailfort) {
889                                                         this.created_dir = mailtime.format("%Y/%m/%d");
890                                                 }
891                                         } else {
892                                                 GLib.debug("Could not read time from headers?");
893                                         }
894                                 }
895
896                         }
897                 }
898                 
899                 var oldest = new  DateTime.now_local();
900                 oldest = oldest.add_months(-1 * StripApplication.opt_age_oldest);
901                 var rtspan = mailtime.difference(oldest) / GLib.TimeSpan.DAY;
902                 GLib.debug("Checking oldest %d days difference", (int)rtspan   );
903                 if (rtspan < 0) {
904                         GLib.debug("skip(2) file is %d days older than %d months", (int)rtspan, StripApplication.opt_age_oldest);
905                         return;
906                 }
907                 var newest = new  DateTime.now_local();
908                 newest = newest.add_months(-1 * StripApplication.opt_age_newest);
909                 rtspan = mailtime.difference(newest) / GLib.TimeSpan.DAY;
910                 if (rtspan > 0) {
911                         GLib.debug("skip(2) file is %d days newer than %d months : %s", (int)rtspan, StripApplication.opt_age_newest,
912                                 mailtime.format("%Y-%m-%d %H:%M:%S"));
913                         return;
914                 }
915                 
916                 
917                 
918                 /*
919                 GLib.debug("Message DATA:\n mid: %s\nmailfort: %s \nexim_id: %s",
920                         this.active_message_id,
921                         this.active_message_x_mailfort_sig,
922                         this.active_message_exim_id
923                 );
924                  */
925                         
926                 // DATE?
927                 
928                 var mp = message.get_mime_part();
929
930                 if (!(mp is GMime.Multipart)) {
931                         //GLib.debug("get mimepart does not return a Multipart?");
932                 this.used_space_after += file_size;                                             
933                         return;
934                 }
935                 
936                 var mpc = ((GMime.Multipart)mp).get_count();
937                 
938                 //GLib.debug("Message has %d parts", mpc); 
939                 for (var i =0 ; i < mpc; i++) {
940                         //GLib.debug("Getting part %d", i); 
941                         var mime_obj = ((GMime.Multipart)mp).get_part(i);
942             this.handle_part(mp,mime_obj);                      
943         }
944                 
945         parser= null;
946
947       //  stream.set_owner(false);
948             //stream.close();
949         stream = null;//.close();
950         
951         
952                 if (!this.has_replaced) {
953                         this.used_space_after += file_size;
954                         GLib.debug("skpping write file - no replacement occured");
955                         return;
956                 }
957                 string tmpfile = "";
958                 GMime.Stream outstream = new GMime.StreamNull();
959                 if (StripApplication.opt_is_replacing) {
960                 
961                         tmpfile = GLib.Environment.get_tmp_dir() +"/" + name;
962                 outstream = new GMime.StreamFile.for_path (tmpfile,"w");
963                 ((GMime.StreamFile)outstream).set_owner(true);
964         }
965                 if (StripApplication.opt_dump) {
966                         outstream = new GMime.StreamMem();
967         }
968         
969         file_size = (int) message.write_to_stream(outstream);
970         if (StripApplication.opt_is_replacing) {
971                 ((GMime.StreamFile)outstream).set_owner(false);
972         }
973                 if (StripApplication.opt_dump) {
974                         var ua = ((GMime.StreamMem)outstream).get_byte_array().data;
975                         print("%s\n", (string) ua);
976                 }        
977         message = null;
978         outstream.flush();
979         outstream.close();
980         GLib.debug("finished writing output %d", file_size);
981
982         //
983         outstream = null;
984         
985           
986         this.used_space_after += file_size;
987         
988         
989         if (StripApplication.opt_is_replacing) {
990                 Posix.unlink(path +"/" + name);         
991                 GLib.debug("copy tmp file %s to %s" , tmpfile, path +"/" + name);               
992                 
993                 // link will not work, as we are doing it accross file systems
994                         var from = File.new_for_path (tmpfile);
995                         var nf =  File.new_for_path (path +"/" + name);
996                         from.copy(nf, 0, null);
997                         
998
999                 var newfileinfo = nf.query_info(GLib.FileAttribute.TIME_MODIFIED,GLib.FileQueryInfoFlags.NONE,null);
1000                 newfileinfo.set_modification_time(mod_time);
1001                 nf.set_attributes_from_info(newfileinfo,FileQueryInfoFlags.NONE);
1002                 Posix.unlink(tmpfile);
1003                 }
1004         this.processed++;
1005         
1006         if (StripApplication.opt_limit > -1 && this.processed >= StripApplication.opt_limit) {
1007                 GLib.debug("Reached replacement limit");
1008                 Posix.exit(1);
1009         }
1010         
1011         
1012         
1013         
1014     }
1015     
1016     
1017     public void scan_dir(string basepath, string subpath)
1018     {
1019         
1020         
1021         // determine if path is to old to scan..
1022         if (subpath.length > 0 && StripApplication.opt_scan_mailfort) {
1023                         var year =  int.parse(subpath.substring(1,4));  // "/2000"
1024                         var month = subpath.length > 5 ? int.parse(subpath.substring(6,2)) : 999; // "/2000/12"                 
1025                         var day = subpath.length > 8 ? int.parse(subpath.substring(9,2)) : 999; // "/2000/12/01"                        
1026                 
1027                 var oldest = new  DateTime.now_local();
1028                         oldest = oldest.add_months(-1 * StripApplication.opt_age_oldest);
1029                         
1030                         //GLib.debug("Checking directory %s is older than min: %d/%d/%d", subpath, oldest.get_year() , oldest.get_month(), oldest.get_day_of_month() );                                 
1031                         
1032                         if (year < oldest.get_year()) {
1033                                 GLib.debug("Skip directory %s is older than min year: %d", subpath, oldest.get_year());
1034                                 return;
1035                         }
1036                         if (year == oldest.get_year() &&  month < oldest.get_month()) {
1037                                 GLib.debug("Skip directory %s is older than min month: %d/%d", subpath, oldest.get_year() , oldest.get_month() );
1038                                 return;
1039                         }
1040                 if (year == oldest.get_year() &&  month == oldest.get_month() && day < oldest.get_day_of_month()) {
1041                                 GLib.debug("Skip directory %s is older than min day: %d/%d/%d", subpath, oldest.get_year() , oldest.get_month(), oldest.get_day_of_month() );           
1042                                 return;
1043                         }
1044                 
1045                 var newest = new  DateTime.now_local();
1046                         newest = newest.add_months(-1 * StripApplication.opt_age_newest);
1047                         
1048                         //GLib.debug("Checking directory %s is newer than max: %d/%d/%d", subpath, newest.get_year() , newest.get_month(), newest.get_day_of_month() );                                 
1049                         
1050                         if (year > newest.get_year()) {
1051                                 GLib.debug("Skip directory %s is newer than max year: %d", subpath, newest.get_year());
1052                                 return;
1053                         }
1054                         if (year == newest.get_year() &&  month != 999 && month > newest.get_month()) {
1055                                 GLib.debug("Skip directory %s is newer than max month: %d/%d", subpath, newest.get_year() , newest.get_month() );
1056                                 return;
1057                         }
1058                 if (year == newest.get_year() &&  month == newest.get_month() &&  day != 999 && day > newest.get_day_of_month()) {
1059                                 GLib.debug("Skip directory %s is newer than max day: %d/%d/%d", subpath, newest.get_year() , newest.get_month(), newest.get_day_of_month() );           
1060                                 return;
1061                         }
1062                 
1063                 
1064                 
1065         }
1066         
1067         
1068         var f = File.new_for_path(basepath + subpath);
1069                 FileEnumerator file_enum;
1070         var cancellable = new Cancellable ();
1071         try {      
1072             file_enum = f.enumerate_children(
1073                 FileAttribute.STANDARD_DISPLAY_NAME + "," +   FileAttribute.STANDARD_TYPE,
1074                         FileQueryInfoFlags.NOFOLLOW_SYMLINKS,  // FileQueryInfoFlags.NONE,
1075                         cancellable
1076                 );
1077         } catch (Error e) {
1078                 GLib.debug("Got error scanning dir? %s", e.message);
1079             // FIXME - show error..
1080             return;
1081         }
1082         FileInfo next_file;
1083          
1084         while (cancellable.is_cancelled () == false ) {
1085             try {
1086                 next_file = file_enum.next_file (cancellable);
1087             } catch(Error e) {
1088                 GLib.debug("error getting next file? %s", e.message);
1089                 break;
1090             }
1091
1092             if (next_file == null) {
1093                 break;
1094             }
1095                 
1096                 
1097                 if (next_file.get_is_symlink()) {
1098                 next_file = null;
1099                 continue;
1100             }
1101             
1102             var ds = next_file.get_display_name();
1103             if (next_file.get_file_type() != FileType.DIRECTORY) {
1104                 
1105                 
1106                 
1107                 if (ds[0] == ',') {
1108                         continue;
1109                 }
1110                 // other files to ignore?
1111                 if (Regex.match_simple (".tgz$", ds)) {
1112                         continue;
1113                 }
1114                 this.scan_file(basepath + subpath , ds);
1115                                 if(this.has_replaced) {
1116                          this.report_state("After scanning %s/%s".printf(basepath + subpath , ds));
1117                         }
1118                 continue;
1119             }
1120
1121
1122             //stdout.printf("Monitor.monitor: got file %s : type :%u\n",
1123             //        next_file.get_display_name(), next_file.get_file_type());
1124
1125
1126         
1127
1128             // not really needed?? - we are storing attachments in a seperate location now...
1129             if (ds[0] == '.') {
1130                 next_file = null;
1131                 continue;
1132             }
1133             if (ds == "attachments") {
1134                         continue;
1135                 }
1136             
1137             
1138             var sp = subpath+"/"+next_file.get_display_name();
1139             // skip modules.
1140             //print("got a file : " + sp);
1141          
1142             next_file = null;
1143             
1144             
1145             this.scan_dir(basepath,sp);
1146             
1147         }
1148     
1149     
1150     }
1151     void report_state(string msg) 
1152     {
1153         // Saved: 2G  Original 10G : 20%
1154         GLib.debug("Saved : %s (%.1f%%) | Original %s | %s", 
1155                         GLib.format_size(this.used_space_before - this.used_space_after), 
1156                         100f * ((1f * (this.used_space_before - this.used_space_after)) / (this.used_space_before * 1f)), 
1157                         GLib.format_size(this.used_space_before),                       
1158                         msg
1159                 );
1160         
1161         }
1162         
1163         
1164
1165 }