src/strip.vala
[app.mailtrimmer] / src / strip.vala
1 /**
2
3  ** check left to do:  
4   - range scans on maildir
5   - see how replacing the links works in the resulting email via thunderbird etc..
6   - some checksum issues (see duplicates?? suspect 0-byte issues?)  -- seems ok now?
7  
8
9
10
11   needs to scan 2 things
12   a) our mailfort email database
13        point it at the top directory, containing YEAR/MONTH/DAY.... directories.
14        scan each file (over a year old...)
15        extract out the attachment, and replace with HTML
16        DATABASE? - mysql or sqlite? - 
17            filesize / name / date / checksum / mimetype -- into mailfort should be OK.
18   b) the imap user emails
19            loop through user's directories
20            check age of email .. over 1 years..
21            ?? how to prevent 'repeat' scanning of emails?
22               ??? hidden '.' files containing last scan date?
23
24            check if file exists in our DB.. - replace the link...
25            otherwise generate a file. + add to DB...
26            
27    c) retrieval system
28      -> URL -> get file
29    d) redirect system.
30      -> URL -> redirect to correct server
31
32
33 More notes on our Mailfort DB sync:
34 * some of these attachments are already in the database...
35  - so we need to update the DB..
36  - probably worth putting the code in a stored procedure..
37  
38  -- key scenarios
39    * first scan (and extract)
40    * rescan (as I messed up the first time - fix the DB...)
41    * email scan - attachments might not have related messages.
42  
43  
44  - {id} attachment_init(
45                 {exim_msg_id}
46                 {chksum}
47                 {filename),
48         )
49         // creates or returns id (can look for existing messages?
50         // can do a merge?? - copy 'old' record data into 'new'....  "prefer checksummed"
51         
52         attachment_update(
53                 {id}
54                 {exim_msg_id}
55                 {mailfort_msg_sig}
56                 {file_size}
57                 {created} // message date..
58                 {chksum}
59                 {filename),
60         {mime_type}
61         )
62         attachment_update_store(
63                         {id}
64                         {stored_filename}
65         )
66
67
68 */ 
69
70 // valac --pkg gmime --vapi
71 /*
72
73 // http://www.fromdual.com/mysql-vala-program-example << check mysql if this does not work.
74
75  valac  -g --vapidir=. --thread  strip.vala   --vapidir=../vapi \
76      --pkg glib-2.0 --pkg mysql --pkg gio-2.0 --pkg posix --pkg gmime-2.6 \
77       --Xcc=-lmysqlclient  -v \
78        -o /tmp/strip
79 */ 
80  
/**
 * Command line front end of the mail stripper.
 *
 * The creation method parses the command line into the static opt_* fields
 * and terminates the process when a mandatory option is missing; main()
 * then connects to MySQL and hands the scan over to a Strip instance.
 */
public class StripApplication : GLib.Application {

    // Values filled in by the option parser (see the `options` table).
    public static string? opt_path = null;
    public static string? opt_file = null;
    public static string? opt_target_path = null;
    public static string? opt_db_host = "127.0.0.1";
    public static string? opt_db_name = null;
    public static string? opt_db_user = null;
    public static string? opt_db_pass = null;

    public static int opt_limit = -1;

    public static int opt_age_newest = 1;
    public static int opt_age_oldest = 6;

    public static bool opt_is_extracting = false;
    public static bool opt_is_replacing = false;
    public static bool opt_scan_maildir = false;
    public static bool opt_scan_mailfort = false;
    public static bool opt_dump = false;
    public static bool opt_debug = false;

    public static bool opt_debug_sql = false;
    public static string? opt_replace_link = null;

    // Option table handed to GLib.OptionContext; each entry writes one opt_* field.
    public const GLib.OptionEntry[] options = {

        { "debug", 0, 0, OptionArg.NONE, ref opt_debug, "show debug messages for components", null },
        { "debug-sql", 0, 0, OptionArg.NONE, ref opt_debug_sql, "debug the SQL statements", null },

        { "path", 0, 0, OptionArg.STRING, ref opt_path, "Directory where email to be parsed is", null },
        { "file", 0, 0, OptionArg.STRING, ref opt_file, "A specific file to be parsed", null },

        { "target-path", 0, 0, OptionArg.STRING, ref opt_target_path, "Directory where attachments are to be put", null },

        { "link", 0, 0, OptionArg.STRING, ref opt_replace_link, "url for the replement link: eg. http://www.mysite.com/xxxx/%s", null },

        { "host", 0, 0, OptionArg.STRING, ref opt_db_host, "Mysql host (default localhost)", null },
        { "name", 0, 0, OptionArg.STRING, ref opt_db_name, "Mysql database name REQUIRED", null },
        { "user", 0, 0, OptionArg.STRING, ref opt_db_user, "Mysql database user REQUIRED", null },
        { "pass", 0, 0, OptionArg.STRING, ref opt_db_pass, "Mysql database password (default empty)", null },

        { "extract", 0, 0, OptionArg.NONE, ref opt_is_extracting, "Should attachments be extracted (default NO)", null },
        { "replace", 0, 0, OptionArg.NONE, ref opt_is_replacing, "Should attachments be replaced (default NO)", null },
        { "dump", 0, 0, OptionArg.NONE, ref opt_dump, "Print the replaced mail contents to stdout", null },

        { "limit", 0, 0, OptionArg.INT, ref opt_limit, "stop after X number of messages with attachments have been processed", null },
        { "newest", 0, 0, OptionArg.INT, ref opt_age_newest, "do not replace messages newer that X months (default is 1 months)", null },
        { "oldest", 0, 0, OptionArg.INT, ref opt_age_oldest, "do not replace messages older than X (default is 6 months)", null },

        { "scan-maildir", 0, 0, OptionArg.NONE, ref opt_scan_maildir, "scan an maildir tree", null },
        { "scan-mailfort", 0, 0, OptionArg.NONE, ref opt_scan_mailfort, "scan a mailfort tree", null },
        { null }
    };

    /** True when an option value was not supplied or is the empty string. */
    private static bool is_blank(string? value)
    {
        return value == null || value.length < 1;
    }

    /**
     * Parses and validates the command line.
     *
     * Any parse error or missing mandatory option prints a message plus the
     * help text to stdout and exits the process with EXIT_FAILURE.
     *
     * @param args the process arguments as received by main()
     */
    public StripApplication( string[] args )
    {
        Object(
            application_id: "org.roojs.mailstripper",
            flags: ApplicationFlags.FLAGS_NONE
        );

        var opt_context = new GLib.OptionContext ("Mail Stripper");

        try {
            opt_context.set_help_enabled (true);
            opt_context.add_main_entries (options, null);
            opt_context.parse (ref args);
        } catch (GLib.OptionError e) {
            stdout.printf ("error: %s\n", e.message);
            stdout.printf ("Run '%s --help' to see a full list of available command line options.\n%s",
                args[0], opt_context.get_help(true, null));
            GLib.Process.exit(Posix.EXIT_FAILURE);
        }

        // exactly one of --scan-maildir / --scan-mailfort has to be given
        if (opt_scan_mailfort == opt_scan_maildir) {
            stdout.printf ("You must specify the type of directory tree to scan - either imap or mailfort\n%s",
                opt_context.get_help(true, null));
            GLib.Process.exit(Posix.EXIT_FAILURE);
        }

        if (is_blank(opt_db_name) || is_blank(opt_db_user)) {
            stdout.printf ("You must specify the database name / user \n%s",
                opt_context.get_help(true, null));
            GLib.Process.exit(Posix.EXIT_FAILURE);
        }

        if (is_blank(opt_path)) {
            stdout.printf ("You must specify the scan start path\n%s",
                opt_context.get_help(true, null));
            GLib.Process.exit(Posix.EXIT_FAILURE);
        }

        if (is_blank(opt_replace_link)) {
            stdout.printf ("You must specify the link to use in the replacement \n%s",
                opt_context.get_help(true, null));
            GLib.Process.exit(Posix.EXIT_FAILURE);
        }

        // a target directory is only mandatory when attachments get written out
        var writes_attachments = opt_is_replacing || opt_is_extracting;
        if (writes_attachments && is_blank(opt_target_path)) {
            stdout.printf ("You must specify a target path to put attachments\n%s",
                opt_context.get_help(true, null));
            GLib.Process.exit(Posix.EXIT_FAILURE);
        }
    }

    /**
     * Program entry point.
     *
     * @param args process arguments
     * @return 1 when the MySQL connection fails, 0 otherwise
     */
    public static int main(string[] args)
    {
        // constructing the application parses and validates the options
        var application = new StripApplication(args);

        GLib.Log.set_always_fatal(LogLevelFlags.LEVEL_ERROR | LogLevelFlags.LEVEL_CRITICAL);

        if (opt_debug || opt_debug_sql) {
            var shown_levels = GLib.LogLevelFlags.LEVEL_DEBUG
                | GLib.LogLevelFlags.LEVEL_WARNING
                | GLib.LogLevelFlags.LEVEL_INFO;
            GLib.Log.set_handler(null, shown_levels, (dom, lvl, msg) => {
                print("%s\n", msg);
            });
        }

        GMime.init(0);

        // replacing implies extracting
        if (StripApplication.opt_is_replacing) {
            StripApplication.opt_is_extracting = true;
        }

        GLib.debug("scanning folder: %s", opt_path );

        var strip = new Strip(opt_path);
        strip.mysql = new Mysql.Database();

        var connected = strip.mysql.real_connect(
            opt_db_host,
            opt_db_user,
            opt_db_pass ?? "",  // passwd
            opt_db_name,        // DB
            3306,               // port is fixed
            null
        );
        if (!connected) {
            stdout.printf("ERROR %u: Connection failed: %s\n",
                strip.mysql.errno(), strip.mysql.error()
            );
            return 1;
        }

        if (opt_file == null) {
            strip.scan_dir(opt_path, "");
            return 0;
        }

        // a single file was requested: scan just that one
        strip.base_dir = opt_path;
        strip.scan_file(GLib.Path.get_dirname(opt_file), GLib.Path.get_basename(opt_file));
        return 0;
    }
}
248
249 public class Strip : GLib.Object {
250         
251  
252         
253         public string base_dir = "";
254         
255         public Mysql.Database mysql;
256         
257         int processed = 0;
258     
259     uint64 used_space_before = 0;
260     uint64 used_space_after = 0;
261     
262     
263     public Strip(string base_dir)
264     {
265         this.base_dir = base_dir;
266     }
267     
268     public void handle_part(GMime.Object parent, GMime.Object mime_obj)
269     {
270                 if (mime_obj is GMime.Part) {
271                    var  p = (GMime.Part)mime_obj;
272                         var ct = p.get_content_type();
273                         var cd = p.get_content_disposition();
274                         
275                         var sid = p.get_header("X-strip-id");
276                     if (sid != null && sid.length > 0) {
277                         this.update_attachment_db(p);
278                             GLib.debug("Skip attachment replace - it's already been done");
279                         return;
280                         }
281                         
282                         if (cd == null || cd.get_disposition().down() != "attachment") {
283                                 return;
284                         }
285                         if (ct.get_media_type() == "text") {
286                                 return;
287                         }
288                         if (ct.to_string() == "application/pgp-encrypted") {
289                                 return;
290                         }
291                         if (ct.to_string() == "application/pgp-keys") {
292                                 return;
293                         }
294                         if (p.get_filename() == null) {
295                                 return;
296                         }
297                          // print("got part %s\n", ct.to_string());
298                          if (parent is GMime.Multipart) {
299                                 
300                                 this.replace_attachment(((GMime.Multipart)parent), p);
301                                 // remove it !?
302
303                           }
304
305
306                         return;
307                 }
308                 if (mime_obj is GMime.Multipart) {
309                         
310
311                         var  mp = (GMime.Multipart)mime_obj;
312                         //var ct = mp.get_content_type();
313
314                         //print("got multi-part %s\n", ct.to_string());
315                         for (var i = 0; i< mp.get_count(); i++) { 
316                           var mo = mp.get_part(i);
317                           this.handle_part(mime_obj,mo);
318                         }
319                    // ((GMime.Multipart)mime_obj).foreach((sub_obj) => {
320                    //     Strip.handle_part(sub_obj);
321                 //
322                    // });
323
324
325                         return;
326                 }
327
328                 if (mime_obj is GMime.MessagePart) {
329                         var msg = ((GMime.MessagePart)mime_obj).get_message();
330                         msg.foreach((subobj) => {
331                          this.handle_part(msg,subobj);
332                     });
333                 
334                         //print("got message-part\n");
335                         return;
336                 }
337                 
338                 if (mime_obj is GMime.Message) {
339                         var mp = ((GMime.Message) mime_obj).get_mime_part();
340
341                         if (!(mp is GMime.Multipart)) {
342                                 //GLib.debug("get mimepart does not return a Multipart?");
343                                 return;
344                         }
345                         
346                         var mpc = ((GMime.Multipart)mp).get_count();
347                         
348                         //GLib.debug("Message has %d parts", mpc); 
349                         for (var i =0 ; i < mpc; i++) {
350                                 //GLib.debug("Getting part %d", i); 
351                                 var submime_obj = ((GMime.Multipart)mp).get_part(i);
352                         this.handle_part(mp,submime_obj);                       
353                     }
354                         print("got message??\n");
355                         return;
356                 }
357                 
358                 print("got something else\n");
359
360
361     }
362     public void update_attachment_db(GMime.Part attachment)
363     {
364         // only called when we have an sid...
365         var sid = attachment.get_header("X-strip-id");
366         if (sid == null || sid.length < 1) {
367                 GLib.debug("Strange - update attachment db called ?");
368                 return;
369         }
370         
371         // initialize it with known data..
372         // that should wipe out dupes.
373         var matches = this.execute("SELECT id   FROM Attachment WHERE id = %d".printf(
374                         int.parse(sid)));  
375
376                  
377                 if (matches == "") {     
378                         // our old mailfort code deleted the crap out of old records...
379                         // if this occurs we will need to create the record again..
380                         this.fix_deleted_attachment_db(int.parse(sid),attachment);
381                         return;
382  
383                 }
384         
385         
386         // initialize it with known data..
387         // that should wipe out dupes.
388         var filesize = this.execute("SELECT filesize FROM Attachment WHERE id = %d".printf(
389                         int.parse(sid)));  
390
391                 if (filesize=="") {      
392                    GLib.error("Ignoring record id (missing in database) :%s", sid);
393                    return;
394                 }
395                 if (int.parse(filesize) < 1) {
396                 GLib.debug("Could not get filesize from id :%s = %s", sid,filesize);
397                 Posix.exit(0);
398                 return;
399         }
400         
401         var chksum = this.query("SELECT  checksum FROM Attachment WHERE id = %d".printf(
402                         int.parse(sid)
403                 ));
404         var mime_filename = this.query("SELECT  mime_filename FROM Attachment WHERE id = %d".printf(
405                         int.parse(sid)));       
406                 
407         this.query("""
408              SELECT 
409                  attachment_init(
410                      '%s', '%s', '%s', %d
411                  ) as id 
412                  
413           """.printf(
414                           this.mysql_escape(this.active_message_exim_id),
415                           this.mysql_escape(chksum),
416                           this.mysql_escape(mime_filename),                       
417                           int.parse(filesize)
418                 ));
419         this.query("""
420                  SELECT attachment_update(
421                       %d, -- in_id INT(11),
422                       '%s', -- in_mime_type varchar(255),
423                       '%s', -- in_created DATETIME,
424                       '%s' -- in_mailfort_sig varchar(64)
425                  )
426               """.printf(
427                         int.parse(sid),
428                         "", // this will be ignored..
429                                 this.created_date,
430                                 this.mysql_escape(this.active_message_x_mailfort_sig)
431               
432               )
433                 );
434                 this.mysql.store_result();
435                 
436
437     
438     }
439     
440     
441     public void fix_deleted_attachment_db(int id, GMime.Part attachment)
442     {
443                 
444         var filename = attachment.get_header("X-strip-content-name");
445         var file_path  = attachment.get_header("X-strip-path");
446         var fn =  StripApplication.opt_target_path + "/" + file_path;
447         var chksum = this.md5_file(fn);
448                 var mime_type = attachment.get_header("X-strip-content-type");
449
450                 var fileinfo = File.new_for_path(fn)
451                                         .query_info(GLib.FileAttribute.STANDARD_SIZE+","+GLib.FileAttribute.TIME_MODIFIED
452                                                 ,GLib.FileQueryInfoFlags.NONE,null);
453         var file_size = (int) fileinfo.get_size();
454
455                 if (!FileUtils.test (fn, FileTest.EXISTS)) {
456                         GLib.debug("SKIP -- file does not exist");
457                 }
458       
459                 this.real_query(-1, """
460                        
461                        
462                                 INSERT INTO Attachment  (  
463                                         id, 
464                                         
465                                     msgid ,
466                                     queue_id ,
467                                     mime_filename ,
468                                     mime_type,
469                                      
470                                     stored_filename ,
471                                     mime_charset ,
472                                     mime_cdisp ,
473                                     mime_is_cover ,
474                                     
475                                     mime_is_multi ,
476                                     mime_is_mail,
477                                     mime_size ,
478                                     filesize,
479                                     
480                                     checksum,
481                                     created
482
483                                 ) VALUES (
484                                         %d,  -- id
485                                         
486                                     '%s' , -- msgid
487                                     0,
488                                     '%s'  , -- filename
489                                     '%s',  -- mimetype
490                                     
491                                     '%s', -- stored file anme
492                                     '', -- charset
493                                     'attachment',
494                                     0,
495                                     0,
496                                     0,
497                                     %d, -- size
498                                     %d, -- size
499                                     
500                                     '%s', -- checkum
501                                         '%s' -- created:
502                                 )
503                        
504                        
505                       """.printf(
506                                 id,
507                                       this.mysql_escape(this.active_message_exim_id),
508                                       this.mysql_escape(filename),
509                                   this.mysql_escape(mime_type),
510                                   this.mysql_escape(file_path),
511                                         file_size,
512                                       file_size,
513                                       this.mysql_escape(chksum),
514                                 this.created_date
515                          ));
516               // this is done to fix the queue_id or maillog_id ??
517                  this.query("""
518                  SELECT attachment_update(
519                       %d, -- in_id INT(11),
520                                                 '', -- mime type
521                       '%s', -- in_created DATETIME,
522                       '%s' -- in_mailfort_sig varchar(64)
523                  )
524               """.printf(
525                                 id, 
526                                 this.created_date,
527                                 this.mysql_escape(this.active_message_x_mailfort_sig)
528               
529               )
530                 );
531                 // GLib.error("added attachment?");
532     }
533     
534     
535     public void replace_attachment(GMime.Multipart parent, GMime.Part attachment)
536     {
537         var sid = attachment.get_header("X-strip-id");
538         if (sid != null && sid.length > 0) {
539                 GLib.debug("Skip attachment replace - it's already been done");
540                 return;
541         }
542         
543         var c = attachment.get_content_object();
544         
545         var filename = attachment.get_filename().replace("/", "-").replace("\n", "").replace("\t", " ");
546         var fn = GLib.Environment.get_tmp_dir() +
547                         "/"+ this.active_name + "."+   GLib.Uri.escape_string(filename,"", false);
548
549             var outfile = new GMime.StreamFile.for_path(fn, "w");
550             outfile.set_owner(true);
551             var file_size = (int) c.write_to_stream(outfile);
552             var chksum = this.md5_file(fn);
553             outfile.flush();
554             outfile = null;
555         
556         if (file_size == 0) {
557
558                 GLib.debug("ERROR - file size of write to stream returned 0?");
559                 Posix.unlink(fn);               
560                 return;
561         }
562         
563         
564         
565  
566         var mime_type= attachment.get_content_type().to_string();
567         // at this point we have to do our database magic...
568         //filesize / name / date / checksum / mimetype -- into mailfort should be OK.
569         
570         var file_id = this.query("""
571                 SELECT 
572                 
573                 attachment_init(
574                                 '%s', -- in_msgid VARCHAR(32),
575                                 '%s', -- in_checksum VARCHAR(64),
576                                 '%s', -- in_mime_filename varchar(255)
577                                 %d -- filesize
578                         ) as id 
579                         
580           """.printf(
581                         this.mysql_escape(this.active_message_exim_id),
582                         chksum,
583                         this.mysql_escape( GLib.Uri.escape_string(attachment.get_filename(),"", false) ), // what is thsi is invalid?
584                          file_size)
585                 );
586                  
587                 
588                 if (file_id.length < 1) {
589                         GLib.debug("ERROR - CALL to attachment_init failed");
590                 Posix.unlink(fn);               
591                 return;
592                 
593                 }
594  
595                 if (int.parse(file_id) < 1) {
596                         GLib.debug("ERROR - CALL to attachment_init failed - returned 0?");
597                 Posix.unlink(fn);               
598                 return;
599                 
600                 }
601  
602         
603                 GLib.debug("fn = %s, m5=%s, id= %s", filename, mime_type, this.active_message_id);
604                 this.query("""
605                 
606                         SELECT attachment_update(
607                                 %d, -- in_id INT(11),
608                                 '%s', -- in_mime_type varchar(255),
609                                 '%s', -- in_created DATETIME,
610                                 '%s' -- in_mailfort_sig varchar(64)
611                                 
612                                 ) as result
613       """.printf(
614                 int.parse(file_id),
615                         this.mysql_escape(mime_type),
616                         this.created_date,
617                         this.mysql_escape(this.active_message_x_mailfort_sig)
618                 ));
619                  this.mysql.store_result();
620                                  
621  
622                 this.used_space_after += file_size;
623                         
624                 var target_fn = "";
625
626             if (StripApplication.opt_is_extracting) {
627                         target_fn = StripApplication.opt_target_path + "/" + this.created_dir +"/"+ file_id  + "-" + GLib.Uri.escape_string(filename,"", false);
628                 } 
629                     
630             var stored =  "/" + this.created_dir +"/"+ file_id  + "-" + filename;
631                  this.query("""
632                 
633                         SELECT attachment_update_store(
634                                 %d, -- in_id INT(11),
635                                 '%s'  -- in_store_filename varchar(255),
636                          
637                                 
638                                 ) as result
639       """.printf(
640                 int.parse(file_id),
641                          this.mysql_escape( stored)
642                 ));   
643                          
644         var rep = new GMime.Part.with_type("text","html");
645         // we have to set up a redirect server - to redirect hpasite... to their internal service..
646         rep.set_filename(filename);
647         string txt = "<html><body>"+
648             "<a href=\"" + StripApplication.opt_replace_link + "/" +
649                         file_id + "/" + this.created_dir + "/"+chksum+"/"+ GLib.Uri.escape_string( filename) +"\">" + 
650             GLib.Uri.escape_string( filename) + // fixme needs html escaping...
651             "</a>" +
652             "</body></html>";
653
654         rep.get_content_type().set_parameter("charset", "utf-8");
655                 rep.set_header("X-strip-id", file_id);
656                 rep.set_header("X-strip-content-name",  filename);                              
657                 rep.set_header("X-strip-path", this.created_dir + "/" + file_id + "-" + filename);              
658                 rep.set_header("X-strip-content-type", mime_type);              
659         var stream =  new GMime.StreamMem.with_buffer(txt.data);
660         var con = new GMime.DataWrapper.with_stream(stream,GMime.ContentEncoding.DEFAULT);
661
662         rep.set_content_object(con);
663         GLib.debug("Replacing Attachment with HTML");
664         parent.replace(parent.index_of(attachment), rep);
665                 this.has_replaced = true;
666                  
667                 if (StripApplication.opt_is_extracting && target_fn.length > 0) {
668                         var dir = GLib.Path.get_dirname(target_fn);
669                         if (!FileUtils.test (dir, FileTest.IS_DIR)) {
670                                 GLib.DirUtils.create_with_parents(dir, 0755);
671                         }
672                         GLib.debug("Creating file %s", target_fn);
673                         if (!FileUtils.test (target_fn, FileTest.EXISTS)) {
674                                 var from = File.new_for_path (fn);
675                                 var to =  File.new_for_path (target_fn);
676                                 from.copy(to, 0, null);
677
678                         }
679                 } else { 
680                         GLib.debug("Skipping extraction %s", target_fn);
681                 }
682                 Posix.unlink(fn);
683                 
684
685
686     }
687     public string query(string str)
688     {
689             return this.real_query(1, str);
690     }
691     public string execute(string str)
692     {
693             return this.real_query(0, str);
694     }
695     /**
696     * need_return 
697     0 = no
698     1 = yes
699     -1 = don't try.
700     */
701     public string real_query(int need_return, string str)
702     {
703                 GLib.debug("Before Query : %u  : %s\n", this.mysql.errno(), this.mysql.error());
704
705
706         if (StripApplication.opt_debug_sql) {
707                 GLib.debug("SQL: %s\n", str);
708                 }
709                 
710                 
711         
712         var rc=  this.mysql.query(str); 
713         if ( rc != 0 ) {
714
715                     GLib.debug("ERROR %u: Query failed: %s\n", this.mysql.errno(), this.mysql.error());
716                                 Posix.exit(1);
717                 }
718         var rs = mysql.use_result();
719                 if (need_return == -1) {
720                         return "";
721                 }
722  
723         
724         //GLib.debug("got %d rows", (int) rs.num_rows());
725         
726         var got_row = false;
727                 string[] row;
728                 string ret = "";
729                 while( (row = rs.fetch_row()) != null) { 
730                         got_row = true;
731                         ret = row[0];
732                 
733                 }
734                 if (need_return == 0) {
735                 if (StripApplication.opt_debug_sql) {
736                                 GLib.debug("got %s", got_row ? "=Nothing=" : ret);
737                         }
738                         return got_row ? "" : ret;
739                 }
740                 if (!got_row) {
741
742                          GLib.debug("ERROR : no rows returned");
743                         Posix.exit(1);
744                         return "";
745                 }
746         if (StripApplication.opt_debug_sql) {
747                         GLib.debug("got %s", ret);
748                 }
749                 return ret;
750                 
751                  
752         }
753     
754     public string mysql_escape(string str)
755     {
756             unichar[] value_escaped = new unichar[str.length * 2 + 1];
757                 this.mysql.real_escape_string ((string) value_escaped, str, str.length);
758                 return (string) value_escaped;
759     }
760     
761     public string  md5_file(string fn) {
762               Checksum checksum = new Checksum (ChecksumType.MD5);
763
764               FileStream stream = FileStream.open (fn, "rb");
765               uint8 fbuf[100];
766               size_t size;
767
768               while ((size = stream.read (fbuf)) > 0) {
769                       checksum.update (fbuf, size);
770               }
771
772               unowned string digest = checksum.get_string ();
773               return digest;
774     }
775
776         string active_path = "";    
777     string active_name = "";
778     string active_message_id = "";
779     string active_message_x_mailfort_sig = "";
780     string active_message_exim_id = "";
781     bool has_replaced = false;
782     string created_date = ""; // should be YYYY-mm-dd
783     string created_dir = ""; // should be YYY/mm/dd
784     
785     public void scan_file(string path, string name)
786     {
787                 GLib.debug("Scan: %s/%s", path,name); 
788                 
789                 this.has_replaced = false; 
790         this.active_path = path;
791         this.active_name = name;
792         this.active_message_id = "";
793
794                 var mailtime = new DateTime.now_local();
795                 if (StripApplication.opt_scan_mailfort) {
796                     this.created_dir = this.active_path.substring(this.base_dir.length + 1 );
797                         this.created_date = this.created_dir.replace("/", "-");
798                         var bits = this.created_date.split("-");
799                         mailtime = new DateTime.local(int.parse(bits[0]),int.parse(bits[1]),int.parse(bits[2]),0,0,0);
800                         
801                         var oldest = new  DateTime.now_local();
802                         oldest = oldest.add_months(-1 * StripApplication.opt_age_oldest);
803                         var tspan = mailtime.difference(oldest) / GLib.TimeSpan.DAY;
804
805                         if (tspan < 0) {
806                                 GLib.debug("skip file is %d days older than %d months", (int)tspan, StripApplication.opt_age_oldest);
807                                 return;
808                         }
809                         
810                         var newest = new  DateTime.now_local();
811                         newest = newest.add_months(-1 * StripApplication.opt_age_newest);
812                         tspan = mailtime.difference(newest) / GLib.TimeSpan.DAY;
813                         if (tspan > 0) {
814                                 GLib.debug("skip file is %d days newer than %d months", (int)tspan, StripApplication.opt_age_newest);
815                                 return;
816                         }
817                         
818                 }
819         
820         
821                 var fileinfo = File.new_for_path(path +"/" + name)
822                                         .query_info(GLib.FileAttribute.STANDARD_SIZE+","+GLib.FileAttribute.TIME_MODIFIED
823                                                 ,GLib.FileQueryInfoFlags.NONE,null);
824         var file_size = (int) fileinfo.get_size();
825                 var mod_time = fileinfo.get_modification_time();
826                 
827                 
828                 
829                 if (!StripApplication.opt_scan_mailfort) {
830                    
831                 // it's a mail directory...
832                 // use the last modification time? as the default...
833                  mailtime = new DateTime.from_timeval_utc(mod_time);
834                  this.created_dir = mailtime.format("%Y/%m/%d");
835                          this.created_date =  mailtime.format("%Y-%m-%d %H:%M:%S");
836  
837         }
838                 // check on age of file...
839                 
840                 
841                 
842                 
843                 
844         this.used_space_before += file_size;
845         
846         var stream = new GMime.StreamFs.for_path (path +"/" + name,Posix.O_RDONLY, 0);
847         //stream.set_owner(true);
848         var parser = new GMime.Parser.with_stream(stream);
849         var message = parser.construct_message();
850  
851                 if (message == null) {
852                         GLib.debug("Could not parse file? %s/%s", path,name);
853                 this.used_space_after += file_size;                     
854                 return;
855                 }       
856
857
858                 // check : - is message over a year old?                
859                 // get various msg info..
860                 this.active_message_id = message.get_message_id();
861                 this.active_message_x_mailfort_sig = message.get_header("x-mailfort-sig");
862                 var recvd = message.get_header("received");
863                 this.active_message_exim_id = "";
864                 if (recvd != null && recvd.length > 1) {
865                         GLib.debug("RECV: %s", recvd);
866                         var lines = recvd.split("\t");
867                         for (var i = 0; i < lines.length;i++) {
868                                 var bits = lines[i].strip().split(" ");
869                                 if (bits[0] == "id") {
870                                         this.active_message_exim_id = bits[1].replace(";","");
871
872                                 }
873                                 
874                                 if (lines[i].contains(";")) {
875                                         var dbits = lines[i].strip().split(";");                                
876                                         GLib.debug("Reading time from : %s", dbits[1]);
877                                         var timez = GMime.utils_header_decode_date(dbits[1], null);
878                                         if (timez != 0) {
879                                                 mailtime = new DateTime.from_unix_utc(timez);
880                                                 this.created_date = mailtime.format("%Y-%m-%d %H:%M:%S");
881                                                 GLib.debug("Time is %s",this.created_date);
882                                                 // if it's not mailfort we can use that date to determine where to store it...
883                                                 if (!StripApplication.opt_scan_mailfort) {
884                                                         this.created_dir = mailtime.format("%Y/%m/%d");
885                                                 }
886                                         } else {
887                                                 GLib.debug("Could not read time from headers?");
888                                         }
889                                 }
890
891                         }
892                 }
893                 
894                 var oldest = new  DateTime.now_local();
895                 oldest = oldest.add_months(-1 * StripApplication.opt_age_oldest);
896                 var rtspan = mailtime.difference(oldest) / GLib.TimeSpan.DAY;
897                 GLib.debug("Checking oldest %d days difference", (int)rtspan   );
898                 if (rtspan < 0) {
899                         GLib.debug("skip(2) file is %d days older than %d months", (int)rtspan, StripApplication.opt_age_oldest);
900                         return;
901                 }
902                 var newest = new  DateTime.now_local();
903                 newest = newest.add_months(-1 * StripApplication.opt_age_newest);
904                 rtspan = mailtime.difference(newest) / GLib.TimeSpan.DAY;
905                 if (rtspan > 0) {
906                         GLib.debug("skip(2) file is %d days newer than %d months : %s", (int)rtspan, StripApplication.opt_age_newest,
907                                 mailtime.format("%Y-%m-%d %H:%M:%S"));
908                         return;
909                 }
910                 
911                 
912                 
913                 /*
914                 GLib.debug("Message DATA:\n mid: %s\nmailfort: %s \nexim_id: %s",
915                         this.active_message_id,
916                         this.active_message_x_mailfort_sig,
917                         this.active_message_exim_id
918                 );
919                  */
920                         
921                 // DATE?
922                 
923                 var mp = message.get_mime_part();
924
925                 if (!(mp is GMime.Multipart)) {
926                         //GLib.debug("get mimepart does not return a Multipart?");
927                 this.used_space_after += file_size;                                             
928                         return;
929                 }
930                 
931                 var mpc = ((GMime.Multipart)mp).get_count();
932                 
933                 //GLib.debug("Message has %d parts", mpc); 
934                 for (var i =0 ; i < mpc; i++) {
935                         //GLib.debug("Getting part %d", i); 
936                         var mime_obj = ((GMime.Multipart)mp).get_part(i);
937             this.handle_part(mp,mime_obj);                      
938         }
939                 
940         parser= null;
941
942       //  stream.set_owner(false);
943             //stream.close();
944         stream = null;//.close();
945         
946         
947                 if (!this.has_replaced) {
948                         this.used_space_after += file_size;
949                         GLib.debug("skpping write file - no replacement occured");
950                         return;
951                 }
952                 string tmpfile = "";
953                 GMime.Stream outstream = new GMime.StreamNull();
954                 if (StripApplication.opt_is_replacing) {
955                 
956                         tmpfile = GLib.Environment.get_tmp_dir() +"/" + name;
957                 outstream = new GMime.StreamFile.for_path (tmpfile,"w");
958                 ((GMime.StreamFile)outstream).set_owner(true);
959         }
960                 if (StripApplication.opt_dump) {
961                         outstream = new GMime.StreamMem();
962         }
963         
964         file_size = (int) message.write_to_stream(outstream);
965         if (StripApplication.opt_is_replacing) {
966                 ((GMime.StreamFile)outstream).set_owner(false);
967         }
968                 if (StripApplication.opt_dump) {
969                         var ua = ((GMime.StreamMem)outstream).get_byte_array().data;
970                         print("%s\n", (string) ua);
971                 }        
972         message = null;
973         outstream.flush();
974         outstream.close();
975         GLib.debug("finished writing output %d", file_size);
976
977         //
978         outstream = null;
979         
980           
981         this.used_space_after += file_size;
982         
983         
984         if (StripApplication.opt_is_replacing) {
985                 Posix.unlink(path +"/" + name);         
986                 GLib.debug("copy tmp file %s to %s" , tmpfile, path +"/" + name);               
987                 
988                 // link will not work, as we are doing it accross file systems
989                         var from = File.new_for_path (tmpfile);
990                         var nf =  File.new_for_path (path +"/" + name);
991                         from.copy(nf, 0, null);
992                         
993
994                 var newfileinfo = nf.query_info(GLib.FileAttribute.TIME_MODIFIED,GLib.FileQueryInfoFlags.NONE,null);
995                 newfileinfo.set_modification_time(mod_time);
996                 nf.set_attributes_from_info(newfileinfo,FileQueryInfoFlags.NONE);
997                 Posix.unlink(tmpfile);
998                 }
999         this.processed++;
1000         
1001         if (StripApplication.opt_limit > -1 && this.processed >= StripApplication.opt_limit) {
1002                 GLib.debug("Reached replacement limit");
1003                 Posix.exit(1);
1004         }
1005         
1006         
1007         
1008         
1009     }
1010     
1011     
1012     public void scan_dir(string basepath, string subpath)
1013     {
1014         
1015         
1016         // determine if path is to old to scan..
1017         if (subpath.length > 0 && StripApplication.opt_scan_mailfort) {
1018                         var year =  int.parse(subpath.substring(1,4));  // "/2000"
1019                         var month = subpath.length > 5 ? int.parse(subpath.substring(6,2)) : 999; // "/2000/12"                 
1020                         var day = subpath.length > 8 ? int.parse(subpath.substring(9,2)) : 999; // "/2000/12/01"                        
1021                 
1022                 var oldest = new  DateTime.now_local();
1023                         oldest = oldest.add_months(-1 * StripApplication.opt_age_oldest);
1024                         
1025                         //GLib.debug("Checking directory %s is older than min: %d/%d/%d", subpath, oldest.get_year() , oldest.get_month(), oldest.get_day_of_month() );                                 
1026                         
1027                         if (year < oldest.get_year()) {
1028                                 GLib.debug("Skip directory %s is older than min year: %d", subpath, oldest.get_year());
1029                                 return;
1030                         }
1031                         if (year == oldest.get_year() &&  month < oldest.get_month()) {
1032                                 GLib.debug("Skip directory %s is older than min month: %d/%d", subpath, oldest.get_year() , oldest.get_month() );
1033                                 return;
1034                         }
1035                 if (year == oldest.get_year() &&  month == oldest.get_month() && day < oldest.get_day_of_month()) {
1036                                 GLib.debug("Skip directory %s is older than min day: %d/%d/%d", subpath, oldest.get_year() , oldest.get_month(), oldest.get_day_of_month() );           
1037                                 return;
1038                         }
1039                 
1040                 var newest = new  DateTime.now_local();
1041                         newest = newest.add_months(-1 * StripApplication.opt_age_newest);
1042                         
1043                         //GLib.debug("Checking directory %s is newer than max: %d/%d/%d", subpath, newest.get_year() , newest.get_month(), newest.get_day_of_month() );                                 
1044                         
1045                         if (year > newest.get_year()) {
1046                                 GLib.debug("Skip directory %s is newer than max year: %d", subpath, newest.get_year());
1047                                 return;
1048                         }
1049                         if (year == newest.get_year() &&  month != 999 && month > newest.get_month()) {
1050                                 GLib.debug("Skip directory %s is newer than max month: %d/%d", subpath, newest.get_year() , newest.get_month() );
1051                                 return;
1052                         }
1053                 if (year == newest.get_year() &&  month == newest.get_month() &&  day != 999 && day > newest.get_day_of_month()) {
1054                                 GLib.debug("Skip directory %s is newer than max day: %d/%d/%d", subpath, newest.get_year() , newest.get_month(), newest.get_day_of_month() );           
1055                                 return;
1056                         }
1057                 
1058                 
1059                 
1060         }
1061         
1062         
1063         var f = File.new_for_path(basepath + subpath);
1064                 FileEnumerator file_enum;
1065         var cancellable = new Cancellable ();
1066         try {      
1067             file_enum = f.enumerate_children(
1068                 FileAttribute.STANDARD_DISPLAY_NAME + "," +   FileAttribute.STANDARD_TYPE,
1069                         FileQueryInfoFlags.NOFOLLOW_SYMLINKS,  // FileQueryInfoFlags.NONE,
1070                         cancellable
1071                 );
1072         } catch (Error e) {
1073                 GLib.debug("Got error scanning dir? %s", e.message);
1074             // FIXME - show error..
1075             return;
1076         }
1077         FileInfo next_file;
1078          
1079         while (cancellable.is_cancelled () == false ) {
1080             try {
1081                 next_file = file_enum.next_file (cancellable);
1082             } catch(Error e) {
1083                 GLib.debug("error getting next file? %s", e.message);
1084                 break;
1085             }
1086
1087             if (next_file == null) {
1088                 break;
1089             }
1090                 
1091                 
1092                 if (next_file.get_is_symlink()) {
1093                 next_file = null;
1094                 continue;
1095             }
1096             
1097             var ds = next_file.get_display_name();
1098             if (next_file.get_file_type() != FileType.DIRECTORY) {
1099                 
1100                 
1101                 
1102                 if (ds[0] == ',') {
1103                         continue;
1104                 }
1105                 // other files to ignore?
1106                 if (Regex.match_simple (".tgz$", ds)) {
1107                         continue;
1108                 }
1109                 this.scan_file(basepath + subpath , ds);
1110                                 if(this.has_replaced) {
1111                          this.report_state("After scanning %s/%s".printf(basepath + subpath , ds));
1112                         }
1113                 continue;
1114             }
1115
1116
1117             //stdout.printf("Monitor.monitor: got file %s : type :%u\n",
1118             //        next_file.get_display_name(), next_file.get_file_type());
1119
1120
1121         
1122
1123             // not really needed?? - we are storing attachments in a seperate location now...
1124             if (ds[0] == '.') {
1125                 next_file = null;
1126                 continue;
1127             }
1128             if (ds == "attachments") {
1129                         continue;
1130                 }
1131             
1132             
1133             var sp = subpath+"/"+next_file.get_display_name();
1134             // skip modules.
1135             //print("got a file : " + sp);
1136          
1137             next_file = null;
1138             
1139             
1140             this.scan_dir(basepath,sp);
1141             
1142         }
1143     
1144     
1145     }
1146     void report_state(string msg) 
1147     {
1148         // Saved: 2G  Original 10G : 20%
1149         GLib.debug("Saved : %s (%.1f%%) | Original %s | %s", 
1150                         GLib.format_size(this.used_space_before - this.used_space_after), 
1151                         100f * ((1f * (this.used_space_before - this.used_space_after)) / (this.used_space_before * 1f)), 
1152                         GLib.format_size(this.used_space_before),                       
1153                         msg
1154                 );
1155         
1156         }
1157         
1158         
1159
1160 }