src/strip.vala
[app.mailtrimmer] / src / strip.vala
1 /**
2
3  ** check left to do:  
4   - range scans on maildir
5   - see how replacing the links works in the resulting email via thunderbird etc..
6   - some checksum issues (see duplicates?? suspect 0byte issues?)  -- seems ok now?
7  
8
9
10
11   needs to scan 2 things
12   a) our mailfort email database
13        point it at the top directory, containing YEAR/MONTH/DAY.... directories.
14        scan each file (over a year old...)
15        extract out the attachment, and replace with HTML
16        DATABASE? - mysql or sqlite? - 
17            filesize / name / date / checksum / mimetype -- into mailfort should be OK.
18   b) the imap user emails
19            loop through user's directories
20            check age of email .. over 1 years..
21            ?? how to prevent 'repeat' scanning of emails?
22               ??? hidden '.' files containing last scan date?
23
24            check if file exists in our DB.. - replace the link...
25            otherwise generate a file. + add to DB...
26            
27    c) retrieval system
28      -> URL -> get file
29    d) redirect system.
30      -> URL -> redirect to correct server
31
32
33 More notes on our Mailfort DB sync:
34 * some of these attachments are already in the database...
35  - so we need to update the DB..
36  - probably worth putting the code in a stored procedure..
37  
38  -- key scenarios
39    * first scan (and extract)
40    * rescan (as I messed up the first time - fix the DB...)
41    * email scan - attachments might not have related messages.
42  
43  
44  - {id} attachment_init(
45                 {exim_msg_id}
46                 {chksum}
47                 {filename},
48         )
49         // creates or returns id (can look for existing messages?
50         // can do a merge?? - copy 'old' record data into 'new'....  "prefer checksummed"
51         
52         attachment_update(
53                 {id}
54                 {exim_msg_id}
55                 {mailfort_msg_sig}
56                 {file_size}
57                 {created} // message date..
58                 {chksum}
59                 {filename},
60         {mime_type}
61         )
62         attachment_update_store(
63                         {id}
64                         {stored_filename}
65         )
66
67
68 */ 
69
70 // valac --pkg gmime --vapi
71 /*
72
73 // http://www.fromdual.com/mysql-vala-program-example << check mysql if this does not work.
74
75  valac  -g --vapidir=. --thread  strip.vala   --vapidir=../vapi \
76      --pkg glib-2.0 --pkg mysql --pkg gio-2.0 --pkg posix --pkg gmime-2.6 \
77       --Xcc=-lmysqlclient  -v \
78        -o /tmp/strip
79 */ 
80  
/**
 * Command line front end of the mail attachment stripper.
 *
 * The constructor parses the command line into the static opt_* fields and
 * exits the process with a usage message when a required option is missing.
 * main() then connects to MySQL and starts either a single-file scan or a
 * directory scan through a Strip instance.
 */
public class StripApplication : GLib.Application {

    // ---- values filled in by the option parser ----
    public static string? opt_path = null;
    public static string? opt_file = null;
    public static string? opt_target_path = null;
    public static string? opt_db_host = "127.0.0.1";
    public static string? opt_db_name = null;
    public static string? opt_db_user = null;
    public static string? opt_db_pass = null;
    public static string? opt_scan_mailfort_only = "";

    public static int    opt_limit = -1;

    public static int    opt_age_newest = 1;
    public static int    opt_age_oldest = 6;

    public static bool opt_is_extracting = false;
    public static bool opt_is_replacing = false;
    public static bool opt_scan_maildir = false;
    public static bool opt_scan_mailfort = false;
    public static bool opt_dump = false;
    public static bool opt_debug = false;

    public static bool opt_debug_sql = false;
    public static string? opt_replace_link = null;

    // Option table handed to GLib.OptionContext; each entry writes into one
    // of the static fields above.
    public const GLib.OptionEntry[] options = {
        { "debug", 0, 0, OptionArg.NONE, ref opt_debug,
            "show debug messages for components", null },
        { "debug-sql", 0, 0, OptionArg.NONE, ref opt_debug_sql,
            "debug the SQL statements", null },

        { "path", 0, 0, OptionArg.STRING, ref opt_path,
            "Directory where email to be parsed is", null },
        { "file", 0, 0, OptionArg.STRING, ref opt_file,
            "A specific file to be parsed", null },

        { "target-path", 0, 0, OptionArg.STRING, ref opt_target_path,
            "Directory where attachments are to be put", null },

        { "link", 0, 0, OptionArg.STRING, ref opt_replace_link,
            "url for the replement link: eg. http://www.mysite.com/xxxx/%s", null },

        { "host", 0, 0, OptionArg.STRING, ref opt_db_host,
            "Mysql host (default localhost)", null },
        { "name", 0, 0, OptionArg.STRING, ref opt_db_name,
            "Mysql database name REQUIRED", null },
        { "user", 0, 0, OptionArg.STRING, ref opt_db_user,
            "Mysql database user REQUIRED", null },
        { "pass", 0, 0, OptionArg.STRING, ref opt_db_pass,
            "Mysql database password (default empty)", null },

        { "extract", 0, 0, OptionArg.NONE, ref opt_is_extracting,
            "Should attachments be extracted (default NO)", null },
        { "replace", 0, 0, OptionArg.NONE, ref opt_is_replacing,
            "Should attachments be replaced (default NO)", null },
        { "dump", 0, 0, OptionArg.NONE, ref opt_dump,
            "Print the replaced mail contents to stdout", null },

        { "limit", 0, 0, OptionArg.INT, ref opt_limit,
            "stop after X number of messages with attachments have been processed", null },
        { "newest", 0, 0, OptionArg.INT, ref opt_age_newest,
            "do not replace messages newer that X months (default is 1 months)", null },
        { "oldest", 0, 0, OptionArg.INT, ref opt_age_oldest,
            "do not replace messages older than X (default is 6 months)", null },

        { "scan-maildir", 0, 0, OptionArg.NONE, ref opt_scan_maildir,
            "scan an maildir tree", null },
        { "scan-mailfort", 0, 0, OptionArg.NONE, ref opt_scan_mailfort,
            "scan a mailfort tree", null },
        { "scan-mailfort-only", 0, 0, OptionArg.STRING, ref opt_scan_mailfort_only,
            "scan a mailfort Year/month eg. 2010/08", null },
        { null }
    };

    // True when an option string was not given or was given empty.
    private static bool is_blank (string? text) {
        return text == null || text.length < 1;
    }

    // Writes an already formatted usage message to stdout and terminates
    // the process with a failure status.
    private static void usage_failure (string text) {
        stdout.puts (text);
        GLib.Process.exit (Posix.EXIT_FAILURE);
    }

    /**
     * Parses //args// into the static option fields and validates them.
     *
     * Exits the process (EXIT_FAILURE) when parsing fails or a required
     * option is missing.
     *
     * @param args the command line as received by main()
     */
    public StripApplication (string[] args) {
        Object (
            application_id: "org.roojs.mailstripper",
            flags: ApplicationFlags.FLAGS_NONE
        );

        var opt_context = new GLib.OptionContext ("Mail Stripper");

        try {
            opt_context.set_help_enabled (true);
            opt_context.add_main_entries (options, null);
            opt_context.parse (ref args);

            // exactly one of the two tree types has to be selected
            if (opt_scan_mailfort == opt_scan_maildir) {
                usage_failure (
                    "You must specify the type of directory tree to scan - either imap or mailfort\n%s".printf (
                        opt_context.get_help (true, null)));
            }

            if (is_blank (opt_db_name) || is_blank (opt_db_user)) {
                usage_failure (
                    "You must specify the database name / user \n%s".printf (
                        opt_context.get_help (true, null)));
            }

            if (is_blank (opt_path)) {
                usage_failure (
                    "You must specify the scan start path\n%s".printf (
                        opt_context.get_help (true, null)));
            }

            if (is_blank (opt_replace_link)) {
                usage_failure (
                    "You must specify the link to use in the replacement \n%s".printf (
                        opt_context.get_help (true, null)));
            }

            var writes_attachments = opt_is_replacing || opt_is_extracting;
            if (writes_attachments && is_blank (opt_target_path)) {
                usage_failure (
                    "You must specify a target path to put attachments\n%s".printf (
                        opt_context.get_help (true, null)));
            }
        } catch (GLib.OptionError e) {
            stdout.printf ("error: %s\n", e.message);
            usage_failure (
                "Run '%s --help' to see a full list of available command line options.\n%s".printf (
                    args[0], opt_context.get_help (true, null)));
        }
    }

    /**
     * Program entry point.
     *
     * @param args command line arguments
     * @return 0 after a completed scan, 1 when the MySQL connection fails
     */
    public static int main (string[] args) {
        // constructing the application parses and validates the options
        var application = new StripApplication (args);

        GLib.Log.set_always_fatal (LogLevelFlags.LEVEL_ERROR | LogLevelFlags.LEVEL_CRITICAL);

        if (opt_debug || opt_debug_sql) {
            var shown_levels = GLib.LogLevelFlags.LEVEL_DEBUG
                | GLib.LogLevelFlags.LEVEL_WARNING
                | GLib.LogLevelFlags.LEVEL_INFO;
            GLib.Log.set_handler (null, shown_levels, (dom, lvl, msg) => {
                print ("%s\n", msg);
            });
        }

        GMime.init (0);

        // replacing implies extracting
        if (opt_is_replacing) {
            opt_is_extracting = true;
        }

        GLib.debug ("scanning folder: %s", opt_path);

        var strip = new Strip (opt_path);
        strip.mysql = new Mysql.Database ();

        var connected = strip.mysql.real_connect (
            opt_db_host,
            opt_db_user,
            opt_db_pass ?? "",  // password
            opt_db_name,        // database
            3306,               // port is fixed
            null
        );
        if (!connected) {
            stdout.printf ("ERROR %u: Connection failed: %s\n",
                strip.mysql.errno (), strip.mysql.error ()
            );
            return 1;
        }

        if (opt_file == null) {
            strip.scan_dir (opt_path, opt_scan_mailfort_only);
            return 0;
        }

        // single file mode
        strip.base_dir = opt_path;
        strip.scan_file (GLib.Path.get_dirname (opt_file), GLib.Path.get_basename (opt_file));
        return 0;
    }
}
251
252 public class Strip : GLib.Object {
253         
254  
255         
        // Root directory of the scan. Set by the constructor; main() also
        // assigns it before scanning a single file.
        public string base_dir = "";
        
        // MySQL connection used by real_query() / mysql_escape(); assigned
        // by main() after the object is constructed.
        public Mysql.Database mysql;
        
        // NOTE(review): not referenced in the visible part of this class -
        // presumably a count of handled messages; confirm against the scan code.
        int processed = 0;
    
    // NOTE(review): never written in the visible part of this class - confirm usage.
    uint64 used_space_before = 0;
    // Grows by the written size of each attachment handled in replace_attachment().
    uint64 used_space_after = 0;
264     
265     
266     public Strip(string base_dir)
267     {
268         this.base_dir = base_dir;
269     }
270     
271     public void handle_part(GMime.Object parent, GMime.Object mime_obj)
272     {
273                 if (mime_obj is GMime.Part) {
274                    var  p = (GMime.Part)mime_obj;
275                         var ct = p.get_content_type();
276                         var cd = p.get_content_disposition();
277                         
278                         var sid = p.get_header("X-strip-id");
279                     if (sid != null && sid.length > 0) {
280                         this.update_attachment_db(p);
281                             GLib.debug("Skip attachment replace - it's already been done");
282                         return;
283                         }
284                         
285                         if (cd == null || cd.get_disposition().down() != "attachment") {
286                                 return;
287                         }
288                         if (ct.get_media_type() == "text") {
289                                 return;
290                         }
291                         if (ct.to_string() == "application/pgp-encrypted") {
292                                 return;
293                         }
294                         if (ct.to_string() == "application/pgp-keys") {
295                                 return;
296                         }
297                         if (p.get_filename() == null) {
298                                 return;
299                         }
300                          // print("got part %s\n", ct.to_string());
301                          if (parent is GMime.Multipart) {
302                                 
303                                 this.replace_attachment(((GMime.Multipart)parent), p);
304                                 // remove it !?
305
306                           }
307
308
309                         return;
310                 }
311                 if (mime_obj is GMime.Multipart) {
312                         
313
314                         var  mp = (GMime.Multipart)mime_obj;
315                         //var ct = mp.get_content_type();
316
317                         //print("got multi-part %s\n", ct.to_string());
318                         for (var i = 0; i< mp.get_count(); i++) { 
319                           var mo = mp.get_part(i);
320                           this.handle_part(mime_obj,mo);
321                         }
322                    // ((GMime.Multipart)mime_obj).foreach((sub_obj) => {
323                    //     Strip.handle_part(sub_obj);
324                 //
325                    // });
326
327
328                         return;
329                 }
330
331                 if (mime_obj is GMime.MessagePart) {
332                         var msg = ((GMime.MessagePart)mime_obj).get_message();
333                         msg.foreach((subobj) => {
334                          this.handle_part(msg,subobj);
335                     });
336                 
337                         //print("got message-part\n");
338                         return;
339                 }
340                 
341                 if (mime_obj is GMime.Message) {
342                         var mp = ((GMime.Message) mime_obj).get_mime_part();
343
344                         if (!(mp is GMime.Multipart)) {
345                                 //GLib.debug("get mimepart does not return a Multipart?");
346                                 return;
347                         }
348                         
349                         var mpc = ((GMime.Multipart)mp).get_count();
350                         
351                         //GLib.debug("Message has %d parts", mpc); 
352                         for (var i =0 ; i < mpc; i++) {
353                                 //GLib.debug("Getting part %d", i); 
354                                 var submime_obj = ((GMime.Multipart)mp).get_part(i);
355                         this.handle_part(mp,submime_obj);                       
356                     }
357                         print("got message??\n");
358                         return;
359                 }
360                 
361                 print("got something else\n");
362
363
364     }
365     public void update_attachment_db(GMime.Part attachment)
366     {
367         // only called when we have an sid...
368         var sid = attachment.get_header("X-strip-id");
369         if (sid == null || sid.length < 1) {
370                 GLib.debug("Strange - update attachment db called ?");
371                 return;
372         }
373         
374         // initialize it with known data..
375         // that should wipe out dupes.
376         var matches = this.query("SELECT count(id)   FROM Attachment WHERE id = %d".printf(
377                         int.parse(sid)));  
378
379                  
380                 if (matches == "0") {    
381                         // our old mailfort code deleted the crap out of old records...
382                         // if this occurs we will need to create the record again..
383                         this.fix_deleted_attachment_db(int.parse(sid),attachment);
384                         return;
385  
386                 }
387         
388         
389         // initialize it with known data..
390         // that should wipe out dupes.
391         var filesize = this.query("SELECT filesize FROM Attachment WHERE id = %d".printf(
392                         int.parse(sid)));  
393
394                 if (filesize=="") {      
395                    GLib.error("Ignoring record id (missing in database) :%s", sid);
396                    return;
397                 }
398                 if (int.parse(filesize) < 1) {
399                 GLib.debug("Could not get filesize from id :%s = %s", sid,filesize);
400                 Posix.exit(0);
401                 return;
402         }
403         
404         var chksum = this.query("SELECT  checksum FROM Attachment WHERE id = %d".printf(
405                         int.parse(sid)
406                 ));
407         var mime_filename = this.query("SELECT  mime_filename FROM Attachment WHERE id = %d".printf(
408                         int.parse(sid)));       
409                 
410         this.query("""
411              SELECT 
412                  attachment_init(
413                      '%s', '%s', '%s', %d
414                  ) as id 
415                  
416           """.printf(
417                           this.mysql_escape(this.active_message_exim_id),
418                           this.mysql_escape(chksum),
419                           this.mysql_escape(mime_filename),                       
420                           int.parse(filesize)
421                 ));
422         this.query("""
423                  SELECT attachment_update(
424                       %d, -- in_id INT(11),
425                       '%s', -- in_mime_type varchar(255),
426                       '%s', -- in_created DATETIME,
427                       '%s' -- in_mailfort_sig varchar(64)
428                  )
429               """.printf(
430                         int.parse(sid),
431                         "", // this will be ignored..
432                                 this.created_date,
433                                 this.mysql_escape(this.active_message_x_mailfort_sig)
434               
435               )
436                 );
437                 this.mysql.store_result();
438                 
439
440     
441     }
442     
443     
444     public void fix_deleted_attachment_db(int id, GMime.Part attachment)
445     {
446                 
447         var filename = attachment.get_header("X-strip-content-name");
448         var file_path  = attachment.get_header("X-strip-path");
449         var fn =  StripApplication.opt_target_path + "/" + file_path;
450         
451
452                 if (!FileUtils.test (fn, FileTest.EXISTS)) {
453                         GLib.debug("SKIP -- file does not exist");
454                         return;
455         }
456         
457         var chksum = this.md5_file(fn);
458                 var mime_type = attachment.get_header("X-strip-content-type");
459
460                 var fileinfo = File.new_for_path(fn)
461                                         .query_info(GLib.FileAttribute.STANDARD_SIZE+","+GLib.FileAttribute.TIME_MODIFIED
462                                                 ,GLib.FileQueryInfoFlags.NONE,null);
463         var file_size = (int) fileinfo.get_size();
464
465       
466                 this.real_query(-1, """
467                        
468                        
469                                 INSERT INTO Attachment  (  
470                                         id, 
471                                         
472                                     msgid ,
473                                     queue_id ,
474                                     mime_filename ,
475                                     mime_type,
476                                      
477                                     stored_filename ,
478                                     mime_charset ,
479                                     mime_cdisp ,
480                                     mime_is_cover ,
481                                     
482                                     mime_is_multi ,
483                                     mime_is_mail,
484                                     mime_size ,
485                                     filesize,
486                                     
487                                     checksum,
488                                     created
489
490                                 ) VALUES (
491                                         %d,  -- id
492                                         
493                                     '%s' , -- msgid
494                                     0,
495                                     '%s'  , -- filename
496                                     '%s',  -- mimetype
497                                     
498                                     '%s', -- stored file anme
499                                     '', -- charset
500                                     'attachment',
501                                     0,
502                                     0,
503                                     0,
504                                     %d, -- size
505                                     %d, -- size
506                                     
507                                     '%s', -- checkum
508                                         '%s' -- created:
509                                 )
510                        
511                        
512                       """.printf(
513                                 id,
514                                       this.mysql_escape(this.active_message_exim_id),
515                                       this.mysql_escape(filename),
516                                   this.mysql_escape(mime_type),
517                                   this.mysql_escape(file_path),
518                                         file_size,
519                                       file_size,
520                                       this.mysql_escape(chksum),
521                                 this.created_date
522                          ));
523               // this is done to fix the queue_id or maillog_id ??
524                  this.query("""
525                  SELECT attachment_update(
526                       %d, -- in_id INT(11),
527                                                 '', -- mime type
528                       '%s', -- in_created DATETIME,
529                       '%s' -- in_mailfort_sig varchar(64)
530                  )
531               """.printf(
532                                 id, 
533                                 this.created_date,
534                                 this.mysql_escape(this.active_message_x_mailfort_sig)
535               
536               )
537                 );
538                 // GLib.error("added attachment?");
539     }
540     
541     
542     public void replace_attachment(GMime.Multipart parent, GMime.Part attachment)
543     {
544         var sid = attachment.get_header("X-strip-id");
545         if (sid != null && sid.length > 0) {
546                 GLib.debug("Skip attachment replace - it's already been done");
547                 return;
548         }
549         
550         var c = attachment.get_content_object();
551         
552         var filename = attachment.get_filename().replace("/", "-").replace("\n", "").replace("\t", " ");
553         var fn = GLib.Environment.get_tmp_dir() +
554                         "/"+ this.active_name + "."+   GLib.Uri.escape_string(filename,"", false);
555
556             var outfile = new GMime.StreamFile.for_path(fn, "w");
557             outfile.set_owner(true);
558             var file_size = (int) c.write_to_stream(outfile);
559             var chksum = this.md5_file(fn);
560             outfile.flush();
561             outfile = null;
562         
563         if (file_size == 0) {
564
565                 GLib.debug("ERROR - file size of write to stream returned 0?");
566                 Posix.unlink(fn);               
567                 return;
568         }
569         
570         
571         
572  
573         var mime_type= attachment.get_content_type().to_string();
574         // at this point we have to do our database magic...
575         //filesize / name / date / checksum / mimetype -- into mailfort should be OK.
576         
577         var file_id = this.query("""
578                 SELECT 
579                 
580                 attachment_init(
581                                 '%s', -- in_msgid VARCHAR(32),
582                                 '%s', -- in_checksum VARCHAR(64),
583                                 '%s', -- in_mime_filename varchar(255)
584                                 %d -- filesize
585                         ) as id 
586                         
587           """.printf(
588                         this.mysql_escape(this.active_message_exim_id),
589                         chksum,
590                         this.mysql_escape( GLib.Uri.escape_string(attachment.get_filename(),"", false) ), // what is thsi is invalid?
591                          file_size)
592                 );
593                  
594                 
595                 if (file_id.length < 1) {
596                         GLib.debug("ERROR - CALL to attachment_init failed");
597                 Posix.unlink(fn);               
598                 return;
599                 
600                 }
601  
602                 if (int.parse(file_id) < 1) {
603                         GLib.debug("ERROR - CALL to attachment_init failed - returned 0?");
604                 Posix.unlink(fn);               
605                 return;
606                 
607                 }
608  
609         
610                 GLib.debug("fn = %s, m5=%s, id= %s", filename, mime_type, this.active_message_id);
611                 this.query("""
612                 
613                         SELECT attachment_update(
614                                 %d, -- in_id INT(11),
615                                 '%s', -- in_mime_type varchar(255),
616                                 '%s', -- in_created DATETIME,
617                                 '%s' -- in_mailfort_sig varchar(64)
618                                 
619                                 ) as result
620       """.printf(
621                 int.parse(file_id),
622                         this.mysql_escape(mime_type),
623                         this.created_date,
624                         this.mysql_escape(this.active_message_x_mailfort_sig)
625                 ));
626                  this.mysql.store_result();
627                                  
628  
629                 this.used_space_after += file_size;
630                         
631                 var target_fn = "";
632
633             if (StripApplication.opt_is_extracting) {
634                         target_fn = StripApplication.opt_target_path + "/" + this.created_dir +"/"+ file_id  + "-" + GLib.Uri.escape_string(filename,"", false);
635                 } 
636                     
637             var stored =  "/" + this.created_dir +"/"+ file_id  + "-" + GLib.Uri.escape_string(filename,"", false);
638                  this.query("""
639                 
640                         SELECT attachment_update_store(
641                                 %d, -- in_id INT(11),
642                                 '%s'  -- in_store_filename varchar(255),
643                          
644                                 
645                                 ) as result
646       """.printf(
647                 int.parse(file_id),
648                          this.mysql_escape( stored)
649                 ));   
650                          
651         var rep = new GMime.Part.with_type("text","html");
652         // we have to set up a redirect server - to redirect hpasite... to their internal service..
653         rep.set_filename(filename);
654         string txt = "<html><body>"+
655             "<a href=\"" + StripApplication.opt_replace_link + "/" +
656                         file_id + "/" + this.created_dir + "/"+chksum+"/"+ GLib.Uri.escape_string( filename, "", false) +"\">" + 
657             GLib.Uri.escape_string( filename, "", false) + // fixme needs html escaping...
658             "</a>" +
659             "</body></html>";
660
661         rep.get_content_type().set_parameter("charset", "utf-8");
662                 rep.set_header("X-strip-id", file_id);
663                 rep.set_header("X-strip-content-name",  filename);                              
664                 rep.set_header("X-strip-path", this.created_dir + "/" + file_id + "-" +  GLib.Uri.escape_string(filename,"", false));           
665                 rep.set_header("X-strip-content-type", mime_type);              
666         var stream =  new GMime.StreamMem.with_buffer(txt.data);
667         var con = new GMime.DataWrapper.with_stream(stream,GMime.ContentEncoding.DEFAULT);
668
669         rep.set_content_object(con);
670         GLib.debug("Replacing Attachment with HTML");
671         parent.replace(parent.index_of(attachment), rep);
672                 this.has_replaced = true;
673                  
674                 if (StripApplication.opt_is_extracting && target_fn.length > 0) {
675                         var dir = GLib.Path.get_dirname(target_fn);
676                         if (!FileUtils.test (dir, FileTest.IS_DIR)) {
677                                 GLib.DirUtils.create_with_parents(dir, 0755);
678                         }
679                         GLib.debug("Creating file %s", target_fn);
680                         if (!FileUtils.test (target_fn, FileTest.EXISTS)) {
681                                 var from = File.new_for_path (fn);
682                                 var to =  File.new_for_path (target_fn);
683                                 from.copy(to, 0, null);
684
685                         }
686                 } else { 
687                         GLib.debug("Skipping extraction %s", target_fn);
688                 }
689                 Posix.unlink(fn);
690                 
691
692
693     }
694     public string query(string str)
695     {
696             return this.real_query(1, str);
697     }
698     public string execute(string str)
699     {
700             return this.real_query(0, str);
701     }
702     /**
703     * need_return 
704     0 = no
705     1 = yes
706     -1 = don't try.
707     */
708     public string real_query(int need_return, string str)
709     {
710                 GLib.debug("Before Query : %u  : %s\n", this.mysql.errno(), this.mysql.error());
711
712
713         if (StripApplication.opt_debug_sql) {
714                 GLib.debug("SQL: %s\n", str);
715                 }
716                 
717                 
718         
719         var rc=  this.mysql.query(str); 
720         if ( rc != 0 ) {
721
722                     GLib.debug("ERROR %u: Query failed: %s\n", this.mysql.errno(), this.mysql.error());
723                                 Posix.exit(1);
724                 }
725         var rs = mysql.use_result();
726                 if (need_return == -1) {
727                         return "";
728                 }
729  
730         
731         //GLib.debug("got %d rows", (int) rs.num_rows());
732         
733         var got_row = false;
734                 string[] row;
735                 string ret = "";
736                 while( (row = rs.fetch_row()) != null) { 
737                         got_row = true;
738                         ret = row[0];
739                 
740                 }
741                 if (need_return == 0) {
742                 if (StripApplication.opt_debug_sql) {
743                                 GLib.debug("got %s", got_row ? "=Nothing=" : ret);
744                         }
745                         return got_row ? "" : ret;
746                 }
747                 if (!got_row) {
748
749                          GLib.debug("ERROR : no rows returned");
750                         Posix.exit(1);
751                         return "";
752                 }
753         if (StripApplication.opt_debug_sql) {
754                         GLib.debug("got %s", ret);
755                 }
756                 return ret;
757                 
758                  
759         }
760     
761     public string mysql_escape(string str)
762     {
763             unichar[] value_escaped = new unichar[str.length * 2 + 1];
764                 this.mysql.real_escape_string ((string) value_escaped, str, str.length);
765                 return (string) value_escaped;
766     }
767     
768     public string  md5_file(string fn) {
769               Checksum checksum = new Checksum (ChecksumType.MD5);
770
771               FileStream stream = FileStream.open (fn, "rb");
772               uint8 fbuf[100];
773               size_t size;
774
775               while ((size = stream.read (fbuf)) > 0) {
776                       checksum.update (fbuf, size);
777               }
778
779               unowned string digest = checksum.get_string ();
780               return digest;
781     }
782
    // Per-file state, (re)initialised by scan_file() for the file being processed.

    // directory of the current file (scan_file's `path` argument)
    string active_path = "";    
    // name of the current file inside active_path
    string active_name = "";
    // value returned by message.get_message_id()
    string active_message_id = "";
    // value of the "x-mailfort-sig" header
    string active_message_x_mailfort_sig = "";
    // token following "id" in the "received" header
    string active_message_exim_id = "";
    // becomes true once an attachment of the current file has been replaced
    bool has_replaced = false;
    // YYYY-mm-dd when derived from the mailfort directory, otherwise
    // YYYY-mm-dd HH:MM:SS (file modification time or Received header date)
    string created_date = ""; // should be YYYY-mm-dd
    // directory the attachments are filed under
    string created_dir = ""; // should be YYYY/mm/dd
791     
792     public void scan_file(string path, string name)
793     {
794                 GLib.debug("Scan: %s/%s", path,name); 
795                 
796                 this.has_replaced = false; 
797         this.active_path = path;
798         this.active_name = name;
799         this.active_message_id = "";
800
801                 var mailtime = new DateTime.now_local();
802                 if (StripApplication.opt_scan_mailfort) {
803                     this.created_dir = this.active_path.substring(this.base_dir.length + 1 );
804                         this.created_date = this.created_dir.replace("/", "-");
805                         var bits = this.created_date.split("-");
806                         mailtime = new DateTime.local(int.parse(bits[0]),int.parse(bits[1]),int.parse(bits[2]),0,0,0);
807                         
808                         var oldest = new  DateTime.now_local();
809                         oldest = oldest.add_months(-1 * StripApplication.opt_age_oldest);
810                         var tspan = mailtime.difference(oldest) / GLib.TimeSpan.DAY;
811
812                         if (tspan < 0) {
813                                 GLib.debug("skip file is %d days older than %d months", (int)tspan, StripApplication.opt_age_oldest);
814                                 return;
815                         }
816                         
817                         var newest = new  DateTime.now_local();
818                         newest = newest.add_months(-1 * StripApplication.opt_age_newest);
819                         tspan = mailtime.difference(newest) / GLib.TimeSpan.DAY;
820                         if (tspan > 0) {
821                                 GLib.debug("skip file is %d days newer than %d months", (int)tspan, StripApplication.opt_age_newest);
822                                 return;
823                         }
824                         
825                 }
826         
827         
828                 var fileinfo = File.new_for_path(path +"/" + name)
829                                         .query_info(GLib.FileAttribute.STANDARD_SIZE+","+GLib.FileAttribute.TIME_MODIFIED
830                                                 ,GLib.FileQueryInfoFlags.NONE,null);
831         var file_size = (int) fileinfo.get_size();
832                 var mod_time = fileinfo.get_modification_time();
833                 
834                 
835                 
836                 if (!StripApplication.opt_scan_mailfort) {
837                    
838                 // it's a mail directory...
839                 // use the last modification time? as the default...
840                  mailtime = new DateTime.from_timeval_utc(mod_time);
841                  this.created_dir = mailtime.format("%Y/%m/%d");
842                          this.created_date =  mailtime.format("%Y-%m-%d %H:%M:%S");
843  
844         }
845                 // check on age of file...
846                 
847                 
848                 
849                 
850                 
851         this.used_space_before += file_size;
852         
853         var stream = new GMime.StreamFs.for_path (path +"/" + name,Posix.O_RDONLY, 0);
854         //stream.set_owner(true);
855         var parser = new GMime.Parser.with_stream(stream);
856         var message = parser.construct_message();
857  
858                 if (message == null) {
859                         GLib.debug("Could not parse file? %s/%s", path,name);
860                 this.used_space_after += file_size;                     
861                 return;
862                 }       
863
864
865                 // check : - is message over a year old?                
866                 // get various msg info..
867                 this.active_message_id = message.get_message_id();
868                 this.active_message_x_mailfort_sig = message.get_header("x-mailfort-sig");
869                 var recvd = message.get_header("received");
870                 this.active_message_exim_id = "";
871                 if (recvd != null && recvd.length > 1) {
872                         GLib.debug("RECV: %s", recvd);
873                         var lines = recvd.split("\t");
874                         for (var i = 0; i < lines.length;i++) {
875                                 var bits = lines[i].strip().split(" ");
876                                 if (bits[0] == "id") {
877                                         this.active_message_exim_id = bits[1].replace(";","");
878
879                                 }
880                                 
881                                 if (lines[i].contains(";")) {
882                                         var dbits = lines[i].strip().split(";");                                
883                                         GLib.debug("Reading time from : %s", dbits[1]);
884                                         var timez = GMime.utils_header_decode_date(dbits[1], null);
885                                         if (timez != 0) {
886                                                 mailtime = new DateTime.from_unix_utc(timez);
887                                                 this.created_date = mailtime.format("%Y-%m-%d %H:%M:%S");
888                                                 GLib.debug("Time is %s",this.created_date);
889                                                 // if it's not mailfort we can use that date to determine where to store it...
890                                                 if (!StripApplication.opt_scan_mailfort) {
891                                                         this.created_dir = mailtime.format("%Y/%m/%d");
892                                                 }
893                                         } else {
894                                                 GLib.debug("Could not read time from headers?");
895                                         }
896                                 }
897
898                         }
899                 }
900                 
901                 var oldest = new  DateTime.now_local();
902                 oldest = oldest.add_months(-1 * StripApplication.opt_age_oldest);
903                 var rtspan = mailtime.difference(oldest) / GLib.TimeSpan.DAY;
904                 GLib.debug("Checking oldest %d days difference", (int)rtspan   );
905                 if (rtspan < 0) {
906                         GLib.debug("skip(2) file is %d days older than %d months", (int)rtspan, StripApplication.opt_age_oldest);
907                         return;
908                 }
909                 var newest = new  DateTime.now_local();
910                 newest = newest.add_months(-1 * StripApplication.opt_age_newest);
911                 rtspan = mailtime.difference(newest) / GLib.TimeSpan.DAY;
912                 if (rtspan > 0) {
913                         GLib.debug("skip(2) file is %d days newer than %d months : %s", (int)rtspan, StripApplication.opt_age_newest,
914                                 mailtime.format("%Y-%m-%d %H:%M:%S"));
915                         return;
916                 }
917                 
918                 
919                 
920                 /*
921                 GLib.debug("Message DATA:\n mid: %s\nmailfort: %s \nexim_id: %s",
922                         this.active_message_id,
923                         this.active_message_x_mailfort_sig,
924                         this.active_message_exim_id
925                 );
926                  */
927                         
928                 // DATE?
929                 
930                 var mp = message.get_mime_part();
931
932                 if (!(mp is GMime.Multipart)) {
933                         //GLib.debug("get mimepart does not return a Multipart?");
934                 this.used_space_after += file_size;                                             
935                         return;
936                 }
937                 
938                 var mpc = ((GMime.Multipart)mp).get_count();
939                 
940                 //GLib.debug("Message has %d parts", mpc); 
941                 for (var i =0 ; i < mpc; i++) {
942                         //GLib.debug("Getting part %d", i); 
943                         var mime_obj = ((GMime.Multipart)mp).get_part(i);
944             this.handle_part(mp,mime_obj);                      
945         }
946                 
947         parser= null;
948
949       //  stream.set_owner(false);
950             //stream.close();
951         stream = null;//.close();
952         
953         
954                 if (!this.has_replaced) {
955                         this.used_space_after += file_size;
956                         GLib.debug("skpping write file - no replacement occured");
957                         return;
958                 }
959                 string tmpfile = "";
960                 GMime.Stream outstream = new GMime.StreamNull();
961                 if (StripApplication.opt_is_replacing) {
962                 
963                         tmpfile = GLib.Environment.get_tmp_dir() +"/" + name;
964                 outstream = new GMime.StreamFile.for_path (tmpfile,"w");
965                 ((GMime.StreamFile)outstream).set_owner(true);
966         }
967                 if (StripApplication.opt_dump) {
968                         outstream = new GMime.StreamMem();
969         }
970         
971         file_size = (int) message.write_to_stream(outstream);
972         if (StripApplication.opt_is_replacing) {
973                 ((GMime.StreamFile)outstream).set_owner(false);
974         }
975                 if (StripApplication.opt_dump) {
976                         var ua = ((GMime.StreamMem)outstream).get_byte_array().data;
977                         print("%s\n", (string) ua);
978                 }        
979         message = null;
980         outstream.flush();
981         outstream.close();
982         GLib.debug("finished writing output %d", file_size);
983
984         //
985         outstream = null;
986         
987           
988         this.used_space_after += file_size;
989         
990         
991         if (StripApplication.opt_is_replacing) {
992                 Posix.unlink(path +"/" + name);         
993                 GLib.debug("copy tmp file %s to %s" , tmpfile, path +"/" + name);               
994                 
995                 // link will not work, as we are doing it accross file systems
996                         var from = File.new_for_path (tmpfile);
997                         var nf =  File.new_for_path (path +"/" + name);
998                         from.copy(nf, 0, null);
999                         
1000
1001                 var newfileinfo = nf.query_info(GLib.FileAttribute.TIME_MODIFIED,GLib.FileQueryInfoFlags.NONE,null);
1002                 newfileinfo.set_modification_time(mod_time);
1003                 nf.set_attributes_from_info(newfileinfo,FileQueryInfoFlags.NONE);
1004                 Posix.unlink(tmpfile);
1005                 }
1006         this.processed++;
1007         
1008         if (StripApplication.opt_limit > -1 && this.processed >= StripApplication.opt_limit) {
1009                 GLib.debug("Reached replacement limit");
1010                 Posix.exit(1);
1011         }
1012         
1013         
1014         
1015         
1016     }
1017     
1018     
1019     public void scan_dir(string basepath, string subpath)
1020     {
1021         
1022         
1023         // determine if path is to old to scan..
1024         if (subpath.length > 0 && StripApplication.opt_scan_mailfort) {
1025                         var year =  int.parse(subpath.substring(1,4));  // "/2000"
1026                         var month = subpath.length > 5 ? int.parse(subpath.substring(6,2)) : 999; // "/2000/12"                 
1027                         var day = subpath.length > 8 ? int.parse(subpath.substring(9,2)) : 999; // "/2000/12/01"                        
1028                 
1029                 var oldest = new  DateTime.now_local();
1030                         oldest = oldest.add_months(-1 * StripApplication.opt_age_oldest);
1031                         
1032                         //GLib.debug("Checking directory %s is older than min: %d/%d/%d", subpath, oldest.get_year() , oldest.get_month(), oldest.get_day_of_month() );                                 
1033                         
1034                         if (year < oldest.get_year()) {
1035                                 GLib.debug("Skip directory %s is older than min year: %d", subpath, oldest.get_year());
1036                                 return;
1037                         }
1038                         if (year == oldest.get_year() &&  month < oldest.get_month()) {
1039                                 GLib.debug("Skip directory %s is older than min month: %d/%d", subpath, oldest.get_year() , oldest.get_month() );
1040                                 return;
1041                         }
1042                 if (year == oldest.get_year() &&  month == oldest.get_month() && day < oldest.get_day_of_month()) {
1043                                 GLib.debug("Skip directory %s is older than min day: %d/%d/%d", subpath, oldest.get_year() , oldest.get_month(), oldest.get_day_of_month() );           
1044                                 return;
1045                         }
1046                 
1047                 var newest = new  DateTime.now_local();
1048                         newest = newest.add_months(-1 * StripApplication.opt_age_newest);
1049                         
1050                         //GLib.debug("Checking directory %s is newer than max: %d/%d/%d", subpath, newest.get_year() , newest.get_month(), newest.get_day_of_month() );                                 
1051                         
1052                         if (year > newest.get_year()) {
1053                                 GLib.debug("Skip directory %s is newer than max year: %d", subpath, newest.get_year());
1054                                 return;
1055                         }
1056                         if (year == newest.get_year() &&  month != 999 && month > newest.get_month()) {
1057                                 GLib.debug("Skip directory %s is newer than max month: %d/%d", subpath, newest.get_year() , newest.get_month() );
1058                                 return;
1059                         }
1060                 if (year == newest.get_year() &&  month == newest.get_month() &&  day != 999 && day > newest.get_day_of_month()) {
1061                                 GLib.debug("Skip directory %s is newer than max day: %d/%d/%d", subpath, newest.get_year() , newest.get_month(), newest.get_day_of_month() );           
1062                                 return;
1063                         }
1064                 
1065                 
1066                 
1067         }
1068         
1069         
1070         var f = File.new_for_path(basepath + subpath);
1071                 FileEnumerator file_enum;
1072         var cancellable = new Cancellable ();
1073         try {      
1074             file_enum = f.enumerate_children(
1075                 FileAttribute.STANDARD_DISPLAY_NAME + "," +   FileAttribute.STANDARD_TYPE,
1076                         FileQueryInfoFlags.NOFOLLOW_SYMLINKS,  // FileQueryInfoFlags.NONE,
1077                         cancellable
1078                 );
1079         } catch (Error e) {
1080                 GLib.debug("Got error scanning dir? %s", e.message);
1081             // FIXME - show error..
1082             return;
1083         }
1084         FileInfo next_file;
1085          
1086         while (cancellable.is_cancelled () == false ) {
1087             try {
1088                 next_file = file_enum.next_file (cancellable);
1089             } catch(Error e) {
1090                 GLib.debug("error getting next file? %s", e.message);
1091                 break;
1092             }
1093
1094             if (next_file == null) {
1095                 break;
1096             }
1097                 
1098                 
1099                 if (next_file.get_is_symlink()) {
1100                 next_file = null;
1101                 continue;
1102             }
1103             
1104             var ds = next_file.get_display_name();
1105             if (next_file.get_file_type() != FileType.DIRECTORY) {
1106                 
1107                 
1108                 
1109                 if (ds[0] == ',') {
1110                         continue;
1111                 }
1112                 // other files to ignore?
1113                 if (Regex.match_simple (".tgz$", ds)) {
1114                         continue;
1115                 }
1116                 this.scan_file(basepath + subpath , ds);
1117                                 if(this.has_replaced) {
1118                          this.report_state("After scanning %s/%s".printf(basepath + subpath , ds));
1119                         }
1120                 continue;
1121             }
1122
1123
1124             //stdout.printf("Monitor.monitor: got file %s : type :%u\n",
1125             //        next_file.get_display_name(), next_file.get_file_type());
1126
1127
1128         
1129
1130             // not really needed?? - we are storing attachments in a seperate location now...
1131             if (ds[0] == '.') {
1132                 next_file = null;
1133                 continue;
1134             }
1135             if (ds == "attachments") {
1136                         continue;
1137                 }
1138             
1139             
1140             var sp = subpath+"/"+next_file.get_display_name();
1141             // skip modules.
1142             //print("got a file : " + sp);
1143          
1144             next_file = null;
1145             
1146             
1147             this.scan_dir(basepath,sp);
1148             
1149         }
1150     
1151     
1152     }
1153     void report_state(string msg) 
1154     {
1155         // Saved: 2G  Original 10G : 20%
1156         GLib.debug("Saved : %s (%.1f%%) | Original %s | %s", 
1157                         GLib.format_size(this.used_space_before - this.used_space_after), 
1158                         100f * ((1f * (this.used_space_before - this.used_space_after)) / (this.used_space_before * 1f)), 
1159                         GLib.format_size(this.used_space_before),                       
1160                         msg
1161                 );
1162         
1163         }
1164         
1165         
1166
1167 }