src/strip.vala
[app.mailtrimmer] / src / strip.vala
1 /**
2
3  ** check left to do:  
4   - range scans on maildir
5   - see how replacing the links works in the resulting email via thunderbird etc..
6   - some checksum issues (seeing duplicates?? suspect 0-byte issues?)  -- seems ok now?
7  
8
9
10
11   needs to scan 2 things
12   a) our mailfort email database
13        point it at the top directory, containing YEAR/MONTH/DAY.... directories.
14        scan each file (over a year old...)
15        extract out the attachment, and replace with HTML
16        DATABASE? - mysql or sqlite? - 
17            filesize / name / date / checksum / mimetype -- into mailfort should be OK.
18   b) the imap user emails
19            loop through user's directories
20            check age of email .. over 1 years..
21            ?? how to prevent 'repeat' scanning of emails?
22               ??? hidden '.' files containing last scan date?
23
24            check if file exists in our DB.. - replace the link...
25            otherwise generate a file. + add to DB...
26            
27    c) retrieval system
28      -> URL -> get file
29    d) redirect system.
30      -> URL -> redirect to correct server
31
32
33 More notes on our Mailfort DB sync:
34 * some of these attachments are already in the database...
35  - so we need to update the DB..
36  - probably worth putting the code in a stored procedure..
37  
38  -- key scenarios
39    * first scan (and extract)
40    * rescan (as I messed up the first time - fix the DB...)
41    * email scan - attachments might not have related messages.
42  
43  
44  - {id} attachment_init(
45                 {exim_msg_id}
46                 {chksum}
47                 {filename},
48         )
49         // creates or returns id (can look for existing messages?
50         // can do a merge?? - copy 'old' record data into 'new'....  "prefer checksummed"
51         
52         attachment_update(
53                 {id}
54                 {exim_msg_id}
55                 {mailfort_msg_sig}
56                 {file_size}
57                 {created} // message date..
58                 {chksum}
59                 {filename},
60         {mime_type}
61         )
62         attachment_update_store(
63                         {id}
64                         {stored_filename}
65         )
66
67
68 */ 
69
70 // valac --pkg gmime --vapi
71 /*
72
73 // http://www.fromdual.com/mysql-vala-program-example << check mysql if this does not work.
74
75  valac  -g --vapidir=. --thread  strip.vala   --vapidir=../vapi \
76      --pkg glib-2.0 --pkg mysql --pkg gio-2.0 --pkg posix --pkg gmime-2.6 \
77       --Xcc=-lmysqlclient  -v \
78        -o /tmp/strip
79 */ 
80  
/**
 * Command line entry point of the mail stripper.
 *
 * The constructor parses the command line into the static opt_* fields and
 * terminates the process when a required option is missing.  main() then
 * connects to MySQL and starts either a single file scan (--file) or a
 * directory tree scan through {@link Strip}.
 */
public class StripApplication : GLib.Application {

    public static string? opt_path = null;
    public static string? opt_file = null;
    public static string? opt_target_path = null;
    public static string? opt_db_host = "127.0.0.1";
    public static string? opt_db_name = null;
    public static string? opt_db_user = null;
    public static string? opt_db_pass = null;
    public static string? opt_scan_mailfort_only = "";

    public static int opt_limit = -1;

    public static int opt_age_newest = 1;
    public static int opt_age_oldest = 6;

    public static bool opt_is_extracting = false;
    public static bool opt_is_replacing = false;
    public static bool opt_scan_maildir = false;
    public static bool opt_scan_mailfort = false;
    public static bool opt_dump = false;
    public static bool opt_debug = false;
    public static bool opt_stamp = false;

    public static bool opt_debug_sql = false;
    public static string? opt_replace_link = null;

    public const GLib.OptionEntry[] options = {

        { "debug", 0, 0, OptionArg.NONE, ref opt_debug, "show debug messages for components", null },
        { "debug-sql", 0, 0, OptionArg.NONE, ref opt_debug_sql, "debug the SQL statements", null },

        { "path", 0, 0, OptionArg.STRING, ref opt_path, "Directory where email to be parsed is", null },
        { "file", 0, 0, OptionArg.STRING, ref opt_file, "A specific file to be parsed", null },

        { "target-path", 0, 0, OptionArg.STRING, ref opt_target_path, "Directory where attachments are to be put", null },

        { "link", 0, 0, OptionArg.STRING, ref opt_replace_link, "url for the replacement link: eg. http://www.mysite.com/xxxx/%s", null },

        { "host", 0, 0, OptionArg.STRING, ref opt_db_host, "Mysql host (default 127.0.0.1)", null },
        { "name", 0, 0, OptionArg.STRING, ref opt_db_name, "Mysql database name REQUIRED", null },
        { "user", 0, 0, OptionArg.STRING, ref opt_db_user, "Mysql database user REQUIRED", null },
        { "pass", 0, 0, OptionArg.STRING, ref opt_db_pass, "Mysql database password (default empty)", null },

        { "extract", 0, 0, OptionArg.NONE, ref opt_is_extracting, "Should attachments be extracted (default NO)", null },
        { "replace", 0, 0, OptionArg.NONE, ref opt_is_replacing, "Should attachments be replaced (default NO)", null },
        { "dump", 0, 0, OptionArg.NONE, ref opt_dump, "Print the replaced mail contents to stdout", null },

        { "limit", 0, 0, OptionArg.INT, ref opt_limit, "stop after X number of messages with attachments have been processed", null },
        { "newest", 0, 0, OptionArg.INT, ref opt_age_newest, "do not replace messages newer than X months (default is 1 months)", null },
        { "oldest", 0, 0, OptionArg.INT, ref opt_age_oldest, "do not replace messages older than X months (default is 6 months)", null },

        { "scan-maildir", 0, 0, OptionArg.NONE, ref opt_scan_maildir, "scan a maildir tree", null },
        { "stamp", 0, 0, OptionArg.NONE, ref opt_stamp, "create and honour directory stamps - flag that a folder has been scanned already", null },
        { "scan-mailfort", 0, 0, OptionArg.NONE, ref opt_scan_mailfort, "scan a mailfort tree", null },
        { "scan-mailfort-only", 0, 0, OptionArg.STRING, ref opt_scan_mailfort_only, "scan a mailfort Year/month eg. /2010/08", null },
        { null }
    };

    /**
     * Parses and validates the command line.
     *
     * The process exits with EXIT_FAILURE (after printing the problem and
     * the help text to stderr) when parsing fails or a required option is
     * missing.
     *
     * @param args the program arguments as received by main()
     */
    public StripApplication(string[] args)
    {
        Object(
            application_id: "org.roojs.mailstripper",
            flags: ApplicationFlags.FLAGS_NONE
        );

        var opt_context = new GLib.OptionContext("Mail Stripper");

        try {
            opt_context.set_help_enabled(true);
            opt_context.add_main_entries(options, null);
            opt_context.parse(ref args);
        } catch (GLib.OptionError e) {
            stderr.printf("error: %s\n", e.message);
            stderr.printf("Run '%s --help' to see a full list of available command line options.\n%s",
                args[0], opt_context.get_help(true, null));
            GLib.Process.exit(Posix.EXIT_FAILURE);
        }

        // exactly one of the two tree types has to be selected
        if (opt_scan_mailfort == opt_scan_maildir) {
            usage_error(opt_context,
                "You must specify the type of directory tree to scan - either --scan-maildir or --scan-mailfort");
        }
        if (is_blank(opt_db_name) || is_blank(opt_db_user)) {
            usage_error(opt_context, "You must specify the database name / user");
        }
        if (is_blank(opt_path)) {
            usage_error(opt_context, "You must specify the scan start path");
        }
        if (is_blank(opt_replace_link)) {
            usage_error(opt_context, "You must specify the link to use in the replacement");
        }
        if ((opt_is_replacing || opt_is_extracting) && is_blank(opt_target_path)) {
            usage_error(opt_context, "You must specify a target path to put attachments");
        }
    }

    /** Returns true when an option value was not given or is empty. */
    private static bool is_blank(string? val)
    {
        return val == null || val.length < 1;
    }

    /** Prints the message followed by the help text to stderr and exits with EXIT_FAILURE. */
    private static void usage_error(GLib.OptionContext context, string message)
    {
        stderr.printf("%s\n%s", message, context.get_help(true, null));
        GLib.Process.exit(Posix.EXIT_FAILURE);
    }

    /**
     * Program entry point.
     *
     * @return 0 on success, 1 when the MySQL connection could not be made
     */
    public static int main(string[] args)
    {
        // constructing the application parses the options (and exits on bad input)
        var application = new StripApplication(args);

        GLib.Log.set_always_fatal(LogLevelFlags.LEVEL_ERROR | LogLevelFlags.LEVEL_CRITICAL);

        if (opt_debug || opt_debug_sql) {
            GLib.Log.set_handler(null,
                GLib.LogLevelFlags.LEVEL_DEBUG | GLib.LogLevelFlags.LEVEL_WARNING | GLib.LogLevelFlags.LEVEL_INFO,
                (dom, lvl, msg) => {
                    print("%s\n", msg);
                }
            );
        }

        GMime.init(0);

        // replacing an attachment only makes sense when it is extracted as well
        if (StripApplication.opt_is_replacing) {
            StripApplication.opt_is_extracting = true;
        }

        GLib.debug("scanning folder: %s", opt_path);

        var strip = new Strip(opt_path);

        strip.mysql = new Mysql.Database();
        if (!strip.mysql.real_connect(
                opt_db_host,
                opt_db_user,
                opt_db_pass == null ? "" : opt_db_pass, // passwd
                opt_db_name, // DB
                3306, // the port is fixed, there is no option for it
                null
            )
        ) {
            stderr.printf("ERROR %u: Connection failed: %s\n",
                strip.mysql.errno(), strip.mysql.error()
            );
            return 1;
        }

        // --file restricts the run to one single message file
        if (opt_file != null) {
            strip.base_dir = opt_path;
            strip.scan_file(GLib.Path.get_dirname(opt_file), GLib.Path.get_basename(opt_file));
            return 0;
        }

        strip.scan_dir(opt_path, opt_scan_mailfort_only);

        return 0;
    }
}
253
254 public class Strip : GLib.Object {
255         
256  
257         
258         public string base_dir = "";
259         
260         public Mysql.Database mysql;
261         
262         int processed = 0;
263     
264     uint64 used_space_before = 0;
265     uint64 used_space_after = 0;
266     
267     
268     public Strip(string base_dir)
269     {
270         this.base_dir = base_dir;
271     }
272     
273     public void handle_part(GMime.Object parent, GMime.Object mime_obj)
274     {
275                 if (mime_obj is GMime.Part) {
276                    var  p = (GMime.Part)mime_obj;
277                         var ct = p.get_content_type();
278                         var cd = p.get_content_disposition();
279                         
280                         var sid = p.get_header("X-strip-id");
281                     if (sid != null && sid.length > 0) {
282                         this.update_attachment_db(p);
283                             GLib.debug("Skip attachment replace - it's already been done");
284                         return;
285                         }
286                         
287                         if (cd == null || cd.get_disposition().down() != "attachment") {
288                                 return;
289                         }
290                         if (ct.get_media_type() == "text") {
291                                 return;
292                         }
293                         if (ct.to_string() == "application/pgp-encrypted") {
294                                 return;
295                         }
296                         if (ct.to_string() == "application/pgp-keys") {
297                                 return;
298                         }
299                         if (p.get_filename() == null) {
300                                 return;
301                         }
302                          // print("got part %s\n", ct.to_string());
303                          if (parent is GMime.Multipart) {
304                                 
305                                 this.replace_attachment(((GMime.Multipart)parent), p);
306                                 // remove it !?
307
308                           }
309
310
311                         return;
312                 }
313                 if (mime_obj is GMime.Multipart) {
314                         
315
316                         var  mp = (GMime.Multipart)mime_obj;
317                         //var ct = mp.get_content_type();
318
319                         //print("got multi-part %s\n", ct.to_string());
320                         for (var i = 0; i< mp.get_count(); i++) { 
321                           var mo = mp.get_part(i);
322                           this.handle_part(mime_obj,mo);
323                         }
324                    // ((GMime.Multipart)mime_obj).foreach((sub_obj) => {
325                    //     Strip.handle_part(sub_obj);
326                 //
327                    // });
328
329
330                         return;
331                 }
332
333                 if (mime_obj is GMime.MessagePart) {
334                         var msg = ((GMime.MessagePart)mime_obj).get_message();
335                         msg.foreach((subobj) => {
336                          this.handle_part(msg,subobj);
337                     });
338                 
339                         //print("got message-part\n");
340                         return;
341                 }
342                 
343                 if (mime_obj is GMime.Message) {
344                         var mp = ((GMime.Message) mime_obj).get_mime_part();
345
346                         if (!(mp is GMime.Multipart)) {
347                                 //GLib.debug("get mimepart does not return a Multipart?");
348                                 return;
349                         }
350                         
351                         var mpc = ((GMime.Multipart)mp).get_count();
352                         
353                         //GLib.debug("Message has %d parts", mpc); 
354                         for (var i =0 ; i < mpc; i++) {
355                                 //GLib.debug("Getting part %d", i); 
356                                 var submime_obj = ((GMime.Multipart)mp).get_part(i);
357                         this.handle_part(mp,submime_obj);                       
358                     }
359                         print("got message??\n");
360                         return;
361                 }
362                 
363                 print("got something else\n");
364
365
366     }
367     public void update_attachment_db(GMime.Part attachment)
368     {
369         // only called when we have an sid...
370         var sid = attachment.get_header("X-strip-id");
371         if (sid == null || sid.length < 1) {
372                 GLib.debug("Strange - update attachment db called ?");
373                 return;
374         }
375         
376         // initialize it with known data..
377         // that should wipe out dupes.
378         var matches = this.query("SELECT count(id)   FROM Attachment WHERE id = %d".printf(
379                         int.parse(sid)));  
380
381                  
382                 if (matches == "0") {    
383                         // our old mailfort code deleted the crap out of old records...
384                         // if this occurs we will need to create the record again..
385                         this.fix_deleted_attachment_db(int.parse(sid),attachment);
386                         return;
387  
388                 }
389         
390         
391         // initialize it with known data..
392         // that should wipe out dupes.
393         var filesize = this.query("SELECT filesize FROM Attachment WHERE id = %d".printf(
394                         int.parse(sid)));  
395
396                 if (filesize=="") {      
397                    GLib.error("Ignoring record id (missing in database) :%s", sid);
398                    return;
399                 }
400                 if (int.parse(filesize) < 1) {
401                 GLib.debug("Could not get filesize from id :%s = %s", sid,filesize);
402                 Posix.exit(0);
403                 return;
404         }
405         
406         var chksum = this.query("SELECT  checksum FROM Attachment WHERE id = %d".printf(
407                         int.parse(sid)
408                 ));
409         var mime_filename = this.query("SELECT  mime_filename FROM Attachment WHERE id = %d".printf(
410                         int.parse(sid)));       
411                 
412         this.query("""
413              SELECT 
414                  attachment_init(
415                      '%s', '%s', '%s', %d
416                  ) as id 
417                  
418           """.printf(
419                           this.mysql_escape(this.active_message_exim_id),
420                           this.mysql_escape(chksum),
421                           this.mysql_escape(mime_filename),                       
422                           int.parse(filesize)
423                 ));
424         this.query("""
425                  SELECT attachment_update(
426                       %d, -- in_id INT(11),
427                       '%s', -- in_mime_type varchar(255),
428                       '%s', -- in_created DATETIME,
429                       '%s' -- in_mailfort_sig varchar(64)
430                  )
431               """.printf(
432                         int.parse(sid),
433                         "", // this will be ignored..
434                                 this.created_date,
435                                 this.mysql_escape(this.active_message_x_mailfort_sig)
436               
437               )
438                 );
439                 this.mysql.store_result();
440                 
441
442     
443     }
444     
445     
446     public void fix_deleted_attachment_db(int id, GMime.Part attachment)
447     {
448                 
449         var filename = attachment.get_header("X-strip-content-name");
450         var file_path  = attachment.get_header("X-strip-path");
451         var fn =  StripApplication.opt_target_path + "/" + file_path;
452         
453
454                 if (!FileUtils.test (fn, FileTest.EXISTS)) {
455                         GLib.debug("SKIP -- file does not exist");
456                         return;
457         }
458         
459         var chksum = this.md5_file(fn);
460                 var mime_type = attachment.get_header("X-strip-content-type");
461
462                 var fileinfo = File.new_for_path(fn)
463                                         .query_info(GLib.FileAttribute.STANDARD_SIZE+","+GLib.FileAttribute.TIME_MODIFIED
464                                                 ,GLib.FileQueryInfoFlags.NONE,null);
465         var file_size = (int) fileinfo.get_size();
466
467       
468                 this.real_query(-1, """
469                        
470                        
471                                 INSERT INTO Attachment  (  
472                                         id, 
473                                         
474                                     msgid ,
475                                     queue_id ,
476                                     mime_filename ,
477                                     mime_type,
478                                      
479                                     stored_filename ,
480                                     mime_charset ,
481                                     mime_cdisp ,
482                                     mime_is_cover ,
483                                     
484                                     mime_is_multi ,
485                                     mime_is_mail,
486                                     mime_size ,
487                                     filesize,
488                                     
489                                     checksum,
490                                     created
491
492                                 ) VALUES (
493                                         %d,  -- id
494                                         
495                                     '%s' , -- msgid
496                                     0,
497                                     '%s'  , -- filename
498                                     '%s',  -- mimetype
499                                     
500                                     '%s', -- stored file anme
501                                     '', -- charset
502                                     'attachment',
503                                     0,
504                                     0,
505                                     0,
506                                     %d, -- size
507                                     %d, -- size
508                                     
509                                     '%s', -- checkum
510                                         '%s' -- created:
511                                 )
512                        
513                        
514                       """.printf(
515                                 id,
516                                       this.mysql_escape(this.active_message_exim_id),
517                                       this.mysql_escape(filename),
518                                   this.mysql_escape(mime_type),
519                                   this.mysql_escape(file_path),
520                                         file_size,
521                                       file_size,
522                                       this.mysql_escape(chksum),
523                                 this.created_date
524                          ));
525               // this is done to fix the queue_id or maillog_id ??
526                  this.query("""
527                  SELECT attachment_update(
528                       %d, -- in_id INT(11),
529                                                 '', -- mime type
530                       '%s', -- in_created DATETIME,
531                       '%s' -- in_mailfort_sig varchar(64)
532                  )
533               """.printf(
534                                 id, 
535                                 this.created_date,
536                                 this.mysql_escape(this.active_message_x_mailfort_sig)
537               
538               )
539                 );
540                 // GLib.error("added attachment?");
541     }
542     
543     
544     public void replace_attachment(GMime.Multipart parent, GMime.Part attachment)
545     {
546         var sid = attachment.get_header("X-strip-id");
547         if (sid != null && sid.length > 0) {
548                 GLib.debug("Skip attachment replace - it's already been done");
549                 return;
550         }
551         
552         var c = attachment.get_content_object();
553         
554         var filename = attachment.get_filename().replace("/", "-").replace("\n", "").replace("\t", " ");
555         var fn = GLib.Environment.get_tmp_dir() +
556                         "/"+ this.active_name + "."+   GLib.Uri.escape_string(filename,"", false);
557
558             var outfile = new GMime.StreamFile.for_path(fn, "w");
559             if (outfile == null) {
560                 return; // something has gone wrong??
561         }
562             outfile.set_owner(true);
563             var file_size = (int) c.write_to_stream(outfile);
564             var chksum = this.md5_file(fn);
565             outfile.flush();
566             outfile = null;
567         
568         if (file_size == 0) {
569
570                 GLib.debug("ERROR - file size of write to stream returned 0?");
571                 Posix.unlink(fn);               
572                 return;
573         }
574         
575         
576         
577  
578         var mime_type= attachment.get_content_type().to_string();
579         // at this point we have to do our database magic...
580         //filesize / name / date / checksum / mimetype -- into mailfort should be OK.
581         
582         var file_id = this.query("""
583                 SELECT 
584                 
585                 attachment_init(
586                                 '%s', -- in_msgid VARCHAR(32),
587                                 '%s', -- in_checksum VARCHAR(64),
588                                 '%s', -- in_mime_filename varchar(255)
589                                 %d -- filesize
590                         ) as id 
591                         
592           """.printf(
593                         this.mysql_escape(this.active_message_exim_id),
594                         chksum,
595                         this.mysql_escape( GLib.Uri.escape_string(attachment.get_filename(),"", false) ), // what is thsi is invalid?
596                          file_size)
597                 );
598                  
599                 
600                 if (file_id.length < 1) {
601                         GLib.debug("ERROR - CALL to attachment_init failed");
602                 Posix.unlink(fn);               
603                 return;
604                 
605                 }
606  
607                 if (int.parse(file_id) < 1) {
608                         GLib.debug("ERROR - CALL to attachment_init failed - returned 0?");
609                 Posix.unlink(fn);               
610                 return;
611                 
612                 }
613  
614         
615                 GLib.debug("fn = %s, m5=%s, id= %s", filename, mime_type, this.active_message_id);
616                 this.query("""
617                 
618                         SELECT attachment_update(
619                                 %d, -- in_id INT(11),
620                                 '%s', -- in_mime_type varchar(255),
621                                 '%s', -- in_created DATETIME,
622                                 '%s' -- in_mailfort_sig varchar(64)
623                                 
624                                 ) as result
625       """.printf(
626                 int.parse(file_id),
627                         this.mysql_escape(mime_type),
628                         this.created_date,
629                         this.mysql_escape(this.active_message_x_mailfort_sig)
630                 ));
631                  this.mysql.store_result();
632                                  
633  
634                 this.used_space_after += file_size;
635                         
636                 var target_fn = "";
637
638             if (StripApplication.opt_is_extracting) {
639                         target_fn = StripApplication.opt_target_path + "/" + this.created_dir +"/"+ file_id  + "-" + GLib.Uri.escape_string(filename,"", false);
640                 } 
641                     
642             var stored =  "/" + this.created_dir +"/"+ file_id  + "-" + GLib.Uri.escape_string(filename,"", false);
643                  this.query("""
644                 
645                         SELECT attachment_update_store(
646                                 %d, -- in_id INT(11),
647                                 '%s'  -- in_store_filename varchar(255),
648                          
649                                 
650                                 ) as result
651       """.printf(
652                 int.parse(file_id),
653                          this.mysql_escape( stored)
654                 ));   
655                          
656         var rep = new GMime.Part.with_type("text","html");
657         // we have to set up a redirect server - to redirect hpasite... to their internal service..
658         rep.set_filename(filename);
659         string txt = "<html><body>"+
660             "<a href=\"" + StripApplication.opt_replace_link + "/" +
661                         file_id + "/" + this.created_dir + "/"+chksum+"/"+ GLib.Uri.escape_string( filename, "", false) +"\">" + 
662             GLib.Uri.escape_string( filename, "", false) + // fixme needs html escaping...
663             "</a>" +
664             "</body></html>";
665
666         rep.get_content_type().set_parameter("charset", "utf-8");
667                 rep.set_header("X-strip-id", file_id);
668                 rep.set_header("X-strip-content-name",  filename);                              
669                 rep.set_header("X-strip-path", this.created_dir + "/" + file_id + "-" +  GLib.Uri.escape_string(filename,"", false));           
670                 rep.set_header("X-strip-content-type", mime_type);              
671         var stream =  new GMime.StreamMem.with_buffer(txt.data);
672         var con = new GMime.DataWrapper.with_stream(stream,GMime.ContentEncoding.DEFAULT);
673
674         rep.set_content_object(con);
675         GLib.debug("Replacing Attachment with HTML");
676         parent.replace(parent.index_of(attachment), rep);
677                 this.has_replaced = true;
678                  
679                 if (StripApplication.opt_is_extracting && target_fn.length > 0) {
680                         var dir = GLib.Path.get_dirname(target_fn);
681                         if (!FileUtils.test (dir, FileTest.IS_DIR)) {
682                                 GLib.DirUtils.create_with_parents(dir, 0755);
683                         }
684                         GLib.debug("Creating file %s", target_fn);
685                         if (!FileUtils.test (target_fn, FileTest.EXISTS)) {
686                                 var from = File.new_for_path (fn);
687                                 var to =  File.new_for_path (target_fn);
688                                 from.copy(to, 0, null);
689
690                         }
691                 } else { 
692                         GLib.debug("Skipping extraction %s", target_fn);
693                 }
694                 Posix.unlink(fn);
695                 
696
697
698     }
699     public string query(string str)
700     {
701             return this.real_query(1, str);
702     }
703     public string execute(string str)
704     {
705             return this.real_query(0, str);
706     }
707     /**
708     * need_return 
709     0 = no
710     1 = yes
711     -1 = don't try.
712     */
713     public string real_query(int need_return, string str)
714     {
715                 GLib.debug("Before Query : %u  : %s\n", this.mysql.errno(), this.mysql.error());
716
717
718         if (StripApplication.opt_debug_sql) {
719                 GLib.debug("SQL: %s\n", str);
720                 }
721                 
722                 
723         
724         var rc=  this.mysql.query(str); 
725         if ( rc != 0 ) {
726
727                     GLib.debug("ERROR %u: Query failed: %s\n", this.mysql.errno(), this.mysql.error());
728                                 Posix.exit(1);
729                 }
730         var rs = mysql.use_result();
731                 if (need_return == -1) {
732                         return "";
733                 }
734  
735         
736         //GLib.debug("got %d rows", (int) rs.num_rows());
737         
738         var got_row = false;
739                 string[] row;
740                 string ret = "";
741                 while( (row = rs.fetch_row()) != null) { 
742                         got_row = true;
743                         ret = row[0];
744                 
745                 }
746                 if (need_return == 0) {
747                 if (StripApplication.opt_debug_sql) {
748                                 GLib.debug("got %s", got_row ? "=Nothing=" : ret);
749                         }
750                         return got_row ? "" : ret;
751                 }
752                 if (!got_row) {
753
754                          GLib.debug("ERROR : no rows returned");
755                         Posix.exit(1);
756                         return "";
757                 }
758         if (StripApplication.opt_debug_sql) {
759                         GLib.debug("got %s", ret);
760                 }
761                 return ret;
762                 
763                  
764         }
765     
766     public string mysql_escape(string str)
767     {
768             unichar[] value_escaped = new unichar[str.length * 2 + 1];
769                 this.mysql.real_escape_string ((string) value_escaped, str, str.length);
770                 return (string) value_escaped;
771     }
772     
773     public string  md5_file(string fn) {
774               Checksum checksum = new Checksum (ChecksumType.MD5);
775
776               FileStream stream = FileStream.open (fn, "rb");
777               uint8 fbuf[100];
778               size_t size;
779
780               while ((size = stream.read (fbuf)) > 0) {
781                       checksum.update (fbuf, size);
782               }
783
784               unowned string digest = checksum.get_string ();
785               return digest;
786     }
787
    // State describing the file currently being handled; (re)set by scan_file().

    // directory containing the file being scanned
    string active_path = "";
    // name of the file being scanned, relative to active_path
    string active_name = "";
    // Message-ID of the parsed message
    string active_message_id = "";
    // value of the message's "x-mailfort-sig" header
    string active_message_x_mailfort_sig = "";
    // token following "id" in the message's "received" header, ';' removed
    string active_message_exim_id = "";
    // true once an attachment of the current message has been replaced
    bool has_replaced = false;
    string created_date = ""; // should be YYYY-mm-dd (scan_file also stores YYYY-mm-dd HH:MM:SS here)
    string created_dir = ""; // should be YYYY/mm/dd
796     
797     public void scan_file(string path, string name)
798     {
799                 GLib.debug("Scan: %s/%s", path,name); 
800                 
801                 this.has_replaced = false; 
802         this.active_path = path;
803         this.active_name = name;
804         this.active_message_id = "";
805
806                 var mailtime = new DateTime.now_local();
807                 if (StripApplication.opt_scan_mailfort) {
808                     this.created_dir = this.active_path.substring(this.base_dir.length + 1 );
809                         this.created_date = this.created_dir.replace("/", "-");
810                         var bits = this.created_date.split("-");
811                         mailtime = new DateTime.local(int.parse(bits[0]),int.parse(bits[1]),int.parse(bits[2]),0,0,0);
812                         
813                         var oldest = new  DateTime.now_local();
814                         oldest = oldest.add_months(-1 * StripApplication.opt_age_oldest);
815                         var tspan = mailtime.difference(oldest) / GLib.TimeSpan.DAY;
816
817                         if (tspan < 0) {
818                                 GLib.debug("skip file is %d days older than %d months", (int)tspan, StripApplication.opt_age_oldest);
819                                 return;
820                         }
821                         
822                         var newest = new  DateTime.now_local();
823                         newest = newest.add_months(-1 * StripApplication.opt_age_newest);
824                         tspan = mailtime.difference(newest) / GLib.TimeSpan.DAY;
825                         if (tspan > 0) {
826                                 GLib.debug("skip file is %d days newer than %d months", (int)tspan, StripApplication.opt_age_newest);
827                                 return;
828                         }
829                         
830                 }
831         
832         
833                 var fileinfo = File.new_for_path(path +"/" + name)
834                                         .query_info(GLib.FileAttribute.STANDARD_SIZE+","+GLib.FileAttribute.TIME_MODIFIED
835                                                 ,GLib.FileQueryInfoFlags.NONE,null);
836         var file_size = (int) fileinfo.get_size();
837                 var mod_time = fileinfo.get_modification_time();
838                 
839                 
840                 
841                 if (!StripApplication.opt_scan_mailfort) {
842                    
843                 // it's a mail directory...
844                 // use the last modification time? as the default...
845                  mailtime = new DateTime.from_timeval_utc(mod_time);
846                  this.created_dir = mailtime.format("%Y/%m/%d");
847                          this.created_date =  mailtime.format("%Y-%m-%d %H:%M:%S");
848  
849         }
850                 // check on age of file...
851                 
852                 
853                 
854                 
855                 
856         this.used_space_before += file_size;
857         
858         var stream = new GMime.StreamFs.for_path (path +"/" + name,Posix.O_RDONLY, 0);
859         //stream.set_owner(true);
860         var parser = new GMime.Parser.with_stream(stream);
861         var message = parser.construct_message();
862  
863                 if (message == null) {
864                         GLib.debug("Could not parse file? %s/%s", path,name);
865                 this.used_space_after += file_size;                     
866                 return;
867                 }       
868
869
870                 // check : - is message over a year old?                
871                 // get various msg info..
872                 this.active_message_id = message.get_message_id();
873                 this.active_message_x_mailfort_sig = message.get_header("x-mailfort-sig");
874                 var recvd = message.get_header("received");
875                 this.active_message_exim_id = "";
876                 if (recvd != null && recvd.length > 1) {
877                         GLib.debug("RECV: %s", recvd);
878                         var lines = recvd.split("\t");
879                         for (var i = 0; i < lines.length;i++) {
880                                 var bits = lines[i].strip().split(" ");
881                                 if (bits[0] == "id") {
882                                         this.active_message_exim_id = bits[1].replace(";","");
883
884                                 }
885                                 
886                                 if (lines[i].contains(";")) {
887                                         var dbits = lines[i].strip().split(";");                                
888                                         GLib.debug("Reading time from : %s", dbits[1]);
889                                         var timez = GMime.utils_header_decode_date(dbits[1], null);
890                                         if (timez != 0) {
891                                                 mailtime = new DateTime.from_unix_utc(timez);
892                                                 this.created_date = mailtime.format("%Y-%m-%d %H:%M:%S");
893                                                 GLib.debug("Time is %s",this.created_date);
894                                                 // if it's not mailfort we can use that date to determine where to store it...
895                                                 if (!StripApplication.opt_scan_mailfort) {
896                                                         this.created_dir = mailtime.format("%Y/%m/%d");
897                                                 }
898                                         } else {
899                                                 GLib.debug("Could not read time from headers?");
900                                         }
901                                 }
902
903                         }
904                 }
905                 
906                 var oldest = new  DateTime.now_local();
907                 oldest = oldest.add_months(-1 * StripApplication.opt_age_oldest);
908                 var rtspan = mailtime.difference(oldest) / GLib.TimeSpan.DAY;
909                 GLib.debug("Checking oldest %d days difference", (int)rtspan   );
910                 if (rtspan < 0) {
911                         GLib.debug("skip(2) file is %d days older than %d months", (int)rtspan, StripApplication.opt_age_oldest);
912                         return;
913                 }
914                 var newest = new  DateTime.now_local();
915                 newest = newest.add_months(-1 * StripApplication.opt_age_newest);
916                 rtspan = mailtime.difference(newest) / GLib.TimeSpan.DAY;
917                 if (rtspan > 0) {
918                         GLib.debug("skip(2) file is %d days newer than %d months : %s", (int)rtspan, StripApplication.opt_age_newest,
919                                 mailtime.format("%Y-%m-%d %H:%M:%S"));
920                         return;
921                 }
922                 
923                 
924                 
925                 /*
926                 GLib.debug("Message DATA:\n mid: %s\nmailfort: %s \nexim_id: %s",
927                         this.active_message_id,
928                         this.active_message_x_mailfort_sig,
929                         this.active_message_exim_id
930                 );
931                  */
932                         
933                 // DATE?
934                 
935                 var mp = message.get_mime_part();
936
937                 if (!(mp is GMime.Multipart)) {
938                         //GLib.debug("get mimepart does not return a Multipart?");
939                 this.used_space_after += file_size;                                             
940                         return;
941                 }
942                 
943                 var mpc = ((GMime.Multipart)mp).get_count();
944                 
945                 //GLib.debug("Message has %d parts", mpc); 
946                 for (var i =0 ; i < mpc; i++) {
947                         //GLib.debug("Getting part %d", i); 
948                         var mime_obj = ((GMime.Multipart)mp).get_part(i);
949             this.handle_part(mp,mime_obj);                      
950         }
951                 
952         parser= null;
953
954       //  stream.set_owner(false);
955             //stream.close();
956         stream = null;//.close();
957         
958         
959                 if (!this.has_replaced) {
960                         this.used_space_after += file_size;
961                         GLib.debug("skpping write file - no replacement occured");
962                         return;
963                 }
964                 string tmpfile = "";
965                 GMime.Stream outstream = new GMime.StreamNull();
966                 if (StripApplication.opt_is_replacing) {
967                 
968                         tmpfile = GLib.Environment.get_tmp_dir() +"/" + name;
969                 outstream = new GMime.StreamFile.for_path (tmpfile,"w");
970                 ((GMime.StreamFile)outstream).set_owner(true);
971         }
972                 if (StripApplication.opt_dump) {
973                         outstream = new GMime.StreamMem();
974         }
975         
976         file_size = (int) message.write_to_stream(outstream);
977         if (StripApplication.opt_is_replacing) {
978                 ((GMime.StreamFile)outstream).set_owner(false);
979         }
980                 if (StripApplication.opt_dump) {
981                         var ua = ((GMime.StreamMem)outstream).get_byte_array().data;
982                         print("%s\n", (string) ua);
983                 }        
984         message = null;
985         outstream.flush();
986         outstream.close();
987         GLib.debug("finished writing output %d", file_size);
988
989         //
990         outstream = null;
991         
992           
993         this.used_space_after += file_size;
994         
995         
996         if (StripApplication.opt_is_replacing) {
997                 Posix.unlink(path +"/" + name);         
998                 GLib.debug("copy tmp file %s to %s" , tmpfile, path +"/" + name);               
999                 
1000                 // link will not work, as we are doing it accross file systems
1001                         var from = File.new_for_path (tmpfile);
1002                         var nf =  File.new_for_path (path +"/" + name);
1003                         from.copy(nf, 0, null);
1004                         
1005
1006                 var newfileinfo = nf.query_info(GLib.FileAttribute.TIME_MODIFIED,GLib.FileQueryInfoFlags.NONE,null);
1007                 newfileinfo.set_modification_time(mod_time);
1008                 nf.set_attributes_from_info(newfileinfo,FileQueryInfoFlags.NONE);
1009                 Posix.unlink(tmpfile);
1010                 }
1011         this.processed++;
1012         
1013         if (StripApplication.opt_limit > -1 && this.processed >= StripApplication.opt_limit) {
1014                 GLib.debug("Reached replacement limit");
1015                 Posix.exit(1);
1016         }
1017         
1018         
1019         
1020         
1021     }
1022     
1023     
1024     public void scan_dir(string basepath, string subpath)
1025     {
1026         
1027         
1028         // determine if path is to old to scan..
1029         if (subpath.length > 0 && StripApplication.opt_scan_mailfort) {
1030                         var year =  int.parse(subpath.substring(1,4));  // "/2000"
1031                         var month = subpath.length > 5 ? int.parse(subpath.substring(6,2)) : 999; // "/2000/12"                 
1032                         var day = subpath.length > 8 ? int.parse(subpath.substring(9,2)) : 999; // "/2000/12/01"                        
1033                 
1034                 var oldest = new  DateTime.now_local();
1035                         oldest = oldest.add_months(-1 * StripApplication.opt_age_oldest);
1036                         
1037                         //GLib.debug("Checking directory %s is older than min: %d/%d/%d", subpath, oldest.get_year() , oldest.get_month(), oldest.get_day_of_month() );                                 
1038                         
1039                         if (year < oldest.get_year()) {
1040                                 GLib.debug("Skip directory %s is older than min year: %d", subpath, oldest.get_year());
1041                                 return;
1042                         }
1043                         if (year == oldest.get_year() &&  month < oldest.get_month()) {
1044                                 GLib.debug("Skip directory %s is older than min month: %d/%d", subpath, oldest.get_year() , oldest.get_month() );
1045                                 return;
1046                         }
1047                 if (year == oldest.get_year() &&  month == oldest.get_month() && day < oldest.get_day_of_month()) {
1048                                 GLib.debug("Skip directory %s is older than min day: %d/%d/%d", subpath, oldest.get_year() , oldest.get_month(), oldest.get_day_of_month() );           
1049                                 return;
1050                         }
1051                 
1052                 var newest = new  DateTime.now_local();
1053                         newest = newest.add_months(-1 * StripApplication.opt_age_newest);
1054                         
1055                         //GLib.debug("Checking directory %s is newer than max: %d/%d/%d", subpath, newest.get_year() , newest.get_month(), newest.get_day_of_month() );                                 
1056                         
1057                         if (year > newest.get_year()) {
1058                                 GLib.debug("Skip directory %s is newer than max year: %d", subpath, newest.get_year());
1059                                 return;
1060                         }
1061                         if (year == newest.get_year() &&  month != 999 && month > newest.get_month()) {
1062                                 GLib.debug("Skip directory %s is newer than max month: %d/%d", subpath, newest.get_year() , newest.get_month() );
1063                                 return;
1064                         }
1065                 if (year == newest.get_year() &&  month == newest.get_month() &&  day != 999 && day > newest.get_day_of_month()) {
1066                                 GLib.debug("Skip directory %s is newer than max day: %d/%d/%d", subpath, newest.get_year() , newest.get_month(), newest.get_day_of_month() );           
1067                                 return;
1068                         }
1069                 
1070                 
1071                 
1072         }
1073         
1074         if (StripApplication.opt_stamp && GLib.FileUtils.test( basepath + subpath + ".strip-done-stamp", GLib.FileTest.EXISTS )) {
1075                 return;
1076                 }
1077         
1078         
1079         var f = File.new_for_path(basepath + subpath);
1080                 FileEnumerator file_enum;
1081         var cancellable = new Cancellable ();
1082         try {      
1083             file_enum = f.enumerate_children(
1084                 FileAttribute.STANDARD_DISPLAY_NAME + "," +   FileAttribute.STANDARD_TYPE,
1085                         FileQueryInfoFlags.NOFOLLOW_SYMLINKS,  // FileQueryInfoFlags.NONE,
1086                         cancellable
1087                 );
1088         } catch (Error e) {
1089                 GLib.debug("Got error scanning dir? %s", e.message);
1090             // FIXME - show error..
1091             return;
1092         }
1093         FileInfo next_file;
1094          
1095         while (cancellable.is_cancelled () == false ) {
1096             try {
1097                 next_file = file_enum.next_file (cancellable);
1098             } catch(Error e) {
1099                 GLib.debug("error getting next file? %s", e.message);
1100                 break;
1101             }
1102
1103             if (next_file == null) {
1104                 break;
1105             }
1106                 
1107                 
1108                 if (next_file.get_is_symlink()) {
1109                 next_file = null;
1110                 continue;
1111             }
1112             
1113             var ds = next_file.get_display_name();
1114             if (next_file.get_file_type() != FileType.DIRECTORY) {
1115                 
1116                 
1117                 
1118                 if (ds[0] == ',') {
1119                         continue;
1120                 }
1121                 // other files to ignore?
1122                 if (Regex.match_simple (".tgz$", ds)) {
1123                         continue;
1124                 }
1125                 this.scan_file(basepath + subpath , ds);
1126                                 if(this.has_replaced) {
1127                          this.report_state("After scanning %s/%s".printf(basepath + subpath , ds));
1128                         }
1129                 continue;
1130             }
1131
1132
1133             //stdout.printf("Monitor.monitor: got file %s : type :%u\n",
1134             //        next_file.get_display_name(), next_file.get_file_type());
1135
1136
1137         
1138
1139             // not really needed?? - we are storing attachments in a seperate location now...
1140             if (ds[0] == '.') {
1141                 next_file = null;
1142                 continue;
1143             }
1144             if (ds == "attachments") {
1145                         continue;
1146                 }
1147             
1148             
1149             var sp = subpath+"/"+next_file.get_display_name();
1150             // skip modules.
1151             //print("got a file : " + sp);
1152          
1153             next_file = null;
1154             
1155             
1156             this.scan_dir(basepath,sp);
1157             
1158         }
1159         
1160         // completed this folder
1161         
1162         if (StripApplication.opt_stamp) {
1163                 GLib.FileUtils.set_contents (basepath + subpath + ".strip-done-stamp", "Stripper done");
1164         }
1165         
1166     
1167     
1168     }
1169     void report_state(string msg) 
1170     {
1171         // Saved: 2G  Original 10G : 20%
1172         GLib.debug("Saved : %s (%.1f%%) | Original %s | %s", 
1173                         GLib.format_size(this.used_space_before - this.used_space_after), 
1174                         100f * ((1f * (this.used_space_before - this.used_space_after)) / (this.used_space_before * 1f)), 
1175                         GLib.format_size(this.used_space_before),                       
1176                         msg
1177                 );
1178         
1179         }
1180         
1181         
1182
1183 }