src/strip.vala
[app.mailtrimmer] / src / strip.vala
1 /**
2
3   needs to scan 2 things
4   a) our mailfort email database
5        point it at the top directory, containing YEAR/MONTH/DAY.... directories.
6        scan each file (over a year old...)
7        extract out the attachment, and replace with HTML
8        DATABASE? - mysql or sqlite? - 
9            filesize / name / date / checksum / mimetype -- into mailfort should be OK.
10   b) the imap user emails
11            loop through user's directories
12            check age of email .. over 1 years..
13            ?? how to prevent 'repeat' scanning of emails?
14               ??? hidden '.' files containing last scan date?
15
16            check if file exists in our DB.. - replace the link...
17            otherwise generate a file. + add to DB...
18            
19    c) retrieval system
20      -> URL -> get file
21    d) redirect system.
22      -> URL -> redirect to correct server
23
24
25 More notes on our Mailfort DB sync:
26 * some of these attachments are already in the database...
27  - so we need to update the DB..
28  - probably worth putting the code in a stored procedure..
29  
30  -- key scenarios
31    * first scan (and extract)
32    * rescan (as I messed up the first time - fix the DB...)
33    * email scan - attachments might not have related messages.
34  
35  
36  - {id} attachment_init(
37                 {exim_msg_id}
38                 {chksum}
39                 {filename},
40         )
41         // creates or returns id (can look for existing messages?
42         // can do a merge?? - copy 'old' record data into 'new'....  "prefer checksummed"
43         
44         attachment_update(
45                 {id}
46                 {exim_msg_id}
47                 {mailfort_msg_sig}
48                 {file_size}
49                 {created} // message date..
50                 {chksum}
51                 {filename},
52         {mime_type}
53         )
54         attachment_update_store(
55                         {id}
56                         {stored_filename}
57         )
58
59
60 */ 
61
62 // valac --pkg gmime --vapi
63 /*
64
65 // http://www.fromdual.com/mysql-vala-program-example << check mysql if this does not work.
66
67  valac  -g --vapidir=. --thread  strip.vala   --vapidir=../vapi \
68      --pkg glib-2.0 --pkg mysql --pkg gio-2.0 --pkg posix --pkg gmime-2.6 \
69       --Xcc=-lmysqlclient  -v \
70        -o /tmp/strip
71 */ 
72  
/**
 * Command-line front end of the mail stripper.
 *
 * Constructing an instance parses the process arguments into the static
 * opt_* fields and terminates the process with a usage message when a
 * required option is missing or inconsistent. main() then connects to MySQL
 * and hands the scan over to a Strip instance.
 */
public class StripApplication : GLib.Application {

    public static string? opt_path = null;
    public static string? opt_file = null;
    public static string? opt_target_path = null;
    public static string? opt_db_host = "127.0.0.1";
    public static string? opt_db_name = null;
    public static string? opt_db_user = null;
    public static string? opt_db_pass = null;
    // TCP port of the MySQL server; used to be hard-coded to 3306 in main().
    public static int opt_db_port = 3306;

    public static int opt_limit = -1;

    public static int opt_age_newest = 1;
    public static int opt_age_oldest = 6;

    public static bool opt_is_extracting = false;
    public static bool opt_is_replacing = false;
    public static bool opt_scan_maildir = false;
    public static bool opt_scan_mailfort = false;
    public static bool opt_dump = false;
    public static bool opt_debug = false;

    public static bool opt_debug_sql = false;
    public static string? opt_replace_link = null;

    public const GLib.OptionEntry[] options = {

        { "debug", 0, 0, OptionArg.NONE, ref opt_debug, "show debug messages for components", null },
        { "debug-sql", 0, 0, OptionArg.NONE, ref opt_debug_sql, "debug the SQL statements", null },

        { "path", 0, 0, OptionArg.STRING, ref opt_path, "Directory where email to be parsed is", null },
        { "file", 0, 0, OptionArg.STRING, ref opt_file, "A specific file to be parsed", null },

        { "target-path", 0, 0, OptionArg.STRING, ref opt_target_path, "Directory where attachments are to be put", null },

        { "link", 0, 0, OptionArg.STRING, ref opt_replace_link, "url for the replacement link: eg. http://www.mysite.com/xxxx/%s", null },

        { "host", 0, 0, OptionArg.STRING, ref opt_db_host, "Mysql host (default 127.0.0.1)", null },
        { "port", 0, 0, OptionArg.INT, ref opt_db_port, "Mysql port (default 3306)", null },
        { "name", 0, 0, OptionArg.STRING, ref opt_db_name, "Mysql database name REQUIRED", null },
        { "user", 0, 0, OptionArg.STRING, ref opt_db_user, "Mysql database user REQUIRED", null },
        { "pass", 0, 0, OptionArg.STRING, ref opt_db_pass, "Mysql database password (default empty)", null },

        { "extract", 0, 0, OptionArg.NONE, ref opt_is_extracting, "Should attachments be extracted (default NO)", null },
        { "replace", 0, 0, OptionArg.NONE, ref opt_is_replacing, "Should attachments be replaced (default NO)", null },
        { "dump", 0, 0, OptionArg.NONE, ref opt_dump, "Print the replaced mail contents to stdout", null },

        { "limit", 0, 0, OptionArg.INT, ref opt_limit, "stop after X number of messages with attachments have been processed", null },
        { "newest", 0, 0, OptionArg.INT, ref opt_age_newest, "do not replace messages newer than X months (default is 1 months)", null },
        { "oldest", 0, 0, OptionArg.INT, ref opt_age_oldest, "do not replace messages older than X months (default is 6 months)", null },

        { "scan-maildir", 0, 0, OptionArg.NONE, ref opt_scan_maildir, "scan an maildir tree", null },
        { "scan-mailfort", 0, 0, OptionArg.NONE, ref opt_scan_mailfort, "scan a mailfort tree", null },
        { null }
    };

    /**
     * Returns true when an option string was not supplied or is empty.
     */
    private static bool is_blank(string? value)
    {
        return value == null || value.length < 1;
    }

    /**
     * Prints a usage complaint followed by the full help text, then exits
     * the process with a failure status.
     */
    private static void usage_error(GLib.OptionContext ctx, string message)
    {
        stdout.printf ("%s\n%s", message, ctx.get_help(true, null));
        GLib.Process.exit(Posix.EXIT_FAILURE);
    }

    /**
     * Parses and validates the command line.
     *
     * Any parse error or missing/inconsistent option ends the process with
     * Posix.EXIT_FAILURE after printing the problem and the help text.
     *
     * @param args the process arguments as received by main()
     */
    public StripApplication( string[] args )
    {
        Object(
            application_id: "org.roojs.mailstripper",
            flags: ApplicationFlags.FLAGS_NONE
        );

        var opt_context = new GLib.OptionContext ("Mail Stripper");

        try {
            opt_context.set_help_enabled (true);
            opt_context.add_main_entries (options, null);
            opt_context.parse (ref args);
        } catch (GLib.OptionError e) {
            stdout.printf ("error: %s\n", e.message);
            stdout.printf ("Run '%s --help' to see a full list of available command line options.\n%s",
                args[0], opt_context.get_help(true, null));
            GLib.Process.exit(Posix.EXIT_FAILURE);
        }

        // Exactly one of the two tree types has to be selected.
        if (opt_scan_mailfort == opt_scan_maildir) {
            usage_error(opt_context,
                "You must specify the type of directory tree to scan - either --scan-maildir or --scan-mailfort");
        }
        if (is_blank(opt_db_name) || is_blank(opt_db_user)) {
            usage_error(opt_context, "You must specify the database name / user ");
        }
        if (opt_db_port < 1 || opt_db_port > 65535) {
            usage_error(opt_context, "The database port must be between 1 and 65535");
        }
        if (is_blank(opt_path)) {
            usage_error(opt_context, "You must specify the scan start path");
        }
        if (is_blank(opt_replace_link)) {
            usage_error(opt_context, "You must specify the link to use in the replacement ");
        }
        if ((opt_is_replacing || opt_is_extracting) && is_blank(opt_target_path)) {
            usage_error(opt_context, "You must specify a target path to put attachments");
        }
    }

    /**
     * Program entry point: parses options, sets up logging and GMime,
     * connects to MySQL and scans either the single --file or the --path tree.
     *
     * @return 0 on success, 1 when the database connection fails
     */
    public static int main(string[] args)
    {
        // Constructing the application is what parses and validates the
        // command line, so the instance is created even though it is not
        // used afterwards.
        var application = new StripApplication(args);

        GLib.Log.set_always_fatal(LogLevelFlags.LEVEL_ERROR | LogLevelFlags.LEVEL_CRITICAL);

        if (opt_debug || opt_debug_sql) {
            GLib.Log.set_handler(null,
                GLib.LogLevelFlags.LEVEL_DEBUG | GLib.LogLevelFlags.LEVEL_WARNING | GLib.LogLevelFlags.LEVEL_INFO,
                (dom, lvl, msg) => {
                    print("%s\n", msg);
                }
            );
        }

        GMime.init(0);

        // Replacing an attachment only makes sense when it is also extracted.
        if (StripApplication.opt_is_replacing) {
            StripApplication.opt_is_extracting = true;
        }

        GLib.debug("scanning folder: %s", opt_path );

        var strip = new Strip( opt_path );

        strip.mysql = new Mysql.Database();
        if (!strip.mysql.real_connect(
                opt_db_host,
                opt_db_user,
                opt_db_pass == null ? "" : opt_db_pass, // passwd
                opt_db_name, // DB
                (uint) opt_db_port,
                null
            )
        ) {
            stdout.printf("ERROR %u: Connection failed: %s\n",
                strip.mysql.errno(), strip.mysql.error()
            );
            return 1;
        }

        if (opt_file != null) {
            // Single-file mode: --path still provides the base directory.
            strip.base_dir = opt_path;
            strip.scan_file( GLib.Path.get_dirname(opt_file), GLib.Path.get_basename(opt_file));
            return 0;
        }

        strip.scan_dir(opt_path, "");

        return 0;
    }
}
240
241 public class Strip : GLib.Object {
242         
243  
244         
245         public string base_dir = "";
246         
247         public Mysql.Database mysql;
248         
249         int processed = 0;
250     
251     uint64 used_space_before = 0;
252     uint64 used_space_after = 0;
253     
254     
255     public Strip(string base_dir)
256     {
257         this.base_dir = base_dir;
258     }
259     
260     public void handle_part(GMime.Object parent, GMime.Object mime_obj)
261     {
262                 if (mime_obj is GMime.Part) {
263                    var  p = (GMime.Part)mime_obj;
264                         var ct = p.get_content_type();
265                         var cd = p.get_content_disposition();
266                         
267                         var sid = p.get_header("X-strip-id");
268                     if (sid != null && sid.length > 0) {
269                         this.update_attachment_db(p);
270                             GLib.debug("Skip attachment replace - it's already been done");
271                         return;
272                         }
273                         
274                         if (cd == null || cd.get_disposition().down() != "attachment") {
275                                 return;
276                         }
277                         if (ct.get_media_type() == "text") {
278                                 return;
279                         }
280                         if (ct.to_string() == "application/pgp-encrypted") {
281                                 return;
282                         }
283                         if (ct.to_string() == "application/pgp-keys") {
284                                 return;
285                         }
286                         if (p.get_filename() == null) {
287                                 return;
288                         }
289                          // print("got part %s\n", ct.to_string());
290                          if (parent is GMime.Multipart) {
291                                 
292                                 this.replace_attachment(((GMime.Multipart)parent), p);
293                                 // remove it !?
294
295                           }
296
297
298                         return;
299                 }
300                 if (mime_obj is GMime.Multipart) {
301                         
302
303                         var  mp = (GMime.Multipart)mime_obj;
304                         //var ct = mp.get_content_type();
305
306                         //print("got multi-part %s\n", ct.to_string());
307                         for (var i = 0; i< mp.get_count(); i++) { 
308                           var mo = mp.get_part(i);
309                           this.handle_part(mime_obj,mo);
310                         }
311                    // ((GMime.Multipart)mime_obj).foreach((sub_obj) => {
312                    //     Strip.handle_part(sub_obj);
313                 //
314                    // });
315
316
317                         return;
318                 }
319
320                 if (mime_obj is GMime.MessagePart) {
321                         var msg = ((GMime.MessagePart)mime_obj).get_message();
322                         msg.foreach((subobj) => {
323                          this.handle_part(msg,subobj);
324                     });
325                 
326                         //print("got message-part\n");
327                         return;
328                 }
329                 
330                 if (mime_obj is GMime.Message) {
331                         var mp = ((GMime.Message) mime_obj).get_mime_part();
332
333                         if (!(mp is GMime.Multipart)) {
334                                 //GLib.debug("get mimepart does not return a Multipart?");
335                                 return;
336                         }
337                         
338                         var mpc = ((GMime.Multipart)mp).get_count();
339                         
340                         //GLib.debug("Message has %d parts", mpc); 
341                         for (var i =0 ; i < mpc; i++) {
342                                 //GLib.debug("Getting part %d", i); 
343                                 var submime_obj = ((GMime.Multipart)mp).get_part(i);
344                         this.handle_part(mp,submime_obj);                       
345                     }
346                         print("got message??\n");
347                         return;
348                 }
349                 
350                 print("got something else\n");
351
352
353     }
354     public void update_attachment_db(GMime.Part attachment)
355     {
356         // only called when we have an sid...
357         var sid = attachment.get_header("X-strip-id");
358         if (sid == null || sid.length < 1) {
359                 GLib.debug("Strange - update attachment db called ?");
360                 return;
361         }
362         
363         // initialize it with known data..
364         // that should wipe out dupes.
365         var filesize = this.query("SELECT filesize FROM Attachment WHERE id = %d".printf(
366                         int.parse(sid)));       
367                 if (int.parse(filesize) < 1) {
368                 GLib.debug("Could not get filesize from id :%s = %s", sid,filesize);
369                 Posix.exit(0);
370                 return;
371         }
372         
373         var chksum = this.query("SELECT  checksum FROM Attachment WHERE id = %d".printf(
374                         int.parse(sid)
375                 ));
376         var mime_filename = this.query("SELECT  mime_filename FROM Attachment WHERE id = %d".printf(
377                         int.parse(sid)));       
378                 
379         this.query("""
380              SELECT 
381                  attachment_init(
382                      '%s', '%s', '%s', %d
383                  ) as id 
384                  
385           """.printf(
386                           this.mysql_escape(this.active_message_exim_id),
387                           this.mysql_escape(chksum),
388                           this.mysql_escape(mime_filename),                       
389                           int.parse(filesize)
390                 ));
391         this.query("""
392                  SELECT attachment_update(
393                       %d, -- in_id INT(11),
394                       '%s', -- in_mime_type varchar(255),
395                       '%s', -- in_created DATETIME,
396                       '%s' -- in_mailfort_sig varchar(64)
397                  )
398               """.printf(
399                         int.parse(sid),
400                         "", // this will be ignored..
401                                 this.created_date,
402                                 this.mysql_escape(this.active_message_x_mailfort_sig)
403               
404               )
405                 );
406                 this.mysql.store_result();
407                 
408
409     
410     }
411     
412     
413     public void replace_attachment(GMime.Multipart parent, GMime.Part attachment)
414     {
415         var sid = attachment.get_header("X-strip-id");
416         if (sid != null && sid.length > 0) {
417                 GLib.debug("Skip attachment replace - it's already been done");
418                 return;
419         }
420         
421         var c = attachment.get_content_object();
422         
423         var filename = attachment.get_filename().replace("/", "-").replace("\n", "").replace("\t", " ");
424         var fn = GLib.Environment.get_tmp_dir() +
425                         "/"+ this.active_name + "."+   filename;
426
427             var outfile = new GMime.StreamFile.for_path(fn, "w");
428             outfile.set_owner(true);
429             var file_size = (int) c.write_to_stream(outfile);
430             var chksum = this.md5_file(fn);
431             outfile.flush();
432             outfile = null;
433         
434         if (file_size == 0) {
435
436                 GLib.debug("ERROR - file size of write to stream returned 0?");
437                 Posix.unlink(fn);               
438                 return;
439         }
440         
441         
442         
443  
444         var mime_type= attachment.get_content_type().to_string();
445         // at this point we have to do our database magic...
446         //filesize / name / date / checksum / mimetype -- into mailfort should be OK.
447         
448         var file_id = this.query("""
449                 SELECT 
450                 
451                 attachment_init(
452                                 '%s', -- in_msgid VARCHAR(32),
453                                 '%s', -- in_checksum VARCHAR(64),
454                                 '%s', -- in_mime_filename varchar(255)
455                                 %d -- filesize
456                         ) as id 
457                         
458           """.printf(
459                         this.mysql_escape(this.active_message_exim_id),
460                         chksum,
461                         this.mysql_escape( attachment.get_filename() ), // what is thsi is invalid?
462                          file_size)
463                 );
464                  
465                 
466                 if (file_id.length < 1) {
467                         GLib.debug("ERROR - CALL to attachment_init failed");
468                 Posix.unlink(fn);               
469                 return;
470                 
471                 }
472  
473                 if (int.parse(file_id) < 1) {
474                         GLib.debug("ERROR - CALL to attachment_init failed - returned 0?");
475                 Posix.unlink(fn);               
476                 return;
477                 
478                 }
479  
480         
481                 GLib.debug("fn = %s, m5=%s, id= %s", filename, mime_type, this.active_message_id);
482                 this.query("""
483                 
484                         SELECT attachment_update(
485                                 %d, -- in_id INT(11),
486                                 '%s', -- in_mime_type varchar(255),
487                                 '%s', -- in_created DATETIME,
488                                 '%s' -- in_mailfort_sig varchar(64)
489                                 
490                                 ) as result
491       """.printf(
492                 int.parse(file_id),
493                         this.mysql_escape(mime_type),
494                         this.created_date,
495                         this.mysql_escape(this.active_message_x_mailfort_sig)
496                 ));
497                  this.mysql.store_result();
498                                  
499  
500                 this.used_space_after += file_size;
501                         
502                 var target_fn = "";
503
504             if (StripApplication.opt_is_extracting) {
505                         target_fn = StripApplication.opt_target_path + "/" + this.created_dir +"/"+ file_id  + "-" + filename;
506                 } 
507                     
508             var stored =  "/" + this.created_dir +"/"+ file_id  + "-" + filename;
509                  this.query("""
510                 
511                         SELECT attachment_update_store(
512                                 %d, -- in_id INT(11),
513                                 '%s'  -- in_store_filename varchar(255),
514                          
515                                 
516                                 ) as result
517       """.printf(
518                 int.parse(file_id),
519                          this.mysql_escape( stored)
520                 ));   
521                          
522         var rep = new GMime.Part.with_type("text","html");
523         // we have to set up a redirect server - to redirect hpasite... to their internal service..
524         rep.set_filename(filename);
525         string txt = "<html><body>"+
526             "<a href=\"" + StripApplication.opt_replace_link + "/" +
527                         file_id + "/" + this.created_dir + "/"+chksum+"/"+ GLib.Uri.escape_string( filename) +"\">" + 
528             GLib.Uri.escape_string( filename) + // fixme needs html escaping...
529             "</a>" +
530             "</body></html>";
531
532         rep.get_content_type().set_parameter("charset", "utf-8");
533                 rep.set_header("X-strip-id", file_id);
534                 rep.set_header("X-strip-content-name",  filename);                              
535                 rep.set_header("X-strip-path", this.created_dir + "/" + file_id + "-" + filename);              
536                 rep.set_header("X-strip-content-type", mime_type);              
537         var stream =  new GMime.StreamMem.with_buffer(txt.data);
538         var con = new GMime.DataWrapper.with_stream(stream,GMime.ContentEncoding.DEFAULT);
539
540         rep.set_content_object(con);
541         GLib.debug("Replacing Attachment with HTML");
542         parent.replace(parent.index_of(attachment), rep);
543                 this.has_replaced = true;
544                  
545                 if (StripApplication.opt_is_extracting && target_fn.length > 0) {
546                         var dir = GLib.Path.get_dirname(target_fn);
547                         if (!FileUtils.test (dir, FileTest.IS_DIR)) {
548                                 GLib.DirUtils.create_with_parents(dir, 0755);
549                         }
550                         GLib.debug("Creating file %s", target_fn);
551                         if (!FileUtils.test (target_fn, FileTest.EXISTS)) {
552                                 Posix.link(fn, target_fn);
553                         }
554                 } else { 
555                         GLib.debug("Skipping extraction %s", target_fn);
556                 }
557                 Posix.unlink(fn);
558                 
559
560
561     }
562     public string query(string str)
563     {
564             return this.real_query(true, str);
565     }
566     public string execute(string str)
567     {
568             return this.real_query(false, str);
569     }
570     public string real_query(bool need_return, string str)
571     {
572                 GLib.debug("Before Query : %u  : %s\n", this.mysql.errno(), this.mysql.error());
573
574
575         if (StripApplication.opt_debug_sql) {
576                 GLib.debug("SQL: %s\n", str);
577                 }
578                 
579                 
580         
581         var rc=  this.mysql.query(str);         
582         if ( rc != 0 ) {
583
584                     GLib.debug("ERROR %u: Query failed: %s\n", this.mysql.errno(), this.mysql.error());
585                                 Posix.exit(1);
586                 }
587                 
588
589         var rs = mysql.use_result();
590         
591         var got_row = false;
592                 string[] row;
593                 string ret = "";
594                 while( (row = rs.fetch_row()) != null) { 
595                         got_row = true;
596                         ret = row[0];
597                 
598                 }
599                 if (!need_return) {
600                         return "";
601                 }
602                 if (!got_row) {
603                          GLib.debug("ERROR : no rows returned");
604                         Posix.exit(1);
605                         return "";
606                 }
607                 GLib.debug("got %s", ret);
608                 return ret;
609                 
610                  
611         }
612     
613     public string mysql_escape(string str)
614     {
615             unichar[] value_escaped = new unichar[str.length * 2 + 1];
616                 this.mysql.real_escape_string ((string) value_escaped, str, str.length);
617                 return (string) value_escaped;
618     }
619     
620     public string  md5_file(string fn) {
621               Checksum checksum = new Checksum (ChecksumType.MD5);
622
623               FileStream stream = FileStream.open (fn, "rb");
624               uint8 fbuf[100];
625               size_t size;
626
627               while ((size = stream.read (fbuf)) > 0) {
628                       checksum.update (fbuf, size);
629               }
630
631               unowned string digest = checksum.get_string ();
632               return digest;
633     }
634
635         string active_path = "";    
636     string active_name = "";
637     string active_message_id = "";
638     string active_message_x_mailfort_sig = "";
639     string active_message_exim_id = "";
640     bool has_replaced = false;
641     string created_date = ""; // should be YYYY-mm-dd
642     string created_dir = ""; // should be YYY/mm/dd
643     
644     public void scan_file(string path, string name)
645     {
646                 GLib.debug("Scan: %s/%s", path,name); 
647                 
648                 this.has_replaced = false; 
649         this.active_path = path;
650         this.active_name = name;
651         this.active_message_id = "";
652
653                 var mailtime = new DateTime.now_local();
654                 if (StripApplication.opt_scan_mailfort) {
655                     this.created_dir = this.active_path.substring(this.base_dir.length + 1 );
656                         this.created_date = this.created_dir.replace("/", "-");
657                         var bits = this.created_date.split("-");
658                         mailtime = new DateTime.local(int.parse(bits[0]),int.parse(bits[1]),int.parse(bits[2]),0,0,0);
659                         
660                         var oldest = new  DateTime.now_local();
661                         oldest = oldest.add_months(-1 * StripApplication.opt_age_oldest);
662                         var tspan = mailtime.difference(oldest) / GLib.TimeSpan.DAY;
663
664                         if (tspan < 0) {
665                                 GLib.debug("skip file is %d days older than %d months", (int)tspan, StripApplication.opt_age_oldest);
666                                 return;
667                         }
668                         
669                         var newest = new  DateTime.now_local();
670                         newest = newest.add_months(-1 * StripApplication.opt_age_newest);
671                         tspan = mailtime.difference(newest) / GLib.TimeSpan.DAY;
672                         if (tspan > 0) {
673                                 GLib.debug("skip file is %d days newer than %d months", (int)tspan, StripApplication.opt_age_newest);
674                                 return;
675                         }
676                         
677                 }
678         
679         
680                 var fileinfo = File.new_for_path(path +"/" + name)
681                                         .query_info(GLib.FileAttribute.STANDARD_SIZE+","+GLib.FileAttribute.TIME_MODIFIED
682                                                 ,GLib.FileQueryInfoFlags.NONE,null);
683         var file_size = (int) fileinfo.get_size();
684                 var mod_time = fileinfo.get_modification_time();
685                 
686                 
687                 
688                 if (!StripApplication.opt_scan_mailfort) {
689                    
690                 // it's a mail directory...
691                 // use the last modification time? as the default...
692                  mailtime = new DateTime.from_timeval_utc(mod_time);
693                  this.created_dir = mailtime.format("%Y/%m/%d");
694                          this.created_date =  mailtime.format("%Y-%m-%d %H:%M:%S");
695  
696         }
697                 // check on age of file...
698                 
699                 
700                 
701                 
702                 
703         this.used_space_before += file_size;
704         
705         var stream = new GMime.StreamFs.for_path (path +"/" + name,Posix.O_RDONLY, 0);
706         //stream.set_owner(true);
707         var parser = new GMime.Parser.with_stream(stream);
708         var message = parser.construct_message();
709  
710                 if (message == null) {
711                         GLib.debug("Could not parse file? %s/%s", path,name);
712                 this.used_space_after += file_size;                     
713                 return;
714                 }       
715
716
717                 // check : - is message over a year old?                
718                 // get various msg info..
719                 this.active_message_id = message.get_message_id();
720                 this.active_message_x_mailfort_sig = message.get_header("x-mailfort-sig");
721                 var recvd = message.get_header("received");
722                 this.active_message_exim_id = "";
723                 if (recvd != null && recvd.length > 1) {
724                         GLib.debug("RECV: %s", recvd);
725                         var lines = recvd.split("\t");
726                         for (var i = 0; i < lines.length;i++) {
727                                 var bits = lines[i].strip().split(" ");
728                                 if (bits[0] == "id") {
729                                         this.active_message_exim_id = bits[1].replace(";","");
730
731                                 }
732                                 
733                                 if (lines[i].contains(";")) {
734                                         var dbits = lines[i].strip().split(";");                                
735                                         GLib.debug("Reading time from : %s", dbits[1]);
736                                         var timez = GMime.utils_header_decode_date(dbits[1], null);
737                                         if (timez != 0) {
738                                                 mailtime = new DateTime.from_unix_utc(timez);
739                                                 this.created_date = mailtime.format("%Y-%m-%d %H:%M:%S");
740                                                 GLib.debug("Time is %s",this.created_date);
741                                                 // if it's not mailfort we can use that date to determine where to store it...
742                                                 if (!StripApplication.opt_scan_mailfort) {
743                                                         this.created_dir = mailtime.format("%Y/%m/%d");
744                                                 }
745                                         } else {
746                                                 GLib.debug("Could not read time from headers?");
747                                         }
748                                 }
749
750                         }
751                 }
752                 
753                 var oldest = new  DateTime.now_local();
754                 oldest = oldest.add_months(-1 * StripApplication.opt_age_oldest);
755                 var rtspan = oldest.difference(oldest) / GLib.TimeSpan.DAY;
756
757                 if (rtspan < 0) {
758                         GLib.debug("skip(2) file is %d days older than %d months", (int)rtspan, StripApplication.opt_age_oldest);
759                         return;
760                 }
761                 var newest = new  DateTime.now_local();
762                 newest = newest.add_months(-1 * StripApplication.opt_age_newest);
763                 rtspan = mailtime.difference(newest) / GLib.TimeSpan.DAY;
764                 if (rtspan > 0) {
765                         GLib.debug("skip(2) file is %d days newer than %d months : %s", (int)rtspan, StripApplication.opt_age_newest,
766                                 mailtime.format("%Y-%m-%d %H:%M:%S"));
767                         return;
768                 }
769                 
770                 
771                 
772                 /*
773                 GLib.debug("Message DATA:\n mid: %s\nmailfort: %s \nexim_id: %s",
774                         this.active_message_id,
775                         this.active_message_x_mailfort_sig,
776                         this.active_message_exim_id
777                 );
778                  */
779                         
780                 // DATE?
781                 
782                 var mp = message.get_mime_part();
783
784                 if (!(mp is GMime.Multipart)) {
785                         //GLib.debug("get mimepart does not return a Multipart?");
786                 this.used_space_after += file_size;                                             
787                         return;
788                 }
789                 
790                 var mpc = ((GMime.Multipart)mp).get_count();
791                 
792                 //GLib.debug("Message has %d parts", mpc); 
793                 for (var i =0 ; i < mpc; i++) {
794                         //GLib.debug("Getting part %d", i); 
795                         var mime_obj = ((GMime.Multipart)mp).get_part(i);
796             this.handle_part(mp,mime_obj);                      
797         }
798                 
799         parser= null;
800
801       //  stream.set_owner(false);
802             //stream.close();
803         stream = null;//.close();
804         
805         
806                 if (!this.has_replaced) {
807                         this.used_space_after += file_size;
808                         GLib.debug("skpping write file - no replacement occured");
809                         return;
810                 }
811                 string tmpfile = "";
812                 GMime.Stream outstream = new GMime.StreamNull();
813                 if (StripApplication.opt_is_replacing) {
814                 
815                         tmpfile = GLib.Environment.get_tmp_dir() +"/" + name;
816                 outstream = new GMime.StreamFile.for_path (tmpfile,"w");
817                 ((GMime.StreamFile)outstream).set_owner(true);
818         }
819                 if (StripApplication.opt_dump) {
820                         outstream = new GMime.StreamMem();
821         }
822         
823         file_size = (int) message.write_to_stream(outstream);
824         if (StripApplication.opt_is_replacing) {
825                 ((GMime.StreamFile)outstream).set_owner(false);
826         }
827                 if (StripApplication.opt_dump) {
828                         var ua = ((GMime.StreamMem)outstream).get_byte_array().data;
829                         print("%s\n", (string) ua);
830                 }        
831         message = null;
832         outstream.flush();
833         outstream.close();
834         GLib.debug("finished writing output %d", file_size);
835
836         //
837         
838           
839         this.used_space_after += file_size;
840         
841         
842         if (StripApplication.opt_is_replacing) {
843                 Posix.unlink(path +"/" + name);         
844                 GLib.debug("copy tmp file %s to %s" , tmpfile, path +"/" + name);               
845                 Posix.link(tmpfile, path +"/" + name);
846                 Posix.unlink(tmpfile);
847                 var nf = File.new_for_path(path +"/" + name);
848                 var newfileinfo = nf.query_info(GLib.FileAttribute.TIME_MODIFIED,GLib.FileQueryInfoFlags.NONE,null);
849                 newfileinfo.set_modification_time(mod_time);
850                 nf.set_attributes_from_info(newfileinfo,FileQueryInfoFlags.NONE);
851                 }
852         this.processed++;
853         
854         if (StripApplication.opt_limit > -1 && this.processed >= StripApplication.opt_limit) {
855                 GLib.debug("Reached replacement limit");
856                 Posix.exit(1);
857         }
858         
859         
860         
861         
862     }
863     
864     
865     public void scan_dir(string basepath, string subpath)
866     {
867         
868         // determine if path is to old to scan..
869         if (subpath.length > 0 && StripApplication.opt_scan_mailfort) {
870                         var year =  int.parse(subpath.substring(1,4));  // "/2000"
871                         var month = subpath.length > 5 ? int.parse(subpath.substring(6,2)) : 999; // "/2000/12"                 
872                         var day = subpath.length > 8 ? int.parse(subpath.substring(9,2)) : 999; // "/2000/12/01"                        
873                 
874                 var oldest = new  DateTime.now_local();
875                         oldest = oldest.add_months(-1 * StripApplication.opt_age_oldest);
876                         
877                         GLib.debug("Checking directory %s is older than min: %d/%d/%d", subpath, oldest.get_year() , oldest.get_month(), oldest.get_day_of_month() );                                   
878                         
879                         if (year < oldest.get_year()) {
880                                 GLib.debug("Skip directory %s is older than min year: %d", subpath, oldest.get_year());
881                                 return;
882                         }
883                         if (year == oldest.get_year() &&  month < oldest.get_month()) {
884                                 GLib.debug("Skip directory %s is older than min year: %d/%d", subpath, oldest.get_year() , oldest.get_month() );
885                                 return;
886                         }
887                 if (year == oldest.get_year() &&  month == oldest.get_month() && day < oldest.get_day_of_month()) {
888                                 GLib.debug("Skip directory %s is older than min year: %d/%d/%d", subpath, oldest.get_year() , oldest.get_month(), oldest.get_day_of_month() );          
889                                 return;
890                         }
891                  
892                 var newest = new  DateTime.now_local();
893                         newest = newest.add_months(-1 * StripApplication.opt_age_newest);
894                         
895                         GLib.debug("Checking directory %s is newer than min: %d/%d/%d", subpath, newest.get_year() , newest.get_month(), newest.get_day_of_month() );                                   
896                         
897                         if (year > newest.get_year()) {
898                                 GLib.debug("Skip directory %s is newer than min year: %d", subpath, newest.get_year());
899                                 return;
900                         }
901                         if (year == newest.get_year() &&  month > newest.get_month()) {
902                                 GLib.debug("Skip directory %s is newer than min year: %d/%d", subpath, newest.get_year() , newest.get_month() );
903                                 return;
904                         }
905                 if (year == newest.get_year() &&  month == newest.get_month() && day > newest.get_day_of_month()) {
906                                 GLib.debug("Skip directory %s is newer than min year: %d/%d/%d", subpath, newest.get_year() , newest.get_month(), newest.get_day_of_month() );          
907                                 return;
908                         }
909                 
910                 
911                 
912         }
913         
914         
915         var f = File.new_for_path(basepath + subpath);
916                 FileEnumerator file_enum;
917         var cancellable = new Cancellable ();
918         try {      
919             file_enum = f.enumerate_children(
920                 FileAttribute.STANDARD_DISPLAY_NAME + "," +   FileAttribute.STANDARD_TYPE,
921                         FileQueryInfoFlags.NOFOLLOW_SYMLINKS,  // FileQueryInfoFlags.NONE,
922                         cancellable
923                 );
924         } catch (Error e) {
925                 GLib.debug("Got error scanning dir? %s", e.message);
926             // FIXME - show error..
927             return;
928         }
929         FileInfo next_file;
930          
931         while (cancellable.is_cancelled () == false ) {
932             try {
933                 next_file = file_enum.next_file (cancellable);
934             } catch(Error e) {
935                 GLib.debug("error getting next file? %s", e.message);
936                 break;
937             }
938
939             if (next_file == null) {
940                 break;
941             }
942                 
943                 
944                 if (next_file.get_is_symlink()) {
945                 next_file = null;
946                 continue;
947             }
948             
949
950             if (next_file.get_file_type() != FileType.DIRECTORY) {
951                 
952                 if (next_file.get_display_name()[0] == ',') {
953                         continue;
954                 }
955                 // other files to ignore?
956                 
957                 this.scan_file(basepath + subpath , next_file.get_display_name());
958                                 if(this.has_replaced) {
959                          this.report_state("After scanning %s/%s".printf(basepath + subpath , next_file.get_display_name()));
960                         }
961                 continue;
962             }
963
964
965             //stdout.printf("Monitor.monitor: got file %s : type :%u\n",
966             //        next_file.get_display_name(), next_file.get_file_type());
967
968
969         
970             var ds = next_file.get_display_name();
971             // not really needed?? - we are storing attachments in a seperate location now...
972             if (ds[0] == '.') {
973                 next_file = null;
974                 continue;
975             }
976             if (ds == "attachments") {
977                         continue;
978                 }
979             
980             
981             var sp = subpath+"/"+next_file.get_display_name();
982             // skip modules.
983             //print("got a file : " + sp);
984          
985             next_file = null;
986             
987             
988             this.scan_dir(basepath,sp);
989             
990         }
991     
992     
993     }
994     void report_state(string msg) 
995     {
996         // Saved: 2G  Original 10G : 20%
997         GLib.debug("Saved : %s (%.1f%%) | Original %s | %s", 
998                         GLib.format_size(this.used_space_before - this.used_space_after), 
999                         100f * ((1f * (this.used_space_before - this.used_space_after)) / (this.used_space_before * 1f)), 
1000                         GLib.format_size(this.used_space_before),                       
1001                         msg
1002                 );
1003         
1004         }
1005         
1006         
1007
1008 }