src/strip.vala
[app.mailtrimmer] / src / strip.vala
index ce5e7a5..1b22486 100644 (file)
@@ -1,5 +1,13 @@
 /**
 
+ ** checks left to do:
+  - range scans on maildir
+  - see how replacing the links works in the resulting email via thunderbird etc..
+  - some checksum issues (see duplicates?? suspect 0-byte issues?)  -- seems ok now?
+
+
+
   needs to scan 2 things
   a) our mailfort email database
        point it at the top directory, containing YEAR/MONTH/DAY.... directories.
@@ -123,7 +131,8 @@ public class StripApplication : GLib.Application {
                { "oldest", 0, 0, OptionArg.INT, ref opt_age_oldest, "do not replace messages older than X (default is 6 months)", null },
 
                { "scan-maildir", 0, 0, OptionArg.NONE, ref opt_scan_maildir, "scan an maildir tree", null },
-               { "scan-mailfort", 0, 0, OptionArg.NONE, ref opt_scan_mailfort, "scan a mailfort tree", null },  
+               { "scan-mailfort", 0, 0, OptionArg.NONE, ref opt_scan_mailfort, "scan a mailfort tree", null }, 
+               { "scan-mailfort-only", 0, 0, OptionArg.NONE, ref opt_scan_mailfort, "scan a mailfort Year/month eg. 2010/08", null }, 
                { null }       
        };         
     public StripApplication( string[] args ) 
@@ -360,10 +369,30 @@ public class Strip : GLib.Object {
                return;
        }
        
+       // initialize it with known data..
+       // that should wipe out dupes.
+       var matches = this.query("SELECT count(id)   FROM Attachment WHERE id = %d".printf(
+                       int.parse(sid)));  
+
+                
+               if (matches == "0") {    
+                       // our old mailfort code deleted the crap out of old records...
+                       // if this occurs we will need to create the record again..
+                       this.fix_deleted_attachment_db(int.parse(sid),attachment);
+                       return;
+               }
+       
+       
        // initialize it with known data..
        // that should wipe out dupes.
        var filesize = this.query("SELECT filesize FROM Attachment WHERE id = %d".printf(
-                       int.parse(sid)));       
+                       int.parse(sid)));  
+
+               if (filesize=="") {      
+                  GLib.error("Ignoring record id (missing in database) :%s", sid);
+                  return;
+               }
                if (int.parse(filesize) < 1) {
                GLib.debug("Could not get filesize from id :%s = %s", sid,filesize);
                Posix.exit(0);
@@ -410,6 +439,104 @@ public class Strip : GLib.Object {
     }
     
     
+    /**
+     * Re-create an Attachment row that is missing from the database, using the
+     * X-strip-* headers of the part and the file stored under opt_target_path.
+     * Nothing is inserted when the stored file cannot be found or read.
+     * @param id          id to give the re-created Attachment row
+     * @param attachment  part carrying the X-strip-* headers
+     */
+    public void fix_deleted_attachment_db(int id, GMime.Part attachment)
+    {
+        var filename = attachment.get_header("X-strip-content-name") ?? "";
+        var mime_type = attachment.get_header("X-strip-content-type") ?? "";
+        var file_path = attachment.get_header("X-strip-path") ?? "";
+        var fn = StripApplication.opt_target_path + "/" + file_path;
+        // without a path header fn would be the target directory itself (which exists)
+        if (file_path == "" || !FileUtils.test (fn, FileTest.EXISTS)) {
+            GLib.debug("SKIP -- file does not exist");
+            return;
+        }
+        int file_size;
+        try {
+            file_size = (int) File.new_for_path(fn).query_info(
+                GLib.FileAttribute.STANDARD_SIZE, GLib.FileQueryInfoFlags.NONE, null).get_size();
+        } catch (GLib.Error e) {
+            GLib.debug("SKIP -- could not read size of %s: %s", fn, e.message);
+            return;
+        }
+        var chksum = this.md5_file(fn);
+        this.real_query(-1, """
+                      
+                      
+                               INSERT INTO Attachment  (  
+                                       id, 
+                                       
+                                   msgid ,
+                                   queue_id ,
+                                   mime_filename ,
+                                   mime_type,
+                                    
+                                   stored_filename ,
+                                   mime_charset ,
+                                   mime_cdisp ,
+                                   mime_is_cover ,
+                                   
+                                   mime_is_multi ,
+                                   mime_is_mail,
+                                   mime_size ,
+                                   filesize,
+                                   
+                                   checksum,
+                                   created
+
+                               ) VALUES (
+                                       %d,  -- id
+                                       
+                                   '%s' , -- msgid
+                                   0,
+                                   '%s'  , -- filename
+                                   '%s',  -- mimetype
+                                   
+                                   '%s', -- stored file anme
+                                   '', -- charset
+                                   'attachment',
+                                   0,
+                                   0,
+                                   0,
+                                   %d, -- size
+                                   %d, -- size
+                                   
+                                   '%s', -- checkum
+                                       '%s' -- created:
+                               )
+                      
+                      
+                     """.printf(
+            id,
+            this.mysql_escape(this.active_message_exim_id),
+            this.mysql_escape(filename),
+            this.mysql_escape(mime_type),
+            this.mysql_escape(file_path),
+            file_size, file_size, // mime_size and filesize columns
+            this.mysql_escape(chksum),
+            this.created_date
+        ));
+        // this is done to fix the queue_id or maillog_id ??
+                this.query("""
+                 SELECT attachment_update(
+                      %d, -- in_id INT(11),
+                                               '', -- mime type
+                      '%s', -- in_created DATETIME,
+                      '%s' -- in_mailfort_sig varchar(64)
+                 )
+             """.printf(
+            id,
+            this.created_date,
+            this.mysql_escape(this.active_message_x_mailfort_sig)
+        ));
+    }
+    
+    
     public void replace_attachment(GMime.Multipart parent, GMime.Part attachment)
     {
         var sid = attachment.get_header("X-strip-id");
@@ -422,7 +549,7 @@ public class Strip : GLib.Object {
         
         var filename = attachment.get_filename().replace("/", "-").replace("\n", "").replace("\t", " ");
         var fn = GLib.Environment.get_tmp_dir() +
-                       "/"+ this.active_name + "."+   filename;
+                       "/"+ this.active_name + "."+   GLib.Uri.escape_string(filename,"", false);
 
            var outfile = new GMime.StreamFile.for_path(fn, "w");
            outfile.set_owner(true);
@@ -458,7 +585,7 @@ public class Strip : GLib.Object {
           """.printf(
                        this.mysql_escape(this.active_message_exim_id),
                        chksum,
-                       this.mysql_escape( attachment.get_filename() ), // what is thsi is invalid?
+                       this.mysql_escape( GLib.Uri.escape_string(attachment.get_filename(),"", false) ), // what is thsi is invalid?
                         file_size)
                );
                 
@@ -502,10 +629,10 @@ public class Strip : GLib.Object {
                var target_fn = "";
 
            if (StripApplication.opt_is_extracting) {
-                       target_fn = StripApplication.opt_target_path + "/" + this.created_dir +"/"+ file_id  + "-" + filename;
+                       target_fn = StripApplication.opt_target_path + "/" + this.created_dir +"/"+ file_id  + "-" + GLib.Uri.escape_string(filename,"", false);
                } 
                    
-           var stored =  "/" + this.created_dir +"/"+ file_id  + "-" + filename;
+           var stored =  "/" + this.created_dir +"/"+ file_id  + "-" + GLib.Uri.escape_string(filename,"", false);
                 this.query("""
                
                        SELECT attachment_update_store(
@@ -524,15 +651,15 @@ public class Strip : GLib.Object {
         rep.set_filename(filename);
         string txt = "<html><body>"+
             "<a href=\"" + StripApplication.opt_replace_link + "/" +
-                       file_id + "/" + this.created_dir + "/"+chksum+"/"+ GLib.Uri.escape_string( filename) +"\">" + 
-            GLib.Uri.escape_string( filename) + // fixme needs html escaping...
+                       file_id + "/" + this.created_dir + "/"+chksum+"/"+ GLib.Uri.escape_string( filename, "", false) +"\">" + 
+            GLib.Uri.escape_string( filename, "", false) + // fixme needs html escaping...
             "</a>" +
             "</body></html>";
 
         rep.get_content_type().set_parameter("charset", "utf-8");
                rep.set_header("X-strip-id", file_id);
                rep.set_header("X-strip-content-name",  filename);                              
-               rep.set_header("X-strip-path", this.created_dir + "/" + file_id + "-" + filename);              
+               rep.set_header("X-strip-path", this.created_dir + "/" + file_id + "-" +  GLib.Uri.escape_string(filename,"", false));           
                rep.set_header("X-strip-content-type", mime_type);              
         var stream =  new GMime.StreamMem.with_buffer(txt.data);
         var con = new GMime.DataWrapper.with_stream(stream,GMime.ContentEncoding.DEFAULT);
@@ -549,7 +676,10 @@ public class Strip : GLib.Object {
                        }
                        GLib.debug("Creating file %s", target_fn);
                        if (!FileUtils.test (target_fn, FileTest.EXISTS)) {
-                               Posix.link(fn, target_fn);
+                               var from = File.new_for_path (fn);
+                               var to =  File.new_for_path (target_fn);
+                               from.copy(to, 0, null);
+
                        }
                } else { 
                        GLib.debug("Skipping extraction %s", target_fn);
@@ -561,13 +691,19 @@ public class Strip : GLib.Object {
     }
     public string query(string str)
     {
-           return this.real_query(true, str);
+           return this.real_query(1, str); // 1 = a result row is required
     }
     public string execute(string str)
     {
-           return this.real_query(false, str);
+           return this.real_query(0, str); // 0 = a missing result row is not an error
     }
-    public string real_query(bool need_return, string str)
+    /**
+     * need_return:
+     *    0 = no  - a result row is optional
+     *    1 = yes - a result row is required
+     *   -1 = don't try - return without reading any rows
+     */
+    public string real_query(int need_return, string str)
     {
                GLib.debug("Before Query : %u  : %s\n", this.mysql.errno(), this.mysql.error());
 
@@ -578,15 +714,19 @@ public class Strip : GLib.Object {
                
                
        
-       var rc=  this.mysql.query(str);         
+       var rc=  this.mysql.query(str); 
        if ( rc != 0 ) {
 
                    GLib.debug("ERROR %u: Query failed: %s\n", this.mysql.errno(), this.mysql.error());
                                Posix.exit(1);
                }
-               
-
-        var rs = mysql.use_result();
+       var rs = mysql.use_result();
+               if (need_return == -1) {
+                       return "";
+               }
+        
+        //GLib.debug("got %d rows", (int) rs.num_rows());
         
         var got_row = false;
                string[] row;
@@ -596,15 +736,21 @@ public class Strip : GLib.Object {
                        ret = row[0];
                
                }
-               if (!need_return) {
-                       return "";
+               if (need_return == 0) {
+               if (StripApplication.opt_debug_sql) {
+                               GLib.debug("got %s", got_row ? "=Nothing=" : ret);
+                       }
+                       return got_row ? "" : ret;
                }
                if (!got_row) {
+
                         GLib.debug("ERROR : no rows returned");
                        Posix.exit(1);
                        return "";
                }
-               GLib.debug("got %s", ret);
+       if (StripApplication.opt_debug_sql) {
+                       GLib.debug("got %s", ret);
+               }
                return ret;
                
                 
@@ -752,8 +898,8 @@ public class Strip : GLib.Object {
                
                var oldest = new  DateTime.now_local();
                oldest = oldest.add_months(-1 * StripApplication.opt_age_oldest);
-               var rtspan = oldest.difference(oldest) / GLib.TimeSpan.DAY;
-
+               var rtspan = mailtime.difference(oldest) / GLib.TimeSpan.DAY;
+               GLib.debug("Checking oldest %d days difference", (int)rtspan   );
                if (rtspan < 0) {
                        GLib.debug("skip(2) file is %d days older than %d months", (int)rtspan, StripApplication.opt_age_oldest);
                        return;
@@ -834,6 +980,7 @@ public class Strip : GLib.Object {
         GLib.debug("finished writing output %d", file_size);
 
         //
+        outstream = null;
         
          
        this.used_space_after += file_size;
@@ -842,12 +989,17 @@ public class Strip : GLib.Object {
        if (StripApplication.opt_is_replacing) {
                Posix.unlink(path +"/" + name);         
                GLib.debug("copy tmp file %s to %s" , tmpfile, path +"/" + name);               
-               Posix.link(tmpfile, path +"/" + name);
-               Posix.unlink(tmpfile);
-               var nf = File.new_for_path(path +"/" + name);
+               
+               // link will not work, as we are doing it across file systems
+                       var from = File.new_for_path (tmpfile);
+                       var nf =  File.new_for_path (path +"/" + name);
+                       from.copy(nf, 0, null);
+                       
+
                var newfileinfo = nf.query_info(GLib.FileAttribute.TIME_MODIFIED,GLib.FileQueryInfoFlags.NONE,null);
                newfileinfo.set_modification_time(mod_time);
                nf.set_attributes_from_info(newfileinfo,FileQueryInfoFlags.NONE);
+               Posix.unlink(tmpfile);
                }
        this.processed++;
        
@@ -865,6 +1017,7 @@ public class Strip : GLib.Object {
     public void scan_dir(string basepath, string subpath)
     {
         
+        
         // determine if path is to old to scan..
         if (subpath.length > 0 && StripApplication.opt_scan_mailfort) {
                        var year =  int.parse(subpath.substring(1,4));  // "/2000"
@@ -874,36 +1027,36 @@ public class Strip : GLib.Object {
                var oldest = new  DateTime.now_local();
                        oldest = oldest.add_months(-1 * StripApplication.opt_age_oldest);
                        
-                       GLib.debug("Checking directory %s is older than min: %d/%d/%d", subpath, oldest.get_year() , oldest.get_month(), oldest.get_day_of_month() );                                   
+                       //GLib.debug("Checking directory %s is older than min: %d/%d/%d", subpath, oldest.get_year() , oldest.get_month(), oldest.get_day_of_month() );                                 
                        
                        if (year < oldest.get_year()) {
                                GLib.debug("Skip directory %s is older than min year: %d", subpath, oldest.get_year());
                                return;
                        }
                        if (year == oldest.get_year() &&  month < oldest.get_month()) {
-                               GLib.debug("Skip directory %s is older than min year: %d/%d", subpath, oldest.get_year() , oldest.get_month() );
+                               GLib.debug("Skip directory %s is older than min month: %d/%d", subpath, oldest.get_year() , oldest.get_month() );
                                return;
                        }
                if (year == oldest.get_year() &&  month == oldest.get_month() && day < oldest.get_day_of_month()) {
-                               GLib.debug("Skip directory %s is older than min year: %d/%d/%d", subpath, oldest.get_year() , oldest.get_month(), oldest.get_day_of_month() );          
+                               GLib.debug("Skip directory %s is older than min day: %d/%d/%d", subpath, oldest.get_year() , oldest.get_month(), oldest.get_day_of_month() );           
                                return;
                        }
-                
+               
                var newest = new  DateTime.now_local();
                        newest = newest.add_months(-1 * StripApplication.opt_age_newest);
                        
-                       GLib.debug("Checking directory %s is newer than min: %d/%d/%d", subpath, newest.get_year() , newest.get_month(), newest.get_day_of_month() );                                   
+                       //GLib.debug("Checking directory %s is newer than max: %d/%d/%d", subpath, newest.get_year() , newest.get_month(), newest.get_day_of_month() );                                 
                        
                        if (year > newest.get_year()) {
-                               GLib.debug("Skip directory %s is newer than min year: %d", subpath, newest.get_year());
+                               GLib.debug("Skip directory %s is newer than max year: %d", subpath, newest.get_year());
                                return;
                        }
-                       if (year == newest.get_year() &&  month > newest.get_month()) {
-                               GLib.debug("Skip directory %s is newer than min year: %d/%d", subpath, newest.get_year() , newest.get_month() );
+                       if (year == newest.get_year() &&  month != 999 && month > newest.get_month()) {
+                               GLib.debug("Skip directory %s is newer than max month: %d/%d", subpath, newest.get_year() , newest.get_month() );
                                return;
                        }
-               if (year == newest.get_year() &&  month == newest.get_month() && day > newest.get_day_of_month()) {
-                               GLib.debug("Skip directory %s is newer than min year: %d/%d/%d", subpath, newest.get_year() , newest.get_month(), newest.get_day_of_month() );          
+               if (year == newest.get_year() &&  month == newest.get_month() &&  day != 999 && day > newest.get_day_of_month()) {
+                               GLib.debug("Skip directory %s is newer than max day: %d/%d/%d", subpath, newest.get_year() , newest.get_month(), newest.get_day_of_month() );           
                                return;
                        }
                
@@ -946,17 +1099,21 @@ public class Strip : GLib.Object {
                 continue;
             }
             
-
+            var ds = next_file.get_display_name();
             if (next_file.get_file_type() != FileType.DIRECTORY) {
                 
-                if (next_file.get_display_name()[0] == ',') {
+                
+                
+                if (ds[0] == ',') {
                        continue;
                }
                // other files to ignore?
-                
-                this.scan_file(basepath + subpath , next_file.get_display_name());
+                if (Regex.match_simple (".tgz$", ds)) {
+                       continue;
+               }
+                this.scan_file(basepath + subpath , ds);
                                if(this.has_replaced) {
-                                this.report_state("After scanning %s/%s".printf(basepath + subpath , next_file.get_display_name()));
+                                this.report_state("After scanning %s/%s".printf(basepath + subpath , ds));
                        }
                 continue;
             }
@@ -967,7 +1124,7 @@ public class Strip : GLib.Object {
 
 
         
-            var ds = next_file.get_display_name();
+
             // not really needed?? - we are storing attachments in a seperate location now...
             if (ds[0] == '.') {
                 next_file = null;