src/strip.vala
[app.mailtrimmer] / src / strip.vala
index df3d702..f5f61e6 100644 (file)
@@ -82,9 +82,13 @@ public class StripApplication : GLib.Application {
 
        public static int    opt_limit = -1;
 
+       public static int    opt_age_newest = 1;
+       public static int    opt_age_oldest = 6;
+
+
        public static bool      opt_is_extracting = false;
        public static bool      opt_is_replacing = false;
-       public static bool      opt_scan_imap  = false; 
+       public static bool      opt_scan_maildir  = false; 
        public static bool      opt_scan_mailfort  = false;     
        public static bool              opt_dump = false;       
        public static bool              opt_debug = false; 
@@ -115,8 +119,10 @@ public class StripApplication : GLib.Application {
                { "dump", 0, 0, OptionArg.NONE, ref opt_dump, "Print the replaced mail contents to stdout", null },         
 
                { "limit", 0, 0, OptionArg.INT, ref opt_limit, "stop after X number of messages with attachments have been processed", null },         
+               { "newest", 0, 0, OptionArg.INT, ref opt_age_newest, "do not replace messages newer that X months (default is 1 months)", null },
+               { "oldest", 0, 0, OptionArg.INT, ref opt_age_oldest, "do not replace messages older than X (default is 6 months)", null },
 
-               { "scan-imap", 0, 0, OptionArg.NONE, ref opt_scan_imap, "scan an imap tree", null },
+               { "scan-maildir", 0, 0, OptionArg.NONE, ref opt_scan_maildir, "scan an maildir tree", null },
                { "scan-mailfort", 0, 0, OptionArg.NONE, ref opt_scan_mailfort, "scan a mailfort tree", null },  
                { null }       
        };         
@@ -140,7 +146,7 @@ public class StripApplication : GLib.Application {
  
                            
              // options that have to be set.. bee or hive... (or stop all)
-            if ((!opt_scan_mailfort && !opt_scan_imap) || (opt_scan_mailfort && opt_scan_imap))  {
+            if ((!opt_scan_mailfort && !opt_scan_maildir) || (opt_scan_mailfort && opt_scan_maildir))  {
                stdout.printf ("You must specify the type of directory tree to scan - either imap or mailfort\n%s",
                    opt_context.get_help(true, null));
                GLib.Process.exit(Posix.EXIT_FAILURE);
@@ -215,6 +221,7 @@ public class StripApplication : GLib.Application {
                        stdout.printf("ERROR %u: Connection failed: %s\n", 
                                strip.mysql.errno(), strip.mysql.error()
                        );
+
                        return 1;
                }
         if (opt_file != null) {
@@ -223,7 +230,7 @@ public class StripApplication : GLib.Application {
                return 0;
         }
 
-               strip.scan_dir(opt_path);
+               strip.scan_dir(opt_path, "");
         
 
         
@@ -324,7 +331,7 @@ public class Strip : GLib.Object {
                        var mp = ((GMime.Message) mime_obj).get_mime_part();
 
                        if (!(mp is GMime.Multipart)) {
-                               GLib.debug("get mimepart does not return a Multipart?");
+                               //GLib.debug("get mimepart does not return a Multipart?");
                                return;
                        }
                        
@@ -352,28 +359,53 @@ public class Strip : GLib.Object {
                GLib.debug("Strange - update attachment db called ?");
                return;
        }
-       this.query("""
-               SELECT attachment_update(
-                               %d, -- in_id INT(11),
-                               '%s', -- in_mime_type varchar(255),
-                               %d, -- in_mime_size int(11),
-                               '%s', -- in_created DATETIME,
-                               '%s' -- in_mailfort_sig varchar(64)
-                               
-                   )""
        
-                                
+       // initialize it with known data..
+       // that should wipe out dupes.
+       var filesize = this.query("SELECT filesize FROM Attachment WHERE id = %d".printf(
+                       int.parse(sid)));       
+               if (int.parse(filesize) < 1) {
+               GLib.debug("Could not get filesize from id :%s = %s", sid,filesize);
+               Posix.exit(0);
+               return;
+       }
+       
+       var chksum = this.query("SELECT  checksum FROM Attachment WHERE id = %d".printf(
+                       int.parse(sid)
+               ));
+       var mime_filename = this.query("SELECT  mime_filename FROM Attachment WHERE id = %d".printf(
+                       int.parse(sid)));       
+               
+       this.query("""
+             SELECT 
+                 attachment_init(
+                     '%s', '%s', '%s', %d
+                 ) as id 
+                 
+          """.printf(
+                         this.mysql_escape(this.active_message_exim_id),
+                         this.mysql_escape(chksum),
+                         this.mysql_escape(mime_filename),                       
+                         int.parse(filesize)
+               ));
+       this.query("""
+                 SELECT attachment_update(
+                      %d, -- in_id INT(11),
+                      '%s', -- in_mime_type varchar(255),
+                      '%s', -- in_created DATETIME,
+                      '%s' -- in_mailfort_sig varchar(64)
+                 )
              """.printf(
                        int.parse(sid),
                        "", // this will be ignored..
-                       0, // this will be ingored..
                                this.created_date,
                                this.mysql_escape(this.active_message_x_mailfort_sig)
              
              )
                );
-       
-       
+               this.mysql.store_result();
+               
+
     
     }
     
@@ -388,7 +420,7 @@ public class Strip : GLib.Object {
         
         var c = attachment.get_content_object();
         
-        var filename = attachment.get_filename().replace("/", "-");
+        var filename = attachment.get_filename().replace("/", "-").replace("\n", "").replace("\t", " ");
         var fn = GLib.Environment.get_tmp_dir() +
                        "/"+ this.active_name + "."+   filename;
 
@@ -413,158 +445,86 @@ public class Strip : GLib.Object {
         // at this point we have to do our database magic...
         //filesize / name / date / checksum / mimetype -- into mailfort should be OK.
         
-        this.query("""
+        var file_id = this.query("""
                SELECT 
-                       id,
-                       DATE_FORMAT(created,"%%Y/%%m/%%d") as create_date,
-                       msgid
-                FROM 
-                       Attachment 
-                       WHERE
-                checksum = '%s'
-                AND
-               filesize = %d
-               LIMIT
-                       1
-          """.printf( chksum, file_size)
+               
+               attachment_init(
+                               '%s', -- in_msgid VARCHAR(32),
+                               '%s', -- in_checksum VARCHAR(64),
+                               '%s', -- in_mime_filename varchar(255)
+                               %d -- filesize
+                       ) as id 
+                       
+          """.printf(
+                       this.mysql_escape(this.active_message_exim_id),
+                       chksum,
+                       this.mysql_escape( attachment.get_filename() ), // what is thsi is invalid?
+                        file_size)
                );
-               var file_id = "0";
-        var rs = mysql.use_result();
-               var row = rs.fetch_row();
-               
-               var target_fn = "";
+                
                
-               var create_dir = this.created_dir;
+               if (file_id.length < 1) {
+                       GLib.debug("ERROR - CALL to attachment_init failed");
+               Posix.unlink(fn);               
+               return;
                
+               }
+               if (int.parse(file_id) < 1) {
+                       GLib.debug("ERROR - CALL to attachment_init failed - returned 0?");
+               Posix.unlink(fn);               
+               return;
                
-               if (rs != null  && rs.num_rows() > 0) {
-                       file_id =  row[0];
-                       create_dir = row[1];
-                       GLib.debug("msgid %s", row[2]);
-                       Posix.exit(0);
-                       this.used_space_after += file_size;
-                       if (StripApplication.opt_is_extracting) {
-                               target_fn = StripApplication.opt_target_path + "/" + this.created_dir +"/"+ file_id  + "-" + filename;
-                       } 
-                       
-                       
-               } else {
-               
-                       GLib.debug("fn = %s, m5=%s, id= %s", filename, mime_type, this.active_message_id);
-                       this.query("""
-                       INSERT INTO
-                                       Attachment 
-                               (
-                                        queue_id,
-                                        mime_charset,
-                                        mime_cdisp,
-                                        mime_size,
-                                        
-                                        mime_is_cover,
-                                        mime_is_multi,
-                                        mime_is_mail,
-                                        
-                                        msgid,
-                                        maillog_id,
-                                        delivered,
-                                        
-                                        
-                                        checksum,
-                                        filesize,
-                                        stored_filename,
-                                        
-                                        created,
-                                        mime_filename,
-                                        mime_type,
-                                        
+               }
  
-                               )
-                                       VALUES
-                               (
-                                        COALESCE((SELECT id from MailQueue where msgid = '%s' AND message_sig = '%s' AND  msgid != '' AND message_sig != '' LIMIT 1),0),
-                                        '%s',
-                                        '%s',
-                                        %d,
-                                        
-                                        0,
-                                        0,
-                                        0,
-                                        
-                                        '%s',
-                                        COALESCE((SELECT id from email_log where msgid = '%s' AND message_sig = '%s' AND  msgid != '' AND message_sig != '' LIMIT 1),0),
-                                       '%s',
-                                        
-                                        '%s',
-                                        %d,
-                                        '%s',
-                                        
-                                        '%s',
-                                        '%s',
-                                        '%s' 
-                                         
-                               )
-                                        
-                     """.printf(
-                               this.mysql_escape(this.active_message_exim_id), this.mysql_escape(this.active_message_x_mailfort_sig),
-                               "",
-                               "attachment",
-                                file_size,
-                                
-                               this.mysql_escape(this.active_message_exim_id), // msgid ?
-                               this.mysql_escape(this.active_message_exim_id), this.mysql_escape(this.active_message_x_mailfort_sig), // maillog_id
-                               this.created_date, // delivered (from lookup)
-                                
-                                        chksum,
-                                        file_size,
-                                        "", // filled in after we find out the id..
-                                        
-                                        this.created_date,
-                                        this.mysql_escape(filename),
-                                        this.mysql_escape(mime_type) 
-                                        
+       
+               GLib.debug("fn = %s, m5=%s, id= %s", filename, mime_type, this.active_message_id);
+               this.query("""
+               
+                       SELECT attachment_update(
+                               %d, -- in_id INT(11),
+                               '%s', -- in_mime_type varchar(255),
+                               '%s', -- in_created DATETIME,
+                               '%s' -- in_mailfort_sig varchar(64)
+                               
+                               ) as result
+      """.printf(
+               int.parse(file_id),
+                       this.mysql_escape(mime_type),
+                       this.created_date,
+                       this.mysql_escape(this.active_message_x_mailfort_sig)
+               ));
+                this.mysql.store_result();
+                                
  
-                     
-                     
-                     )
-                       );
-                       this.used_space_after += file_size;
+               this.used_space_after += file_size;
                        
-                       var file_id_real = this.mysql.insert_id();
-                       GLib.debug("Got file_id = %d\n", (int)file_id_real);                    
-                       if (file_id_real == 0) {
-                               GLib.Process.exit(1);
-                       }
-                       file_id = "%d".printf((int)file_id_real);
-                       
-                       
+               var target_fn = "";
 
-                  if (StripApplication.opt_is_extracting) {
-                               target_fn = StripApplication.opt_target_path + "/" + this.created_dir +"/"+ file_id  + "-" + filename;
-                       } 
-                   
-                   var stored =  "/" + this.created_dir +"/"+ file_id  + "-" + filename;
-                   
-                       this.query("""
-                               UPDATE
-                                       Attachment
-                               SET
-                                       stored_filename = '%s'
-                               WHERE
-                                       id = %s
-                       """.printf(
-                               this.mysql_escape( stored),
-                               file_id
-                       ));
-               
+           if (StripApplication.opt_is_extracting) {
+                       target_fn = StripApplication.opt_target_path + "/" + this.created_dir +"/"+ file_id  + "-" + filename;
                } 
+                   
+           var stored =  "/" + this.created_dir +"/"+ file_id  + "-" + filename;
+                this.query("""
                
+                       SELECT attachment_update_store(
+                               %d, -- in_id INT(11),
+                               '%s'  -- in_store_filename varchar(255),
+                        
+                               
+                               ) as result
+      """.printf(
+               int.parse(file_id),
+                        this.mysql_escape( stored)
+               ));   
+                        
         var rep = new GMime.Part.with_type("text","html");
         // we have to set up a redirect server - to redirect hpasite... to their internal service..
         rep.set_filename(filename);
-        var txt = "<html><body>"+
+        string txt = "<html><body>"+
             "<a href=\"" + StripApplication.opt_replace_link + "/" +
-                       file_id + "/" + create_dir + "/"+chksum+"/"+ GLib.Uri.escape_string( filename) +"\">" + 
+                       file_id + "/" + this.created_dir + "/"+chksum+"/"+ GLib.Uri.escape_string( filename) +"\">" + 
             GLib.Uri.escape_string( filename) + // fixme needs html escaping...
             "</a>" +
             "</body></html>";
@@ -572,7 +532,7 @@ public class Strip : GLib.Object {
         rep.get_content_type().set_parameter("charset", "utf-8");
                rep.set_header("X-strip-id", file_id);
                rep.set_header("X-strip-content-name",  filename);                              
-               rep.set_header("X-strip-path", create_dir + "/" + file_id + "-" + filename);            
+               rep.set_header("X-strip-path", this.created_dir + "/" + file_id + "-" + filename);              
                rep.set_header("X-strip-content-type", mime_type);              
         var stream =  new GMime.StreamMem.with_buffer(txt.data);
         var con = new GMime.DataWrapper.with_stream(stream,GMime.ContentEncoding.DEFAULT);
@@ -599,19 +559,55 @@ public class Strip : GLib.Object {
 
 
     }
-    
-    public int query(string str)
+    // Run a statement that must produce a row: returns column 0 of the last row fetched.
+    // The process exits (inside real_query) if the query fails or yields no row at all.
+    public string query(string str)
     {
+           return this.real_query(true, str);
+    }
+    // Run a statement whose result is not needed: rows are read and discarded and "" is
+    // always returned. The process still exits if the query itself fails.
+    public string execute(string str)
+    {
+           return this.real_query(false, str);
+    }
+    // Worker behind query()/execute(): logs the connection's prior error state, runs str and
+    // reads the result set to its end (mysql_use_result() results must be fully fetched).
+    // need_return = true makes "no row" fatal; a NULL first column is returned as "".
+    public string real_query(bool need_return, string str)
+    {
+               GLib.debug("Before Query : %u  : %s\n", this.mysql.errno(), this.mysql.error());
        if (StripApplication.opt_debug_sql) {
                GLib.debug("SQL: %s\n", str);
                }
        
        var rc=  this.mysql.query(str);         
        if ( rc != 0 ) {
 
                    GLib.debug("ERROR %u: Query failed: %s\n", this.mysql.errno(), this.mysql.error());
+                               Posix.exit(1);
+               }
+               var rs = this.mysql.use_result();
+               var got_row = false;
+               string ret = "";
+               // use_result() gives null when the statement produced no result set - nothing to read then
+               if (rs != null) {
+                       string[] row;
+                       while ((row = rs.fetch_row()) != null) {
+                               got_row = true;
+                               ret = row[0] ?? "";
+                       }
+               }
+               if (!need_return) {
+                       return "";
                }
-               return rc;
+               if (!got_row) {
+                        GLib.debug("ERROR : no rows returned");
+                       Posix.exit(1);
+                       return "";
+               }
+               GLib.debug("got %s", ret);
+               return ret;
        }
     
     public string mysql_escape(string str)
@@ -654,13 +650,56 @@ public class Strip : GLib.Object {
        this.active_name = name;
        this.active_message_id = "";
 
-        this.created_dir = this.active_path.substring(this.base_dir.length + 1 );
-               this.created_date = this.created_dir.replace("/", "-");
+               // Fallback timestamp is "now"; the mailfort branch below replaces it with the date encoded in the path.
+               var mailtime = new DateTime.now_local();
+               if (StripApplication.opt_scan_mailfort) {
+                   this.created_dir = this.active_path.substring(this.base_dir.length + 1 );
+                       this.created_date = this.created_dir.replace("/", "-");
+                       var bits = this.created_date.split("-");
+                       if (bits.length < 3) {
+                               GLib.debug("skip file - no year/month/day in path: %s", this.created_dir);
+                               return;
+                       }
+                       mailtime = new DateTime.local(int.parse(bits[0]),int.parse(bits[1]),int.parse(bits[2]),0,0,0);
+                       // a.difference(b) is a - b, so a negative span means mailtime lies before the cut-off
+                       var oldest = (new DateTime.now_local()).add_months(-1 * StripApplication.opt_age_oldest);
+                       var tspan = mailtime.difference(oldest) / GLib.TimeSpan.DAY;
+                       if (tspan < 0) {
+                               GLib.debug("skip file is %d days older than %d months", (int)(-tspan), StripApplication.opt_age_oldest);
+                               return;
+                       }
+                       var newest = (new DateTime.now_local()).add_months(-1 * StripApplication.opt_age_newest);
+                       tspan = mailtime.difference(newest) / GLib.TimeSpan.DAY;
+                       if (tspan > 0) {
+                               GLib.debug("skip file is %d days newer than %d months", (int)tspan, StripApplication.opt_age_newest);
+                               return;
+                       }
+               }
+       
        
-       var file_size = (int) File.new_for_path(path +"/" + name)
-                                       .query_info(GLib.FileAttribute.STANDARD_SIZE,GLib.FileQueryInfoFlags.NONE,null)
-                                       .get_size();
-                                              
+               var fileinfo = File.new_for_path(path +"/" + name)
+                                       .query_info(GLib.FileAttribute.STANDARD_SIZE+","+GLib.FileAttribute.TIME_MODIFIED
+                                               ,GLib.FileQueryInfoFlags.NONE,null);
+       var file_size = (int) fileinfo.get_size();
+               var mod_time = fileinfo.get_modification_time();
+               
+               
+               
+               if (!StripApplication.opt_scan_mailfort) {
+                  
+               // it's a mail directory...
+               // use the last modification time? as the default...
+                mailtime = new DateTime.from_timeval_utc(mod_time);
+                this.created_dir = mailtime.format("%Y/%m/%d");
+                        this.created_date =  mailtime.format("%Y-%m-%d %H:%M:%S");
+       }
+               // check on age of file...
+               
+               
+               
+               
+               
        this.used_space_before += file_size;
        
        var stream = new GMime.StreamFs.for_path (path +"/" + name,Posix.O_RDONLY, 0);
@@ -687,23 +726,63 @@ public class Strip : GLib.Object {
                        for (var i = 0; i < lines.length;i++) {
                                var bits = lines[i].strip().split(" ");
                                if (bits[0] == "id") {
-                                       this.active_message_exim_id = bits[1];
+                                       this.active_message_exim_id = bits[1].replace(";","");
+
                                }
+                               
+                               if (lines[i].contains(";")) {
+                                       var dbits = lines[i].strip().split(";");                                
+                                       GLib.debug("Reading time from : %s", dbits[1]);
+                                       var timez = GMime.utils_header_decode_date(dbits[1], null);
+                                       if (timez != 0) {
+                                               mailtime = new DateTime.from_unix_utc(timez);
+                                               this.created_date = mailtime.format("%Y-%m-%d %H:%M:%S");
+                                               GLib.debug("Time is %s",this.created_date);
+                                               // if it's not mailfort we can use that date to determine where to store it...
+                                               if (!StripApplication.opt_scan_mailfort) {
+                                                       this.created_dir = mailtime.format("%Y/%m/%d");
+                                               }
+                                       } else {
+                                               GLib.debug("Could not read time from headers?");
+                                       }
+                               }
+
                        }
                }
+               
+               // Final age gate on the best timestamp known by now: only mail between the two cut-offs is processed.
+               // a.difference(b) is a - b, so a negative span means mailtime lies before the cut-off.
+               var oldest = (new DateTime.now_local()).add_months(-1 * StripApplication.opt_age_oldest);
+               var rtspan = mailtime.difference(oldest) / GLib.TimeSpan.DAY;
+               if (rtspan < 0) {
+                       GLib.debug("skip(2) file is %d days older than %d months", (int)(-rtspan), StripApplication.opt_age_oldest);
+                       return;
+               }
+               // the "newest" cut-off is counted back from now, independently of the "oldest" one
+               var newest = (new DateTime.now_local()).add_months(-1 * StripApplication.opt_age_newest);
+               rtspan = mailtime.difference(newest) / GLib.TimeSpan.DAY;
+               if (rtspan > 0) {
+                       GLib.debug("skip(2) file is %d days newer than %d months : %s", (int)rtspan, StripApplication.opt_age_newest,
+                               mailtime.format("%Y-%m-%d %H:%M:%S"));
+                       return;
+               }
+               
+               
+               
+               /*
                GLib.debug("Message DATA:\n mid: %s\nmailfort: %s \nexim_id: %s",
                        this.active_message_id,
                        this.active_message_x_mailfort_sig,
                        this.active_message_exim_id
                );
-                
+                */
                        
                // DATE?
                
                var mp = message.get_mime_part();
 
                if (!(mp is GMime.Multipart)) {
-                       GLib.debug("get mimepart does not return a Multipart?");
+                       //GLib.debug("get mimepart does not return a Multipart?");
                this.used_space_after += file_size;                                             
                        return;
                }
@@ -765,6 +844,10 @@ public class Strip : GLib.Object {
                GLib.debug("copy tmp file %s to %s" , tmpfile, path +"/" + name);               
                Posix.link(tmpfile, path +"/" + name);
                Posix.unlink(tmpfile);
+               var nf = File.new_for_path(path +"/" + name);
+               var newfileinfo = nf.query_info(GLib.FileAttribute.TIME_MODIFIED,GLib.FileQueryInfoFlags.NONE,null);
+               newfileinfo.set_modification_time(mod_time);
+               nf.set_attributes_from_info(newfileinfo,FileQueryInfoFlags.NONE);
                }
        this.processed++;
        
@@ -779,9 +862,38 @@ public class Strip : GLib.Object {
     }
     
     
-    public void scan_dir(string path)
+    public void scan_dir(string basepath, string subpath)
     {
-        var f = File.new_for_path(path);
+        
+        // determine if path is to old to scan..
+        if (subpath.length > 0 && StripApplication.opt_scan_mailfort) {
+                       var year =  int.parse(subpath.substring(1,4));  // "/2000"
+                       var month = subpath.length > 5 ? int.parse(subpath.substring(6,2)) : 999; // "/2000/12"                 
+                       var day = subpath.length > 8 ? int.parse(subpath.substring(9,2)) : 999; // "/2000/12/01"                        
+               
+               var oldest = new  DateTime.now_local();
+                       oldest = oldest.add_months(-1 * StripApplication.opt_age_oldest);
+                       
+                       GLib.debug("Checking directory %s is older than min: %d/%d/%d", subpath, oldest.get_year() , oldest.get_month(), oldest.get_day_of_month() );                                   
+                       
+                       if (year < oldest.get_year()) {
+                               GLib.debug("Skip directory %s is older than min year: %d", subpath, oldest.get_year());
+                               return;
+                       }
+                       if (year == oldest.get_year() &&  month < oldest.get_month()) {
+                               GLib.debug("Skip directory %s is older than min year: %d/%d", subpath, oldest.get_year() , oldest.get_month() );
+                               return;
+                       }
+               if (year == oldest.get_year() &&  month == oldest.get_month() && day < oldest.get_day_of_month()) {
+                               GLib.debug("Skip directory %s is older than min year: %d/%d/%d", subpath, oldest.get_year() , oldest.get_month(), oldest.get_day_of_month() );          
+                               return;
+                       }
+                
+               
+        }
+        
+        
+        var f = File.new_for_path(basepath + subpath);
                FileEnumerator file_enum;
         var cancellable = new Cancellable ();
         try {      
@@ -808,16 +920,25 @@ public class Strip : GLib.Object {
             if (next_file == null) {
                 break;
             }
-
+               
+               
+               if (next_file.get_is_symlink()) {
+                next_file = null;
+                continue;
+            }
+            
 
             if (next_file.get_file_type() != FileType.DIRECTORY) {
                 
                 if (next_file.get_display_name()[0] == ',') {
                        continue;
                }
+               // other files to ignore?
                 
-                this.scan_file(path , next_file.get_display_name());
-                this.report_state("After scanning %s/%s".printf(path , next_file.get_display_name()));
+                this.scan_file(basepath + subpath , next_file.get_display_name());
+                               if(this.has_replaced) {
+                                this.report_state("After scanning %s/%s".printf(basepath + subpath , next_file.get_display_name()));
+                       }
                 continue;
             }
 
@@ -826,12 +947,9 @@ public class Strip : GLib.Object {
             //        next_file.get_display_name(), next_file.get_file_type());
 
 
-            if (next_file.get_is_symlink()) {
-                next_file = null;
-                continue;
-            }
-            
+        
             var ds = next_file.get_display_name();
+            // not really needed?? - we are storing attachments in a seperate location now...
             if (ds[0] == '.') {
                 next_file = null;
                 continue;
@@ -841,14 +959,14 @@ public class Strip : GLib.Object {
                }
             
             
-            var sp = path+"/"+next_file.get_display_name();
+            var sp = subpath+"/"+next_file.get_display_name();
             // skip modules.
             //print("got a file : " + sp);
          
             next_file = null;
             
             
-            this.scan_dir(sp);
+            this.scan_dir(basepath,sp);
             
         }