Page.vala
author Alan Knowles <alan@roojs.com>
Tue, 16 Aug 2016 05:13:28 +0000 (13:13 +0800)
committer Alan Knowles <alan@roojs.com>
Tue, 16 Aug 2016 05:13:28 +0000 (13:13 +0800)
Release.vala
Request.vala
Spider.vala
WorkerBee.vala

Page.vala [deleted file]
Release.vala [deleted file]
Request.vala [deleted file]
Spider.vala [deleted file]
WorkerBee.vala [deleted file]

diff --git a/Page.vala b/Page.vala
deleted file mode 100644 (file)
index f51cad7..0000000
--- a/Page.vala
+++ /dev/null
@@ -1,154 +0,0 @@
-public class Page : Object, Json.Serializable
-{
-       // One crawled URL of a Domain, plus the state used to decide whether it must be fetched again.
-       // NOTE(review): fetchHeader(), fetchBody(), writeFile() and this.webview are not declared in this class.
-       // NOTE(review): the members below are plain fields, not GObject properties, and json-glib's
-       // GObject (de)serialization only walks properties - confirm toJSON() emits what is expected.
-
-       public Domain domain;
-       public string path;
-       public int seqid; // set from domain.local_seqid when the page is marked as scanned.
-       public int id; // from the external database..
-       public string etag; // the etag header variable to allow caching.
-       public string md5; // raw contents md5..
-
-       public GLib.DateTime last_dt; // last time it was scanned.
-       public GLib.DateTime last_change_dt; // last time it changed.  (if the difference of last_dt/change is big, then don't scan it much..?
-       public int depth; // pages discovered through this one get depth + 1.
-
-       public string words; //? extracted words?
-
-       public bool scanned; // has it been scanned.?
-       public bool scanned_needed; // does it need to be scanned this time...
-
-       // what about 'how often it changed??' how can we measure that
-
-       // Creates a page from a JSON record; so far only the owning domain is stored.
-       public Page.new_from_json(Domain domain, Json.Object obj)
-       {
-               this.domain = domain;
-               // TODO: copy the remaining members out of obj.
-       }
-
-       // Creates a not-yet-scanned page for a link found at the given depth.
-       public Page.new_from_link(Domain domain, string path, int depth)
-       {
-               this.domain = domain;
-               this.path = path;
-               this.depth = depth;
-               this.scanned = false;
-
-               // seqid? // other stuff..
-       }
-
-
-       /* ------------------      parsing behaviour ---------*/
-
-
-       // Fetches the page if it may have changed: the header is checked first, then the md5 of
-       // the body; only a changed body is handed to the web view for parsing.
-       public void fetchPage()
-       {
-               var header = this.fetchHeader(); // see if it's a new page..
-               if (header.etag == this.etag || header.modified.compare(this.last_dt) < 0) { // check this...
-                       this.markAsScanned();
-                       return;
-               }
-               var body = this.fetchBody(); // uchar[] ?
-               // compute_for_data() needs the algorithm; MD5 because the result is compared with this.md5.
-               var checksum = Checksum.compute_for_data (ChecksumType.MD5, body);
-               if (this.md5 == checksum) {
-                       this.markAsScanned();
-                       return;
-               }
-               // -- page has changed.... remember the new checksum so the next fetch compares against it.
-               this.md5 = checksum;
-               this.last_change_dt =  new DateTime.now_local ();
-
-               this.webview.load(this, body); // should trigger viewOnLoad when it's complete..
-       }
-
-       // Called when the web view has finished loading: extracts words and links, reports the
-       // words to the domain and registers linked pages (unless the depth limit is reached).
-       public void viewOnLoad() // called when page is loaded..
-       {
-               // use up the view.
-               var new_words = this.webview.extractWords();
-               var links = this.webview.parseLinks();
-
-               // destroy the view?
-
-               // this may trigger a 'success story'....
-               this.domain.compareReleaseWords(this, new_words);
-
-               // we have reached the limit.. (>= so a page already past the limit does not keep expanding)
-               if (this.depth >= this.domain.depth) {
-                       return;
-               }
-
-               var pgs = this.domain.pages;
-               for (var i = 0; i < links.size; i++) {
-                       var link = links.get(i);
-
-                       if (pgs.has_key(link)) {
-                               var known = pgs.get(link);
-                               if (!known.scanned) {
-                                       known.scanned_needed = true; // update old page scenario..?
-                               }
-                               continue;
-                       }
-                       // new page...
-                       var pg = new Page.new_from_link(this.domain, link, this.depth + 1);
-                       pgs.set(pg.path, pg);
-               }
-               // signal next scan to occur?
-       }
-
-       // Records a completed scan: stamps last_dt, takes the next domain sequence number and
-       // then calls writeFile().
-       public void markAsScanned()
-       {
-               this.scanned = true;
-               this.last_dt = new DateTime.now_local ();
-               this.domain.local_seqid++;
-               this.seqid  = this.domain.local_seqid;
-               // write file!??!
-               this.writeFile();
-       }
-
-
-       /* --------- serialization -----------*/
-
-       // Json.Serializable hook: "domain", "words" and "webview" are never read back from JSON;
-       // everything else uses the default handling.
-       public bool deserialize_property (string property_name, out Value value, ParamSpec pspec, Json.Node property_node) {
-
-               // things we do not deserialize?
-               if (property_name == "domain" ||  property_name == "words" || property_name == "webview") {
-                       return false;
-               }
-               return default_deserialize_property (property_name, out value, pspec, property_node);
-       }
-
-       // Json.Serializable hook: the three special properties are written as the domain name,
-       // everything else uses the default handling.
-       public Json.Node serialize_property (string property_name, GLib.Value value, GLib.ParamSpec pspec) {
-
-               // NOTE(review): "words" and "webview" also end up as the domain name - confirm that is intended.
-               if (property_name == "domain" ||  property_name == "words" || property_name == "webview") {
-                       // strings are stored in VALUE nodes; Json.NodeType has no STRING member.
-                       var ret = new Json.Node(Json.NodeType.VALUE);
-                       ret.set_string(this.domain.domain);
-                       return ret;
-               }
-
-               return default_serialize_property(property_name, value, pspec);
-       }
-
-       // Serializes this object through json-glib's GObject serializer.
-       public Json.Node toJSON()
-       {
-               return Json.gobject_serialize(this);
-       }
-
-}
\ No newline at end of file
diff --git a/Release.vala b/Release.vala
deleted file mode 100644 (file)
index d898151..0000000
+++ /dev/null
@@ -1,21 +0,0 @@
-
-
-public class Release : Object
-{
-       // One release record; instances are built from JSON objects via new_from_json().
-       public int id;
-       public string language;
-       public GLib.DateTime start_dt;
-       public GLib.DateTime end_dt;
-       public string[] words;
-       // NOTE(review): read by WorkerBee.fetchReleasesFromList(); nothing assigns it yet.
-       public string update_dt;
-
-       // obj is a json-glib object (namespace Json, as used by Page and WorkerBee).
-       // NOTE(review): nothing is read from obj yet, so every field keeps its default value.
-       public Release.new_from_json(Json.Object obj)
-       {
-
-       }
-
-}
diff --git a/Request.vala b/Request.vala
deleted file mode 100644 (file)
index b3d776d..0000000
+++ /dev/null
@@ -1,85 +0,0 @@
-
-/**
-
-idea is a async http request.
-
--- question is are we creating a generic request.. or just doing a silly wrapper of Soup?
-
-
-x = new Request(method, url)
-x.url = ...
-x.complete.connect((uri, headers, body, body_len) => {
-  .. what to do next
-});
-
-x.send(); // returns instantly...
-
-
-Usage:
-a - json fetches from main server.. (async not needed)
-b - head request on page to see if it's changed -- async useful - we might want to be doing a few at the same time..
-c - body requests on a page... async useful - we might want to be doing a few at the same time..
-
-
-
-valac  --thread  -g  Request.vala --pkg glib-2.0 --pkg gee-1.0 --pkg libsoup-2.4 --pkg gio-2.0 -o /tmp/req --target-glib=2.32  -X -lm -X -pg
-
-
-*/
-
-// Manual smoke test: sends one HEAD request and quits the main loop when "complete" fires.
-void main () {
-       var loop = new MainLoop();
-       var req = new Request("HEAD", "http://jobsonboats.com/");
-       req.complete.connect((uri, headers, body, body_len) => {
-               print("ct:  %s\ngot body %d\n", headers.get("Content-type"), (int)body_len);
-               loop.quit();
-       });
-       req.send();
-       loop.run();
-}
-
-public class Request : Object
-{
-       // Asynchronous HTTP request built on Soup.Session; the result is reported through "complete".
-       public string url;
-       public string method = "GET";
-       // Emitted once per send(); body is null and length 0 for HEAD requests.
-       public signal void complete( Soup.URI uri, Gee.HashMap<string,string> head, uint8[]? body, int64 length);
-
-       public Request(string method, string url)
-       {
-               this.method = method;
-               this.url = url;
-       }
-
-       // Queues the request on a new session; "complete" is emitted from the session's callback.
-       public void send()
-       {
-               Soup.Session session = new Soup.Session ();
-
-               session.use_thread_context = true;
-
-               var msg = new Soup.Message(this.method, this.url);
-
-               var headers= new Gee.HashMap<string,string>();
-
-               session.queue_message (msg, (obj, mess) => {
-                       print ("Status Code: %u\n", mess.status_code);
-                       print ("Final URL: %s\n", mess.uri.to_string (false));
-
-                       mess.response_headers.foreach ((name, val) => {
-                               print("HEADER %s: %s\n", name,val);
-                               headers.set(name, val);
-                       });
-                       if (this.method == "HEAD") {
-                               this.complete(mess.uri, headers, null, 0);
-                               return; // already reported; without this "complete" would be emitted twice.
-                       }
-                       this.complete(mess.uri, headers, mess.response_body.data, mess.response_body.length);
-               });
-       }
-
-}
\ No newline at end of file
diff --git a/Spider.vala b/Spider.vala
deleted file mode 100644 (file)
index 0129b68..0000000
+++ /dev/null
@@ -1,194 +0,0 @@
-static Spider  _Spider;
-// Window wrapper: a Gtk.Window containing a scrolled WebKit view (see the nested Xcls_* classes).
-public class Spider : Object
-{
-    public Gtk.Window el;
-    private Spider  _this;
-    // Returns the shared instance, creating it on first use.
-    public static Spider singleton()
-    {
-        if (_Spider == null) {
-            _Spider= new Spider();
-        }
-        return _Spider;
-    }
-    public Xcls_scrolled_window scrolled_window;
-    public Xcls_view view;
-
-        // my vars (def)
-
-    // ctor: creates the window, adds the scrolled view and connects the destroy/show handlers.
-    public Spider()
-    {
-        _this = this;
-        this.el = new Gtk.Window( Gtk.WindowType.TOPLEVEL );
-
-        // my vars (dec)
-
-        // set gobject values
-        var child_0 = new Xcls_scrolled_window( _this );
-        child_0.ref();
-        this.el.add (  child_0.el  );
-
-        //listeners
-        this.el.destroy.connect( () => {
-            Gtk.main_quit();
-        });
-        this.el.show.connect( () => {
-
-               print("resizing window\n");
-               this.el.resize(1200,500);
-               print("loading url\n");
-               this.view.el.load_uri(webkitpdf.opt_url);
-
-               // if we have not finished in 25 seconds
-               // call printit...
-                 GLib.Timeout.add_seconds(25, () => {
-                               _this.view.printit();
-                               return false; // one-shot; returning true would repeat the call every 25 seconds.
-               });
-
-
-        });
-    }
-
-    // user defined functions
-    public class Xcls_scrolled_window : Object
-    {
-        public Gtk.ScrolledWindow el;
-        private Spider  _this;
-
-
-            // my vars (def)
-
-        // ctor: registers itself on the owner and adds the web view as its child.
-        public Xcls_scrolled_window(Spider _owner )
-        {
-            _this = _owner;
-            _this.scrolled_window = this;
-            this.el = new Gtk.ScrolledWindow( null, null );
-
-            // my vars (dec)
-
-            // set gobject values
-            var child_0 = new Xcls_view( _this );
-            child_0.ref();
-            this.el.add (  child_0.el  );
-
-            // init method
-
-            {
-               this.el.set_policy(Gtk.PolicyType.AUTOMATIC, Gtk.PolicyType.AUTOMATIC);
-            }
-        }
-
-        // user defined functions
-    }
-    public class Xcls_view : Object
-    {
-        public WebKit.WebView el;
-        private Spider  _this;
-
-
-            // my vars (def)
-        public bool print_started;
-
-        // ctor: registers itself on the owner and connects the WebKit resource/load handlers.
-        public Xcls_view(Spider _owner )
-        {
-            _this = _owner;
-            _this.view = this;
-            this.el = new WebKit.WebView();
-
-            // my vars (dec)
-            this.print_started = false;
-
-            // set gobject values
-
-            //listeners
-            this.el.resource_request_starting.connect( (p0, webres, netreq ) => {
-              print("resource req. started %s\n", netreq.uri);
-
-              // block some stuff...
-
-
-
-
-
-
-            });
-            this.el.resource_response_received.connect( (p0, webres, netres ) => {
-              print("resource res. recieved [%s] from %s\n", 
-                 netres.message.response_headers.get_content_type(null),
-                 netres.uri);
-              // the handler's parameter is netres (the response), so its response headers are read.
-              // block some stuff...
-
-              var msg = netres.message.response_headers.get_content_type(null);
-
-
-
-
-            });
-            this.el.load_finished.connect( ( ) => {
-
-                   this.printit();
-
-            });
-        }
-
-        // user defined functions
-        // Called on load_finished and by the 25 second timer; returns early once print_started is set.
-        public void printit () {
-           print("load_finsihed\n");
-
-           if (this.print_started) {
-              print("load_finsihed - already printing..\n");
-              return;
-           }
-           // NOTE(review): print_started is never set to true and nothing else happens here yet.
-        }
-        // Takes a snapshot of the view, saves it as a PNG to webkitpdf.opt_target_png and quits Gtk.
-        public void printpng () {
-            //var filename = "/tmp/test.pdf";
-            //print("load_changed %d ?= %d\n", le, LoadEvent.FINISHED);
-
-            //if (le != LoadEvent.FINISHED) {
-            //    return;
-           // }
-
-        // what size is the document.
-
-             print("making screenshot\n");
-
-            // fix vapi - get_snapshot add '?' to all null.
-            var pixmap = _this.view.el.get_snapshot( null );
-
-            int w,h;
-
-            // add out to gdkpixmap.get_size
-               pixmap.get_size( out  w , out   h);
-               var pixbuf  = Gdk.pixbuf_get_from_drawable(null, pixmap, null , 0, 0, 0, 0, w, h);
-               print("pixbuf size: %d x %d\n ", w,h);
-
-            // save() throws GLib.Error; report a failure instead of claiming the file was written.
-            try {
-                pixbuf.save(webkitpdf.opt_target_png, "png");
-                print("Saved to %s\n", webkitpdf.opt_target_png);
-            } catch (GLib.Error e) {
-                print("Failed to save %s: %s\n", webkitpdf.opt_target_png, e.message);
-            }
-            //view.get_snapshot.begin(WebKit.SnapshotRegion.FULL_DOCUMENT, WebKit.SnapshotOptions.NONE, null, (obj, res) => {
-            //    var sf = view.get_snapshot.end(res);
-
-            //    sf.write_to_png(Browser.opt_target_png);
-            //});
-            Gtk.main_quit();
-
-
-
-        }
-    }
-
-
-}
diff --git a/WorkerBee.vala b/WorkerBee.vala
deleted file mode 100644 (file)
index e7019e9..0000000
+++ /dev/null
@@ -1,150 +0,0 @@
-
-
-public class WorkerBee : Object {
-
-       // Crawl driver: caches the known releases and works on one domain at a time.
-       // NOTE(review): fetchJson() is called below but is not declared in this class.
-       public Gee.HashMap<int,Release> releases;
-
-       public Domain domain; // current domain being worked on..
-
-       public WorkerBee()
-       {
-               this.releases = new Gee.HashMap<int,Release>();
-       }
-
-       // Loads the release data, then fetches and prepares a single domain.
-       public void start()
-       {
-               this.loadLocalReleases();
-               this.fetchReleases();
-
-
-               // should we fork or thread run a few windows?
-               // anyway single thread at present
-
-               //while(true) {
-
-               this.fetchDomain();
-
-               // start indexing?
-
-
-
-               //}
-
-       }
-
-       void loadLocalReleases()
-       {
-               // directory scan 'releases' directory.
-               // delete files where end_dt is too old.
-       }
-
-
-       // Fetches the release list and makes sure every listed release is cached.
-       void fetchReleases()
-       {
-               var json = this.fetchJson("/PressRelease/Workers/Releases");
-               this.fetchReleasesFromList( json.get_array_member("data"));
-       }
-
-       // Ensures every release listed in ar is cached and current (requesting the missing or
-       // changed ones in a single call) and returns the listed releases keyed by id.
-       Gee.HashMap<int,Release> fetchReleasesFromList(Json.Array ar)
-       {
-               string[] fetch_ids = {};
-
-               for (var i = 0; i < ar.get_length(); i++) {
-                       var item       = ar.get_object_element(i);
-                       var id_str     = item.get_string_member("id");
-                       var updated_dt = item.get_string_member("updated_dt");
-                       var id         = int.parse(id_str); // the id arrives as a string; it cannot be cast to int.
-
-                       // not cached yet, or the cached copy's update_dt differs from the listed one.
-                       if (!this.releases.has_key(id) || this.releases.get(id).update_dt != updated_dt) {
-                               fetch_ids += id_str;
-                       }
-               }
-               // got to fetch some... (when nothing is missing the cached entries are still returned)
-               if (fetch_ids.length > 0) {
-                       this.fetchReleasesMissing(fetch_ids);
-               }
-
-               var ret = new Gee.HashMap<int,Release>();
-
-               for (var i = 0; i < ar.get_length(); i++) {
-                       var id = int.parse(ar.get_object_element(i).get_string_member("id"));
-                       ret.set(id, this.releases.get(id));
-               }
-               return ret;
-       }
-
-
-
-       // Requests the given release ids in a single call and stores the results in this.releases.
-       void fetchReleasesMissing(string[] fetch_ids) 
-       {
-               var json = this.fetchJson("/PressRelease/Workers/Releases/" + string.joinv(",", fetch_ids));
-               var ar = json.get_array_member("data");
-
-               for (var i =0;i < ar.get_length(); i++) {
-                       var rel = new Release.new_from_json(ar.get_object_element(i));
-                       this.releases.set(rel.id, rel);
-               }
-
-       }
-
-
-       void fetchDomain() // set's up the domain, and calls fetch pages..
-       {
-               var json = this.fetchJson("/PressRelease/Workers/Domain");
-               // fetches a single domain to be scanned from the server..
-               var obj = json.get_object_member("data");
-
-               // member names are string literals (double quotes); single quotes are character literals.
-               this.domain = new Domain(obj.get_string_member("domain"));
-               this.domain.loadFromJson(obj);
-               this.domain.release = this.fetchReleasesFromList(obj.get_array_member("releases"));
-               this.domain.loadPages();
-
-               this.fetchDomainPages();
-                       // we need to fetch page information from the server..
-
-               //this.domain.prunePages(); --- check depth etc../ really old pages. that have not been seen for a while.
-
-               this.domain.writeToFile(); // update our local copy...
-
-       }
-
-       void fetchDomainPages() // modifies domain + domain.pages
-       {
-               if (this.domain.remote_seq_id == this.domain.local_seqid) {
-                       // no need to fetch, local page data is the same...
-
-                       return;
-               }
-
-               while(this.domain.remote_seq_id > this.domain.local_seqid) {
-                       var before = this.domain.local_seqid;
-                       var json = this.fetchJson("/PressRelease/Workers/Pages/%d/%d".printf( this.domain.id, this.domain.local_seqid));
-                       var ar = json.get_array_member("data");
-
-                       for (var i =0;i < ar.get_length(); i++) {
-                               // Page.new_from_json() takes the domain first, then the JSON object.
-                               var page = new Page.new_from_json(this.domain, ar.get_object_element(i));
-                               if (this.domain.pages.has_key(page.path)) {
-                                       page.writeToFile(); // update local version..
-                               }
-                               this.domain.pages.set(page.path, page);
-                               this.domain.local_seqid = page.seqid;
-                       }
-                       // an empty batch, or one that did not advance local_seqid, would loop forever.
-                       if (this.domain.local_seqid <= before) {
-                               break;
-                       }
-               }
-
-       }
-
-}
\ No newline at end of file