+++ /dev/null
-/**
- * A single page of a crawled Domain.
- *
- * Holds the cache validators (etag / md5), scan timestamps and link depth
- * of the page, and drives the fetch -> compare -> extract cycle.
- *
- * NOTE(review): this class uses this.webview, this.fetchHeader(),
- * this.fetchBody() and this.writeFile(), none of which are declared in
- * this class - they need to be supplied before it can compile.
- * NOTE(review): the members below are plain fields, not GObject properties;
- * confirm that Json.gobject_serialize() picks them up as intended.
- */
-public class Page : Object, Json.Serializable
-{
-
-    public Domain domain;
-    public string path;
-    public int seqid; // copied from domain.local_seqid by markAsScanned()
-    public int id; // from the external database..
-    public string etag; // the etag header variable to allow caching.
-    public string md5; // raw contents md5..
-
-    public GLib.DateTime last_dt; // last time it was scanned.
-    public GLib.DateTime last_change_dt; // last time it changed. (if the difference of last_dt/change is big, then don't scan it much..?
-    public int depth;
-
-    public string words; //? extracted words?
-
-    public bool scanned; // has it been scanned.?
-    public bool scanned_needed; // does it need to be scanned this time...
-
-    // what about 'how often it changed??' how can we measure that
-
-    /**
-     * Creates a page from server supplied JSON.
-     *
-     * Only the owning domain is stored at present; nothing is read from obj yet.
-     *
-     * @param domain the domain this page belongs to
-     * @param obj the JSON description of the page (currently unused)
-     */
-    public Page.new_from_json(Domain domain, Json.Object obj)
-    {
-        this.domain = domain;
-    }
-
-    /**
-     * Creates a not-yet-scanned page discovered by following a link.
-     *
-     * @param domain the domain this page belongs to
-     * @param path the link target, used as the key in domain.pages
-     * @param depth link depth of the new page
-     */
-    public Page.new_from_link(Domain domain, string path, int depth)
-    {
-        this.domain = domain;
-        this.path = path;
-        this.depth = depth;
-        this.scanned = false;
-
-        // seqid? // other stuff..
-    }
-
-
-    /* ------------------ parsing behaviour ---------*/
-
-    /**
-     * Fetches the page, skipping the expensive steps when nothing changed.
-     *
-     * The page is marked as scanned without loading it when the header
-     * validators or the body checksum match the stored values; otherwise
-     * the stored validators are refreshed and the body is handed to the
-     * web view.
-     */
-    public void fetchPage()
-    {
-        var header = this.fetchHeader(); // see if it's a new page..
-
-        // A page created by new_from_link() has no etag / last_dt yet; without
-        // the null checks two missing etags would compare equal and the page
-        // would be skipped without ever being fetched.
-        var etag_unchanged = header.etag != null && header.etag == this.etag;
-        var not_modified = this.last_dt != null
-            && header.modified != null
-            && header.modified.compare(this.last_dt) < 0;
-
-        if (etag_unchanged || not_modified) {
-            this.markAsScanned();
-            return;
-        }
-
-        var body = this.fetchBody(); // uchar[] ?
-        var checksum = Checksum.compute_for_data(ChecksumType.MD5, body);
-        if (this.md5 == checksum) {
-            this.markAsScanned();
-            return;
-        }
-
-        // -- page has changed....
-        // remember the new validators so the next run compares against them.
-        this.md5 = checksum;
-        this.etag = header.etag;
-        this.last_change_dt = new DateTime.now_local();
-
-        this.webview.load(this, body); // should trigger viewOnLoad when it's complete..
-    }
-
-    /**
-     * Called when the web view has finished loading the page.
-     *
-     * Passes the extracted words to the domain and registers the links found
-     * on the page in domain.pages, unless the depth limit has been reached.
-     */
-    public void viewOnLoad()
-    {
-        // use up the view.
-        var new_words = this.webview.extractWords();
-        var links = this.webview.parseLinks();
-
-        // destroy the view?
-
-        // this may trigger a 'success story'....
-        this.domain.compareReleaseWords(this, new_words);
-
-        // we have reached the limit..
-        if (this.depth >= this.domain.depth) {
-            return;
-        }
-
-        var pgs = this.domain.pages;
-        for (var i = 0; i < links.size; i++) {
-            var link = links.get(i);
-
-            if (pgs.has_key(link)) {
-                var known = pgs.get(link);
-                if (!known.scanned) {
-                    known.scanned_needed = true; // update old page scenario..?
-                }
-                continue;
-            }
-
-            // new page...
-            var pg = new Page.new_from_link(this.domain, link, this.depth + 1);
-            pgs.set(pg.path, pg);
-        }
-        // signal next scan to occur?
-    }
-
-    /**
-     * Records a completed scan: sets the flag and timestamp, takes the next
-     * sequence id from the domain and writes the page out.
-     */
-    public void markAsScanned()
-    {
-        this.scanned = true;
-        this.last_dt = new DateTime.now_local();
-        this.domain.local_seqid++;
-        this.seqid = this.domain.local_seqid;
-        // write file!??!
-        this.writeFile();
-    }
-
-
-    /* --------- serialization -----------*/
-
-    /**
-     * Skips "domain", "words" and "webview"; everything else uses the default
-     * json-glib deserialization.
-     */
-    public bool deserialize_property (string property_name, out Value value, ParamSpec pspec, Json.Node property_node) {
-
-        // things we do not deserialize?
-        if (property_name == "domain" || property_name == "words" || property_name == "webview") {
-            // the out parameter must be assigned on every return path.
-            value = Value(pspec.value_type);
-            return false;
-        }
-        return default_deserialize_property (property_name, out value, pspec, property_node);
-    }
-
-    /**
-     * Serializes "domain" as the domain name and writes JSON null for
-     * "words" and "webview" (both are ignored again on load); everything
-     * else uses the default json-glib serialization.
-     */
-    public Json.Node serialize_property (string property_name, GLib.Value value, GLib.ParamSpec pspec) {
-
-        if (property_name == "domain") {
-            var ret = new Json.Node(Json.NodeType.VALUE);
-            ret.set_string(this.domain.domain);
-            return ret;
-        }
-
-        if (property_name == "words" || property_name == "webview") {
-            return new Json.Node(Json.NodeType.NULL);
-        }
-
-        return default_serialize_property(property_name, value, pspec);
-    }
-
-    /**
-     * @return the page serialized through Json.gobject_serialize()
-     */
-    public Json.Node toJSON()
-    {
-        return Json.gobject_serialize(this);
-    }
-
-}
\ No newline at end of file
+++ /dev/null
-
-
-/**
- * A press release whose words are matched against crawled pages.
- */
-public class Release : Object
-{
-    public int id;
-    public string language;
-    public GLib.DateTime start_dt;
-    public GLib.DateTime end_dt;
-    public string update_dt; // compared by WorkerBee against the server's "updated_dt" value
-    public string[] words;
-
-    /**
-     * Creates a release from its JSON description.
-     *
-     * Nothing is read from obj yet, so every field keeps its default value.
-     * NOTE(review): WorkerBee stores releases under rel.id, so at least id
-     * needs to be filled in here once the JSON layout is settled.
-     *
-     * @param obj the JSON description of the release (currently unused)
-     */
-    public Release.new_from_json(Json.Object obj)
-    {
-
-    }
-
-}
+++ /dev/null
-
-/**
-
-The idea is an async HTTP request.
-
--- the question is: are we creating a generic request, or just doing a silly wrapper of Soup?
-
-
-x = new Request("GET", url)
-x.url = ...
-x.complete.connect((uri, headers, body, body_len) => {
-    .. what to do next
-});
-
-x.send(); // returns instantly...
-
-
-Usage:
-a - JSON fetches from the main server (async not needed)
-b - HEAD request on a page to see if it has changed -- async is useful, as we might want to run a few at the same time
-c - body requests on a page -- async is useful, as we might want to run a few at the same time
-
-
-
-valac --thread -g Request.vala --pkg glib-2.0 --pkg gee-1.0 --pkg libsoup-2.4 --pkg gio-2.0 -o /tmp/req --target-glib=2.32 -X -lm -X -pg
-
-
-*/
-
-/**
- * Manual smoke test: sends one HEAD request, prints the content type and
- * body length reported by the complete signal, then quits the main loop.
- */
-void main () {
-    var loop = new MainLoop();
-    var x = new Request("HEAD", "http://jobsonboats.com/");
-    x.complete.connect((uri, headers, body, body_len) => {
-        // the header may be absent from the map; never hand null to %s.
-        print("ct: %s\n", headers.get("Content-type") ?? "(none)");
-        print("got body %d\n", (int)body_len);
-        loop.quit();
-    });
-    x.send();
-    loop.run();
-}
-
-/**
- * Small asynchronous HTTP request built on Soup.Session.
- *
- * send() queues the request and returns immediately; the complete signal is
- * emitted once from the session callback with the final URI, the response
- * headers and the body.
- */
-public class Request : Object
-{
-
-    public string url;
-    public string method = "GET";
-
-    /**
-     * Emitted when the response has arrived.
-     *
-     * @param uri the message URI as reported by Soup
-     * @param head response headers, keyed by header name as received
-     * @param body response body, or null for HEAD requests
-     * @param length body length in bytes, 0 for HEAD requests
-     */
-    public signal void complete( Soup.URI uri, Gee.HashMap<string,string> head, uint8[]? body, int64 length);
-
-    /**
-     * @param method HTTP method, e.g. "GET" or "HEAD"
-     * @param url the URL to request
-     */
-    public Request(string method, string url)
-    {
-        this.method = method;
-        this.url = url;
-    }
-
-    /**
-     * Queues the request on a new Soup.Session and returns at once.
-     */
-    public void send()
-    {
-        var session = new Soup.Session ();
-        session.use_thread_context = true;
-
-        var msg = new Soup.Message(this.method, this.url);
-        if (msg == null) {
-            // Soup cannot build a message from an unparsable URL.
-            warning("Request: could not create a message for '%s'", this.url);
-            return;
-        }
-
-        var headers = new Gee.HashMap<string,string>();
-
-        session.queue_message (msg, (obj, mess) => {
-            print ("Status Code: %u\n", mess.status_code);
-            print ("Final URL: %s\n", mess.uri.to_string (false));
-
-            mess.response_headers.foreach ((name, val) => {
-                print("HEADER %s: %s\n", name,val);
-                headers.set(name, val);
-            });
-
-            if (this.method == "HEAD") {
-                this.complete(mess.uri, headers, null, 0);
-                // stop here, otherwise complete is emitted a second time below.
-                return;
-            }
-            this.complete(mess.uri, headers, mess.response_body.data, mess.response_body.length);
-        });
-    }
-
-}
\ No newline at end of file
+++ /dev/null
-// Backing store for Spider.singleton().
-static Spider _Spider;
-
-/**
- * Top level window holding a scrolled WebKit view.
- *
- * When the window is shown it is resized, webkitpdf.opt_url is loaded into
- * the view and a repeating 25 second timer that calls printit() is started.
- */
-public class Spider : Object
-{
-    public Gtk.Window el;
-    private Spider _this;
-
-    /**
-     * @return the shared Spider instance, created on first use
-     */
-    public static Spider singleton()
-    {
-        if (_Spider == null) {
-            _Spider = new Spider();
-        }
-        return _Spider;
-    }
-    public Xcls_scrolled_window scrolled_window;
-    public Xcls_view view;
-
-    // my vars (def)
-
-    // ctor
-    public Spider()
-    {
-        _this = this;
-        this.el = new Gtk.Window( Gtk.WindowType.TOPLEVEL );
-
-        // my vars (dec)
-
-        // set gobject values
-        var child_0 = new Xcls_scrolled_window( _this );
-        child_0.ref();
-        this.el.add ( child_0.el );
-
-        //listeners
-        this.el.destroy.connect( () => {
-            Gtk.main_quit();
-        });
-        this.el.show.connect( () => {
-
-            print("resizing window\n");
-            this.el.resize(1200,500);
-            print("loading url\n");
-            this.view.el.load_uri(webkitpdf.opt_url);
-
-            // if we have not finished in 25 seconds
-            // call printit...
-            // NOTE(review): returning true keeps the timer firing every
-            // 25 seconds - confirm that a repeating timer is intended.
-            GLib.Timeout.add_seconds(25, () => {
-                _this.view.printit();
-                return true;
-            });
-
-        });
-    }
-
-    // user defined functions
-
-    /**
-     * Wrapper around the Gtk.ScrolledWindow that contains the web view.
-     */
-    public class Xcls_scrolled_window : Object
-    {
-        public Gtk.ScrolledWindow el;
-        private Spider _this;
-
-        // my vars (def)
-
-        // ctor
-        public Xcls_scrolled_window(Spider _owner )
-        {
-            _this = _owner;
-            _this.scrolled_window = this;
-            this.el = new Gtk.ScrolledWindow( null, null );
-
-            // my vars (dec)
-
-            // set gobject values
-            var child_0 = new Xcls_view( _this );
-            child_0.ref();
-            this.el.add ( child_0.el );
-
-            // init method
-            {
-                this.el.set_policy(Gtk.PolicyType.AUTOMATIC, Gtk.PolicyType.AUTOMATIC);
-            }
-        }
-
-        // user defined functions
-    }
-
-    /**
-     * Wrapper around the WebKit.WebView; logs resource traffic and calls
-     * printit() when a load finishes.
-     */
-    public class Xcls_view : Object
-    {
-        public WebKit.WebView el;
-        private Spider _this;
-
-        // my vars (def)
-        // NOTE(review): checked by printit() but never set to true in this class.
-        public bool print_started;
-
-        // ctor
-        public Xcls_view(Spider _owner )
-        {
-            _this = _owner;
-            _this.view = this;
-            this.el = new WebKit.WebView();
-
-            // my vars (dec)
-            this.print_started = false;
-
-            // set gobject values
-
-            //listeners
-            this.el.resource_request_starting.connect( (p0, webres, netreq ) => {
-                print("resource req. started %s\n", netreq.uri);
-
-                // block some stuff...
-            });
-            this.el.resource_response_received.connect( (p0, webres, netres ) => {
-                // this handler receives the response (netres); the content
-                // type of a response lives in its response headers.
-                print("resource res. recieved [%s] from %s\n",
-                    netres.message.response_headers.get_content_type(null),
-                    netres.uri);
-
-                // block some stuff...
-            });
-            this.el.load_finished.connect( ( ) => {
-                this.printit();
-            });
-        }
-
-        // user defined functions
-
-        /**
-         * Called when loading finished and from the 25 second timer.
-         * Currently only logs; returns early when print_started is set.
-         */
-        public void printit () {
-            print("load_finsihed\n");
-
-            if (this.print_started) {
-                print("load_finsihed - already printing..\n");
-                return;
-            }
-        }
-
-        /**
-         * Takes a snapshot of the view, saves it as a PNG to
-         * webkitpdf.opt_target_png and quits the GTK main loop.
-         */
-        public void printpng () {
-            //var filename = "/tmp/test.pdf";
-            //print("load_changed %d ?= %d\n", le, LoadEvent.FINISHED);
-
-            //if (le != LoadEvent.FINISHED) {
-            //    return;
-            //}
-
-            // what size is the document.
-
-            print("making screenshot\n");
-
-            // fix vapi - get_snapshot add '?' to all null.
-            var pixmap = _this.view.el.get_snapshot( null );
-
-            int w,h;
-
-            // add out to gdkpixmap.get_size
-            pixmap.get_size( out w , out h);
-            var pixbuf = Gdk.pixbuf_get_from_drawable(null, pixmap, null , 0, 0, 0, 0, w, h);
-            print("pixbuf size: %d x %d\n ", w,h);
-
-            // saving can fail (bad path, disk full); report it instead of
-            // leaving the error unhandled.
-            try {
-                pixbuf.save(webkitpdf.opt_target_png, "png");
-                print("Saved to %s\n", webkitpdf.opt_target_png);
-            } catch (Error e) {
-                warning("could not save %s: %s", webkitpdf.opt_target_png, e.message);
-            }
-            //view.get_snapshot.begin(WebKit.SnapshotRegion.FULL_DOCUMENT, WebKit.SnapshotOptions.NONE, null, (obj, res) => {
-            //    var sf = view.get_snapshot.end(res);
-
-            //    sf.write_to_png(Browser.opt_target_png);
-            //});
-            Gtk.main_quit();
-        }
-    }
-
-
-}
+++ /dev/null
-
-
-/**
- * Worker that pulls releases and a domain to scan from the main server and
- * brings the local page data of that domain up to date.
- *
- * NOTE(review): this.fetchJson() is used throughout but is not declared in
- * this class.
- */
-public class WorkerBee : Object {
-
-    // all releases known locally, keyed by release id.
-    public Gee.HashMap<int,Release> releases;
-
-    public Domain domain; // current domain being worked on..
-
-    public WorkerBee()
-    {
-        this.releases = new Gee.HashMap<int,Release>();
-    }
-
-    /**
-     * Loads the releases, then fetches and prepares one domain.
-     */
-    public void start()
-    {
-        this.loadLocalReleases();
-        this.fetchReleases();
-
-        // should we fork or thread run a few windows?
-        // anyway single thread at present
-
-        //while(true) {
-
-        this.fetchDomain();
-
-        // start indexing?
-
-        //}
-    }
-
-    void loadLocalReleases()
-    {
-        // directory scan 'releases' directory.
-        // delete files where end_dt is too old.
-    }
-
-    /**
-     * Fetches the release list from the server and syncs this.releases with it.
-     */
-    void fetchReleases()
-    {
-        var json = this.fetchJson("/PressRelease/Workers/Releases");
-        this.fetchReleasesFromList(json.get_array_member("data"));
-    }
-
-    /**
-     * Makes sure every release named in ar is present and current in
-     * this.releases, fetching the missing or outdated ones in one request.
-     *
-     * @param ar array of objects carrying string members "id" and "updated_dt"
-     * @return the releases listed in ar that are known locally, keyed by id
-     */
-    Gee.HashMap<int,Release> fetchReleasesFromList(Json.Array ar)
-    {
-        string[] fetch_ids = {};
-
-        for (var i = 0; i < ar.get_length(); i++) {
-            var entry = ar.get_object_element(i);
-            var id = entry.get_string_member("id");
-            var updated_dt = entry.get_string_member("updated_dt");
-            var rid = int.parse(id);
-
-            // fetch when the release is unknown, or its update stamp differs.
-            if (!this.releases.has_key(rid) || this.releases.get(rid).update_dt != updated_dt) {
-                fetch_ids += id;
-            }
-        }
-
-        if (fetch_ids.length > 0) {
-            // got to fetch some...
-            this.fetchReleasesMissing(fetch_ids);
-        }
-
-        // the result is built even when nothing had to be fetched.
-        var ret = new Gee.HashMap<int,Release>();
-
-        for (var i = 0; i < ar.get_length(); i++) {
-            var rid = int.parse(ar.get_object_element(i).get_string_member("id"));
-            if (this.releases.has_key(rid)) {
-                ret.set(rid, this.releases.get(rid));
-            }
-        }
-        return ret;
-    }
-
-    /**
-     * Fetches the given releases from the server and stores them in
-     * this.releases under their id.
-     *
-     * @param fetch_ids ids to request, sent as a comma separated list
-     */
-    void fetchReleasesMissing(string[] fetch_ids)
-    {
-        var json = this.fetchJson("/PressRelease/Workers/Releases/" + string.joinv(",", fetch_ids));
-        var ar = json.get_array_member("data");
-
-        for (var i = 0; i < ar.get_length(); i++) {
-            var rel = new Release.new_from_json(ar.get_object_element(i));
-            this.releases.set(rel.id, rel);
-        }
-    }
-
-    /**
-     * Sets up the domain to scan, syncs its pages and writes the local copy.
-     */
-    void fetchDomain()
-    {
-        var json = this.fetchJson("/PressRelease/Workers/Domain");
-        // fetches a single domain to be scanned from the server..
-        var obj = json.get_object_member("data");
-
-        this.domain = new Domain(obj.get_string_member("domain"));
-        this.domain.loadFromJson(obj);
-        this.domain.release = this.fetchReleasesFromList(obj.get_array_member("releases"));
-        this.domain.loadPages();
-
-        this.fetchDomainPages();
-        // we need to fetch page information from the server..
-
-        //this.domain.prunePages(); --- check depth etc../ really old pages. that have not been seen for a while.
-
-        this.domain.writeToFile(); // update our local copy...
-    }
-
-    /**
-     * Pulls page records from the server until the local sequence id has
-     * caught up with the remote one. Modifies domain and domain.pages.
-     */
-    void fetchDomainPages()
-    {
-        // nothing to do when the local page data is already current.
-        while (this.domain.remote_seq_id > this.domain.local_seqid) {
-            var seq_before = this.domain.local_seqid;
-
-            var json = this.fetchJson("/PressRelease/Workers/Pages/%d/%d".printf(this.domain.id, this.domain.local_seqid));
-            var ar = json.get_array_member("data");
-
-            for (var i = 0; i < ar.get_length(); i++) {
-                var page = new Page.new_from_json(this.domain, ar.get_object_element(i));
-                if (this.domain.pages.has_key(page.path)) {
-                    page.writeToFile(); // update local version..
-                }
-                this.domain.pages.set(page.path, page);
-                this.domain.local_seqid = page.seqid;
-            }
-
-            // an empty or non-advancing batch would otherwise repeat the
-            // same request forever.
-            if (this.domain.local_seqid <= seq_before) {
-                break;
-            }
-        }
-    }
-
-}
\ No newline at end of file