/**
 * One page belonging to a crawled {@link Domain}.
 *
 * Holds the bookkeeping used to decide whether the page has to be fetched
 * and parsed again (etag, content checksum, timestamps) and the link depth
 * at which the page was discovered.
 *
 * NOTE(review): this class uses members that are not declared in the code
 * shown here: ``webview``, ``fetchHeader ()``, ``fetchBody ()`` and
 * ``writeFile ()``. They have to be supplied before the class can compile.
 *
 * NOTE(review): the state below is stored in public fields. json-glib's
 * GObject (de)serialization works on GObject properties, so these fields
 * are presumably invisible to ``toJSON ()`` until they are turned into
 * properties - confirm.
 */
public class Page : Object, Json.Serializable {

    /** Domain this page belongs to. */
    public Domain domain;
    /** Path of the page; used as the key in the domain's page map. */
    public string path;
    /** Sequence id, taken from the domain's counter when the page is marked as scanned. */
    public int seqid;
    /** Id from the external database. */
    public int id;
    /** Value of the etag header, kept to allow caching. */
    public string etag;
    /** MD5 of the raw page contents. */
    public string md5;
    /** Last time the page was scanned. */
    public GLib.DateTime last_dt;
    /**
     * Last time the page changed. If the difference between last_dt and this
     * value is big, the page probably does not need to be scanned often.
     */
    public GLib.DateTime last_change_dt;
    /** Link depth at which the page was found. */
    public int depth;
    /** Extracted words (?). */
    public string words;
    /** Whether the page has been scanned. */
    public bool scanned;
    /** Whether the page needs to be scanned this time. */
    public bool scanned_needed;

    // TODO: how often does a page change, and how can that be measured?

    /**
     * Creates a page for //domain// from a JSON object.
     *
     * Only the domain is stored; the JSON object is currently ignored.
     *
     * @param domain domain the page belongs to
     * @param obj JSON description of the page (unused for now)
     */
    public Page.new_from_json (Domain domain, Json.Object obj) {
        this.domain = domain;
    }

    /**
     * Creates a not-yet-scanned page discovered through a link.
     *
     * @param domain domain the page belongs to
     * @param path path of the page
     * @param depth link depth at which the page was found
     */
    public Page.new_from_link (Domain domain, string path, int depth) {
        this.domain = domain;
        this.path = path;
        this.depth = depth;
        this.scanned = false;
        // seqid and the remaining fields are left at their defaults for now.
    }

    /* ------------------ parsing behaviour --------- */

    /**
     * Fetches the page and hands it to the web view if it has changed.
     *
     * The page is marked as scanned without loading it when the header's
     * etag equals the stored one, when the header's modification time is
     * earlier than the last scan, or when the MD5 of the body equals the
     * stored checksum.
     */
    public void fetchPage () {
        var header = this.fetchHeader ();

        // See if it is a new page. last_dt is unset until the first scan
        // (e.g. pages created by new_from_link), so only compare when present.
        var unchanged_since_scan = this.last_dt != null
            && header.modified.compare (this.last_dt) < 0;
        if (header.etag == this.etag || unchanged_since_scan) {
            this.markAsScanned ();
            return;
        }

        var body = this.fetchBody (); // uchar[] ?
        var checksum = Checksum.compute_for_data (ChecksumType.MD5, body);
        if (this.md5 == checksum) {
            this.markAsScanned ();
            return;
        }

        // NOTE(review): neither md5 nor etag is updated here or anywhere else
        // in this class - confirm where the new values are meant to be stored.

        // -- page has changed....
        this.last_change_dt = new DateTime.now_local ();
        // Should trigger viewOnLoad () when it is complete.
        this.webview.load (this, body);
    }

    /**
     * Called when the page has been loaded into the web view.
     *
     * Extracts the words and links from the view, passes the words to the
     * domain, and - unless the domain's depth limit has been reached -
     * registers every linked page: unknown paths are added as new pages one
     * level deeper, known but unscanned pages are flagged as needing a scan.
     */
    public void viewOnLoad () {
        // Use up the view.
        var new_words = this.webview.extractWords ();
        var links = this.webview.parseLinks ();
        // destroy the view?

        // This may trigger a 'success story'....
        this.domain.compareReleaseWords (this, new_words);

        // We have reached the limit; '>=' also stops pages already beyond it.
        if (this.depth >= this.domain.depth) {
            return;
        }

        var pgs = this.domain.pages;
        foreach (var link in links) {
            if (pgs.has_key (link)) {
                var known = pgs.get (link);
                if (!known.scanned) {
                    // update old page scenario..?
                    known.scanned_needed = true;
                }
                continue;
            }
            // New page...
            var pg = new Page.new_from_link (this.domain, link, this.depth + 1);
            pgs.set (pg.path, pg);
        }
        // signal next scan to occur?
    }

    /**
     * Marks the page as scanned now, assigns it the domain's next sequence
     * id and writes it out via writeFile ().
     */
    public void markAsScanned () {
        this.scanned = true;
        this.last_dt = new DateTime.now_local ();
        this.domain.local_seqid++;
        this.seqid = this.domain.local_seqid;
        // write file!??!
        this.writeFile ();
    }

    /* --------- serialization ----------- */

    /**
     * Json.Serializable hook: skips "domain", "words" and "webview" and
     * defers to the default implementation for everything else.
     *
     * @return false for the skipped properties, otherwise the result of the
     *         default deserializer
     */
    public bool deserialize_property (string property_name, out Value value,
                                      ParamSpec pspec, Json.Node property_node) {
        // Things we do not deserialize.
        if (property_name == "domain" || property_name == "words" || property_name == "webview") {
            // The out parameter has to be assigned on every return path.
            value = Value (pspec.value_type);
            return false;
        }
        return default_deserialize_property (property_name, out value, pspec, property_node);
    }

    /**
     * Json.Serializable hook: writes the domain's name for "domain", "words"
     * and "webview" and defers to the default implementation otherwise.
     *
     * NOTE(review): emitting the domain name for "words" and "webview" looks
     * unintended (copied from the deserialize filter?) - confirm.
     */
    public Json.Node serialize_property (string property_name, GLib.Value value, GLib.ParamSpec pspec) {
        if (property_name == "domain" || property_name == "words" || property_name == "webview") {
            // json-glib has no string node type; strings are VALUE nodes.
            var ret = new Json.Node (Json.NodeType.VALUE);
            ret.set_string (this.domain.domain);
            return ret;
        }
        return default_serialize_property (property_name, value, pspec);
    }

    /**
     * Serializes this page with json-glib's GObject serializer.
     *
     * @return the JSON node produced by Json.gobject_serialize ()
     */
    public Json.Node toJSON () {
        return Json.gobject_serialize (this);
    }
}