2 GLib = imports.gi.GLib;
3 WebKit = imports.gi.WebKit;
5 TabbedBrowser = imports.TabbedBrowser;
6 File = imports.File.File;
8 base64 = imports.base64.base64;
// BrowserView: GType definition for the browser's web view. Only fragments of
// the definition are visible in this view; the gaps are elided lines.
10 BrowserView = new GType({
// The new type derives from WebKit.WebView.
11 parent: WebKit.WebView.type,
// URL of the page currently being parsed; false until parsequeue() assigns one.
21 var browsePage = false;
// Root directory for the on-disk queues and output.
// NOTE(review): hard-coded absolute path inside one user's home directory —
// consider making this configurable.
25 var storedir = '/home/alan/wkqueue';
26 if (!File.exists(storedir)) {
// Queue directories under storedir. Entries are files named with
// encodeURIComponent(url) (see dupeCheck / downloadqueue).
29 var parsedir = storedir + '/parse_queue';
30 var downloaddir = storedir + '/download_queue';
31 var donedir = storedir + '/downloaded_queue';
// Each queue directory is checked for existence; only the mkdir for
// downloaddir is visible here (presumably the others are created the same
// way on elided lines — confirm).
32 if (!File.exists(parsedir)) {
35 if (!File.exists(downloaddir)) {
36 File.mkdir(downloaddir);
38 if (!File.exists(donedir)) {
// Handler for the view's title_changed signal: copies the page title onto the
// owning tab's label.
42 var update_title = function (web_view, web_frame, title)
// Shorten to the first 25 characters plus "..."; the condition guarding this
// is on a line not visible here (presumably a length check — confirm).
45 title = title.slice(0,25) + "...";
47 tab.get_tab_label().label = title;
// Refreshes the tab's toolbar from the view: the URL field takes the frame's
// URI, and the back/forward state mirrors what the web view reports.
// Called from load_committed.
50 var update_url = function (web_view, web_frame)
52 var toolbar = tab.get_toolbar();
54 toolbar.set_url(web_frame.get_uri());
55 toolbar.set_can_go_back(web_view.can_go_back());
56 toolbar.set_can_go_forward(web_view.can_go_forward());
// Handler for the load_progress_changed signal: forwards load progress to the
// tab's toolbar.
59 var update_progress = function (bar, progress)
// The value is divided by 100 before being handed over — presumably turning
// a 0-100 percentage into a 0-1 fraction; confirm against the toolbar API.
61 tab.get_toolbar().set_progress(progress / 100);
// Handler for the create_web_view signal: builds a fresh BrowserView and
// defers showing it until that view emits web_view_ready.
64 var create_new_tab = function (web_view, web_frame, new_web_view)
// The incoming new_web_view argument is overwritten with the new instance.
66 new_web_view = new BrowserView();
67 new_web_view.signal.web_view_ready.connect(show_new_tab);
// Handler for web_view_ready on a view created by create_new_tab: opens a
// tab for that view, passing an empty string as the URL.
71 var show_new_tab = function (new_web_view)
73 TabbedBrowser.browser.new_tab("", new_web_view);
// Handler for the hovering_over_link signal: shows the hovered URL in the
// tab's status bar.
78 var hover_link = function (web_view, link, url)
80 tab.get_statusbar().set_status(url);
// Reads inject.js from the script's own directory and hands it to
// execute_script() on the current tab's web view. A per-URI count is kept in
// `injected` (the increment itself is not visible in this view).
// force: when truthy, the count for this.uri is reset to 0 first.
86 this.add_inject = function(force)
// First time this URI is seen (or a forced call): start counting from zero.
89 if (force || (typeof(injected[this.uri]) == 'undefined' )) {
90 injected[this.uri] = 0;
// More than two recorded injections for this URI: the action taken is on a
// line not visible here (presumably an early return — confirm).
92 if (injected[this.uri] > 2) {
// NOTE(review): `fn` already holds this path, yet File.read() below rebuilds it.
96 var fn = __script_path__ + "/inject.js";
97 if (File.exists(fn)) {
98 print("Adding inject");
99 var newjs = File.read(__script_path__ + "/inject.js");
100 TabbedBrowser.browser.current_tab().get_web_view().execute_script(
// Variant of add_inject for nsinject.js: reads the script from the script's
// own directory and hands it to execute_script() on the current tab's web view.
// The per-URI counting and the existence check are commented out below, so
// `force` is unused in the visible lines.
107 this.add_nsinject = function(force)
110 //if (force || (typeof(injected[this.uri]) == 'undefined' )) {
111 // injected[this.uri] = 0;
113 //if (injected[this.uri] > 2) {
116 //injected[this.uri]++;
117 var fn = __script_path__ + "/nsinject.js";
// NOTE(review): with this check disabled, File.read() below runs even when
// nsinject.js is missing; `fn` is then also unused.
118 //if (File.exists(fn)) {
119 print("Adding inject");
120 var newjs = File.read(__script_path__ + "/nsinject.js");
121 TabbedBrowser.browser.current_tab().get_web_view().execute_script(
// Handler for the load_finished signal. The visible fragments show three
// activities: exporting the page through a Gtk.PrintOperation, starting the
// NS download flow, and asking the injected script to gather links. Control
// flow between them (returns, closing braces) is on lines not visible here.
133 var load_finished = function ()
135 print("load finished");
// Schedule the callback at low priority with an interval of 1000
// (presumably milliseconds — confirm against GLib.timeout_add).
137 GLib.timeout_add(GLib.PRIORITY_LOW, 1000, function() {
139 // _t.open("http://sg.lifestyleasia.com/videos/video-british-polo-day-singapore-2012/");
140 var mf = _t.get_main_frame();
// NOTE(review): Gtk is not among the imports visible in this view —
// presumably imported on an elided line; confirm.
142 var ar = Gtk.PaperSize.get_paper_sizes();
143 var psetup = new Gtk.PageSetup();
// Pick the 'iso_a2' entry from the available paper sizes for the page setup.
144 for(var i = 0; i < ar.length; i++) {
145 // print(ar[i].get_name());
146 if (ar[i].get_name() =='iso_a2') {
147 psetup.set_paper_size(ar[i]);
// NOTE(review): hard-coded export path in a specific user's home directory.
153 var p = new Gtk.PrintOperation({ export_filename : '/home/chris/test_pdf/test2.pdf' });
154 var s = new Gtk.PrintSettings();
157 //s.set_paper_size('iso_a4');
158 // psetup.set_scale(0.5);
159 p.set_default_page_setup(psetup);
// Second pass over the paper sizes: the print settings get 'iso_a4', while
// the page setup above was given 'iso_a2'.
161 for(var i = 0; i < ar.length; i++) {
162 // print(ar[i].get_name());
163 if (ar[i].get_name() =='iso_a4') {
164 // psetup.set_paper_size(ar[i]);
165 s.set_paper_size(ar[i]);
171 p.set_print_settings(s);
172 //var s = p.get_print_settings();
174 //p.set_print_settings(s);
// Run the print operation on the main frame with the EXPORT action.
175 mf.print_full(p, Gtk.PrintOperationAction.EXPORT)
// Reset the toolbar progress indicator.
180 tab.get_toolbar().set_progress(0);
// Force-inject nsinject.js, then branch on the third command-line argument.
184 _t.add_nsinject(true);
185 print(typeof(Seed.argv[2]));
186 if (Seed.argv[2] == null) {
// NOTE(review): nsdownloadNext is declared without parameters and re-reads
// Seed.argv[2] itself, so the argument passed here is ignored.
191 _t.nsdownloadNext(Seed.argv[2] * 1)
// Ask the injected page script to collect links; the result is expected to
// arrive via the console_message handler ('gatherlinks' method).
195 print("onload: calling gather links");
196 TabbedBrowser.browser.current_tab().get_web_view().execute_script(
197 'BrowserMirror.gatherlinks();'
// Handler for the load_committed signal: refreshes the toolbar URL and
// navigation state via update_url().
206 var load_committed = function (web_view, web_frame)
208 update_url(web_view, web_frame);
// Navigation-policy handler meant to give middle-click-open-in-new-tab.
// It is currently not connected: the connect() call for
// navigation_policy_decision_requested is commented out further down because
// it segfaults seed.
211 var clicked_link = function (web_view, web_frame, request,
212 action, decision, window)
// Only react to link clicks made with button 2.
214 if(action.get_reason() == WebKit.WebNavigationReason.LINK_CLICKED &&
215 action.get_button() == 2)
// NOTE(review): uses a bare `browser`, whereas the rest of the file goes
// through TabbedBrowser.browser — confirm `browser` is defined in this scope.
217 browser.new_tab(request.get_uri(), null);
// Navigates the view to `url`. The call that actually opens the URL is on a
// line not visible in this view.
227 this.browse = function (url)
// No "://" in the string: treat it as a bare host/path and default to http.
229 if(url.search("://") < 0)
230 url = "http://" + url;
// Setter paired with get_tab(); the body is not visible in this view
// (presumably stores new_tab in the `tab` variable used by the handlers — confirm).
235 this.set_tab = function (new_tab)
// Getter paired with set_tab(); the body is not visible in this view
// (presumably returns the `tab` variable — confirm).
240 this.get_tab = function ()
// Processes the head entry of the download queue directory: skips entries on
// another domain, otherwise issues a head request for it via downloadhead().
// Returns the result of the recursive call when an entry is skipped; the other
// return values are on lines not visible in this view.
246 this.downloadqueue = function()
248 var filesList = File.list(downloaddir);
249 print("DOWNLOAD QUEUE LENGTH: " + filesList.length);
// Remember the largest queue length seen; it is the progress denominator.
250 if (!maxQueue || maxQueue < filesList.length) {
251 maxQueue = filesList.length;
// NOTE(review): when both maxQueue and the list length are 0 this is 0 / 0 (NaN).
254 tab.get_toolbar().set_progress(filesList.length / Math.max(maxQueue, filesList.length ));
// NOTE(review): filesList.length has already been read above, so a null list
// would have thrown before reaching the null half of this check.
255 if (filesList == null || filesList.length == 0 ){
// Queue entries are named with the URI-encoded URL.
259 var url = decodeURIComponent(filesList[0]);
// Entries that fail checkdomain() are deleted and the next entry is tried.
260 if (!this.checkdomain(url)) {
261 print("SKIP (external domain) : " + url);
262 File.remove(downloaddir + '/' + filesList[0]);
263 return this.downloadqueue();
266 this.downloadhead( url);
// Runs one step of the crawl: the download queue is tried first, and only
// when downloadqueue() returns a falsy value is the parse queue consulted.
// Returns true when the download queue handled the step, otherwise whatever
// parsequeue() returns.
272 this.queuerun = function()
274 if (this.downloadqueue()) return true;
275 return this.parsequeue();
// Takes the first entry of the parse queue directory, records it in
// browsePage and loads it in this view via browse(). Return values are on
// lines not visible in this view.
278 this.parsequeue = function(){
279 var filesList = File.list(parsedir);
// Nothing queued (or no listing): the handling is on an elided line.
280 if (filesList == null || filesList.length == 0 ){
// Queue entries are named with the URI-encoded URL.
284 browsePage = decodeURIComponent(filesList[0]);
285 print("parsing page:" + browsePage );
286 this.browse(browsePage);
// Asks the injected page script to download `link` by building a
// BrowserMirror.downloadpage(...) call; the call that executes the string is
// on a line not visible in this view. The result comes back through the
// console_message handler ('downloadpage' method).
296 this.downloadpage = function(link){
297 print("calling download page: " + link);
298 _t.add_inject(); // just in case..
299 //var url = File.read(__script_path__+"/downloadqueue/"+link);
// JSON.stringify quotes and escapes the link for embedding in script source.
301 "BrowserMirror.downloadpage(" + JSON.stringify(link) +");"
// Asks the injected page script for the head of `link` by building a
// BrowserMirror.downloadhead(...) call; the call that executes the string is
// on a line not visible in this view. The result comes back through the
// console_message handler ('downloadhead' method).
307 this.downloadhead = function(link){
308 print("calling download head: " + link);
309 _t.add_inject(); // just in case..
310 //var url = File.read(__script_path__+"/downloadqueue/"+link);
// JSON.stringify quotes and escapes the link for embedding in script source.
312 "BrowserMirror.downloadhead(" + JSON.stringify(link) +");"
// Starts the NS download for the page number given as Seed.argv[2]: builds an
// NS.login(username, password, page) call from credentials read off disk.
// NOTE(review): declared without parameters, although load_finished passes one.
// NOTE(review): the block comment opened below is closed on a line not visible
// in this view, so it is unclear exactly which of the following lines are live.
// NOTE(review): `var pg` appears twice in the visible lines.
317 this.nsdownloadNext = function()
319 /*if (nsqueue === false) {
321 //282,273,285,395,271,272,278,394,402,279,432,280,284,281,433,437,404,444,151,152,1,283,287,288,418,407,398,449,147,150,131,149,443,148,411,405,415,417,416,406,408,399,410,409,412,400,155,133,124,157,118,183,450,434,301,435,130,286,162,292,431,161,290,146,145,158,159,120,137,128,139,163,293,160,289,291,141,140,138,142,144,110,401,156,154,153,170,167,295,166,294,277,425,164,165,168,296,169,297,113,171,119,180,300,176,175,172,302,299,
322 //298,421,187,307,428,275,269,304,181,182,174,173,184,303,112,179,114,305,177,178,186,306,403,420,111,109,309,188,308,446,445,310,311,53,189,52,312,419,270,190,191,192,193,54,194,108,268,121,195,206,396,205,207,324,220,340,212,331,219,339,222,342,436,209,327,424,208,326,210,328,330,325,211,329,218,337,338,224,223,217,336,214,333,213,332,216,335,221,341,215,334,197,314,
323 //196,313,198,316,201,320,317,204,323,203,322,202,321,200,319,199,318,346,58,267,393,265,391,438,264,390,261,387,262,388,260,386,266,392,413,414,263,389,347,225,343,227,345,226,344,422,228,348,237,358,231,352,229,349,351,233,354,238,359,360,239,361,230,350,232,353,234,355,439,236,357,240,362,235,356,254,380,257,383,259,385,255,381,256,448,382,258,384,374,252,375,376,251,373,378,429,430,379,253,244,366,247,
324 //369,363,242,364,241,243,365,246,368,245,367,377,427,
325 426,250,447,372,249,371,248,370,442,397,129,441,440,116,122,123,125,126,136,115,117,127
329 if (!nsqueue.length) {
333 var pg = nsqueue.shift() ;
338 //print(typeof(Seed.argv[2]));
// Multiplying by 1 coerces the command-line string to a number.
344 var pg = Seed.argv[2] * 1;
// An existing <storedir>/output/<pg>.csv means this page was already fetched;
// what happens next is on an elided line.
345 if ( File.exists( storedir+'/output/' + pg + '.csv')) {
346 print("DONE : " + storedir+'/output/' + pg + '.csv');
// NOTE(review): credentials are read from a hard-coded path in one user's
// home directory; the file is expected to be JSON with username/password keys.
351 var pd = JSON.parse(File.read('/home/alan/.nspasswd'));
353 print("downloadnext : " + pg);
355 'NS.login(' + JSON.stringify(pd.username) + ',' + JSON.stringify(pd.password) + ',' + pg + ' );'
// Wire the view's signals to the handlers defined earlier in this file.
364 //this.set_scroll_adjustments(null, null);
366 this.signal.title_changed.connect(update_title);
367 this.signal.load_committed.connect(load_committed);
368 this.signal.load_finished.connect(load_finished);
369 this.signal.load_progress_changed.connect(update_progress);
371 // For some reason, this segfaults seed in the instance init closure handler
372 // Once that's fixed, uncommenting the next line will give middle-click-open-in-new tab
373 //this.signal.navigation_policy_decision_requested.connect(clicked_link);
375 this.signal.hovering_over_link.connect(hover_link);
377 this.signal.create_web_view.connect(create_new_tab);
// The console_message handler connected next is the channel through which the
// injected page scripts report results back to this process.
381 print("ADDing console message sig handler");
// console_message handler: the injected page scripts report back by logging a
// JSON string to the console. The message is parsed into `ret` and dispatched
// on ret.method ('exit', 'nsdownloadpage', 'gatherlinks', 'downloadpage',
// 'downloadhead'). Many lines (try/catch, switch headers, returns, closing
// braces) are not visible in this view.
386 this.signal.console_message.connect(function(wv, msg, line, sid) {
387 // print('BrowserView.js got ' + msg);
// NOTE(review): no `var ret` is visible before this assignment — presumably
// declared on an elided line; otherwise this creates a global.
391 ret = JSON.parse(msg);
394 print("GOT INVALID message:" + msg)
399 print("got method : " + ret.method);
401 if (ret.method == 'exit') {
// --- 'nsdownloadpage': write the returned data under <storedir>/output ---
406 if (ret.method == 'nsdownloadpage'){
// Drop any parameters after ';' to keep only the bare content type.
408 var mt = ret.contentType.split(';').shift();
410 print("INVALID CONTENT TYPE? \n" + JSON.stringify(ret));
417 print("CONSOLE GOT BLANK - call download next?");
419 //_t.nsdownloadNext();
420 // _t.moveToDone(ret.requesturl);
425 var info_f = _t.dupeCheck(ret.requesturl);
// NOTE(review): `info` is assigned without a visible `var` in this branch.
427 info = JSON.parse(File.read(info_f));
429 var target = storedir+'/output/' + ret.filename
436 //File.write(target ,decodeURIComponent(escape(base64.decode( ret.data))));
437 print("GOT array sized: " + ret.data.length);
439 File.writeBinaryArray(target,ret.data);
440 print("Wrote to file: " + target);
444 //_t.nsdownloadNext();
// --- 'gatherlinks': queue every acceptable link found on the parsed page ---
453 if (ret.method == 'gatherlinks'){
454 // flag the page as parsed.
457 _t.moveToDone(browsePage);
460 var sourcePage = browsePage;
462 print(typeof(ret.data));
463 if (typeof(ret.data) != 'object' ) {
464 print("GOT INVALID DATA?:" + JSON.stringify(ret,null,4));
468 ret.data.forEach(function(ln) {
// Only absolute http/https links are considered.
469 if (!ln.href.match(/^http[s]*:\/\//)) {
470 print("SKIP link: " + ln.href);
// Strip the fragment so the same page is not queued once per anchor.
473 ln.href= ln.href.replace(/#.*$/,'');
475 if (!_t.checkdomain(ln.href) ) {
476 print("SKIP link (external domain): " + ln.href);
480 // this is just for our purposes..
482 if (ln.href.match(/\/pages\/[0-9]+$/)) {
483 print("SKIP link (ingore unnamed pages): " + ln.href);
// Queue file name is the URI-encoded link.
489 var fn = encodeURIComponent(ln.href);
491 var dupe = _t.dupeCheck(ln.href);
// Already present in the download queue: compare the recorded source page
// with the current one before deciding whether to skip.
494 if (dupe == downloaddir + '/' + fn) {
495 var info = JSON.parse(File.read(dupe));
// NOTE(review): this reads info.fromUrl, while the 'downloadpage' branch
// below reads info.fromURL — confirm which key is actually written.
496 if (info && info.fromUrl && info.fromUrl.length > sourcePage.length) {
497 print("SKIP link (in queue already): " + ln.href);
500 print("found a longer link for url")
502 print("SKIP link (in another queue): " + ln.href);
507 File.write(downloaddir + '/' + fn, JSON.stringify( {
510 })); // write an empty file indicating it needs downloading..
// Refresh the recorded maximum queue length after adding entries.
512 var filesList = File.list(downloaddir);
514 maxQueue = filesList.length;
// --- 'downloadpage': store the fetched body and mark the URL as done ---
517 if (ret.method == 'downloadpage'){
518 // got the results from download page:
520 // remove from downloadqueue.
// Drop any parameters after ';' to keep only the bare content type.
525 var mt = ret.contentType.split(';').shift();
527 print("INVALID CONTENT TYPE? \n" + JSON.stringify(ret));
534 _t.moveToDone(ret.requesturl);
539 var info_f = _t.dupeCheck(ret.requesturl);
// NOTE(review): `info` is assigned without a visible `var` in this branch.
541 info = JSON.parse(File.read(info_f));
544 _t.moveToDone(ret.requesturl);
// Default target mirrors the request URL; when the queue entry records a
// source page, the file is placed beneath that page's path instead.
548 var target = _t.toFilename(ret.requesturl);
549 if (info && info.fromURL) {
550 var bn = decodeURIComponent(File.basename(target));
551 target = _t.toFilename(info.fromURL+'/'+ bn);
557 //File.write(target ,decodeURIComponent(escape(base64.decode( ret.data))));
558 print("GOT array sized: " + ret.data.length);
560 File.writeBinaryArray(target,ret.data);
561 print("Wrote to file: " + target);
565 // if it's HTML then add it to parse queue
566 // otherwise save it.. and run the queue again.
569 //File.write(storedir+"/parsequeue/"+Math.random(), msg);
// --- 'downloadhead': route the URL by its content type ---
573 if (ret.method == 'downloadhead'){
574 // got the results from download page:
576 // remove from downloadqueue.
// Drop any parameters after ';' to keep only the bare content type.
580 var mt = ret.contentType.split(';').shift();
582 print("INVALID CONTENT TYPE? \n" + JSON.stringify(ret));
588 // add to parse QUEUE..
589 print("moving to parse queue");
590 _t.moveToParse(ret.requesturl);
592 // stuff we do not care about..
593 case 'application/atom+xml':
595 print("moving to done queue");
596 _t.moveToDone(ret.requesturl);
600 print("calling download file");
601 _t.downloadpage( ret.requesturl );
602 // keep it on the queue..
603 // do not run the queue..
608 // if it's HTML then add it to parse queue
609 // otherwise save it.. and run the queue again.
612 //File.write(storedir+"/parsequeue/"+Math.random(), msg);
// Maps a URL to a path beneath <storedir>/output, one path segment per URL
// segment. The return statement is on a line not visible in this view.
620 this.toFilename = function(url)
// NOTE(review): `[s]*` accepts any number of 's' characters; `https?` is
// probably what was meant.
622 url = url.replace(/^http[s]*:\/\//, '');
623 var p = url.split('/');
624 p.unshift(storedir+'/output');
// Encode every URL segment (index 0 is the output root and is left alone).
625 for (var i =1 ;i < p.length; i++) {
626 p[i] = encodeURIComponent(p[i]);
// The final segment is decoded again, undoing the encoding applied above.
629 p[p.length-1] = decodeURIComponent(p[p.length-1]);
// NOTE(review): `ret` has no visible definition in this function —
// presumably the joined path built on an elided line; confirm it does not
// refer to the console_message handler's `ret`.
631 var dir = File.dirname(ret);
// Reports whether `comp` is on the same site as the page currently loaded in
// this view. Both URLs are run through parseUri(); the result is true only
// when host and protocol both match this.uri.
636 this.checkdomain = function(comp)
638 var b = parseUri(this.uri);
639 var d = parseUri(comp);
640 return (d.host == b.host && d.protocol == b.protocol);
// Looks for an existing queue entry for `url`. Queue files are named
// encodeURIComponent(url); the directories are probed in the order download,
// parse, done, and the path of the first match is returned. The value returned
// when nothing matches is on a line not visible in this view (callers treat it
// as falsy).
645 this.dupeCheck = function(url)
648 // order - return highest up the queue first..
649 if (File.exists(downloaddir +'/' + encodeURIComponent(url))) {
650 return downloaddir +'/' + encodeURIComponent(url);
652 if (File.exists(parsedir +'/' + encodeURIComponent(url))) {
653 return parsedir +'/' + encodeURIComponent(url);
655 if (File.exists(donedir +'/' + encodeURIComponent(url))) {
656 return donedir +'/' + encodeURIComponent(url);
// Places the queue entry for `url` in the parse queue. Any existing entry is
// located with dupeCheck() and its contents are copied; with no existing entry
// an empty file is written. Removal of the old entry, if any, is on lines not
// visible in this view.
662 this.moveToParse = function(url)
664 var old = this.dupeCheck(url);
665 var target =parsedir +'/' + encodeURIComponent(url);
669 File.write(target, old ? File.read(old) : '');
// Places the queue entry for `url` in the download queue. Any existing entry
// is located with dupeCheck() and its contents are copied; with no existing
// entry an empty file is written. Removal of the old entry, if any, is on
// lines not visible in this view.
676 this.moveToDownload= function(url)
678 var old = this.dupeCheck(url);
679 var target =downloaddir +'/' + encodeURIComponent(url);
683 File.write(target, old ? File.read(old) : '');
// Places the queue entry for `url` in the done (downloaded) queue. Any
// existing entry is located with dupeCheck() and its contents are copied; with
// no existing entry an empty file is written. Removal of the old entry, if
// any, is on lines not visible in this view.
689 this.moveToDone= function(url)
691 var old = this.dupeCheck(url);
692 var target = donedir +'/' + encodeURIComponent(url);
696 File.write(target, old ? File.read(old) : '');
// Splits a URI string into named components using the regular expressions and
// key list held in parseUri.options. The declarations of `uri` and `i`, and the
// return statement, are on lines not visible in this view.
706 function parseUri (str) {
707 var o = parseUri.options,
// Choose the strict or loose pattern according to options.strictMode.
708 m = o.parser[o.strictMode ? "strict" : "loose"].exec(str),
// Copy each capture group onto its named key; unmatched groups become "".
712 while (i--) uri[o.key[i]] = m[i] || "";
// key[12] is "query": break it into name/value pairs under uri[o.q.name].
715 uri[o.key[12]].replace(o.q.parser, function ($0, $1, $2) {
716 if ($1) uri[o.q.name][$1] = $2;
724 key: ["source","protocol","authority","userInfo","user","password","host","port","relative","path","directory","file","query","anchor"],
727 parser: /(?:^|&)([^&=]*)=?([^&]*)/g
730 strict: /^(?:([^:\/?#]+):)?(?:\/\/((?:(([^:@]*)(?::([^:@]*))?)?@)?([^:\/?#]*)(?::(\d*))?))?((((?:[^?#\/]*\/)*)([^?#]*))(?:\?([^#]*))?(?:#(.*))?)/,
731 loose: /^(?:(?![^:@]+:[^:@\/]*@)([^:\/?#.]+):)?(?:\/\/)?((?:(([^:@]*)(?::([^:@]*))?)?@)?([^:\/?#]*)(?::(\d*))?)(((\/(?:[^?#](?![^?#\/]*\.[^?#\/.]+(?:[?#]|$)))*\/?)?([^?#\/]*))(?:\?([^#]*))?(?:#(.*))?)/