// BrowserView.js
// [app.wkmirror] / BrowserView.js
1 Gtk = imports.gi.Gtk;
2 GLib = imports.gi.GLib;
3 WebKit = imports.gi.WebKit;
4
5 TabbedBrowser = imports.TabbedBrowser;
6 File = imports.File.File;
7
8 base64 = imports.base64.base64;
9
10 BrowserView = new GType({
11     parent: WebKit.WebView.type,
12     name: "BrowserView",
13     init: function ()
14     {
15         // Private
16         
17         
18         var _t = this;
19         
20         var tab;
21         var browsePage = false;
22         var maxQueue = 0;
23         var injected = {};
24         
25         var storedir = '/home/alan/wkqueue';
26         if (!File.exists(storedir)) {
27             File.mkdir(storedir);
28         }
29         var parsedir = storedir + '/parse_queue';
30         var downloaddir = storedir + '/download_queue';
31         var donedir = storedir + '/downloaded_queue';
32         if (!File.exists(parsedir)) {
33             File.mkdir(parsedir);
34         }
35         if (!File.exists(downloaddir)) {
36             File.mkdir(downloaddir);
37         }
38         if (!File.exists(donedir)) {
39             File.mkdir(donedir);
40         }
41         
42         var update_title = function (web_view, web_frame, title)
43         {
44             if(title.length > 25)
45                 title = title.slice(0,25) + "...";
46
47             tab.get_tab_label().label = title;
48         };
49
50         var update_url = function (web_view, web_frame)
51         {
52             var toolbar = tab.get_toolbar();
53
54             toolbar.set_url(web_frame.get_uri());
55             toolbar.set_can_go_back(web_view.can_go_back());
56             toolbar.set_can_go_forward(web_view.can_go_forward());
57         };
58
59         var update_progress = function (bar, progress)
60         {
61             tab.get_toolbar().set_progress(progress / 100);
62         };
63
64         var create_new_tab = function (web_view, web_frame, new_web_view)
65         {
66             new_web_view = new BrowserView();
67             new_web_view.signal.web_view_ready.connect(show_new_tab);
68             return new_web_view;
69         };
70
71         var show_new_tab = function (new_web_view)
72         {
73             TabbedBrowser.browser.new_tab("", new_web_view);
74
75             return false;
76         };
77
78         var hover_link = function (web_view, link, url)
79         {
80             tab.get_statusbar().set_status(url);
81         };
82
83         
84         
85         
86         this.add_inject = function(force)
87         {
88             
89             if (force || (typeof(injected[this.uri]) == 'undefined' )) {
90                 injected[this.uri] = 0;
91             }
92             if (injected[this.uri] > 2) {
93                 return;
94             }
95             injected[this.uri]++;
96             var fn = __script_path__ + "/inject.js";
97             if (File.exists(fn)) {
98                 print("Adding inject");
99                 var newjs = File.read(__script_path__ + "/inject.js");
100                 TabbedBrowser.browser.current_tab().get_web_view().execute_script(
101                     newjs
102                     
103                 );
104             }
105             
106         }
107          this.add_nsinject = function(force)
108         {
109             
110             //if (force || (typeof(injected[this.uri]) == 'undefined' )) {
111             //    injected[this.uri] = 0;
112             //}
113             //if (injected[this.uri] > 2) {
114             //    return;
115             //}
116             //injected[this.uri]++;
117             var fn = __script_path__ + "/nsinject.js";
118             //if (File.exists(fn)) {
119                 print("Adding inject");
120                 var newjs = File.read(__script_path__ + "/nsinject.js");
121                 TabbedBrowser.browser.current_tab().get_web_view().execute_script(
122                     newjs
123                     
124                 );
125             //}
126             
127         }
128         
129         
130         
131         
132         
133         var load_finished = function ()
134         {
135             print("load finished");
136             
137             GLib.timeout_add(GLib.PRIORITY_LOW, 1000, function() { 
138                  
139 //                _t.open("http://sg.lifestyleasia.com/videos/video-british-polo-day-singapore-2012/");
140                 var mf = _t.get_main_frame();
141                 
142                 var ar = Gtk.PaperSize.get_paper_sizes();
143                 var psetup = new Gtk.PageSetup();
144                 for(var i = 0; i < ar.length; i++) {
145 //                    print(ar[i].get_name());
146                     if (ar[i].get_name() =='iso_a2') {
147                         psetup.set_paper_size(ar[i]);
148                     }
149                 }
150                 
151
152                 
153                 var p = new Gtk.PrintOperation({ export_filename : '/home/chris/test_pdf/test2.pdf' });
154                 var s = new Gtk.PrintSettings();
155                 
156                 
157                 //s.set_paper_size('iso_a4');
158 //                psetup.set_scale(0.5);
159                 p.set_default_page_setup(psetup);
160                 
161                 for(var i = 0; i < ar.length; i++) {
162 //                    print(ar[i].get_name());
163                     if (ar[i].get_name() =='iso_a4') {
164 //                        psetup.set_paper_size(ar[i]);
165                         s.set_paper_size(ar[i]);
166                     }
167                 }
168                 
169                 
170                 
171                 p.set_print_settings(s);
172                 //var s = p.get_print_settings();
173                 //print(s);
174                 //p.set_print_settings(s);
175                 mf.print_full(p, Gtk.PrintOperationAction.EXPORT)
176                 //Seed.quit();
177             });
178             
179             return;
180             tab.get_toolbar().set_progress(0);
181             
182             _t.add_inject(true);
183           
184             _t.add_nsinject(true);
185               print(typeof(Seed.argv[2]));
186             if (Seed.argv[2] == null) {
187                 print("NEED ID!");
188                 Seed.quit();
189             
190             }
191             _t.nsdownloadNext(Seed.argv[2]  * 1)
192           
193             
194             if (browsePage) {
195                 print("onload: calling gather links");
196                 TabbedBrowser.browser.current_tab().get_web_view().execute_script(
197                     'BrowserMirror.gatherlinks();'
198                     
199                 );
200                 
201             }
202             
203             
204         };
205
206         var load_committed = function (web_view, web_frame)
207         {
208             update_url(web_view, web_frame);
209         };
210
211         var clicked_link = function (web_view, web_frame, request,
212                                      action, decision, window)
213         {
214             if(action.get_reason() == WebKit.WebNavigationReason.LINK_CLICKED &&
215                action.get_button() == 2)
216             {
217                 browser.new_tab(request.get_uri(), null);
218                 return true;
219             }
220
221             return false;
222         };
223
224         // Public
225         
226         
227         this.browse = function (url)
228         {
229             if(url.search("://") < 0)
230                 url = "http://" + url;
231
232             this.open(url);
233         };
234
235         this.set_tab = function (new_tab)
236         {
237             tab = new_tab;
238         };
239
240         this.get_tab = function ()
241         {
242             return tab;
243         };
244
245
246         this.downloadqueue = function()
247         {
248             var filesList = File.list(downloaddir);
249             print("DOWNLOAD QUEUE LENGTH: " + filesList.length);
250             if (!maxQueue || maxQueue < filesList.length) {
251                 maxQueue  = filesList.length;
252             }
253             
254             tab.get_toolbar().set_progress(filesList.length / Math.max(maxQueue, filesList.length ));
255             if (filesList == null || filesList.length == 0  ){
256                 
257                 return false;
258             }  
259             var url = decodeURIComponent(filesList[0]);
260             if (!this.checkdomain(url)) {
261                 print("SKIP (external domain) : " + url);
262                 File.remove(downloaddir + '/' + filesList[0]);
263                 return this.downloadqueue();
264                 
265             }
266             this.downloadhead( url);
267             return true;
268             
269         };
270         
271     
272         this.queuerun = function()
273         {
274             if (this.downloadqueue()) return true;
275             return this.parsequeue();
276         };
277         
278         this.parsequeue = function(){
279             var filesList = File.list(parsedir);
280             if (filesList == null || filesList.length == 0  ){
281                 return false;
282             } 
283             
284             browsePage = decodeURIComponent(filesList[0]);
285             print("parsing page:" + browsePage );
286             this.browse(browsePage);
287             
288              
289             
290             
291             
292             return true;
293         };
294         
295         
296         this.downloadpage = function(link){
297             print("calling download page: " + link);
298             _t.add_inject(); // just in case..
299                 //var url = File.read(__script_path__+"/downloadqueue/"+link);
300             this.execute_script(
301                 "BrowserMirror.downloadpage(" + JSON.stringify(link) +");"
302             );          
303                 
304             
305         };
306  
307         this.downloadhead = function(link){
308             print("calling download head: " + link);
309             _t.add_inject(); // just in case..
310                 //var url = File.read(__script_path__+"/downloadqueue/"+link);
311             this.execute_script(
312                 "BrowserMirror.downloadhead(" + JSON.stringify(link) +");"
313             );          
314                  
315         };
316         var nsqueue = false;
317         this.nsdownloadNext = function()
318         {
319             /*if (nsqueue === false) {
320                 nsqueue = [
321                            //282,273,285,395,271,272,278,394,402,279,432,280,284,281,433,437,404,444,151,152,1,283,287,288,418,407,398,449,147,150,131,149,443,148,411,405,415,417,416,406,408,399,410,409,412,400,155,133,124,157,118,183,450,434,301,435,130,286,162,292,431,161,290,146,145,158,159,120,137,128,139,163,293,160,289,291,141,140,138,142,144,110,401,156,154,153,170,167,295,166,294,277,425,164,165,168,296,169,297,113,171,119,180,300,176,175,172,302,299,
322                            //298,421,187,307,428,275,269,304,181,182,174,173,184,303,112,179,114,305,177,178,186,306,403,420,111,109,309,188,308,446,445,310,311,53,189,52,312,419,270,190,191,192,193,54,194,108,268,121,195,206,396,205,207,324,220,340,212,331,219,339,222,342,436,209,327,424,208,326,210,328,330,325,211,329,218,337,338,224,223,217,336,214,333,213,332,216,335,221,341,215,334,197,314,
323                             //196,313,198,316,201,320,317,204,323,203,322,202,321,200,319,199,318,346,58,267,393,265,391,438,264,390,261,387,262,388,260,386,266,392,413,414,263,389,347,225,343,227,345,226,344,422,228,348,237,358,231,352,229,349,351,233,354,238,359,360,239,361,230,350,232,353,234,355,439,236,357,240,362,235,356,254,380,257,383,259,385,255,381,256,448,382,258,384,374,252,375,376,251,373,378,429,430,379,253,244,366,247,
324                            //369,363,242,364,241,243,365,246,368,245,367,377,427,
325                            426,250,447,372,249,371,248,370,442,397,129,441,440,116,122,123,125,126,136,115,117,127
326                         ];
327             }
328             
329             if (!nsqueue.length) {
330                 print("DONE");
331                 return;
332             }
333             var pg = nsqueue.shift() ;
334             
335             
336             */
337             
338             //print(typeof(Seed.argv[2]));
339             if (!Seed.argv[2]) {
340                 print("NEED ID!");
341                 Seed.quit();
342             
343             }
344             var pg = Seed.argv[2]  * 1;
345             if ( File.exists( storedir+'/output/' + pg + '.csv')) {
346                 print("DONE : " + storedir+'/output/' + pg + '.csv');
347                 Seed.quit();
348                 return;
349             }
350             
351             var pd = JSON.parse(File.read('/home/alan/.nspasswd'));
352             
353             print("downloadnext : " + pg);
354             this.execute_script(
355                 'NS.login(' +  JSON.stringify(pd.username) + ',' + JSON.stringify(pd.password) + ',' + pg + ' );'
356                 
357             );
358             
359             
360             
361         }
362
363         // Implementation
364         //this.set_scroll_adjustments(null, null);
365         
366         this.signal.title_changed.connect(update_title);
367         this.signal.load_committed.connect(load_committed);
368         this.signal.load_finished.connect(load_finished);
369         this.signal.load_progress_changed.connect(update_progress);
370         
371         // For some reason, this segfaults seed in the instance init closure handler
372         // Once that's fixed, uncommenting the next line will give middle-click-open-in-new tab
373         //this.signal.navigation_policy_decision_requested.connect(clicked_link);
374
375         this.signal.hovering_over_link.connect(hover_link);
376
377         this.signal.create_web_view.connect(create_new_tab);
378         
379         
380          
381         print("ADDing console message sig handler");
382         
383         
384         
385         
386         this.signal.console_message.connect(function(wv, msg, line, sid) {
387             // print('BrowserView.js got ' + msg);
388             var methodname;
389             var ret;
390             try {
391                 ret = JSON.parse(msg);
392             
393             } catch(e) {
394                 print("GOT INVALID message:" + msg)
395                 return true;
396                 
397                 
398             }
399             print("got method : " + ret.method);
400             
401             if (ret.method == 'exit') {
402                 Seed.quit();
403             }
404             
405             
406             if (ret.method == 'nsdownloadpage'){
407                 try { 
408                     var mt = ret.contentType.split(';').shift();
409                 } catch( e) {
410                     print("INVALID CONTENT TYPE? \n" + JSON.stringify(ret));
411                     mt='';
412                 }
413                 
414                 switch(mt) {
415                    
416                     case '':
417                         print("CONSOLE GOT BLANK - call download next?");
418                           Seed.quit();
419                          //_t.nsdownloadNext();
420                        // _t.moveToDone(ret.requesturl);
421                         break;
422                     
423                     default:
424                         var info = false;
425                         var info_f = _t.dupeCheck(ret.requesturl);
426                         if (info_f) {
427                             info  = JSON.parse(File.read(info_f));
428                         }
429                          var target  = storedir+'/output/' + ret.filename 
430                          // flag it as done..
431                        
432                         
433                         
434                         
435                         
436                         //File.write(target ,decodeURIComponent(escape(base64.decode( ret.data))));
437                         print("GOT array sized: " + ret.data.length);
438                         
439                         File.writeBinaryArray(target,ret.data);
440                         print("Wrote to file: " + target);
441                         // get next..
442                         return true;
443                         //Seed.quit();
444                         //_t.nsdownloadNext();
445                         
446                         
447                         
448                         break;
449                 }
450                 return true; //???
451             }
452             
453             if (ret.method == 'gatherlinks'){
454                 // flag the page as parsed.
455                 
456                 if (browsePage) {
457                     _t.moveToDone(browsePage);
458                     
459                 }
460                 var sourcePage = browsePage;
461                 browsePage = false;
462                 print(typeof(ret.data));
463                 if (typeof(ret.data) != 'object' ) {
464                     print("GOT INVALID DATA?:" + JSON.stringify(ret,null,4));
465                     ret.data= [];
466                     
467                 }
468                 ret.data.forEach(function(ln) {
469                     if (!ln.href.match(/^http[s]*:\/\//)) {
470                         print("SKIP link: " + ln.href);
471                         return;
472                     }
473                     ln.href= ln.href.replace(/#.*$/,'');
474                     
475                     if (!_t.checkdomain(ln.href) ) {
476                         print("SKIP link (external domain): " + ln.href);
477                         return;
478                     }
479                     
480                     // this is just for our purposes..
481                     
482                     if (ln.href.match(/\/pages\/[0-9]+$/)) {
483                         print("SKIP link (ingore unnamed pages): " + ln.href);
484                         return;
485                     }
486                     
487                     
488                     
489                     var fn = encodeURIComponent(ln.href);
490                     
491                     var dupe  = _t.dupeCheck(ln.href);
492                     if (dupe) {
493                         
494                         if (dupe == downloaddir + '/'  + fn) {
495                             var info = JSON.parse(File.read(dupe));
496                             if (info && info.fromUrl && info.fromUrl.length > sourcePage.length) {
497                                 print("SKIP link (in queue already): " + ln.href);
498                                 return;
499                             }
500                             print("found a longer link for url")
501                         } else {
502                             print("SKIP link (in another queue): " + ln.href);
503                             return;
504                         }
505                     }
506                      
507                     File.write(downloaddir + '/'  + fn, JSON.stringify( {
508                         label : ln.label,
509                         fromURL : sourcePage
510                     })); // write an empyt file indicating it needs downloading..
511                 });         
512                 var filesList = File.list(downloaddir);
513             
514                 maxQueue =  filesList.length;
515             }
516             
517             if (ret.method == 'downloadpage'){
518                 // got the results from download page:
519                 // requesturl 
520                 // remove from downloadqueue.
521              
522                 
523                 //
524                 try { 
525                     var mt = ret.contentType.split(';').shift();
526                 } catch( e) {
527                     print("INVALID CONTENT TYPE? \n" + JSON.stringify(ret));
528                     mt='';
529                 }
530                 
531                 switch(mt) {
532                    
533                     case '':
534                         _t.moveToDone(ret.requesturl);
535                         break;
536                     
537                     default:
538                         var info = false;
539                         var info_f = _t.dupeCheck(ret.requesturl);
540                         if (info_f) {
541                             info  = JSON.parse(File.read(info_f));
542                         }
543                     
544                         _t.moveToDone(ret.requesturl);
545                         // flag it as done..
546                        
547                         // write it...
548                         var target  = _t.toFilename(ret.requesturl);
549                         if (info && info.fromURL) {
550                             var bn = decodeURIComponent(File.basename(target));
551                             target = _t.toFilename(info.fromURL+'/'+ bn);
552                         }
553                         
554                         
555                         
556                         
557                         //File.write(target ,decodeURIComponent(escape(base64.decode( ret.data))));
558                         print("GOT array sized: " + ret.data.length);
559                         
560                         File.writeBinaryArray(target,ret.data);
561                         print("Wrote to file: " + target);
562                         break;
563                 }
564                 
565                 // if it's HTML then add it to parse queue
566                 // otehrwise save it.. and run the queue again.
567                
568                 
569                 //File.write(storedir+"/parsequeue/"+Math.random(), msg);
570             }
571             
572            
573             if (ret.method == 'downloadhead'){
574                 // got the results from download page:
575                 // requesturl 
576                 // remove from downloadqueue.
577                 
578                 //
579                 try { 
580                     var mt = ret.contentType.split(';').shift();
581                 } catch( e) {
582                     print("INVALID CONTENT TYPE? \n" + JSON.stringify(ret));
583                     mt='';
584                 }
585                 
586                 switch(mt) {
587                     case 'text/html':
588                         // add to parse QUEUE..
589                         print("moving to parse queue");
590                         _t.moveToParse(ret.requesturl);
591                         break;
592                     // stuf we do not care about..
593                     case 'application/atom+xml':
594                     case '':
595                         print("moving to done queue");
596                          _t.moveToDone(ret.requesturl);
597                         break;
598                     
599                     default:
600                         print("calling download file");
601                         _t.downloadpage( ret.requesturl );
602                         // keep it on the queue..
603                         // do not run the queue..
604                         return true;
605                      
606                 }
607                 
608                 // if it's HTML then add it to parse queue
609                 // otehrwise save it.. and run the queue again.
610                
611                 
612                 //File.write(storedir+"/parsequeue/"+Math.random(), msg);
613             }
614             _t.queuerun();
615             
616             return true;
617         });
618         
619         
620         this.toFilename = function(url)
621         {
622             url = url.replace(/^http[s]*:\/\//, '');
623             var p = url.split('/');
624             p.unshift(storedir+'/output');
625             for (var i =1 ;i < p.length; i++) {
626                 p[i] = encodeURIComponent(p[i]);
627             
628             }
629             p[p.length-1] = decodeURIComponent(p[p.length-1]);
630             ret = p.join('/');
631             var dir = File.dirname(ret);
632             File.mkdirall(dir);
633             return ret;
634             
635         }
636         this.checkdomain = function(comp)
637         {
638             var b = parseUri(this.uri);
639             var d = parseUri(comp);
640             return (d.host == b.host && d.protocol == b.protocol);
641             
642             
643         }
644         
645         this.dupeCheck = function(url)
646         {
647             
648            // order - return highest up the queue first..
649             if (File.exists(downloaddir +'/' + encodeURIComponent(url))) {
650                 return downloaddir +'/' + encodeURIComponent(url);
651             }
652              if (File.exists(parsedir +'/' + encodeURIComponent(url))) {
653                 return parsedir +'/' + encodeURIComponent(url);
654             }
655             if (File.exists(donedir +'/' + encodeURIComponent(url))) {
656                 return donedir +'/' + encodeURIComponent(url);
657             }
658             return  false;
659             
660             
661         }
662         this.moveToParse = function(url)
663         {
664             var old = this.dupeCheck(url);
665             var target =parsedir +'/' + encodeURIComponent(url);
666             if (old == target) {
667                 return;
668             }
669             File.write(target, old ? File.read(old) : '');
670             if (old) {
671                 File.remove(old);
672             }
673             
674         }
675         
676         this.moveToDownload= function(url)
677         {
678             var old = this.dupeCheck(url);
679             var target =downloaddir +'/' + encodeURIComponent(url);
680             if (old == target) {
681                 return;
682             }
683             File.write(target, old ? File.read(old) : '');
684             if (old) {
685                 File.remove(old);
686             }
687             
688         }
689         this.moveToDone= function(url)
690         {
691             var old = this.dupeCheck(url);
692             var target = donedir +'/' + encodeURIComponent(url);
693             if (old == target) {
694                 return;
695             }
696             File.write(target, old ? File.read(old) : '');
697             if (old) {
698                 File.remove(old);
699             }
700             
701         }
702         
703     }
704 });
705
/**
 * Splits a URI into its components.
 *
 * @param str  the URI; null and undefined are treated as the empty string
 * @returns an object with one string property per name in
 *          parseUri.options.key ("" when the part is absent) plus
 *          `queryKey`, an object mapping each non-empty query parameter
 *          name to its (undecoded) value.
 *
 * Fix: null/undefined used to be stringified by the regular expression
 * engine and came back as a URI whose host was "null"/"undefined"; they now
 * yield all-empty components. A failed match is also tolerated.
 */
function parseUri (str) {
        var o = parseUri.options;
        var source = (str === null || str === undefined) ? "" : String(str);
        var m = o.parser[o.strictMode ? "strict" : "loose"].exec(source) || [];
        var uri = {};
        var i = o.key.length;

        // Capture group i of the parser corresponds to o.key[i].
        while (i--) {
                uri[o.key[i]] = m[i] || "";
        }

        // o.key[12] is "query": walk its name=value pairs.
        uri[o.q.name] = {};
        uri[o.key[12]].replace(o.q.parser, function ($0, $1, $2) {
                if ($1) {
                        uri[o.q.name][$1] = $2;
                }
        });

        return uri;
}

// strictMode selects between the two parser expressions; `key` names the
// parser's capture groups in order; `q` describes how the query string is
// broken into the queryKey map.
parseUri.options = {
        strictMode: false,
        key: ["source","protocol","authority","userInfo","user","password","host","port","relative","path","directory","file","query","anchor"],
        q:   {
                name:   "queryKey",
                parser: /(?:^|&)([^&=]*)=?([^&]*)/g
        },
        parser: {
                strict: /^(?:([^:\/?#]+):)?(?:\/\/((?:(([^:@]*)(?::([^:@]*))?)?@)?([^:\/?#]*)(?::(\d*))?))?((((?:[^?#\/]*\/)*)([^?#]*))(?:\?([^#]*))?(?:#(.*))?)/,
                loose:  /^(?:(?![^:@]+:[^:@\/]*@)([^:\/?#.]+):)?(?:\/\/)?((?:(([^:@]*)(?::([^:@]*))?)?@)?([^:\/?#]*)(?::(\d*))?)(((\/(?:[^?#](?![^?#\/]*\.[^?#\/.]+(?:[?#]|$)))*\/?)?([^?#\/]*))(?:\?([^#]*))?(?:#(.*))?)/
        }
};