BrowserView.js
[app.wkmirror] / BrowserView.js
1 Gtk = imports.gi.Gtk;
2 GLib = imports.gi.GLib;
3 WebKit = imports.gi.WebKit;
4
5 TabbedBrowser = imports.TabbedBrowser;
6 File = imports.File.File;
7
8 base64 = imports.base64.base64;
9
/**
 * BrowserView
 *
 * A WebKit.WebView subclass (registered with Seed's GType) that doubles as a
 * tab in the tabbed browser and as the driver of a file-based mirroring
 * queue.
 *
 * Queue layout (all under `storedir`):
 *   download_queue/   - URLs waiting for a HEAD/download request
 *   parse_queue/      - HTML pages waiting to be opened and scanned for links
 *   downloaded_queue/ - URLs that have been dealt with
 * Each entry is a file named encodeURIComponent(url). Entries written by the
 * 'gatherlinks' handler contain JSON {label, fromURL}; entries created by the
 * moveTo*() methods for previously unknown URLs are empty.
 *
 * Results come back from the page through the console_message signal: every
 * console message is parsed as JSON and dispatched on its `method` property
 * (presumably emitted by inject.js / nsinject.js - verify against those
 * scripts).
 */
BrowserView = new GType({
    parent: WebKit.WebView.type,
    name: "BrowserView",
    init: function ()
    {
        // Private

        var _t = this;

        var tab;                 // tab hosting this view, supplied through set_tab()
        var browsePage = false;  // URL of the page being parsed for links, or false
        var maxQueue = 0;        // largest download queue length seen (progress denominator)
        var injected = {};       // per-URI count of inject.js injections

        var storedir = '/home/alan/wkqueue';
        var parsedir = storedir + '/parse_queue';
        var downloaddir = storedir + '/download_queue';
        var donedir = storedir + '/downloaded_queue';

        // storedir is listed first so that the sub-directories can be created in it.
        [storedir, parsedir, downloaddir, donedir].forEach(function (dir) {
            if (!File.exists(dir)) {
                File.mkdir(dir);
            }
        });

        // Read and JSON-decode a queue entry. Returns false when the file is
        // empty or not valid JSON (the moveTo*() methods write '' for URLs
        // that were not queued before), so callers never see a parse error.
        var read_info = function (path)
        {
            try {
                return JSON.parse(File.read(path));
            } catch (e) {
                print("INVALID queue entry: " + path);
                return false;
            }
        };

        // Extract the bare mime type ("text/html; charset=x" -> "text/html")
        // from a console message; returns '' when contentType is missing.
        var content_type = function (ret)
        {
            try {
                return ret.contentType.split(';').shift();
            } catch (e) {
                print("INVALID CONTENT TYPE? \n" + JSON.stringify(ret));
                return '';
            }
        };

        // Move the queue entry for url (if any) into queuedir, keeping its
        // content; creates an empty entry when the url was not queued at all.
        var move_to_queue = function (queuedir, url)
        {
            var old = _t.dupeCheck(url);
            var target = queuedir + '/' + encodeURIComponent(url);
            if (old == target) {
                return;
            }
            File.write(target, old ? File.read(old) : '');
            if (old) {
                File.remove(old);
            }
        };

        // title_changed handler: show the (truncated) page title on the tab label.
        var update_title = function (web_view, web_frame, title)
        {
            if (title.length > 25) {
                title = title.slice(0,25) + "...";
            }

            tab.get_tab_label().label = title;
        };

        // Sync the toolbar's URL entry and back/forward buttons with the view.
        var update_url = function (web_view, web_frame)
        {
            var toolbar = tab.get_toolbar();

            toolbar.set_url(web_frame.get_uri());
            toolbar.set_can_go_back(web_view.can_go_back());
            toolbar.set_can_go_forward(web_view.can_go_forward());
        };

        // load_progress_changed handler: progress arrives as 0..100, the
        // toolbar is given a 0..1 fraction.
        var update_progress = function (bar, progress)
        {
            tab.get_toolbar().set_progress(progress / 100);
        };

        // create_web_view handler: hand WebKit a fresh BrowserView and put it
        // in a new tab once it reports web_view_ready.
        var create_new_tab = function (web_view, web_frame, new_web_view)
        {
            new_web_view = new BrowserView();
            new_web_view.signal.web_view_ready.connect(show_new_tab);
            return new_web_view;
        };

        var show_new_tab = function (new_web_view)
        {
            TabbedBrowser.browser.new_tab("", new_web_view);

            return false;
        };

        // hovering_over_link handler: show the hovered URL in the status bar.
        var hover_link = function (web_view, link, url)
        {
            tab.get_statusbar().set_status(url);
        };

        /**
         * Execute inject.js (from the script directory) in the CURRENT tab's
         * web view. At most three injections are made per URI of this view;
         * a truthy `force` resets that counter first.
         */
        this.add_inject = function(force)
        {
            if (force || (typeof(injected[this.uri]) == 'undefined' )) {
                injected[this.uri] = 0;
            }
            if (injected[this.uri] > 2) {
                return;
            }
            injected[this.uri]++;
            var fn = __script_path__ + "/inject.js";
            if (File.exists(fn)) {
                print("Adding inject");
                TabbedBrowser.browser.current_tab().get_web_view().execute_script(
                    File.read(fn)
                );
            }
        };

        /**
         * Execute nsinject.js in the current tab's web view. Unlike
         * add_inject() there is no per-URI limit and no existence check;
         * `force` is accepted for signature parity but not used.
         */
        this.add_nsinject = function(force)
        {
            print("Adding inject");
            TabbedBrowser.browser.current_tab().get_web_view().execute_script(
                File.read(__script_path__ + "/nsinject.js")
            );
        };

        // load_finished handler.
        var load_finished = function ()
        {
            print("load finished");

            // Half a second after the load, export the main frame to PDF.
            GLib.timeout_add(GLib.PRIORITY_LOW, 500, function() {
                var mf = _t.get_main_frame();
                var p = new Gtk.PrintOperation({ export_filename : '/home/chris/test_pdf/test.pdf' });

                print(mf.print_full(p, Gtk.PrintOperationAction.EXPORT));
                return false; // one-shot: do not reschedule the timeout
            });

            // NOTE(review): this early return disables everything below
            // (script injection, NS download kick-off and link gathering).
            // It looks like a leftover from the PDF export experiment above;
            // it is kept because re-enabling the flow changes runtime
            // behaviour - confirm which of the two modes is wanted.
            return;
            tab.get_toolbar().set_progress(0);

            _t.add_inject(true);

            _t.add_nsinject(true);
            print(typeof(Seed.argv[2]));
            if (Seed.argv[2] == null) {
                print("NEED ID!");
                Seed.quit();
            }
            _t.nsdownloadNext(Seed.argv[2]  * 1)

            if (browsePage) {
                print("onload: calling gather links");
                TabbedBrowser.browser.current_tab().get_web_view().execute_script(
                    'BrowserMirror.gatherlinks();'
                );
            }
        };

        var load_committed = function (web_view, web_frame)
        {
            update_url(web_view, web_frame);
        };

        // navigation_policy_decision_requested handler (currently not
        // connected, see below): open middle-clicked links in a new tab.
        var clicked_link = function (web_view, web_frame, request,
                                     action, decision, window)
        {
            if(action.get_reason() == WebKit.WebNavigationReason.LINK_CLICKED &&
               action.get_button() == 2)
            {
                // `browser` on its own is not defined in this file; the
                // browser instance lives on the TabbedBrowser module.
                TabbedBrowser.browser.new_tab(request.get_uri(), null);
                return true;
            }

            return false;
        };

        // Public

        /** Open url, prefixing "http://" when it carries no scheme. */
        this.browse = function (url)
        {
            if (url.search("://") < 0) {
                url = "http://" + url;
            }

            this.open(url);
        };

        this.set_tab = function (new_tab)
        {
            tab = new_tab;
        };

        this.get_tab = function ()
        {
            return tab;
        };

        /**
         * Start work on the first entry of the download queue.
         *
         * Entries for another host/protocol than the current page are removed
         * and the next entry is tried. Also updates the toolbar progress with
         * (queue length / largest queue length seen).
         *
         * @return true when a downloadhead request was issued, false when the
         *         queue is empty.
         */
        this.downloadqueue = function()
        {
            // Guard before the first use of .length (File.list may give null).
            var filesList = File.list(downloaddir) || [];
            print("DOWNLOAD QUEUE LENGTH: " + filesList.length);
            if (maxQueue < filesList.length) {
                maxQueue = filesList.length;
            }

            // maxQueue is 0 only while nothing was ever queued; avoid 0/0 = NaN.
            tab.get_toolbar().set_progress(maxQueue ? filesList.length / maxQueue : 0);
            if (filesList.length == 0) {
                return false;
            }
            var url = decodeURIComponent(filesList[0]);
            if (!this.checkdomain(url)) {
                print("SKIP (external domain) : " + url);
                File.remove(downloaddir + '/' + filesList[0]);
                return this.downloadqueue();
            }
            this.downloadhead( url);
            return true;
        };

        /**
         * Run the next queued job: downloads take precedence over parsing.
         * @return true when a job was started, false when both queues are empty.
         */
        this.queuerun = function()
        {
            if (this.downloadqueue()) {
                return true;
            }
            return this.parsequeue();
        };

        /**
         * Open the first page of the parse queue in this view and remember it
         * in browsePage.
         * @return true when a page was opened, false when the queue is empty.
         */
        this.parsequeue = function(){
            var filesList = File.list(parsedir);
            if (filesList == null || filesList.length == 0  ){
                return false;
            }

            browsePage = decodeURIComponent(filesList[0]);
            print("parsing page:" + browsePage );
            this.browse(browsePage);

            return true;
        };

        /** Ask the page-side BrowserMirror object to download link. */
        this.downloadpage = function(link){
            print("calling download page: " + link);
            _t.add_inject(); // just in case..
            this.execute_script(
                "BrowserMirror.downloadpage(" + JSON.stringify(link) +");"
            );
        };

        /** Ask the page-side BrowserMirror object for the headers of link. */
        this.downloadhead = function(link){
            print("calling download head: " + link);
            _t.add_inject(); // just in case..
            this.execute_script(
                "BrowserMirror.downloadhead(" + JSON.stringify(link) +");"
            );
        };

        /**
         * Log in through the page-side NS object and request the page whose
         * numeric id is given as Seed.argv[2]. Quits when no id was passed or
         * when <storedir>/output/<id>.csv already exists. Credentials are read
         * as JSON ({username, password}) from /home/alan/.nspasswd.
         */
        this.nsdownloadNext = function()
        {
            if (!Seed.argv[2]) {
                print("NEED ID!");
                Seed.quit();
                return; // never carry on with an undefined page id
            }
            var pg = Seed.argv[2]  * 1;
            if ( File.exists( storedir+'/output/' + pg + '.csv')) {
                print("DONE : " + storedir+'/output/' + pg + '.csv');
                Seed.quit();
                return;
            }

            var pd = JSON.parse(File.read('/home/alan/.nspasswd'));

            print("downloadnext : " + pg);
            this.execute_script(
                'NS.login(' +  JSON.stringify(pd.username) + ',' + JSON.stringify(pd.password) + ',' + pg + ' );'
            );
        };

        // Implementation
        //this.set_scroll_adjustments(null, null);

        this.signal.title_changed.connect(update_title);
        this.signal.load_committed.connect(load_committed);
        this.signal.load_finished.connect(load_finished);
        this.signal.load_progress_changed.connect(update_progress);

        // For some reason, this segfaults seed in the instance init closure handler
        // Once that's fixed, uncommenting the next line will give middle-click-open-in-new tab
        //this.signal.navigation_policy_decision_requested.connect(clicked_link);

        this.signal.hovering_over_link.connect(hover_link);

        this.signal.create_web_view.connect(create_new_tab);

        print("ADDing console message sig handler");

        // Console messages are the return channel from the page: each one is
        // expected to be a JSON object with a `method` property. Always
        // returns true. Unless a branch returns early, the queue is run again
        // at the end.
        this.signal.console_message.connect(function(wv, msg, line, sid) {
            var ret;
            try {
                ret = JSON.parse(msg);
            } catch(e) {
                print("GOT INVALID message:" + msg)
                return true;
            }
            // Valid JSON that is not an object (null, numbers, strings) has
            // no .method to dispatch on.
            if (!ret || typeof(ret) != 'object') {
                print("GOT INVALID message:" + msg);
                return true;
            }
            print("got method : " + ret.method);

            if (ret.method == 'exit') {
                Seed.quit();
            }

            if (ret.method == 'nsdownloadpage'){
                if (content_type(ret) == '') {
                    print("CONSOLE GOT BLANK - call download next?");
                    Seed.quit();
                    return true;
                }
                var nstarget = storedir+'/output/' + ret.filename;
                print("GOT array sized: " + ret.data.length);

                File.writeBinaryArray(nstarget, ret.data);
                print("Wrote to file: " + nstarget);
                return true;
            }

            if (ret.method == 'gatherlinks'){
                // flag the page as parsed.
                if (browsePage) {
                    _t.moveToDone(browsePage);
                }
                var sourcePage = browsePage;
                browsePage = false;
                print(typeof(ret.data));
                // Only a real array can be iterated below (typeof null and
                // typeof {} are both 'object').
                if (Object.prototype.toString.call(ret.data) != '[object Array]') {
                    print("GOT INVALID DATA?:" + JSON.stringify(ret,null,4));
                    ret.data= [];
                }
                ret.data.forEach(function(ln) {
                    if (!ln || typeof(ln.href) != 'string') {
                        print("SKIP link: " + JSON.stringify(ln));
                        return;
                    }
                    if (!ln.href.match(/^http[s]*:\/\//)) {
                        print("SKIP link: " + ln.href);
                        return;
                    }
                    ln.href= ln.href.replace(/#.*$/,'');

                    if (!_t.checkdomain(ln.href) ) {
                        print("SKIP link (external domain): " + ln.href);
                        return;
                    }

                    // this is just for our purposes..
                    if (ln.href.match(/\/pages\/[0-9]+$/)) {
                        print("SKIP link (ingore unnamed pages): " + ln.href);
                        return;
                    }

                    var fn = encodeURIComponent(ln.href);

                    var dupe  = _t.dupeCheck(ln.href);
                    if (dupe) {
                        if (dupe != downloaddir + '/'  + fn) {
                            print("SKIP link (in another queue): " + ln.href);
                            return;
                        }
                        // Already waiting for download: keep the entry whose
                        // referring page is longer. The stored key is
                        // `fromURL` (see the File.write below).
                        var info = read_info(dupe);
                        if (info && info.fromURL && info.fromURL.length > sourcePage.length) {
                            print("SKIP link (in queue already): " + ln.href);
                            return;
                        }
                        print("found a longer link for url")
                    }

                    // queue entry: marks the URL as needing download
                    File.write(downloaddir + '/'  + fn, JSON.stringify( {
                        label : ln.label,
                        fromURL : sourcePage
                    }));
                });
                var queued = File.list(downloaddir) || [];

                maxQueue =  queued.length;
            }

            if (ret.method == 'downloadpage'){
                // result of BrowserMirror.downloadpage(): take the URL off
                // the queue and, when it has a content type, store its data.
                if (content_type(ret) == '') {
                    _t.moveToDone(ret.requesturl);
                } else {
                    // read the entry's metadata before it is moved
                    var info_f = _t.dupeCheck(ret.requesturl);
                    var info = info_f ? read_info(info_f) : false;

                    // flag it as done..
                    _t.moveToDone(ret.requesturl);

                    // write it... below the referring page when that is known
                    var target  = _t.toFilename(ret.requesturl);
                    if (info && info.fromURL) {
                        var bn = decodeURIComponent(File.basename(target));
                        target = _t.toFilename(info.fromURL+'/'+ bn);
                    }

                    print("GOT array sized: " + ret.data.length);

                    File.writeBinaryArray(target,ret.data);
                    print("Wrote to file: " + target);
                }
            }

            if (ret.method == 'downloadhead'){
                // result of BrowserMirror.downloadhead(): route the URL by
                // its content type.
                switch(content_type(ret)) {
                    case 'text/html':
                        // add to parse QUEUE..
                        print("moving to parse queue");
                        _t.moveToParse(ret.requesturl);
                        break;
                    // stuff we do not care about..
                    case 'application/atom+xml':
                    case '':
                        print("moving to done queue");
                        _t.moveToDone(ret.requesturl);
                        break;

                    default:
                        print("calling download file");
                        _t.downloadpage( ret.requesturl );
                        // keep it on the queue..
                        // do not run the queue..
                        return true;
                }
            }
            _t.queuerun();

            return true;
        });

        /**
         * Map url to a file path below <storedir>/output and create its
         * directory. The scheme is dropped, every path segment is
         * URI-encoded, and the last segment is decoded back again.
         * @return the file path.
         */
        this.toFilename = function(url)
        {
            url = url.replace(/^http[s]*:\/\//, '');
            var p = url.split('/');
            p.unshift(storedir+'/output');
            for (var i = 1; i < p.length; i++) {
                p[i] = encodeURIComponent(p[i]);
            }
            p[p.length-1] = decodeURIComponent(p[p.length-1]);
            // `var`: this used to leak an implicit global named `ret`.
            var ret = p.join('/');
            File.mkdirall(File.dirname(ret));
            return ret;
        };

        /** @return true when comp has the same host and protocol as this view's uri. */
        this.checkdomain = function(comp)
        {
            var b = parseUri(this.uri);
            var d = parseUri(comp);
            return (d.host == b.host && d.protocol == b.protocol);
        };

        /**
         * Look url up in the queues, in the order download, parse, done.
         * @return the path of the first queue entry found, or false.
         */
        this.dupeCheck = function(url)
        {
            // order - return highest up the queue first..
            var dirs = [downloaddir, parsedir, donedir];
            var fn = encodeURIComponent(url);
            for (var i = 0; i < dirs.length; i++) {
                if (File.exists(dirs[i] + '/' + fn)) {
                    return dirs[i] + '/' + fn;
                }
            }
            return false;
        };

        /** Move (or create) the queue entry for url in the parse queue. */
        this.moveToParse = function(url)
        {
            move_to_queue(parsedir, url);
        };

        /** Move (or create) the queue entry for url in the download queue. */
        this.moveToDownload= function(url)
        {
            move_to_queue(downloaddir, url);
        };

        /** Move (or create) the queue entry for url in the done queue. */
        this.moveToDone= function(url)
        {
            move_to_queue(donedir, url);
        };

    }
});
669
/**
 * Split a URI string into its components.
 *
 * The string is matched against the "strict" or "loose" expression from
 * parseUri.options.parser (chosen by options.strictMode) and the 14 capture
 * slots are stored under the names listed in options.key; slots that did not
 * match become "". The query string is additionally split into name/value
 * pairs stored under options.q.name ("queryKey").
 *
 * @param  str  the URI to parse
 * @return object with source, protocol, authority, userInfo, user, password,
 *         host, port, relative, path, directory, file, query, anchor and
 *         queryKey properties
 */
function parseUri (str) {
        var opts  = parseUri.options;
        var mode  = opts.strictMode ? "strict" : "loose";
        var match = opts.parser[mode].exec(str);
        var uri   = {};

        // Fill from the last key down to the first, as many slots as keys.
        for (var idx = 13; idx >= 0; idx--) {
                uri[opts.key[idx]] = match[idx] || "";
        }

        // replace() is used only to walk every name=value pair of the query.
        uri[opts.q.name] = {};
        uri[opts.key[12]].replace(opts.q.parser, function (pair, name, value) {
                if (name) {
                        uri[opts.q.name][name] = value;
                }
        });

        return uri;
}

// Configuration read by parseUri() on every call.
parseUri.options = {
        strictMode: false,
        // Names for capture slots 0..13 of the parser expressions.
        key: [
                "source", "protocol", "authority", "userInfo", "user", "password", "host",
                "port", "relative", "path", "directory", "file", "query", "anchor"
        ],
        q: {
                name:   "queryKey",
                parser: /(?:^|&)([^&=]*)=?([^&]*)/g
        },
        parser: {
                strict: /^(?:([^:\/?#]+):)?(?:\/\/((?:(([^:@]*)(?::([^:@]*))?)?@)?([^:\/?#]*)(?::(\d*))?))?((((?:[^?#\/]*\/)*)([^?#]*))(?:\?([^#]*))?(?:#(.*))?)/,
                loose:  /^(?:(?![^:@]+:[^:@\/]*@)([^:\/?#.]+):)?(?:\/\/)?((?:(([^:@]*)(?::([^:@]*))?)?@)?([^:\/?#]*)(?::(\d*))?)(((\/(?:[^?#](?![^?#\/]*\.[^?#\/.]+(?:[?#]|$)))*\/?)?([^?#\/]*))(?:\?([^#]*))?(?:#(.*))?)/
        }
};