// BrowserView.js
// [app.webkitpdf] / BrowserView.js
1 Gtk = imports.gi.Gtk;
2 GLib = imports.gi.GLib;
3 WebKit = imports.gi.WebKit;
4 Soup = imports.gi.Soup;
5
6 TabbedBrowser = imports.TabbedBrowser;
7 BrowserSettings = imports.BrowserSettings;
8 BrowserTab = imports.BrowserTab;
9
10 File = imports.File.File;
11
12 base64 = imports.base64.base64;
13
14 BrowserView = new GType({
15     parent: WebKit.WebView.type,
16     name: "BrowserView",
17     init: function ()
18     {
19         // Private
20         
21         var sess  = WebKit.get_default_session();
22         sess.proxy_uri = new Soup.URI.c_new('http://localhost:3128');       
23         var _t = this;
24         
25         var tab = BrowserTab;
26         var browsePage = false;
27         var maxQueue = 0;
28         var injected = {};
29         
30         var current_url = '';
31  
32         var update_title = function (web_view, web_frame, title)
33         {
34             
35             print("calling update title");
36             if(title.length > 25)
37                 title = title.slice(0,25) + "...";
38
39             tab.get_tab_label().label = title;
40         };
41         
42         
43         
44            
45         
46         var traversedom = function(doc, fn, sub ) {
47             print("TRAVERSE DOM?");
48             sub = sub || 0;
49             
50             var ret = '<HTML>';
51             
52             var cb =  function(s) {
53                 //print("CB:" + s);
54                 ret+=s;
55             };
56             cb.fn = fn;
57             cb.sub = sub +1;
58             print(doc);
59             print(doc.head);
60             print (doc.body);
61             
62              //Roo.select('body > div',true).each(function(el) {
63             traverseDOMTree(cb, doc.head, 1);
64             traverseDOMTree(cb, doc.body, 1);
65            //print(ret);   
66             File.write(cb.fn.replace(/\.html$/, '')+ (sub ? ('.' + sub  ) : '') + '.html' , ret +'</HTML>');
67         
68         };
69          
70         
71         
72         var traverseDOMTree = function(cb, currentElement, depth) {
73             if (currentElement) {
74                 
75                 //if (currentElement.class_name.match(/roo-dynamic/)) {
76                 //    return;
77                 //}
78                 //print(currentElement.node_name);
79                 var j;
80                 var nodeName = currentElement.node_name;
81                 var tagName = currentElement.tag_name;
82                 
83                 if  (nodeName == '#text') {
84                     cb(currentElement.node_value);
85                     return;
86                 
87                 }
88                 if(nodeName == 'BR'){
89                     cb("<BR/>");
90                     return;
91                 }
92                
93                 if (nodeName == 'SCRIPT') {
94                      
95                     return;
96                 }
97                 //if (nodeName == 'STYLE') {  return; }
98                 var i = 0;
99               // Prints the node tagName, such as <A>, <IMG>, etc
100                 var outNodeName = nodeName;
101                 if (nodeName == 'IFRAME') {
102                     //outNodeName  = 'DIV';
103                 }
104                 if (tagName) {
105                     
106                     
107                     
108                     
109                     var attr = [];
110                     
111                     for(i = 0; i < currentElement.attributes.length;i++) {
112                         var aname = currentElement.attributes.item(i).name;
113                         //if (aname =='class' || aname == 'style') {
114                         //    continue;
115                         //}
116                         if (aname =='src' && tagName == 'IFRAME') {
117                             continue;
118                         }
119                         
120                         attr.push(aname + '="' + currentElement.attributes.item(i).value + '"' );
121                     }
122                     if (tagName == 'IFRAME') {
123                         attr.push('src="' + File.basename(cb.fn).replace(/\.html$/, '')+ '.' + cb.sub + '.html"' );
124                     }
125                     
126                     //if (nodeName == 'IFRAME') {
127                     //    attr.push('src="' + currentElement.attributes.item(i).value + embeded_file '"' );
128                     //}
129                     
130                     //var style = elClassToStyle(currentElement);
131                     //if (style.length) {
132                     //    attr.push('style="' + style + '"' );
133                     //}
134                     
135                     
136                     //if (depth > 1000 && (tagName == 'BODY' || tagName == 'HEAD' )) {
137                      //   cb("<DIV"+ ( attr.length ? (' ' + attr.join(' ') ) : '') + ">");
138                     //} else {
139                         cb("<"+outNodeName + ( attr.length ? (' ' + attr.join(' ') ) : '') + ">");
140                         
141                     //}
142                 } 
143                 else {
144                   cb("[unknown tag]");
145                 }
146                 
147                  if (nodeName == 'IFRAME') {
148                     
149                    
150                     traversedom(currentElement.content_document,  cb.fn,  cb.sub )
151                     cb.sub++;
152                     cb("</IFRAME>");
153                     return;
154                 }
155                 
156                 
157                 // Traverse the tree
158                 i = 0;
159                 var currentElementChild = currentElement.child_nodes.item(i);
160                 var allText = true;
161                 while (currentElementChild) {
162                     // Formatting code (indent the tree so it looks nice on the screen)
163                     
164                     if  (currentElementChild.node_name == '#text') {
165                        // if (currentElementChild.node_value.length) {
166                             cb(currentElementChild.node_value);
167                        // }
168                         
169                         i++;
170                         currentElementChild=currentElement.child_nodes.item(i);
171                         continue;
172                     }   
173                     allText = false;
174                     //cb("\n");
175                     //for (j = 0; j < depth; j++) {
176                       // &#166 is just a vertical line
177                     //  cb("  ");
178                     //}//               
179                     
180                         
181                     // Recursively traverse the tree structure of the child node
182                     traverseDOMTree(cb, currentElementChild, depth+1);
183                     i++;
184                     currentElementChild=currentElement.child_nodes.item(i);
185                 }
186                 if (!allText) {
187                     // The remaining code is mostly for formatting the tree
188                     //cb("\n");
189                     //for (j = 0; j < depth - 1; j++) {
190                       //cb("  ");
191                     //}     
192                 }
193                 if (tagName) {
194                    // if (depth > 1000 && (tagName == 'BODY' || tagName == 'HEAD' )) {
195                    //     cb("</DIV>");
196                    // } else {
197                         cb("</"+outNodeName+">");
198                    // }
199                     
200                 }
201             }
202         };
203         
204
205         var update_url = function (web_view, web_frame)
206         {
207             var toolbar = tab.get_toolbar();
208
209             toolbar.set_url(web_frame.get_uri());
210             toolbar.set_can_go_back(web_view.can_go_back());
211             toolbar.set_can_go_forward(web_view.can_go_forward());
212             
213             
214             
215             
216         };
217
218     
219       
220
221         var create_new_tab = function (web_view, web_frame, new_web_view)
222         {
223             new_web_view = new BrowserView();
224             new_web_view.signal.web_view_ready.connect(show_new_tab);
225             return new_web_view;
226         };
227
228         var show_new_tab = function (new_web_view)
229         {
230             TabbedBrowser.browser.new_tab("", new_web_view);
231
232             return false;
233         };
234
235         var hover_link = function (web_view, link, url)
236         {
237             tab.get_statusbar().set_status(url);
238         };
239          
240         /*
241         
242         this.add_inject = function(force)
243         {
244             
245             if (force || (typeof(injected[this.uri]) == 'undefined' )) {
246                 injected[this.uri] = 0;
247             }
248             if (injected[this.uri] > 2) {
249                 return;
250             }
251             injected[this.uri]++;
252             var fn = __script_path__ + "/inject.js";
253             if (File.exists(fn)) {
254 //                print("Adding inject");
255                 var newjs = File.read(__script_path__ + "/inject.js");
256                 TabbedBrowser.browser.current_tab().get_web_view().execute_script(
257                     newjs
258                     
259                 );
260             }
261             
262         }
263         */
264         var after_login = false;
265         
266         
267         var load_finished_called = false;
268         
269         var done_print = false;
270         var do_print = function(web_frame) {
271             
272             
273             if (done_print) {
274                 print("do_print called -- while already doing this...");
275                 return;
276             }
277             done_print = true;
278             print("do_print called");
279             
280             var scr_a =  " var r = document.getElementsByTagName('link');" +
281                 "for (var i=0;i < r.length;i++) { " +
282                     "var a = r[i]; " +
283                     "if (  a.getAttribute('media') == '')  { continue; } " +
284                     "if (  a.getAttribute('media') == 'screen')  { "+
285                         "a.removeAttribute('media'); continue;" +
286                     "} " +
287                     //"a.parentNode.removeChild(a);" +
288                 "}";
289             var scr_b =  "var a = document.body.querySelectorAll('*'); "+
290                 "var dnodes=[]; "+
291                 "for(var i =i;i<a.length;i++) { "+
292                 "   var cs = window.getComputedStyle(a[i],null);"+
293                 "   if ('none' == cs.display) {"+
294                 "    dnodes.push(a[i]); "+
295                 "  }"+ 
296                 "}"+
297                 "console.log(dnodes.length);" +
298                 "dnodes.forEach(function(n) { "+
299                 "    try {  n.parentNode.removeChild(n);   } catch(e) { } "+
300                 "});";
301             
302             
303             //TabbedBrowser.browser.current_tab().get_web_view().execute_script(
304             //    scr_a
305             //);
306             //print(scr_b);
307             //TabbedBrowser.browser.current_tab().get_web_view().execute_script(
308             //    scr_b
309             //);
310             
311             if (current_url.match(/\.coconuts\.co\//) ) {
312                 TabbedBrowser.browser.current_tab().get_web_view().execute_script(
313                     File.read( __script_path__ + "/domains/coconuts.co.js")
314                 );
315                               
316             }
317             if (current_url.match(/\.itnewsafrica\.com\//) ) {
318                 TabbedBrowser.browser.current_tab().get_web_view().execute_script(
319                     File.read( __script_path__ + "/domains/itnewsafrica.com.js")
320                 );
321             }
322             if (current_url.match(/\.indiatimes\.com\//) ) {
323                 print("running indiatimes code");
324                 TabbedBrowser.browser.current_tab().get_web_view().execute_script(
325                     File.read( __script_path__ + "/domains/indiatimes.com.js")
326                 );
327             }
328             
329             if (current_url.match(/\.inquirer\.net\//) ) {
330                 print("add inquirer.net code");
331                 TabbedBrowser.browser.current_tab().get_web_view().execute_script(
332                     File.read( __script_path__ + "/domains/inquirer.net.js")
333                 );
334                               
335             }
336             if (current_url.match(/\.puntocellulare\.it\//) ) {
337                 TabbedBrowser.browser.current_tab().get_web_view().execute_script(
338                     File.read( __script_path__ + "/domains/coconuts.co.js")
339                 );
340                               
341             }
342             
343             // motorme - requires a scroll to make content appear..
344             if (current_url.match(/\.motorme\.my\//) ) {
345                 TabbedBrowser.browser.current_tab().get_web_view().execute_script("window.scrollTo(0,500);"); 
346                               
347             }
348             
349             if (current_url.match(/\.efytimes\.com\//) ||  current_url.match(/\/efytimes\.com\//) ) {
350                 TabbedBrowser.browser.current_tab().get_web_view().execute_script(
351                     File.read( __script_path__ + "/domains/efytimes.com.js")
352                 );
353             }
354             
355            
356             print("Delay:" + BrowserSettings.delay);
357          
358             GLib.timeout_add(GLib.PRIORITY_DEFAULT, BrowserSettings.delay *1, function() {
359                 
360                 try {
361                     
362                     if (current_url.match(/theindependent\.sg\//) ) {
363                         TabbedBrowser.browser.current_tab().get_web_view().execute_script(
364                             File.read( __script_path__ + "/domains/theindependent.sg.js")
365                         );
366                                       
367                     }
368                     print("FETCHING");
369                     //print(web_frame);
370                     if (web_frame &&  BrowserSettings.export_filename_html  ) {
371                         var html = traversedom(web_frame.get_dom_document(), BrowserSettings.export_filename_html);
372                         //File.write(BrowserSettings.export_filename_html, html);
373                         
374                     } 
375                      
376                     if (!BrowserSettings.export_filename) {
377                         if (!BrowserSettings.export_filename_html) {
378                              print("no export filename / and html filename");
379                             return;
380                         }
381                         
382                         Seed.quit();
383                     }
384                     var mf = _t.get_main_frame();
385                      
386                     print("Creating paper");
387                     var ar = Gtk.PaperSize.get_paper_sizes();
388                     var psetup = new Gtk.PageSetup();
389                     for(var i = 0; i < ar.length; i++) {
390                         if (ar[i].get_name() =='iso_a2') {
391                             psetup.set_paper_size(ar[i]);
392                         }
393                     }
394                     print("doing print operation");
395                     var p = new Gtk.PrintOperation({ export_filename : BrowserSettings.export_filename });
396                     print("doing print operation - set page"); 
397
398                     p.set_default_page_setup(psetup);
399                     
400                     print("doing print operation - print_full"); 
401                     mf.print_full(p, Gtk.PrintOperationAction.EXPORT);
402                     print("made image - exiting");
403                     
404                     Seed.quit();
405                     return true;
406                 } catch(e) {
407                     print("error occured");
408                     print(JSON.stringify(e));
409                 }
410             });
411         }
412         
413         
414         var weibo_added = false;
415         
416         var is_weibo = false;
417         
418         
419         var load_finished = function (webkit, web_frame, wb)
420         {
421             
422            
423             
424             if (!is_weibo) {
425                 do_print(web_frame);
426                 return;
427             }
428              
429              
430              print("load finished");
431              if (weibo_added) {
432                  
433                  if (after_login === true) {
434                      print("do print");
435                      do_print();
436                      return;
437                      
438                  }
439                  if (after_login === false) {
440                      return; // not here?
441                  }
442                  print("adding timeout?");
443                  //return;
444                 GLib.timeout_add(GLib.PRIORITY_LOW, 2000, function() {
445                     print("Redirecting after login?" + after_login);
446                     
447                     _t.browse(after_login);
448                     after_login  = true;
449                 });
450                 return;
451              }
452             
453             weibo_added = true;
454             TabbedBrowser.browser.current_tab().get_web_view().execute_script(
455                 File.read( __script_path__ + "/weibo.js") 
456             );
457             print("run_weibo(" + JSON.stringify( BrowserSettings.username ) + ", " + JSON.stringify(BrowserSettings.passwd) + ");");
458     
459             TabbedBrowser.browser.current_tab().get_web_view().execute_script(
460                     "run_weibo(" + JSON.stringify( BrowserSettings.username ) + ", " + JSON.stringify(BrowserSettings.passwd) + ");"
461             );
462             return;
463     
464             //    return;
465             print(after_login);
466             if (after_login !== false) {
467                 
468 //                return;
469                 // wait a bit then load the real page..
470                 GLib.timeout_add(GLib.PRIORITY_LOW, 500, function() {
471                     print("Redirecting after login?");
472                     _t.browse(after_login);
473                 });
474                 
475                 return;
476             }
477          
478             if (load_finished_called) {
479                 return ;
480             
481             }
482             load_finished_called = true;
483             
484           
485             
486 //            if(document.location.host=='weibo.com') {
487 //                // clear login dialog from weibo.
488 //                //check and hidden the login dialog and overlay .....
489 //                var els = document.querySelectorAll('div[node-type]');
490 //                var bbc = [];
491 //                for (i = 0; i < els.length; i++) {
492 //                    if (els[i].hasAttribute('node-type')) {
493 //                        if(els[i].getAttribute('node-type') == 'outer'){
494 //                            bbc.push(els[i]);
495 //                        }
496 //                    }
497 //                }
498 //                for (i = 0; i < bbc.length; i++) {// hide all the outer.....
499 //                    bbc[i].style.display = 'none';
500 //                }
501 //                
502 //                //try to login 
503 //                if(document.location.pathname == '/login.php'){
504 //                    
505 //                    // fills in the user name and password 
506 //                    var els = document.querySelectorAll('input[node-type]');
507 //                    
508 //                    for (i = 0; i < els.length; i++) {
509 //                        if (els[i].hasAttribute('node-type')) {
510 //                            if(els[i].getAttribute('node-type') == 'username'){
511 //                                els[i].value = BrowserSettings.username;
512 //                            }
513 //                            if(els[i].getAttribute('node-type') == 'password'){
514 //                                els[i].value = BrowserSettings.passwd;
515 //                            }
516 //                        }
517 //                    }
518 //                    
519 //                    
520 //                    // simulating click event to login ....
521 //                    var elss = document.querySelectorAll('a[node-type]');
522 //                    
523 //                    for (i = 0; i < elss.length; i++) {
524 //                        if (elss[i].hasAttribute('node-type')) {
525 //                            if(elss[i].getAttribute('node-type') == 'submitBtn'){
526 //                                elss[i].click();
527 //                            }
528 //                        }
529 //                    }
530 //                    
531 //                    
532 //                }
533 //                
534 //            }
535             
536             
537             // clear login dialog from weibo.
538 //            TabbedBrowser.browser.current_tab().get_web_view().execute_script(
539 //                " if (document.location.host=='weibo.com') { " +
540 //                " try {  " + 
541 //                " var a = document.getElementsByClassName('W_layer')[0]; " + 
542 //                " a.parentNode.removeChild(a.previousSibling); " + 
543 //                " a.parentNode.removeChild(a); " + 
544 //                " } catch(e) { }  } " );
545             
546             
547             return;
548             GLib.timeout_add(GLib.PRIORITY_LOW, BrowserSettings.delay, function() {
549                  
550                 var mf = _t.get_main_frame();
551                 
552                 var ar = Gtk.PaperSize.get_paper_sizes();
553                 var psetup = new Gtk.PageSetup();
554                 for(var i = 0; i < ar.length; i++) {
555                     if (ar[i].get_name() =='iso_a2') {
556                         psetup.set_paper_size(ar[i]);
557                     }
558                 }
559                 
560                 var p = new Gtk.PrintOperation({ export_filename : BrowserSettings.export_filename });
561                 p.set_default_page_setup(psetup);
562                 mf.print_full(p, Gtk.PrintOperationAction.EXPORT);
563                 print("made image - exiting");
564                 
565                 Seed.quit();
566                 return true;
567             });
568             
569             return;
570         };
571  
572  
573  
574         
575         var load_committed = function (web_view, web_frame)
576         {
577             print("load commited");
578             
579             update_url(web_view, web_frame);
580              
581             // call load finished after 20seconds??
582             // so even if it never complets we try and print the thing.
583             GLib.timeout_add(GLib.PRIORITY_LOW, 40000, function() {
584                 print("load commited - 3000 ms?");
585                 load_finished();
586             });
587             
588         };
589
590         var clicked_link = function (web_view, web_frame, request,
591                                      action, decision, window)
592         {
593             if(action.get_reason() == WebKit.WebNavigationReason.LINK_CLICKED &&
594                action.get_button() == 2)
595             {
596                 browser.new_tab(request.get_uri(), null);
597                 return true;
598             }
599
600             return false;
601         };
602
603         // Public
604         
605         
606         this.browse = function (url)
607         {
608             if(url.search("://") < 0)
609                 url = "http://" + url;
610             
611             current_url = url;
612             
613             print("BROWSE: " + url);
614             if (url.match(/\/weibo\.com\//) && after_login === false) {
615                 is_weibo = true;
616                 after_login = url;
617                 url = "http://weibo.com/login.php";
618             }
619             else  if (url.match(/\/weibo\.com\//) && after_login !== false) {
620                 after_login = false;
621             }
622             print("BROWSE -really: " + url);
623             this.open(url);
624         };
625
626         this.set_tab = function (new_tab)
627         {
628             tab = new_tab;
629         };
630
631         this.get_tab = function ()
632         {
633             return tab;
634         };
635
636     
637         // Implementation
638         //this.set_scroll_adjustments(null, null);
639         
640         //this.signal.title_changed.connect(update_title);
641         //this.signal.load_committed.connect(load_committed);
642         this.signal.load_finished.connect(load_finished);
643        
644         
645         // For some reason, this segfaults seed in the instance init closure handler
646         // Once that's fixed, uncommenting the next line will give middle-click-open-in-new tab
647         //this.signal.navigation_policy_decision_requested.connect(clicked_link);
648
649         //this.signal.hovering_over_link.connect(hover_link);
650
651         this.signal.create_web_view.connect(create_new_tab);
652         
653         
654          
655 //        print("ADDing console message sig handler");
656         
657         
658          
659         
660         this.toFilename = function(url)
661         {
662             url = url.replace(/^http[s]*:\/\//, '');
663             var p = url.split('/');
664             p.unshift(storedir+'/output');
665             for (var i =1 ;i < p.length; i++) {
666                 p[i] = encodeURIComponent(p[i]);
667             
668             }
669             p[p.length-1] = decodeURIComponent(p[p.length-1]);
670             ret = p.join('/');
671             var dir = File.dirname(ret);
672             File.mkdirall(dir);
673             return ret;
674             
675         }
676         this.checkdomain = function(comp)
677         {
678             var b = parseUri(this.uri);
679             var d = parseUri(comp);
680             return (d.host == b.host && d.protocol == b.protocol);
681             
682             
683         }
684         
685         this.dupeCheck = function(url)
686         {
687             
688            // order - return highest up the queue first..
689             if (File.exists(downloaddir +'/' + encodeURIComponent(url))) {
690                 return downloaddir +'/' + encodeURIComponent(url);
691             }
692              if (File.exists(parsedir +'/' + encodeURIComponent(url))) {
693                 return parsedir +'/' + encodeURIComponent(url);
694             }
695             if (File.exists(donedir +'/' + encodeURIComponent(url))) {
696                 return donedir +'/' + encodeURIComponent(url);
697             }
698             return  false;
699             
700             
701         }
702         this.moveToParse = function(url)
703         {
704             var old = this.dupeCheck(url);
705             var target =parsedir +'/' + encodeURIComponent(url);
706             if (old == target) {
707                 return;
708             }
709             File.write(target, old ? File.read(old) : '');
710             if (old) {
711                 File.remove(old);
712             }
713             
714         }
715         
716         this.moveToDownload= function(url)
717         {
718             var old = this.dupeCheck(url);
719             var target =downloaddir +'/' + encodeURIComponent(url);
720             if (old == target) {
721                 return;
722             }
723             File.write(target, old ? File.read(old) : '');
724             if (old) {
725                 File.remove(old);
726             }
727             
728         }
729         this.moveToDone= function(url)
730         {
731             var old = this.dupeCheck(url);
732             var target = donedir +'/' + encodeURIComponent(url);
733             if (old == target) {
734                 return;
735             }
736             File.write(target, old ? File.read(old) : '');
737             if (old) {
738                 File.remove(old);
739             }
740             
741         }
742         
743     }
744 });
745
/**
 * Split a URI string into named components.
 *
 * The string is matched against the regular expression selected by
 * parseUri.options.strictMode; capture group i is stored under
 * parseUri.options.key[i] (missing groups become ""). The query string is
 * additionally split into a name -> value map stored under
 * parseUri.options.q.name ("queryKey").
 *
 * @param str URI to parse
 * @return object with one property per entry of parseUri.options.key plus
 *         the query map
 */
function parseUri (str) {
        var o = parseUri.options;
        var m = o.parser[o.strictMode ? "strict" : "loose"].exec(str);
        var uri = {};
        var i = o.key.length;

        while (i--) {
                uri[o.key[i]] = m[i] || "";
        }

        // key[12] is the raw query string; replace() is used only to iterate
        // over its name=value pairs.
        uri[o.q.name] = {};
        uri[o.key[12]].replace(o.q.parser, function ($0, $1, $2) {
                if ($1) {
                        uri[o.q.name][$1] = $2;
                }
        });

        return uri;
}
761
// Configuration for parseUri():
//   strictMode - selects parser.strict instead of parser.loose;
//   key        - property names for the parser's capture groups, by index;
//   q          - name of the query map property and the pattern that splits
//                the query string into name=value pairs;
//   parser     - the two URI patterns.
parseUri.options = {
        strictMode: false,
        key: ["source","protocol","authority","userInfo","user","password","host","port","relative","path","directory","file","query","anchor"],
        q:   {
                name:   "queryKey",
                parser: /(?:^|&)([^&=]*)=?([^&]*)/g
        },
        parser: {
                strict: /^(?:([^:\/?#]+):)?(?:\/\/((?:(([^:@]*)(?::([^:@]*))?)?@)?([^:\/?#]*)(?::(\d*))?))?((((?:[^?#\/]*\/)*)([^?#]*))(?:\?([^#]*))?(?:#(.*))?)/,
                loose:  /^(?:(?![^:@]+:[^:@\/]*@)([^:\/?#.]+):)?(?:\/\/)?((?:(([^:@]*)(?::([^:@]*))?)?@)?([^:\/?#]*)(?::(\d*))?)(((\/(?:[^?#](?![^?#\/]*\.[^?#\/.]+(?:[?#]|$)))*\/?)?([^?#\/]*))(?:\?([^#]*))?(?:#(.*))?)/
        }
};