Tv.php
[lib.XML_Tv] / Tv.php
1 <?php
2 /* 
3 Usage: 
4
5 make a weekly cronjob (/etc/cron.weekly/filltv) 
6 #!/bin/sh
7 /usr/bin/php /path/to/application/Tv.php /path/to/application/hongkong.ini > /tmp/hongkong.xml
8 /usr/bin/mythfilldatabase --file 1 1 /tmp/hongkong.xml
9
10 make sure the filltv file is chmod +x
11
12
13
14 Quick hack to generate xmltv listings from TVxb style .ini files.
15
16 */
// Notices and strict-standards messages are deliberately silenced for this script.
error_reporting(E_ALL & ~E_STRICT &~E_NOTICE);

// Load mbstring at runtime when it is not already available. dl() does not
// exist in every SAPI, so check for it first instead of dying with a fatal
// "call to undefined function" error.
if (!extension_loaded('mbstring') && function_exists('dl')) {
    dl('mbstring.so');
}

define ('QUOTE', '"');
28 class XML_Tv
29 {
30     var $config; // configuration array.
31     var $date; // date we are fetching
32     var $channels; // details on the channels.
33
34
35     function start($ini)
36     {
37         // load the ini file.
38         require_once 'JSON.php';
39         $j = new Services_JSON();
40         $conf = (array)($j->decode(file_get_contents($ini)));
41         //$conf = (array)json_decode(file_get_contents($ini));
42         //print_r($conf);
43         foreach($conf as $k=>$v) {
44             $conf[$k] = (array)$v;
45         }
46         $this->channels = $conf;
47         $this->config = $conf['global'];
48         unset($this->channels['global']);
49         //print_r($this->channels);
50         
51         $this->date = time();
52         
53         
54         foreach($this->channels as $k=>$v) {
55             
56             for ($i=0;$i< (isset($v['days']) ? $v['days'] : $conf['days']);$i++) {
57                 $this->grabChannel($k,$i);
58                    // break;
59             }
60                 
61             
62             
63             //break;
64         }
65         
66         echo $this->toXML();
67         
68     }
69     
70     function grabChannel($k,$dayoffset) 
71     {
72         $cinfo = $this->channels[$k];
73         
74         //print_r($cinfo);
75        // exit;
76         $date = $this->date + ($dayoffset * 24 * 60 * 60 );
77        
78         $url =  strftime( $cinfo['url'], $date);
79         $this->debug($url );
80         //echo "GET $url\n";
81         $data = @file_get_contents($url);
82         if (empty($data)) {
83             $this->debug("NO  DATA");
84              
85             // something went wrong..
86             return;
87         }
88         
89         $odata = $data;
90         $map = array(
91                 array("0x22", "0x09", "0x0a"),
92                 array('"', "\t", "\n")
93              );
94         
95
96         $hs = $cinfo['htmlstart'] === false ? false  : str_replace($map[0],$map[1],$cinfo['htmlstart']);
97         //echo $hs;
98         $he= str_replace($map[0],$map[1],$cinfo['htmlend']);
99         //echo $he;
100         if ($hs !== false) {
101             list( , $data) = explode($hs,$data);
102         }
103         
104         if (!empty($he)) {
105             list($data,) =  explode($he ,$data);
106         }
107         
108         
109         //$this->debug("DATA:".$data); 
110          
111         $method = 'parse'.$cinfo['htmlparsetype'];
112         
113         $chid = isset($cinfo['id']) ? $cinfo['id'] : $k;
114         // for multiday html layout of atv:
115         if (!empty($cinfo['htmldaysep'])) {
116             $days = explode($cinfo['htmldaysep'], $data);
117             // kludge. = first monday of current week..
118             // loook for... <BR>2007-12-31 Mon
119             $start = preg_match('/<BR>([0-9]{4}-[0-9]{2}-[0-9]{2}) Mon/i', $odata, $matches);
120             $start = isset($matches[1]) ? $matches[1] : 0; // first monday..
121             
122             $use_cols = 1;
123             foreach($days as $i=>$ddata) {
124                 
125                 
126                 
127                 $cols = explode(',',$cinfo['htmlcols']);
128                 $dn = strtotime($start) + (($i * $use_cols) * 24 * 60 * 60 );
129                 $res =  $this->$method($ddata,$cols,date('Y-m-d',$dn));
130                 if (is_string($res)) {
131                     $start = $res;
132                     $use_cols =0;
133                     continue;
134                 }
135                 if (!is_array($res)) {
136                     continue;
137                 }
138                 $day_id = strtotime($start) + (($i *  $use_cols ) * 24 * 60 * 60 );
139                 
140                 //print_r($res);
141                 
142                 if (empty($this->schedule[$chid][$day_id])) {
143                     $this->schedule[$chid][$day_id] = array();
144                 }
145                 $this->schedule[$chid][$day_id] = array_merge($this->schedule[$chid][$day_id],  $res); 
146             }
147             return;
148             
149             
150         }
151         
152         $cols = explode(',',$cinfo['htmlcols']);
153         
154         //print_r(array($data,$cols));
155         
156         $this->schedule[$k][$this->date + ($dayoffset * 24 * 60 * 60 )]  = $this->$method($data,$cols);
157         //print_r($this->schedule);
158     }
159     //tvb +???
160     function parseTable($data,$colnames) 
161     {
162         $rows = preg_split('/<tr[^>]*>/i', $data);
163        // print_r($rows);exit;
164         array_shift($rows);
165         
166         
167         foreach($rows as $r) {
168             //print_r($r);
169             $cols = preg_split('/\<td[^>]*\>/i', trim($r));
170             $rdata = array();
171             array_shift($cols);
172             //var_dump(count(array_values($cols)));
173             //print_r($r);
174             //    print_r($cols);exit;
175             
176             foreach($cols as $i=>$c) {
177                 if (count(array_values($cols)) != count(array_values($colnames))) {
178                     continue;
179                 }
180                 if (preg_match('/<table/i', $c)) {
181                     continue;
182                 }
183                 //var_dump($c);
184                 $c = str_ireplace('<br>',' ', $c);
185                 $c = str_ireplace('&nbsp;',' ', $c);
186                 $c = str_replace("\n",' ', $c);
187                 $c = str_replace("\r",' ', $c);
188                 //var_dump($c);
189                 $rdata[$colnames[$i]] = trim(strip_tags($c));
190             }
191             
192           //  print_r($rdata);
193             if (count(array_values($rdata)) != count(array_values($colnames))) {
194                 continue;
195             }
196            // print_R($rdata);
197             $ret[] = $rdata;
198         }
199         
200         $this->debug(print_r($ret,true));
201         
202         return $ret;
203         
204     
205     }
206     
207     function parseJade($data, $colnames, $day=0)
208     {
209         
210         $rows = preg_split('#</li>#i', $data);
211         //print_r($rows);
212         $ret = array();
213         foreach($rows as $r) {
214             $r = str_ireplace('&nbsp;',' ', $r);
215             $rdata = array();
216             
217             @list($time,$r) = explode('</span>', $r, 2);
218             
219             $rdata['hour'] = trim(strip_tags($time));
220             if (!strlen($rdata['hour'])) {
221                 continue;
222                }
223             //list($title,$r) = explode('</em>', $r, 2);
224             list($title,$r) = explode('</p>', $r, 2);
225             $rdata['description'] = trim(strip_tags($title));
226             $rdata['description2'] = trim(strip_tags($r));
227             $rdata['day'] = $day;
228             $ret[] = $rdata;
229         }
230         //print_R($ret);
231        return $ret;
232     }
233     
234     
235     function parseatv($data,$colnames, $day) 
236     {
237         
238         // if it's a day row..
239         
240         $lines = explode("\n",  trim($data));
241         //var_dump($lines[1]);
242         
243         
244         if (isset($lines[1]) && preg_match('/<div/', trim($lines[1]))) {
245             
246             preg_match('/<BR>([0-9]{4}-[0-9]{2}-[0-9]{2})/i', $lines[1], $matches);
247             //var_dump($matches[1]);
248             return $matches[1];
249             
250         }
251         $rows = preg_split('/<tr[^>]*>/i', $data);
252         if ($day == '2011-10-29') {
253             //$this->debug(print_r($rows,true));
254         }
255         if ($day < date('Y-m-d')) {
256            // $this->debug("OLD DATA  $day");
257             return array();
258         }
259         //$this->debug($day);
260         //$this->debug(print_r($rows,true));
261     //return; 
262         array_shift($rows);
263         
264         $ret = array();
265         //$day = false;
266         foreach($rows as $r) {
267             //print_r($r);
268             $cols = preg_split('/<td[^>]*>/i', $r);
269             $rdata = array();
270             
271             if (!isset($cols[2])) {
272                 continue;
273             }
274             //PRINT_r($cols);
275             $c= $cols[2];
276              //var_dump($c);
277             // look for time..
278             if (!preg_match('/^[0-9]+:[0-9]+/', $c)) {
279                 continue;
280             }
281             //$this->debug("GOT HOUR: $c");
282              $rdata['hour'] = trim(array_shift(explode('<', $c)));  
283             $c =  $cols[3];
284             
285             $kv = preg_split('/<br>/',$c);
286             
287             $c = $kv[0];
288             $c = str_ireplace('<br>',' ', $c);
289             $c = str_ireplace('&nbsp;',' ', $c);
290             $c = str_replace("\n",' ', $c);
291             $c = str_replace("\r",' ', $c);
292             $c = preg_replace('/\<[^>]+\>/', ' ', $c);
293             $rdata['description2']  = trim($c); 
294             
295             
296             
297             $c = isset($kv[1]) ? $kv[1] : ''; 
298             $c = str_ireplace('<br>',' ', $c);
299             $c = str_ireplace('&nbsp;',' ', $c);
300             $c = str_replace("\n",' ', $c);
301             $c = str_replace("\r",' ', $c);
302             $c = preg_replace('/\<[^>]+\>/', ' ', $c);
303             $rdata['description']  = trim($c); 
304             
305             
306             $rdata['day'] = $day;
307             
308             
309             //print_R($rdata);
310             $ret[] = $rdata;
311         }
312         //print_r($ret); exit;
313         return $ret;
314         
315     
316     }
317     
318     
319 /*
320     function parseTableCells($data,$colnames, $day) 
321     {
322         $rows = preg_split('/<tr[^>]*>/i', $data);
323         //$this->debug(print_r($rows,true));
324         //exit;
325         //exit;
326         array_shift($rows);
327         
328         
329         $day = false;
330         foreach($rows as $r) {
331             //print_r($r);
332             $cols = preg_split('/<td[^>]*>/i', $r);
333             $rdata = array();
334             $c= $cols[1];
335              //var_dump($c);
336             // look for time..
337             if (!preg_match('/^[0-9]+:[0-9]+\s/', $c)) {
338                 continue;
339             }
340             
341             $c = str_ireplace('<br>',' ', $c);
342             $c = str_ireplace('&nbsp;',' ', $c);
343             $c = str_replace("\n",' ', $c);
344             $c = str_replace("\r",' ', $c);
345             
346             
347             $c = preg_replace('/\<[^>]+\>/', ' ', $c);
348             $c = trim($c); 
349             $kv = preg_split("/\s+/", $c, 2);
350             
351             $rdata[$colnames[0]] = trim($kv[0]);
352             $rdata[$colnames[1]] = trim($kv[1]);
353             
354             
355             //print_r($kv);
356             if (count(array_values($rdata)) != count(array_values($colnames))) {
357                 continue;
358             }
359             $rdata['day'] = $day;
360             $ret[] = $rdata;
361         }
362         //print_r($ret);
363         return $ret;
364         
365     
366     }
367 */
368     /*
369     
370        <tv generator-info-name="tv_grab_uk">
371           <channel id="bbc2.bbc.co.uk">
372             <display-name lang="en">BBC2</display-name>
373           </channel>
374           <channel id="channel4.com">
375             <display-name lang="en">Channel 4</display-name>
376           </channel>
377         
378           <programme channel="bbc2.bbc.co.uk" start="20010829000500 +0100">
379             <title lang="en">The Phil Silvers Show</title>
380             <desc lang="en">
381               Bilko claims he's had a close encounter with an alien in order
382               to be given some compassionate leave so he can visit an old
383               flame in New York.
384             </desc>
385           </programme>
386         
387           <programme channel="channel4.com" start="20010829095500 +0100">
388             <title lang="en">King of the Hill</title>
389             <sub-title lang="en">Meet the Propaniacs</sub-title>
390             <desc lang="en">
391                Bobby tours with a comedy troupe who specialize in
392                propane-related mirth.
393             </desc>
394             <credits>
395               <actor>Mike Judge</actor>
396               <actor>Lane Smith</actor>
397             </credits>
398             <category lang="en">animation</category>
399           </programme>
400         </tv>
401     */
402     
403     function toXml()
404     {
405     
406         //print_r($this->schedule);
407         $doc = new DomDocument('1.0', 'UTF-8');
408         $tv = $doc->createElement('tv');
409         $tv->setAttribute( 'generator-info-name','akpear_xml_tv');
410         $doc->appendChild($tv);
411         
412         
413         //$out = '<'.'?xml version="1.0" encoding="UTF-8"?.'>'."\n" .
414          //       '<!DOCTYPE tv SYSTEM "xmltv.dtd">'."\n" ."\n" .
415           ///      '<tv generator-info-name="akpear_xml_tv">'."\n";
416           $donec = array();
417         foreach($this->channels as $k => $v) {
418             // dont dupe?!
419             $chid = isset($v['id']) ? $v['id'] : $k;
420             if (isset($donec[$chid])) {
421                 continue;
422             }
423             $donec[$chid] = true;
424             $ch = $doc->createElement('channel');
425             $ch->setAttribute('id', $chid);
426             $disp = $doc->createElement('display-name');
427             $disp->setAttribute('lang', 'en');
428             $disp->appendChild($doc->createTextNode($v['name']));
429             $ch->appendChild($disp);
430             $tv->appendChild($ch);
431             //$out .=  
432             //    '<channel id="'. $k .'">
433             //        <display-name lang="en">'. $v['name'] .'</display-name>
434             //    </channel>'."\n";
435         }
436         //print_r($this->schedule);
437         foreach($this->schedule as $chan => $scheds) {
438             //print_r($sched);
439             foreach($scheds as $day => $sched) {
440                 $hoffset = 0;
441                 $last = -1;
442                 
443                 if (empty($sched)) {
444                     continue;
445                 }
446                 foreach($sched as $item) {
447                     $item['day'] = $day;
448                     
449                     $bits = explode(':', $item['hour']);
450                     if ($bits[0] < $last) {
451                         $hoffset +=12;
452                     }
453                     $last = $bits[0];
454                     
455                     //var_dump($bits[0] + $hoffset);
456                     $start = mktime(/*hmsmdy  */
457                             $bits[0] + $hoffset,
458                             $bits[1],
459                             0,
460                             date('m', $day),
461                             date('d', $day),
462                             date('Y', $day)
463                             );
464                     if ($start < strtotime(date("Y-m-d 00:00:00", strtotime('NOW - 1 DAY')))) {
465                         continue;
466                     }
467                     $item['hoffset'] = $hoffset;
468                     $item['hoffset_ar'] = $bits;
469                     
470                     //$this->debug(print_r($item, true));
471                     $start_str = date('YmdHis',$start) . ' ' . $this->config['gmtoffset'];
472                     //var_dump($start_str);
473                     //var_dump($this->channels);
474                     $description =   iconv($this->channels[$chan]['encoding'], 'UTF-8',$item['description'] . 
475                                 (isset($item['description2']) ? ('   ' . $item['description2']) : '')); 
476                    
477                     $this->debug(date("Y-m-d H:i - ", $start). $description); 
478                    
479                    
480                     $pg = $doc->createElement('programme');
481                     
482                     $pg->setAttribute('channel', $chan);
483                     $pg->setAttribute('start', $start_str);
484                     
485                     $title = $doc->createElement('title');
486                     $title->setAttribute('lang', 'zh');
487                     $title->appendChild($doc->createTextNode($this->toTitle($description,$chan)));
488                     $pg->appendChild($title);
489                     
490                     $title = $doc->createElement('desc');
491                     $title->setAttribute('lang', 'zh');
492                     $title->appendChild($doc->createTextNode($description));
493                     $pg->appendChild($title);
494             
495                     $tv->appendChild($pg);
496                     //$out.= '<programme channel="'.$chan. '" start="'.$start_str. '">
497                     //    <title lang="zh">'. $this->toTitle($description) .'</title>
498                     //      <desc lang="zh">'. $description .'</desc>
499                     //    </programme>'."\n";
500                 }
501             }
502         }
503         
504         $doc->formatOutput = true;
505         
506         //$out .= "</tv>\n";
507         
508         return $doc->saveXML();
509     
510     }
511   
512     function toTitle($description, $chan)  
513     {
514         // remove sponsor message.
515         $title_pre = '';
516         @list($title, $fuldesc) = explode("&gt;&gt;",$description);
517         
518         
519         
520         if (preg_match('/Followed\s*By/i', $title)) {
521             $bits = preg_split('/Followed\s*By/i', $title);
522             $title_pre  = $bits[0]  . ' Followed By ';
523             $title = $bits[1];
524         }
525         $title = preg_replace('#countdown to[a-z0-9 ]+#i', '' , $title); # NICAM Language   
526         $title = preg_replace('#^(solar x|Samsung Digital)\s*#i', '' , $title); # known sponsors..
527         
528         $title = preg_replace('#[a-z0-9 ]+(presents|special|blockbuster|movie of the month|showtime)\s*:\s*#i', '', $title);
529         
530         $title = preg_replace('#\([a-z]+/[a-z]+\s*(|bilingual)\)#i', '' , $title); # NICAM Language 
531         $title = preg_replace('#\(live\)#i', '' , $title);            # live
532         $title = preg_replace('#\((s|c|l|e|cs|es|ecs|can|ce)[*]*\)#i', '' , $title);            # Subtitle
533         $title = preg_replace('#\((pg\d*\w*)\)#i', '' , $title);         # Adult
534         $title = preg_replace('#\(r\)#i', '' , $title);         # Repeated
535         $title = trim($title, '/');
536         $title = trim($title);
537         
538         $ret = $title_pre . $title;
539         
540       //  if (!strlen($ret)) {
541        //     die("got $description : nothing to return");
542         //   }
543         if (in_array($chan, array('tvbpearl.hk', 'english.atvworld.hk'))) {
544             $enonly = preg_replace('#^[^a-z0-9]+#i', '', $title);
545             if (strlen($enonly) > 10) {
546                 $title = $enonly;
547             }
548         }
549         
550         
551         return $title_pre . $title;
552     }
553  
554     
555     
556     function debug($str)
557     {
558         if (empty($this->config['debug'])) {
559              return;
560            }
561         echo $str."\n";
562        }
563     
564     
565 }
566
// Command line entry point: the only argument is the path to the
// configuration file; the XML document is written to standard output.
if (empty($_SERVER['argv'][1])) {
    file_put_contents('php://stderr', "Usage: php Tv.php /path/to/config.ini > /tmp/listings.xml\n");
    exit(1);
}

$x = new XML_Tv;
$x->start($_SERVER['argv'][1]);