Tv.php
[lib.XML_Tv] / Tv.php
1 <?php
2 /* 
3 Usage: 
4
5 make a weekly cronjob (/etc/cron.weekly/filltv) 
6 #!/bin/sh
7 /usr/bin/php /path/to/application/Tv.php /path/to/application/hongkong.ini > /tmp/hongkong.xml
8 /usr/bin/mythfilldatabase --file 1 1 /tmp/hongkong.xml
9
10 make sure the filltv file is chmod +x
11
12
13
14 Quick hack to generate xmltv listings from TVxb style .ini files.
15
16 */
17
18
// Try to load mbstring on demand when it is not compiled in / enabled.
// dl() is not available on every PHP build, so only call it when it exists,
// and let PHP supply the platform's shared-library suffix instead of
// assuming ".so".
if (!extension_loaded('mbstring') && function_exists('dl')) {
    dl('mbstring.' . PHP_SHLIB_SUFFIX);
}

// Literal double-quote character, available to code that builds markup.
define('QUOTE', '"');
24 class XML_Tv
25 {
26     var $config; // configuration array.
27     var $date; // date we are fetching
28     var $channels; // details on the channels.
29
30
31     function start($ini)
32     {
33         // load the ini file.
34         require_once 'JSON.php';
35         $j = new Services_JSON();
36         $conf = (array)($j->decode(file_get_contents($ini)));
37         //$conf = (array)json_decode(file_get_contents($ini));
38         //print_r($conf);
39         foreach($conf as $k=>$v) {
40             $conf[$k] = (array)$v;
41         }
42         $this->channels = $conf;
43         $this->config = $conf['global'];
44         unset($this->channels['global']);
45         //print_r($this->channels);
46         
47         $this->date = time();
48         
49         
50         foreach($this->channels as $k=>$v) {
51             
52             for ($i=0;$i< (isset($v['days']) ? $v['days'] : $conf['days']);$i++) {
53                 $this->grabChannel($k,$i);
54                    // break;
55             }
56                 
57             
58             
59             //break;
60         }
61         
62         echo $this->toXML();
63         
64     }
65     
66     function grabChannel($k,$dayoffset) 
67     {
68         $cinfo = $this->channels[$k];
69         
70         //print_r($cinfo);
71        // exit;
72         $date = $this->date + ($dayoffset * 24 * 60 * 60 );
73        
74         $url =  strftime( $cinfo['url'], $date);
75         $this->debug($url );
76         //echo "GET $url\n";
77         $data = @file_get_contents($url);
78         if (empty($data)) {
79             $this->debug("NO  DATA");
80              
81             // something went wrong..
82             return;
83         }
84         
85         $odata = $data;
86         $map = array(
87                 array("0x22", "0x09", "0x0a"),
88                 array('"', "\t", "\n")
89              );
90         
91
92         $hs = $cinfo['htmlstart'] === false ? false  : str_replace($map[0],$map[1],$cinfo['htmlstart']);
93         //echo $hs;
94         $he= str_replace($map[0],$map[1],$cinfo['htmlend']);
95         //echo $he;
96         if ($hs !== false) {
97             list( , $data) = explode($hs,$data);
98         }
99         
100         if (!empty($he)) {
101             list($data,) =  explode($he ,$data);
102         }
103         
104         
105         //$this->debug("DATA:".$data); 
106          
107         $method = 'parse'.$cinfo['htmlparsetype'];
108         
109         $chid = isset($cinfo['id']) ? $cinfo['id'] : $k;
110         // for multiday html layout of atv:
111         if (!empty($cinfo['htmldaysep'])) {
112             $days = explode($cinfo['htmldaysep'], $data);
113             // kludge. = first monday of current week..
114             // loook for... <BR>2007-12-31 Mon
115             $start = preg_match('/<BR>([0-9]{4}-[0-9]{2}-[0-9]{2}) Mon/i', $odata, $matches);
116             $start = isset($matches[1]) ? $matches[1] : 0; // first monday..
117             
118             $use_cols = 1;
119             foreach($days as $i=>$ddata) {
120                 
121                 
122                 
123                 $cols = explode(',',$cinfo['htmlcols']);
124                 $dn = strtotime($start) + (($i * $use_cols) * 24 * 60 * 60 );
125                 $res =  $this->$method($ddata,$cols,date('Y-m-d',$dn));
126                 if (is_string($res)) {
127                     $start = $res;
128                     $use_cols =0;
129                     continue;
130                 }
131                 if (!is_array($res)) {
132                     continue;
133                 }
134                 $day_id = strtotime($start) + (($i *  $use_cols ) * 24 * 60 * 60 );
135                 
136                 //print_r($res);
137                 
138                 if (empty($this->schedule[$chid][$day_id])) {
139                     $this->schedule[$chid][$day_id] = array();
140                 }
141                 $this->schedule[$chid][$day_id] = array_merge($this->schedule[$chid][$day_id],  $res); 
142             }
143             return;
144             
145             
146         }
147         
148         $cols = explode(',',$cinfo['htmlcols']);
149         
150         //print_r(array($data,$cols));
151         
152         $this->schedule[$k][$this->date + ($dayoffset * 24 * 60 * 60 )]  = $this->$method($data,$cols);
153         //print_r($this->schedule);
154     }
155     //tvb +???
156     function parseTable($data,$colnames) 
157     {
158         $rows = preg_split('/<tr[^>]*>/i', $data);
159        // print_r($rows);exit;
160         array_shift($rows);
161         
162         
163         foreach($rows as $r) {
164             //print_r($r);
165             $cols = preg_split('/\<td[^>]*\>/i', trim($r));
166             $rdata = array();
167             array_shift($cols);
168             //var_dump(count(array_values($cols)));
169             //print_r($r);
170             //    print_r($cols);exit;
171             
172             foreach($cols as $i=>$c) {
173                 if (count(array_values($cols)) != count(array_values($colnames))) {
174                     continue;
175                 }
176                 if (preg_match('/<table/i', $c)) {
177                     continue;
178                 }
179                 //var_dump($c);
180                 $c = str_ireplace('<br>',' ', $c);
181                 $c = str_ireplace('&nbsp;',' ', $c);
182                 $c = str_replace("\n",' ', $c);
183                 $c = str_replace("\r",' ', $c);
184                 //var_dump($c);
185                 $rdata[$colnames[$i]] = trim(strip_tags($c));
186             }
187             
188           //  print_r($rdata);
189             if (count(array_values($rdata)) != count(array_values($colnames))) {
190                 continue;
191             }
192            // print_R($rdata);
193             $ret[] = $rdata;
194         }
195         
196         $this->debug(print_r($ret,true));
197         
198         return $ret;
199         
200     
201     }
202     
203     function parseJade($data, $colnames, $day)
204     {
205         
206         $rows = preg_split('#</li>#i', $data);
207         //print_r($rows);
208         $ret = array();
209         foreach($rows as $r) {
210             $r = str_ireplace('&nbsp;',' ', $r);
211             $rdata = array();
212             
213             list($time,$r) = explode('</span>', $r, 2);
214             
215             $rdata['hour'] = trim(strip_tags($time));
216             if (!strlen($rdata['hour'])) {
217                 continue;
218                }
219             //list($title,$r) = explode('</em>', $r, 2);
220             list($title,$r) = explode('</p>', $r, 2);
221             $rdata['description'] = trim(strip_tags($title));
222             $rdata['description2'] = trim(strip_tags($r));
223             $rdata['day'] = $day;
224             $ret[] = $rdata;
225         }
226         //print_R($ret);
227        return $ret;
228     }
229     
230     
231     function parseatv($data,$colnames, $day) 
232     {
233         
234         // if it's a day row..
235         
236         $lines = explode("\n",  trim($data));
237         //var_dump($lines[1]);
238         
239         
240         if (isset($lines[1]) && preg_match('/<div/', trim($lines[1]))) {
241             
242             preg_match('/<BR>([0-9]{4}-[0-9]{2}-[0-9]{2})/i', $lines[1], $matches);
243             //var_dump($matches[1]);
244             return $matches[1];
245             
246         }
247         $rows = preg_split('/<tr[^>]*>/i', $data);
248         if ($day == '2011-10-29') {
249             //$this->debug(print_r($rows,true));
250         }
251         if ($day < date('Y-m-d')) {
252            // $this->debug("OLD DATA  $day");
253             return array();
254         }
255         //$this->debug($day);
256         //$this->debug(print_r($rows,true));
257     //return; 
258         array_shift($rows);
259         
260         $ret = array();
261         //$day = false;
262         foreach($rows as $r) {
263             //print_r($r);
264             $cols = preg_split('/<td[^>]*>/i', $r);
265             $rdata = array();
266             
267             if (!isset($cols[2])) {
268                 continue;
269             }
270             //PRINT_r($cols);
271             $c= $cols[2];
272              //var_dump($c);
273             // look for time..
274             if (!preg_match('/^[0-9]+:[0-9]+/', $c)) {
275                 continue;
276             }
277             //$this->debug("GOT HOUR: $c");
278              $rdata['hour'] = trim(array_shift(explode('<', $c)));  
279             $c =  $cols[3];
280             
281             $kv = preg_split('/<br>/',$c);
282             
283             $c = $kv[0];
284             $c = str_ireplace('<br>',' ', $c);
285             $c = str_ireplace('&nbsp;',' ', $c);
286             $c = str_replace("\n",' ', $c);
287             $c = str_replace("\r",' ', $c);
288             $c = preg_replace('/\<[^>]+\>/', ' ', $c);
289             $rdata['description2']  = trim($c); 
290             
291             
292             
293             $c = $kv[1];
294             $c = str_ireplace('<br>',' ', $c);
295             $c = str_ireplace('&nbsp;',' ', $c);
296             $c = str_replace("\n",' ', $c);
297             $c = str_replace("\r",' ', $c);
298             $c = preg_replace('/\<[^>]+\>/', ' ', $c);
299             $rdata['description']  = trim($c); 
300             
301             
302             $rdata['day'] = $day;
303             
304             
305             //print_R($rdata);
306             $ret[] = $rdata;
307         }
308         //print_r($ret); exit;
309         return $ret;
310         
311     
312     }
313     
314     
315 /*
316     function parseTableCells($data,$colnames, $day) 
317     {
318         $rows = preg_split('/<tr[^>]*>/i', $data);
319         //$this->debug(print_r($rows,true));
320         //exit;
321         //exit;
322         array_shift($rows);
323         
324         
325         $day = false;
326         foreach($rows as $r) {
327             //print_r($r);
328             $cols = preg_split('/<td[^>]*>/i', $r);
329             $rdata = array();
330             $c= $cols[1];
331              //var_dump($c);
332             // look for time..
333             if (!preg_match('/^[0-9]+:[0-9]+\s/', $c)) {
334                 continue;
335             }
336             
337             $c = str_ireplace('<br>',' ', $c);
338             $c = str_ireplace('&nbsp;',' ', $c);
339             $c = str_replace("\n",' ', $c);
340             $c = str_replace("\r",' ', $c);
341             
342             
343             $c = preg_replace('/\<[^>]+\>/', ' ', $c);
344             $c = trim($c); 
345             $kv = preg_split("/\s+/", $c, 2);
346             
347             $rdata[$colnames[0]] = trim($kv[0]);
348             $rdata[$colnames[1]] = trim($kv[1]);
349             
350             
351             //print_r($kv);
352             if (count(array_values($rdata)) != count(array_values($colnames))) {
353                 continue;
354             }
355             $rdata['day'] = $day;
356             $ret[] = $rdata;
357         }
358         //print_r($ret);
359         return $ret;
360         
361     
362     }
363 */
364     /*
365     
366        <tv generator-info-name="tv_grab_uk">
367           <channel id="bbc2.bbc.co.uk">
368             <display-name lang="en">BBC2</display-name>
369           </channel>
370           <channel id="channel4.com">
371             <display-name lang="en">Channel 4</display-name>
372           </channel>
373         
374           <programme channel="bbc2.bbc.co.uk" start="20010829000500 +0100">
375             <title lang="en">The Phil Silvers Show</title>
376             <desc lang="en">
377               Bilko claims he's had a close encounter with an alien in order
378               to be given some compassionate leave so he can visit an old
379               flame in New York.
380             </desc>
381           </programme>
382         
383           <programme channel="channel4.com" start="20010829095500 +0100">
384             <title lang="en">King of the Hill</title>
385             <sub-title lang="en">Meet the Propaniacs</sub-title>
386             <desc lang="en">
387                Bobby tours with a comedy troupe who specialize in
388                propane-related mirth.
389             </desc>
390             <credits>
391               <actor>Mike Judge</actor>
392               <actor>Lane Smith</actor>
393             </credits>
394             <category lang="en">animation</category>
395           </programme>
396         </tv>
397     */
398     
399     function toXml()
400     {
401     
402         //print_r($this->schedule);
403         $doc = new DomDocument('1.0', 'UTF-8');
404         $tv = $doc->createElement('tv');
405         $tv->setAttribute( 'generator-info-name','akpear_xml_tv');
406         $doc->appendChild($tv);
407         
408         
409         //$out = '<'.'?xml version="1.0" encoding="UTF-8"?.'>'."\n" .
410          //       '<!DOCTYPE tv SYSTEM "xmltv.dtd">'."\n" ."\n" .
411           ///      '<tv generator-info-name="akpear_xml_tv">'."\n";
412           $donec = array();
413         foreach($this->channels as $k => $v) {
414             // dont dupe?!
415             $chid = isset($v['id']) ? $v['id'] : $k;
416             if (isset($donec[$chid])) {
417                 continue;
418             }
419             $donec[$chid] = true;
420             $ch = $doc->createElement('channel');
421             $ch->setAttribute('id', $chid);
422             $disp = $doc->createElement('display-name');
423             $disp->setAttribute('lang', 'en');
424             $disp->appendChild($doc->createTextNode($v['name']));
425             $ch->appendChild($disp);
426             $tv->appendChild($ch);
427             //$out .=  
428             //    '<channel id="'. $k .'">
429             //        <display-name lang="en">'. $v['name'] .'</display-name>
430             //    </channel>'."\n";
431         }
432         //print_r($this->schedule);
433         foreach($this->schedule as $chan => $scheds) {
434             //print_r($sched);
435             foreach($scheds as $day => $sched) {
436                 $hoffset = 0;
437                 $last = -1;
438                 
439                 if (empty($sched)) {
440                     continue;
441                 }
442                 foreach($sched as $item) {
443                     $item['day'] = $day;
444                     
445                     $bits = explode(':', $item['hour']);
446                     if ($bits[0] < $last) {
447                         $hoffset +=12;
448                     }
449                     $last = $bits[0];
450                     
451                     //var_dump($bits[0] + $hoffset);
452                     $start = mktime(/*hmsmdy  */
453                             $bits[0] + $hoffset,
454                             $bits[1],
455                             0,
456                             date('m', $day),
457                             date('d', $day),
458                             date('Y', $day)
459                             );
460                     if ($start < strtotime(date("Y-m-d 00:00:00", strtotime('NOW - 1 DAY')))) {
461                         continue;
462                     }
463                     $item['hoffset'] = $hoffset;
464                     $item['hoffset_ar'] = $bits;
465                     
466                     //$this->debug(print_r($item, true));
467                     $start_str = date('YmdHis',$start) . ' ' . $this->config['gmtoffset'];
468                     //var_dump($start_str);
469                     //var_dump($this->channels);
470                     $description =   iconv($this->channels[$chan]['encoding'], 'UTF-8',$item['description'] . 
471                                 (isset($item['description2']) ? ('   ' . $item['description2']) : '')); 
472                    
473                     $this->debug(date("Y-m-d H:i - ", $start). $description); 
474                    
475                    
476                     $pg = $doc->createElement('programme');
477                     
478                     $pg->setAttribute('channel', $chan);
479                     $pg->setAttribute('start', $start_str);
480                     
481                     $title = $doc->createElement('title');
482                     $title->setAttribute('lang', 'zh');
483                     $title->appendChild($doc->createTextNode($this->toTitle($description,$chan)));
484                     $pg->appendChild($title);
485                     
486                     $title = $doc->createElement('desc');
487                     $title->setAttribute('lang', 'zh');
488                     $title->appendChild($doc->createTextNode($description));
489                     $pg->appendChild($title);
490             
491                     $tv->appendChild($pg);
492                     //$out.= '<programme channel="'.$chan. '" start="'.$start_str. '">
493                     //    <title lang="zh">'. $this->toTitle($description) .'</title>
494                     //      <desc lang="zh">'. $description .'</desc>
495                     //    </programme>'."\n";
496                 }
497             }
498         }
499         
500         $doc->formatOutput = true;
501         
502         //$out .= "</tv>\n";
503         
504         return $doc->saveXML();
505     
506     }
507   
508     function toTitle($description, $chan)  
509     {
510         // remove sponsor message.
511         $title_pre = '';
512         @list($title, $fuldesc) = explode("&gt;&gt;",$description);
513         
514         
515         
516         if (preg_match('/Followed\s*By/i', $title)) {
517             $bits = preg_split('/Followed\s*By/i', $title);
518             $title_pre  = $bits[0]  . ' Followed By ';
519             $title = $bits[1];
520         }
521         $title = preg_replace('#countdown to[a-z0-9 ]+#i', '' , $title); # NICAM Language   
522         $title = preg_replace('#^(solar x|Samsung Digital)\s*#i', '' , $title); # known sponsors..
523         
524         $title = preg_replace('#[a-z0-9 ]+(presents|special|blockbuster|movie of the month|showtime)\s*:\s*#i', '', $title);
525         
526         $title = preg_replace('#\([a-z]+/[a-z]+\s*(|bilingual)\)#i', '' , $title); # NICAM Language 
527         $title = preg_replace('#\(live\)#i', '' , $title);            # live
528         $title = preg_replace('#\((s|c|l|e|cs|es|ecs|can|ce)[*]*\)#i', '' , $title);            # Subtitle
529         $title = preg_replace('#\((pg\d*\w*)\)#i', '' , $title);         # Adult
530         $title = preg_replace('#\(r\)#i', '' , $title);         # Repeated
531         $title = trim($title, '/');
532         $title = trim($title);
533         
534         $ret = $title_pre . $title;
535         
536       //  if (!strlen($ret)) {
537        //     die("got $description : nothing to return");
538         //   }
539         if (in_array($chan, array('tvbpearl.hk', 'english.atvworld.hk'))) {
540             $enonly = preg_replace('#^[^a-z0-9]+#i', '', $title);
541             if (strlen($enonly) > 10) {
542                 $title = $enonly;
543             }
544         }
545         
546         
547         return $title_pre . $title;
548     }
549  
550     
551     
552     function debug($str)
553     {
554         if (empty($this->config['debug'])) {
555              return;
556            }
557         echo $str."\n";
558        }
559     
560     
561 }
562
// Command-line entry point: the first argument is the configuration file.
$args = isset($_SERVER['argv']) ? $_SERVER['argv'] : array();
if (!isset($args[1])) {
    file_put_contents('php://stderr', "Usage: php Tv.php /path/to/config.ini > listings.xml\n");
    exit(1);
}

$x = new XML_Tv;
$x->start($args[1]);