Tv.php
[lib.XML_Tv] / Tv.php
1 <?php
2 /* 
3 Usage: 
4
5 make a weekly cronjob (/etc/cron.weekly/filltv) 
6 #!/bin/sh
7 /usr/bin/php /path/to/application/Tv.php /path/to/application/hongkong.ini > /tmp/hongkong.xml
8 /usr/bin/mythfilldatabase --file 1 1 /tmp/hongkong.xml
9
10 make sure the filltv file is chmod +x
11
12
13
14 Quick hack to generate xmltv listings from TVxb style .ini files.
15
16 */
17
18
// Try to load mbstring at runtime when it is not compiled in / enabled.
// dl() is not available in every SAPI, so only call it when it exists;
// calling an undefined function would abort the whole run.
if (!extension_loaded('mbstring') && function_exists('dl')) {
    dl('mbstring.so');
}

// Literal double quote character, kept as a named constant.
if (!defined('QUOTE')) {
    define('QUOTE', '"');
}
24 class XML_Tv
25 {
26     var $config; // configuration array.
27     var $date; // date we are fetching
28     var $channels; // details on the channels.
29
30
31     function start($ini)
32     {
33         // load the ini file.
34         require_once 'JSON.php';
35         $j = new Services_JSON();
36         $conf = (array)($j->decode(file_get_contents($ini)));
37         //$conf = (array)json_decode(file_get_contents($ini));
38         //print_r($conf);
39         foreach($conf as $k=>$v) {
40             $conf[$k] = (array)$v;
41         }
42         $this->channels = $conf;
43         $this->config = $conf['global'];
44         unset($this->channels['global']);
45         //print_r($this->channels);
46         
47         $this->date = time();
48         
49         
50         foreach($this->channels as $k=>$v) {
51             
52             for ($i=0;$i< (isset($v['days']) ? $v['days'] : $conf['days']);$i++) {
53                 $this->grabChannel($k,$i);
54                    // break;
55             }
56                 
57             
58             
59             //break;
60         }
61         
62         echo $this->toXML();
63         
64     }
65     
66     function grabChannel($k,$dayoffset) 
67     {
68         $cinfo = $this->channels[$k];
69         
70         //print_r($cinfo);
71        // exit;
72         $date = $this->date + ($dayoffset * 24 * 60 * 60 );
73        
74         $url =  strftime( $cinfo['url'], $date);
75         $this->debug($url );
76         //echo "GET $url\n";
77         $data = @file_get_contents($url);
78         if (empty($data)) {
79             $this->debug("NO  DATA");
80              
81             // something went wrong..
82             return;
83         }
84         
85         $odata = $data;
86         $map = array(
87                 array("0x22", "0x09", "0x0a"),
88                 array('"', "\t", "\n")
89              );
90         
91
92         $hs = $cinfo['htmlstart'] === false ? false  : str_replace($map[0],$map[1],$cinfo['htmlstart']);
93         //echo $hs;
94         $he= str_replace($map[0],$map[1],$cinfo['htmlend']);
95         //echo $he;
96         if ($hs !== false) {
97             list( , $data) = explode($hs,$data);
98         }
99         
100         if (!empty($he)) {
101             list($data,) =  explode($he ,$data);
102         }
103         
104         
105         //$this->debug("DATA:".$data); 
106          
107         $method = 'parse'.$cinfo['htmlparsetype'];
108         
109         $chid = isset($cinfo['id']) ? $cinfo['id'] : $k;
110         // for multiday html layout of atv:
111         if (!empty($cinfo['htmldaysep'])) {
112             $days = explode($cinfo['htmldaysep'], $data);
113             // kludge. = first monday of current week..
114             // loook for... <BR>2007-12-31 Mon
115             $start = preg_match('/<BR>([0-9]{4}-[0-9]{2}-[0-9]{2}) Mon/i', $odata, $matches);
116             $start = $matches[1]; // first monday..
117             foreach($days as $i=>$ddata) {
118                 $cols = explode(',',$cinfo['htmlcols']);
119                 $dn = strtotime($start) + (($i ) * 24 * 60 * 60 );
120                 $this->schedule[$chid][strtotime($start) + (($i ) * 24 * 60 * 60 )]  = $this->$method($ddata,$cols,date('Y-m-d',$dn));
121             }
122             return;
123             
124             
125         }
126         
127         $cols = explode(',',$cinfo['htmlcols']);
128         
129         //print_r(array($data,$cols));
130         
131         $this->schedule[$k][$this->date + ($dayoffset * 24 * 60 * 60 )]  = $this->$method($data,$cols);
132         //print_r($this->schedule);
133     }
134     //tvb +???
135     function parseTable($data,$colnames) 
136     {
137         $rows = preg_split('/<tr[^>]*>/i', $data);
138        // print_r($rows);exit;
139         array_shift($rows);
140         
141         
142         foreach($rows as $r) {
143             //print_r($r);
144             $cols = preg_split('/\<td[^>]*\>/i', trim($r));
145             $rdata = array();
146             array_shift($cols);
147             //var_dump(count(array_values($cols)));
148             //print_r($r);
149             //    print_r($cols);exit;
150             
151             foreach($cols as $i=>$c) {
152                 if (count(array_values($cols)) != count(array_values($colnames))) {
153                     continue;
154                 }
155                 if (preg_match('/<table/i', $c)) {
156                     continue;
157                 }
158                 //var_dump($c);
159                 $c = str_ireplace('<br>',' ', $c);
160                 $c = str_ireplace('&nbsp;',' ', $c);
161                 $c = str_replace("\n",' ', $c);
162                 $c = str_replace("\r",' ', $c);
163                 //var_dump($c);
164                 $rdata[$colnames[$i]] = trim(strip_tags($c));
165             }
166             
167           //  print_r($rdata);
168             if (count(array_values($rdata)) != count(array_values($colnames))) {
169                 continue;
170             }
171            // print_R($rdata);
172             $ret[] = $rdata;
173         }
174         
175         $this->debug(print_r($ret,true));
176         
177         return $ret;
178         
179     
180     }
181     
182     function parseJade($data, $colnames)
183     {
184         
185         $rows = preg_split('#</li>#i', $data);
186         //print_r($rows);
187         $ret = array();
188         foreach($rows as $r) {
189             $r = str_ireplace('&nbsp;',' ', $r);
190             $rdata = array();
191             
192             list($time,$r) = explode('</span>', $r, 2);
193             
194             $rdata['hour'] = trim(strip_tags($time));
195             if (!strlen($rdata['hour'])) {
196                 continue;
197                }
198             //list($title,$r) = explode('</em>', $r, 2);
199             list($title,$r) = explode('</p>', $r, 2);
200             $rdata['description'] = trim(strip_tags($title));
201             $rdata['description2'] = trim(strip_tags($r));
202             $rdata['day'] = $day;
203             $ret[] = $rdata;
204         }
205         //print_R($ret);
206        return $ret;
207     }
208     
209     
210     function parseatv($data,$colnames, $day) 
211     {
212         
213                 
214         $rows = preg_split('/<tr[^>]*>/i', $data);
215         //$this->debug(print_r($rows,true));
216         //exit;
217         array_shift($rows);
218         
219         
220         $day = false;
221         foreach($rows as $r) {
222             //print_r($r);
223             $cols = preg_split('/<td[^>]*>/i', $r);
224             $rdata = array();
225             
226             
227             PRINT_r($cols);
228             $c= $cols[2];
229              //var_dump($c);
230             // look for time..
231             if (!preg_match('/^[0-9]+:[0-9]+/', $c)) {
232                 continue;
233             }
234             
235             $rdata['time'] = trim(preg_replace('/\<.*$/', '', $c));
236             $c =  $cols[3];
237             
238             $kv = preg_split('/<br>/',$c);
239             
240             $c = $kv[0];
241             $c = str_ireplace('<br>',' ', $c);
242             $c = str_ireplace('&nbsp;',' ', $c);
243             $c = str_replace("\n",' ', $c);
244             $c = str_replace("\r",' ', $c);
245             $c = preg_replace('/\<[^>]+\>/', ' ', $c);
246             $rdata['description2']  = trim($c); 
247             
248             
249             
250             $c = $kv[1];
251             $c = str_ireplace('<br>',' ', $c);
252             $c = str_ireplace('&nbsp;',' ', $c);
253             $c = str_replace("\n",' ', $c);
254             $c = str_replace("\r",' ', $c);
255             $c = preg_replace('/\<[^>]+\>/', ' ', $c);
256             $rdata['description']  = trim($c); 
257             
258             
259             $rdata['day'] = $day;
260             
261             
262             print_R($rdata);
263             $ret[] = $rdata;
264         }
265         //print_r($ret); exit;
266         return $ret;
267         
268     
269     }
270     
271     
272 /*
273     function parseTableCells($data,$colnames, $day) 
274     {
275         $rows = preg_split('/<tr[^>]*>/i', $data);
276         //$this->debug(print_r($rows,true));
277         //exit;
278         //exit;
279         array_shift($rows);
280         
281         
282         $day = false;
283         foreach($rows as $r) {
284             //print_r($r);
285             $cols = preg_split('/<td[^>]*>/i', $r);
286             $rdata = array();
287             $c= $cols[1];
288              //var_dump($c);
289             // look for time..
290             if (!preg_match('/^[0-9]+:[0-9]+\s/', $c)) {
291                 continue;
292             }
293             
294             $c = str_ireplace('<br>',' ', $c);
295             $c = str_ireplace('&nbsp;',' ', $c);
296             $c = str_replace("\n",' ', $c);
297             $c = str_replace("\r",' ', $c);
298             
299             
300             $c = preg_replace('/\<[^>]+\>/', ' ', $c);
301             $c = trim($c); 
302             $kv = preg_split("/\s+/", $c, 2);
303             
304             $rdata[$colnames[0]] = trim($kv[0]);
305             $rdata[$colnames[1]] = trim($kv[1]);
306             
307             
308             //print_r($kv);
309             if (count(array_values($rdata)) != count(array_values($colnames))) {
310                 continue;
311             }
312             $rdata['day'] = $day;
313             $ret[] = $rdata;
314         }
315         //print_r($ret);
316         return $ret;
317         
318     
319     }
320 */
321     /*
322     
323        <tv generator-info-name="tv_grab_uk">
324           <channel id="bbc2.bbc.co.uk">
325             <display-name lang="en">BBC2</display-name>
326           </channel>
327           <channel id="channel4.com">
328             <display-name lang="en">Channel 4</display-name>
329           </channel>
330         
331           <programme channel="bbc2.bbc.co.uk" start="20010829000500 +0100">
332             <title lang="en">The Phil Silvers Show</title>
333             <desc lang="en">
334               Bilko claims he's had a close encounter with an alien in order
335               to be given some compassionate leave so he can visit an old
336               flame in New York.
337             </desc>
338           </programme>
339         
340           <programme channel="channel4.com" start="20010829095500 +0100">
341             <title lang="en">King of the Hill</title>
342             <sub-title lang="en">Meet the Propaniacs</sub-title>
343             <desc lang="en">
344                Bobby tours with a comedy troupe who specialize in
345                propane-related mirth.
346             </desc>
347             <credits>
348               <actor>Mike Judge</actor>
349               <actor>Lane Smith</actor>
350             </credits>
351             <category lang="en">animation</category>
352           </programme>
353         </tv>
354     */
355     
356     function toXml()
357     {
358     
359         //print_r($this->schedule);
360         $doc = new DomDocument('1.0', 'UTF-8');
361         $tv = $doc->createElement('tv');
362         $tv->setAttribute( 'generator-info-name','akpear_xml_tv');
363         $doc->appendChild($tv);
364         
365         
366         //$out = '<'.'?xml version="1.0" encoding="UTF-8"?.'>'."\n" .
367          //       '<!DOCTYPE tv SYSTEM "xmltv.dtd">'."\n" ."\n" .
368           ///      '<tv generator-info-name="akpear_xml_tv">'."\n";
369           $donec = array();
370         foreach($this->channels as $k => $v) {
371             // dont dupe?!
372             $chid = isset($v['id']) ? $v['id'] : $k;
373             if (isset($donec[$chid])) {
374                 continue;
375             }
376             $donec[$chid] = true;
377             $ch = $doc->createElement('channel');
378             $ch->setAttribute('id', $chid);
379             $disp = $doc->createElement('display-name');
380             $disp->setAttribute('lang', 'en');
381             $disp->appendChild($doc->createTextNode($v['name']));
382             $ch->appendChild($disp);
383             $tv->appendChild($ch);
384             //$out .=  
385             //    '<channel id="'. $k .'">
386             //        <display-name lang="en">'. $v['name'] .'</display-name>
387             //    </channel>'."\n";
388         }
389         //print_r($this->schedule);
390         foreach($this->schedule as $chan => $scheds) {
391             //print_r($sched);
392             foreach($scheds as $day => $sched) {
393                 $hoffset = 0;
394                 $last = -1;
395                 
396                 if (empty($sched)) {
397                     continue;
398                 }
399                 foreach($sched as $item) {
400                     //print_r($item);
401                     $bits = explode(':', $item['hour']);
402                     if ($bits[0] < $last) {
403                         $hoffset +=12;
404                     }
405                     $last = $bits[0];
406                     //var_dump($bits[0] + $hoffset);
407                     $start = mktime(/*hmsmdy  */
408                             $bits[0] + $hoffset,
409                             $bits[1],
410                             0,
411                             date('m', $day),
412                             date('d', $day),
413                             date('Y', $day)
414                             );
415                             
416                     $start_str = date('YmdHis',$start) . ' ' . $this->config['gmtoffset'];
417                     //var_dump($start_str);
418                     //var_dump($this->channels);
419                     $description =   iconv($this->channels[$chan]['encoding'], 'UTF-8',$item['description'] . 
420                                 (isset($item['description2']) ? ('   ' . $item['description2']) : '')); 
421                    
422                     $pg = $doc->createElement('programme');
423                     
424                     $pg->setAttribute('channel', $chan);
425                     $pg->setAttribute('start', $start_str);
426                     
427                     $title = $doc->createElement('title');
428                     $title->setAttribute('lang', 'zh');
429                     $title->appendChild($doc->createTextNode($this->toTitle($description,$chan)));
430                     $pg->appendChild($title);
431                     
432                     $title = $doc->createElement('desc');
433                     $title->setAttribute('lang', 'zh');
434                     $title->appendChild($doc->createTextNode($description));
435                     $pg->appendChild($title);
436             
437                     $tv->appendChild($pg);
438                     //$out.= '<programme channel="'.$chan. '" start="'.$start_str. '">
439                     //    <title lang="zh">'. $this->toTitle($description) .'</title>
440                     //      <desc lang="zh">'. $description .'</desc>
441                     //    </programme>'."\n";
442                 }
443             }
444         }
445         
446         $doc->formatOutput = true;
447         
448         //$out .= "</tv>\n";
449         
450         return $doc->saveXML();
451     
452     }
453   
454     function toTitle($description, $chan)  
455     {
456         // remove sponsor message.
457         $title_pre = '';
458         list($title, $fuldesc) = explode("&gt;&gt;",$description);
459         
460         
461         
462         if (preg_match('/Followed\s*By/i', $title)) {
463             $bits = preg_split('/Followed\s*By/i', $title);
464             $title_pre  = $bits[0]  . ' Followed By ';
465             $title = $bits[1];
466         }
467         $title = preg_replace('#countdown to[a-z0-9 ]+#i', '' , $title); # NICAM Language   
468         $title = preg_replace('#^(solar x|Samsung Digital)\s*#i', '' , $title); # known sponsors..
469         
470         $title = preg_replace('#[a-z0-9 ]+(presents|special|blockbuster|movie of the month|showtime)\s*:\s*#i', '', $title);
471         
472         $title = preg_replace('#\([a-z]+/[a-z]+\s*(|bilingual)\)#i', '' , $title); # NICAM Language 
473         $title = preg_replace('#\(live\)#i', '' , $title);            # live
474         $title = preg_replace('#\((s|c|l|e|cs|es|ecs|can|ce)[*]*\)#i', '' , $title);            # Subtitle
475         $title = preg_replace('#\((pg\d*\w*)\)#i', '' , $title);         # Adult
476         $title = preg_replace('#\(r\)#i', '' , $title);         # Repeated
477         $title = trim($title, '/');
478         $title = trim($title);
479         
480         $ret = $title_pre . $title;
481         
482       //  if (!strlen($ret)) {
483        //     die("got $description : nothing to return");
484         //   }
485         if (in_array($chan, array('tvbpearl.hk', 'english.atvworld.hk'))) {
486             $enonly = preg_replace('#^[^a-z0-9]+#i', '', $title);
487             if (strlen($enonly) > 10) {
488                 $title = $enonly;
489             }
490         }
491         
492         
493         return $title_pre . $title;
494     }
495  
496     
497     
498     function debug($str)
499     {
500         if (empty($this->config['debug'])) {
501              return;
502            }
503         echo $str."\n";
504        }
505     
506     
507 }
508
// Command line entry: the first argument is the path of the configuration file.
if (empty($_SERVER['argv'][1])) {
    file_put_contents('php://stderr', "Usage: php Tv.php /path/to/config.ini > listings.xml\n");
    exit(1);
}

$x = new XML_Tv;
$x->start($_SERVER['argv'][1]);