5 make a weekly cronjob (/etc/cron.weekly/filltv)
7 /usr/bin/php /path/to/application/Tv.php /path/to/application/hongkong.ini > /tmp/hongkong.xml
8 /usr/bin/mythfilldatabase --file 1 1 /tmp/hongkong.xml
10 make sure the filltv file is executable (chmod +x)
14 Quick hack to generate xmltv listings from TVxb style .ini files.
// The mbstring extension is checked for up front; what happens when it is
// missing is not visible in this fragment.
19 if (!extension_loaded('mbstring')) {
// QUOTE is a named constant holding a literal double-quote character.
23 define ('QUOTE', '"');
26 var $config; // the 'global' section of the decoded configuration.
27 var $date; // base date being fetched; used as a Unix timestamp (per-day seconds are added to it).
28 var $channels; // per-channel settings: every configuration section except 'global', keyed by section name.
// Load the configuration: the file named by $ini is read and decoded as JSON
// using the Services_JSON class from JSON.php.
34 require_once 'JSON.php';
35 $j = new Services_JSON();
36 $conf = (array)($j->decode(file_get_contents($ini)));
37 //$conf = (array)json_decode(file_get_contents($ini));
// Cast every top-level section to a plain array as well.
39 foreach($conf as $k=>$v) {
40 $conf[$k] = (array)$v;
// 'global' holds the shared settings; all remaining sections are channels.
42 $this->channels = $conf;
43 $this->config = $conf['global'];
44 unset($this->channels['global']);
45 //print_r($this->channels);
// Fetch each channel once per day offset, using the channel's own 'days'
// value when it has one.
50 foreach($this->channels as $k=>$v) {
// NOTE(review): the fallback is $conf['days'], not $this->config['days'].
// Every section of $conf was cast to an array above, so confirm this is the
// intended key and that it yields a number.
52 for ($i=0;$i< (isset($v['days']) ? $v['days'] : $conf['days']);$i++) {
53 $this->grabChannel($k,$i);
/**
 * Fetch one channel's listing page for a single day, cut the listing out of
 * the page and store the parsed programmes in $this->schedule.
 *
 * @param $k         key of the channel in $this->channels
 * @param $dayoffset number of days after $this->date to fetch
 */
66 function grabChannel($k,$dayoffset)
68 $cinfo = $this->channels[$k];
// Target day as a Unix timestamp.
72 $date = $this->date + ($dayoffset * 24 * 60 * 60 );
// The configured url is a strftime() template filled in with that day.
74 $url = strftime( $cinfo['url'], $date);
// Warnings from the fetch are suppressed with @.
77 $data = @file_get_contents($url);
79 $this->debug("NO DATA");
81 // something went wrong..
// Replacement pairs: the tokens "0x22", "0x09" and "0x0a" stand for a double
// quote, a tab and a newline.
87 array("0x22", "0x09", "0x0a"),
88 array('"', "\t", "\n")
// htmlstart / htmlend are the markers used to cut the listing out of the
// page; the tokens above are expanded in both.
92 $hs = $cinfo['htmlstart'] === false ? false : str_replace($map[0],$map[1],$cinfo['htmlstart']);
94 $he= str_replace($map[0],$map[1],$cinfo['htmlend']);
// Keep the piece that follows the first start marker ...
97 list( , $data) = explode($hs,$data);
// ... and, of that, the piece that precedes the first end marker.
101 list($data,) = explode($he ,$data);
105 //$this->debug("DATA:".$data);
// The parser method name is 'parse' followed by the configured htmlparsetype.
107 $method = 'parse'.$cinfo['htmlparsetype'];
// Schedule key: the configured id when present, otherwise the channel key.
109 $chid = isset($cinfo['id']) ? $cinfo['id'] : $k;
110 // for multiday html layout of atv:
111 if (!empty($cinfo['htmldaysep'])) {
112 $days = explode($cinfo['htmldaysep'], $data);
113 // kludge. = first monday of current week..
114 // look for... <BR>2007-12-31 Mon
// NOTE(review): $odata (here) and $use_cols (below) are not assigned on any
// line visible in this fragment; confirm both are set before use.
115 $start = preg_match('/<BR>([0-9]{4}-[0-9]{2}-[0-9]{2}) Mon/i', $odata, $matches);
116 $start = isset($matches[1]) ? $matches[1] : 0; // first monday..
119 foreach($days as $i=>$ddata) {
123 $cols = explode(',',$cinfo['htmlcols']);
// Chunk $i is dated ($i * $use_cols) days after $start.
124 $dn = strtotime($start) + (($i * $use_cols) * 24 * 60 * 60 );
125 $res = $this->$method($ddata,$cols,date('Y-m-d',$dn));
126 if (is_string($res)) {
131 if (!is_array($res)) {
134 $day_id = strtotime($start) + (($i * $use_cols ) * 24 * 60 * 60 );
// Append this chunk's programmes to whatever is already stored for that day.
138 if (empty($this->schedule[$chid][$day_id])) {
139 $this->schedule[$chid][$day_id] = array();
141 $this->schedule[$chid][$day_id] = array_merge($this->schedule[$chid][$day_id], $res);
// Single-day layout: the whole cut-out block is parsed in one call.
148 $cols = explode(',',$cinfo['htmlcols']);
150 //print_r(array($data,$cols));
// NOTE(review): this branch stores under $k while the multi-day branch stores
// under $chid; confirm the difference is intended.
152 $this->schedule[$k][$this->date + ($dayoffset * 24 * 60 * 60 )] = $this->$method($data,$cols);
153 //print_r($this->schedule);
/**
 * Parse an HTML table into rows whose cells are keyed by column name.
 *
 * @param $data     HTML fragment containing the table rows
 * @param $colnames column names, in cell order
 */
156 function parseTable($data,$colnames)
// One chunk per <tr ...> tag.
158 $rows = preg_split('/<tr[^>]*>/i', $data);
159 // print_r($rows);exit;
163 foreach($rows as $r) {
// One chunk per <td ...> tag within the row.
165 $cols = preg_split('/\<td[^>]*\>/i', trim($r));
168 //var_dump(count(array_values($cols)));
170 // print_r($cols);exit;
172 foreach($cols as $i=>$c) {
// Compares the cell count with the column-name count (the branch body is not
// visible in this fragment).
173 if (count(array_values($cols)) != count(array_values($colnames))) {
// Cells containing a nested <table are tested for separately.
176 if (preg_match('/<table/i', $c)) {
// Flatten <br> tags and line breaks to spaces before the remaining tags are
// stripped.
180 $c = str_ireplace('<br>',' ', $c);
181 $c = str_ireplace(' ',' ', $c);
182 $c = str_replace("\n",' ', $c);
183 $c = str_replace("\r",' ', $c);
185 $rdata[$colnames[$i]] = trim(strip_tags($c));
// Compares the number of collected values with the number of column names.
189 if (count(array_values($rdata)) != count(array_values($colnames))) {
196 $this->debug(print_r($ret,true));
/**
 * Parse a listing made of <li> items: a time closed by </span>, then a title
 * closed by </p>, then trailing text.
 *
 * @param $data     HTML fragment containing the list items
 * @param $colnames column names passed in by the caller
 */
203 function parseJade($data, $colnames)
// Items are split on the closing </li> tag.
206 $rows = preg_split('#</li>#i', $data);
209 foreach($rows as $r) {
210 $r = str_ireplace(' ',' ', $r);
// Everything before the first </span> is the time.
213 list($time,$r) = explode('</span>', $r, 2);
215 $rdata['hour'] = trim(strip_tags($time));
// Tests for an empty time (the branch body is not visible in this fragment).
216 if (!strlen($rdata['hour'])) {
219 //list($title,$r) = explode('</em>', $r, 2);
// Text before the first </p> becomes 'description'; the rest becomes
// 'description2'.
220 list($title,$r) = explode('</p>', $r, 2);
221 $rdata['description'] = trim(strip_tags($title));
222 $rdata['description2'] = trim(strip_tags($r));
// NOTE(review): $day is not a parameter of this method and is not assigned on
// any visible line; confirm it is defined.
223 $rdata['day'] = $day;
/**
 * Parse one chunk of the multi-day ATV listing table.
 *
 * @param $data     HTML of the chunk
 * @param $colnames column names passed in by the caller
 * @param $day      date of the chunk; compared below against 'Y-m-d' strings
 */
231 function parseatv($data,$colnames, $day)
234 // if it's a day row..
236 $lines = explode("\n", trim($data));
237 //var_dump($lines[1]);
// When the second line holds a <div, a yyyy-mm-dd date is extracted from it.
240 if (isset($lines[1]) && preg_match('/<div/', trim($lines[1]))) {
242 preg_match('/<BR>([0-9]{4}-[0-9]{2}-[0-9]{2})/i', $lines[1], $matches);
// NOTE(review): var_dump() prints to standard output, which the cron example
// redirects into the XML file; this looks like leftover debugging.
243 var_dump($matches[1]);
247 $rows = preg_split('/<tr[^>]*>/i', $data);
248 if ($day == '2011-10-29') {
249 //$this->debug(print_r($rows,true));
// String comparison of the chunk's date against today's 'Y-m-d' date.
251 if ($day < date('Y-m-d')) {
252 // $this->debug("OLD DATA $day");
255 //$this->debug($day);
256 //$this->debug(print_r($rows,true));
262 foreach($rows as $r) {
264 $cols = preg_split('/<td[^>]*>/i', $r);
267 if (!isset($cols[2])) {
// Tests whether the cell starts with a time such as 12:30.
274 if (!preg_match('/^[0-9]+:[0-9]+/', $c)) {
277 //$this->debug("GOT HOUR: $c");
// NOTE(review): array_shift() takes its argument by reference and explode()
// returns a temporary, so PHP reports "Only variables should be passed by
// reference" here.
278 $rdata['hour'] = trim(array_shift(explode('<', $c)));
281 $kv = preg_split('/<br>/',$c);
// Flatten line breaks, replace every remaining tag with a space and store the
// result as 'description2'.
284 $c = str_ireplace('<br>',' ', $c);
285 $c = str_ireplace(' ',' ', $c);
286 $c = str_replace("\n",' ', $c);
287 $c = str_replace("\r",' ', $c);
288 $c = preg_replace('/\<[^>]+\>/', ' ', $c);
289 $rdata['description2'] = trim($c);
// The same clean-up, stored as 'description'.
294 $c = str_ireplace('<br>',' ', $c);
295 $c = str_ireplace(' ',' ', $c);
296 $c = str_replace("\n",' ', $c);
297 $c = str_replace("\r",' ', $c);
298 $c = preg_replace('/\<[^>]+\>/', ' ', $c);
299 $rdata['description'] = trim($c);
302 $rdata['day'] = $day;
308 //print_r($ret); exit;
/**
 * Parse a table in which a single cell holds a time followed by text.
 *
 * @param $data     HTML fragment containing the table rows
 * @param $colnames column names; [0] receives the time, [1] the remaining text
 * @param $day      value stored under 'day' on each parsed row
 */
316 function parseTableCells($data,$colnames, $day)
318 $rows = preg_split('/<tr[^>]*>/i', $data);
319 //$this->debug(print_r($rows,true));
326 foreach($rows as $r) {
328 $cols = preg_split('/<td[^>]*>/i', $r);
// Tests whether the cell starts with a time followed by whitespace.
333 if (!preg_match('/^[0-9]+:[0-9]+\s/', $c)) {
337 $c = str_ireplace('<br>',' ', $c);
338 $c = str_ireplace(' ',' ', $c);
339 $c = str_replace("\n",' ', $c);
340 $c = str_replace("\r",' ', $c);
// Replace any remaining tag with a space.
343 $c = preg_replace('/\<[^>]+\>/', ' ', $c);
// Split once on whitespace: the first piece is the time, the remainder the text.
345 $kv = preg_split("/\s+/", $c, 2);
347 $rdata[$colnames[0]] = trim($kv[0]);
348 $rdata[$colnames[1]] = trim($kv[1]);
// Compares the number of collected values with the number of column names.
352 if (count(array_values($rdata)) != count(array_values($colnames))) {
355 $rdata['day'] = $day;
366 <tv generator-info-name="tv_grab_uk">
367 <channel id="bbc2.bbc.co.uk">
368 <display-name lang="en">BBC2</display-name>
370 <channel id="channel4.com">
371 <display-name lang="en">Channel 4</display-name>
374 <programme channel="bbc2.bbc.co.uk" start="20010829000500 +0100">
375 <title lang="en">The Phil Silvers Show</title>
377 Bilko claims he's had a close encounter with an alien in order
378 to be given some compassionate leave so he can visit an old
383 <programme channel="channel4.com" start="20010829095500 +0100">
384 <title lang="en">King of the Hill</title>
385 <sub-title lang="en">Meet the Propaniacs</sub-title>
387 Bobby tours with a comedy troupe who specialize in
388 propane-related mirth.
391 <actor>Mike Judge</actor>
392 <actor>Lane Smith</actor>
394 <category lang="en">animation</category>
// Build the XMLTV document: a <tv> root, one <channel> element per distinct
// channel id, then one <programme> element per schedule item.
402 //print_r($this->schedule);
403 $doc = new DomDocument('1.0', 'UTF-8');
404 $tv = $doc->createElement('tv');
405 $tv->setAttribute( 'generator-info-name','akpear_xml_tv');
406 $doc->appendChild($tv);
409 //$out = '<'.'?xml version="1.0" encoding="UTF-8"?.'>'."\n" .
410 // '<!DOCTYPE tv SYSTEM "xmltv.dtd">'."\n" ."\n" .
411 /// '<tv generator-info-name="akpear_xml_tv">'."\n";
413 foreach($this->channels as $k => $v) {
// Channel id: the configured id when present, otherwise the channel key.
415 $chid = isset($v['id']) ? $v['id'] : $k;
// $donec records the ids that have already been handled.
416 if (isset($donec[$chid])) {
419 $donec[$chid] = true;
420 $ch = $doc->createElement('channel');
421 $ch->setAttribute('id', $chid);
422 $disp = $doc->createElement('display-name');
423 $disp->setAttribute('lang', 'en');
424 $disp->appendChild($doc->createTextNode($v['name']));
425 $ch->appendChild($disp);
426 $tv->appendChild($ch);
428 // '<channel id="'. $k .'">
429 // <display-name lang="en">'. $v['name'] .'</display-name>
432 //print_r($this->schedule);
// The schedule is nested as channel => day => list of items.
433 foreach($this->schedule as $chan => $scheds) {
435 foreach($scheds as $day => $sched) {
442 foreach($sched as $item) {
// 'hour' is split on ':' into its parts.
445 $bits = explode(':', $item['hour']);
// Compares the hour part with $last (the handling is not visible in this
// fragment).
446 if ($bits[0] < $last) {
451 //var_dump($bits[0] + $hoffset);
452 $start = mktime(/*hmsmdy */
// Compares the start time with midnight at the beginning of yesterday.
460 if ($start < strtotime(date("Y-m-d 00:00:00", strtotime('NOW - 1 DAY')))) {
463 $item['hoffset'] = $hoffset;
464 $item['hoffset_ar'] = $bits;
466 //$this->debug(print_r($item, true));
// XMLTV start attribute: YmdHis followed by the configured gmtoffset.
467 $start_str = date('YmdHis',$start) . ' ' . $this->config['gmtoffset'];
468 //var_dump($start_str);
469 //var_dump($this->channels);
// Convert the text from the channel's configured encoding to UTF-8;
// description2 is appended when present.
470 $description = iconv($this->channels[$chan]['encoding'], 'UTF-8',$item['description'] .
471 (isset($item['description2']) ? (' ' . $item['description2']) : ''));
473 $this->debug(date("Y-m-d H:i - ", $start). $description);
476 $pg = $doc->createElement('programme');
478 $pg->setAttribute('channel', $chan);
479 $pg->setAttribute('start', $start_str);
// <title> carries the shortened title produced by toTitle().
481 $title = $doc->createElement('title');
482 $title->setAttribute('lang', 'zh');
483 $title->appendChild($doc->createTextNode($this->toTitle($description,$chan)));
484 $pg->appendChild($title);
// <desc> carries the full description; the $title variable is reused for it.
486 $title = $doc->createElement('desc');
487 $title->setAttribute('lang', 'zh');
488 $title->appendChild($doc->createTextNode($description));
489 $pg->appendChild($title);
491 $tv->appendChild($pg);
492 //$out.= '<programme channel="'.$chan. '" start="'.$start_str. '">
493 // <title lang="zh">'. $this->toTitle($description) .'</title>
494 // <desc lang="zh">'. $description .'</desc>
495 // </programme>'."\n";
// Serialise the document with indentation.
500 $doc->formatOutput = true;
504 return $doc->saveXML();
/**
 * Derive a short programme title from a full description by removing sponsor
 * text and bracketed flags.
 *
 * @param $description full programme text
 * @param $chan        channel id; two ids get extra handling near the end
 */
508 function toTitle($description, $chan)
510 // remove sponsor message.
// Only the text before the first ">>" is used as the title; @ hides the
// notice raised when there is no ">>".
512 @list($title, $fuldesc) = explode(">>",$description);
// "A Followed By B": the part before the phrase becomes the prefix $title_pre.
516 if (preg_match('/Followed\s*By/i', $title)) {
517 $bits = preg_split('/Followed\s*By/i', $title);
518 $title_pre = $bits[0] . ' Followed By ';
521 $title = preg_replace('#countdown to[a-z0-9 ]+#i', '' , $title); # "countdown to ..." text
522 $title = preg_replace('#^(solar x|Samsung Digital)\s*#i', '' , $title); # known sponsors..
// Drop a lead-in such as "<name> presents:" or "<name> movie of the month:".
524 $title = preg_replace('#[a-z0-9 ]+(presents|special|blockbuster|movie of the month|showtime)\s*:\s*#i', '', $title);
526 $title = preg_replace('#\([a-z]+/[a-z]+\s*(|bilingual)\)#i', '' , $title); # NICAM Language
527 $title = preg_replace('#\(live\)#i', '' , $title); # live
528 $title = preg_replace('#\((s|c|l|e|cs|es|ecs|can|ce)[*]*\)#i', '' , $title); # Subtitle
529 $title = preg_replace('#\((pg\d*\w*)\)#i', '' , $title); # Adult
530 $title = preg_replace('#\(r\)#i', '' , $title); # Repeated
// Trim surrounding slashes, then surrounding whitespace.
531 $title = trim($title, '/');
532 $title = trim($title);
534 $ret = $title_pre . $title;
536 // if (!strlen($ret)) {
537 // die("got $description : nothing to return");
// For these two channel ids, leading characters that are not ASCII letters
// or digits are stripped into $enonly, whose length is then tested.
539 if (in_array($chan, array('tvbpearl.hk', 'english.atvworld.hk'))) {
540 $enonly = preg_replace('#^[^a-z0-9]+#i', '', $title);
541 if (strlen($enonly) > 10) {
547 return $title_pre . $title;
// Tests whether the 'debug' setting is unset or falsy (the branch body is not
// visible in this fragment).
554 if (empty($this->config['debug'])) {
// Script entry: the first command-line argument is handed to start().
566 $x->start($_SERVER['argv'][1]);