5 make a weekly cronjob (/etc/cron.weekly/filltv)
7 /usr/bin/php /path/to/application/Tv.php /path/to/application/hongkong.ini > /tmp/hongkong.xml
8 /usr/bin/mythfilldatabase --file 1 1 /tmp/hongkong.xml
10 Make sure the filltv file is executable (chmod +x).
14 Quick hack to generate XMLTV listings from TVxb-style .ini files (note: the file contents are decoded as JSON).
17 // seriously it's this bad.. strict-standards and notice-level messages are switched off.
18 error_reporting(E_ALL & ~E_STRICT &~E_NOTICE);
23 if (!extension_loaded('mbstring')) {
27 define ('QUOTE', '"');
30 var $config; // configuration array.
31 var $date; // date we are fetching
32 var $channels; // details on the channels.
38 require_once 'JSON.php';
39 $j = new Services_JSON();
40 $conf = (array)($j->decode(file_get_contents($ini)));
41 //$conf = (array)json_decode(file_get_contents($ini));
43 foreach($conf as $k=>$v) {
44 $conf[$k] = (array)$v;
46 $this->channels = $conf;
47 $this->config = $conf['global'];
48 unset($this->channels['global']);
49 //print_r($this->channels);
54 foreach($this->channels as $k=>$v) {
56 for ($i=0;$i< (isset($v['days']) ? $v['days'] : $conf['days']);$i++) {
57 $this->grabChannel($k,$i);
/**
 * Fetch the listing page of one configured channel for one day, cut it down
 * to the configured start/end markers, run the configured parser over it and
 * store the parsed rows in $this->schedule.
 *
 * @param mixed $k          key of the channel in $this->channels
 * @param int   $dayoffset  number of days after $this->date to fetch
 */
70 function grabChannel($k,$dayoffset)
72 $cinfo = $this->channels[$k];
// Day being fetched: the base date plus $dayoffset days, expressed in seconds.
76 $date = $this->date + ($dayoffset * 24 * 60 * 60 );
// The configured url is a strftime() format string expanded for that day.
// NOTE(review): strftime() is deprecated as of PHP 8.1.
78 $url = strftime( $cinfo['url'], $date);
// "@" hides any warning raised by a failed fetch.
81 $data = @file_get_contents($url);
83 $this->debug("NO DATA");
85 // something went wrong..
91 array("0x22", "0x09", "0x0a"),
92 array('"', "\t", "\n")
// The htmlstart/htmlend markers may spell a double quote, tab or newline as
// the tokens 0x22 / 0x09 / 0x0a; swap those tokens for the literal characters.
// A htmlstart that is exactly false is passed through as false.
96 $hs = $cinfo['htmlstart'] === false ? false : str_replace($map[0],$map[1],$cinfo['htmlstart']);
98 $he= str_replace($map[0],$map[1],$cinfo['htmlend']);
// Keep the piece that follows the first start marker...
101 list( , $data) = explode($hs,$data);
// ...and keep only what precedes the first end marker.
105 list($data,) = explode($he ,$data);
109 //$this->debug("DATA:".$data);
// Parser method name is built from the config, e.g. htmlparsetype "Table" selects parseTable().
111 $method = 'parse'.$cinfo['htmlparsetype'];
// Schedule key for the multi-day branch: the configured id, falling back to the channel key.
113 $chid = isset($cinfo['id']) ? $cinfo['id'] : $k;
114 // Multi-day HTML layout (atv): the page is split into chunks on htmldaysep.
115 if (!empty($cinfo['htmldaysep'])) {
116 $days = explode($cinfo['htmldaysep'], $data);
117 // Kludge: start date = first Monday of the current week..
118 // Look for e.g. <BR>2007-12-31 Mon
// NOTE(review): this searches $odata rather than $data — confirm $odata is assigned earlier in this method.
119 $start = preg_match('/<BR>([0-9]{4}-[0-9]{2}-[0-9]{2}) Mon/i', $odata, $matches);
// $start becomes the matched date string, or 0 when nothing matched.
120 $start = isset($matches[1]) ? $matches[1] : 0; // first monday..
123 foreach($days as $i=>$ddata) {
127 $cols = explode(',',$cinfo['htmlcols']);
// Timestamp for chunk $i: the start date plus ($i * $use_cols) days.
// NOTE(review): confirm where $use_cols is set.
128 $dn = strtotime($start) + (($i * $use_cols) * 24 * 60 * 60 );
// The parser receives the chunk, the column names and the day as Y-m-d.
129 $res = $this->$method($ddata,$cols,date('Y-m-d',$dn));
130 if (is_string($res)) {
135 if (!is_array($res)) {
// Same timestamp as $dn, used as the per-day key in the schedule.
138 $day_id = strtotime($start) + (($i * $use_cols ) * 24 * 60 * 60 );
// Append this chunk's rows to whatever is already stored for that day.
142 if (empty($this->schedule[$chid][$day_id])) {
143 $this->schedule[$chid][$day_id] = array();
145 $this->schedule[$chid][$day_id] = array_merge($this->schedule[$chid][$day_id], $res);
152 $cols = explode(',',$cinfo['htmlcols']);
154 //print_r(array($data,$cols));
// Single-day layout: store the parser result under the channel key and the day's timestamp.
// NOTE(review): keyed by $k here but by $chid in the multi-day branch — confirm this is intended.
156 $this->schedule[$k][$this->date + ($dayoffset * 24 * 60 * 60 )] = $this->$method($data,$cols);
157 //print_r($this->schedule);
160 function parseTable($data,$colnames)
162 $rows = preg_split('/<tr[^>]*>/i', $data);
163 // print_r($rows);exit;
167 foreach($rows as $r) {
169 $cols = preg_split('/\<td[^>]*\>/i', trim($r));
172 //var_dump(count(array_values($cols)));
174 // print_r($cols);exit;
176 foreach($cols as $i=>$c) {
177 if (count(array_values($cols)) != count(array_values($colnames))) {
180 if (preg_match('/<table/i', $c)) {
184 $c = str_ireplace('<br>',' ', $c);
185 $c = str_ireplace(' ',' ', $c);
186 $c = str_replace("\n",' ', $c);
187 $c = str_replace("\r",' ', $c);
189 $rdata[$colnames[$i]] = trim(strip_tags($c));
193 if (count(array_values($rdata)) != count(array_values($colnames))) {
200 $this->debug(print_r($ret,true));
207 function parseJade($data, $colnames, $day=0)
210 $rows = preg_split('#</li>#i', $data);
213 foreach($rows as $r) {
214 $r = str_ireplace(' ',' ', $r);
217 @list($time,$r) = explode('</span>', $r, 2);
219 $rdata['hour'] = trim(strip_tags($time));
220 if (!strlen($rdata['hour'])) {
223 //list($title,$r) = explode('</em>', $r, 2);
224 list($title,$r) = explode('</p>', $r, 2);
225 $rdata['description'] = trim(strip_tags($title));
226 $rdata['description2'] = trim(strip_tags($r));
227 $rdata['day'] = $day;
235 function parseatv($data,$colnames, $day)
238 // if it's a day row..
240 $lines = explode("\n", trim($data));
241 //var_dump($lines[1]);
244 if (isset($lines[1]) && preg_match('/<div/', trim($lines[1]))) {
246 preg_match('/<BR>([0-9]{4}-[0-9]{2}-[0-9]{2})/i', $lines[1], $matches);
247 //var_dump($matches[1]);
251 $rows = preg_split('/<tr[^>]*>/i', $data);
252 if ($day == '2011-10-29') {
253 //$this->debug(print_r($rows,true));
255 if ($day < date('Y-m-d')) {
256 // $this->debug("OLD DATA $day");
259 //$this->debug($day);
260 //$this->debug(print_r($rows,true));
266 foreach($rows as $r) {
268 $cols = preg_split('/<td[^>]*>/i', $r);
271 if (!isset($cols[2])) {
278 if (!preg_match('/^[0-9]+:[0-9]+/', $c)) {
281 //$this->debug("GOT HOUR: $c");
282 $rdata['hour'] = trim(array_shift(explode('<', $c)));
285 $kv = preg_split('/<br>/',$c);
288 $c = str_ireplace('<br>',' ', $c);
289 $c = str_ireplace(' ',' ', $c);
290 $c = str_replace("\n",' ', $c);
291 $c = str_replace("\r",' ', $c);
292 $c = preg_replace('/\<[^>]+\>/', ' ', $c);
293 $rdata['description2'] = trim($c);
297 $c = isset($kv[1]) ? $kv[1] : '';
298 $c = str_ireplace('<br>',' ', $c);
299 $c = str_ireplace(' ',' ', $c);
300 $c = str_replace("\n",' ', $c);
301 $c = str_replace("\r",' ', $c);
302 $c = preg_replace('/\<[^>]+\>/', ' ', $c);
303 $rdata['description'] = trim($c);
306 $rdata['day'] = $day;
312 //print_r($ret); exit;
320 function parseTableCells($data,$colnames, $day)
322 $rows = preg_split('/<tr[^>]*>/i', $data);
323 //$this->debug(print_r($rows,true));
330 foreach($rows as $r) {
332 $cols = preg_split('/<td[^>]*>/i', $r);
337 if (!preg_match('/^[0-9]+:[0-9]+\s/', $c)) {
341 $c = str_ireplace('<br>',' ', $c);
342 $c = str_ireplace(' ',' ', $c);
343 $c = str_replace("\n",' ', $c);
344 $c = str_replace("\r",' ', $c);
347 $c = preg_replace('/\<[^>]+\>/', ' ', $c);
349 $kv = preg_split("/\s+/", $c, 2);
351 $rdata[$colnames[0]] = trim($kv[0]);
352 $rdata[$colnames[1]] = trim($kv[1]);
356 if (count(array_values($rdata)) != count(array_values($colnames))) {
359 $rdata['day'] = $day;
370 <tv generator-info-name="tv_grab_uk">
371 <channel id="bbc2.bbc.co.uk">
372 <display-name lang="en">BBC2</display-name>
374 <channel id="channel4.com">
375 <display-name lang="en">Channel 4</display-name>
378 <programme channel="bbc2.bbc.co.uk" start="20010829000500 +0100">
379 <title lang="en">The Phil Silvers Show</title>
381 Bilko claims he's had a close encounter with an alien in order
382 to be given some compassionate leave so he can visit an old
387 <programme channel="channel4.com" start="20010829095500 +0100">
388 <title lang="en">King of the Hill</title>
389 <sub-title lang="en">Meet the Propaniacs</sub-title>
391 Bobby tours with a comedy troupe who specialize in
392 propane-related mirth.
395 <actor>Mike Judge</actor>
396 <actor>Lane Smith</actor>
398 <category lang="en">animation</category>
406 //print_r($this->schedule);
407 $doc = new DomDocument('1.0', 'UTF-8');
408 $tv = $doc->createElement('tv');
409 $tv->setAttribute( 'generator-info-name','akpear_xml_tv');
410 $doc->appendChild($tv);
413 //$out = '<'.'?xml version="1.0" encoding="UTF-8"?.'>'."\n" .
414 // '<!DOCTYPE tv SYSTEM "xmltv.dtd">'."\n" ."\n" .
415 /// '<tv generator-info-name="akpear_xml_tv">'."\n";
417 foreach($this->channels as $k => $v) {
419 $chid = isset($v['id']) ? $v['id'] : $k;
420 if (isset($donec[$chid])) {
423 $donec[$chid] = true;
424 $ch = $doc->createElement('channel');
425 $ch->setAttribute('id', $chid);
426 $disp = $doc->createElement('display-name');
427 $disp->setAttribute('lang', 'en');
428 $disp->appendChild($doc->createTextNode($v['name']));
429 $ch->appendChild($disp);
430 $tv->appendChild($ch);
432 // '<channel id="'. $k .'">
433 // <display-name lang="en">'. $v['name'] .'</display-name>
436 //print_r($this->schedule);
437 foreach($this->schedule as $chan => $scheds) {
439 foreach($scheds as $day => $sched) {
446 foreach($sched as $item) {
449 $bits = explode(':', $item['hour']);
450 if ($bits[0] < $last) {
455 //var_dump($bits[0] + $hoffset);
456 $start = mktime(/*hmsmdy */
464 if ($start < strtotime(date("Y-m-d 00:00:00", strtotime('NOW - 1 DAY')))) {
467 $item['hoffset'] = $hoffset;
468 $item['hoffset_ar'] = $bits;
470 //$this->debug(print_r($item, true));
471 $start_str = date('YmdHis',$start) . ' ' . $this->config['gmtoffset'];
472 //var_dump($start_str);
473 //var_dump($this->channels);
474 $description = iconv($this->channels[$chan]['encoding'], 'UTF-8',$item['description'] .
475 (isset($item['description2']) ? (' ' . $item['description2']) : ''));
477 $this->debug(date("Y-m-d H:i - ", $start). $description);
480 $pg = $doc->createElement('programme');
482 $pg->setAttribute('channel', $chan);
483 $pg->setAttribute('start', $start_str);
485 $title = $doc->createElement('title');
486 $title->setAttribute('lang', 'zh');
487 $title->appendChild($doc->createTextNode($this->toTitle($description,$chan)));
488 $pg->appendChild($title);
490 $title = $doc->createElement('desc');
491 $title->setAttribute('lang', 'zh');
492 $title->appendChild($doc->createTextNode($description));
493 $pg->appendChild($title);
495 $tv->appendChild($pg);
496 //$out.= '<programme channel="'.$chan. '" start="'.$start_str. '">
497 // <title lang="zh">'. $this->toTitle($description) .'</title>
498 // <desc lang="zh">'. $description .'</desc>
499 // </programme>'."\n";
504 $doc->formatOutput = true;
508 return $doc->saveXML();
/**
 * Reduce a programme description to a short title.
 *
 * Takes the text before ">>", removes promo/sponsor wording and bracketed
 * flags, and keeps a "... Followed By" lead-in as a prefix.
 *
 * @param string $description  programme description text
 * @param string $chan         channel id; two specific channels get an extra check
 * @return string              $title_pre followed by the cleaned title
 */
512 function toTitle($description, $chan)
514 // Remove sponsor message: the title is the text before the first ">>".
// "@" hides the notice raised when the description contains no ">>".
516 @list($title, $fuldesc) = explode(">>",$description);
// Double bill ("A Followed By B"): keep the first part plus the literal
// ' Followed By ' as the prefix.
// NOTE(review): confirm $title_pre is initialised for titles without "Followed By".
520 if (preg_match('/Followed\s*By/i', $title)) {
521 $bits = preg_split('/Followed\s*By/i', $title);
522 $title_pre = $bits[0] . ' Followed By ';
525 $title = preg_replace('#countdown to[a-z0-9 ]+#i', '' , $title); # "countdown to ..." promo text
526 $title = preg_replace('#^(solar x|Samsung Digital)\s*#i', '' , $title); # known sponsors at the start of the title
// Drop a "<words> presents:" style banner (also special, blockbuster,
// movie of the month, showtime), including the colon.
528 $title = preg_replace('#[a-z0-9 ]+(presents|special|blockbuster|movie of the month|showtime)\s*:\s*#i', '', $title);
530 $title = preg_replace('#\([a-z]+/[a-z]+\s*(|bilingual)\)#i', '' , $title); # NICAM language pair, e.g. (can/eng) or (can/eng bilingual)
531 $title = preg_replace('#\(live\)#i', '' , $title); # live
532 $title = preg_replace('#\((s|c|l|e|cs|es|ecs|can|ce)[*]*\)#i', '' , $title); # subtitle flags such as (S), (C), (CS), optionally followed by *
533 $title = preg_replace('#\((pg\d*\w*)\)#i', '' , $title); # (PG...) marker — adult / parental guidance
534 $title = preg_replace('#\(r\)#i', '' , $title); # Repeated
// Strip leading/trailing slashes first, then surrounding whitespace.
535 $title = trim($title, '/');
536 $title = trim($title);
// NOTE(review): $ret holds the same expression that is returned at the end; check whether it is still needed.
538 $ret = $title_pre . $title;
540 // if (!strlen($ret)) {
541 // die("got $description : nothing to return");
// For these two channels, strip leading non-alphanumeric characters into
// $enonly and test whether more than 10 bytes remain.
543 if (in_array($chan, array('tvbpearl.hk', 'english.atvworld.hk'))) {
544 $enonly = preg_replace('#^[^a-z0-9]+#i', '', $title);
545 if (strlen($enonly) > 10) {
551 return $title_pre . $title;
558 if (empty($this->config['debug'])) {
570 $x->start($_SERVER['argv'][1]);