From d1c353b498a05289a08caeac8dac0120d4ad046d Mon Sep 17 00:00:00 2001 From: BlastytheBlue Date: Fri, 19 Dec 2014 17:32:07 +0800 Subject: [PATCH] sync --- .../DataObjects/Reddit_livefeed_embed.php | 129 +++++++++++++++--- Reddit/ImportEmbed.php | 1 + 2 files changed, 114 insertions(+), 16 deletions(-) diff --git a/Pman/Reddit/DataObjects/Reddit_livefeed_embed.php b/Pman/Reddit/DataObjects/Reddit_livefeed_embed.php index 77a1e1015..bb9d4d764 100644 --- a/Pman/Reddit/DataObjects/Reddit_livefeed_embed.php +++ b/Pman/Reddit/DataObjects/Reddit_livefeed_embed.php @@ -3,7 +3,6 @@ * Table Definition for cash_costing_map */ require_once 'DB/DataObject.php'; -require_once 'Net/URL.php'; class Pman_Reddit_DataObjects_Reddit_livefeed_embed extends DB_DataObject @@ -25,15 +24,35 @@ class Pman_Reddit_DataObjects_Reddit_livefeed_embed extends DB_DataObject /* the code above is auto generated do not remove the tag below */ ###END_AUTOCODE - function convertClean(){ + /*function convertClean(){; + require_once 'Net/URL.php'; $data = $this->frame_body; libxml_use_internal_errors (true); $doc = new DOMDocument(); $doc->loadHTML($data); //$doc->loadHTML($data); $xpath = new DOMXpath($doc); - } + + foreach($xpath->query('//link') as $l){ + $url=parse_url($l->getAttribute("href")); + $l->setAttribute("href",'/assets/'. $url["host"] . $url["path"]); + } + + foreach($xpath->query('//img') as $im){ + $url = parse_url($im->getAttribute("src")); + $file = '/assets/'. $url["host"] . $url["path"]; + if($url->query){ + $file += "--" . md5($url->query); + } + $im->setAttribute("src",$file); + } + + $this->frame_body_clean = $doc->saveHTML(); + $this->update(); + }*/ function createAssets(){ + require_once 'Net/URL.php'; + echo $this->url; $data = $this->frame_body; libxml_use_internal_errors (true); $doc = new DOMDocument(); @@ -42,20 +61,21 @@ class Pman_Reddit_DataObjects_Reddit_livefeed_embed extends DB_DataObject $xpath = new DOMXpath($doc); foreach($xpath->query('//link') as $l){ - $url=parse_url($l->getAttribute("href")); - $file = '/home/reddit/assets/'. $url["host"] . $url["path"]; - if(file_exists($file)){ + $href=$l->getAttribute("href"); + $url=parse_url($href); + $file = '/assets/'. $url["host"] . $url["path"]; + if(file_exists("home/reddit" . $file)){ echo "file exists already.\n"; continue; } - $dir = dirname($file); + $dir = dirname("/home/reddit" . $file); if(!file_exists($dir)){ echo "Made directory: ".$dir . "\n"; mkdir($dir,0775,true); } - $contents=file_get_contents($l->getAttribute("href")); - preg_match_all("/url\(([^\)]*)/",$contents,$matches); - foreach($matches[1] as $r){ + $contents = file_get_contents($href); + //preg_match_all(,$contents,$matches); + /*foreach($matches[1] as $r){ if(preg_match("/^data:/",$r)===1){ continue; }; @@ -67,7 +87,7 @@ class Pman_Reddit_DataObjects_Reddit_livefeed_embed extends DB_DataObject $linked=$nu->resolvePath(dirname($l->getAttribute("href")) . $r); } $lnk = parse_url($linked); - $fl = '/home/reddit/assets/' . $lnk["host"] . $lnk["path"]; + $fl = '/assets/' . $lnk["host"] . $lnk["path"]; if(file_exists($fl)){ echo "file exists already.\n"; continue; @@ -78,14 +98,90 @@ class Pman_Reddit_DataObjects_Reddit_livefeed_embed extends DB_DataObject mkdir($dr,0775,true); } $cntnts=file_get_contents($linked); - file_put_contents($fl,$cntnts); - }; - file_put_contents($file,$contents); + file_put_contents("/home/reddit" . $fl,$cntnts); + + };*/ + + if($l->getAttribute("rel")!="stylesheet"){ + continue; + } + + $new_contents = preg_replace_callback( + "/(?<=url\()[^\)]*/", + function($matches) use($href){ + if(preg_match("/^data:/",$matches[0])===1){ + return $matches[0]; + }; + $linked=""; + if(preg_match("/^\//",$matches[0])===1){ + $linked=$url["host"] . $matches[0]; + }else{ + $nu = new Net_URL(); + $prelink = preg_replace("/^.*\:?\/\//","",dirname($href) . "/" . $matches[0]); + $linked = "http://" . $nu->resolvePath($prelink); + //echo "\n\n\n$prelink\n\n\n$linked\n\n\n"; + } + $lnk = parse_url($linked); + $fl = '/assets' . $lnk["host"] . $lnk["path"]; + if(file_exists("/home/reddit" . $fl)){ + echo "file exists already.\n"; + return $fl; + } + $dr = dirname("/home/reddit" . $fl); + if(!file_exists($dr)){ + echo "Made directory: ".$dr . "\n"; + mkdir($dr,0775,true); + } + $cntnts=file_get_contents($linked); + file_put_contents("/home/reddit" . $fl,$cntnts); + return $fl; + }, + $contents + ); + file_put_contents("/home/reddit" . $file, $new_contents); + file_put_contents("/home/reddit" . preg_replace("/(?=(\.css)?$)/", "--original", $file, 1), $contents); + $l->setAttribute("href",$file); + break; } foreach($xpath->query('//img') as $im){ - $url=parse_url($im->getAttribute("src")); + require_once 'File/MimeType.php'; + $url = parse_url($im->getAttribute("src")); + $src = $im->getAttribute("src"); + + $f = fopen($src,"r"); + $info = stream_get_meta_data($f); + foreach($info["wrapper_data"] as $header){ + preg_match("/content\-type:\s*([^ ;]*)/i",$header,$exts); + //echo $header . "\n"; + if(sizeof($exts)>1){ + break; + } + } + //echo print_r($exts); + $img = stream_get_contents($f); + $fm = new File_MimeType(); + if($exts[1]){ + $ext = $fm->toExt($exts[1]); + } + $file = '/home/reddit/assets/'. $url["host"] . $url["path"]; + + $file = preg_replace("/\.[a-z]{3,4}$/i","",$file); + + if(array_key_exists("query",$url)){ + $file .= "--" . md5($url["query"]); + } + + if($ext){ + $file .= "." . $ext; + } + + //echo "\n\nFile: " .$file . "\nSrc: " . $src . "\nExts:" . print_r($exts) . "\nMeta: " . print_r($info); + + fclose($f); + + //. "--" . md if(file_exists($file)){ echo "file exists already."; continue; @@ -95,7 +191,8 @@ class Pman_Reddit_DataObjects_Reddit_livefeed_embed extends DB_DataObject echo "Made directory: ".$dir . "\n"; mkdir($dir,0775,true); } - file_put_contents($file,file_get_contents($im->getAttribute("src"))); + file_put_contents($file,$img); + break; } } function convertHTML(){ diff --git a/Reddit/ImportEmbed.php b/Reddit/ImportEmbed.php index 7499ea7ef..3096bebb1 100644 --- a/Reddit/ImportEmbed.php +++ b/Reddit/ImportEmbed.php @@ -32,6 +32,7 @@ class Reddit_ImportEmbed extends Pman { //$dd->fetchEmbed(); //$dd->convertHTML(); $dd->createAssets(); + //$dd->convertClean(); } exit; -- 2.39.2