Pman.Dialog.CmsBlog.bjs
[Pman.Cms] / DataObjects / CmsRefer.php
1 <?php
2 /**
3  * Table Definition for referrers
4  */
5 class_exists('DB_DataObject') ? '' : require_once 'DB/DataObject.php';
6
/**
 * Referrer record for a blog entry (table 'CmsRefer').
 *
 * Besides the generated column properties, the class can normalise a referrer
 * (clean()), rebuild its URL (url()) and verify that the referring page really
 * links back to the entry (checkValid()).
 *
 * Columns used by the checking logic:
 *  - checked          when the referrer was last checked (written as NOW()).
 *  - checked_success  greater than 0 once verified; decremented before every
 *                     page fetch, so it goes negative after repeated failures.
 */
class Pman_Cms_DataObjects_CmsRefer extends DB_DataObject
{

    ###START_AUTOCODE
    /* the code below is auto generated do not remove the above tag */

    var $__table = 'CmsRefer';                       // table name
    var $blog_id;                        // int(11)  not_null primary_key auto_increment
    var $day;                             // date(10)  not_null primary_key
    var $seen;                           // int(11)  not_null
    var $scheme;                          // string(5)  
    var $host;                            // string(128)  not_null primary_key
    var $port;                            // string(5)  
    var $path;                            // string(255)  
    var $checked;
    var $checked_success;
    var $query;                           // string(255)  
    var $title;
 
    /* the code above is auto generated do not remove the tag below */
    ###END_AUTOCODE

    /**
     * Decide whether this referrer is (still) a genuine link to the blog entry.
     *
     * A recent result is reused. Otherwise the row is normalised with clean()
     * and is either recognised as a search query, or the referring page is
     * fetched over HTTP and scanned for a link back to the entry.
     *
     * Side effects: the row is updated, or deleted when clean() rejects it or
     * it has failed more than 10 times. When a page is verified and the row was
     * not previously successful, the entry object is stored in
     * $this->entryObject.
     *
     * @return int  the stored checked_success when a recent result is reused
     *              (may be negative for a recent failure);
     *              0 when the row was deleted, the fetch failed or no link
     *              back was found;
     *              2 when the referrer is a search query;
     *              1 when the fetched page links back to the entry.
     */
    function checkValid()
    {
        $day = 24 * 60 * 60;

        // a successful check stays valid for 30 days.
        if ($this->checked_success > 0 && $this->_checkedWithin(30 * $day)) {
            return $this->checked_success;
        }
        // after a failed check, wait a day before trying again.
        if ($this->checked_success < 0 && $this->_checkedWithin($day)) {
            return $this->checked_success;
        }
        // more than 10 failures: give up and remove the row.
        if ($this->checked_success < -10) {
            $this->delete();
            return 0;
        }

        $original_success = $this->checked_success;
        require_once 'DB/DataObject/Cast.php';

        // clean() normalises host/path/query and rejects unwanted referrers.
        if (!$this->clean()) {
            $this->delete();
            return 0;
        }

        // search referrers are accepted without fetching anything.
        if (strlen((string) $this->query)) {
            parse_str($this->query, $q);
            if (isset($q['q']) && is_string($q['q'])) {
                // parse_str() has already URL-decoded the term: use it as-is
                // for the title and re-encode it for the stored query.
                $this->host = 'google.com';
                $this->query = 'q=' . urlencode($q['q']);
                $this->title = $q['q'];
                $this->checked_success = 1;
                $this->checked = DB_DataObject_Cast::sql('NOW()');
                $up = clone($this);
                $up->update();
                return 2;
            }
        }

        // record the attempt before fetching, in case the fetch hangs.
        $this->checked_success--;
        $this->checked = DB_DataObject_Cast::sql('NOW()');
        $up = clone($this);
        $up->update();

        // best effort: warnings are suppressed and a failed fetch counts as
        // "link not on page".
        $data = @file_get_contents($this->url());
        if ($data === false || !$this->_linksToEntry($data)) {
            return 0;
        }

        if (preg_match('#<title>([^<]+)</title>#i', $data, $matches)) {
            $this->title = $matches[1];
        }

        $this->checked_success = 2;
        $up = clone($this);
        $up->update();

        if ($original_success < 1) {
            // first successful verification of this referrer.
            $e = DB_DataObject::factory('cms_page');
            $e->get($this->blog_id);
            // NOTE(review): $auth is only needed by the disabled notification
            // below; the call is kept in case getAuthorObject() has side effects.
            $auth = $e->getAuthorObject();
            $this->entryObject = $e;
            // $auth->sendComment($this,'referer.txt');
        }

        return 1;
    }

    /**
     * Normalise this referrer in place and tell whether it is worth keeping.
     *
     * - PHP session ids are removed from the query.
     * - Referrers on the ignore list are rejected.
     * - Search referrers (query parameters q, as_q or p) are rewritten to a
     *   canonical google.com/search?q=... form.
     * - A few frequent referrers are collapsed to a single host or root URL.
     * - Otherwise the host is aligned with an existing "www." / non-"www."
     *   record and a trailing "/trackbacks-" is removed from the path.
     *
     * @return bool  false if the referrer should be discarded, true otherwise.
     */
    function clean()
    {
        parse_str((string) $this->query, $q);

        // session ids are noise: drop the parameter, leave the rest untouched.
        if (isset($q['PHPSESSID'])) {
            unset($q['PHPSESSID']);
            $this->query = trim(
                preg_replace('#(^|&)PHPSESSID(=[^&]*)?(?=&|$)#', '', $this->query),
                '&'
            );
        }

        if ($this->_isIgnoredReferrer()) {
            return false;
        }

        // search term: q, overridden by as_q, overridden by p (yahoo).
        $term = null;
        foreach (array('q', 'as_q', 'p') as $key) {
            if (isset($q[$key]) && is_string($q[$key])) {
                $term = $q[$key];
            }
        }
        if ($term !== null) {
            // searches for our own site name are people gaming the list.
            if (preg_match('/roojs/', $term)) {
                return false;
            }
            $this->host = 'google.com';
            $this->query = 'q=' . urlencode($term);
            $this->path = '/search';
            return true;
        }

        $host = (string) $this->host;

        if ($host === 'www.linux.org.ru') {
            // the same page is reachable under per-user profile prefixes.
            $this->path = preg_replace('#/profile/[a-z0-9]+/#i', '/', (string) $this->path);
            return true;
        }

        // php manual mirrors
        if (preg_match('#\.php\.net$#', $host)) {
            $this->host = 'php.net';
            return true;
        }

        // common referrers that are recorded as their site root only.
        $rooted = array(
            '#weblabor\.hu$#'   => 'weblabor.hu',
            '#planet-php\.net$#' => 'www.planet-php.net',
            '#artima\.com$#'    => 'www.artima.com',
        );
        foreach ($rooted as $pattern => $canonicalHost) {
            if (preg_match($pattern, $host)) {
                $this->query = '';
                $this->path = '';
                $this->host = $canonicalHost;
                return true;
            }
        }

        // general case: if the other spelling of the host ("www." added or
        // removed) is already on record, switch to it.
        $alternate = preg_match('#^www\.#', $host) ? substr($host, 4) : 'www.' . $host;
        $known = DB_DataObject::factory('CmsRefer');
        $known->host = $alternate;
        if ($known->find(true)) {
            $this->host = $alternate;
        }

        // praenanz.de appends "trackbacks-" to the referring url.
        if (is_string($this->path)) {
            $this->path = preg_replace('#/trackbacks-$#', '', $this->path);
        }

        return true;
    }

    /**
     * Rebuild the referring URL from host, path and query.
     *
     * The scheme is always "http"; the scheme and port columns are not used.
     *
     * @return string
     */
    function url()
    {
        $url = "http://{$this->host}{$this->path}";
        // length test rather than truthiness, so a query of "0" is kept.
        if (strlen((string) $this->query)) {
            $url .= "?{$this->query}";
        }
        return $url;
    }

    /**
     * Was this row checked less than $seconds ago?
     *
     * @param int $seconds  size of the time window
     * @return bool  false when the row has never been checked
     */
    function _checkedWithin($seconds)
    {
        if (!$this->checked) {
            return false;
        }
        return strtotime($this->checked) > (time() - $seconds);
    }

    /**
     * Does the fetched page contain one of the known URLs of this blog entry?
     *
     * @param string $html  body of the referring page
     * @return bool
     */
    function _linksToEntry($html)
    {
        $id = $this->blog_id;
        // fixme: also needs to look for page urls..
        $targets = array(
            "http://www.akbkhome.com/blog.php/View/{$id}/",
            "http://www.roojs.com/index.php/View/{$id}/",
            "http://roojs.com/index.php/View/{$id}/",
            "http://blog.akbkhome.com/archives/{$id}_",
        );
        foreach ($targets as $target) {
            if (false !== strpos($html, $target)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Is this referrer on the ignore list (spam, irrelevant searches, ...)?
     *
     * The cheap text checks run first; the DNS lookup needed for the IP ranges
     * only happens when none of them matched.
     *
     * @return bool
     */
    function _isIgnoredReferrer()
    {
        $query = (string) $this->query;
        $path  = (string) $this->path;
        $title = (string) $this->title;
        $host  = (string) $this->host;

        // this site is not about smoking; lexpov is a porn site that spammed us.
        if ($this->_matchesAny(array('#smoking#i', '#lexpov#i'), $query)) {
            return true;
        }
        // live journal friends links are messy aggregated views.
        if ($this->_matchesAny(array('#/friends/#', '#/lexpov/#'), $path)) {
            return true;
        }
        if (preg_match('#lexpov#i', $title)) {
            return true;
        }
        // "flexy teens" searches
        foreach (array($query, $title) as $text) {
            if (preg_match('#teen#i', $text) && preg_match('#flexy#i', $text)) {
                return true;
            }
        }

        $hostPatterns = array(
            '#teens\.search\.aol\.com$#',
            '#stargeek\.com$#',          // questionable search engine op site
            '#translate\.google\.#',     // google translations
            '#mywebsearch#',
            '#ie-seven\.com$#',
            '#iest\.ru$#',
            '#ibissecurity\.com$#',
            '#iwebtool\.com$#',
            '#qoils\.com$#',
            '#xml4\.com$#',
            '#firmantivirus\.com$#',
            '#hostingvoice\.com$#',
            '#framecart\.com$#',
            '#o-list\.com$#',
            '#stopsmoking911\.com$#',
            '#hv-deals\.info$#',
            '#freeforums\.org$#',
            '#insurance#',               // nasty link spammer
            '#lexpov#i',
            '#optonline#i',              // search engine that keeps referring
            // not (yet) blocked by name: debtconsolidation, distancelearning,
            // uxsk.com / vqsn.com (ns@thatsdata.com),
            // zfjo.com / ybvm.com (ns@targe5t.com)
        );
        if ($this->_matchesAny($hostPatterns, $host)) {
            return true;
        }

        $ipPatterns = array(
            '#^207\.36\.200\.#',   // workfromhomefre group
            '#^64\.70\.250\.#',    // thatsdata group
            '#^69\.64\.76\.#',     // targe5t.com: web sites
            '#^69\.64\.78\.#',     // targe5t.com: ns
            '#^70\.85\.234\.#',    // themasterns.com: web sites
            '#^70\.87\.36\.#',     // themasterns.com: ns
            '#^70\.85\.235\.#',    // themasterns.com: web sites
        );
        // gethostbyname() returns the host name unchanged when the lookup
        // fails, in which case none of the IP patterns match.
        return $this->_matchesAny($ipPatterns, gethostbyname($host));
    }

    /**
     * Does $subject match at least one of the given regular expressions?
     *
     * @param array  $patterns  PCRE patterns
     * @param string $subject
     * @return bool
     */
    function _matchesAny($patterns, $subject)
    {
        foreach ($patterns as $pattern) {
            if (preg_match($pattern, $subject)) {
                return true;
            }
        }
        return false;
    }

}