3 * Table Definition for referrers
5 class_exists('DB_DataObject') ? '' : require_once 'DB/DataObject.php';
7 class Pman_Cms_DataObjects_CmsRefer extends DB_DataObject
11 /* the code below is auto generated do not remove the above tag */
13 var $__table = 'CmsRefer'; // table name
14 var $blog_id; // int(11) not_null primary_key auto_increment
15 var $day; // date(10) not_null primary_key
16 var $seen; // int(11) not_null
17 var $scheme; // string(5)
18 var $host; // string(128) not_null primary_key
19 var $port; // string(5)
20 var $path; // string(255)
23 var $query; // string(255)
26 /* the code above is auto generated do not remove the tag below */
/**
 * Check whether this referrer needs (re-)verification and, if so, verify it.
 *
 * NOTE(review): the embedded original line numbers skip in places, so some
 * lines of this method (closing braces, branch bodies, returns) are not
 * visible here. The comments describe the visible statements only.
 *
 * Status is tracked in $this->checked_success, with the time of the last
 * check in $this->checked. Visible uses: 1 is assigned on the search-query
 * path, 2 after the referring page has been fetched, and the value is
 * decremented on another path; values below zero are treated as failed checks.
 *
 * @return mixed The two early exits return $this->checked_success; any other
 *               return statements are outside the visible lines (TODO confirm).
 */
29 function checkValid() {
30 // does it need checking?
31 // links are valid for 30 days..
32 // echo "CHECK? <PRE>" . print_r($this, true) . '</PRE>';
// A positive status whose timestamp is newer than 30 days ago is reused as-is.
// NOTE(review): checked_success and checked are not among the generated
// 'var' declarations of this class - presumably columns added later; confirm.
34 if (($this->checked_success > 0) && strtotime($this->checked) > (time() - (30 * 24 * 60 * 60))) {
36 return $this->checked_success;
38 // if we failed to check, wait another day..
// A negative status whose timestamp is newer than 24 hours ago is also reused.
39 if ($this->checked_success < 0 && $this->checked && strtotime($this->checked) > (time() - (24 * 60 * 60))) {
40 //echo "holding off checking";exit;
41 return $this->checked_success;
43 // we delete ourselves if 10 failures..
// NOTE(review): the test is '< -10', so it first triggers at -11, not at -10
// as the comment above suggests. The body of this branch is not visible here.
44 if ($this->checked_success < -10) {
46 //echo "DELETEING?"; exit;
49 //echo "starting checking";exit;
// Remember the status from before this check; it is compared against 1 below.
50 $original_success = $this->checked_success;
51 // DB_DataObject::debugLevel(1);
52 // update the fact we have checked it before we potentially hang..
53 require_once 'DB/DataObject/Cast.php';
// clean() is tested for a falsy result; the body of this branch is not visible.
55 if (!$this->clean()) {
// Referrers that carry a query string: the query is parsed into $q.
61 if (strlen($this->query)) {
63 parse_str ( $this->query,$q );
// NOTE(review): '==' compares and discards the result, so host is left
// unchanged. clean() uses '=' for the same google.com rewrite, so this looks
// like a typo for an assignment - confirm before changing.
69 $this->host == 'google.com';
70 $this->query = 'q='. $q['q']; // not if urlencoding is needed.
// The decoded search phrase is used as the title and the status is set to 1.
71 $this->title = urldecode($q['q']);
72 $this->checked_success = 1;
73 $this->checked = DB_DataObject_Cast::sql('NOW()');
// Decrement the status and stamp the check time with SQL NOW(). The condition
// enclosing these two lines (if any) is not visible here.
80 $this->checked_success--;
81 $this->checked = DB_DataObject_Cast::sql('NOW()');
86 // this could hang!!???
87 //echo "checking ".$this->url();
// '@' hides any warning from the fetch; on failure file_get_contents() returns
// false. No stream context or timeout is passed in this call.
89 $data = @file_get_contents($this->url());
90 //echo htmlspecialchars($data);
91 // look for our url in it!
92 // fixme also needs to look for page urls..
// True only when none of the four hard-coded link forms for this blog_id
// occurs in the fetched page.
93 if ((false === strpos($data,"http://www.akbkhome.com/blog.php/View/{$this->blog_id}/")) &&
94 (false === strpos($data,"http://www.roojs.com/index.php/View/{$this->blog_id}/")) &&
95 (false === strpos($data,"http://roojs.com/index.php/View/{$this->blog_id}/")) &&
96 (false === strpos($data,"http://blog.akbkhome.com/archives/{$this->blog_id}_")))
99 //echo 'link not on page..';
// Take the title from the first <title> element of the fetched page.
// NOTE(review): this is raw remote text - make sure it is escaped on output.
102 if (preg_match('#<title>([^<]+)</title>#i',$data,$matches)) {
103 $this->title = $matches[1];
107 $this->checked_success = 2;
// Only when the status before this check was below 1: load the cms_page row
// for blog_id and attach it as entryObject.
111 if ($original_success < 1) {
112 //DB_DataObject::debugLevel(1);
113 $e = DB_DataObject::factory('cms_page');
114 $e->get($this->blog_id);
// $auth is not used in the visible lines; the call that used it is commented
// out below.
115 $auth = $e->getAuthorObject();
116 $this->entryObject = $e;
117 // $auth->sendComment($this,'referer.txt');
/**
 * Normalise this referrer (host, path, query) and screen it against a list of
 * unwanted referrers.
 *
 * NOTE(review): the embedded original line numbers skip in places. The opener
 * of the switch that the 'case' lines belong to, the bodies and breaks of most
 * cases, and every return statement are not visible here. The comments
 * describe the visible statements only.
 *
 * @return mixed checkValid() tests the result with '!', and the comment on the
 *               signature suggests false means "invalid"; the actual return
 *               statements are not visible (TODO confirm).
 */
125 function clean() // return false if invalid... ???/
127 //DB_DataObject::debugLevel(1);
// Parse the query string into $q.
129 parse_str ( $this->query,$q );
// Drop the session id from the parsed array (this does not touch $this->query).
131 if (isset($q['PHPSESSID'])) {
132 unset($q['PHPSESSID']);
// Resolve the host for the IP-range tests below. gethostbyname() performs a
// DNS lookup and returns the host name unchanged when resolution fails, in
// which case the IP patterns simply do not match.
135 $ip = gethostbyname($this->host);
// Each 'case' below is a test expression rather than a constant, which implies
// a switch on a truthy value - presumably switch(true); its opening line is
// not visible here.
140 // cause the blog is called smooking.... !
141 case preg_match('#/smoking/#',$this->query):
142 // live journal friends links - rather messy aggregated views
143 case preg_match('#/friends/#',$this->path):
// Both 'teen' and 'flexy' must appear (case-insensitive) in the query or title.
145 case (preg_match('#teen#i',$this->query) && preg_match('#flexy#i',$this->query)):
146 case (preg_match('#teen#i',$this->title) && preg_match('#flexy#i',$this->title)):
// This case (and several of the IP cases further down) is terminated with ';'
// instead of ':' - the colon form is the conventional one.
148 case preg_match('#teens\.search\.aol\.com$#',$this->host);
149 //this site is not about smoking..
150 case preg_match('#smoking#i',$this->query):
152 // stargeek is a questionable search engine op site.
153 case preg_match('#stargeek\.com$#',$this->host):
155 // ignore google translations.
156 case preg_match('#translate\.google\.#',$this->host):
// Host-name suffix blocklist.
158 case preg_match('#mywebsearch#',$this->host):
159 case preg_match('#ie-seven\.com$#',$this->host):
160 case preg_match('#iest\.ru$#',$this->host):
161 case preg_match('#ibissecurity\.com$#',$this->host):
162 case preg_match('#iwebtool\.com$#',$this->host):
163 case preg_match('#qoils\.com$#',$this->host):
164 case preg_match('#xml4\.com$#',$this->host):
165 case preg_match('#firmantivirus\.com$#',$this->host):
// NOTE(review): in the next two patterns the dot before 'com' is not escaped
// (unlike the neighbouring ones), so it matches any single character.
166 case preg_match('#hostingvoice.com$#',$this->host):
167 case preg_match('#framecart.com$#',$this->host):
168 case preg_match('#o-list\.com$#',$this->host):
169 case preg_match('#stopsmoking911\.com$#',$this->host):
170 case preg_match('#hv-deals\.info$#',$this->host):
171 case preg_match('#freeforums\.org$#',$this->host):
172 // nasty link spammer!
// Matches 'insurance' anywhere in the host name.
173 case preg_match('#insurance#',$this->host):
174 //case preg_match('#debtconsolidation#',$this->host):
175 //case preg_match('#distancelearning#',$this->host):
176 // workfromhomefre group of IP's
// The remaining tests in this group match the resolved address by its first
// three octets.
177 case preg_match('#^207\.36\.200\..*$#',$ip):
178 //case preg_match('#uxsk.com$#',$this->host): // ns@thatsdata.com
179 //case preg_match('#vqsn.com$#',$this->host): // ns@thatsdata.com
180 //case preg_match('#zfjo.com$#',$this->host): // ns@targe5t.com
181 //case preg_match('#ybvm.com$#',$this->host): // ns@targe5t.com
182 // the thatsdata group of IP addresses..
183 case preg_match('#^64\.70\.250\..*$#',$ip):
184 // the targe5t.com group of IP's
185 case preg_match('#^69\.64\.76\..*$#',$ip); // web sites are on this.
186 case preg_match('#^69\.64\.78\..*$#',$ip); // ns is on this.
187 // the themasterns.com group of IPs
188 case preg_match('#^70\.85\.234\..*$#',$ip); // websites are on this.
189 case preg_match('#^70\.87\.36\..*$#',$ip); // ns is on this.
190 case preg_match('#^70\.85\.235\..*$#',$ip); // websites are on this.
196 // lexpov a pornsite that spammed us and got caught up in google. etc.
197 case preg_match('#/lexpov/#',$this->query):
// NOTE(review): the next two patterns require literal '/' characters but are
// tested against the host name, which would not normally contain a slash, so
// they look like they can never match - confirm the intended pattern.
198 case preg_match('#/lexpov/#',$this->host):
199 case preg_match('#/optonline/#',$this->host): // stupid search engine that keeps referring..
200 case preg_match('#/lexpov/#',$this->path):
201 case preg_match('#/lexpov/#',$this->title):
204 // replace msn search with google.
// Copy the 'as_q' search term into 'q'.
205 case isset($q['as_q']):
206 $q['q'] = $q['as_q'];
// 'p' is re-tested inside the case because control can also arrive here by
// falling through from the case above (per the comment on the case line).
208 case isset($q['p']): // yahoo + fallthrough
209 if (isset($q['p'])) {
216 // gaming search engine with direct queries..
// Whether $q['q'] is guaranteed to be set here depends on lines that are not
// visible.
217 if (preg_match('/roojs/',$q['q'])) {
// Rewrite the referrer as a google.com /search URL for the term in $q['q'].
222 $this->host = 'google.com';
223 $this->query = 'q='.$q['q'];
224 $this->path = '/search';
228 // PHP's standard session id
// The raw query string is split on '&' and walked piece by piece.
230 case (false !== strpos($this->query,'PHPSESSID=')):
231 $query = explode('&',$this->query);
233 foreach($query as $k=>$v) {
// NOTE(review): $k is the numeric index produced by explode(), so this test
// inspects the index rather than the 'name=value' piece held in $v; it looks
// like $v was intended - confirm.
235 if (false !== strpos($k,'PHPSESSID=')) {
// Append '&' only when $this->query already has content.
239 $this->query .= strlen($this->query) ? '&' : '';
// Remove the '/profile/<name>/' segment from linux.org.ru paths.
245 case ($this->host == 'www.linux.org.ru'):
246 $this->path = preg_replace('#/profile/[a-z0-9]+/#i', '/', $this->path);
// Collapse any *.php.net host to php.net.
251 case preg_match('#\.php\.net$#',$this->host):
252 $this->host = 'php.net';
255 // root these ones.. = common referrers
256 case preg_match('#weblabor\.hu$#',$this->host):
259 $this->host = 'weblabor.hu';
// NOTE(review): the doubled '$' end anchor appears redundant - likely a typo.
262 case preg_match('#planet-php\.net$$#',$this->host):
265 $this->host = 'www.planet-php.net';
268 case preg_match('#artima\.com$#',$this->host):
271 $this->host = 'www.artima.com';
277 // prefix www where relevant..
// Host without 'www.': if a CmsRefer lookup for the 'www.'-prefixed host finds
// a row, adopt the prefixed form. Note that $q is reused here, replacing the
// parsed query array with a lookup object.
278 if (!preg_match('#^www\.#',$this->host)) {
279 $q = DB_DataObject::factory('CmsRefer');
280 $q->host = 'www.'.$this->host;
281 if ($q->find(true)) {
282 $this->host = 'www.' . $this->host;
285 // remove www. if we already have it..
// Reverse lookup: if the host minus its first four characters is found, use
// that form. The line introducing this branch (presumably an else) is not
// visible here.
287 $q = DB_DataObject::factory('CmsRefer');
288 $q->host = substr($this->host,4);
289 if ($q->find(true)) {
290 $this->host = substr($this->host,4);
295 // praenanz.de has trackbacks- as a add on to the referrers..
// NOTE(review): in the visible lines $q is the lookup object created above, so
// this strips '/trackbacks-' from that object's path, not from $this->path;
// it looks like $this->path was intended - confirm.
296 $q->path = preg_replace('#/trackbacks-$#','', $q->path);
306 return "http://{$this->host}{$this->path}" . ($this->query ? "?{$this->query}" : '');