<?php
//====================================================
// FileName: snap.class.php
// Summary: Web page snapshot (cache) class
// Author: millken (迷路林肯)
// LastModified: 2007-06-29
// copyright (c)2007 millken@gmail.com
//====================================================

/**
 * Fetches a web page, rewrites its relative links to absolute ones, strips
 * <script> elements and stores the result in a file-based cache.
 *
 * Cache files live in $dir and are named md5(url). A cached copy is served
 * as long as its age in seconds does not exceed the TTL passed to fetch().
 * A human-readable trace of every step is accumulated in $log.
 */
class snap
{
    /** @var string Cache directory, including the trailing slash. */
    public $dir;

    /** @var string HTML-formatted trace of what the object has done. */
    public $log;

    /** @var string Page body produced by the last fetch(). */
    public $contents;

    /** @var string Full path of the cache file for the current URL. */
    public $filename;

    /** @var string "scheme://host[:port]/" of the current URL, or "" if it has no scheme/host. */
    public $host;

    /** @var string md5() of the current URL; used as the cache file name. */
    public $name;

    /** @var int Modification time of the cache file, 0 when there is none. */
    public $data_ts;

    /** @var int Maximum cache age in seconds. */
    public $ttl;

    /** @var string URL passed to the last fetch(). */
    public $url;

    /** @var int Age of the cache file in seconds at the time of the last lookup. */
    public $ts;

    /**
     * Starts the log and points the cache directory at this file's directory.
     */
    public function __construct()
    {
        $this->log = "New snap() object instantiated.<br />\n";
        $this->dir = dirname(__FILE__) . "/";
    }

    /**
     * PHP 4 style constructor name, kept as a plain method so existing
     * explicit calls keep working. It simply re-runs the initialisation.
     *
     * @return void
     */
    public function snap()
    {
        $this->__construct();
    }

    /**
     * Loads a page into $contents, from the cache when it is fresh enough
     * and from the network otherwise.
     *
     * @param string $url Page to fetch.
     * @param int    $ttl Maximum age of a cached copy, in seconds.
     *
     * @return bool False when no URL was given or no content could be
     *              obtained, true otherwise.
     */
    public function fetch($url = "", $ttl = 10)
    {
        $this->log .= "--------------------------------<br />fetch() called<br />\n";
        $this->log .= "url: " . $url . "<br />\n";

        // Validate before touching parse_url() so an empty URL causes no notices.
        if (!$url) {
            $this->log .= "OOPS: You need to pass a URL!<br />";
            return false;
        }

        $parts = parse_url($url);
        if (is_array($parts) && !empty($parts['scheme']) && !empty($parts['host'])) {
            $port = isset($parts['port']) ? ':' . $parts['port'] : '';
            $this->host = $parts['scheme'] . '://' . $parts['host'] . $port . '/';
        } else {
            // Not an absolute URL (e.g. a local path): there is no link base.
            $this->host = '';
        }

        $this->ttl = $ttl;
        $this->url = $url;
        $this->name = md5($this->url);
        $this->filename = $this->dir . $this->name;
        $this->log .= "Filename: " . $this->filename . "<br />";

        $this->getFile_ts();
        return $this->file_get_content();
    }

    /**
     * Fills $contents from the cache file or, if the cache is missing or
     * older than $ttl, from $url (and then refreshes the cache).
     *
     * When the remote fetch fails, an existing (stale) cache file is served
     * instead and is left untouched.
     *
     * @return bool True when $contents was filled, false when neither the
     *              network nor the cache could provide anything.
     */
    public function file_get_content()
    {
        $this->ts = time() - $this->data_ts;
        $hasCache = ($this->data_ts != 0);

        if ($hasCache && $this->ts <= $this->ttl) {
            $this->log .= "cache hasn't expired<br />";
            $cached = @file_get_contents($this->filename);
            if ($cached !== false) {
                $this->contents = $cached;
                return true;
            }
            // Unreadable cache file: fall through and refetch.
            $this->log .= "Could not read " . $this->filename . "<br />";
        } else {
            $this->log .= "cache has expired<br />";
        }

        // Warnings are suppressed on purpose; failure is reported via the log.
        $fetched = @file_get_contents($this->url);
        if ($fetched === false) {
            $this->log .= "Could not fetch " . $this->url . "<br />";
            $stale = $hasCache ? @file_get_contents($this->filename) : false;
            $this->contents = ($stale === false) ? "" : $stale;
            return $stale !== false;
        }

        $this->contents = $fetched;
        $this->saveToCache();
        return true;
    }

    /**
     * Post-processes $contents (absolute links, no <script> elements) and
     * writes the result to the cache file.
     *
     * @return bool True when the cache file was written, false otherwise.
     */
    public function saveToCache()
    {
        $this->log .= "saveToCache() called<br />";

        // Relative links are relative to the page itself; fall back to the
        // host root only when no page URL is known.
        $base = ($this->url != "") ? $this->url : $this->host;
        $contents = $this->formaturl($this->contents, $base);

        $stripped = preg_replace('#<script[^>]*?>.*?</script>#si', "", $contents);
        if ($stripped === null) {
            // preg_replace() returns null on a PCRE error (e.g. backtrack limit).
            $this->log .= "Could not strip scripts<br />";
        } else {
            $contents = $stripped;
        }
        $this->contents = $contents;

        // Compare against false: writing an empty page legitimately returns 0.
        if (@file_put_contents($this->filename, $this->contents, LOCK_EX) === false) {
            $this->log .= "Could not write to " . $this->filename . "<br />";
            return false;
        }
        return true;
    }

    /**
     * Reads the cache file's modification time into $data_ts.
     *
     * @return bool True when the cache file exists, false otherwise
     *              ($data_ts is then 0).
     */
    public function getFile_ts()
    {
        $this->log .= "getFile_ts() called<br />";

        // The file may have been rewritten earlier in this request.
        clearstatcache();

        if (!file_exists($this->filename)) {
            $this->data_ts = 0;
            $this->log .= $this->filename . " does not exist<br />";
            return false;
        }

        $mtime = @filemtime($this->filename);
        $this->data_ts = ($mtime === false) ? 0 : $mtime;
        return $mtime !== false;
    }

    /**
     * Rewrites the href/src of every <img>, <link> and <a> tag in a piece
     * of HTML to an absolute URL.
     *
     * @param string $l1 HTML to process.
     * @param string $l2 Absolute URL the links are relative to.
     *
     * @return string The HTML with rewritten tags; unchanged on a PCRE error.
     */
    public function formaturl($l1, $l2)
    {
        $self = $this;
        $result = preg_replace_callback(
            '/<(?:img|link|a)\s[^>]*>/i',
            function ($match) use ($self, $l2) {
                return $self->lIIIIl($match[0], $l2);
            },
            $l1
        );

        return ($result === null) ? $l1 : $result;
    }

    /**
     * Makes the first href/src attribute of a single tag absolute.
     *
     * Values that already carry a scheme (http:, mailto:, javascript:,
     * data:, ...) and fragment-only values (#anchor) are left alone, as is
     * the whole tag when the base URL has no scheme or host.
     *
     * @param string $l1 One HTML tag, e.g. '<a href="x.html">'.
     * @param string $l2 Absolute URL the value is relative to.
     *
     * @return string The tag with a double-quoted absolute URL, or the tag
     *                unchanged when nothing had to (or could) be rewritten.
     */
    public function lIIIIl($l1, $l2)
    {
        // The look-behind keeps attributes such as data-href from matching.
        $pattern = '/(?<![\w-])(?:href|src)\s*=\s*("[^"]*"|\'[^\']*\'|[^\s"\'>]+)/i';
        if (!preg_match($pattern, $l1, $m, PREG_OFFSET_CAPTURE)) {
            return $l1;
        }
        $raw = $m[1][0];
        $offset = $m[1][1];

        $ref = $raw;
        if ($raw[0] === '"' || $raw[0] === "'") {
            $ref = (string) substr($raw, 1, -1);
        }
        $ref = trim($ref);

        if ($ref === '' || $ref[0] === '#') {
            return $l1;
        }
        if (preg_match('/^[a-z][a-z0-9+.\-]*:/i', $ref)) {
            return $l1; // already absolute, or a non-hierarchical scheme
        }

        $base = parse_url($l2);
        if (!is_array($base) || empty($base['scheme']) || empty($base['host'])) {
            return $l1;
        }
        $authority = $base['scheme'] . '://' . $base['host']
            . (isset($base['port']) ? ':' . $base['port'] : '');
        $basePath = (isset($base['path']) && $base['path'] !== '') ? $base['path'] : '/';

        if (substr($ref, 0, 2) === '//') {
            // Protocol-relative: only the scheme is inherited.
            $absolute = $base['scheme'] . ':' . $ref;
        } elseif ($ref[0] === '/') {
            $absolute = $authority . $ref;
        } elseif ($ref[0] === '?') {
            // Query-only reference: same document, different query.
            $absolute = $authority . $basePath . $ref;
        } else {
            // Keep query and fragment out of the dot-segment handling.
            $cut = strcspn($ref, '?#');
            $relPath = (string) substr($ref, 0, $cut);
            $suffix = (string) substr($ref, $cut);

            $slash = strrpos($basePath, '/');
            $baseDir = ($slash === false) ? '' : (string) substr($basePath, 0, $slash);

            $absolute = $authority
                . $this->collapseDotSegments($baseDir . '/' . $relPath)
                . $suffix;
        }

        // The value is re-emitted inside double quotes, so none may remain.
        $absolute = str_replace('"', '&quot;', $absolute);

        // Replace exactly the matched value, not every textual occurrence.
        return substr_replace($l1, '"' . $absolute . '"', $offset, strlen($raw));
    }

    /**
     * Resolves "." and ".." segments in an absolute path; ".." never climbs
     * above the root.
     *
     * @param string $path Path starting with "/".
     *
     * @return string The normalised path.
     */
    private function collapseDotSegments($path)
    {
        $segments = explode('/', $path);
        $last = count($segments) - 1;
        $out = array();

        foreach ($segments as $i => $segment) {
            if ($segment === '.' || $segment === '..') {
                // $out[0] is the empty string before the leading slash.
                if ($segment === '..' && count($out) > 1) {
                    array_pop($out);
                }
                if ($i === $last) {
                    $out[] = ''; // a trailing dot segment names a directory
                }
                continue;
            }
            $out[] = $segment;
        }

        return implode('/', $out);
    }
}