php模拟浏览器(BrowserEmulator)获取远程文件内容的实例代码

发布时间:2020-12-29编辑:脚本学堂
介绍下如何用php实现模拟浏览器,进行远程文件内容的获取,有需要的朋友,可以参考下。

出于安全考虑,服务器常常会把 allow_url_fopen 设置为 Off,此时 fopen、file_get_contents 等函数本身仍可使用,但无法再读取远程 URL。
此时大家可以借助下面这个基于 fsockopen 实现的类,通过同名方法(fopen、file_get_contents、file)继续获取远程内容。供大家学习参考。

代码如下:
 

复制代码 代码示例:
<?php     
    /**
    * BrowserEmulator class. Provides methods for opening urls and emulating
    * a web browser request.
    **/ 
    class BrowserEmulator { 
      var $headerLines = Array(); 
      var $postData = Array(); 
      var $multiPartPost = False; 
      var $authUser = ""; 
      var $authPass = ""; 
      var $port; 
      var $lastResponse = ''; 
      var $lastRequest = ''; 
      var $debug = false; 
      var $customHttp = False; 
      
      public function BrowserEmulator() { 
        $this->resetHeaderLines(); 
        $this->resetPort(); 
      } 
        /**
      * Adds a single header field to the HTTP request header. The resulting header
      * line will have the format
      * $name: $valuen
      **/ 
      public function addHeaderLine($name, $value) { 
        $this->headerLines[$name] = $value; 
      } 
      
      /**
      * Deletes all custom header lines. This will not remove the User-Agent header field,
      * which is necessary for correct operation.
      **/ 
      public function resetHeaderLines() { 
        $this->headerLines = Array(); 
        
        /*******************************************************************************/ 
        /**************   YOU MAX SET THE USER AGENT STRING HERE   *******************/ 
        /* http://www.jb200.com      */ 
        /* default is "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)",         */ 
        /* which means Internet Explorer 6.0 on WinXP                       */ 
        
        $this->headerLines["User-Agent"] = 'Mozilla/5.0 (X11; U; linux i686; en-US; rv:1.9.0.10) Gecko/2009042315 Firefox/3.0.10'; 
     
     
        /*******************************************************************************/ 
        /**
        * Set default to accept gzip encoded files
        */ 
        $this->headerLines["Accept-Encoding"] = "*/*"; 
      } 
      
      /** 
      * Add a post parameter. Post parameters are sent in the body of an HTTP POST request. 
      **/ 
      public function addPostData($name, $value = '') { 
        $this->postData[$name] = $value; 
      } 
      
      /**
      * Deletes all custom post parameters.
      **/ 
      public function resetPostData() { 
        $this->postData = Array(); 
      }     
     
      public function handleMultiPart() { 
      $boundry = '----------------------------795088511166260704540879626';
     
        $this->headerLines["Accept"] = ' text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'; 
        $this->headerLines["Connection"] = 'Close'; 
        $this->headerLines["Content-Type"] = "multipart/form-data; boundary=$boundry"; 
        $out = ''; 
        foreach($this->postData as $item => $data) { 
          if(is_array($data)) { 
            $out .= "--$boundryrn" 
                   ."Content-Disposition: form-data; name="$item"; filename="{$data['filename']}"rn" 
                   ."Content-Type: application/octet-streamrn" 
                   ."rn" 
                   .$data['contents']."rn"; 
          } else { 
            $out .= "--$boundryrn" 
                   ."Content-Disposition: form-data; name="$item"rn" 
                   ."rn" 
                   .$data."rn"; 
          } 
        } 
        $out .= "--{$boundry}--rn"; 
        return $out; 
      }     
     
      /** 
      * Sets an auth user and password to use for the request. 
      * Set both as emptyempty strings to disable authentication. 
      **/ 
      public function setAuth($user, $pass) { 
        $this->authUser = $user; 
        $this->authPass = $pass; 
      } 
      /**
      * Selects a custom port to use for the request.
      **/ 
      public function setPort($portNumber) { 
        $this->port = $portNumber; 
      } 
      
      /**
      * Resets the port used for request to the HTTP default (80).
      **/ 
      public function resetPort() { 
        $this->port = 80; 
      }     
     
      /**
       * Parse any cookies set in the URL, and return the trimed string
       **/ 
      public function preparseURL($url) { 
        if($cookies = stristr($url, ':COOKIE:')) { 
          $url = rtrim(substr($url, 0, -strlen($cookies)), '&'); 
          $this->addHeaderLine("Cookie", '$Version=1; '.strtr(substr($cookies, 8), '&', ';')); 
        } 
        return $url; 
      }     
     
      /**
      * Make an fopen call to $url with the parameters set by previous member
      * method calls. Send all set headers, post data and user authentication data.
      * Returns a file handle on success, or false on failure.
      **/ 
      public function fopen($url) { 
        $url = $this->preparseURL($url); 
        $this->lastResponse = Array(); 
        
        $parts = parse_url($url); 
        $protocol = $parts['scheme']; 
        $server = $parts['host']; 
        $port = $parts['port']; 
        $path = $parts['path']; 
        if(isset($parts['query'])) { 
          $path .= '?'.$parts['query']; 
        }    
     
        if($protocol == 'https') { 
          // TODO: https is locked to port 443, why? 
          $server = 'ssl://'.$server; 
          $this->setPort(443); 
        } elseif ($port!="") { 
            $this->setPort($port); 
        } 
        if ($path=="") $path = "/"; 
        $socket = false; 
        $socket = fsockopen($server, $this->port); 
        if ($socket) { 
            if ($this->authUser!="" && $this->authPass!="") { 
              $this->headerLines["Authorization"] = "Basic ".base64_encode($this->authUser.":".$this->authPass); 
            } 
           
            if($this->customHttp) 
              $request = $this->customHttp." $pathrn"; 
            elseif (count($this->postData)==0) 
              $request = "GET $path HTTP/1.0rn"; 
            else 
              $request = "POST $path HTTP/1.1rn"; 
     
     
            $request .= "Host: {$parts['host']}rn"; 
            
            if ($this->debug) echo $request; 
            if (count($this->postData)>0) { 
              if($this->multiPartPost) { 
                $PostString = $this->handleMultiPart(); 
              } else { 
                $PostStringArray = Array(); 
                foreach ($this->postData AS $key=>$value) { 
                  if(emptyempty($value)) 
                    $PostStringArray[] = $key; 
                  else 
                    $PostStringArray[] = "$key=$value"; 
                } 
                $PostString = join("&", $PostStringArray); 
              } 
              $this->headerLines["Content-Length"] = strlen($PostString); 
            } 
            
            foreach ($this->headerLines AS $key=>$value) { 
              if ($this->debug) echo "$key: $valuen"; 
              $request .= "$key: $valuern"; 
            } 
            if ($this->debug) echo "n"; 
            $request .= "rn"; 
            if (count($this->postData)>0) { 
              $request .= $PostString; 
            } 
        } 
        $this->lastRequest = $request;     
     
        for ($written = 0; $written < strlen($request); $written += $fwrite) { 
          $fwrite = fwrite($socket, substr($request, $written)); 
          if (!$fwrite) { 
            break
          } 
        } 
        if ($this->debug) echo "n"; 
        if ($socket) { 
          $line = fgets($socket); 
          if ($this->debug) echo $line; 
          $this->lastResponse .= $line; 
          $status = substr($line,9,3); 
          while (trim($line = fgets($socket)) != ""){ 
            if ($this->debug) echo "$line"; 
            $this->lastResponse .= $line; 
            if ($status=="401" AND strpos($line,"WWW-Authenticate: Basic realm="")===0) { 
              fclose($socket); 
              return FALSE; 
            } 
          } 
        } 
        return $socket; 
      } 
       
      /**
      * Make an file call to $url with the parameters set by previous member
      * method calls. Send all set headers, post data and user authentication data.
      * Returns the requested file as a string on success, or false on failure.
      **/ 
      public function file_get_contents($url) { 
        if(file_exists($url)) // local file 
          return file_get_contents($url); 
        $file = ''; 
        $socket = $this->fopen($url); 
        if ($socket) { 
            while (!feof($socket)) { 
              $file .= fgets($socket); 
            } 
        } else { 
            Yii::log('Browser Emulator: file_get_contents bad socket', CLogger::LEVEL_ERROR); 
            return FALSE; 
        } 
        fclose($socket); 
     
     
        if(strstr($this->lastResponse, 'Content-Encoding: gzip') !== FALSE) { 
          if(function_exists('gzinflate')) { 
            $file = gzinflate(substr($file,10)); 
            if($this->debug) echo "Result file: ".$file; 
          } 
        }
        return $file; 
      }
     
      /**
       * Simulate a file() call by exploding file_get_contents()
       **/ 
      public function file($url) { 
        $data = $this->file_get_contents($url); 
        if($data) 
          return explode('n', $data); 
        return False; 
      } 
      
      public function getLastResponseHeaders() { 
        return $this->lastResponse; 
      } 
    }
?>

 

复制代码 代码示例:

实例:
<?php
    $be = new BrowserEmulator(); 
     
    $output = $be->file_get_contents("http://jb200.com/rss.php"); 
    $response = $be->getLastResponseHeaders(); 
     
    echo $output; 
?>

关联:
PHP获取远程文件内容
 

复制代码 代码示例:
<?php
 function curl_get_contents($url) 
    { 
        $dir = pathinfo($url); 
        $host = $dir['dirname']; 
        $refer = $host.'/'; 
     
        $ch = curl_init($url); 
        curl_setopt ($ch, CURLOPT_REFERER, $refer); 
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); 
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1); 
        curl_setopt($ch, CURLOPT_BINARYTRANSFER, 1); 
        $data = curl_exec($ch); 
        curl_close($ch); 
         
        return $data; 
    }
?>