| 方法1:
 用file_get_contents以get方式获取内容
<?php
// Method 1: fetch a page with a plain GET request through file_get_contents().
$url = 'http://www.domain.com/?para=123';
$html = file_get_contents($url);
echo $html;
?>
方法2:用file_get_contents函数,以post方式获取url
<?php
// Method 2: send a POST request through file_get_contents() by attaching an
// HTTP stream context that carries the method, headers and request body.
$url = 'http://www.domain.com/test.php?id=123';
$data = array('foo' => 'bar');
$data = http_build_query($data);
$opts = array(
    'http' => array(
        'method'  => 'POST',
        'header'  => "Content-type: application/x-www-form-urlencoded\r\n"
                   . "Content-Length: " . strlen($data) . "\r\n",
        'content' => $data,
    ),
);
$ctx = stream_context_create($opts);
// The second argument is the boolean "use include path" flag, so pass false
// rather than an empty string. $html is false when the request fails.
$html = file_get_contents($url, false, $ctx);
?>
如果需要再传递cookie数据,则把
'header'=>"Content-type: application/x-www-form-urlencoded\r\n".
          "Content-Length: ". strlen($data) . "\r\n",
修改为
'header'=>"Content-type: application/x-www-form-urlencoded\r\n".
          "Content-Length: ". strlen($data) . "\r\n".
          "cookie:cookie1=c1;cookie2=c2\r\n",
即可
方法3: 用fopen打开url, 以get方式获取内容
<?php
// Method 3: open the URL as a stream with fopen() and read it line by line.
// Reuses the $url assigned in the method 2 snippet above.
$fp = fopen($url, 'r');
if ($fp !== false) {
    // Fetch the header information (stream metadata).
    $header = stream_get_meta_data($fp);
    $result = '';
    while (!feof($fp)) {
        $result .= fgets($fp, 1024);
    }
    fclose($fp);

    // stream_get_meta_data() returns an array, which cannot be interpolated
    // into a string directly; print the wrapper-specific data (the response
    // header lines for the http wrapper) joined into text instead.
    $headerText = isset($header['wrapper_data'])
        ? implode("\n", (array) $header['wrapper_data'])
        : '';
    echo "url header: {$headerText} <br>";
    echo "url body: $result";
}
?>
方法4: 用fopen打开url, 以post方式获取内容
<?php
// Method 4: send a POST request (with cookies) through fopen() and an HTTP
// stream context, then read the whole response body.
$data = array('foo2' => 'bar2', 'foo3' => 'bar3');
$data = http_build_query($data);
$opts = array(
    'http' => array(
        'method'  => 'POST',
        'header'  => "Content-type: application/x-www-form-urlencoded\r\nCookie:cook1=c3;cook2=c4\r\n"
                   . "Content-Length: " . strlen($data) . "\r\n",
        'content' => $data,
    ),
);
$context = stream_context_create($opts);
$html = fopen('http://www.test.com/zzzz.php?id=i3&id2=i4', 'rb', false, $context);
if ($html !== false) {
    // A single fread() of 1024 bytes would truncate any longer response, so
    // read until end of stream and release the handle afterwards.
    $w = stream_get_contents($html);
    fclose($html);
    echo $w;
}
?>
方法5:用fsockopen函数打开url,以get方式获取完整的数据,包括header和body
<?php
/**
 * Fetch a URL with a hand-built HTTP/1.1 GET request over fsockopen().
 *
 * Echoes "Query:" followed by the request target as diagnostic output.
 *
 * @param string       $url    URL to request; the port defaults to 80 and the
 *                             path to "/" when the URL does not contain them.
 * @param string|false $cookie Value for the Cookie header, or false to send none.
 *
 * @return string|false The raw response (status line, headers and body), or
 *                      false when the URL has no host or the connection fails.
 */
function get_url($url, $cookie = false)
{
    $url = parse_url($url);
    if ($url === false || empty($url['host'])) {
        return false;
    }

    // Build the request target; only append "?" when a query string exists.
    $query = isset($url['path']) ? $url['path'] : '/';
    if (isset($url['query'])) {
        $query .= '?' . $url['query'];
    }
    echo "Query:" . $query;

    $port = isset($url['port']) ? $url['port'] : 80;
    $fp = fsockopen($url['host'], $port, $errno, $errstr, 30);
    if (!$fp) {
        return false;
    }

    // Every header line must end in CRLF, including the Cookie line.
    $request = "GET $query HTTP/1.1\r\n";
    $request .= "Host: {$url['host']}\r\n";
    $request .= "Connection: Close\r\n";
    if ($cookie) {
        $request .= "Cookie: $cookie\r\n";
    }
    $request .= "\r\n";
    fwrite($fp, $request);

    $result = '';
    while (!feof($fp)) {
        $line = fgets($fp, 1024);
        if ($line === false) {
            // Read error or end of stream: stop instead of suppressing it.
            break;
        }
        $result .= $line;
    }
    fclose($fp);

    return $result;
}

/**
 * Get the body part of a URL's response, with the header stripped.
 *
 * @param string       $url    URL to request, passed through to get_url().
 * @param string|false $cookie Value for the Cookie header, or false to send none.
 *
 * @return string|false Everything after the first blank line of the raw
 *                      response, or false when the request fails or the
 *                      response has no header/body separator.
 */
function GetUrlHTML($url, $cookie = false)
{
    $rowdata = get_url($url, $cookie);
    if ($rowdata) {
        $separator = strpos($rowdata, "\r\n\r\n");
        if ($separator === false) {
            return false;
        }
        // Skip the four bytes of the CRLF CRLF separator itself.
        return substr($rowdata, $separator + 4);
    }

    return false;
}
?>
方法6:用fsockopen函数打开url,以POST方式获取完整的数据,包括header和body
<?php
/**
 * Send a form-encoded HTTP/1.1 POST request over fsockopen().
 *
 * @param string $URL      URL to post to; the port defaults to 80 and the
 *                         path to "/" when the URL does not contain them.
 * @param array  $data     Form fields as key => value pairs.
 * @param string $cookie   Value for the Cookie header; the header is omitted
 *                         when this is empty.
 * @param string $referrer Value for the Referer header; "111" is used when
 *                         it is not given.
 *
 * @return string|false The raw response (status line, headers and body), or
 *                      false when the URL has no host or the connection fails.
 */
function HTTP_Post($URL, $data, $cookie, $referrer = "")
{
    // Parse the given URL.
    $URL_Info = parse_url($URL);
    if ($URL_Info === false || empty($URL_Info['host'])) {
        return false;
    }

    // Build the referrer: fall back to a placeholder if none was given.
    if ($referrer == "") {
        $referrer = "111";
    }

    // Encode the form fields. http_build_query() also copes with an empty
    // array and encodes the keys, matching the other snippets in this file.
    $data_string = http_build_query((array) $data, '', '&');

    // Find out which port is needed - if not given use the standard one (80).
    if (!isset($URL_Info["port"])) {
        $URL_Info["port"] = 80;
    }

    // Request target: keep the query string so it is not silently dropped.
    $target = isset($URL_Info["path"]) ? $URL_Info["path"] : '/';
    if (isset($URL_Info["query"])) {
        $target .= '?' . $URL_Info["query"];
    }

    // Build the POST request. Header lines are terminated with CRLF.
    $request = "POST " . $target . " HTTP/1.1\r\n";
    $request .= "Host: " . $URL_Info["host"] . "\r\n";
    $request .= "Referer: $referrer\r\n";
    $request .= "Content-type: application/x-www-form-urlencoded\r\n";
    $request .= "Content-length: " . strlen($data_string) . "\r\n";
    $request .= "Connection: close\r\n";
    if (strlen((string) $cookie) > 0) {
        $request .= "Cookie: $cookie\r\n";
    }
    $request .= "\r\n";
    // No trailing newline: the body must be exactly Content-length bytes.
    $request .= $data_string;

    $fp = fsockopen($URL_Info["host"], $URL_Info["port"]);
    if (!$fp) {
        return false;
    }
    fputs($fp, $request);

    $result = '';
    while (!feof($fp)) {
        $line = fgets($fp, 1024);
        if ($line === false) {
            break;
        }
        $result .= $line;
    }
    fclose($fp);

    return $result;
}
?>
方法7:使用curl库,使用curl库之前,可能需要查看一下php.ini是否已经打开了curl扩展
<?php
// Method 7: fetch a page with the cURL extension.
$ch = curl_init();
$timeout = 5;
curl_setopt($ch, CURLOPT_URL, 'http://www.domain.com/');
// Return the response from curl_exec() instead of printing it directly.
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $timeout);
$file_contents = curl_exec($ch);
curl_close($ch);
echo $file_contents;
?> | 
php获得网页源代码抓取网页内容的几种方法
作者:admin 时间:2013-5-25 15:38:36 浏览:21319
这里收集了3种利用php获得网页源代码、抓取网页内容的方法,我们可以根据实际需要选用。
1、使用file_get_contents获得网页源代码
这个方法最常用,只需要两行代码即可,非常简单方便。
参考代码:
- <?php
- // Download the page source in one call, then write it to the output.
- $pageSource = file_get_contents('http://www.webkaka.com/');
- print $pageSource;
- ?>
2、使用fopen获得网页源代码
这个方法用的人也不少,不过代码有点多。
参考代码:
- <?php
- // Open the page as a read-only stream and print it line by line.
- $fh = fopen('http://www.webkaka.com/', 'r');
- if ($fh) {
-     while (!feof($fh)) {
-         echo fgets($fh);
-     }
-     // Close the stream once the page has been read.
-     fclose($fh);
- }
- ?>
3、使用curl获得网页源代码
使用curl获得网页源代码的做法,往往适合有更高要求的人使用,例如当你需要在抓取网页内容的同时得到网页header信息,或者需要设置ENCODING编码、USERAGENT等等。
参考代码一:
- <?php
- // Create a new cURL handle
- $ch = curl_init();
- // Set the URL and the related options
- curl_setopt($ch, CURLOPT_URL, "http://www.webkaka.com/");
- curl_setopt($ch, CURLOPT_HEADER, false);
- // Make curl_exec() return the page instead of printing it. Without this
- // option curl_exec() prints the page itself and returns true, so the echo
- // below would append a stray "1" to the output.
- curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
- // Fetch the URL; $data is false when the transfer fails
- $data = curl_exec($ch);
- if ($data !== false) {
-     echo $data;
- }
- // Close the cURL handle and free the system resources
- curl_close($ch);
- ?>
参考代码二:
- <?php 
- $szUrl = "http://www.webkaka.com/"; 
- $UserAgent = 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; SLCC1; .NET CLR 2.0.50727; .NET CLR 3.0.04506; .NET CLR 3.5.21022; .NET CLR 1.0.3705; .NET CLR 1.1.4322)'; 
- $curl = curl_init(); 
- curl_setopt($curl, CURLOPT_URL, $szUrl); 
- curl_setopt($curl, CURLOPT_HEADER, 0); //0表示不输出Header,1表示输出 
- curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1); 
- curl_setopt($curl, CURLOPT_SSL_VERIFYPEER, false); 
- curl_setopt($curl, CURLOPT_SSL_VERIFYHOST, false); 
- curl_setopt($curl, CURLOPT_ENCODING, ''); 
- curl_setopt($curl, CURLOPT_USERAGENT, $UserAgent); 
- curl_setopt($curl, CURLOPT_FOLLOWLOCATION, 1); 
- $data = curl_exec($curl); 
- echo $data; 
- //echo curl_errno($curl); //返回0时表示程序执行成功 如何从curl_errno返回值获取错误信息 
