方法1:
用file_get_contents以get方式获取内容
<?php
// Fetch a page with a plain GET request.
$url = 'http://www.domain.com/?para=123';
$html = file_get_contents($url);
if ($html === false) {
    // file_get_contents() returns false (and raises a warning) when the request fails.
    exit("Failed to fetch {$url}");
}
echo $html;
?>
方法2:用file_get_contents函数,以post方式获取url
<?php
// Send a form-encoded POST request through an HTTP stream context.
$url = 'http://www.domain.com/test.php?id=123';
$data = http_build_query(array('foo' => 'bar'));
$opts = array(
    'http' => array(
        'method'  => 'POST',
        // Each header line must sit on one line and end with CRLF.
        'header'  => "Content-type: application/x-www-form-urlencoded\r\n"
                   . "Content-Length: " . strlen($data) . "\r\n",
        'content' => $data,
    ),
);
$ctx = stream_context_create($opts);
// The second argument is the boolean $use_include_path flag, not a string.
$html = file_get_contents($url, false, $ctx);
if ($html === false) {
    exit("Failed to post to {$url}");
}
?>
如果需要再传递cookie数据,则把
'header' => "Content-type: application/x-www-form-urlencoded\r\n" . "Content-Length: " . strlen($data) . "\r\n",
修改为
'header' => "Content-type: application/x-www-form-urlencoded\r\n" . "Content-Length: " . strlen($data) . "\r\n" . "Cookie: cookie1=c1; cookie2=c2\r\n",
即可
方法3:
用fopen打开url,以get方式获取内容
<?php
// Read a URL through fopen() and show both the response headers and the body.
$url = 'http://www.domain.com/?para=123';
$fp = fopen($url, 'r');
if ($fp === false) {
    exit("Failed to open {$url}");
}

// stream_get_meta_data() returns an array; for the http wrapper the response
// header lines are listed under the 'wrapper_data' key.
$meta = stream_get_meta_data($fp);
$header = isset($meta['wrapper_data']) ? implode("\n", (array) $meta['wrapper_data']) : '';

$result = '';
while (!feof($fp)) {
    $line = fgets($fp, 1024);
    if ($line === false) {
        // Read error or EOF reached between feof() and fgets().
        break;
    }
    $result .= $line;
}
fclose($fp);

echo "url header: {$header} <br>";
echo "url body: $result";
?>
方法4:
用fopen打开url,以post方式获取内容
<?php
// Send a form-encoded POST (with cookies) through fopen() and an HTTP stream context.
$data = http_build_query(array('foo2' => 'bar2', 'foo3' => 'bar3'));
$opts = array(
    'http' => array(
        'method'  => 'POST',
        // Each header line must sit on one line and end with CRLF.
        'header'  => "Content-type: application/x-www-form-urlencoded\r\n"
                   . "Cookie: cook1=c3; cook2=c4\r\n"
                   . "Content-Length: " . strlen($data) . "\r\n",
        'content' => $data,
    ),
);
$context = stream_context_create($opts);
$html = fopen('http://www.test.com/zzzz.php?id=i3&id2=i4', 'rb', false, $context);
if ($html === false) {
    exit('Failed to open the URL');
}
// Only the first 1024 bytes of the response body are read here.
$w = fread($html, 1024);
fclose($html);
echo $w;
?>
方法5:用fsockopen函数打开url,以get方式获取完整的数据,包括header和body
<?php
/**
 * Fetch a URL over a raw socket with a GET request.
 *
 * @param string       $url    Absolute http:// URL.
 * @param string|false $cookie Value for the Cookie header, or false to send none.
 * @return string|false Raw response (status line, headers and body), or false
 *                      when the URL cannot be parsed or the connection fails.
 */
function get_url($url, $cookie = false)
{
    $parts = parse_url($url);
    if ($parts === false || !isset($parts['host'])) {
        return false;
    }

    // parse_url() omits keys that are absent from the URL, so default each one.
    $path  = isset($parts['path']) ? $parts['path'] : '/';
    $query = isset($parts['query']) ? $path . '?' . $parts['query'] : $path;
    $port  = isset($parts['port']) ? $parts['port'] : 80;
    $host  = $port == 80 ? $parts['host'] : $parts['host'] . ':' . $port;

    // Debug output kept from the original implementation.
    echo "Query:" . $query;

    $fp = fsockopen($parts['host'], $port, $errno, $errstr, 30);
    if (!$fp) {
        return false;
    }

    // NOTE(review): an HTTP/1.1 server may answer with chunked transfer
    // encoding, in which case the raw body contains chunk-size lines.
    $request  = "GET $query HTTP/1.1\r\n";
    $request .= "Host: $host\r\n";
    $request .= "Connection: Close\r\n";
    if ($cookie) {
        $request .= "Cookie: $cookie\r\n";
    }
    $request .= "\r\n";
    fwrite($fp, $request);

    $result = '';
    while (!feof($fp)) {
        $chunk = fgets($fp, 1024);
        if ($chunk === false) {
            break;
        }
        $result .= $chunk;
    }
    fclose($fp);

    return $result;
}

/**
 * Fetch a URL with get_url() and return only the body, without the headers.
 *
 * @param string       $url    Absolute http:// URL.
 * @param string|false $cookie Value for the Cookie header, or false to send none.
 * @return string|false Response body, or false when the request fails or the
 *                      response has no header/body separator.
 */
function GetUrlHTML($url, $cookie = false)
{
    $rowdata = get_url($url, $cookie);
    if (!$rowdata) {
        return false;
    }

    // Headers end at the first blank line (CRLF CRLF); the body follows it.
    $separator = strpos($rowdata, "\r\n\r\n");
    if ($separator === false) {
        return false;
    }

    return substr($rowdata, $separator + 4);
}
?>
方法6:用fsockopen函数打开url,以POST方式获取完整的数据,包括header和body
<?php
/**
 * Send a form-encoded POST request over a raw socket.
 *
 * @param string $URL      Absolute http:// URL.
 * @param array  $data     Form fields as key => value pairs.
 * @param string $cookie   Value for the Cookie header; nothing is sent when empty.
 * @param string $referrer Value for the Referer header; "111" is used when empty.
 * @return string|false Raw response (status line, headers and body), or false
 *                      when the URL cannot be parsed or the connection fails.
 */
function HTTP_Post($URL, $data, $cookie, $referrer = "")
{
    $URL_Info = parse_url($URL);
    if ($URL_Info === false || !isset($URL_Info['host'])) {
        return false;
    }

    // If no referrer is given, fall back to the placeholder value.
    if ($referrer == "") {
        $referrer = "111";
    }

    // Encodes both keys and values; an empty array yields an empty string.
    $data_string = http_build_query($data, '', '&');

    // Use the standard HTTP port when the URL does not name one.
    $port = isset($URL_Info['port']) ? $URL_Info['port'] : 80;
    $host = $port == 80 ? $URL_Info['host'] : $URL_Info['host'] . ':' . $port;

    // Keep the query string so that URLs such as /test.php?id=123 still work.
    $path = isset($URL_Info['path']) ? $URL_Info['path'] : '/';
    if (isset($URL_Info['query'])) {
        $path .= '?' . $URL_Info['query'];
    }

    // Header lines must end with CRLF; the body follows one blank line and
    // must be exactly Content-Length bytes long.
    $request  = "POST $path HTTP/1.1\r\n";
    $request .= "Host: $host\r\n";
    $request .= "Referer: $referrer\r\n";
    $request .= "Content-type: application/x-www-form-urlencoded\r\n";
    $request .= "Content-length: " . strlen($data_string) . "\r\n";
    $request .= "Connection: close\r\n";
    if ($cookie) {
        $request .= "Cookie: $cookie\r\n";
    }
    $request .= "\r\n";
    $request .= $data_string;

    $fp = fsockopen($URL_Info['host'], $port);
    if (!$fp) {
        return false;
    }
    fputs($fp, $request);

    $result = '';
    while (!feof($fp)) {
        $chunk = fgets($fp, 1024);
        if ($chunk === false) {
            break;
        }
        $result .= $chunk;
    }
    fclose($fp);

    return $result;
}
?>
方法7:使用curl库,使用curl库之前,可能需要查看一下php.ini是否已经打开了curl扩展
<?php
// Fetch a page with the cURL extension and print it.
$ch = curl_init();
$timeout = 5;
curl_setopt($ch, CURLOPT_URL, 'http://www.domain.com/');
// Return the response from curl_exec() instead of printing it directly.
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $timeout);
$file_contents = curl_exec($ch);
if ($file_contents === false) {
    $error = curl_error($ch);
    curl_close($ch);
    exit("cURL request failed: {$error}");
}
curl_close($ch);
echo $file_contents;
?>
php获得网页源代码抓取网页内容的几种方法
作者:admin 时间:2013-5-25 15:38:36 浏览:21319
这里收集了3种利用PHP获得网页源代码、抓取网页内容的方法,我们可以根据实际需要选用。
1、使用file_get_contents获得网页源代码
这个方法最常用,只需要两行代码即可,非常简单方便。
参考代码:
<?php
// Download the page and print it.
$url = 'http://www.webkaka.com/';
$fh = file_get_contents($url);
if ($fh === false) {
    // file_get_contents() returns false when the request fails.
    exit("Failed to fetch {$url}");
}
echo $fh;
?>
2、使用fopen获得网页源代码
这个方法用的人也不少,不过代码有点多。
参考代码:
<?php
// Stream the page line by line and print each line as it arrives.
$url = 'http://www.webkaka.com/';
$fh = fopen($url, 'r');
if ($fh === false) {
    exit("Failed to open {$url}");
}
while (!feof($fh)) {
    $line = fgets($fh);
    if ($line === false) {
        // Read error or EOF reached between feof() and fgets().
        break;
    }
    echo $line;
}
// Release the stream once the whole page has been read.
fclose($fh);
?>
3、使用curl获得网页源代码
使用curl获得网页源代码的做法,往往是需要更高要求的人使用,例如当你需要在抓取网页内容的同时,得到网页header信息,还有ENCODING编码的使用,USERAGENT的使用等等。
参考代码一:
<?php
// Create a new cURL handle.
$ch = curl_init();
// Set the URL and the related options.
curl_setopt($ch, CURLOPT_URL, "http://www.webkaka.com/");
curl_setopt($ch, CURLOPT_HEADER, false);
// Return the page from curl_exec() instead of printing it directly; without
// this option curl_exec() returns true and the echo below would print "1".
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
// Fetch the URL.
$data = curl_exec($ch);
if ($data === false) {
    $error = curl_error($ch);
    curl_close($ch);
    exit("cURL request failed: {$error}");
}
echo $data;
// Close the cURL handle and free its system resources.
curl_close($ch);
?>
参考代码二:
<?php
// Fetch a page with cURL using a custom User-Agent, following redirects, and
// print the response body.
$szUrl = "http://www.webkaka.com/";
$UserAgent = 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; SLCC1; .NET CLR 2.0.50727; .NET CLR 3.0.04506; .NET CLR 3.5.21022; .NET CLR 1.0.3705; .NET CLR 1.1.4322)';
$curl = curl_init();
curl_setopt($curl, CURLOPT_URL, $szUrl);
curl_setopt($curl, CURLOPT_HEADER, 0); // 0 = do not include the response header in the output, 1 = include it
// Return the response from curl_exec() instead of printing it directly.
curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);
// NOTE(review): the next two options turn off TLS certificate and host name
// verification; confirm this is acceptable before using it against https:// URLs.
curl_setopt($curl, CURLOPT_SSL_VERIFYPEER, false);
curl_setopt($curl, CURLOPT_SSL_VERIFYHOST, false);
// An empty string asks cURL to advertise every content encoding it supports.
curl_setopt($curl, CURLOPT_ENCODING, '');
curl_setopt($curl, CURLOPT_USERAGENT, $UserAgent);
// Follow "Location:" redirects sent by the server.
curl_setopt($curl, CURLOPT_FOLLOWLOCATION, 1);
$data = curl_exec($curl);
echo $data;
//echo curl_errno($curl); // 0 means the request succeeded; curl_error($curl) gives the matching error message