background image

$curls

[

$key

] = 

$ch

;

curl_multi_add_handle(

$mh

,

$curls

[

$key

]);

}
}

elseif

(

$method

 == 'post'){

//post

 

方式传值

foreach

(

$options

 

as

 

$key

=>

$option

){

$ch

 = curl_init(

$urls

);

$option

[CURLOPT_RETURNTRANSFER] = true;

$option

[CURLOPT_TIMEOUT] = 5;

$option

[CURLOPT_POST] = true;

curl_setopt_array(

$ch

,

$option

);

$curls

[

$key

] = 

$ch

;

curl_multi_add_handle(

$mh

,

$curls

[

$key

]);

}
}

else

{

exit

("参数出错!\n");

}

do

{

$mrc

 = curl_multi_exec(

$mh

,

$active

);

curl_multi_select(

$mh

);

//减少 CPU

 

压力 注释掉 CPU 压力变大

}

while

(

$active

);

foreach

(

$curls

 

as

 

$key

=>

$ch

){

$html

 = curl_multi_getcontent(

$ch

);

curl_multi_remove_handle(

$mh

,

$ch

);

curl_close(

$ch

);

$htmls

[

$key

] = 

$html

;

}
curl_multi_close(

$mh

);

return

 

$htmls

;

}
 
常用的 get 请求是通过改变 url 参数来实现的。又因为我们的函数是针对数据采集的,必然是分类采集,所以网址类似于这种:
http://www.baidu.com/s?wd=shili&pn=0&ie=utf-8
http://www.baidu.com/s?wd=shili&pn=10&ie=utf-8
http://www.baidu.com/s?wd=shili&pn=20&ie=utf-8
http://www.baidu.com/s?wd=shili&pn=30&ie=utf-8
http://www.baidu.com/s?wd=shili&pn=50&ie=utf-8

上面五个页面是很有规律的,改变的仅仅是 pn 的值。

 

复制代码 代码如下: