xxtime / curl-utils
此包最新版本(0.2.2)没有可用的许可证信息。
curl 多任务用于爬虫等
0.2.2
2019-11-12 01:56 UTC
Requires
- php: >=5.5
This package is auto-updated.
Last update: 2024-09-15 15:48:21 UTC
README
基于 curl 的多任务请求工具，可用于爬虫等场景
安装
composer require xxtime/curl-utils
使用方法
// Basic usage of CurlUtils: single GET/POST requests and custom curl options.
use Xxtime\CurlUtils\CurlUtils;

$curlUtils = new CurlUtils();

// get method
$curlUtils->get('https://www.xxtime.com');

// post method
$curlUtils->post('https://www.xxtime.com', ['title' => 'XT curlUtils']);

// set custom curl options (keys are the CURLOPT_* names given as strings)
$curlUtils->setOptions([
    "CURLOPT_TIMEOUT"   => 20,
    "CURLOPT_USERAGENT" => 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36'
]);
多请求用法
use Xxtime\CurlUtils\CurlUtils;

/**
 * Example crawler built on CurlUtils' multi-request task pool.
 *
 * run() seeds the pool with start URLs; callback() is invoked for each
 * response and may enqueue further URLs until the depth budget is exhausted.
 */
class Demo
{
    /** @var CurlUtils shared client; created in run() and reused by callback() */
    protected $curlUtils;

    /**
     * Configure the client, enqueue the seed URLs and start processing.
     */
    public function run()
    {
        $this->curlUtils = new CurlUtils();
        $this->curlUtils->setOptions([
            "CURLOPT_USERAGENT" => 'Mozilla/5.0 (Macintosh; Intel Mac OS X)',
        ]);

        $urls = [
            'https://www.xxtime.com',
        ];

        $this->curlUtils->add(
            $urls,                // urls
            null,                 // set custom curl options for every url
            [$this, 'callback'],  // callback function
            ['depth' => 5]        // custom argv passed through to the callback
        );
        $this->curlUtils->run();
    }

    /**
     * Handle one finished request and optionally enqueue follow-up requests.
     *
     * @param string $content response body
     * @param array  $header  response headers; this example reads 'Content-Type' and 'url'
     * @param array  $argv    custom arguments given to add(); 'depth' is the remaining crawl depth
     *
     * @return bool|null false when there is no Content-Type, true when the response is
     *                   skipped (non-HTML or depth exhausted), null after processing a page.
     *                   NOTE(review): how CurlUtils interprets these values is not shown here.
     */
    public function callback($content, $header, $argv)
    {
        // no Content-Type then ignore
        if (empty($header['Content-Type'])) {
            return false;
        }

        // not a html page, save content or ignore
        // (the "=== false" comparison must be applied to strpos()'s result, not to the needle)
        if (strpos($header['Content-Type'], 'text/html') === false) {
            return true;
        }

        // limit the request depth
        if ((int) $argv['depth'] <= 0) {
            return true;
        }

        // analyse the html content and collect the links to follow
        $urls = $this->extractUrls($content);

        // continue to add new tasks into the task pool
        if ($urls !== []) {
            // only send a referer when the source URL is known; null means "no custom options"
            $options = isset($header['url']) ? ["CURLOPT_REFERER" => $header['url']] : null;
            $this->curlUtils->add(
                $urls,
                $options,
                [$this, 'callback'],
                ['depth' => $argv['depth'] - 1]
            );
        }
    }

    /**
     * Collect the distinct absolute http(s) links found in href attributes.
     *
     * @param string $content html document
     *
     * @return array list of unique URLs, empty when none are found
     */
    private function extractUrls($content)
    {
        if (!is_string($content) || $content === '') {
            return [];
        }

        if (!preg_match_all('/href\s*=\s*["\'](https?:\/\/[^"\'#\s]+)/i', $content, $matches)) {
            return [];
        }

        return array_values(array_unique($matches[1]));
    }
}