bjd / php-dom-crawl
基于 PHP 脚本抓取 Html Dom 文本。
v1.0.3
2016-08-09 07:36 UTC
Requires
- php: >=5.4.0
This package is not auto-updated.
Last update: 2024-09-14 19:53:01 UTC
README
### 简介
现在您可以通过 composer 轻松获取此代码,它需要 PHP 5.4+ 来运行。
### 安装
检查 packagist 以获取最新版本。https://packagist.org.cn/packages/bjd/php-dom-crawl
在项目根目录下运行以下命令
composer require bjd/php-dom-crawl
### 使用方法
获取 HTML 内容的文本值,用法类似于 JavaScript 中的 DOM 解析。
以下是一个抓取 ShadowSocks 测试账号的示例,在 Yii 框架中使用。
namespace frontend\controllers;

use Yii;
use yii\web\Controller;
use Bjd\PhpDom\PhpCrawl;

/**
 * Example controller: scrapes a page of test ShadowSocks accounts and exposes
 * the result as a JSON document, either as a file download or as plain output.
 */
class TestController extends Controller
{
    /** Page that is scraped for account entries. */
    const SOURCE_URL = 'http://ss.yuvpn.com/page/testss.html';

    /**
     * Matches the text between the port label and the next <br/>.
     *
     * The label is spelled as raw bytes (B6 CB BF DA A3 BA) so the pattern does
     * not depend on the encoding this source file is saved in.
     * NOTE(review): these bytes are the GBK encoding of the label; this assumes
     * the scraped page is served as GBK - confirm against the live page.
     */
    const PORT_PATTERN = '~\xB6\xCB\xBF\xDA\xA3\xBA\s?(.*?)<br\s?/>~is';

    /**
     * Matches the text between the password label and the next <br/>.
     *
     * Label bytes: C3 DC C2 EB A3 BA (same encoding assumption as PORT_PATTERN).
     */
    const PASSWORD_PATTERN = '~\xC3\xDC\xC2\xEB\xA3\xBA\s?(.*?)<br\s?/>~is';

    /**
     * Writes the scraped configuration to a temp file under the runtime log
     * directory and sends it to the client as "gui-config.json".
     *
     * @throws \RuntimeException when the page cannot be fetched or the temp
     *                           file cannot be written
     */
    public function actionIndex()
    {
        $payload = $this->_getContent();
        $tmpPath = Yii::getAlias("@app/runtime/logs/ss.tmp");

        // Fail loudly instead of sending a stale or missing file.
        if (file_put_contents($tmpPath, $payload) === false) {
            throw new \RuntimeException("Unable to write " . $tmpPath);
        }

        Yii::$app->response->sendFile($tmpPath, "gui-config.json");
        Yii::$app->end();
    }

    /**
     * Echoes the scraped configuration JSON directly.
     *
     * @throws \RuntimeException when the page cannot be fetched
     */
    public function actionStr()
    {
        echo $this->_getContent();
    }

    /**
     * Fetches the source page, extracts one entry per ".testvpnitem" element
     * and returns the whole configuration encoded as JSON.
     *
     * Fields that cannot be found in an item are emitted as null rather than
     * raising undefined-offset notices.
     *
     * @return string|false result of json_encode()
     * @throws \RuntimeException when the page cannot be fetched or parsed
     */
    private function _getContent()
    {
        $html = PhpCrawl::file_get_html(self::SOURCE_URL);
        if (!is_object($html)) {
            // Without this guard a failed fetch ends in a fatal "call on non-object".
            throw new \RuntimeException("Unable to load " . self::SOURCE_URL);
        }

        $ss_vpn = [];
        foreach ($html->find(".testvpnitem") as $key => $item) {
            $text = $item->innertext();

            // Query the spans once; the original looked them up three times.
            $spans  = $item->find("span");
            $server = isset($spans[0]) ? $spans[0]->innertext() : null;
            $method = isset($spans[1]) ? $spans[1]->innertext() : null;

            $ss_vpn[$key] = [
                'server'      => $server,
                'server_port' => $this->_matchFirst(self::PORT_PATTERN, $text),
                'password'    => $this->_matchFirst(self::PASSWORD_PATTERN, $text),
                'method'      => $method,
                // The remark deliberately mirrors the server name (first span).
                'remarks'     => $server,
            ];
        }

        $data = [
            "index"        => count($ss_vpn),
            "global"       => true,
            "enabled"      => true,
            "shareOverLan" => false,
            "isDefault"    => false,
            "localPort"    => 1080,
            "configs"      => $ss_vpn,
        ];

        return json_encode($data);
    }

    /**
     * Returns the first capture group of the first match of $pattern in $text.
     *
     * @param string $pattern PCRE pattern with at least one capture group
     * @param string $text    subject to search
     * @return string|null captured text, or null when the pattern does not match
     */
    private function _matchFirst($pattern, $text)
    {
        $found = [];
        if (preg_match($pattern, $text, $found) === 1 && isset($found[1])) {
            return $found[1];
        }

        return null;
    }
}