atk14 / textmit-client
Textmit.com 搜索和索引引擎的客户端
Requires
- php: >=5.6.0
- atk14/api-data-fetcher: >=1.10.8 <2.0
- atk14/string4: ^0.5
Requires (Dev)
- atk14/tester: *
README
这是一个用于搜索和索引引擎 Textmit.com 的客户端。该客户端使用 PHP 编写,可以非常容易地集成到 ATK14 应用程序中。
1. 基本用法
在配置文件中设置 TEXTMIT_API_KEY 常量。
define("TEXTMIT_API_KEY","123.eeee.abcde....");
在哪里获取 TEXTMIT_API_KEY?目前 Textmit 引擎处于封闭测试阶段。因此,您需要邀请码才能获取密钥。对此我们深感抱歉。
1.1 将文档添加到索引
$textmit = new \Textmit\Client();
// The socket timeout can be optionally increased
$adf = $textmit->getApiDataFetcher();
$adf->setSocketTimeout(30.0); // seconds
// Keys "a".."d" carry the text in descending order of relevance;
// every key must be unique, otherwise PHP silently keeps only the last value.
$textmit->addDocument([
    "type" => "article",
    "id" => 123,
    "language" => "en",
    "a" => "The most relevant textual part",
    "b" => "More relevant textual part",
    "c" => "Textual part with the default relevance",
    "d" => "The least relevant textual part"
]);
同一个对象可以以不同的语言进行索引。
// Same document (type + id) as an "article" #123 indexed in another language.
// Keys "a".."d" carry the text in descending order of relevance.
$textmit->addDocument([
    "type" => "article",
    "id" => 123,
    "language" => "cs",
    "a" => "Nejvíce relevantní část textu",
    "b" => "Více relevantní část textu",
    "c" => "Textová část s výchozí relevancí",
    "d" => "Nejméně relevantní část textu"
]);
以下是将文档添加到全文索引的简便方法:文本权重为 "c",并使用默认语言。
// $article is a model object; the given text is indexed with the weight "c"
$textmit->addDocument($article,"Lorem Ipsum");
1.2 准备全文数据
为了简化全文数据的准备过程,可以使用 FulltextData 类。
$article = Article::GetInstanceById(333);
$fd_article = new \Textmit\FulltextData($article);
$fd_article->addHtml($article->getBody());
// the title is the most relevant part, hence the weight "a"
$fd_article->addText($article->getTitle(),"a");
$fd_article->setDate($article->getPublishedAt()); // "2018-02-17 06:00:00"
$textmit->addDocument($fd_article->toArray());
FulltextData 有 merge() 方法,用于合并其他 FulltextData 对象,例如,可以将图像的 FulltextData 合并到文章的 FulltextData 中。在合并过程中,合并对象的文本权重可以改变(通常降低)。
// Merge the image's fulltext data into the article's data.
// The map re-weights the merged texts: weights "a" and "b" of $fd_image are changed to "c".
$fd_article->merge($fd_image,[
"a" => "c",
"b" => "c"
]);
1.3 搜索
可以在特定语言中进行搜索。
// Search English articles only; "offset" and "limit" select the slice of results to return
$result = $textmit->search("vitamins and minerals",[
"type" => "article",
"language" => "en",
"offset" => 0,
"limit" => 20,
]);
$records_found = $result->getTotalAmount(); // presumably the total number of matches, not just the returned slice - TODO confirm
print_r($result->getIds()); // ["123","124"...]
可以一次性搜索多种类型的文档。
// Search several document types at once; each found item knows its own type
$result = $textmit->search("vitamins and minerals",[
"language" => "cs",
"types" => ["article","page","image_gallery","video"],
]);
foreach($result->getItems() as $item){
$id = $item->getId();
$type = $item->getType(); // "article", "page", "image_gallery" or "video"
$object = $item->getObject(); // Article#123, Page#332, ImageGallery#453...
}
可以使用 "前缀搜索" 来搜索不匹配完整单词的内容。
// search for documents containing words vita, vitae, vitamine, vitality...
$result = $textmit->search("vita",[
"prefix_search" => true,
]);
1.4 从索引中删除文档
1.4.1 删除单个文档
$textmit->removeDocument(123,"article");
1.4.2 删除整个索引
方法 destroyStage
删除当前阶段所有类型的所有文档。一个项目可以有多个阶段,例如 "生产","开发","测试"...
$textmit->destroyStage();
1.4.3 删除过时文档
对于小型项目(小型网站)的索引,一种技术是在每天一次性索引所有内容,然后删除过时文档。过时文档是指那些在给定阈值日期后未索引或重新索引的文档。
$textmit->removeObsoleteDocuments(date("Y-m-d H:i:s",time() - 60 * 60 * 24)); // 1 day
1.5 配置
有几个配置常量。
define("TEXTMIT_API_KEY","...");
define("TEXTMIT_DEFAULT_LANGUAGE","en"); // "en", "cs"
define("TEXTMIT_DEFAULT_DOCUMENT_TYPE","article");
define("TEXTMIT_STAGE","auto"); // "DEVELOPMENT", "PRODUCTION", "auto" means auto detection - it leads to "PRODUCTION", "DEVELOPMENT@hostname" or "TEST@hostname"
define("TEXTMIT_API_BASE_URL","https://www.textmit.com/api/"); // This is default base url
1.6 Tracy 面板集成
Textmit 包附带用于轻松集成到流行的调试器 Tracy 的面板 (https://packagist.org.cn/packages/tracy/tracy)
$tracy_bar = Tracy\Debugger::getBar();
$tracy_bar->addPanel(new Textmit\Panel());
2. 安装
使用 Composer 安装 Textmit 客户端。
cd path/to/your/project/
composer require atk14/textmit-client
3. 将 Textmit 集成到 ATK14 项目中
3.1 在可搜索模型中使用 Indexable 接口
<?php
// file: app/models/article.php
/**
 * Example of a searchable model: a translatable article that can be indexed by Textmit.
 */
class Article extends ApplicationModel implements Translatable, \Textmit\Indexable {

    /**
     * Names of the fields that are kept separately for every language.
     *
     * @return string[]
     */
    static function GetTranslatableFields() { return array("title", "teaser", "body"); }

    /**
     * Tells whether the publication date of the article has already passed.
     *
     * An article whose publication date is missing or cannot be parsed
     * is treated as not published.
     *
     * @return bool
     */
    function isPublished(){
        // the cast turns a missing (null) date into "", which strtotime() rejects
        $published_ts = strtotime((string)$this->getPublishedAt());
        // strtotime() returns false on failure; a bare "false < time()" would evaluate
        // to true and expose an article without a valid publication date
        return $published_ts!==false && $published_ts<time();
    }

    /**
     * Only published articles are supposed to be present in the fulltext index.
     *
     * @return bool
     */
    function isIndexable(){
        return $this->isPublished();
    }

    /**
     * Builds the fulltext data of the article for the given language.
     *
     * @param string $lang language in which the title, teaser and body are read
     * @return \Textmit\FulltextData
     */
    function getFulltextData($lang){
        $fd = new \Textmit\FulltextData($this);
        $fd->addText($this->getTitle($lang),"a");
        $fd->addHtml($this->getTeaser($lang),"b");
        $fd->addHtml($this->getBody($lang)); // default is section "c"
        $fd->setDate($this->getPublishedAt());
        return $fd;
    }
}
3.2 机器人自动索引文档
<?php
// file: robots/fulltext_indexer_robot.php
/**
 * Robot that sends all indexable objects to Textmit and then removes obsolete documents.
 */
class FulltextIndexerRobot extends ApplicationRobot {

    /**
     * Indexes every object selected by the recipes and afterwards removes
     * the documents that were not (re)indexed during the last 2 days.
     */
    function run(){
        $this->textmit = new \Textmit\Client();
        // the socket timeout is raised to 30 seconds
        $this->textmit->getApiDataFetcher()->setSocketTimeout(30.0);

        $now = date("Y-m-d H:i:s");
        $this->now = $now;

        $this->logger->info("using stage: ".$this->textmit->getStage());
        $this->logger->flush();

        // class name => finder options selecting the records to be indexed
        $RECIPE_ITEMS = [
            "Article" => [
                "conditions" => ["published_at<=:now"],
                "bind_ar" => [":now" => $now],
            ],
        ];

        foreach($RECIPE_ITEMS as $class_name => $finder_options){
            $records = $class_name::FindAll($finder_options);
            foreach($records as $record){
                $this->_indexObject($record);
            }
        }

        $threshold = date("Y-m-d H:i:s",time() - 2 * 24 * 60 * 60); // 2 days
        $deleted = $this->textmit->removeObsoleteDocuments($threshold);
        $this->logger->info("obsolete documents deleted: $deleted");
    }

    /**
     * Indexes a single object in every supported language.
     *
     * When the object provides isIndexable() and reports itself as not indexable,
     * it is removed from the index instead.
     *
     * @param object $object the record to be indexed
     */
    function _indexObject($object){
        global $ATK14_GLOBAL;

        $obj_str = sprintf("%s#%s",get_class($object),$object->getId()); // e.g. "Article#123"
        $this->logger->info("about to index $obj_str");
        $this->logger->flush();

        $not_indexable = method_exists($object,"isIndexable") && !$object->isIndexable();
        if($not_indexable){
            $this->textmit->removeDocument($object);
            $this->logger->debug("object $obj_str is not indexable: (removed if exists)");
            return;
        }

        $langs = $ATK14_GLOBAL->getSupportedLangs();
        foreach($langs as $lang){
            $document = $object->getFulltextData($lang)->toArray();
            $added = $this->textmit->addDocument($document);
            if($added){
                $this->logger->debug("successfully indexed: $obj_str");
                continue;
            }
            $this->logger->warn("adding $obj_str failed");
        }
    }
}
4. 许可证
Textmit 客户端是免费软件,根据 MIT 许可证的条款进行分发 https://open-source.org.cn/licenses/mit-license