partitech / doctrine-pgvector
适用于 Doctrine 的 PostgreSQL 矢量(vector)类型
v0.0.7
2024-08-15 19:23 UTC
Requires
- php: ^8.0
- doctrine/dbal: ^3.6
- doctrine/orm: ^2.19|^3.0
Requires (Dev)
- phpunit/phpunit: ^5.2
README
描述
Doctrine 的 PGVector 类型
安装
composer require partitech/doctrine-pgvector
配置 Doctrine
doctrine:
    dbal:
        types:
            vector: Partitech\DoctrinePgVector\Type\VectorType
    orm:
        dql:
            string_functions:
                distance: Partitech\DoctrinePgVector\Query\Distance
                inner_product: Partitech\DoctrinePgVector\Query\InnerProduct
                cosine_similarity: Partitech\DoctrinePgVector\Query\CosineSimilarity
使用
现在您可以在实体中使用 vector
类型
use Doctrine\ORM\Mapping as ORM; /** * @ORM\Entity() */ class YourEntity { #[ORM\Column(type: 'vector', length: 1024, nullable: true)] private $vectors; }
如果您使用 symfony console make:entity
生成实体,请手动为 vector 类型的列添加 length
参数。length 的值应等于您所用模型输出的嵌入维度。
例如,OpenAI 各嵌入模型的维度如下:
text-embedding-3-small : 1536
text-embedding-3-large : 3072(可定制)
Mistral AI
Mistral-embed : 1024
此外,您还应手动为矢量列创建 HNSW 索引。请注意,HNSW 索引仅支持维度不超过 2000 的矢量,维度更高的列无法直接建立该索引。请根据所用的距离度量选择下列语句之一:
L2 距离
CREATE INDEX ON items USING hnsw (embedding vector_l2_ops);
内积
CREATE INDEX ON items USING hnsw (embedding vector_ip_ops);
余弦距离
CREATE INDEX ON items USING hnsw (embedding vector_cosine_ops);
基本用法
距离
对应的 SQL 写法(使用 <-> 运算符):
SELECT * FROM embeddings WHERE vectors <-> '[3,1,2]' < 5
在 Doctrine 中的用法(依次为 DQL 和 QueryBuilder 两种写法):
$floatArray = array_map(function() { return mt_rand(0, 1000000) / 1000000; }, array_fill(0, 1024, null)); $query = $this->entityManager->createQuery( "SELECT i FROM App\Entity\Embeddings i ORDER BY distance(i.vectors, :vector) ASC" ); $query->setParameter('vector', $floatArray, 'vector'); $results = $query->setMaxResults(5)->getResult(); dump($results);
$qb = $this->entityManager->createQueryBuilder(); $qb->select('e') ->from('App:Embeddings', 'e') ->orderBy('distance(e.vectors, :vector)') ->setParameter('vector', $floatArray, 'vector') ->setMaxResults(5) ; $result = $qb->getQuery()->getResult(); dump($result);
内积
对应的 SQL 写法(使用 <#> 运算符):
SELECT (vectors <#> '[3,1,2]') * -1, * FROM embeddings
在 Doctrine 中的用法(依次为 DQL 和 QueryBuilder 两种写法):
$floatArray = array_map(function() { return mt_rand(0, 1000000) / 1000000; }, array_fill(0, 1024, null)); $query = $this->entityManager->createQuery( "SELECT inner_product(e.vectors, :vector) , e FROM App\Entity\Embeddings e" ); $query->setParameter('vector', $floatArray, 'vector'); $results = $query->setMaxResults(5)->getResult(); dump($results);
$qb = $this->entityManager->createQueryBuilder(); $qb->select('e') ->addSelect('inner_product(e.vectors, :vector)') ->from('App:Embeddings', 'e') ->setParameter('vector', $floatArray, 'vector') ->setMaxResults(5) ; $result = $qb->getQuery()->getResult(); dump($result);
余弦相似度
对应的 SQL 写法(使用 <=> 运算符):
SELECT 1 - (vectors <=> '[3,1,2]'), * FROM embeddings
在 Doctrine 中的用法(依次为 DQL 和 QueryBuilder 两种写法):
$floatArray = array_map(function() { return mt_rand(0, 1000000) / 1000000; }, array_fill(0, 1024, null)); $query = $this->entityManager->createQuery( "SELECT cosine_similarity(e.vectors, :vector) , e FROM App\Entity\Embeddings e" ); $query->setParameter('vector', $floatArray, 'vector'); $results = $query->setMaxResults(5)->getResult(); dump($results);
$qb = $this->entityManager->createQueryBuilder(); $qb->select('e') ->addSelect('cosine_similarity(e.vectors, :vector)') ->from('App:Embeddings', 'e') ->setParameter('vector', $floatArray, 'vector') ->setMaxResults(5) ; $result = $qb->getQuery()->getResult(); dump($result);