partitech/doctrine-pgvector

适用于 Doctrine 的 PostgreSQL 向量(pgvector)类型

v0.0.7 2024-08-15 19:23 UTC

This package is auto-updated.

Last update: 2024-09-15 19:37:36 UTC


README

描述

Doctrine 的 PGVector 类型

安装

composer require partitech/doctrine-pgvector

配置 Doctrine

# Registers the custom DBAL column type and the DQL functions used in the examples below.
doctrine:
  dbal:
    types:
      # Maps the `vector` column type name to the package's VectorType class.
      vector: Partitech\DoctrinePgVector\Type\VectorType
  orm:
    dql:
      string_functions:
        # DQL function name (left) mapped to the class that implements it (right).
        distance: Partitech\DoctrinePgVector\Query\Distance
        inner_product: Partitech\DoctrinePgVector\Query\InnerProduct
        cosine_similarity: Partitech\DoctrinePgVector\Query\CosineSimilarity

使用

现在您可以在实体中使用 vector 类型

use Doctrine\ORM\Mapping as ORM;

/**
 * Example entity that stores an embedding in a `vector` column.
 *
 * The entity and its column are both mapped with PHP 8 attributes: a Doctrine
 * mapping driver reads either attributes or docblock annotations, not both,
 * so the two styles must not be mixed on the same class.
 */
#[ORM\Entity]
class YourEntity
{
    // `length` is the dimension of the embeddings produced by your model.
    #[ORM\Column(type: 'vector', length: 1024, nullable: true)]
    private $vectors;
}

如果您使用 symfony console make:entity 生成实体,请手动将该列的 type 设置为 vector,并添加 length 参数。length 是您的模型嵌入向量的维度。

例如,OpenAI 使用以下维度

text-embedding-3-small : 1536

text-embedding-3-large : 3072(可定制)

Mistral AI

Mistral-embed : 1024

此外,您应手动为向量列添加 HNSW 索引。请注意,HNSW 索引最多支持 2000 维的向量(例如 text-embedding-3-large 默认的 3072 维超出了此限制,需先降低维度才能建立索引)。

L2 距离

CREATE INDEX ON items USING hnsw (embedding vector_l2_ops);

内积

CREATE INDEX ON items USING hnsw (embedding vector_ip_ops);

余弦距离

CREATE INDEX ON items USING hnsw (embedding vector_cosine_ops);

基本用法

距离

获取

SELECT * FROM embeddings WHERE vectors <-> '[3,1,2]' < 5

使用

// Build a random 1024-dimensional query vector with components in [0, 1].
$floatArray = array_map(
    static fn () => mt_rand(0, 1000000) / 1000000,
    array_fill(0, 1024, null)
);

// Option 1: DQL string, ordering rows by their distance to the query vector.
$query = $this->entityManager->createQuery(
    "SELECT i FROM App\Entity\Embeddings i ORDER BY distance(i.vectors, :vector) ASC"
);
// The third argument binds the PHP array through the `vector` DBAL type.
$query->setParameter('vector', $floatArray, 'vector');
$results = $query->setMaxResults(5)->getResult();
dump($results);

// Option 2: the same query built with the QueryBuilder.
// The entity is referenced by its fully-qualified class name; the short
// `App:Embeddings` alias syntax is deprecated and no longer exists in ORM 3.
$qb = $this->entityManager->createQueryBuilder();
$qb->select('e')
    ->from('App\Entity\Embeddings', 'e')
    ->orderBy('distance(e.vectors, :vector)', 'ASC')
    ->setParameter('vector', $floatArray, 'vector')
    ->setMaxResults(5)
    ;
$result = $qb->getQuery()->getResult();
dump($result);

内积

获取

SELECT (vectors <#> '[3,1,2]') * -1, * FROM embeddings

使用

// Build a random 1024-dimensional query vector with components in [0, 1].
$floatArray = array_map(
    static fn () => mt_rand(0, 1000000) / 1000000,
    array_fill(0, 1024, null)
);

// Option 1: DQL string, selecting the inner product alongside each entity.
$query = $this->entityManager->createQuery(
    "SELECT inner_product(e.vectors, :vector) , e FROM App\Entity\Embeddings e"
);
// The third argument binds the PHP array through the `vector` DBAL type.
$query->setParameter('vector', $floatArray, 'vector');
$results = $query->setMaxResults(5)->getResult();
dump($results);

// Option 2: the QueryBuilder form.
// The entity is referenced by its fully-qualified class name; the short
// `App:Embeddings` alias syntax is deprecated and no longer exists in ORM 3.
$qb = $this->entityManager->createQueryBuilder();
$qb->select('e')
    ->addSelect('inner_product(e.vectors, :vector)')
    ->from('App\Entity\Embeddings', 'e')
    ->setParameter('vector', $floatArray, 'vector')
    ->setMaxResults(5)
    ;
$result = $qb->getQuery()->getResult();
dump($result);

余弦相似度

获取

SELECT 1 - (vectors <=> '[3,1,2]'), * FROM embeddings

使用

// Build a random 1024-dimensional query vector with components in [0, 1].
$floatArray = array_map(
    static fn () => mt_rand(0, 1000000) / 1000000,
    array_fill(0, 1024, null)
);

// Option 1: DQL string, selecting the cosine similarity alongside each entity.
$query = $this->entityManager->createQuery(
    "SELECT cosine_similarity(e.vectors, :vector) , e FROM App\Entity\Embeddings e"
);
// The third argument binds the PHP array through the `vector` DBAL type.
$query->setParameter('vector', $floatArray, 'vector');
$results = $query->setMaxResults(5)->getResult();
dump($results);

// Option 2: the QueryBuilder form.
// The entity is referenced by its fully-qualified class name; the short
// `App:Embeddings` alias syntax is deprecated and no longer exists in ORM 3.
$qb = $this->entityManager->createQueryBuilder();
$qb->select('e')
    ->addSelect('cosine_similarity(e.vectors, :vector)')
    ->from('App\Entity\Embeddings', 'e')
    ->setParameter('vector', $floatArray, 'vector')
    ->setMaxResults(5)
    ;
$result = $qb->getQuery()->getResult();
dump($result);