混合查询和多阶段查询
从 v1.10.0 版本开始可用
随着每个点支持多个命名向量的引入,在某些用例中,通过组合多个查询或分多个阶段执行搜索可以获得最佳搜索结果。
Qdrant 提供了一个灵活通用的接口来实现这一点,称为 Query API
(API 参考)。
实现查询组合的主要组件是 prefetch
参数,它启用子请求功能。
具体来说,只要一个查询至少有一个 prefetch,Qdrant 就会执行以下操作:
- 执行 prefetch 查询(或多个查询),
- 在其 prefetch 的结果上应用主查询。
此外,prefetch 本身也可以包含 prefetch,因此你可以拥有嵌套的 prefetch。
混合搜索
当你拥有同一数据的不同表示形式时,最常见的问题之一是如何将针对每种表示形式查询到的点合并成一个结果。

合并多个查询的结果
例如,在文本搜索中,结合稠密向量和稀疏向量通常很有用,以获得最佳语义和最佳特定词语匹配。
Qdrant 目前有两种方法来组合来自不同查询的结果:
rrf
- 倒数排名融合考虑结果在每个查询中的位置,并提升在多个查询中都出现在顶部的结果。
dbsf
- 基于分布的分数融合(从 v1.11.0 版本开始可用)使用均值 +/- 3 倍标准差作为界限来归一化每个查询中各点的分数,然后将同一个点在不同查询中的分数相加。
这是一个倒数排名融合的示例,用于包含两个 prefetch 的查询,分别针对配置为存储稀疏向量和稠密向量的不同命名向量。
POST /collections/{collection_name}/points/query
{
"prefetch": [
{
"query": {
"indices": [1, 42], // <┐
"values": [0.22, 0.8] // <┴─sparse vector
},
"using": "sparse",
"limit": 20
},
{
"query": [0.01, 0.45, 0.67, ...], // <-- dense vector
"using": "dense",
"limit": 20
}
],
"query": { "fusion": "rrf" }, // <--- reciprocal rank fusion
"limit": 10
}
from qdrant_client import QdrantClient, models
client = QdrantClient(url="http://localhost:6333")
client.query_points(
collection_name="{collection_name}",
prefetch=[
models.Prefetch(
query=models.SparseVector(indices=[1, 42], values=[0.22, 0.8]),
using="sparse",
limit=20,
),
models.Prefetch(
query=[0.01, 0.45, 0.67], # <-- dense vector
using="dense",
limit=20,
),
],
query=models.FusionQuery(fusion=models.Fusion.RRF),
)
import { QdrantClient } from "@qdrant/js-client-rest";
const client = new QdrantClient({ host: "localhost", port: 6333 });
client.query("{collection_name}", {
prefetch: [
{
query: {
values: [0.22, 0.8],
indices: [1, 42],
},
using: 'sparse',
limit: 20,
},
{
query: [0.01, 0.45, 0.67],
using: 'dense',
limit: 20,
},
],
query: {
fusion: 'rrf',
},
});
use qdrant_client::Qdrant;
use qdrant_client::qdrant::{Fusion, PrefetchQueryBuilder, Query, QueryPointsBuilder};
let client = Qdrant::from_url("http://localhost:6334").build()?;
client.query(
QueryPointsBuilder::new("{collection_name}")
.add_prefetch(PrefetchQueryBuilder::default()
.query(Query::new_nearest([(1, 0.22), (42, 0.8)].as_slice()))
.using("sparse")
.limit(20u64)
)
.add_prefetch(PrefetchQueryBuilder::default()
.query(Query::new_nearest(vec![0.01, 0.45, 0.67]))
.using("dense")
.limit(20u64)
)
.query(Query::new_fusion(Fusion::Rrf))
).await?;
import static io.qdrant.client.QueryFactory.nearest;
import java.util.List;
import static io.qdrant.client.QueryFactory.fusion;
import io.qdrant.client.QdrantClient;
import io.qdrant.client.QdrantGrpcClient;
import io.qdrant.client.grpc.Points.Fusion;
import io.qdrant.client.grpc.Points.PrefetchQuery;
import io.qdrant.client.grpc.Points.QueryPoints;
QdrantClient client = new QdrantClient(QdrantGrpcClient.newBuilder("localhost", 6334, false).build());
client.queryAsync(
QueryPoints.newBuilder()
.setCollectionName("{collection_name}")
.addPrefetch(PrefetchQuery.newBuilder()
.setQuery(nearest(List.of(0.22f, 0.8f), List.of(1, 42)))
.setUsing("sparse")
.setLimit(20)
.build())
.addPrefetch(PrefetchQuery.newBuilder()
.setQuery(nearest(List.of(0.01f, 0.45f, 0.67f)))
.setUsing("dense")
.setLimit(20)
.build())
.setQuery(fusion(Fusion.RRF))
.build())
.get();
using Qdrant.Client;
using Qdrant.Client.Grpc;
var client = new QdrantClient("localhost", 6334);
await client.QueryAsync(
collectionName: "{collection_name}",
prefetch: new List < PrefetchQuery > {
new() {
Query = new(float, uint)[] {
(0.22f, 1), (0.8f, 42),
},
Using = "sparse",
Limit = 20
},
new() {
Query = new float[] {
0.01f, 0.45f, 0.67f
},
Using = "dense",
Limit = 20
}
},
query: Fusion.Rrf
);
import (
"context"
"github.com/qdrant/go-client/qdrant"
)
client, err := qdrant.NewClient(&qdrant.Config{
Host: "localhost",
Port: 6334,
})
client.Query(context.Background(), &qdrant.QueryPoints{
CollectionName: "{collection_name}",
Prefetch: []*qdrant.PrefetchQuery{
{
Query: qdrant.NewQuerySparse([]uint32{1, 42}, []float32{0.22, 0.8}),
Using: qdrant.PtrOf("sparse"),
},
{
Query: qdrant.NewQueryDense([]float32{0.01, 0.45, 0.67}),
Using: qdrant.PtrOf("dense"),
},
},
Query: qdrant.NewQueryFusion(qdrant.Fusion_RRF),
})
多阶段查询
在许多情况下,使用较大的向量表示形式可以提供更准确的搜索结果,但计算成本也更高。
将搜索分成两个阶段是一种已知技术:
- 首先,使用较小、成本较低的表示形式获取大量候选结果。
- 然后,使用更大、更准确的表示形式对候选结果进行重新评分。
围绕这个想法构建搜索架构有几种方法:
- 将量化向量作为第一阶段,将全精度向量作为第二阶段。
- 利用 Matryoshka Representation Learning (MRL) 使用较短向量生成候选向量,然后使用较长向量对其进行精炼。
- 使用常规稠密向量预取候选结果,然后使用像 ColBERT 这样的多向量模型对其进行重新评分。
为了兼顾所有优点,Qdrant 提供了一个便捷的接口来分阶段执行查询,首先获取粗略结果,然后使用更大的向量对其进行精炼。
重新评分示例
使用较短的 MRL 字节向量获取 1000 个结果,然后使用完整向量对其进行重新评分,获取前 10 名。
POST /collections/{collection_name}/points/query
{
"prefetch": {
"query": [1, 23, 45, 67], // <------------- small byte vector
"using": "mrl_byte",
"limit": 1000
},
"query": [0.01, 0.299, 0.45, 0.67, ...], // <-- full vector
"using": "full",
"limit": 10
}
from qdrant_client import QdrantClient, models
client = QdrantClient(url="http://localhost:6333")
client.query_points(
collection_name="{collection_name}",
prefetch=models.Prefetch(
query=[1, 23, 45, 67], # <------------- small byte vector
using="mrl_byte",
limit=1000,
),
query=[0.01, 0.299, 0.45, 0.67], # <-- full vector
using="full",
limit=10,
)
import { QdrantClient } from "@qdrant/js-client-rest";
const client = new QdrantClient({ host: "localhost", port: 6333 });
client.query("{collection_name}", {
prefetch: {
query: [1, 23, 45, 67], // <------------- small byte vector
using: 'mrl_byte',
limit: 1000,
},
query: [0.01, 0.299, 0.45, 0.67], // <-- full vector,
using: 'full',
limit: 10,
});
use qdrant_client::Qdrant;
use qdrant_client::qdrant::{PrefetchQueryBuilder, Query, QueryPointsBuilder};
let client = Qdrant::from_url("http://localhost:6334").build()?;
client.query(
QueryPointsBuilder::new("{collection_name}")
.add_prefetch(PrefetchQueryBuilder::default()
.query(Query::new_nearest(vec![1.0, 23.0, 45.0, 67.0]))
.using("mrl_byte")
.limit(1000u64)
)
.query(Query::new_nearest(vec![0.01, 0.299, 0.45, 0.67]))
.using("full")
.limit(10u64)
).await?;
import static io.qdrant.client.QueryFactory.nearest;
import io.qdrant.client.QdrantClient;
import io.qdrant.client.QdrantGrpcClient;
import io.qdrant.client.grpc.Points.PrefetchQuery;
import io.qdrant.client.grpc.Points.QueryPoints;
QdrantClient client =
new QdrantClient(QdrantGrpcClient.newBuilder("localhost", 6334, false).build());
client
.queryAsync(
QueryPoints.newBuilder()
.setCollectionName("{collection_name}")
.addPrefetch(
PrefetchQuery.newBuilder()
.setQuery(nearest(1, 23, 45, 67)) // <------------- small byte vector
.setLimit(1000)
.setUsing("mrl_byte")
.build())
.setQuery(nearest(0.01f, 0.299f, 0.45f, 0.67f)) // <-- full vector
.setUsing("full")
.setLimit(10)
.build())
.get();
using Qdrant.Client;
using Qdrant.Client.Grpc;
var client = new QdrantClient("localhost", 6334);
await client.QueryAsync(
collectionName: "{collection_name}",
prefetch: new List<PrefetchQuery> {
new() {
Query = new float[] { 1,23, 45, 67 }, // <------------- small byte vector
Using = "mrl_byte",
Limit = 1000
}
},
query: new float[] { 0.01f, 0.299f, 0.45f, 0.67f }, // <-- full vector
usingVector: "full",
limit: 10
);
import (
"context"
"github.com/qdrant/go-client/qdrant"
)
client, err := qdrant.NewClient(&qdrant.Config{
Host: "localhost",
Port: 6334,
})
client.Query(context.Background(), &qdrant.QueryPoints{
CollectionName: "{collection_name}",
Prefetch: []*qdrant.PrefetchQuery{
{
Query: qdrant.NewQueryDense([]float32{1, 23, 45, 67}),
Using: qdrant.PtrOf("mrl_byte"),
Limit: qdrant.PtrOf(uint64(1000)),
},
},
Query: qdrant.NewQueryDense([]float32{0.01, 0.299, 0.45, 0.67}),
Using: qdrant.PtrOf("full"),
})
使用默认向量获取 100 个结果,然后使用多向量对其进行重新评分,获取前 10 名。
POST /collections/{collection_name}/points/query
{
"prefetch": {
"query": [0.01, 0.45, 0.67, ...], // <-- dense vector
"limit": 100
},
"query": [ // <─┐
[0.1, 0.2, ...], // < │
[0.2, 0.1, ...], // < ├─ multi-vector
[0.8, 0.9, ...] // < │
], // <─┘
"using": "colbert",
"limit": 10
}
from qdrant_client import QdrantClient, models
client = QdrantClient(url="http://localhost:6333")
client.query_points(
collection_name="{collection_name}",
prefetch=models.Prefetch(
query=[0.01, 0.45, 0.67, 0.53], # <-- dense vector
limit=100,
),
query=[
[0.1, 0.2, 0.32], # <─┐
[0.2, 0.1, 0.52], # < ├─ multi-vector
[0.8, 0.9, 0.93], # < ┘
],
using="colbert",
limit=10,
)
import { QdrantClient } from "@qdrant/js-client-rest";
const client = new QdrantClient({ host: "localhost", port: 6333 });
client.query("{collection_name}", {
prefetch: {
query: [0.01, 0.45, 0.67], // <-- dense vector
limit: 100,
},
query: [
[0.1, 0.2], // <─┐
[0.2, 0.1], // < ├─ multi-vector
[0.8, 0.9], // < ┘
],
using: 'colbert',
limit: 10,
});
use qdrant_client::Qdrant;
use qdrant_client::qdrant::{PrefetchQueryBuilder, Query, QueryPointsBuilder};
let client = Qdrant::from_url("http://localhost:6334").build()?;
client.query(
QueryPointsBuilder::new("{collection_name}")
.add_prefetch(PrefetchQueryBuilder::default()
.query(Query::new_nearest(vec![0.01, 0.45, 0.67]))
.limit(100u64)
)
.query(Query::new_nearest(vec![
vec![0.1, 0.2],
vec![0.2, 0.1],
vec![0.8, 0.9],
]))
.using("colbert")
.limit(10u64)
).await?;
import static io.qdrant.client.QueryFactory.nearest;
import io.qdrant.client.QdrantClient;
import io.qdrant.client.QdrantGrpcClient;
import io.qdrant.client.grpc.Points.PrefetchQuery;
import io.qdrant.client.grpc.Points.QueryPoints;
QdrantClient client =
new QdrantClient(QdrantGrpcClient.newBuilder("localhost", 6334, false).build());
client
.queryAsync(
QueryPoints.newBuilder()
.setCollectionName("{collection_name}")
.addPrefetch(
PrefetchQuery.newBuilder()
.setQuery(nearest(0.01f, 0.45f, 0.67f)) // <-- dense vector
.setLimit(100)
.build())
.setQuery(
nearest(
new float[][] {
{0.1f, 0.2f}, // <─┐
{0.2f, 0.1f}, // < ├─ multi-vector
{0.8f, 0.9f} // < ┘
}))
.setUsing("colbert")
.setLimit(10)
.build())
.get();
using Qdrant.Client;
using Qdrant.Client.Grpc;
var client = new QdrantClient("localhost", 6334);
await client.QueryAsync(
collectionName: "{collection_name}",
prefetch: new List <PrefetchQuery> {
new() {
Query = new float[] { 0.01f, 0.45f, 0.67f }, // <-- dense vector
Limit = 100
}
},
query: new float[][] {
[0.1f, 0.2f], // <─┐
[0.2f, 0.1f], // < ├─ multi-vector
[0.8f, 0.9f] // < ┘
},
usingVector: "colbert",
limit: 10
);
import (
"context"
"github.com/qdrant/go-client/qdrant"
)
client, err := qdrant.NewClient(&qdrant.Config{
Host: "localhost",
Port: 6334,
})
client.Query(context.Background(), &qdrant.QueryPoints{
CollectionName: "{collection_name}",
Prefetch: []*qdrant.PrefetchQuery{
{
Query: qdrant.NewQueryDense([]float32{0.01, 0.45, 0.67}),
Limit: qdrant.PtrOf(uint64(100)),
},
},
Query: qdrant.NewQueryMulti([][]float32{
{0.1, 0.2},
{0.2, 0.1},
{0.8, 0.9},
}),
Using: qdrant.PtrOf("colbert"),
})
可以在单个查询中结合以上所有技术
POST /collections/{collection_name}/points/query
{
"prefetch": {
"prefetch": {
"query": [1, 23, 45, 67], // <------ small byte vector
"using": "mrl_byte",
"limit": 1000
},
"query": [0.01, 0.45, 0.67, ...], // <-- full dense vector
"using": "full",
"limit": 100
},
"query": [ // <─┐
[0.1, 0.2, ...], // < │
[0.2, 0.1, ...], // < ├─ multi-vector
[0.8, 0.9, ...] // < │
], // <─┘
"using": "colbert",
"limit": 10
}
from qdrant_client import QdrantClient, models
client = QdrantClient(url="http://localhost:6333")
client.query_points(
collection_name="{collection_name}",
prefetch=models.Prefetch(
prefetch=models.Prefetch(
query=[1, 23, 45, 67], # <------ small byte vector
using="mrl_byte",
limit=1000,
),
query=[0.01, 0.45, 0.67], # <-- full dense vector
using="full",
limit=100,
),
query=[
[0.17, 0.23, 0.52], # <─┐
[0.22, 0.11, 0.63], # < ├─ multi-vector
[0.86, 0.93, 0.12], # < ┘
],
using="colbert",
limit=10,
)
import { QdrantClient } from "@qdrant/js-client-rest";
const client = new QdrantClient({ host: "localhost", port: 6333 });
client.query("{collection_name}", {
prefetch: {
prefetch: {
query: [1, 23, 45, 67], // <------------- small byte vector
using: 'mrl_byte',
limit: 1000,
},
query: [0.01, 0.45, 0.67], // <-- full dense vector
using: 'full',
limit: 100,
},
query: [
[0.1, 0.2], // <─┐
[0.2, 0.1], // < ├─ multi-vector
[0.8, 0.9], // < ┘
],
using: 'colbert',
limit: 10,
});
use qdrant_client::Qdrant;
use qdrant_client::qdrant::{PrefetchQueryBuilder, Query, QueryPointsBuilder};
let client = Qdrant::from_url("http://localhost:6334").build()?;
client.query(
QueryPointsBuilder::new("{collection_name}")
.add_prefetch(PrefetchQueryBuilder::default()
.add_prefetch(PrefetchQueryBuilder::default()
.query(Query::new_nearest(vec![1.0, 23.0, 45.0, 67.0]))
.using("mrl_byte")
.limit(1000u64)
)
.query(Query::new_nearest(vec![0.01, 0.45, 0.67]))
.using("full")
.limit(100u64)
)
.query(Query::new_nearest(vec![
vec![0.1, 0.2],
vec![0.2, 0.1],
vec![0.8, 0.9],
]))
.using("colbert")
.limit(10u64)
).await?;
import static io.qdrant.client.QueryFactory.nearest;
import io.qdrant.client.QdrantClient;
import io.qdrant.client.QdrantGrpcClient;
import io.qdrant.client.grpc.Points.PrefetchQuery;
import io.qdrant.client.grpc.Points.QueryPoints;
QdrantClient client =
new QdrantClient(QdrantGrpcClient.newBuilder("localhost", 6334, false).build());
client
.queryAsync(
QueryPoints.newBuilder()
.setCollectionName("{collection_name}")
.addPrefetch(
PrefetchQuery.newBuilder()
.addPrefetch(
PrefetchQuery.newBuilder()
.setQuery(nearest(1, 23, 45, 67)) // <------------- small byte vector
.setUsing("mrl_byte")
.setLimit(1000)
.build())
.setQuery(nearest(0.01f, 0.45f, 0.67f)) // <-- dense vector
.setUsing("full")
.setLimit(100)
.build())
.setQuery(
nearest(
new float[][] {
{0.1f, 0.2f}, // <─┐
{0.2f, 0.1f}, // < ├─ multi-vector
{0.8f, 0.9f} // < ┘
}))
.setUsing("colbert")
.setLimit(10)
.build())
.get();
using Qdrant.Client;
using Qdrant.Client.Grpc;
var client = new QdrantClient("localhost", 6334);
await client.QueryAsync(
collectionName: "{collection_name}",
prefetch: new List <PrefetchQuery> {
new() {
Prefetch = {
new List <PrefetchQuery> {
new() {
Query = new float[] { 1, 23, 45, 67 }, // <------------- small byte vector
Using = "mrl_byte",
Limit = 1000
},
}
},
Query = new float[] {0.01f, 0.45f, 0.67f}, // <-- dense vector
Using = "full",
Limit = 100
}
},
query: new float[][] {
[0.1f, 0.2f], // <─┐
[0.2f, 0.1f], // < ├─ multi-vector
[0.8f, 0.9f] // < ┘
},
usingVector: "colbert",
limit: 10
);
import (
"context"
"github.com/qdrant/go-client/qdrant"
)
client, err := qdrant.NewClient(&qdrant.Config{
Host: "localhost",
Port: 6334,
})
client.Query(context.Background(), &qdrant.QueryPoints{
CollectionName: "{collection_name}",
Prefetch: []*qdrant.PrefetchQuery{
{
Prefetch: []*qdrant.PrefetchQuery{
{
Query: qdrant.NewQueryDense([]float32{1, 23, 45, 67}),
Using: qdrant.PtrOf("mrl_byte"),
Limit: qdrant.PtrOf(uint64(1000)),
},
},
Query: qdrant.NewQueryDense([]float32{0.01, 0.45, 0.67}),
Limit: qdrant.PtrOf(uint64(100)),
Using: qdrant.PtrOf("full"),
},
},
Query: qdrant.NewQueryMulti([][]float32{
{0.1, 0.2},
{0.2, 0.1},
{0.8, 0.9},
}),
Using: qdrant.PtrOf("colbert"),
})
分数提升
从 v1.14.0 版本开始可用
将向量搜索引入特定应用时,有时需要考虑业务逻辑来对最终结果列表进行排名。
一个简单的例子是我们自己的文档搜索栏。它为文档网站的每个部分都提供了向量。如果仅使用向量进行搜索,所有类型的元素都会被同等视为好的结果。然而,在搜索文档时,我们可以建立一个重要性层级:
标题 > 内容 > 片段
解决这个问题的一种方法是根据元素的类型对结果进行加权。例如,我们可以为标题和内容分配更高的权重,并保持片段不进行提升。
伪代码可能看起来像这样:
score = score + (is_title * 0.5) + (is_content * 0.25)
Query API 可以使用自定义公式对点进行重新评分。这些公式可以基于:
- 动态有效载荷值
- 条件
- prefetch 的分数
要表达公式,语法使用对象来标识每个元素。以上述文档示例为例,请求会像这样:
POST /collections/{collection_name}/points/query
{
"prefetch": {
"query": [0.2, 0.8, ...], // <-- dense vector
"limit": 50
},
"query": {
"formula": {
"sum": [
"$score",
{
"mult": [
0.5,
{
"key": "tag",
"match": { "any": ["h1", "h2", "h3", "h4"] } }
]
},
{
"mult": [
0.25,
{
"key": "tag",
"match": { "any": ["p", "li"] }
}
]
}
]
}
}
}
from qdrant_client import models
tag_boosted = client.query_points(
collection_name="{collection_name}",
prefetch=models.Prefetch(
query=[0.2, 0.8, ...], # <-- dense vector
limit=50
),
query=models.FormulaQuery(
formula=models.SumExpression(sum=[
"$score",
models.MultExpression(mult=[0.5, models.FieldCondition(key="tag", match=models.MatchAny(any=["h1", "h2", "h3", "h4"]))]),
models.MultExpression(mult=[0.25, models.FieldCondition(key="tag", match=models.MatchAny(any=["p", "li"]))])
]
))
)
import { QdrantClient } from "@qdrant/js-client-rest";
const client = new QdrantClient({ host: "localhost", port: 6333 });
const tag_boosted = await client.query(collectionName, {
prefetch: {
query: [0.2, 0.8, 0.1, 0.9],
limit: 50
},
query: {
formula: {
sum: [
"$score",
{
mult: [ 0.5, { key: "tag", match: { any: ["h1", "h2", "h3", "h4"] }} ]
},
{
mult: [ 0.25, { key: "tag", match: { any: ["p", "li"] }} ]
}
]
}
}
});
use qdrant_client::qdrant::{
Condition, Expression, FormulaBuilder, PrefetchQueryBuilder, QueryPointsBuilder,
};
use qdrant_client::Qdrant;
let client = Qdrant::from_url("http://localhost:6334").build()?;
let _tag_boosted = client.query(
QueryPointsBuilder::new("{collection_name}")
.add_prefetch(PrefetchQueryBuilder::default()
.query(vec![0.01, 0.45, 0.67])
.limit(100u64)
)
.query(FormulaBuilder::new(Expression::sum_with([
Expression::score(),
Expression::mult_with([
Expression::constant(0.5),
Expression::condition(Condition::matches("tag", ["h1", "h2", "h3", "h4"])),
]),
Expression::mult_with([
Expression::constant(0.25),
Expression::condition(Condition::matches("tag", ["p", "li"])),
]),
])))
.limit(10)
).await?;
import java.util.List;
import static io.qdrant.client.ConditionFactory.matchKeywords;
import static io.qdrant.client.ExpressionFactory.condition;
import static io.qdrant.client.ExpressionFactory.constant;
import static io.qdrant.client.ExpressionFactory.mult;
import static io.qdrant.client.ExpressionFactory.sum;
import static io.qdrant.client.ExpressionFactory.variable;
import static io.qdrant.client.QueryFactory.formula;
import static io.qdrant.client.QueryFactory.nearest;
import io.qdrant.client.QdrantClient;
import io.qdrant.client.QdrantGrpcClient;
import io.qdrant.client.grpc.Points.Formula;
import io.qdrant.client.grpc.Points.MultExpression;
import io.qdrant.client.grpc.Points.PrefetchQuery;
import io.qdrant.client.grpc.Points.QueryPoints;
import io.qdrant.client.grpc.Points.SumExpression;
QdrantClient client =
new QdrantClient(QdrantGrpcClient.newBuilder("localhost", 6334, false).build());
client
.queryAsync(
QueryPoints.newBuilder()
.setCollectionName("{collection_name}")
.addPrefetch(
PrefetchQuery.newBuilder()
.setQuery(nearest(0.01f, 0.45f, 0.67f))
.setLimit(100)
.build())
.setQuery(
formula(
Formula.newBuilder()
.setExpression(
sum(
SumExpression.newBuilder()
.addSum(variable("$score"))
.addSum(
mult(
MultExpression.newBuilder()
.addMult(constant(0.5f))
.addMult(
condition(
matchKeywords(
"tag",
List.of("h1", "h2", "h3", "h4"))))
.build()))
.addSum(mult(MultExpression.newBuilder()
.addMult(constant(0.25f))
.addMult(
condition(
matchKeywords(
"tag",
List.of("p", "li"))))
.build()))
.build()))
.build()))
.build())
.get();
using Qdrant.Client;
using Qdrant.Client.Grpc;
using static Qdrant.Client.Grpc.Conditions;
var client = new QdrantClient("localhost", 6334);
await client.QueryAsync(
collectionName: "{collection_name}",
prefetch:
[
new PrefetchQuery { Query = new float[] { 0.01f, 0.45f, 0.67f }, Limit = 100 },
],
query: new Formula
{
Expression = new SumExpression
{
Sum =
{
"$score",
new MultExpression
{
Mult = { 0.5f, Match("tag", ["h1", "h2", "h3", "h4"]) },
},
new MultExpression { Mult = { 0.25f, Match("tag", ["p", "li"]) } },
},
},
},
limit: 10
);
import (
"context"
"github.com/qdrant/go-client/qdrant"
)
client, err := qdrant.NewClient(&qdrant.Config{
Host: "localhost",
Port: 6334,
})
client.Query(context.Background(), &qdrant.QueryPoints{
CollectionName: "{collection_name}",
Prefetch: []*qdrant.PrefetchQuery{
{
Query: qdrant.NewQuery(0.01, 0.45, 0.67),
},
},
Query: qdrant.NewQueryFormula(&qdrant.Formula{
Expression: qdrant.NewExpressionSum(&qdrant.SumExpression{
Sum: []*qdrant.Expression{
qdrant.NewExpressionVariable("$score"),
qdrant.NewExpressionMult(&qdrant.MultExpression{
Mult: []*qdrant.Expression{
qdrant.NewExpressionConstant(0.5),
qdrant.NewExpressionCondition(qdrant.NewMatchKeywords("tag", "h1", "h2", "h3", "h4")),
},
}),
qdrant.NewExpressionMult(&qdrant.MultExpression{
Mult: []*qdrant.Expression{
qdrant.NewExpressionConstant(0.25),
qdrant.NewExpressionCondition(qdrant.NewMatchKeywords("tag", "p", "li")),
},
}),
},
}),
}),
})
有多种表达式可用,请查看API 文档了解具体详情。
- constant - 一个浮点数,例如 0.5。
- "$score" - 引用点在 prefetch 中的分数。这与 "$score[0]" 相同。
- "$score[0]","$score[1]","$score[2]",… - 使用多个 prefetch 时,你可以使用 prefetch 数组中的索引引用特定的 prefetch。
- payload key - 任何普通字符串都将引用一个有效载荷键。它使用与其他地方相同的 jsonpath 格式,例如 key 或 key.subkey。它会尝试从给定的键中提取一个数字。
- condition - 一个过滤条件。如果满足条件,其值为 1.0,否则为 0.0。
- mult - 将表达式数组中的各项相乘。
- sum - 求表达式数组的和。
- div - 将一个表达式除以另一个表达式。
- abs - 表达式的绝对值。
- pow - 将一个表达式提升到另一个表达式的幂。
- sqrt - 表达式的平方根。
- log10 - 表达式的以 10 为底的对数。
- ln - 表达式的自然对数。
- exp - 表达式的指数函数 (e^x)。
- geo distance - 两个地理点之间的 Haversine 距离。值需要是 { "lat": 0.0, "lon": 0.0 } 对象。
- decay - 对表达式应用衰减函数,将输出限制在 0 到 1 之间。可用的衰减函数有线性、指数和高斯。查看更多。
- datetime - 解析日期时间字符串(在此处查看格式),并将其用作 POSIX 时间戳(以秒为单位)。
- datetime key - 指定一个有效载荷键包含一个要解析为 POSIX 秒的日期时间字符串。
可以为变量(来自有效载荷或 prefetch 分数)未找到的情况定义一个默认值。默认值以变量到值的映射形式给出。如果找不到变量,且没有定义默认值,则使用默认值 0.0。
提升距离用户更近的点
另一个示例。将分数与结果距离用户的远近相结合。
考虑到每个点都有一个关联的地理位置,我们可以计算点与请求位置之间的距离。
假设我们在 prefetch 中有余弦相似度分数,我们可以使用辅助函数,通过使用衰减函数将地理距离限制在 0 到 1 之间。限制后,我们可以将分数和距离相加。伪代码:
score = score + gauss_decay(distance)
在这种情况下,我们使用一个 gauss_decay 函数。
POST /collections/{collection_name}/points/query
{
"prefetch": { "query": [0.2, 0.8, ...], "limit": 50 },
"query": {
"formula": {
"sum": [
"$score",
{
"gauss_decay": {
"x": {
"geo_distance": {
"origin": { "lat": 52.504043, "lon": 13.393236 },
"to": "geo.location"
}
},
"scale": 5000 // 5km
}
}
]
},
"defaults": { "geo.location": {"lat": 48.137154, "lon": 11.576124} }
}
}
from qdrant_client import models
geo_boosted = client.query_points(
collection_name="{collection_name}",
prefetch=models.Prefetch(
query=[0.2, 0.8, ...], # <-- dense vector
limit=50
),
query=models.FormulaQuery(
formula=models.SumExpression(sum=[
"$score",
models.GaussDecayExpression(
gauss_decay=models.DecayParamsExpression(
x=models.GeoDistance(
geo_distance=models.GeoDistanceParams(
origin=models.GeoPoint(
lat=52.504043,
lon=13.393236
), # Berlin
to="geo.location"
)
),
scale=5000 # 5km
)
)
]),
defaults={"geo.location": models.GeoPoint(lat=48.137154, lon=11.576124)} # Munich
)
)
import { QdrantClient } from "@qdrant/js-client-rest";
const client = new QdrantClient({ host: "localhost", port: 6333 });
const distance_boosted = await client.query(collectionName, {
prefetch: {
query: [0.2, 0.8, ...],
limit: 50
},
query: {
formula: {
sum: [
"$score",
{
gauss_decay: {
x: {
geo_distance: {
origin: { lat: 52.504043, lon: 13.393236 }, // Berlin
to: "geo.location"
}
},
scale: 5000 // 5km
}
}
]
},
defaults: { "geo.location": { lat: 48.137154, lon: 11.576124 } } // Munich
}
});
use qdrant_client::qdrant::{
GeoPoint, DecayParamsExpressionBuilder, Expression, FormulaBuilder, PrefetchQueryBuilder, QueryPointsBuilder,
};
use qdrant_client::Qdrant;
let client = Qdrant::from_url("http://localhost:6334").build()?;
let _geo_boosted = client.query(
QueryPointsBuilder::new("{collection_name}")
.add_prefetch(
PrefetchQueryBuilder::default()
.query(vec![0.01, 0.45, 0.67])
.limit(100u64),
)
.query(
FormulaBuilder::new(Expression::sum_with([
Expression::score(),
Expression::exp_decay(
DecayParamsExpressionBuilder::new(Expression::geo_distance_with(
// Berlin
GeoPoint { lat: 52.504043, lon: 13.393236 },
"geo.location",
))
.scale(5_000.0),
),
]))
// Munich
.add_default("geo.location", GeoPoint { lat: 48.137154, lon: 11.576124 }),
)
.limit(10),
)
.await?;
import static io.qdrant.client.ExpressionFactory.expDecay;
import static io.qdrant.client.ExpressionFactory.geoDistance;
import static io.qdrant.client.ExpressionFactory.sum;
import static io.qdrant.client.ExpressionFactory.variable;
import static io.qdrant.client.PointIdFactory.id;
import static io.qdrant.client.QueryFactory.formula;
import static io.qdrant.client.QueryFactory.nearest;
import static io.qdrant.client.ValueFactory.value;
import io.qdrant.client.QdrantClient;
import io.qdrant.client.QdrantGrpcClient;
import io.qdrant.client.grpc.Points.DecayParamsExpression;
import io.qdrant.client.grpc.Points.Formula;
import io.qdrant.client.grpc.Points.GeoDistance;
import io.qdrant.client.grpc.Points.GeoPoint;
import io.qdrant.client.grpc.Points.PrefetchQuery;
import io.qdrant.client.grpc.Points.QueryPoints;
import io.qdrant.client.grpc.Points.SumExpression;
QdrantClient client =
new QdrantClient(QdrantGrpcClient.newBuilder("localhost", 6334, false).build());
client
.queryAsync(
QueryPoints.newBuilder()
.setCollectionName("{collection_name}")
.addPrefetch(
PrefetchQuery.newBuilder()
.setQuery(nearest(0.01f, 0.45f, 0.67f))
.setLimit(100)
.build())
.setQuery(
formula(
Formula.newBuilder()
.setExpression(
sum(
SumExpression.newBuilder()
.addSum(variable("$score"))
.addSum(
expDecay(
DecayParamsExpression.newBuilder()
.setX(
geoDistance(
GeoDistance.newBuilder()
.setOrigin(
GeoPoint.newBuilder()
.setLat(52.504043)
.setLon(13.393236)
.build())
.setTo("geo.location")
.build()))
.setScale(5000)
.build()))
.build()))
.putDefaults(
"geo.location",
value(
Map.of(
"lat", value(48.137154),
"lon", value(11.576124))))
.build()))
.build())
.get();
using Qdrant.Client;
using Qdrant.Client.Grpc;
using static Qdrant.Client.Grpc.Expression;
var client = new QdrantClient("localhost", 6334);
await client.QueryAsync(
collectionName: "{collection_name}",
prefetch:
[
new PrefetchQuery { Query = new float[] { 0.01f, 0.45f, 0.67f }, Limit = 100 },
],
query: new Formula
{
Expression = new SumExpression
{
Sum =
{
"$score",
FromExpDecay(
new()
{
X = new GeoDistance
{
Origin = new GeoPoint { Lat = 52.504043, Lon = 13.393236 },
To = "geo.location",
},
Scale = 5000,
}
),
},
},
Defaults =
{
["geo.location"] = new Dictionary<string, Value>
{
["lat"] = 48.137154,
["lon"] = 11.576124,
},
},
}
);
import (
"context"
"github.com/qdrant/go-client/qdrant"
)
client, err := qdrant.NewClient(&qdrant.Config{
Host: "localhost",
Port: 6334,
})
client.Query(context.Background(), &qdrant.QueryPoints{
CollectionName: "{collection_name}",
Prefetch: []*qdrant.PrefetchQuery{
{
Query: qdrant.NewQuery(0.2, 0.8),
},
},
Query: qdrant.NewQueryFormula(&qdrant.Formula{
Expression: qdrant.NewExpressionSum(&qdrant.SumExpression{
Sum: []*qdrant.Expression{
qdrant.NewExpressionVariable("$score"),
qdrant.NewExpressionExpDecay(&qdrant.DecayParamsExpression{
X: qdrant.NewExpressionGeoDistance(&qdrant.GeoDistance{
Origin: &qdrant.GeoPoint{
Lat: 52.504043,
Lon: 13.393236,
},
To: "geo.location",
}),
}),
},
}),
Defaults: qdrant.NewValueMap(map[string]any{
"geo.location": map[string]any{
"lat": 48.137154,
"lon": 11.576124,
},
}),
}),
})
对于所有衰减函数,都有以下参数可用:
参数 | 默认值 | 描述 |
---|---|---|
x | 不适用 | 要衰减的值 |
target | 0.0 | 衰减达到峰值时的值。对于距离,通常设置为 0.0,但可以设置为任何值。 |
scale | 1.0 | 衰减函数等于 midpoint 时的值。它以 x 单位表示,例如,如果 x 以米为单位,则 scale 为 5000 意味着 5 公里。必须是非零正数。 |
midpoint | 0.5 | 当 x 等于 scale 时,输出为 midpoint 。必须在 (0.0, 1.0) 范围内(不包括端点)。 |
每种衰减函数的公式如下:
衰减函数
lin_decay
(绿色),范围:[0, 1]
$$ \text{lin_decay}(x) = \max\left(0,\ -\frac{\left(1-m_{idpoint}\right)}{s_{cale}}\cdot {abs}\left(x-t_{arget}\right)+1\right) $$
exp_decay
(红色),范围:(0, 1]
$$ \text{exp_decay}(x) = \exp\left(\frac{\ln\left(m_{idpoint}\right)}{s_{cale}}\cdot {abs}\left(x-t_{arget}\right)\right) $$
gauss_decay
(紫色),范围:(0, 1]
$$ \text{gauss_decay}(x) = \exp\left(\frac{\ln\left(m_{idpoint}\right)}{s_{cale}^{2}}\cdot \left(x-t_{arget}\right)^{2}\right) $$
分组
从 v1.11.0 版本开始可用
可以按特定字段对结果进行分组。当同一项目有多个点时,这很有用,可以避免结果中出现同一项目的冗余。
REST API (Schema)
POST /collections/{collection_name}/points/query/groups
{
// Same as in the regular query API
"query": [1.1],
// Grouping parameters
"group_by": "document_id", // Path of the field to group by
"limit": 4, // Max amount of groups
"group_size": 2 // Max amount of points per group
}
client.query_points_groups(
collection_name="{collection_name}",
# Same as in the regular query_points() API
query=[1.1],
# Grouping parameters
group_by="document_id", # Path of the field to group by
limit=4, # Max amount of groups
group_size=2, # Max amount of points per group
)
client.queryGroups("{collection_name}", {
query: [1.1],
group_by: "document_id",
limit: 4,
group_size: 2,
});
use qdrant_client::qdrant::QueryPointGroupsBuilder;
client
.query_groups(
QueryPointGroupsBuilder::new("{collection_name}", "document_id")
.query(vec![0.2, 0.1, 0.9, 0.7])
.group_size(2u64)
.with_payload(true)
.with_vectors(true)
.limit(4u64),
)
.await?;
import java.util.List;
import io.qdrant.client.grpc.Points.QueryPointGroups;
client.queryGroupsAsync(
QueryPointGroups.newBuilder()
.setCollectionName("{collection_name}")
.setQuery(nearest(0.2f, 0.1f, 0.9f, 0.7f))
.setGroupBy("document_id")
.setLimit(4)
.setGroupSize(2)
.build())
.get();
using Qdrant.Client;
var client = new QdrantClient("localhost", 6334);
await client.QueryGroupsAsync(
collectionName: "{collection_name}",
query: new float[] { 0.2f, 0.1f, 0.9f, 0.7f },
groupBy: "document_id",
limit: 4,
groupSize: 2
);
import (
"context"
"github.com/qdrant/go-client/qdrant"
)
client, err := qdrant.NewClient(&qdrant.Config{
Host: "localhost",
Port: 6334,
})
client.QueryGroups(context.Background(), &qdrant.QueryPointGroups{
CollectionName: "{collection_name}",
Query: qdrant.NewQuery(0.2, 0.1, 0.9, 0.7),
GroupBy: "document_id",
GroupSize: qdrant.PtrOf(uint64(2)),
})