The AI-Native Database for Smart Retrieval and Reasoning
Power your LLMs, agents, and intelligent applications with a single, unified platform for retrieval, reasoning, and memory.
# Define schema
class Doc(Base):
    __tablename__ = "doc"
    id = Column(Integer, primary_key=True)
    content = Column(Text)
    embedding = Column(VectorType(dim=3))


# Create the table and attach an L2-distance vector index for fast ANN search.
Base.metadata.create_all(engine)
VectorAdaptor(engine).create_vector_index(
    Doc.embedding, tidb_vector.DistanceMetric.L2
)

# Insert sample documents with their embeddings.
with Session(engine) as session:
    session.add(Doc(content="dog", embedding=[1, 2, 1]))
    session.add(Doc(content="fish", embedding=[1, 2, 4]))
    session.commit()

# Retrieve the single nearest document by L2 distance to the query vector.
with Session(engine) as session:
    nearest = session.execute(
        select(Doc.content)
        .order_by(Doc.embedding.l2_distance([1, 2, 3]))
        .limit(1)
    ).all()
    print(nearest)
-- 3-dimensional vector column plus a vector index using L2 distance.
CREATE TABLE doc(
content TEXT,
embedding VECTOR(3),
VECTOR INDEX ((VEC_L2_DISTANCE(embedding)))
);
-- Seed two documents; vector literals are passed as strings.
INSERT INTO doc VALUES
("dog", "[1, 2, 1]"),
("fish", "[1, 2, 4]");
-- Search nearest 1 embedding using L2 distance
SELECT content
FROM doc
ORDER BY VEC_L2_DISTANCE(embedding, "[1, 2, 3]")
LIMIT 1;
# Define schema
class Doc(Base):
    __tablename__ = "doc"
    id = Column(Integer, primary_key=True)
    content = Column(Text)
    kind = Column(Text)
    embedding = Column(VectorType(dim=3))


# Create the table and an L2-distance vector index.
Base.metadata.create_all(engine)
VectorAdaptor(engine).create_vector_index(
    Doc.embedding, tidb_vector.DistanceMetric.L2
)

# Insert documents of two different kinds.
with Session(engine) as session:
    session.add(Doc(content="dog", kind="animal", embedding=[1, 2, 1]))
    session.add(Doc(content="fish", kind="animal", embedding=[1, 2, 4]))
    session.add(Doc(content="tree", kind="plant", embedding=[1, 2, 3]))
    session.commit()

# Vector search combined with an ordinary relational WHERE filter.
with Session(engine) as session:
    best_match = session.execute(
        select(Doc.content)
        .where(Doc.kind == "animal")
        .order_by(Doc.embedding.l2_distance([1, 2, 3]))
        .limit(1)
    ).all()
    print(best_match)
-- Vector column alongside an ordinary filterable column (kind).
CREATE TABLE doc(
content TEXT,
kind TEXT,
embedding VECTOR(3),
VECTOR INDEX ((VEC_L2_DISTANCE(embedding)))
);
-- Seed two animals and one plant.
INSERT INTO doc VALUES
("dog", "animal", "[1, 2, 1]"),
("fish", "animal", "[1, 2, 4]"),
("tree", "plant", "[1, 2, 3]");
-- Search with any conditions via WHERE clause
SELECT content
FROM doc
WHERE kind = "animal"
ORDER BY VEC_L2_DISTANCE(embedding, "[1, 2, 3]")
LIMIT 1;
# Schema
class Vertex(Base):
    __tablename__ = "vertices"
    id = Column(Integer, primary_key=True)
    name = Column(Text)


class Edge(Base):
    __tablename__ = "edges"
    id = Column(Integer, primary_key=True)
    source_id = Column(Integer)
    target_id = Column(Integer)
    description = Column(Text)
    description_vec = Column(VectorType(dim=3))


# Insert data here...

# Semantically search the edges, then pull in both endpoint vertices.
with Session(engine) as session:
    # Subquery: top-100 edges ranked by cosine distance to the query vector.
    nearest_edges = (
        select(Edge)
        .with_only_columns(Edge.source_id, Edge.target_id, Edge.description, Edge.id)
        .order_by(Edge.description_vec.cosine_distance([1, 2, 3]))
        .limit(100)
    ).subquery()

    # Join the candidate edges with their source and target vertices at once.
    SourceVertex = aliased(Vertex)
    TargetVertex = aliased(Vertex)
    CandidateEdge = aliased(Edge, nearest_edges)
    rows = session.execute(
        select(CandidateEdge, SourceVertex, TargetVertex)
        .join(SourceVertex, SourceVertex.id == CandidateEdge.source_id)
        .join(TargetVertex, TargetVertex.id == CandidateEdge.target_id)
    )
    for edge, source_vertex, target_vertex in rows:
        print(edge.__dict__, source_vertex.__dict__, target_vertex.__dict__)
-- Schema
CREATE TABLE vertices (
id INT PRIMARY KEY,
name TEXT
);
-- Edges carry a description embedding indexed for cosine-distance search.
CREATE TABLE edges (
id INT PRIMARY KEY,
source_id INT,
target_id INT,
description TEXT,
description_vec VECTOR(3),
VECTOR INDEX ((VEC_COSINE_DISTANCE(description_vec)))
);
-- Insert data here...
-- Semantic Search the vertex, then retrieve its source edge and target edge
-- Inner query: top-100 edges by cosine distance; outer query joins both
-- endpoint vertices in a single round trip.
SELECT *
FROM (
SELECT source_id, target_id, description
FROM edges
ORDER BY VEC_COSINE_DISTANCE(description_vec, '[1, 2, 3]')
LIMIT 100
) AS e
INNER JOIN vertices AS source_vertex ON source_vertex.id = e.source_id
INNER JOIN vertices AS target_vertex ON target_vertex.id = e.target_id;
# Define schema
class Doc(Base):
    __tablename__ = "doc"
    id = Column(Integer, primary_key=True)
    content = Column(Text)
    embedding = Column(VectorType(dim=3))


# Create the table and an L2-distance vector index.
Base.metadata.create_all(engine)
VectorAdaptor(engine).create_vector_index(
    Doc.embedding, tidb_vector.DistanceMetric.L2
)

# Insert sample documents.
with Session(engine) as session:
    session.add(Doc(content="dog", embedding=[1, 2, 1]))
    session.add(Doc(content="fish", embedding=[1, 2, 4]))
    session.commit()

# Search
with Session(engine) as session:
    hits = session.execute(
        # Retrieve ID and content in the search result,
        # or any other metadata you like
        select(Doc.id, Doc.content)
        .order_by(Doc.embedding.l2_distance([1, 2, 3]))
        .limit(1)
    ).all()
    print(hits)
-- Document table with an L2-distance vector index.
CREATE TABLE doc(
content TEXT,
embedding VECTOR(3),
VECTOR INDEX ((VEC_L2_DISTANCE(embedding)))
);
-- Seed two documents.
INSERT INTO doc VALUES
("dog", "[1, 2, 1]"),
("fish", "[1, 2, 4]");
-- Retrieve ID and content in the search result,
-- or any other metadata you like
SELECT id, content
FROM doc
ORDER BY VEC_L2_DISTANCE(embedding, "[1, 2, 3]")
LIMIT 1;
# Schema
class Feedback(Base):
    __tablename__ = "feedback"
    id = Column(Integer, primary_key=True)
    user_id = Column(Integer)
    # 0 = Like, 1 = Dislike; indexed so the dislike-only filters stay fast.
    is_dislike = Column(Integer, index=True)
    chat_context = Column(LONGTEXT)
    timestamp = Column(Integer)


# Insert data here...

# Inspect the 100 most recent dislike feedbacks in detail.
with Session(engine) as session:
    recent_dislikes = session.execute(
        select(Feedback)
        .where(Feedback.is_dislike == 1)
        .order_by(Feedback.timestamp.desc())
        .limit(100)
    )
    for feedback in recent_dislikes.scalars():
        print(feedback.id, feedback.user_id, feedback.chat_context)

# Aggregate dislike counts into one-minute windows over the last hour.
with Session(engine) as session:
    trend = session.execute(text("""
SELECT COUNT(*), FROM_UNIXTIME(FLOOR(timestamp / 60) * 60) AS window_start
FROM feedback
WHERE timestamp >= UNIX_TIMESTAMP() - 60 * 60
AND is_dislike = 1
GROUP BY window_start
""")).all()
    print(trend)
-- Feedback log: a plain relational table, no vector column required.
CREATE TABLE feedback (
user_id INT,
is_dislike INT, -- 0=Like, 1=Dislike
chat_context LONGTEXT,
timestamp INT,
INDEX (is_dislike) -- speeds up the dislike-only filters below
);
-- Insert data here...
-- Dig into details of recent 100 dislike feedbacks
SELECT *
FROM feedback
WHERE is_dislike = 1
ORDER BY timestamp DESC
LIMIT 100;
-- Plot feedback trends in the last hour
-- FLOOR(timestamp / 60) * 60 buckets rows into one-minute windows.
SELECT COUNT(*), FROM_UNIXTIME(FLOOR(timestamp / 60) * 60) AS window_start
FROM feedback
WHERE
timestamp >= UNIX_TIMESTAMP() - 60 * 60
AND is_dislike = 1
GROUP BY window_start;
# Define schema
class Doc(Base):
    __tablename__ = "doc"
    id = Column(Integer, primary_key=True)
    owner_id = Column(Integer)
    embedding = Column(VectorType(dim=3))
    content = Column(Text)


# Create the table and an L2-distance vector index.
Base.metadata.create_all(engine)
VectorAdaptor(engine).create_vector_index(
    Doc.embedding, tidb_vector.DistanceMetric.L2
)

# Insert documents belonging to two different owners.
with Session(engine) as session:
    session.add(Doc(owner_id=10, content="dog", embedding=[1, 2, 1]))
    session.add(Doc(owner_id=10, content="fish", embedding=[1, 2, 4]))
    session.add(Doc(owner_id=17, content="tree", embedding=[1, 0, 0]))
    session.commit()

# Search top 10 similar docs owned by user 17.
with Session(engine) as session:
    matches = session.execute(
        select(Doc.owner_id, Doc.content)
        .where(Doc.owner_id == 17)
        .order_by(Doc.embedding.l2_distance([1, 2, 3]))
        .limit(10)
    ).all()
    print(matches)
-- Per-owner documents with an L2-distance vector index.
CREATE TABLE doc(
owner_id INT,
content TEXT,
embedding VECTOR(3),
VECTOR INDEX ((VEC_L2_DISTANCE(embedding)))
);
-- Seed rows; "tree" uses [1, 0, 0] to stay consistent with the paired
-- SQLAlchemy example (the previous "[1, 2, 4]" duplicated "fish").
INSERT INTO doc VALUES
(10, "dog", "[1, 2, 1]"),
(10, "fish", "[1, 2, 4]"),
(17, "tree", "[1, 0, 0]");
-- Search top 10 similar docs owned by user 17
SELECT owner_id, content
FROM doc
WHERE owner_id = 17
ORDER BY VEC_L2_DISTANCE(embedding, "[1, 2, 3]")
LIMIT 10;
Say goodbye to data synchronization, duplication, or maintaining multiple data stores.
# Define schema
class Doc(Base):
    __tablename__ = "doc"
    id = Column(Integer, primary_key=True)
    content = Column(Text)
    embedding = Column(VectorType(dim=3))


# Create the table and attach an L2-distance vector index for fast ANN search.
Base.metadata.create_all(engine)
VectorAdaptor(engine).create_vector_index(
    Doc.embedding, tidb_vector.DistanceMetric.L2
)

# Insert sample documents with their embeddings.
with Session(engine) as session:
    session.add(Doc(content="dog", embedding=[1, 2, 1]))
    session.add(Doc(content="fish", embedding=[1, 2, 4]))
    session.commit()

# Retrieve the single nearest document by L2 distance to the query vector.
with Session(engine) as session:
    nearest = session.execute(
        select(Doc.content)
        .order_by(Doc.embedding.l2_distance([1, 2, 3]))
        .limit(1)
    ).all()
    print(nearest)
-- 3-dimensional vector column plus a vector index using L2 distance.
CREATE TABLE doc(
content TEXT,
embedding VECTOR(3),
VECTOR INDEX ((VEC_L2_DISTANCE(embedding)))
);
-- Seed two documents; vector literals are passed as strings.
INSERT INTO doc VALUES
("dog", "[1, 2, 1]"),
("fish", "[1, 2, 4]");
-- Search nearest 1 embedding using L2 distance
SELECT content
FROM doc
ORDER BY VEC_L2_DISTANCE(embedding, "[1, 2, 3]")
LIMIT 1;
# Define schema
class Doc(Base):
    __tablename__ = "doc"
    id = Column(Integer, primary_key=True)
    content = Column(Text)
    kind = Column(Text)
    embedding = Column(VectorType(dim=3))


# Create the table and an L2-distance vector index.
Base.metadata.create_all(engine)
VectorAdaptor(engine).create_vector_index(
    Doc.embedding, tidb_vector.DistanceMetric.L2
)

# Insert documents of two different kinds.
with Session(engine) as session:
    session.add(Doc(content="dog", kind="animal", embedding=[1, 2, 1]))
    session.add(Doc(content="fish", kind="animal", embedding=[1, 2, 4]))
    session.add(Doc(content="tree", kind="plant", embedding=[1, 2, 3]))
    session.commit()

# Vector search combined with an ordinary relational WHERE filter.
with Session(engine) as session:
    best_match = session.execute(
        select(Doc.content)
        .where(Doc.kind == "animal")
        .order_by(Doc.embedding.l2_distance([1, 2, 3]))
        .limit(1)
    ).all()
    print(best_match)
-- Vector column alongside an ordinary filterable column (kind).
CREATE TABLE doc(
content TEXT,
kind TEXT,
embedding VECTOR(3),
VECTOR INDEX ((VEC_L2_DISTANCE(embedding)))
);
-- Seed two animals and one plant.
INSERT INTO doc VALUES
("dog", "animal", "[1, 2, 1]"),
("fish", "animal", "[1, 2, 4]"),
("tree", "plant", "[1, 2, 3]");
-- Search with any conditions via WHERE clause
SELECT content
FROM doc
WHERE kind = "animal"
ORDER BY VEC_L2_DISTANCE(embedding, "[1, 2, 3]")
LIMIT 1;
# Schema
class Vertex(Base):
    __tablename__ = "vertices"
    id = Column(Integer, primary_key=True)
    name = Column(Text)


class Edge(Base):
    __tablename__ = "edges"
    id = Column(Integer, primary_key=True)
    source_id = Column(Integer)
    target_id = Column(Integer)
    description = Column(Text)
    description_vec = Column(VectorType(dim=3))


# Insert data here...

# Semantically search the edges, then pull in both endpoint vertices.
with Session(engine) as session:
    # Subquery: top-100 edges ranked by cosine distance to the query vector.
    nearest_edges = (
        select(Edge)
        .with_only_columns(Edge.source_id, Edge.target_id, Edge.description, Edge.id)
        .order_by(Edge.description_vec.cosine_distance([1, 2, 3]))
        .limit(100)
    ).subquery()

    # Join the candidate edges with their source and target vertices at once.
    SourceVertex = aliased(Vertex)
    TargetVertex = aliased(Vertex)
    CandidateEdge = aliased(Edge, nearest_edges)
    rows = session.execute(
        select(CandidateEdge, SourceVertex, TargetVertex)
        .join(SourceVertex, SourceVertex.id == CandidateEdge.source_id)
        .join(TargetVertex, TargetVertex.id == CandidateEdge.target_id)
    )
    for edge, source_vertex, target_vertex in rows:
        print(edge.__dict__, source_vertex.__dict__, target_vertex.__dict__)
-- Schema
CREATE TABLE vertices (
id INT PRIMARY KEY,
name TEXT
);
-- Edges carry a description embedding indexed for cosine-distance search.
CREATE TABLE edges (
id INT PRIMARY KEY,
source_id INT,
target_id INT,
description TEXT,
description_vec VECTOR(3),
VECTOR INDEX ((VEC_COSINE_DISTANCE(description_vec)))
);
-- Insert data here...
-- Semantic Search the vertex, then retrieve its source edge and target edge
-- Inner query: top-100 edges by cosine distance; outer query joins both
-- endpoint vertices in a single round trip.
SELECT *
FROM (
SELECT source_id, target_id, description
FROM edges
ORDER BY VEC_COSINE_DISTANCE(description_vec, '[1, 2, 3]')
LIMIT 100
) AS e
INNER JOIN vertices AS source_vertex ON source_vertex.id = e.source_id
INNER JOIN vertices AS target_vertex ON target_vertex.id = e.target_id;
# Define schema
class Doc(Base):
    __tablename__ = "doc"
    id = Column(Integer, primary_key=True)
    content = Column(Text)
    embedding = Column(VectorType(dim=3))


# Create the table and an L2-distance vector index.
Base.metadata.create_all(engine)
VectorAdaptor(engine).create_vector_index(
    Doc.embedding, tidb_vector.DistanceMetric.L2
)

# Insert sample documents.
with Session(engine) as session:
    session.add(Doc(content="dog", embedding=[1, 2, 1]))
    session.add(Doc(content="fish", embedding=[1, 2, 4]))
    session.commit()

# Search
with Session(engine) as session:
    hits = session.execute(
        # Retrieve ID and content in the search result,
        # or any other metadata you like
        select(Doc.id, Doc.content)
        .order_by(Doc.embedding.l2_distance([1, 2, 3]))
        .limit(1)
    ).all()
    print(hits)
-- Document table with an L2-distance vector index.
CREATE TABLE doc(
content TEXT,
embedding VECTOR(3),
VECTOR INDEX ((VEC_L2_DISTANCE(embedding)))
);
-- Seed two documents.
INSERT INTO doc VALUES
("dog", "[1, 2, 1]"),
("fish", "[1, 2, 4]");
-- Retrieve ID and content in the search result,
-- or any other metadata you like
SELECT id, content
FROM doc
ORDER BY VEC_L2_DISTANCE(embedding, "[1, 2, 3]")
LIMIT 1;
# Schema
class Feedback(Base):
    __tablename__ = "feedback"
    id = Column(Integer, primary_key=True)
    user_id = Column(Integer)
    # 0 = Like, 1 = Dislike; indexed so the dislike-only filters stay fast.
    is_dislike = Column(Integer, index=True)
    chat_context = Column(LONGTEXT)
    timestamp = Column(Integer)


# Insert data here...

# Inspect the 100 most recent dislike feedbacks in detail.
with Session(engine) as session:
    recent_dislikes = session.execute(
        select(Feedback)
        .where(Feedback.is_dislike == 1)
        .order_by(Feedback.timestamp.desc())
        .limit(100)
    )
    for feedback in recent_dislikes.scalars():
        print(feedback.id, feedback.user_id, feedback.chat_context)

# Aggregate dislike counts into one-minute windows over the last hour.
with Session(engine) as session:
    trend = session.execute(text("""
SELECT COUNT(*), FROM_UNIXTIME(FLOOR(timestamp / 60) * 60) AS window_start
FROM feedback
WHERE timestamp >= UNIX_TIMESTAMP() - 60 * 60
AND is_dislike = 1
GROUP BY window_start
""")).all()
    print(trend)
-- Feedback log: a plain relational table, no vector column required.
CREATE TABLE feedback (
user_id INT,
is_dislike INT, -- 0=Like, 1=Dislike
chat_context LONGTEXT,
timestamp INT,
INDEX (is_dislike) -- speeds up the dislike-only filters below
);
-- Insert data here...
-- Dig into details of recent 100 dislike feedbacks
SELECT *
FROM feedback
WHERE is_dislike = 1
ORDER BY timestamp DESC
LIMIT 100;
-- Plot feedback trends in the last hour
-- FLOOR(timestamp / 60) * 60 buckets rows into one-minute windows.
SELECT COUNT(*), FROM_UNIXTIME(FLOOR(timestamp / 60) * 60) AS window_start
FROM feedback
WHERE
timestamp >= UNIX_TIMESTAMP() - 60 * 60
AND is_dislike = 1
GROUP BY window_start;
# Define schema
class Doc(Base):
    __tablename__ = "doc"
    id = Column(Integer, primary_key=True)
    owner_id = Column(Integer)
    embedding = Column(VectorType(dim=3))
    content = Column(Text)


# Create the table and an L2-distance vector index.
Base.metadata.create_all(engine)
VectorAdaptor(engine).create_vector_index(
    Doc.embedding, tidb_vector.DistanceMetric.L2
)

# Insert documents belonging to two different owners.
with Session(engine) as session:
    session.add(Doc(owner_id=10, content="dog", embedding=[1, 2, 1]))
    session.add(Doc(owner_id=10, content="fish", embedding=[1, 2, 4]))
    session.add(Doc(owner_id=17, content="tree", embedding=[1, 0, 0]))
    session.commit()

# Search top 10 similar docs owned by user 17.
with Session(engine) as session:
    matches = session.execute(
        select(Doc.owner_id, Doc.content)
        .where(Doc.owner_id == 17)
        .order_by(Doc.embedding.l2_distance([1, 2, 3]))
        .limit(10)
    ).all()
    print(matches)
-- Per-owner documents with an L2-distance vector index.
CREATE TABLE doc(
owner_id INT,
content TEXT,
embedding VECTOR(3),
VECTOR INDEX ((VEC_L2_DISTANCE(embedding)))
);
-- Seed rows; "tree" uses [1, 0, 0] to stay consistent with the paired
-- SQLAlchemy example (the previous "[1, 2, 4]" duplicated "fish").
INSERT INTO doc VALUES
(10, "dog", "[1, 2, 1]"),
(10, "fish", "[1, 2, 4]"),
(17, "tree", "[1, 0, 0]");
-- Search top 10 similar docs owned by user 17
SELECT owner_id, content
FROM doc
WHERE owner_id = 17
ORDER BY VEC_L2_DISTANCE(embedding, "[1, 2, 3]")
LIMIT 10;
# Install TiUP (the TiDB toolchain installer) over verified HTTPS.
curl --proto '=https' --tlsv1.2 -sSf https://tiup-mirrors.pingcap.com/install.sh | sh
# Start a local nightly TiDB playground cluster tagged "my_vector_db".
~/.tiup/bin/tiup playground nightly --tag my_vector_db
* Supported platforms: Linux (x64 / ARM64), macOS (x64 / ARM64)
Production-ready integrations for every stage of your AI pipeline.
Quick start: copy and paste your connection string into any of the listed integrations.