Embedders map unstructured data such as text and images into high-dimensional vector representations, enabling semantic similarity computation and comparison. They are a core component of semantic search in knowledge retrieval systems.
Below is an introduction to some commonly used components:
For more components, refer to the LangChain embedding models documentation.
pip install --upgrade --quiet langchain langchain-huggingface sentence_transformers
- Create a `HuggingFaceEmbeddings` object
from langchain_huggingface import HuggingFaceEmbeddings
# Specify the HuggingFace model name to use
model_name = "BAAI/bge-small-en-v1.5"
# Specify model loading parameters; here it is set to run on CPU
model_kwargs = {"device": "cpu"}
# Specify encoding parameters; here it is set to normalize the output embeddings
encode_kwargs = {"normalize_embeddings": True}
# Create the HuggingFaceEmbeddings embedder object
embedder = HuggingFaceEmbeddings(
model_name=model_name,
model_kwargs=model_kwargs,
encode_kwargs=encode_kwargs
)
- Construct a `LangchainKnowledge` object using this `embedder` object
from trpc_agent_sdk.server.knowledge.langchain_knowledge import LangchainKnowledge
rag = LangchainKnowledge(
prompt_template=rag_prompt,
document_loader=text_loader,
document_transformer=text_splitter,
embedder=embedder, # Pass the constructed embedder
vectorstore=vectorstore,
)

pip install hunyuan langchain-community
pip install "tencentcloud-sdk-python>=3.0.1139"
- Create a `HunyuanEmbeddings` object
from langchain_community.embeddings import HunyuanEmbeddings
embedder = HunyuanEmbeddings(
hunyuan_secret_id="xxx", # Hunyuan Secret ID, or set via the HUNYUAN_SECRET_ID environment variable
hunyuan_secret_key="xxx", # Hunyuan Secret Key, or set via the HUNYUAN_SECRET_KEY environment variable
region="ap-guangzhou" # Region of the Hunyuan service
)
- Construct a `LangchainKnowledge` object using this `embedder` object
rag = LangchainKnowledge(
...,
embedder=embedder,
...,
)