This overview covers text-based embedding models. LangChain does not currently support multimodal embeddings.
Embedding models transform raw text, such as a sentence, paragraph, or tweet, into a fixed-length vector of numbers that captures its semantic meaning. These vectors allow machines to compare and search text based on meaning rather than exact words. In practice, this means that texts with similar ideas sit close together in the vector space. For example, instead of matching only the phrase “machine learning”, embeddings can surface documents that discuss related concepts even when the wording differs.
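To see this numerically, here is a minimal sketch that embeds three sentences and compares them with cosine similarity. It assumes the OpenAIEmbeddings model used later on this page (and numpy for the math); any embedding model behaves similarly, and the similarity values shown in comments are indicative rather than exact.

import numpy as np
from langchain_openai import OpenAIEmbeddings  # assumes OPENAI_API_KEY is set

embeddings = OpenAIEmbeddings(model="text-embedding-3-large")

def cosine_similarity(a, b):
    a, b = np.asarray(a), np.asarray(b)
    return float(a @ b / (np.linalg.norm(a) * np.linalg.norm(b)))

ml = embeddings.embed_query("Machine learning models improve with data.")
nn = embeddings.embed_query("Neural networks are trained on large datasets.")
cat = embeddings.embed_query("My cat sleeps all afternoon.")

# The two related sentences typically score higher than the unrelated pair.
print(cosine_similarity(ml, nn))   # relatively high
print(cosine_similarity(ml, cat))  # noticeably lower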
LangChain provides a standard interface for text embedding models (e.g., OpenAI, Cohere, Hugging Face) via the Embeddings interface. Two main methods are available:
embed_documents(texts: List[str]) → List[List[float]]: Embeds a list of documents.
embed_query(text: str) → List[float]: Embeds a single query.
The interface allows queries and documents to be embedded with different strategies, though most providers handle them the same way in practice.
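As a quick illustration of the two methods, the sketch below uses the DeterministicFakeEmbedding model (covered later on this page) so it runs without an API key; any provider from the list below can be dropped in instead.

from langchain_core.embeddings import DeterministicFakeEmbedding

embeddings = DeterministicFakeEmbedding(size=256)

# embed_documents: many texts in, one vector per text out
doc_vectors = embeddings.embed_documents(["First document.", "Second document."])
print(len(doc_vectors), len(doc_vectors[0]))  # 2 256

# embed_query: one text in, one vector out
query_vector = embeddings.embed_query("A search query")
print(len(query_vector))  # 256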
OpenAI

pip install -qU langchain-openai

import getpass
import os

if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("Enter API key for OpenAI: ")

from langchain_openai import OpenAIEmbeddings

embeddings = OpenAIEmbeddings(model="text-embedding-3-large")
embeddings.embed_query("Hello, world!")
Azure
pip install -qU "langchain[azure]"

import getpass
import os

if not os.environ.get("AZURE_OPENAI_API_KEY"):
    os.environ["AZURE_OPENAI_API_KEY"] = getpass.getpass("Enter API key for Azure: ")

from langchain_openai import AzureOpenAIEmbeddings

embeddings = AzureOpenAIEmbeddings(
    azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"],
    azure_deployment=os.environ["AZURE_OPENAI_DEPLOYMENT_NAME"],
    openai_api_version=os.environ["AZURE_OPENAI_API_VERSION"],
)
embeddings.embed_query("Hello, world!")
Google Gemini
pip install -qU langchain-google-genai

import getpass
import os

if not os.environ.get("GOOGLE_API_KEY"):
    os.environ["GOOGLE_API_KEY"] = getpass.getpass("Enter API key for Google Gemini: ")

from langchain_google_genai import GoogleGenerativeAIEmbeddings

embeddings = GoogleGenerativeAIEmbeddings(model="models/gemini-embedding-001")
embeddings.embed_query("Hello, world!")
Google Vertex
pip install -qU langchain-google-vertexai

from langchain_google_vertexai import VertexAIEmbeddings

embeddings = VertexAIEmbeddings(model="text-embedding-005")
embeddings.embed_query("Hello, world!")
AWS
pip install -qU langchain-aws

from langchain_aws import BedrockEmbeddings

embeddings = BedrockEmbeddings(model_id="amazon.titan-embed-text-v2:0")
embeddings.embed_query("Hello, world!")
HuggingFace
pip install -qU langchain-huggingface

from langchain_huggingface import HuggingFaceEmbeddings

embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
embeddings.embed_query("Hello, world!")
Ollama
pip install -qU langchain-ollama

from langchain_ollama import OllamaEmbeddings

embeddings = OllamaEmbeddings(model="llama3")
embeddings.embed_query("Hello, world!")
Cohere
pip install -qU langchain-cohere

import getpass
import os

if not os.environ.get("COHERE_API_KEY"):
    os.environ["COHERE_API_KEY"] = getpass.getpass("Enter API key for Cohere: ")

from langchain_cohere import CohereEmbeddings

embeddings = CohereEmbeddings(model="embed-english-v3.0")
embeddings.embed_query("Hello, world!")
Mistral AI
pip install -qU langchain-mistralai

import getpass
import os

if not os.environ.get("MISTRAL_API_KEY"):
    os.environ["MISTRAL_API_KEY"] = getpass.getpass("Enter API key for Mistral AI: ")

from langchain_mistralai import MistralAIEmbeddings

embeddings = MistralAIEmbeddings(model="mistral-embed")
embeddings.embed_query("Hello, world!")
Nomic
pip install -qU langchain-nomic

import getpass
import os

if not os.environ.get("NOMIC_API_KEY"):
    os.environ["NOMIC_API_KEY"] = getpass.getpass("Enter API key for Nomic: ")

from langchain_nomic import NomicEmbeddings

embeddings = NomicEmbeddings(model="nomic-embed-text-v1.5")
embeddings.embed_query("Hello, world!")
NVIDIA
pip install -qU langchain-nvidia-ai-endpoints

import getpass
import os

if not os.environ.get("NVIDIA_API_KEY"):
    os.environ["NVIDIA_API_KEY"] = getpass.getpass("Enter API key for NVIDIA: ")

from langchain_nvidia_ai_endpoints import NVIDIAEmbeddings

embeddings = NVIDIAEmbeddings(model="NV-Embed-QA")
embeddings.embed_query("Hello, world!")
Voyage AI
pip install -qU langchain-voyageai

import getpass
import os

if not os.environ.get("VOYAGE_API_KEY"):
    os.environ["VOYAGE_API_KEY"] = getpass.getpass("Enter API key for Voyage AI: ")

from langchain_voyageai import VoyageAIEmbeddings

embeddings = VoyageAIEmbeddings(model="voyage-3")
embeddings.embed_query("Hello, world!")
IBM watsonx
pip install -qU langchain-ibm

import getpass
import os

if not os.environ.get("WATSONX_APIKEY"):
    os.environ["WATSONX_APIKEY"] = getpass.getpass("Enter API key for IBM watsonx: ")

from langchain_ibm import WatsonxEmbeddings

embeddings = WatsonxEmbeddings(
    model_id="ibm/slate-125m-english-rtrvr",
    url="https://us-south.ml.cloud.ibm.com",
    project_id="<WATSONX PROJECT_ID>",
)
embeddings.embed_query("Hello, world!")
Fake
pip install -qU langchain-core

from langchain_core.embeddings import DeterministicFakeEmbedding

embeddings = DeterministicFakeEmbedding(size=4096)
embeddings.embed_query("Hello, world!")
Embeddings can be stored or temporarily cached to avoid needing to recompute them. Caching is handled by CacheBackedEmbeddings, a wrapper that stores embeddings in a key-value store: each text is hashed, and the hash is used as the cache key. The main supported way to initialize a CacheBackedEmbeddings is from_bytes_store, which takes the following parameters:
underlying_embedder: The embedder to use for embedding.
document_embedding_cache: Any ByteStore for caching document embeddings.
batch_size: (optional, defaults to None) The number of documents to embed between store updates.
namespace: (optional, defaults to "") The namespace to use for the document cache. Helps avoid collisions (e.g., set it to the embedding model name).
query_embedding_cache: (optional, defaults to None) A ByteStore for caching query embeddings, or True to reuse the same store as document_embedding_cache.
import time

from langchain.embeddings import CacheBackedEmbeddings
from langchain.storage import LocalFileStore

# Create your underlying embeddings model
underlying_embeddings = ...  # e.g., OpenAIEmbeddings(), HuggingFaceEmbeddings(), etc.

# LocalFileStore persists embeddings to the local filesystem.
# This isn't for production use, but is useful for local development.
store = LocalFileStore("./cache/")

cached_embedder = CacheBackedEmbeddings.from_bytes_store(
    underlying_embeddings,
    store,
    namespace=underlying_embeddings.model,
    query_embedding_cache=True,  # cache query embeddings in the same store
)

# Example: caching a query embedding
tic = time.time()
print(cached_embedder.embed_query("Hello, world!"))
print(f"First call took: {time.time() - tic:.2f} seconds")

# Subsequent calls use the cache
tic = time.time()
print(cached_embedder.embed_query("Hello, world!"))
print(f"Second call took: {time.time() - tic:.2f} seconds")
In production, you would typically use a more robust persistent store, such as a database or cloud storage. Please see stores integrations for options.
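When document and query traffic have different persistence needs, the two caches can also be split. Here is a minimal sketch using the parameters described above; the store path and namespace string are illustrative:

from langchain.embeddings import CacheBackedEmbeddings
from langchain.storage import InMemoryByteStore, LocalFileStore

underlying_embeddings = ...  # any Embeddings implementation

# Document embeddings persist on disk; query embeddings live in a
# short-lived in-memory cache, since queries are often one-off.
cached_embedder = CacheBackedEmbeddings.from_bytes_store(
    underlying_embeddings,
    LocalFileStore("./doc_cache/"),  # document_embedding_cache
    namespace="my-embedding-model",  # illustrative; e.g., the model name
    batch_size=100,  # write to the store every 100 documents
    query_embedding_cache=InMemoryByteStore(),
)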