Source code for qdrant_client.fastembed_common
from typing import Any, Optional, Union
from pydantic import BaseModel, Field
from qdrant_client.conversions.common_types import SparseVector
from qdrant_client.http import models
try:
from fastembed import (
TextEmbedding,
SparseTextEmbedding,
ImageEmbedding,
LateInteractionTextEmbedding,
LateInteractionMultimodalEmbedding,
)
from fastembed.common import OnnxProvider, ImageInput
except ImportError:
TextEmbedding = None
SparseTextEmbedding = None
ImageEmbedding = None
LateInteractionTextEmbedding = None
LateInteractionMultimodalEmbedding = None
OnnxProvider = None
ImageInput = None
class QueryResponse(BaseModel, extra="forbid"):  # type: ignore
    # One query result. `extra="forbid"` makes pydantic reject unknown fields.
    # NOTE: documented with comments rather than a class docstring, because
    # pydantic exposes the class docstring as the JSON-schema description.

    # Identifier of the matched record (string or integer).
    id: Union[str, int]
    # Dense vector of the record; may be None.
    embedding: Optional[list[float]]
    # Sparse vector of the record; omitted (None) by default.
    sparse_embedding: Optional[SparseVector] = None
    # Arbitrary key/value data attached to the record.
    metadata: dict[str, Any]
    # Text of the matched document.
    document: str
    # Score assigned to this result.
    score: float
class FastEmbedMisc:
    """Helpers for detecting fastembed and querying the models it supports.

    Everything is exposed as classmethods and all state lives on the class:
    ``IS_INSTALLED`` caches a successful availability check, and the
    ``_*_MODELS`` sets cache lower-cased model names for the ``is_supported_*``
    lookups.
    """

    # Becomes True once `is_installed` has successfully imported fastembed.
    IS_INSTALLED: bool = False
    # Lower-cased model names, refreshed lazily by the `is_supported_*` methods.
    _TEXT_MODELS: set[str] = set()
    _IMAGE_MODELS: set[str] = set()
    _LATE_INTERACTION_TEXT_MODELS: set[str] = set()
    _LATE_INTERACTION_MULTIMODAL_MODELS: set[str] = set()
    _SPARSE_MODELS: set[str] = set()

    @classmethod
    def is_installed(cls) -> bool:
        """Checks whether fastembed can be imported.

        A positive result is cached in ``IS_INSTALLED``. A negative result is
        not short-circuited, so the import is attempted again on the next call.

        Returns:
            bool: True if fastembed was imported successfully, False otherwise.

        Raises:
            AssertionError: If fastembed imports but one of its embedding classes
                reports no supported models.
        """
        if cls.IS_INSTALLED:
            return cls.IS_INSTALLED

        try:
            from fastembed import (
                SparseTextEmbedding,
                TextEmbedding,
                ImageEmbedding,
                LateInteractionMultimodalEmbedding,
                LateInteractionTextEmbedding,
            )

            # Sanity checks on the imported package. NOTE: `assert` statements
            # are skipped when Python runs with -O, and an AssertionError is
            # intentionally not swallowed by the `except ImportError` below.
            assert len(SparseTextEmbedding.list_supported_models()) > 0
            assert len(TextEmbedding.list_supported_models()) > 0
            assert len(ImageEmbedding.list_supported_models()) > 0
            assert len(LateInteractionTextEmbedding.list_supported_models()) > 0
            assert len(LateInteractionMultimodalEmbedding.list_supported_models()) > 0

            cls.IS_INSTALLED = True
        except ImportError:
            cls.IS_INSTALLED = False

        return cls.IS_INSTALLED

    @classmethod
    def import_fastembed(cls) -> None:
        """Ensures fastembed is available.

        Raises:
            ImportError: If fastembed cannot be imported.
            AssertionError: Propagated from `is_installed` when fastembed imports
                but reports no supported models.
        """
        # Run the actual check instead of reading the cached flag: the flag is
        # False until `is_installed` has been called at least once, which would
        # otherwise produce a spurious ImportError on a working installation.
        if cls.is_installed():
            return

        # If it's not, ask the user to install it
        raise ImportError(
            "fastembed is not installed."
            " Please install it to enable fast vector indexing with `pip install fastembed`."
        )

    @classmethod
    def list_text_models(cls) -> dict[str, tuple[int, models.Distance]]:
        """Lists the supported dense text models.

        Requires invocation of TextEmbedding.list_supported_models() to support custom models.

        Returns:
            dict[str, tuple[int, models.Distance]]: A dict of model names, their dimensions
                and distance metrics. Empty if fastembed is not available.
        """
        if not TextEmbedding:
            return {}
        return {
            model["model"]: (model["dim"], models.Distance.COSINE)
            for model in TextEmbedding.list_supported_models()
        }

    @classmethod
    def list_image_models(cls) -> dict[str, tuple[int, models.Distance]]:
        """Lists the supported image dense models.

        Custom image models are not supported yet, but ImageEmbedding.list_supported_models()
        is called each time to preserve the same style as with TextEmbedding.

        Returns:
            dict[str, tuple[int, models.Distance]]: A dict of model names, their dimensions
                and distance metrics. Empty if fastembed is not available.
        """
        if not ImageEmbedding:
            return {}
        return {
            model["model"]: (model["dim"], models.Distance.COSINE)
            for model in ImageEmbedding.list_supported_models()
        }

    @classmethod
    def list_late_interaction_text_models(cls) -> dict[str, tuple[int, models.Distance]]:
        """Lists the supported late interaction text models.

        Custom late interaction models are not supported yet, but
        LateInteractionTextEmbedding.list_supported_models() is called each time
        to preserve the same style as with TextEmbedding.

        Returns:
            dict[str, tuple[int, models.Distance]]: A dict of model names, their dimensions
                and distance metrics. Empty if fastembed is not available.
        """
        if not LateInteractionTextEmbedding:
            return {}
        return {
            model["model"]: (model["dim"], models.Distance.COSINE)
            for model in LateInteractionTextEmbedding.list_supported_models()
        }

    @classmethod
    def list_late_interaction_multimodal_models(cls) -> dict[str, tuple[int, models.Distance]]:
        """Lists the supported late interaction multimodal models.

        Custom late interaction multimodal models are not supported yet, but
        LateInteractionMultimodalEmbedding.list_supported_models() is called each
        time to preserve the same style as with TextEmbedding.

        Returns:
            dict[str, tuple[int, models.Distance]]: A dict of model names, their dimensions
                and distance metrics. Empty if fastembed is not available.
        """
        if not LateInteractionMultimodalEmbedding:
            return {}
        return {
            model["model"]: (model["dim"], models.Distance.COSINE)
            for model in LateInteractionMultimodalEmbedding.list_supported_models()
        }

    @classmethod
    def list_sparse_models(cls) -> dict[str, dict[str, Any]]:
        """Lists the supported sparse models.

        Custom sparse models are not supported yet, but
        SparseTextEmbedding.list_supported_models() is called each time to
        preserve the same style as with TextEmbedding.

        Returns:
            dict[str, dict[str, Any]]: A dict of model names and their descriptions
                (each description without its "model" key). Empty if fastembed is
                not available.
        """
        descriptions: dict[str, dict[str, Any]] = {}
        if SparseTextEmbedding:
            for description in SparseTextEmbedding.list_supported_models():
                # Build a filtered copy instead of popping the key, so the
                # dicts handed out by fastembed are never mutated.
                descriptions[description["model"]] = {
                    key: value for key, value in description.items() if key != "model"
                }
        return descriptions

    @classmethod
    def is_supported_text_model(cls, model_name: str) -> bool:
        """Checks if the dense text model is supported by fastembed.

        The comparison is case-insensitive.

        Args:
            model_name (str): The name of the model to check.

        Returns:
            bool: True if the model is supported, False otherwise.
        """
        name = model_name.lower()
        if name not in cls._TEXT_MODELS:
            # update cached list in case custom models were added
            cls._TEXT_MODELS = {model.lower() for model in cls.list_text_models()}
        return name in cls._TEXT_MODELS

    @classmethod
    def is_supported_image_model(cls, model_name: str) -> bool:
        """Checks if the image model is supported by fastembed.

        The comparison is case-insensitive.

        Args:
            model_name (str): The name of the model to check.

        Returns:
            bool: True if the model is supported, False otherwise.
        """
        name = model_name.lower()
        if name not in cls._IMAGE_MODELS:
            # update cached list in case custom models were added
            cls._IMAGE_MODELS = {model.lower() for model in cls.list_image_models()}
        return name in cls._IMAGE_MODELS

    @classmethod
    def is_supported_late_interaction_text_model(cls, model_name: str) -> bool:
        """Checks if the late interaction text model is supported by fastembed.

        The comparison is case-insensitive.

        Args:
            model_name (str): The name of the model to check.

        Returns:
            bool: True if the model is supported, False otherwise.
        """
        name = model_name.lower()
        if name not in cls._LATE_INTERACTION_TEXT_MODELS:
            # update cached list in case custom models were added
            cls._LATE_INTERACTION_TEXT_MODELS = {
                model.lower() for model in cls.list_late_interaction_text_models()
            }
        return name in cls._LATE_INTERACTION_TEXT_MODELS

    @classmethod
    def is_supported_late_interaction_multimodal_model(cls, model_name: str) -> bool:
        """Checks if the late interaction multimodal model is supported by fastembed.

        The comparison is case-insensitive.

        Args:
            model_name (str): The name of the model to check.

        Returns:
            bool: True if the model is supported, False otherwise.
        """
        name = model_name.lower()
        if name not in cls._LATE_INTERACTION_MULTIMODAL_MODELS:
            # update cached list in case custom models were added
            cls._LATE_INTERACTION_MULTIMODAL_MODELS = {
                model.lower() for model in cls.list_late_interaction_multimodal_models()
            }
        return name in cls._LATE_INTERACTION_MULTIMODAL_MODELS

    @classmethod
    def is_supported_sparse_model(cls, model_name: str) -> bool:
        """Checks if the sparse model is supported by fastembed.

        The comparison is case-insensitive.

        Args:
            model_name (str): The name of the model to check.

        Returns:
            bool: True if the model is supported, False otherwise.
        """
        name = model_name.lower()
        if name not in cls._SPARSE_MODELS:
            # update cached list in case custom models were added
            cls._SPARSE_MODELS = {model.lower() for model in cls.list_sparse_models()}
        return name in cls._SPARSE_MODELS
# region deprecated
# Prefer the methods built into QdrantClient, e.g. list_supported_text_models,
# list_supported_idf_models, etc.
# Each constant below is computed once, when this module is imported, and is
# empty when the corresponding fastembed class is unavailable (falsy).

# Dense text models: name -> (dimension, distance metric).
SUPPORTED_EMBEDDING_MODELS: dict[str, tuple[int, models.Distance]] = (
    {}
    if not TextEmbedding
    else {
        spec["model"]: (spec["dim"], models.Distance.COSINE)
        for spec in TextEmbedding.list_supported_models()
    }
)

# Sparse models: name -> full model description (including its "model" key).
SUPPORTED_SPARSE_EMBEDDING_MODELS: dict[str, dict[str, Any]] = (
    {}
    if not SparseTextEmbedding
    else {spec["model"]: spec for spec in SparseTextEmbedding.list_supported_models()}
)

# Names of sparse models whose description has a truthy "requires_idf" entry.
IDF_EMBEDDING_MODELS: set[str] = (
    set()
    if not SparseTextEmbedding
    else {
        spec["model"]
        for spec in SparseTextEmbedding.list_supported_models()
        if spec.get("requires_idf")
    }
)

# Late interaction text models: name -> (dimension, distance metric).
_LATE_INTERACTION_EMBEDDING_MODELS: dict[str, tuple[int, models.Distance]] = (
    {}
    if not LateInteractionTextEmbedding
    else {
        spec["model"]: (spec["dim"], models.Distance.COSINE)
        for spec in LateInteractionTextEmbedding.list_supported_models()
    }
)

# Image models: name -> (dimension, distance metric).
_IMAGE_EMBEDDING_MODELS: dict[str, tuple[int, models.Distance]] = (
    {}
    if not ImageEmbedding
    else {
        spec["model"]: (spec["dim"], models.Distance.COSINE)
        for spec in ImageEmbedding.list_supported_models()
    }
)

# Late interaction multimodal models: name -> (dimension, distance metric).
_LATE_INTERACTION_MULTIMODAL_EMBEDDING_MODELS: dict[str, tuple[int, models.Distance]] = (
    {}
    if not LateInteractionMultimodalEmbedding
    else {
        spec["model"]: (spec["dim"], models.Distance.COSINE)
        for spec in LateInteractionMultimodalEmbedding.list_supported_models()
    }
)
# endregion