pinecone_text.dense.cohere_encoder

  1from enum import Enum
  2from typing import Union, List, Any, Optional
  3from pinecone_text.dense.base_dense_ecoder import BaseDenseEncoder
  4
  5try:
  6    import cohere
  7    from cohere.error import CohereError
  8except (OSError, ImportError, ModuleNotFoundError) as e:
  9    _cohere_installed = False
 10else:
 11    _cohere_installed = True
 12
 13
 14class CohereEncoderName(Enum):
 15    """
 16    Supported Cohere encoder models.
 17    """
 18
 19    ENGLISH_V3 = "embed-english-v3.0"
 20    ENGLISH_LIGHT_V3 = "embed-english-light-3.0"
 21    MULTILINGUAL_V3 = "embed-multilingual-v3.0"
 22    MULTILINGUAL_LIGHT_V3 = "embed-multilingual-light-v3.0"
 23
 24    @classmethod
 25    def list_models(cls) -> List[str]:
 26        """Method to get a list of all model names."""
 27        return [model.value for model in cls]
 28
 29
 30class CohereInputType(Enum):
 31    SEARCH_DOCUMENT = "search_document"
 32    SEARCH_QUERY = "search_query"
 33    # CLASSIFICATION = "classification"
 34    # CLUSTERING = "clustering"
 35
 36
 37class CohereEncoder(BaseDenseEncoder):
 38    """
 39    Cohere's text embedding wrapper. See https://docs.cohere.com/reference/embed
 40
 41    Note: You should provide an API key as the environment variable `CO_API_KEY`.
 42          Or you can pass it as argument to the constructor as `api_key`.
 43    """
 44
 45    def __init__(
 46        self,
 47        model_name: str = "embed-english-v3.0",
 48        api_key: Optional[str] = None,
 49        **kwargs: Any,
 50    ):
 51        """
 52        Initialize the Cohere encoder.
 53
 54        :param model_name: The name of the embedding model to use. See https://docs.cohere.com/reference/embed
 55        :param kwargs: Additional arguments to pass to the underlying cohere client. See https://github.com/openai/openai-python
 56        """
 57        if not _cohere_installed:
 58            raise ImportError(
 59                "Failed to import cohere. Make sure you install cohere extra "
 60                "dependencies by running: "
 61                "`pip install pinecone-text[cohere]"
 62            )
 63        if model_name not in CohereEncoderName.list_models():
 64            raise ValueError(
 65                f"Model '{model_name}' not supported. Please use one of:"
 66                + "\n"
 67                + "\n".join([f"- {x}" for x in CohereEncoderName.list_models()])
 68            )
 69        super().__init__()
 70        self._model_name = model_name
 71        self._client = cohere.Client(api_key=api_key, **kwargs)
 72
 73    def encode_documents(
 74        self, texts: Union[str, List[str]]
 75    ) -> Union[List[float], List[List[float]]]:
 76        return self._encode(texts, CohereInputType.SEARCH_DOCUMENT.value)
 77
 78    def encode_queries(
 79        self, texts: Union[str, List[str]]
 80    ) -> Union[List[float], List[List[float]]]:
 81        return self._encode(texts, CohereInputType.SEARCH_QUERY.value)
 82
 83    def _encode(
 84        self, texts: Union[str, List[str]], input_type: str
 85    ) -> Union[List[float], List[List[float]]]:
 86        if isinstance(texts, str):
 87            texts_input = [texts]
 88        elif isinstance(texts, list):
 89            texts_input = texts
 90        else:
 91            raise ValueError(
 92                f"texts must be a string or list of strings, got: {type(texts)}"
 93            )
 94
 95        try:
 96            response = self._client.embed(
 97                texts=texts_input,
 98                model=self._model_name,
 99                input_type=input_type,
100            )
101        except CohereError as e:
102            # TODO: consider wrapping external provider errors
103            raise e
104
105        if isinstance(texts, str):
106            return response.embeddings[0]
107        return [embedding for embedding in response.embeddings]
class CohereEncoderName(enum.Enum):
class CohereEncoderName(Enum):
    """
    Supported Cohere encoder models.

    Each member's value is the model identifier string sent to the Cohere
    embed endpoint. See https://docs.cohere.com/reference/embed
    """

    ENGLISH_V3 = "embed-english-v3.0"
    # The identifier needs the "v" before the version number, like the
    # other three models ("embed-english-light-3.0" is not a valid model id).
    ENGLISH_LIGHT_V3 = "embed-english-light-v3.0"
    MULTILINGUAL_V3 = "embed-multilingual-v3.0"
    MULTILINGUAL_LIGHT_V3 = "embed-multilingual-light-v3.0"

    @classmethod
    def list_models(cls) -> List[str]:
        """Return the identifier strings of all supported models, in definition order."""
        return [model.value for model in cls]

Supported Cohere encoder models.

ENGLISH_V3 = <CohereEncoderName.ENGLISH_V3: 'embed-english-v3.0'>
ENGLISH_LIGHT_V3 = <CohereEncoderName.ENGLISH_LIGHT_V3: 'embed-english-light-3.0'>
MULTILINGUAL_V3 = <CohereEncoderName.MULTILINGUAL_V3: 'embed-multilingual-v3.0'>
MULTILINGUAL_LIGHT_V3 = <CohereEncoderName.MULTILINGUAL_LIGHT_V3: 'embed-multilingual-light-v3.0'>
@classmethod
def list_models(cls) -> List[str]:
25    @classmethod
26    def list_models(cls) -> List[str]:
27        """Method to get a list of all model names."""
28        return [model.value for model in cls]

Method to get a list of all model names.

Inherited Members
enum.Enum
name
value
class CohereInputType(enum.Enum):
class CohereInputType(Enum):
    """
    Values for the ``input_type`` argument of a Cohere embed request.
    """

    SEARCH_DOCUMENT = "search_document"
    SEARCH_QUERY = "search_query"
    # Currently disabled input types:
    # CLASSIFICATION = "classification"
    # CLUSTERING = "clustering"

An enumeration.

SEARCH_DOCUMENT = <CohereInputType.SEARCH_DOCUMENT: 'search_document'>
SEARCH_QUERY = <CohereInputType.SEARCH_QUERY: 'search_query'>
Inherited Members
enum.Enum
name
value
class CohereEncoder(pinecone_text.dense.base_dense_ecoder.BaseDenseEncoder):
 38class CohereEncoder(BaseDenseEncoder):
 39    """
 40    Cohere's text embedding wrapper. See https://docs.cohere.com/reference/embed
 41
 42    Note: You should provide an API key as the environment variable `CO_API_KEY`.
 43          Or you can pass it as argument to the constructor as `api_key`.
 44    """
 45
 46    def __init__(
 47        self,
 48        model_name: str = "embed-english-v3.0",
 49        api_key: Optional[str] = None,
 50        **kwargs: Any,
 51    ):
 52        """
 53        Initialize the Cohere encoder.
 54
 55        :param model_name: The name of the embedding model to use. See https://docs.cohere.com/reference/embed
 56        :param kwargs: Additional arguments to pass to the underlying cohere client. See https://github.com/openai/openai-python
 57        """
 58        if not _cohere_installed:
 59            raise ImportError(
 60                "Failed to import cohere. Make sure you install cohere extra "
 61                "dependencies by running: "
 62                "`pip install pinecone-text[cohere]"
 63            )
 64        if model_name not in CohereEncoderName.list_models():
 65            raise ValueError(
 66                f"Model '{model_name}' not supported. Please use one of:"
 67                + "\n"
 68                + "\n".join([f"- {x}" for x in CohereEncoderName.list_models()])
 69            )
 70        super().__init__()
 71        self._model_name = model_name
 72        self._client = cohere.Client(api_key=api_key, **kwargs)
 73
 74    def encode_documents(
 75        self, texts: Union[str, List[str]]
 76    ) -> Union[List[float], List[List[float]]]:
 77        return self._encode(texts, CohereInputType.SEARCH_DOCUMENT.value)
 78
 79    def encode_queries(
 80        self, texts: Union[str, List[str]]
 81    ) -> Union[List[float], List[List[float]]]:
 82        return self._encode(texts, CohereInputType.SEARCH_QUERY.value)
 83
 84    def _encode(
 85        self, texts: Union[str, List[str]], input_type: str
 86    ) -> Union[List[float], List[List[float]]]:
 87        if isinstance(texts, str):
 88            texts_input = [texts]
 89        elif isinstance(texts, list):
 90            texts_input = texts
 91        else:
 92            raise ValueError(
 93                f"texts must be a string or list of strings, got: {type(texts)}"
 94            )
 95
 96        try:
 97            response = self._client.embed(
 98                texts=texts_input,
 99                model=self._model_name,
100                input_type=input_type,
101            )
102        except CohereError as e:
103            # TODO: consider wrapping external provider errors
104            raise e
105
106        if isinstance(texts, str):
107            return response.embeddings[0]
108        return [embedding for embedding in response.embeddings]

Cohere's text embedding wrapper. See https://docs.cohere.com/reference/embed

Note: You should provide an API key as the environment variable CO_API_KEY. Or you can pass it as argument to the constructor as api_key.

CohereEncoder( model_name: str = 'embed-english-v3.0', api_key: Optional[str] = None, **kwargs: Any)
46    def __init__(
47        self,
48        model_name: str = "embed-english-v3.0",
49        api_key: Optional[str] = None,
50        **kwargs: Any,
51    ):
52        """
53        Initialize the Cohere encoder.
54
55        :param model_name: The name of the embedding model to use. See https://docs.cohere.com/reference/embed
56        :param kwargs: Additional arguments to pass to the underlying cohere client. See https://github.com/openai/openai-python
57        """
58        if not _cohere_installed:
59            raise ImportError(
60                "Failed to import cohere. Make sure you install cohere extra "
61                "dependencies by running: "
62                "`pip install pinecone-text[cohere]"
63            )
64        if model_name not in CohereEncoderName.list_models():
65            raise ValueError(
66                f"Model '{model_name}' not supported. Please use one of:"
67                + "\n"
68                + "\n".join([f"- {x}" for x in CohereEncoderName.list_models()])
69            )
70        super().__init__()
71        self._model_name = model_name
72        self._client = cohere.Client(api_key=api_key, **kwargs)

Initialize the Cohere encoder.

Parameters
  • model_name: The name of the embedding model to use. See https://docs.cohere.com/reference/embed
  • kwargs: Additional arguments to pass to the underlying cohere client. See https://github.com/cohere-ai/cohere-python
def encode_documents( self, texts: Union[str, List[str]]) -> Union[List[float], List[List[float]]]:
74    def encode_documents(
75        self, texts: Union[str, List[str]]
76    ) -> Union[List[float], List[List[float]]]:
77        return self._encode(texts, CohereInputType.SEARCH_DOCUMENT.value)

Encode documents into dense vectors (for upserting to Pinecone).

Arguments:
  • texts: a single document (a string) or a list of documents (strings) to encode
def encode_queries( self, texts: Union[str, List[str]]) -> Union[List[float], List[List[float]]]:
79    def encode_queries(
80        self, texts: Union[str, List[str]]
81    ) -> Union[List[float], List[List[float]]]:
82        return self._encode(texts, CohereInputType.SEARCH_QUERY.value)

Encode queries into dense vectors.

Arguments:
  • texts: a single query (a string) or a list of queries (strings) to encode