pinecone_text.dense.cohere_encoder
from enum import Enum
from typing import Union, List, Any, Optional
from pinecone_text.dense.base_dense_ecoder import BaseDenseEncoder

# cohere is an optional dependency: remember whether the import worked so the
# encoder can fail with a helpful message at construction time instead.
try:
    import cohere
    from cohere.error import CohereError
except (OSError, ImportError, ModuleNotFoundError):
    _cohere_installed = False
else:
    _cohere_installed = True


class CohereEncoderName(Enum):
    """
    Supported Cohere encoder models.
    """

    ENGLISH_V3 = "embed-english-v3.0"
    # Fixed: the identifier was missing the "v" before the version number.
    ENGLISH_LIGHT_V3 = "embed-english-light-v3.0"
    MULTILINGUAL_V3 = "embed-multilingual-v3.0"
    MULTILINGUAL_LIGHT_V3 = "embed-multilingual-light-v3.0"

    @classmethod
    def list_models(cls) -> List[str]:
        """Method to get a list of all model names."""
        return [model.value for model in cls]


class CohereInputType(Enum):
    """
    Values sent as `input_type` with an embedding request.
    """

    SEARCH_DOCUMENT = "search_document"
    SEARCH_QUERY = "search_query"
    # CLASSIFICATION = "classification"
    # CLUSTERING = "clustering"


class CohereEncoder(BaseDenseEncoder):
    """
    Cohere's text embedding wrapper. See https://docs.cohere.com/reference/embed

    Note: You should provide an API key as the environment variable `CO_API_KEY`.
          Or you can pass it as argument to the constructor as `api_key`.
    """

    def __init__(
        self,
        model_name: str = "embed-english-v3.0",
        api_key: Optional[str] = None,
        **kwargs: Any,
    ):
        """
        Initialize the Cohere encoder.

        :param model_name: The name of the embedding model to use. See https://docs.cohere.com/reference/embed
        :param api_key: The Cohere API key, forwarded to `cohere.Client`. May be left as None
                        when the key is provided through the environment (see the class note).
        :param kwargs: Additional arguments to pass to the underlying cohere client. See https://github.com/cohere-ai/cohere-python
        :raises ImportError: If the optional cohere dependency could not be imported.
        :raises ValueError: If `model_name` is not one of `CohereEncoderName.list_models()`.
        """
        if not _cohere_installed:
            raise ImportError(
                "Failed to import cohere. Make sure you install cohere extra "
                "dependencies by running: "
                "`pip install pinecone-text[cohere]`"
            )
        if model_name not in CohereEncoderName.list_models():
            raise ValueError(
                f"Model '{model_name}' not supported. Please use one of:"
                + "\n"
                + "\n".join([f"- {x}" for x in CohereEncoderName.list_models()])
            )
        super().__init__()
        self._model_name = model_name
        self._client = cohere.Client(api_key=api_key, **kwargs)

    def encode_documents(
        self, texts: Union[str, List[str]]
    ) -> Union[List[float], List[List[float]]]:
        """
        Embed one document or a list of documents with the `search_document` input type.

        :param texts: A single document, or a list of documents.
        :return: One embedding for a string input, a list of embeddings for a list input.
        """
        return self._encode(texts, CohereInputType.SEARCH_DOCUMENT.value)

    def encode_queries(
        self, texts: Union[str, List[str]]
    ) -> Union[List[float], List[List[float]]]:
        """
        Embed one query or a list of queries with the `search_query` input type.

        :param texts: A single query, or a list of queries.
        :return: One embedding for a string input, a list of embeddings for a list input.
        """
        return self._encode(texts, CohereInputType.SEARCH_QUERY.value)

    def _encode(
        self, texts: Union[str, List[str]], input_type: str
    ) -> Union[List[float], List[List[float]]]:
        """
        Request embeddings for `texts` from the Cohere client.

        :param texts: A single string, or a list of strings.
        :param input_type: The `input_type` value forwarded to the embed call.
        :return: One embedding for a string input, a list of embeddings for a list input.
        :raises ValueError: If `texts` is neither a string nor a list.
        """
        if isinstance(texts, str):
            # The client takes a list; a lone string is wrapped and unwrapped below.
            texts_input = [texts]
        elif isinstance(texts, list):
            texts_input = texts
        else:
            raise ValueError(
                f"texts must be a string or list of strings, got: {type(texts)}"
            )

        try:
            response = self._client.embed(
                texts=texts_input,
                model=self._model_name,
                input_type=input_type,
            )
        except CohereError:
            # TODO: consider wrapping external provider errors
            # Bare raise keeps the provider's original traceback intact.
            raise

        if isinstance(texts, str):
            return response.embeddings[0]
        return list(response.embeddings)
class
CohereEncoderName(enum.Enum):
class CohereEncoderName(Enum):
    """
    Supported Cohere encoder models.

    Each member's value is the model identifier string that is passed to the
    embedding request.
    """

    ENGLISH_V3 = "embed-english-v3.0"
    # Fixed: the identifier was missing the "v" before the version number,
    # unlike the other three v3 model names.
    ENGLISH_LIGHT_V3 = "embed-english-light-v3.0"
    MULTILINGUAL_V3 = "embed-multilingual-v3.0"
    MULTILINGUAL_LIGHT_V3 = "embed-multilingual-light-v3.0"

    @classmethod
    def list_models(cls) -> List[str]:
        """Method to get a list of all model names."""
        return [model.value for model in cls]
Supported Cohere encoder models.
ENGLISH_V3 =
<CohereEncoderName.ENGLISH_V3: 'embed-english-v3.0'>
ENGLISH_LIGHT_V3 =
<CohereEncoderName.ENGLISH_LIGHT_V3: 'embed-english-light-3.0'>
MULTILINGUAL_V3 =
<CohereEncoderName.MULTILINGUAL_V3: 'embed-multilingual-v3.0'>
MULTILINGUAL_LIGHT_V3 =
<CohereEncoderName.MULTILINGUAL_LIGHT_V3: 'embed-multilingual-light-v3.0'>
@classmethod
def
list_models(cls) -> List[str]:
25 @classmethod 26 def list_models(cls) -> List[str]: 27 """Method to get a list of all model names.""" 28 return [model.value for model in cls]
Method to get a list of all model names.
Inherited Members
- enum.Enum
- name
- value
class
CohereInputType(enum.Enum):
class CohereInputType(Enum):
    """
    Values sent as `input_type` with an embedding request.
    """

    # Sent by `CohereEncoder.encode_documents`.
    SEARCH_DOCUMENT = "search_document"
    # Sent by `CohereEncoder.encode_queries`.
    SEARCH_QUERY = "search_query"
    # The two values below are disabled, so this wrapper does not expose them.
    # CLASSIFICATION = "classification"
    # CLUSTERING = "clustering"
An enumeration.
SEARCH_DOCUMENT =
<CohereInputType.SEARCH_DOCUMENT: 'search_document'>
SEARCH_QUERY =
<CohereInputType.SEARCH_QUERY: 'search_query'>
Inherited Members
- enum.Enum
- name
- value
class CohereEncoder(BaseDenseEncoder):
    """
    Cohere's text embedding wrapper. See https://docs.cohere.com/reference/embed

    Note: You should provide an API key as the environment variable `CO_API_KEY`.
          Or you can pass it as argument to the constructor as `api_key`.
    """

    def __init__(
        self,
        model_name: str = "embed-english-v3.0",
        api_key: Optional[str] = None,
        **kwargs: Any,
    ):
        """
        Initialize the Cohere encoder.

        :param model_name: The name of the embedding model to use. See https://docs.cohere.com/reference/embed
        :param api_key: The Cohere API key, forwarded to `cohere.Client`. May be left as None
                        when the key is provided through the environment (see the class note).
        :param kwargs: Additional arguments to pass to the underlying cohere client. See https://github.com/cohere-ai/cohere-python
        :raises ImportError: If the optional cohere dependency could not be imported.
        :raises ValueError: If `model_name` is not one of `CohereEncoderName.list_models()`.
        """
        if not _cohere_installed:
            raise ImportError(
                "Failed to import cohere. Make sure you install cohere extra "
                "dependencies by running: "
                "`pip install pinecone-text[cohere]`"
            )
        if model_name not in CohereEncoderName.list_models():
            raise ValueError(
                f"Model '{model_name}' not supported. Please use one of:"
                + "\n"
                + "\n".join([f"- {x}" for x in CohereEncoderName.list_models()])
            )
        super().__init__()
        self._model_name = model_name
        self._client = cohere.Client(api_key=api_key, **kwargs)

    def encode_documents(
        self, texts: Union[str, List[str]]
    ) -> Union[List[float], List[List[float]]]:
        """
        Embed one document or a list of documents with the `search_document` input type.

        :param texts: A single document, or a list of documents.
        :return: One embedding for a string input, a list of embeddings for a list input.
        """
        return self._encode(texts, CohereInputType.SEARCH_DOCUMENT.value)

    def encode_queries(
        self, texts: Union[str, List[str]]
    ) -> Union[List[float], List[List[float]]]:
        """
        Embed one query or a list of queries with the `search_query` input type.

        :param texts: A single query, or a list of queries.
        :return: One embedding for a string input, a list of embeddings for a list input.
        """
        return self._encode(texts, CohereInputType.SEARCH_QUERY.value)

    def _encode(
        self, texts: Union[str, List[str]], input_type: str
    ) -> Union[List[float], List[List[float]]]:
        """
        Request embeddings for `texts` from the Cohere client.

        :param texts: A single string, or a list of strings.
        :param input_type: The `input_type` value forwarded to the embed call.
        :return: One embedding for a string input, a list of embeddings for a list input.
        :raises ValueError: If `texts` is neither a string nor a list.
        """
        if isinstance(texts, str):
            # The client takes a list; a lone string is wrapped and unwrapped below.
            texts_input = [texts]
        elif isinstance(texts, list):
            texts_input = texts
        else:
            raise ValueError(
                f"texts must be a string or list of strings, got: {type(texts)}"
            )

        try:
            response = self._client.embed(
                texts=texts_input,
                model=self._model_name,
                input_type=input_type,
            )
        except CohereError:
            # TODO: consider wrapping external provider errors
            # Bare raise keeps the provider's original traceback intact.
            raise

        if isinstance(texts, str):
            return response.embeddings[0]
        return list(response.embeddings)
Cohere's text embedding wrapper. See https://docs.cohere.com/reference/embed
Note: You should provide an API key as the environment variable `CO_API_KEY`, or you can pass it as an argument to the constructor as `api_key`.
CohereEncoder( model_name: str = 'embed-english-v3.0', api_key: Optional[str] = None, **kwargs: Any)
def __init__(
    self,
    model_name: str = "embed-english-v3.0",
    api_key: Optional[str] = None,
    **kwargs: Any,
):
    """
    Initialize the Cohere encoder.

    :param model_name: The name of the embedding model to use. See https://docs.cohere.com/reference/embed
    :param api_key: The Cohere API key, forwarded to `cohere.Client`. Defaults to None.
    :param kwargs: Additional arguments to pass to the underlying cohere client. See https://github.com/cohere-ai/cohere-python
    :raises ImportError: If the optional cohere dependency could not be imported.
    :raises ValueError: If `model_name` is not one of `CohereEncoderName.list_models()`.
    """
    # Check the optional dependency first so the user gets an install hint
    # rather than a NameError on `cohere` further down.
    if not _cohere_installed:
        raise ImportError(
            "Failed to import cohere. Make sure you install cohere extra "
            "dependencies by running: "
            "`pip install pinecone-text[cohere]`"
        )
    if model_name not in CohereEncoderName.list_models():
        raise ValueError(
            f"Model '{model_name}' not supported. Please use one of:"
            + "\n"
            + "\n".join([f"- {x}" for x in CohereEncoderName.list_models()])
        )
    super().__init__()
    self._model_name = model_name
    self._client = cohere.Client(api_key=api_key, **kwargs)
Initialize the Cohere encoder.
Parameters
- model_name: The name of the embedding model to use. See https://docs.cohere.com/reference/embed
- kwargs: Additional arguments to pass to the underlying cohere client. See https://github.com/cohere-ai/cohere-python
def
encode_documents( self, texts: Union[str, List[str]]) -> Union[List[float], List[List[float]]]:
def encode_documents(
    self, texts: Union[str, List[str]]
) -> Union[List[float], List[List[float]]]:
    """
    Embed one document or a list of documents with the `search_document` input type.

    :param texts: A single document, or a list of documents.
    :return: The result of `_encode` for these texts.
    """
    document_input_type = CohereInputType.SEARCH_DOCUMENT.value
    return self._encode(texts, document_input_type)
Encode documents into dense vectors (for upserting to Pinecone).
Arguments:
- texts: a single document, or a list of documents, to encode, given as strings
def
encode_queries( self, texts: Union[str, List[str]]) -> Union[List[float], List[List[float]]]:
def encode_queries(
    self, texts: Union[str, List[str]]
) -> Union[List[float], List[List[float]]]:
    """
    Embed one query or a list of queries with the `search_query` input type.

    :param texts: A single query, or a list of queries.
    :return: The result of `_encode` for these texts.
    """
    query_input_type = CohereInputType.SEARCH_QUERY.value
    return self._encode(texts, query_input_type)
Encode queries into dense vectors.
Arguments:
- texts: a single query, or a list of queries, to encode, given as strings