pinecone_text.dense.openai_encoder
import os
from typing import Union, List, Any, Optional, Dict
from pinecone_text.dense.base_dense_ecoder import BaseDenseEncoder

# `openai` is an optional extra: remember whether it imported so the encoders
# can raise a helpful error at construction time instead of at import time.
try:
    import openai
    from openai import OpenAIError
except (OSError, ImportError, ModuleNotFoundError):
    _openai_installed = False
else:
    _openai_installed = True


class OpenAIEncoder(BaseDenseEncoder):
    """
    OpenAI's text embedding wrapper. See https://platform.openai.com/docs/guides/embeddings

    Note: this class reflects the OpenAI client initialization behaviour (See https://github.com/openai/openai-python/blob/main/src/openai/_client.py).
    On initialization, you may explicitly pass any argument that the OpenAI client accepts, or use the following environment variables:

    - `OPENAI_API_KEY` as `api_key`
    - `OPENAI_ORG_ID` as `organization`
    - `OPENAI_BASE_URL` as `base_url`

    Example:
        Using environment variables:
        >>> import os
        >>> from pinecone_text.dense import OpenAIEncoder
        >>> os.environ['OPENAI_API_KEY'] = "sk-..."
        >>> encoder = OpenAIEncoder()
        >>> encoder.encode_documents(["some text", "some other text"])

        Passing arguments explicitly:
        >>> from pinecone_text.dense import OpenAIEncoder
        >>> encoder = OpenAIEncoder(api_key="sk-...")
    """  # noqa: E501

    def __init__(
        self,
        model_name: str = "text-embedding-3-small",
        *,
        dimension: Optional[int] = None,
        **kwargs: Any,
    ):
        """
        Create the encoder and its underlying API client.

        Args:
            model_name: sent as `model` on every embeddings request.
            dimension: when not None, sent as `dimensions` on every embeddings
                request. Must be greater than zero.
            **kwargs: forwarded unchanged to the client constructor.

        Raises:
            ImportError: if the `openai` package could not be imported.
            AssertionError: if `dimension` is given but is not positive.
        """
        if not _openai_installed:
            raise ImportError(
                "Failed to import openai. Make sure you install openai extra "
                "dependencies by running: "
                "`pip install pinecone-text[openai]`"
            )
        super().__init__()

        # Explicit raise rather than `assert`: assert statements are stripped
        # under `python -O`, which would silently skip this validation.
        # AssertionError is kept so existing callers see the same exception.
        if dimension is not None and not dimension > 0:
            raise AssertionError("dimension must be a positive integer")

        self._model_name = model_name
        self._dimension = dimension
        self._client = self._create_client(**kwargs)

    # The return annotation is a string on purpose: annotations are evaluated
    # when the function is defined, so an unquoted `openai.OpenAI` would raise
    # NameError at import time whenever the optional `openai` package is
    # missing, defeating the import guard at the top of the module.
    @staticmethod
    def _create_client(**kwargs: Any) -> "Union[openai.OpenAI, openai.AzureOpenAI]":
        """Build the API client; subclasses override this to swap the client type."""
        return openai.OpenAI(**kwargs)

    def encode_documents(
        self, texts: Union[str, List[str]]
    ) -> Union[List[float], List[List[float]]]:
        """
        encode documents to a dense vector (for upsert to pinecone)

        Args:
            texts: a single or list of documents to encode as a string
        """
        return self._encode(texts)

    def encode_queries(
        self, texts: Union[str, List[str]]
    ) -> Union[List[float], List[List[float]]]:
        """
        encode queries to a dense vector

        Args:
            texts: a single or list of queries to encode as a string
        """
        return self._encode(texts)

    def _encode(
        self, texts: Union[str, List[str]]
    ) -> Union[List[float], List[List[float]]]:
        """
        Request embeddings for `texts` from the client.

        Args:
            texts: a single string, or a list of strings.

        Returns:
            One embedding when `texts` is a string; otherwise a list with one
            embedding per element of the response data.

        Raises:
            ValueError: if `texts` is neither a string nor a list.
            OpenAIError: propagated unchanged from the client.
        """
        if isinstance(texts, str):
            texts_input = [texts]
        elif isinstance(texts, list):
            texts_input = texts
        else:
            raise ValueError(
                f"texts must be a string or list of strings, got: {type(texts)}"
            )

        params: Dict[str, Any] = dict(
            input=texts_input,
            model=self._model_name,
        )
        # Only send `dimensions` when the caller asked for a specific size.
        if self._dimension is not None:
            params["dimensions"] = self._dimension

        try:
            response = self._client.embeddings.create(**params)
        except OpenAIError:
            # TODO: consider wrapping external provider errors
            raise

        # A bare string was wrapped in a one-element list above; unwrap it.
        if isinstance(texts, str):
            return response.data[0].embedding
        return [result.embedding for result in response.data]


class AzureOpenAIEncoder(OpenAIEncoder):
    """
    Initialize the Azure OpenAI encoder.

    Note: this class reflects the AzureOpenAI client initialization behaviour (See https://github.com/openai/openai-python/blob/main/src/openai/lib/azure.py).
    You may explicitly pass any argument that the AzureOpenAI client accepts, or use the following environment variables:

    - `AZURE_OPENAI_API_KEY` as `api_key`
    - `AZURE_OPENAI_ENDPOINT` as `azure_endpoint`
    - `OPENAI_API_VERSION` as `api_version`
    - `OPENAI_ORG_ID` as `organization`
    - `AZURE_OPENAI_AD_TOKEN` as `azure_ad_token`

    In addition, you must pass the `model_name` argument with the name of the deployment you wish to use in your own Azure account.

    Example:
        Using environment variables:
        >>> import os
        >>> from pinecone_text.dense import AzureOpenAIEncoder
        >>> os.environ['AZURE_OPENAI_API_KEY'] = "sk-..."
        >>> os.environ['AZURE_OPENAI_ENDPOINT'] = "https://.....openai.azure.com/"
        >>> os.environ['OPENAI_API_VERSION'] = "2023-12-01-preview"
        >>> encoder = AzureOpenAIEncoder(model_name="my-ada-002-deployment")
        >>> encoder.encode_documents(["some text", "some other text"])

        Passing arguments explicitly:
        >>> from pinecone_text.dense import AzureOpenAIEncoder
        >>> encoder = AzureOpenAIEncoder(api_key="sk-...", azure_endpoint="https://.....openai.azure.com/", api_version="2023-12-01-preview")
    """  # noqa: E501

    def __init__(self, model_name: str, **kwargs: Any):
        """Same as the parent initializer, except `model_name` has no default."""
        super().__init__(model_name=model_name, **kwargs)

    # String annotation so the module still imports when `openai` is missing.
    @staticmethod
    def _create_client(**kwargs: Any) -> "openai.AzureOpenAI":
        """Build an AzureOpenAI client from the forwarded keyword arguments."""
        return openai.AzureOpenAI(**kwargs)
class OpenAIEncoder(BaseDenseEncoder):
    """
    OpenAI's text embedding wrapper. See https://platform.openai.com/docs/guides/embeddings

    Note: this class reflects the OpenAI client initialization behaviour (See https://github.com/openai/openai-python/blob/main/src/openai/_client.py).
    On initialization, you may explicitly pass any argument that the OpenAI client accepts, or use the following environment variables:

    - `OPENAI_API_KEY` as `api_key`
    - `OPENAI_ORG_ID` as `organization`
    - `OPENAI_BASE_URL` as `base_url`

    Example:
        Using environment variables:
        >>> import os
        >>> from pinecone_text.dense import OpenAIEncoder
        >>> os.environ['OPENAI_API_KEY'] = "sk-..."
        >>> encoder = OpenAIEncoder()
        >>> encoder.encode_documents(["some text", "some other text"])

        Passing arguments explicitly:
        >>> from pinecone_text.dense import OpenAIEncoder
        >>> encoder = OpenAIEncoder(api_key="sk-...")
    """  # noqa: E501

    def __init__(
        self,
        model_name: str = "text-embedding-3-small",
        *,
        dimension: Optional[int] = None,
        **kwargs: Any,
    ):
        """
        Create the encoder and its underlying API client.

        Args:
            model_name: sent as `model` on every embeddings request.
            dimension: when not None, sent as `dimensions` on every embeddings
                request. Must be greater than zero.
            **kwargs: forwarded unchanged to the client constructor.

        Raises:
            ImportError: if the `openai` package could not be imported.
            AssertionError: if `dimension` is given but is not positive.
        """
        if not _openai_installed:
            raise ImportError(
                "Failed to import openai. Make sure you install openai extra "
                "dependencies by running: "
                "`pip install pinecone-text[openai]`"
            )
        super().__init__()

        # Explicit raise rather than `assert`: assert statements are stripped
        # under `python -O`, which would silently skip this validation.
        # AssertionError is kept so existing callers see the same exception.
        if dimension is not None and not dimension > 0:
            raise AssertionError("dimension must be a positive integer")

        self._model_name = model_name
        self._dimension = dimension
        self._client = self._create_client(**kwargs)

    # The return annotation is a string on purpose: annotations are evaluated
    # when the function is defined, so an unquoted `openai.OpenAI` would raise
    # NameError while this class is being created whenever the optional
    # `openai` package is missing.
    @staticmethod
    def _create_client(**kwargs: Any) -> "Union[openai.OpenAI, openai.AzureOpenAI]":
        """Build the API client; subclasses override this to swap the client type."""
        return openai.OpenAI(**kwargs)

    def encode_documents(
        self, texts: Union[str, List[str]]
    ) -> Union[List[float], List[List[float]]]:
        """
        encode documents to a dense vector (for upsert to pinecone)

        Args:
            texts: a single or list of documents to encode as a string
        """
        return self._encode(texts)

    def encode_queries(
        self, texts: Union[str, List[str]]
    ) -> Union[List[float], List[List[float]]]:
        """
        encode queries to a dense vector

        Args:
            texts: a single or list of queries to encode as a string
        """
        return self._encode(texts)

    def _encode(
        self, texts: Union[str, List[str]]
    ) -> Union[List[float], List[List[float]]]:
        """
        Request embeddings for `texts` from the client.

        Args:
            texts: a single string, or a list of strings.

        Returns:
            One embedding when `texts` is a string; otherwise a list with one
            embedding per element of the response data.

        Raises:
            ValueError: if `texts` is neither a string nor a list.
            OpenAIError: propagated unchanged from the client.
        """
        if isinstance(texts, str):
            texts_input = [texts]
        elif isinstance(texts, list):
            texts_input = texts
        else:
            raise ValueError(
                f"texts must be a string or list of strings, got: {type(texts)}"
            )

        params: Dict[str, Any] = dict(
            input=texts_input,
            model=self._model_name,
        )
        # Only send `dimensions` when the caller asked for a specific size.
        if self._dimension is not None:
            params["dimensions"] = self._dimension

        try:
            response = self._client.embeddings.create(**params)
        except OpenAIError:
            # TODO: consider wrapping external provider errors
            raise

        # A bare string was wrapped in a one-element list above; unwrap it.
        if isinstance(texts, str):
            return response.data[0].embedding
        return [result.embedding for result in response.data]
OpenAI's text embedding wrapper. See https://platform.openai.com/docs/guides/embeddings
Note: this class reflects the OpenAI client initialization behaviour (See https://github.com/openai/openai-python/blob/main/src/openai/_client.py). On initialization, you may explicitly pass any argument that the OpenAI client accepts, or use the following environment variables:
- `OPENAI_API_KEY` as `api_key`
- `OPENAI_ORG_ID` as `organization`
- `OPENAI_BASE_URL` as `base_url`
Example:
Using environment variables:
>>> import os
>>> from pinecone_text.dense import OpenAIEncoder
>>> os.environ['OPENAI_API_KEY'] = "sk-..."
>>> encoder = OpenAIEncoder()
>>> encoder.encode_documents(["some text", "some other text"])
Passing arguments explicitly:
>>> from pinecone_text.dense import OpenAIEncoder
>>> encoder = OpenAIEncoder(api_key="sk-...")
def __init__(
    self,
    model_name: str = "text-embedding-3-small",
    *,
    dimension: Optional[int] = None,
    **kwargs: Any,
):
    """
    Create the encoder and its underlying API client.

    Args:
        model_name: stored on the instance as `_model_name`.
        dimension: stored on the instance as `_dimension`. When given, it
            must be greater than zero.
        **kwargs: forwarded unchanged to `_create_client`.

    Raises:
        ImportError: if the `openai` package could not be imported.
        AssertionError: if `dimension` is given but is not positive.
    """
    if not _openai_installed:
        raise ImportError(
            "Failed to import openai. Make sure you install openai extra "
            "dependencies by running: "
            "`pip install pinecone-text[openai]`"
        )
    super().__init__()

    # Explicit raise rather than `assert`: assert statements are stripped
    # under `python -O`, which would silently skip this validation.
    # AssertionError is kept so existing callers see the same exception.
    if dimension is not None and not dimension > 0:
        raise AssertionError("dimension must be a positive integer")

    self._model_name = model_name
    self._dimension = dimension
    self._client = self._create_client(**kwargs)
def encode_documents(
    self, texts: Union[str, List[str]]
) -> Union[List[float], List[List[float]]]:
    """
    Encode documents to dense vectors by delegating to `_encode`.

    Args:
        texts: a single document or a list of documents.

    Returns:
        Whatever `_encode` returns for `texts`.
    """
    document_vectors = self._encode(texts)
    return document_vectors
Encode documents to dense vectors (for upsert to Pinecone).
Arguments:
- texts: a single document (a string) or a list of documents (strings) to encode
def encode_queries(
    self, texts: Union[str, List[str]]
) -> Union[List[float], List[List[float]]]:
    """
    Encode queries to dense vectors by delegating to `_encode`.

    Args:
        texts: a single query or a list of queries.

    Returns:
        Whatever `_encode` returns for `texts`.
    """
    query_vectors = self._encode(texts)
    return query_vectors
Encode queries to dense vectors.
Arguments:
- texts: a single query (a string) or a list of queries (strings) to encode
class AzureOpenAIEncoder(OpenAIEncoder):
    """
    Initialize the Azure OpenAI encoder.

    Note: this class reflects the AzureOpenAI client initialization behaviour (See https://github.com/openai/openai-python/blob/main/src/openai/lib/azure.py).
    You may explicitly pass any argument that the AzureOpenAI client accepts, or use the following environment variables:

    - `AZURE_OPENAI_API_KEY` as `api_key`
    - `AZURE_OPENAI_ENDPOINT` as `azure_endpoint`
    - `OPENAI_API_VERSION` as `api_version`
    - `OPENAI_ORG_ID` as `organization`
    - `AZURE_OPENAI_AD_TOKEN` as `azure_ad_token`

    In addition, you must pass the `model_name` argument with the name of the deployment you wish to use in your own Azure account.

    Example:
        Using environment variables:
        >>> import os
        >>> from pinecone_text.dense import AzureOpenAIEncoder
        >>> os.environ['AZURE_OPENAI_API_KEY'] = "sk-..."
        >>> os.environ['AZURE_OPENAI_ENDPOINT'] = "https://.....openai.azure.com/"
        >>> os.environ['OPENAI_API_VERSION'] = "2023-12-01-preview"
        >>> encoder = AzureOpenAIEncoder(model_name="my-ada-002-deployment")
        >>> encoder.encode_documents(["some text", "some other text"])

        Passing arguments explicitly:
        >>> from pinecone_text.dense import AzureOpenAIEncoder
        >>> encoder = AzureOpenAIEncoder(api_key="sk-...", azure_endpoint="https://.....openai.azure.com/", api_version="2023-12-01-preview")
    """  # noqa: E501

    def __init__(self, model_name: str, **kwargs: Any):
        """
        Forward everything to the parent initializer.

        Args:
            model_name: required here (no default); passed on as `model_name`.
            **kwargs: passed on unchanged to the parent initializer.
        """
        super().__init__(model_name=model_name, **kwargs)

    # The return annotation is a string on purpose: annotations are evaluated
    # when the function is defined, so an unquoted `openai.AzureOpenAI` would
    # raise NameError while this class is being created whenever the optional
    # `openai` package is missing.
    @staticmethod
    def _create_client(**kwargs: Any) -> "openai.AzureOpenAI":
        """Build an AzureOpenAI client from the forwarded keyword arguments."""
        return openai.AzureOpenAI(**kwargs)
Initialize the Azure OpenAI encoder.
Note: this method reflects the AzureOpenAI client initialization behaviour (See https://github.com/openai/openai-python/blob/main/src/openai/lib/azure.py). You may explicitly pass any argument that the AzureOpenAI client accepts, or use the following environment variables:
- `AZURE_OPENAI_API_KEY` as `api_key`
- `AZURE_OPENAI_ENDPOINT` as `azure_endpoint`
- `OPENAI_API_VERSION` as `api_version`
- `OPENAI_ORG_ID` as `organization`
- `AZURE_OPENAI_AD_TOKEN` as `azure_ad_token`
In addition, you must pass the `model_name` argument with the name of the deployment you wish to use in your own Azure account.
Example:
Using environment variables:
>>> import os
>>> from pinecone_text.dense import AzureOpenAIEncoder
>>> os.environ['AZURE_OPENAI_API_KEY'] = "sk-..."
>>> os.environ['AZURE_OPENAI_ENDPOINT'] = "https://.....openai.azure.com/"
>>> os.environ['OPENAI_API_VERSION'] = "2023-12-01-preview"
>>> encoder = AzureOpenAIEncoder(model_name="my-ada-002-deployment")
>>> encoder.encode_documents(["some text", "some other text"])
Passing arguments explicitly:
>>> from pinecone_text.dense import AzureOpenAIEncoder
>>> encoder = AzureOpenAIEncoder(api_key="sk-...", azure_endpoint="https://.....openai.azure.com/", api_version="2023-12-01-preview")