pinecone_text.dense.openai_encoder

  1import os
  2from typing import Union, List, Any, Optional, Dict
  3from pinecone_text.dense.base_dense_ecoder import BaseDenseEncoder
  4
  5try:
  6    import openai
  7    from openai import OpenAIError
  8except (OSError, ImportError, ModuleNotFoundError) as e:
  9    _openai_installed = False
 10else:
 11    _openai_installed = True
 12
 13
 14class OpenAIEncoder(BaseDenseEncoder):
 15    """
 16    OpenAI's text embedding wrapper. See https://platform.openai.com/docs/guides/embeddings
 17
 18    Note: this method reflects the OpenAI client initialization behaviour (See https://github.com/openai/openai-python/blob/main/src/openai/_client.py)
 19      On initialization, You may explicitly pass any argument that the OpenAI client accepts, or use the following environment variables:
 20
 21    - `OPENAI_API_KEY` as `api_key`
 22    - `OPENAI_ORG_ID` as `organization`
 23    - `OPENAI_BASE_URL` as `base_url`
 24
 25    Example:
 26        Using environment variables:
 27        >>> import os
 28        >>> from pinecone_text.dense import OpenAIEncoder
 29        >>> os.environ['OPENAI_API_KEY'] = "sk-..."
 30        >>> encoder = OpenAIEncoder()
 31        >>> encoder.encode_documents(["some text", "some other text"])
 32
 33        Passing arguments explicitly:
 34        >>> from pinecone_text.dense import OpenAIEncoder
 35        >>> encoder = OpenAIEncoder(api_key="sk-...")
 36    """  # noqa: E501
 37
 38    def __init__(
 39        self,
 40        model_name: str = "text-embedding-3-small",
 41        *,
 42        dimension: Optional[int] = None,
 43        **kwargs: Any,
 44    ):
 45        if not _openai_installed:
 46            raise ImportError(
 47                "Failed to import openai. Make sure you install openai extra "
 48                "dependencies by running: "
 49                "`pip install pinecone-text[openai]"
 50            )
 51        super().__init__()
 52
 53        if dimension is not None:
 54            assert dimension > 0, "dimension must be a positive integer"
 55
 56        self._model_name = model_name
 57        self._dimension = dimension
 58        self._client = self._create_client(**kwargs)
 59
 60    @staticmethod
 61    def _create_client(**kwargs: Any) -> Union[openai.OpenAI, openai.AzureOpenAI]:
 62        return openai.OpenAI(**kwargs)
 63
 64    def encode_documents(
 65        self, texts: Union[str, List[str]]
 66    ) -> Union[List[float], List[List[float]]]:
 67        return self._encode(texts)
 68
 69    def encode_queries(
 70        self, texts: Union[str, List[str]]
 71    ) -> Union[List[float], List[List[float]]]:
 72        return self._encode(texts)
 73
 74    def _encode(
 75        self, texts: Union[str, List[str]]
 76    ) -> Union[List[float], List[List[float]]]:
 77        if isinstance(texts, str):
 78            texts_input = [texts]
 79        elif isinstance(texts, list):
 80            texts_input = texts
 81        else:
 82            raise ValueError(
 83                f"texts must be a string or list of strings, got: {type(texts)}"
 84            )
 85
 86        try:
 87            params: Dict[str, Any] = dict(
 88                input=texts_input,
 89                model=self._model_name,
 90            )
 91            if self._dimension is not None:
 92                params["dimensions"] = self._dimension
 93            response = self._client.embeddings.create(**params)
 94        except OpenAIError as e:
 95            # TODO: consider wrapping external provider errors
 96            raise e
 97
 98        if isinstance(texts, str):
 99            return response.data[0].embedding
100        return [result.embedding for result in response.data]
101
102
class AzureOpenAIEncoder(OpenAIEncoder):
    """
    Initialize the Azure OpenAI encoder.

    Note: this class reflects the AzureOpenAI client initialization behaviour (See https://github.com/openai/openai-python/blob/main/src/openai/lib/azure.py).
           You may explicitly pass any argument that the AzureOpenAI client accepts, or use the following environment variables:

        - `AZURE_OPENAI_API_KEY` as `api_key`
        - `AZURE_OPENAI_ENDPOINT` as `azure_endpoint`
        - `OPENAI_API_VERSION` as `api_version`
        - `OPENAI_ORG_ID` as `organization`
        - `AZURE_OPENAI_AD_TOKEN` as `azure_ad_token`

    In addition, you must pass the `model_name` argument with the name of the deployment you wish to use in your own Azure account.

    Example:
        Using environment variables:
        >>> import os
        >>> from pinecone_text.dense import AzureOpenAIEncoder
        >>> os.environ['AZURE_OPENAI_API_KEY'] = "sk-..."
        >>> os.environ['AZURE_OPENAI_ENDPOINT'] = "https://.....openai.azure.com/"
        >>> os.environ['OPENAI_API_VERSION'] = "2023-12-01-preview"
        >>> encoder = AzureOpenAIEncoder(model_name="my-ada-002-deployment")
        >>> encoder.encode_documents(["some text", "some other text"])

        Passing arguments explicitly:
        >>> from pinecone_text.dense import AzureOpenAIEncoder
        >>> encoder = AzureOpenAIEncoder(api_key="sk-...", azure_endpoint="https://.....openai.azure.com/", api_version="2023-12-01-preview")
    """  # noqa: E501

    def __init__(self, model_name: str, **kwargs: Any):
        """
        Create the encoder for the given Azure deployment.

        Args:
            model_name: required here (no default), unlike the parent class.
            **kwargs: passed on to the parent initializer.
        """
        super().__init__(model_name=model_name, **kwargs)

    @staticmethod
    def _create_client(**kwargs: Any) -> "openai.AzureOpenAI":
        """Build the Azure flavour of the API client."""
        # String annotation: it must not be evaluated at definition time, or
        # importing this module without `openai` installed raises NameError.
        return openai.AzureOpenAI(**kwargs)
class OpenAIEncoder(pinecone_text.dense.base_dense_ecoder.BaseDenseEncoder):
 15class OpenAIEncoder(BaseDenseEncoder):
 16    """
 17    OpenAI's text embedding wrapper. See https://platform.openai.com/docs/guides/embeddings
 18
 19    Note: this method reflects the OpenAI client initialization behaviour (See https://github.com/openai/openai-python/blob/main/src/openai/_client.py)
 20      On initialization, You may explicitly pass any argument that the OpenAI client accepts, or use the following environment variables:
 21
 22    - `OPENAI_API_KEY` as `api_key`
 23    - `OPENAI_ORG_ID` as `organization`
 24    - `OPENAI_BASE_URL` as `base_url`
 25
 26    Example:
 27        Using environment variables:
 28        >>> import os
 29        >>> from pinecone_text.dense import OpenAIEncoder
 30        >>> os.environ['OPENAI_API_KEY'] = "sk-..."
 31        >>> encoder = OpenAIEncoder()
 32        >>> encoder.encode_documents(["some text", "some other text"])
 33
 34        Passing arguments explicitly:
 35        >>> from pinecone_text.dense import OpenAIEncoder
 36        >>> encoder = OpenAIEncoder(api_key="sk-...")
 37    """  # noqa: E501
 38
 39    def __init__(
 40        self,
 41        model_name: str = "text-embedding-3-small",
 42        *,
 43        dimension: Optional[int] = None,
 44        **kwargs: Any,
 45    ):
 46        if not _openai_installed:
 47            raise ImportError(
 48                "Failed to import openai. Make sure you install openai extra "
 49                "dependencies by running: "
 50                "`pip install pinecone-text[openai]"
 51            )
 52        super().__init__()
 53
 54        if dimension is not None:
 55            assert dimension > 0, "dimension must be a positive integer"
 56
 57        self._model_name = model_name
 58        self._dimension = dimension
 59        self._client = self._create_client(**kwargs)
 60
 61    @staticmethod
 62    def _create_client(**kwargs: Any) -> Union[openai.OpenAI, openai.AzureOpenAI]:
 63        return openai.OpenAI(**kwargs)
 64
 65    def encode_documents(
 66        self, texts: Union[str, List[str]]
 67    ) -> Union[List[float], List[List[float]]]:
 68        return self._encode(texts)
 69
 70    def encode_queries(
 71        self, texts: Union[str, List[str]]
 72    ) -> Union[List[float], List[List[float]]]:
 73        return self._encode(texts)
 74
 75    def _encode(
 76        self, texts: Union[str, List[str]]
 77    ) -> Union[List[float], List[List[float]]]:
 78        if isinstance(texts, str):
 79            texts_input = [texts]
 80        elif isinstance(texts, list):
 81            texts_input = texts
 82        else:
 83            raise ValueError(
 84                f"texts must be a string or list of strings, got: {type(texts)}"
 85            )
 86
 87        try:
 88            params: Dict[str, Any] = dict(
 89                input=texts_input,
 90                model=self._model_name,
 91            )
 92            if self._dimension is not None:
 93                params["dimensions"] = self._dimension
 94            response = self._client.embeddings.create(**params)
 95        except OpenAIError as e:
 96            # TODO: consider wrapping external provider errors
 97            raise e
 98
 99        if isinstance(texts, str):
100            return response.data[0].embedding
101        return [result.embedding for result in response.data]

OpenAI's text embedding wrapper. See https://platform.openai.com/docs/guides/embeddings

Note: this class reflects the OpenAI client initialization behaviour (see https://github.com/openai/openai-python/blob/main/src/openai/_client.py). On initialization, you may explicitly pass any argument that the OpenAI client accepts, or use the following environment variables:

  • OPENAI_API_KEY as api_key
  • OPENAI_ORG_ID as organization
  • OPENAI_BASE_URL as base_url
Example:

Using environment variables:

>>> import os
>>> from pinecone_text.dense import OpenAIEncoder
>>> os.environ['OPENAI_API_KEY'] = "sk-..."
>>> encoder = OpenAIEncoder()
>>> encoder.encode_documents(["some text", "some other text"])

Passing arguments explicitly:

>>> from pinecone_text.dense import OpenAIEncoder
>>> encoder = OpenAIEncoder(api_key="sk-...")
OpenAIEncoder( model_name: str = 'text-embedding-3-small', *, dimension: Optional[int] = None, **kwargs: Any)
39    def __init__(
40        self,
41        model_name: str = "text-embedding-3-small",
42        *,
43        dimension: Optional[int] = None,
44        **kwargs: Any,
45    ):
46        if not _openai_installed:
47            raise ImportError(
48                "Failed to import openai. Make sure you install openai extra "
49                "dependencies by running: "
50                "`pip install pinecone-text[openai]"
51            )
52        super().__init__()
53
54        if dimension is not None:
55            assert dimension > 0, "dimension must be a positive integer"
56
57        self._model_name = model_name
58        self._dimension = dimension
59        self._client = self._create_client(**kwargs)
def encode_documents( self, texts: Union[str, List[str]]) -> Union[List[float], List[List[float]]]:
65    def encode_documents(
66        self, texts: Union[str, List[str]]
67    ) -> Union[List[float], List[List[float]]]:
68        return self._encode(texts)

Encode documents into dense vectors (for upserting to Pinecone).

Arguments:
  • texts: a single document (as a string) or a list of documents to encode
def encode_queries( self, texts: Union[str, List[str]]) -> Union[List[float], List[List[float]]]:
70    def encode_queries(
71        self, texts: Union[str, List[str]]
72    ) -> Union[List[float], List[List[float]]]:
73        return self._encode(texts)

Encode queries into dense vectors.

Arguments:
  • texts: a single query (as a string) or a list of queries to encode
class AzureOpenAIEncoder(OpenAIEncoder):
class AzureOpenAIEncoder(OpenAIEncoder):
    """
    Initialize the Azure OpenAI encoder.

    Note: this class reflects the AzureOpenAI client initialization behaviour (See https://github.com/openai/openai-python/blob/main/src/openai/lib/azure.py).
           You may explicitly pass any argument that the AzureOpenAI client accepts, or use the following environment variables:

        - `AZURE_OPENAI_API_KEY` as `api_key`
        - `AZURE_OPENAI_ENDPOINT` as `azure_endpoint`
        - `OPENAI_API_VERSION` as `api_version`
        - `OPENAI_ORG_ID` as `organization`
        - `AZURE_OPENAI_AD_TOKEN` as `azure_ad_token`

    In addition, you must pass the `model_name` argument with the name of the deployment you wish to use in your own Azure account.

    Example:
        Using environment variables:
        >>> import os
        >>> from pinecone_text.dense import AzureOpenAIEncoder
        >>> os.environ['AZURE_OPENAI_API_KEY'] = "sk-..."
        >>> os.environ['AZURE_OPENAI_ENDPOINT'] = "https://.....openai.azure.com/"
        >>> os.environ['OPENAI_API_VERSION'] = "2023-12-01-preview"
        >>> encoder = AzureOpenAIEncoder(model_name="my-ada-002-deployment")
        >>> encoder.encode_documents(["some text", "some other text"])

        Passing arguments explicitly:
        >>> from pinecone_text.dense import AzureOpenAIEncoder
        >>> encoder = AzureOpenAIEncoder(api_key="sk-...", azure_endpoint="https://.....openai.azure.com/", api_version="2023-12-01-preview")
    """  # noqa: E501

    def __init__(self, model_name: str, **kwargs: Any):
        """
        Create the encoder for the given Azure deployment.

        Args:
            model_name: required here (no default), unlike the parent class.
            **kwargs: passed on to the parent initializer.
        """
        super().__init__(model_name=model_name, **kwargs)

    @staticmethod
    def _create_client(**kwargs: Any) -> "openai.AzureOpenAI":
        """Build the Azure flavour of the API client."""
        # String annotation: it must not be evaluated at definition time, or
        # importing this module without `openai` installed raises NameError.
        return openai.AzureOpenAI(**kwargs)

Initialize the Azure OpenAI encoder.

Note: this method reflects the AzureOpenAI client initialization behaviour (See https://github.com/openai/openai-python/blob/main/src/openai/lib/azure.py). You may explicitly pass any argument that the AzureOpenAI client accepts, or use the following environment variables:

- `AZURE_OPENAI_API_KEY` as `api_key`
- `AZURE_OPENAI_ENDPOINT` as `azure_endpoint`
- `OPENAI_API_VERSION` as `api_version`
- `OPENAI_ORG_ID` as `organization`
- `AZURE_OPENAI_AD_TOKEN` as `azure_ad_token`

In addition, you must pass the model_name argument with the name of the deployment you wish to use in your own Azure account.

Example:

Using environment variables:

>>> import os
>>> from pinecone_text.dense import AzureOpenAIEncoder
>>> os.environ['AZURE_OPENAI_API_KEY'] = "sk-..."
>>> os.environ['AZURE_OPENAI_ENDPOINT'] = "https://.....openai.azure.com/"
>>> os.environ['OPENAI_API_VERSION'] = "2023-12-01-preview"
>>> encoder = AzureOpenAIEncoder(model_name="my-ada-002-deployment")
>>> encoder.encode_documents(["some text", "some other text"])

Passing arguments explicitly:

>>> from pinecone_text.dense import AzureOpenAIEncoder
>>> encoder = AzureOpenAIEncoder(api_key="sk-...", azure_endpoint="https://.....openai.azure.com/", api_version="2023-12-01-preview")
AzureOpenAIEncoder(model_name: str, **kwargs: Any)
134    def __init__(self, model_name: str, **kwargs: Any):
135        super().__init__(model_name=model_name, **kwargs)