pinecone_text.dense.jina_encoder
from typing import Union, List, Any, Optional
from pinecone_text.dense.base_dense_ecoder import BaseDenseEncoder
import os
import requests

JINA_API_URL: str = "https://api.jina.ai/v1/embeddings"


class JinaEncoder(BaseDenseEncoder):
    """
    JinaAI's text embedding wrapper. See https://jina.ai/embeddings/

    Note: You should provide an API key in the environment variable JINA_API_KEY.
    Or you can pass it as an argument to the constructor as `api_key`.
    """

    def __init__(
        self,
        model_name: str = "jina-embeddings-v2-base-en",
        api_key: Optional[str] = None,
        **kwargs: Any,
    ):
        """
        Initialize the Jina encoder.

        :param model_name: The name of the embedding model to use.
        :param api_key: The Jina API key. When omitted, the value of the
            `JINA_API_KEY` environment variable is used instead.
        :param kwargs: Additional arguments (accepted but not used here).
        :raises ValueError: If no API key is given and `JINA_API_KEY` is
            unset or empty.
        """
        if api_key is None:
            api_key = os.environ.get("JINA_API_KEY", None)

        # An empty key (e.g. `JINA_API_KEY=""`) can never authenticate, so it
        # is rejected here instead of being sent as a bare "Bearer " header.
        if not api_key:
            raise ValueError(
                "JinaEncoder requires an API key to work. Please provide "
                "`api_key` argument or set `JINA_API_KEY` environment variable"
            )
        super().__init__()
        self._model_name = model_name
        # One session is kept so the auth headers are attached to every request.
        self._session = requests.Session()
        self._session.headers.update(
            {
                "Authorization": f"Bearer {api_key}",
                "Accept-Encoding": "identity",
                "Content-type": "application/json",
            }
        )

    def encode_documents(
        self, texts: Union[str, List[str]]
    ) -> Union[List[float], List[List[float]]]:
        """
        Encode documents to dense vectors (for upsert to Pinecone).

        :param texts: A single document, or a list of documents, as strings.
        :return: One embedding for a single string, otherwise one embedding
            per input text.
        """
        return self._encode(texts)

    def encode_queries(
        self, texts: Union[str, List[str]]
    ) -> Union[List[float], List[List[float]]]:
        """
        Encode queries to dense vectors.

        :param texts: A single query, or a list of queries, as strings.
        :return: One embedding for a single string, otherwise one embedding
            per input text.
        """
        return self._encode(texts)

    def _encode(
        self, texts: Union[str, List[str]]
    ) -> Union[List[float], List[List[float]]]:
        """
        Post `texts` to the Jina embeddings endpoint and return the embeddings.

        :param texts: A single string or a list of strings.
        :return: The embedding of the string when `texts` is a `str`,
            otherwise the list of embeddings ordered by the `index` field of
            each returned item.
        :raises ValueError: If `texts` is neither a string nor a list.
        :raises RuntimeError: If the response payload has no `data` entry.
        """
        if isinstance(texts, str):
            texts_input = [texts]
        elif isinstance(texts, list):
            texts_input = texts
        else:
            raise ValueError(
                f"texts must be a string or list of strings, got: {type(texts)}"
            )

        resp = self._session.post(  # type: ignore
            JINA_API_URL, json={"input": texts_input, "model": self._model_name}
        ).json()

        if not isinstance(resp, dict) or "data" not in resp:
            # Error payloads are not guaranteed to carry a "detail" field (or
            # even to be a JSON object); fall back to the whole payload so the
            # caller still gets a RuntimeError rather than KeyError/TypeError.
            detail = resp.get("detail", resp) if isinstance(resp, dict) else resp
            raise RuntimeError(detail)

        # Sort resulting embeddings by index
        sorted_embeddings = sorted(resp["data"], key=lambda e: e["index"])  # type: ignore

        # Return just the embeddings
        res = [result["embedding"] for result in sorted_embeddings]

        # A single input string yields a single vector, not a one-item list.
        if isinstance(texts, str):
            return res[0]
        return res
class JinaEncoder(BaseDenseEncoder):
    """
    JinaAI's text embedding wrapper. See https://jina.ai/embeddings/

    Note: You should provide an API key in the environment variable JINA_API_KEY.
    Or you can pass it as an argument to the constructor as `api_key`.
    """

    def __init__(
        self,
        model_name: str = "jina-embeddings-v2-base-en",
        api_key: Optional[str] = None,
        **kwargs: Any,
    ):
        """
        Initialize the Jina encoder.

        :param model_name: The name of the embedding model to use.
        :param api_key: The Jina API key. When omitted, the value of the
            `JINA_API_KEY` environment variable is used instead.
        :param kwargs: Additional arguments (accepted but not used here).
        :raises ValueError: If no API key is given and `JINA_API_KEY` is
            unset or empty.
        """
        if api_key is None:
            api_key = os.environ.get("JINA_API_KEY", None)

        # An empty key (e.g. `JINA_API_KEY=""`) can never authenticate, so it
        # is rejected here instead of being sent as a bare "Bearer " header.
        if not api_key:
            raise ValueError(
                "JinaEncoder requires an API key to work. Please provide "
                "`api_key` argument or set `JINA_API_KEY` environment variable"
            )
        super().__init__()
        self._model_name = model_name
        # One session is kept so the auth headers are attached to every request.
        self._session = requests.Session()
        self._session.headers.update(
            {
                "Authorization": f"Bearer {api_key}",
                "Accept-Encoding": "identity",
                "Content-type": "application/json",
            }
        )

    def encode_documents(
        self, texts: Union[str, List[str]]
    ) -> Union[List[float], List[List[float]]]:
        """
        Encode documents to dense vectors (for upsert to Pinecone).

        :param texts: A single document, or a list of documents, as strings.
        :return: One embedding for a single string, otherwise one embedding
            per input text.
        """
        return self._encode(texts)

    def encode_queries(
        self, texts: Union[str, List[str]]
    ) -> Union[List[float], List[List[float]]]:
        """
        Encode queries to dense vectors.

        :param texts: A single query, or a list of queries, as strings.
        :return: One embedding for a single string, otherwise one embedding
            per input text.
        """
        return self._encode(texts)

    def _encode(
        self, texts: Union[str, List[str]]
    ) -> Union[List[float], List[List[float]]]:
        """
        Post `texts` to the Jina embeddings endpoint and return the embeddings.

        :param texts: A single string or a list of strings.
        :return: The embedding of the string when `texts` is a `str`,
            otherwise the list of embeddings ordered by the `index` field of
            each returned item.
        :raises ValueError: If `texts` is neither a string nor a list.
        :raises RuntimeError: If the response payload has no `data` entry.
        """
        if isinstance(texts, str):
            texts_input = [texts]
        elif isinstance(texts, list):
            texts_input = texts
        else:
            raise ValueError(
                f"texts must be a string or list of strings, got: {type(texts)}"
            )

        resp = self._session.post(  # type: ignore
            JINA_API_URL, json={"input": texts_input, "model": self._model_name}
        ).json()

        if not isinstance(resp, dict) or "data" not in resp:
            # Error payloads are not guaranteed to carry a "detail" field (or
            # even to be a JSON object); fall back to the whole payload so the
            # caller still gets a RuntimeError rather than KeyError/TypeError.
            detail = resp.get("detail", resp) if isinstance(resp, dict) else resp
            raise RuntimeError(detail)

        # Sort resulting embeddings by index
        sorted_embeddings = sorted(resp["data"], key=lambda e: e["index"])  # type: ignore

        # Return just the embeddings
        res = [result["embedding"] for result in sorted_embeddings]

        # A single input string yields a single vector, not a one-item list.
        if isinstance(texts, str):
            return res[0]
        return res
JinaAI's text embedding wrapper. See https://jina.ai/embeddings/
Note: You should provide an API key in the environment variable JINA_API_KEY.
Or you can pass it as an argument to the constructor as `api_key`.
JinaEncoder( model_name: str = 'jina-embeddings-v2-base-en', api_key: Optional[str] = None, **kwargs: Any)
def __init__(
    self,
    model_name: str = "jina-embeddings-v2-base-en",
    api_key: Optional[str] = None,
    **kwargs: Any,
):
    """
    Initialize the Jina encoder.

    :param model_name: The name of the embedding model to use.
    :param api_key: The Jina API key. When omitted, the value of the
        `JINA_API_KEY` environment variable is used instead.
    :param kwargs: Additional arguments (accepted but not used here).
    :raises ValueError: If no API key is given and `JINA_API_KEY` is
        unset or empty.
    """
    if api_key is None:
        api_key = os.environ.get("JINA_API_KEY", None)

    # An empty key (e.g. `JINA_API_KEY=""`) can never authenticate, so it is
    # rejected here instead of being sent as a bare "Bearer " header.
    if not api_key:
        raise ValueError(
            "JinaEncoder requires an API key to work. Please provide "
            "`api_key` argument or set `JINA_API_KEY` environment variable"
        )
    super().__init__()
    self._model_name = model_name
    # One session is kept so the auth headers are attached to every request.
    self._session = requests.Session()
    self._session.headers.update(
        {
            "Authorization": f"Bearer {api_key}",
            "Accept-Encoding": "identity",
            "Content-type": "application/json",
        }
    )
Initialize the Jina encoder.
Parameters
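- model_name: The name of the embedding model to use.
- api_key: The Jina API key; if omitted, the JINA_API_KEY environment variable is used.
- kwargs: Additional keyword arguments (accepted but currently unused).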
def
encode_documents( self, texts: Union[str, List[str]]) -> Union[List[float], List[List[float]]]:
def encode_documents(
    self, texts: Union[str, List[str]]
) -> Union[List[float], List[List[float]]]:
    """
    Encode documents to dense vectors (for upsert to Pinecone).

    :param texts: A single document, or a list of documents, as strings.
    :return: The result of `self._encode(texts)`, unchanged.
    """
    document_vectors = self._encode(texts)
    return document_vectors
Encode documents to dense vectors (for upsert to Pinecone).
Arguments:
- texts: a single document, or a list of documents, to encode (each given as a string)
def
encode_queries( self, texts: Union[str, List[str]]) -> Union[List[float], List[List[float]]]:
def encode_queries(
    self, texts: Union[str, List[str]]
) -> Union[List[float], List[List[float]]]:
    """
    Encode queries to dense vectors.

    :param texts: A single query, or a list of queries, as strings.
    :return: The result of `self._encode(texts)`, unchanged.
    """
    query_vectors = self._encode(texts)
    return query_vectors
Encode queries to dense vectors.
Arguments:
- texts: a single query, or a list of queries, to encode (each given as a string)