pinecone_text.dense.sentence_transformer_encoder
Sentence Transformers are a class of models that take a sentence as input and output a vector representation of the sentence. These models are useful for tasks such as semantic search, clustering, and classification. The sentence transformer models are the work of Nils Reimers and Iryna Gurevych at the UKP Lab, Technical University of Darmstadt. For more information, see the Sentence Transformers paper.
"""
Sentence Transformers are a class of models that take a sentence as input and output a vector representation of the sentence.
These models are useful for tasks such as semantic search, clustering, and classification. The sentence transformer models are
the work of Nils Reimers and Iryna Gurevych at the UKP Lab, Technical University of Darmstadt. For more information, see the [Sentence Transformers paper](https://arxiv.org/abs/1908.10084).

"""

# torch is an optional ("dense" extra) dependency. Record whether the import
# worked instead of failing here, so importing this module never raises; the
# error is reported when the encoder is actually constructed.
# OSError is caught as well as ImportError because the constructor's error
# message attributes some torch import failures to CUDA problems.
try:
    import torch
except (OSError, ImportError, ModuleNotFoundError):
    _torch_installed = False
else:
    _torch_installed = True

from typing import Optional, Union, List

# sentence-transformers is optional too and gets the same deferred handling.
try:
    from sentence_transformers import SentenceTransformer
except (ImportError, ModuleNotFoundError):
    _transformers_installed = False
else:
    _transformers_installed = True


from pinecone_text.dense.base_dense_ecoder import BaseDenseEncoder


class SentenceTransformerEncoder(BaseDenseEncoder):
    """Dense encoder backed by `sentence_transformers.SentenceTransformer` models.

    One model encodes documents; queries are encoded either by a second,
    separately named model or by the same document model.
    """

    def __init__(
        self,
        document_encoder_name: str,
        query_encoder_name: Optional[str] = None,
        device: Optional[str] = None,
    ):
        """Load the document encoder and, optionally, a separate query encoder.

        Arguments:
            document_encoder_name: model name passed to `SentenceTransformer` for encoding documents
            query_encoder_name: model name passed to `SentenceTransformer` for encoding queries.
                When omitted (or empty), the document encoder is reused for queries.
            device: device passed to `SentenceTransformer`. When omitted, "cuda" is used if
                `torch.cuda.is_available()` returns True, otherwise "cpu".

        Raises:
            ImportError: if `torch` or `sentence_transformers` could not be imported.
        """
        if not _torch_installed:
            # `pip install` has no `-y` option (only `pip uninstall` does), and
            # the command is kept on one line so it can be copy-pasted.
            raise ImportError(
                "Failed to import torch. Make sure you install dense extra "
                "dependencies by running: `pip install pinecone-text[dense]`\n"
                "If this doesn't help, it is probably a CUDA error. If you do want "
                "to use GPU, please check your CUDA driver.\n"
                "If you want to use CPU only, run the following command:\n"
                "`pip uninstall -y torch torchvision; pip install torch torchvision "
                "--index-url https://download.pytorch.org/whl/cpu`"
            )

        if not _transformers_installed:
            raise ImportError(
                "Failed to import sentence transformers. Make sure you install dense "
                "extra dependencies by running: `pip install pinecone-text[dense]`"
            )
        super().__init__()
        device = device or ("cuda" if torch.cuda.is_available() else "cpu")
        self.document_encoder = SentenceTransformer(
            document_encoder_name, device=device
        )
        if query_encoder_name:
            self.query_encoder = SentenceTransformer(query_encoder_name, device=device)
        else:
            # No dedicated query model: share the document model instance.
            self.query_encoder = self.document_encoder

    def encode_documents(
        self, texts: Union[str, List[str]]
    ) -> Union[List[float], List[List[float]]]:
        """
        Encode documents to dense vectors (for upsert to Pinecone).

        Arguments:
            texts: a single document (string) or a list of documents (strings) to encode

        Returns:
            The document encoder's output converted to plain Python lists via `.tolist()`.
        """
        return self.document_encoder.encode(
            texts, show_progress_bar=False, convert_to_numpy=True
        ).tolist()

    def encode_queries(
        self, texts: Union[str, List[str]]
    ) -> Union[List[float], List[List[float]]]:
        """
        Encode queries to dense vectors.

        Arguments:
            texts: a single query (string) or a list of queries (strings) to encode

        Returns:
            The query encoder's output converted to plain Python lists via `.tolist()`.
        """
        return self.query_encoder.encode(
            texts, show_progress_bar=False, convert_to_numpy=True
        ).tolist()
class SentenceTransformerEncoder(BaseDenseEncoder):
    """Dense encoder backed by `sentence_transformers.SentenceTransformer` models.

    One model encodes documents; queries are encoded either by a second,
    separately named model or by the same document model.
    """

    def __init__(
        self,
        document_encoder_name: str,
        query_encoder_name: Optional[str] = None,
        device: Optional[str] = None,
    ):
        """Load the document encoder and, optionally, a separate query encoder.

        Arguments:
            document_encoder_name: model name passed to `SentenceTransformer` for encoding documents
            query_encoder_name: model name passed to `SentenceTransformer` for encoding queries.
                When omitted (or empty), the document encoder is reused for queries.
            device: device passed to `SentenceTransformer`. When omitted, "cuda" is used if
                `torch.cuda.is_available()` returns True, otherwise "cpu".

        Raises:
            ImportError: if `torch` or `sentence_transformers` could not be imported.
        """
        if not _torch_installed:
            # `pip install` has no `-y` option (only `pip uninstall` does), and
            # the command is kept on one line so it can be copy-pasted.
            raise ImportError(
                "Failed to import torch. Make sure you install dense extra "
                "dependencies by running: `pip install pinecone-text[dense]`\n"
                "If this doesn't help, it is probably a CUDA error. If you do want "
                "to use GPU, please check your CUDA driver.\n"
                "If you want to use CPU only, run the following command:\n"
                "`pip uninstall -y torch torchvision; pip install torch torchvision "
                "--index-url https://download.pytorch.org/whl/cpu`"
            )

        if not _transformers_installed:
            raise ImportError(
                "Failed to import sentence transformers. Make sure you install dense "
                "extra dependencies by running: `pip install pinecone-text[dense]`"
            )
        super().__init__()
        device = device or ("cuda" if torch.cuda.is_available() else "cpu")
        self.document_encoder = SentenceTransformer(
            document_encoder_name, device=device
        )
        if query_encoder_name:
            self.query_encoder = SentenceTransformer(query_encoder_name, device=device)
        else:
            # No dedicated query model: share the document model instance.
            self.query_encoder = self.document_encoder

    def encode_documents(
        self, texts: Union[str, List[str]]
    ) -> Union[List[float], List[List[float]]]:
        """
        Encode documents to dense vectors (for upsert to Pinecone).

        Arguments:
            texts: a single document (string) or a list of documents (strings) to encode

        Returns:
            The document encoder's output converted to plain Python lists via `.tolist()`.
        """
        return self.document_encoder.encode(
            texts, show_progress_bar=False, convert_to_numpy=True
        ).tolist()

    def encode_queries(
        self, texts: Union[str, List[str]]
    ) -> Union[List[float], List[List[float]]]:
        """
        Encode queries to dense vectors.

        Arguments:
            texts: a single query (string) or a list of queries (strings) to encode

        Returns:
            The query encoder's output converted to plain Python lists via `.tolist()`.
        """
        return self.query_encoder.encode(
            texts, show_progress_bar=False, convert_to_numpy=True
        ).tolist()
Helper class that provides a standard way to create an ABC using inheritance.
SentenceTransformerEncoder( document_encoder_name: str, query_encoder_name: Optional[str] = None, device: Optional[str] = None)
def __init__(
    self,
    document_encoder_name: str,
    query_encoder_name: Optional[str] = None,
    device: Optional[str] = None,
):
    """Load the document encoder and, optionally, a separate query encoder.

    Arguments:
        document_encoder_name: model name passed to `SentenceTransformer` for encoding documents
        query_encoder_name: model name passed to `SentenceTransformer` for encoding queries.
            When omitted (or empty), the document encoder is reused for queries.
        device: device passed to `SentenceTransformer`. When omitted, "cuda" is used if
            `torch.cuda.is_available()` returns True, otherwise "cpu".

    Raises:
        ImportError: if `torch` or `sentence_transformers` could not be imported.
    """
    if not _torch_installed:
        # `pip install` has no `-y` option (only `pip uninstall` does), and
        # the command is kept on one line so it can be copy-pasted.
        raise ImportError(
            "Failed to import torch. Make sure you install dense extra "
            "dependencies by running: `pip install pinecone-text[dense]`\n"
            "If this doesn't help, it is probably a CUDA error. If you do want "
            "to use GPU, please check your CUDA driver.\n"
            "If you want to use CPU only, run the following command:\n"
            "`pip uninstall -y torch torchvision; pip install torch torchvision "
            "--index-url https://download.pytorch.org/whl/cpu`"
        )

    if not _transformers_installed:
        raise ImportError(
            "Failed to import sentence transformers. Make sure you install dense "
            "extra dependencies by running: `pip install pinecone-text[dense]`"
        )
    super().__init__()
    device = device or ("cuda" if torch.cuda.is_available() else "cpu")
    self.document_encoder = SentenceTransformer(
        document_encoder_name, device=device
    )
    if query_encoder_name:
        self.query_encoder = SentenceTransformer(query_encoder_name, device=device)
    else:
        # No dedicated query model: share the document model instance.
        self.query_encoder = self.document_encoder
def
encode_documents( self, texts: Union[str, List[str]]) -> Union[List[float], List[List[float]]]:
def encode_documents(
    self, texts: Union[str, List[str]]
) -> Union[List[float], List[List[float]]]:
    """
    Encode documents to dense vectors (for upsert to Pinecone).

    Arguments:
        texts: a single document (string) or a list of documents (strings) to encode

    Returns:
        The document encoder's output converted to plain Python lists via `.tolist()`.
    """
    # Request a numpy result so that `.tolist()` yields plain Python floats.
    embeddings = self.document_encoder.encode(
        texts,
        show_progress_bar=False,
        convert_to_numpy=True,
    )
    return embeddings.tolist()
Encode documents to dense vectors (for upsert to Pinecone)
Arguments:
- texts: a single document (string) or a list of documents (strings) to encode
def
encode_queries( self, texts: Union[str, List[str]]) -> Union[List[float], List[List[float]]]:
def encode_queries(
    self, texts: Union[str, List[str]]
) -> Union[List[float], List[List[float]]]:
    """
    Encode queries to dense vectors.

    Arguments:
        texts: a single query (string) or a list of queries (strings) to encode

    Returns:
        The query encoder's output converted to plain Python lists via `.tolist()`.
    """
    # Request a numpy result so that `.tolist()` yields plain Python floats.
    embeddings = self.query_encoder.encode(
        texts,
        show_progress_bar=False,
        convert_to_numpy=True,
    )
    return embeddings.tolist()
Encode queries to dense vectors
Arguments:
- texts: a single query (string) or a list of queries (strings) to encode