pinecone_text.dense.sentence_transformer_encoder

Sentence Transformers are a class of models that take a sentence as input and output a vector representation of the sentence. These models are useful for tasks such as semantic search, clustering, and classification. The sentence transformer models are the work of the research team led by Nils Reimers at the Technical University of Darmstadt (UKP Lab). For more information, see the Sentence Transformers paper.

 1"""
 2Sentence Transformers are a class of models that take a sentence as input and output a vector representation of the sentence.
 3These models are useful for tasks such as semantic search, clustering, and classification. The sentence transformer models are
 4the work of the research team led by Nils Reimers at the Technical University of Darmstadt (UKP Lab). For more information, see the [Sentence Transformers paper](https://arxiv.org/abs/1908.10084).
 5
 6"""
 7
 8try:
 9    import torch
10except (OSError, ImportError, ModuleNotFoundError) as e:
11    _torch_installed = False
12else:
13    _torch_installed = True
14
15from typing import Optional, Union, List
16
17try:
18    from sentence_transformers import SentenceTransformer
19except (ImportError, ModuleNotFoundError) as e:
20    _transformers_installed = False
21else:
22    _transformers_installed = True
23
24
25from pinecone_text.dense.base_dense_ecoder import BaseDenseEncoder
26
27
class SentenceTransformerEncoder(BaseDenseEncoder):
    """Dense encoder backed by ``sentence_transformers.SentenceTransformer`` models.

    One model encodes documents and, optionally, a second model encodes
    queries. When no query model is named, the document model serves both.
    """

    def __init__(
        self,
        document_encoder_name: str,
        query_encoder_name: Optional[str] = None,
        device: Optional[str] = None,
    ):
        """Load the document encoder and, if requested, a separate query encoder.

        Args:
            document_encoder_name: Model name handed to ``SentenceTransformer``
                for encoding documents.
            query_encoder_name: Optional model name for encoding queries. When
                omitted (or empty), the document encoder is reused.
            device: Device string handed to ``SentenceTransformer``. When
                omitted, ``"cuda"`` is used if ``torch.cuda.is_available()``
                and ``"cpu"`` otherwise.

        Raises:
            ImportError: If ``torch`` or ``sentence_transformers`` could not be
                imported when this module was loaded.
        """
        if not _torch_installed:
            # Built from adjacent literals so the message carries no source
            # indentation and the suggested command stays on one line.
            # `pip install` has no `-y` flag (only `pip uninstall` does).
            raise ImportError(
                "Failed to import torch. Make sure you install dense extra "
                "dependencies by running: `pip install pinecone-text[dense]`\n"
                "If this doesn't help, it is probably a CUDA error. If you do "
                "want to use GPU, please check your CUDA driver.\n"
                "If you want to use CPU only, run the following command:\n"
                "`pip uninstall -y torch torchvision; "
                "pip install torch torchvision "
                "--index-url https://download.pytorch.org/whl/cpu`"
            )

        if not _transformers_installed:
            raise ImportError(
                "Failed to import sentence transformers. Make sure you install dense "
                "extra dependencies by running: `pip install pinecone-text[dense]`"
            )
        super().__init__()
        device = device or ("cuda" if torch.cuda.is_available() else "cpu")
        self.document_encoder = SentenceTransformer(
            document_encoder_name, device=device
        )
        if query_encoder_name:
            self.query_encoder = SentenceTransformer(query_encoder_name, device=device)
        else:
            # No dedicated query model: share the document model instance.
            self.query_encoder = self.document_encoder

    def encode_documents(
        self, texts: Union[str, List[str]]
    ) -> Union[List[float], List[List[float]]]:
        """Encode one document or a list of documents with the document encoder.

        Args:
            texts: A single document string or a list of document strings.

        Returns:
            The encoder output converted to plain Python lists via ``tolist()``.
        """
        return self.document_encoder.encode(
            texts, show_progress_bar=False, convert_to_numpy=True
        ).tolist()

    def encode_queries(
        self, texts: Union[str, List[str]]
    ) -> Union[List[float], List[List[float]]]:
        """Encode one query or a list of queries with the query encoder.

        Args:
            texts: A single query string or a list of query strings.

        Returns:
            The encoder output converted to plain Python lists via ``tolist()``.
        """
        return self.query_encoder.encode(
            texts, show_progress_bar=False, convert_to_numpy=True
        ).tolist()
class SentenceTransformerEncoder(pinecone_text.dense.base_dense_ecoder.BaseDenseEncoder):
class SentenceTransformerEncoder(BaseDenseEncoder):
    """Dense encoder backed by ``sentence_transformers.SentenceTransformer`` models.

    One model encodes documents and, optionally, a second model encodes
    queries. When no query model is named, the document model serves both.
    """

    def __init__(
        self,
        document_encoder_name: str,
        query_encoder_name: Optional[str] = None,
        device: Optional[str] = None,
    ):
        """Load the document encoder and, if requested, a separate query encoder.

        Args:
            document_encoder_name: Model name handed to ``SentenceTransformer``
                for encoding documents.
            query_encoder_name: Optional model name for encoding queries. When
                omitted (or empty), the document encoder is reused.
            device: Device string handed to ``SentenceTransformer``. When
                omitted, ``"cuda"`` is used if ``torch.cuda.is_available()``
                and ``"cpu"`` otherwise.

        Raises:
            ImportError: If ``torch`` or ``sentence_transformers`` could not be
                imported when this module was loaded.
        """
        if not _torch_installed:
            # Built from adjacent literals so the message carries no source
            # indentation and the suggested command stays on one line.
            # `pip install` has no `-y` flag (only `pip uninstall` does).
            raise ImportError(
                "Failed to import torch. Make sure you install dense extra "
                "dependencies by running: `pip install pinecone-text[dense]`\n"
                "If this doesn't help, it is probably a CUDA error. If you do "
                "want to use GPU, please check your CUDA driver.\n"
                "If you want to use CPU only, run the following command:\n"
                "`pip uninstall -y torch torchvision; "
                "pip install torch torchvision "
                "--index-url https://download.pytorch.org/whl/cpu`"
            )

        if not _transformers_installed:
            raise ImportError(
                "Failed to import sentence transformers. Make sure you install dense "
                "extra dependencies by running: `pip install pinecone-text[dense]`"
            )
        super().__init__()
        device = device or ("cuda" if torch.cuda.is_available() else "cpu")
        self.document_encoder = SentenceTransformer(
            document_encoder_name, device=device
        )
        if query_encoder_name:
            self.query_encoder = SentenceTransformer(query_encoder_name, device=device)
        else:
            # No dedicated query model: share the document model instance.
            self.query_encoder = self.document_encoder

    def encode_documents(
        self, texts: Union[str, List[str]]
    ) -> Union[List[float], List[List[float]]]:
        """Encode one document or a list of documents with the document encoder.

        Args:
            texts: A single document string or a list of document strings.

        Returns:
            The encoder output converted to plain Python lists via ``tolist()``.
        """
        return self.document_encoder.encode(
            texts, show_progress_bar=False, convert_to_numpy=True
        ).tolist()

    def encode_queries(
        self, texts: Union[str, List[str]]
    ) -> Union[List[float], List[List[float]]]:
        """Encode one query or a list of queries with the query encoder.

        Args:
            texts: A single query string or a list of query strings.

        Returns:
            The encoder output converted to plain Python lists via ``tolist()``.
        """
        return self.query_encoder.encode(
            texts, show_progress_bar=False, convert_to_numpy=True
        ).tolist()

Helper class that provides a standard way to create an ABC using inheritance.

SentenceTransformerEncoder( document_encoder_name: str, query_encoder_name: Optional[str] = None, device: Optional[str] = None)
30    def __init__(
31        self,
32        document_encoder_name: str,
33        query_encoder_name: Optional[str] = None,
34        device: Optional[str] = None,
35    ):
36        if not _torch_installed:
37            raise ImportError(
38                """Failed to import torch. Make sure you install dense extra 
39                dependencies by running: `pip install pinecone-text[dense]`
40        If this doesn't help, it is probably a CUDA error. If you do want to use GPU, 
41        please check your CUDA driver.
42        If you want to use CPU only, run the following command:
43        `pip uninstall -y torch torchvision;pip install -y torch torchvision 
44        --index-url https://download.pytorch.org/whl/cpu`"""
45            )
46
47        if not _transformers_installed:
48            raise ImportError(
49                "Failed to import sentence transformers. Make sure you install dense "
50                "extra dependencies by running: `pip install pinecone-text[dense]`"
51            )
52        super().__init__()
53        device = device or ("cuda" if torch.cuda.is_available() else "cpu")
54        self.document_encoder = SentenceTransformer(
55            document_encoder_name, device=device
56        )
57        if query_encoder_name:
58            self.query_encoder = SentenceTransformer(query_encoder_name, device=device)
59        else:
60            self.query_encoder = self.document_encoder
def encode_documents( self, texts: Union[str, List[str]]) -> Union[List[float], List[List[float]]]:
62    def encode_documents(
63        self, texts: Union[str, List[str]]
64    ) -> Union[List[float], List[List[float]]]:
65        return self.document_encoder.encode(
66            texts, show_progress_bar=False, convert_to_numpy=True
67        ).tolist()

encode documents to a dense vector (for upsert to pinecone)

Arguments:
  • texts: a single document or a list of documents to encode, each given as a string
def encode_queries( self, texts: Union[str, List[str]]) -> Union[List[float], List[List[float]]]:
69    def encode_queries(
70        self, texts: Union[str, List[str]]
71    ) -> Union[List[float], List[List[float]]]:
72        return self.query_encoder.encode(
73            texts, show_progress_bar=False, convert_to_numpy=True
74        ).tolist()

encode queries to a dense vector

Arguments:
  • texts: a single query or a list of queries to encode, each given as a string