pinecone_datasets.cfg

 1# from polars.datatypes import Utf8, Float32, List, Struct, Field, UInt32
 2
 3
 4class Storage:
 5    endpoint: str = "gs://pinecone-datasets-dev"
 6
 7
 8class Schema:
 9    class Names:
10        documents = [
11            ("id", False, None),
12            ("values", False, None),
13            ("sparse_values", True, None),
14            ("metadata", True, None),
15            ("blob", True, None),
16        ]
17        queries = [
18            ("vector", False, None),
19            ("sparse_vector", True, None),
20            ("filter", True, None),
21            ("top_k", False, 5),
22            ("blob", True, None),
23        ]
24
25    # documents = {
26    #     "id": Utf8,
27    #     "values": List(Float32),
28    #     "sparse_values": Struct(
29    #         [Field("indices", List(UInt32)), Field("values", List(Float32))]
30    #     ),
31    # }
32    documents_select_columns = ["id", "values", "sparse_values", "metadata"]
33
34    # queries = {
35    #     "vector": List(Float32),
36    #     "sparse_vector": Struct(
37    #         [Field("indices", List(UInt32)), Field("values", List(Float32))]
38    #     ),
39    #     "top_k": UInt32,
40    # }
41    queries_select_columns = ["vector", "sparse_vector", "filter", "top_k"]
class Storage:
class Storage:
    """Storage-related configuration constants."""

    # Default endpoint URI (a ``gs://`` location) used as the class-level value.
    endpoint: str = "gs://pinecone-datasets-dev"
class Schema:
 9class Schema:
10    class Names:
11        documents = [
12            ("id", False, None),
13            ("values", False, None),
14            ("sparse_values", True, None),
15            ("metadata", True, None),
16            ("blob", True, None),
17        ]
18        queries = [
19            ("vector", False, None),
20            ("sparse_vector", True, None),
21            ("filter", True, None),
22            ("top_k", False, 5),
23            ("blob", True, None),
24        ]
25
26    # documents = {
27    #     "id": Utf8,
28    #     "values": List(Float32),
29    #     "sparse_values": Struct(
30    #         [Field("indices", List(UInt32)), Field("values", List(Float32))]
31    #     ),
32    # }
33    documents_select_columns = ["id", "values", "sparse_values", "metadata"]
34
35    # queries = {
36    #     "vector": List(Float32),
37    #     "sparse_vector": Struct(
38    #         [Field("indices", List(UInt32)), Field("values", List(Float32))]
39    #     ),
40    #     "top_k": UInt32,
41    # }
42    queries_select_columns = ["vector", "sparse_vector", "filter", "top_k"]
class Schema.Names:
10    class Names:
11        documents = [
12            ("id", False, None),
13            ("values", False, None),
14            ("sparse_values", True, None),
15            ("metadata", True, None),
16            ("blob", True, None),
17        ]
18        queries = [
19            ("vector", False, None),
20            ("sparse_vector", True, None),
21            ("filter", True, None),
22            ("top_k", False, 5),
23            ("blob", True, None),
24        ]