pinecone_datasets.public

 1from .dataset import Dataset
 2from .catalog import Catalog
 3from . import cfg
 4
 5catalog = None
 6
 7
 8def list_datasets(as_df=False, **kwargs) -> list:
 9    """
10    List all datasets in the catalog, optionally as a pandas DataFrame.
11    Catalog is set using the `DATASETS_CATALOG_BASEPATH` environment variable.
12
13    Args:
14        as_df (bool, optional): Whether to return the list as a pandas DataFrame. Defaults to False.
15
16    Returns:
17        list: A list of dataset names; or
18        df: A pandas DataFrame of dataset names and metadata
19
20    Example:
21
22        ```python
23        from pinecone_datasets import list_datasets
24        list_datasets() # -> ['dataset1', 'dataset2', ...]
25        list_datasets(as_df=True) # -> pandas DataFrame of dataset names and metadata
26        ```
27
28    """
29    global catalog
30    catalog = Catalog.load(**kwargs)
31    return catalog.list_datasets(as_df=as_df)
32
33
def load_dataset(dataset_id: str, **kwargs) -> Dataset:
    """
    Load a dataset from the catalog

    The module-level `catalog` is used to check that `dataset_id` exists. If no catalog
    has been loaded yet, `list_datasets()` is called first, which loads one without any
    extra arguments.

    Args:
        dataset_id (str): The name of the dataset to load
        **kwargs: Additional keyword arguments forwarded to `Dataset.from_catalog`, e.g. `engine='polars'`

    Returns:
        Dataset: A Dataset object

    Raises:
        FileNotFoundError: If `dataset_id` is not listed in the catalog

    Example:

        ```python
        from pinecone_datasets import load_dataset
        dataset = load_dataset("dataset_name")
        ```
    """
    # Compare against None explicitly: a truthiness test would also treat an
    # already-loaded catalog that evaluates as falsy as "not loaded" and
    # replace it with one loaded using default arguments.
    if catalog is None:
        available = list_datasets(as_df=False)
    else:
        available = catalog.list_datasets(as_df=False)

    if dataset_id not in available:
        raise FileNotFoundError(f"Dataset {dataset_id} not found in catalog")
    return Dataset.from_catalog(dataset_id, **kwargs)
def list_datasets(as_df=False, **kwargs) -> list:
 9def list_datasets(as_df=False, **kwargs) -> list:
10    """
11    List all datasets in the catalog, optionally as a pandas DataFrame.
12    Catalog is set using the `DATASETS_CATALOG_BASEPATH` environment variable.
13
14    Args:
15        as_df (bool, optional): Whether to return the list as a pandas DataFrame. Defaults to False.
16
17    Returns:
18        list: A list of dataset names; or
19        df: A pandas DataFrame of dataset names and metadata
20
21    Example:
22
23        ```python
24        from pinecone_datasets import list_datasets
25        list_datasets() # -> ['dataset1', 'dataset2', ...]
26        list_datasets(as_df=True) # -> pandas DataFrame of dataset names and metadata
27        ```
28
29    """
30    global catalog
31    catalog = Catalog.load(**kwargs)
32    return catalog.list_datasets(as_df=as_df)

List all datasets in the catalog, optionally as a pandas DataFrame. Catalog is set using the DATASETS_CATALOG_BASEPATH environment variable.

Arguments:
  • as_df (bool, optional): Whether to return the list as a pandas DataFrame. Defaults to False.
Returns:

list: A list of dataset names (default); or df: A pandas DataFrame of dataset names and metadata, when as_df=True

Example:
from pinecone_datasets import list_datasets
list_datasets() # -> ['dataset1', 'dataset2', ...]
list_datasets(as_df=True) # -> pandas DataFrame of dataset names and metadata
def load_dataset(dataset_id: str, **kwargs) -> pinecone_datasets.dataset.Dataset:
def load_dataset(dataset_id: str, **kwargs) -> Dataset:
    """
    Load a dataset from the catalog

    The module-level `catalog` is used to check that `dataset_id` exists. If no catalog
    has been loaded yet, `list_datasets()` is called first, which loads one without any
    extra arguments.

    Args:
        dataset_id (str): The name of the dataset to load
        **kwargs: Additional keyword arguments forwarded to `Dataset.from_catalog`, e.g. `engine='polars'`

    Returns:
        Dataset: A Dataset object

    Raises:
        FileNotFoundError: If `dataset_id` is not listed in the catalog

    Example:

        ```python
        from pinecone_datasets import load_dataset
        dataset = load_dataset("dataset_name")
        ```
    """
    # Compare against None explicitly: a truthiness test would also treat an
    # already-loaded catalog that evaluates as falsy as "not loaded" and
    # replace it with one loaded using default arguments.
    if catalog is None:
        available = list_datasets(as_df=False)
    else:
        available = catalog.list_datasets(as_df=False)

    if dataset_id not in available:
        raise FileNotFoundError(f"Dataset {dataset_id} not found in catalog")
    return Dataset.from_catalog(dataset_id, **kwargs)

Load a dataset from the catalog

Arguments:
  • dataset_id (str): The name of the dataset to load
  • **kwargs: Additional keyword arguments forwarded to Dataset.from_catalog, e.g. engine='polars'
Returns:

Dataset: A Dataset object

Example:
from pinecone_datasets import load_dataset
dataset = load_dataset("dataset_name")