pinecone_datasets.public
from .dataset import Dataset
from .catalog import Catalog
from . import cfg

# Module-level cache of the most recently loaded Catalog.
# It is (re)bound by every call to list_datasets() and reused by load_dataset().
catalog = None


def list_datasets(as_df=False, **kwargs) -> list:
    """
    List all datasets in the catalog, optionally as a pandas DataFrame.
    Catalog is set using the `DATASETS_CATALOG_BASEPATH` environment variable.

    Every call reloads the catalog through `Catalog.load(**kwargs)` and stores
    it in the module-level `catalog` variable.

    Args:
        as_df (bool, optional): Whether to return the list as a pandas DataFrame. Defaults to False.
        **kwargs: Additional keyword arguments forwarded to `Catalog.load`.

    Returns:
        list: A list of dataset names; or
        df: A pandas DataFrame of dataset names and metadata

    Example:

        ```python
        from pinecone_datasets import list_datasets
        list_datasets()  # -> ['dataset1', 'dataset2', ...]
        list_datasets(as_df=True)  # -> pandas DataFrame of dataset names and metadata
        ```

    """
    global catalog
    catalog = Catalog.load(**kwargs)
    return catalog.list_datasets(as_df=as_df)


def load_dataset(dataset_id: str, **kwargs) -> Dataset:
    """
    Load a dataset from the catalog

    The dataset name is first validated against the cached catalog. If no
    catalog has been loaded yet, `list_datasets()` is called (without extra
    arguments) to load and cache one.

    Args:
        dataset_id (str): The name of the dataset to load
        **kwargs: Additional keyword arguments forwarded to `Dataset.from_catalog`, e.g. `engine='polars'`

    Returns:
        Dataset: A Dataset object

    Raises:
        FileNotFoundError: If `dataset_id` is not among the names listed by the catalog.

    Example:

        ```python
        from pinecone_datasets import load_dataset
        dataset = load_dataset("dataset_name")
        ```
    """
    # Test the sentinel with `is None` rather than truthiness, so an already
    # loaded catalog is always reused even if the Catalog object itself would
    # evaluate as falsy (e.g. if it defines __len__/__bool__ and is empty).
    if catalog is None:
        available = list_datasets(as_df=False)
    else:
        available = catalog.list_datasets(as_df=False)

    if dataset_id not in available:
        raise FileNotFoundError(f"Dataset {dataset_id} not found in catalog")
    return Dataset.from_catalog(dataset_id, **kwargs)
def
list_datasets(as_df=False, **kwargs) -> list:
def list_datasets(as_df=False, **kwargs) -> list:
    """
    Return the datasets available in the catalog.

    The catalog is freshly loaded on every call via `Catalog.load(**kwargs)`
    and stored in the module-level `catalog` variable. Per the original
    documentation, the catalog location is set using the
    `DATASETS_CATALOG_BASEPATH` environment variable.

    Args:
        as_df (bool, optional): Whether to return the listing as a pandas DataFrame
            instead of a plain list. Defaults to False.
        **kwargs: Additional keyword arguments forwarded to `Catalog.load`.

    Returns:
        list: A list of dataset names; or
        df: A pandas DataFrame of dataset names and metadata (when `as_df=True`;
            note the `-> list` annotation does not reflect this case)

    Example:

        ```python
        from pinecone_datasets import list_datasets
        list_datasets()  # -> ['dataset1', 'dataset2', ...]
        list_datasets(as_df=True)  # -> pandas DataFrame of dataset names and metadata
        ```

    """
    global catalog
    loaded = Catalog.load(**kwargs)
    # Publish the freshly loaded catalog before querying it.
    catalog = loaded
    return loaded.list_datasets(as_df=as_df)
List all datasets in the catalog, optionally as a pandas DataFrame.
Catalog is set using the DATASETS_CATALOG_BASEPATH
environment variable.
Arguments:
- as_df (bool, optional): Whether to return the list as a pandas DataFrame. Defaults to False.
Returns:
list: A list of dataset names; or df: A pandas DataFrame of dataset names and metadata
Example:
from pinecone_datasets import list_datasets
list_datasets()  # -> ['dataset1', 'dataset2', ...]
list_datasets(as_df=True)  # -> pandas DataFrame of dataset names and metadata
def load_dataset(dataset_id: str, **kwargs) -> Dataset:
    """
    Load a dataset from the catalog

    The dataset name is first validated against the cached module-level
    `catalog`. If no catalog has been loaded yet, `list_datasets()` is called
    (without extra arguments) to obtain the list of names.

    Args:
        dataset_id (str): The name of the dataset to load
        **kwargs: Additional keyword arguments forwarded to `Dataset.from_catalog`, e.g. `engine='polars'`

    Returns:
        Dataset: A Dataset object

    Raises:
        FileNotFoundError: If `dataset_id` is not among the names listed by the catalog.

    Example:

        ```python
        from pinecone_datasets import load_dataset
        dataset = load_dataset("dataset_name")
        ```
    """
    # Test the sentinel with `is None` rather than truthiness, so an already
    # loaded catalog is always reused even if the Catalog object itself would
    # evaluate as falsy (e.g. if it defines __len__/__bool__ and is empty).
    if catalog is None:
        available = list_datasets(as_df=False)
    else:
        available = catalog.list_datasets(as_df=False)

    if dataset_id not in available:
        raise FileNotFoundError(f"Dataset {dataset_id} not found in catalog")
    return Dataset.from_catalog(dataset_id, **kwargs)
Load a dataset from the catalog
Arguments:
- dataset_id (str): The name of the dataset to load
- **kwargs: Additional keyword arguments forwarded to Dataset.from_catalog, e.g.
engine='polars'
Returns:
Dataset: A Dataset object
Example:
from pinecone_datasets import load_dataset
dataset = load_dataset("dataset_name")