dataset viewer
Enables interaction with the Hugging Face Dataset Viewer API, allowing users to browse, search, filter, and analyze datasets hosted on the Hugging Face Hub.
An MCP server for interacting with the Hugging Face Dataset Viewer API, providing capabilities to browse and analyze datasets hosted on the Hugging Face Hub.
dataset://
URI scheme for accessing Hugging Face datasets

The server provides the following tools:

validate
Parameters:
dataset: Dataset identifier (e.g. 'stanfordnlp/imdb')
auth_token (optional): For private datasets

get_info
Parameters:
dataset: Dataset identifier
auth_token (optional): For private datasets

get_rows
Parameters:
dataset: Dataset identifier
config: Configuration name
split: Split name
page (optional): Page number (0-based)
auth_token (optional): For private datasets

get_first_rows
Parameters:
dataset: Dataset identifier
config: Configuration name
split: Split name
auth_token (optional): For private datasets

get_statistics
Parameters:
dataset: Dataset identifier
config: Configuration name
split: Split name
auth_token (optional): For private datasets

search_dataset
Parameters:
dataset: Dataset identifier
config: Configuration name
split: Split name
query: Text to search for
auth_token (optional): For private datasets

filter
Parameters:
dataset: Dataset identifier
config: Configuration name
split: Split name
where: SQL WHERE clause (e.g. "score > 0.5")
orderby (optional): SQL ORDER BY clause
page (optional): Page number (0-based)
auth_token (optional): For private datasets

get_parquet
Parameters:
dataset: Dataset identifier
auth_token (optional): For private datasets

Clone the repository:
git clone https://github.com/privetin/dataset-viewer.git
cd dataset-viewer
Create a virtual environment and install:
# Create virtual environment
uv venv
# Activate virtual environment
# On Unix:
source .venv/bin/activate
# On Windows:
.venv\Scripts\activate
# Install in development mode
uv add -e .
HUGGINGFACE_TOKEN: Your Hugging Face API token for accessing private datasets

Add the following to your Claude Desktop config file:
On Windows: %APPDATA%\Claude\claude_desktop_config.json
On macOS: ~/Library/Application Support/Claude/claude_desktop_config.json
{
"mcpServers": {
"dataset-viewer": {
"command": "uv",
"args": [
"run",
"dataset-viewer"
]
}
}
}
Validate a dataset:
{
"dataset": "stanfordnlp/imdb"
}
Get dataset information:
{
"dataset": "stanfordnlp/imdb"
}
Search dataset contents:
{
"dataset": "stanfordnlp/imdb",
"config": "plain_text",
"split": "train",
"query": "great movie"
}
Filter and sort rows:
{
"dataset": "stanfordnlp/imdb",
"config": "plain_text",
"split": "train",
"where": "label = 'positive'",
"orderby": "text DESC",
"page": 0
}
Get dataset statistics:
{
"dataset": "stanfordnlp/imdb",
"config": "plain_text",
"split": "train"
}
MIT License - see LICENSE for details
[
{
"description": "Get detailed information about a Hugging Face dataset including description, features, splits, and statistics. Run validate first to check if the dataset exists and is accessible.",
"inputSchema": {
"properties": {
"auth_token": {
"description": "Hugging Face auth token for private/gated datasets",
"optional": true,
"type": "string"
},
"dataset": {
"description": "Hugging Face dataset identifier in the format owner/dataset",
"examples": [
"ylecun/mnist",
"stanfordnlp/imdb"
],
"pattern": "^[^/]+/[^/]+$",
"type": "string"
}
},
"required": [
"dataset"
],
"type": "object"
},
"name": "get_info"
},
{
"description": "Get paginated rows from a Hugging Face dataset",
"inputSchema": {
"properties": {
"auth_token": {
"description": "Hugging Face auth token for private/gated datasets",
"optional": true,
"type": "string"
},
"config": {
"description": "Dataset configuration/subset name. Use get_info to list available configs",
"examples": [
"default",
"en",
"es"
],
"type": "string"
},
"dataset": {
"description": "Hugging Face dataset identifier in the format owner/dataset",
"examples": [
"ylecun/mnist",
"stanfordnlp/imdb"
],
"pattern": "^[^/]+/[^/]+$",
"type": "string"
},
"page": {
"default": 0,
"description": "Page number (0-based), returns 100 rows per page",
"type": "integer"
},
"split": {
"description": "Dataset split name. Splits partition the data for training/evaluation",
"examples": [
"train",
"validation",
"test"
],
"type": "string"
}
},
"required": [
"dataset",
"config",
"split"
],
"type": "object"
},
"name": "get_rows"
},
{
"description": "Get first rows from a Hugging Face dataset split",
"inputSchema": {
"properties": {
"auth_token": {
"description": "Hugging Face auth token for private/gated datasets",
"optional": true,
"type": "string"
},
"config": {
"description": "Dataset configuration/subset name. Use get_info to list available configs",
"examples": [
"default",
"en",
"es"
],
"type": "string"
},
"dataset": {
"description": "Hugging Face dataset identifier in the format owner/dataset",
"examples": [
"ylecun/mnist",
"stanfordnlp/imdb"
],
"pattern": "^[^/]+/[^/]+$",
"type": "string"
},
"split": {
"description": "Dataset split name. Splits partition the data for training/evaluation",
"examples": [
"train",
"validation",
"test"
],
"type": "string"
}
},
"required": [
"dataset",
"config",
"split"
],
"type": "object"
},
"name": "get_first_rows"
},
{
"description": "Search for text within a Hugging Face dataset",
"inputSchema": {
"properties": {
"auth_token": {
"description": "Hugging Face auth token for private/gated datasets",
"optional": true,
"type": "string"
},
"config": {
"description": "Dataset configuration/subset name. Use get_info to list available configs",
"examples": [
"default",
"en",
"es"
],
"type": "string"
},
"dataset": {
"description": "Hugging Face dataset identifier in the format owner/dataset",
"examples": [
"ylecun/mnist",
"stanfordnlp/imdb"
],
"pattern": "^[^/]+/[^/]+$",
"type": "string"
},
"query": {
"description": "Text to search for in the dataset",
"type": "string"
},
"split": {
"description": "Dataset split name. Splits partition the data for training/evaluation",
"examples": [
"train",
"validation",
"test"
],
"type": "string"
}
},
"required": [
"dataset",
"config",
"split",
"query"
],
"type": "object"
},
"name": "search_dataset"
},
{
"description": "Filter rows in a Hugging Face dataset using SQL-like conditions",
"inputSchema": {
"properties": {
"auth_token": {
"description": "Hugging Face auth token for private/gated datasets",
"optional": true,
"type": "string"
},
"config": {
"description": "Dataset configuration/subset name. Use get_info to list available configs",
"examples": [
"default",
"en",
"es"
],
"type": "string"
},
"dataset": {
"description": "Hugging Face dataset identifier in the format owner/dataset",
"examples": [
"ylecun/mnist",
"stanfordnlp/imdb"
],
"pattern": "^[^/]+/[^/]+$",
"type": "string"
},
"orderby": {
"description": "SQL-like ORDER BY clause to sort results",
"examples": [
"column ASC",
"score DESC",
"name ASC, id DESC"
],
"optional": true,
"type": "string"
},
"page": {
"default": 0,
"description": "Page number for paginated results (100 rows per page)",
"minimum": 0,
"type": "integer"
},
"split": {
"description": "Dataset split name. Splits partition the data for training/evaluation",
"examples": [
"train",
"validation",
"test"
],
"type": "string"
},
"where": {
"description": "SQL-like WHERE clause to filter rows",
"examples": [
"column = \"value\"",
"score > 0.5",
"text LIKE \"%query%\""
],
"type": "string"
}
},
"required": [
"dataset",
"config",
"split",
"where"
],
"type": "object"
},
"name": "filter"
},
{
"description": "Get statistics about a Hugging Face dataset",
"inputSchema": {
"properties": {
"auth_token": {
"description": "Hugging Face auth token for private/gated datasets",
"optional": true,
"type": "string"
},
"config": {
"description": "Dataset configuration/subset name. Use get_info to list available configs",
"examples": [
"default",
"en",
"es"
],
"type": "string"
},
"dataset": {
"description": "Hugging Face dataset identifier in the format owner/dataset",
"examples": [
"ylecun/mnist",
"stanfordnlp/imdb"
],
"pattern": "^[^/]+/[^/]+$",
"type": "string"
},
"split": {
"description": "Dataset split name. Splits partition the data for training/evaluation",
"examples": [
"train",
"validation",
"test"
],
"type": "string"
}
},
"required": [
"dataset",
"config",
"split"
],
"type": "object"
},
"name": "get_statistics"
},
{
"description": "Export Hugging Face dataset split as Parquet file",
"inputSchema": {
"properties": {
"auth_token": {
"description": "Hugging Face auth token for private/gated datasets",
"optional": true,
"type": "string"
},
"dataset": {
"description": "Hugging Face dataset identifier in the format owner/dataset",
"examples": [
"ylecun/mnist",
"stanfordnlp/imdb"
],
"pattern": "^[^/]+/[^/]+$",
"type": "string"
}
},
"required": [
"dataset"
],
"type": "object"
},
"name": "get_parquet"
},
{
"description": "Check if a Hugging Face dataset exists and is accessible",
"inputSchema": {
"properties": {
"auth_token": {
"description": "Hugging Face auth token for private/gated datasets",
"optional": true,
"type": "string"
},
"dataset": {
"description": "Hugging Face dataset identifier in the format owner/dataset",
"examples": [
"ylecun/mnist",
"stanfordnlp/imdb"
],
"pattern": "^[^/]+/[^/]+$",
"type": "string"
}
},
"required": [
"dataset"
],
"type": "object"
},
"name": "validate"
}
]