Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
2d4003b
Add support and test for pd.Series
EkberHasanov Jun 25, 2023
43539fb
Adding docs for basic usage
EkberHasanov Jun 25, 2023
497ff1f
Fix documentation
EkberHasanov Jun 25, 2023
c218b20
Add pandas to optional dependencies
EkberHasanov Jun 25, 2023
a2cd952
fix python3.8 issues
EkberHasanov Jun 26, 2023
0a07e92
fix py3.7 issues
EkberHasanov Jun 26, 2023
5d9df6e
fix macos error
EkberHasanov Jun 27, 2023
7d7173b
fix indentation error in ci.yml
EkberHasanov Jun 27, 2023
c7e09e8
ci: fix dependency issue in macos
EkberHasanov Jun 30, 2023
a337c14
improve test coverage to 100%
EkberHasanov Jun 30, 2023
ab11328
🔧 update code
yezz123 Jun 30, 2023
88de1b0
🍱 update requirements
yezz123 Jun 30, 2023
eeff98a
🍱 Fix Requirements
yezz123 Jun 30, 2023
f3f3853
🍱 Fix Requirements
yezz123 Jun 30, 2023
43a7e8a
Update pydantic_extra_types/pandas_types.py
yezz123 Jul 1, 2023
e8117c4
Inheriting directly from pd.Series
EkberHasanov Jul 6, 2023
ca0c9db
delete extra files
EkberHasanov Jul 12, 2023
dba691b
upgrading version of pandas
EkberHasanov Feb 25, 2024
6d24583
Update test_json_schema.py
EkberHasanov Feb 25, 2024
7068c41
fixing some issues
EkberHasanov Feb 25, 2024
f6679dc
change core_schema func
EkberHasanov Feb 29, 2024
3bf6d65
Update test_json_schema.py
EkberHasanov Feb 29, 2024
109f493
Restore non-pandas files to match upstream/main
Apr 7, 2026
e3b57ec
build: add pandas optional dependency group
Apr 7, 2026
5019571
feat: add Series pydantic type with generic validation
Apr 7, 2026
5c4de4b
feat: add Index pydantic type with generic validation
Apr 7, 2026
94cfa2b
feat: add DataFrame pydantic type with TypedDict column validation
Apr 7, 2026
5aadbe2
docs: update pandas_types docs for Series, Index, DataFrame
Apr 7, 2026
625e8b3
style: apply ruff format to pandas_types
Apr 7, 2026
1c3fd39
build: regenerate uv.lock with pandas dependency
Apr 7, 2026
947e405
fix: add type: ignore[import-untyped] for pandas, remove unused overr…
Apr 7, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
95 changes: 95 additions & 0 deletions docs/pandas_types.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
# Pandas Types

Pydantic types for [pandas](https://pandas.pydata.org/) objects. Supports `Series`, `Index`, and `DataFrame` with optional generic type validation.

## Installation

```bash
pip install "pydantic-extra-types[pandas]"
```

## Series

A validated `pandas.Series`. Use `Series[T]` to validate that every element is of type `T`.

```python
from pydantic import BaseModel
from pydantic_extra_types.pandas_types import Series

class MyModel(BaseModel):
    values: Series[int]

model = MyModel(values=[1, 2, 3])
print(model.values.tolist()) # [1, 2, 3]

# Also accepts an existing pd.Series
import pandas as pd
model = MyModel(values=pd.Series([4, 5, 6]))
print(model.values.tolist()) # [4, 5, 6]
```

Use bare `Series` (no type parameter) to accept elements of any type:

```python
class AnyModel(BaseModel):
    values: Series

model = AnyModel(values=[1, 'two', None])
```

## Index

A validated `pandas.Index`. Use `Index[T]` to validate element types.

```python
from pydantic import BaseModel
from pydantic_extra_types.pandas_types import Index

class MyModel(BaseModel):
    idx: Index[str]

model = MyModel(idx=['a', 'b', 'c'])
print(model.idx.tolist()) # ['a', 'b', 'c']
```

## DataFrame

A validated `pandas.DataFrame`. Pass a `TypedDict` (or any class with `__annotations__`) as the type parameter to validate column names and element types.

```python
from typing import TypedDict
from pydantic import BaseModel
from pydantic_extra_types.pandas_types import DataFrame

class PeopleSchema(TypedDict):
    name: str
    age: int

class MyModel(BaseModel):
    people: DataFrame[PeopleSchema]

model = MyModel(people={'name': ['Alice', 'Bob'], 'age': [30, 25]})
print(model.people)
#     name  age
# 0  Alice   30
# 1    Bob   25
```

When the input is a `pd.DataFrame`, columns beyond those defined in the schema are preserved without validation:

```python
import pandas as pd

df = pd.DataFrame({'name': ['Alice'], 'age': [30], 'extra': ['kept']})
model = MyModel(people=df)
print('extra' in model.people.columns) # True
```

Use bare `DataFrame` (no type parameter) to accept any dict or `pd.DataFrame`:

```python
class AnyModel(BaseModel):
    df: DataFrame

model = AnyModel(df={'x': [1, 2], 'y': [3, 4]})
```
215 changes: 215 additions & 0 deletions pydantic_extra_types/pandas_types.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,215 @@
from __future__ import annotations

try:
import pandas as pd # type: ignore[import-untyped]
except ModuleNotFoundError: # pragma: no cover
raise RuntimeError(
'The `pandas_types` module requires "pandas" to be installed. '
'You can install it with "pip install \'pydantic-extra-types[pandas]\'".'
)

from typing import Any, ClassVar

from pydantic import GetCoreSchemaHandler
from pydantic_core import PydanticCustomError, core_schema


class Series(pd.Series):  # type: ignore[misc]
    """
    A `pandas.Series` with Pydantic validation support.

    Use bare ``Series`` to accept elements of any type, or ``Series[T]`` to
    validate every element against ``T``. The input may be a ``pandas.Series``,
    a list, or any other iterable; it is validated as a list and rebuilt into a
    ``Series``. When the input already is a ``pandas.Series``, its index and
    name are carried over to the validated result. Serialization produces a
    plain list of the values.

    ```python
    from pydantic import BaseModel
    from pydantic_extra_types.pandas_types import Series


    class MyModel(BaseModel):
        values: Series[int]


    model = MyModel(values=[1, 2, 3])
    print(model.values.tolist())  # [1, 2, 3]
    ```
    """

    # Element type set by ``Series[T]``; ``None`` means elements are not validated.
    _item_type: ClassVar[type | None] = None

    def __class_getitem__(cls, item: type) -> type:
        """Return a subclass of ``cls`` whose elements are validated as ``item``."""
        # Not every usable annotation exposes ``__name__`` (some typing
        # constructs do not), so fall back to its repr for the class name
        # instead of raising AttributeError.
        label = getattr(item, '__name__', None) or repr(item)
        return type(f'Series[{label}]', (cls,), {'_item_type': item})

    @classmethod
    def __get_pydantic_core_schema__(cls, source: type[Any], handler: GetCoreSchemaHandler) -> core_schema.CoreSchema:
        """Build a wrap-validator schema around a list schema of the item type."""
        if cls._item_type is not None:
            item_schema = handler.generate_schema(cls._item_type)
        else:
            item_schema = core_schema.any_schema()

        list_schema = core_schema.list_schema(item_schema)

        return core_schema.no_info_wrap_validator_function(
            cls._validate,
            list_schema,
            serialization=core_schema.plain_serializer_function_ser_schema(
                lambda v: v.tolist(),
                info_arg=False,
                return_schema=core_schema.list_schema(item_schema),
            ),
        )

    @classmethod
    def _validate(cls, value: Any, handler: core_schema.ValidatorFunctionWrapHandler) -> Series:
        """
        Normalize ``value`` to a list, validate it with ``handler`` and wrap it in a ``Series``.

        Raises:
            PydanticCustomError: ``series_invalid`` if ``value`` is neither a
                ``pandas.Series`` nor convertible to a list.
        """
        index = None
        name = None
        if isinstance(value, pd.Series):
            # Remember the labels and name so validation does not silently drop them.
            index = value.index
            name = value.name
            value = value.tolist()
        elif not isinstance(value, list):
            try:
                value = list(value)
            except Exception as exc:
                raise PydanticCustomError(
                    'series_invalid',
                    'Value must be list-like or a pandas Series, got {type}',
                    {'type': type(value).__name__},
                ) from exc
        validated: list[Any] = handler(value)
        return Series(validated, index=index, name=name)


class Index:
    """
    A `pandas.Index` with Pydantic validation support.

    Use bare ``Index`` to accept elements of any type, or ``Index[T]`` to
    validate every element against ``T``. The input may be a ``pandas.Index``,
    a list, or any other iterable; it is validated as a list and the validated
    value is a plain ``pandas.Index``. When the input already is a
    ``pandas.Index``, its ``name`` is carried over. Serialization produces a
    plain list of the values.

    ```python
    from pydantic import BaseModel
    from pydantic_extra_types.pandas_types import Index


    class MyModel(BaseModel):
        idx: Index[str]


    model = MyModel(idx=['a', 'b', 'c'])
    print(model.idx.tolist())  # ['a', 'b', 'c']
    ```
    """

    # Element type set by ``Index[T]``; ``None`` means elements are not validated.
    _item_type: ClassVar[type | None] = None

    def __class_getitem__(cls, item: type) -> type:
        """Return a subclass of ``cls`` whose elements are validated as ``item``."""
        # Not every usable annotation exposes ``__name__`` (some typing
        # constructs do not), so fall back to its repr for the class name
        # instead of raising AttributeError.
        label = getattr(item, '__name__', None) or repr(item)
        return type(f'Index[{label}]', (cls,), {'_item_type': item})

    @classmethod
    def __get_pydantic_core_schema__(cls, source: type[Any], handler: GetCoreSchemaHandler) -> core_schema.CoreSchema:
        """Build a wrap-validator schema around a list schema of the item type."""
        if cls._item_type is not None:
            item_schema = handler.generate_schema(cls._item_type)
        else:
            item_schema = core_schema.any_schema()

        list_schema = core_schema.list_schema(item_schema)

        return core_schema.no_info_wrap_validator_function(
            cls._validate,
            list_schema,
            serialization=core_schema.plain_serializer_function_ser_schema(
                lambda v: v.tolist(),
                info_arg=False,
                return_schema=core_schema.list_schema(item_schema),
            ),
        )

    @classmethod
    def _validate(cls, value: Any, handler: core_schema.ValidatorFunctionWrapHandler) -> pd.Index:
        """
        Normalize ``value`` to a list, validate it with ``handler`` and wrap it in a ``pandas.Index``.

        Raises:
            PydanticCustomError: ``index_invalid`` if ``value`` is neither a
                ``pandas.Index`` nor convertible to a list.
        """
        name = None
        if isinstance(value, pd.Index):
            # Remember the name so validation does not silently drop it.
            name = value.name
            value = value.tolist()
        elif not isinstance(value, list):
            try:
                value = list(value)
            except Exception as exc:
                raise PydanticCustomError(
                    'index_invalid',
                    'Value must be list-like or a pandas Index, got {type}',
                    {'type': type(value).__name__},
                ) from exc
        validated: list[Any] = handler(value)
        return pd.Index(validated, name=name)


class DataFrame:
    """
    A `pandas.DataFrame` with Pydantic validation support.

    Accepts a TypedDict (or any class with ``__annotations__``) as a type parameter
    to validate column names and element types. Each annotated column is
    validated as a list of the annotated type. The input must be a ``dict`` or a
    ``pandas.DataFrame``; the validated value is a plain ``pandas.DataFrame``.

    When the input is a ``pandas.DataFrame``, columns that are not part of the
    schema are kept without validation and the row index is carried over to the
    result. Serialization produces a dict mapping each column name to a list.

    ```python
    from typing import TypedDict
    from pydantic import BaseModel
    from pydantic_extra_types.pandas_types import DataFrame


    class MySchema(TypedDict):
        name: str
        age: int


    class MyModel(BaseModel):
        people: DataFrame[MySchema]


    model = MyModel(people={'name': ['Alice', 'Bob'], 'age': [30, 25]})
    print(model.people)
    ```
    """

    # Schema class set by ``DataFrame[Schema]``; ``None`` means columns are not validated.
    _schema_cls: ClassVar[type | None] = None

    def __class_getitem__(cls, schema_cls: type) -> type:
        """Return a subclass of ``cls`` whose columns are validated against ``schema_cls``."""
        # Fall back to the repr when the parameter has no ``__name__`` rather
        # than failing with AttributeError while building the class name.
        label = getattr(schema_cls, '__name__', None) or repr(schema_cls)
        return type(f'DataFrame[{label}]', (cls,), {'_schema_cls': schema_cls})

    @classmethod
    def _column_types(cls) -> dict[str, Any]:
        """Return the schema's column-name -> element-type mapping (empty when unparametrized)."""
        from typing import get_type_hints

        schema_cls = cls._schema_cls
        if schema_cls is None:
            return {}
        try:
            # Resolves string / forward-reference annotations, which raw
            # ``__annotations__`` would hand to the schema generator unresolved.
            return get_type_hints(schema_cls)
        except (NameError, TypeError):
            # Unresolvable hints: fall back to the raw annotations.
            return dict(getattr(schema_cls, '__annotations__', {}))

    @classmethod
    def __get_pydantic_core_schema__(cls, source: type[Any], handler: GetCoreSchemaHandler) -> core_schema.CoreSchema:
        """Build a wrap-validator schema around a typed-dict schema of list-valued columns."""
        if cls._schema_cls is not None:
            fields = {
                col: core_schema.typed_dict_field(core_schema.list_schema(handler.generate_schema(col_type)))
                for col, col_type in cls._column_types().items()
            }
            inner_schema: core_schema.CoreSchema = core_schema.typed_dict_schema(fields)
        else:
            inner_schema = core_schema.any_schema()

        return core_schema.no_info_wrap_validator_function(
            cls._validate,
            inner_schema,
            serialization=core_schema.plain_serializer_function_ser_schema(
                lambda v: {col: v[col].tolist() for col in v.columns},
                info_arg=False,
            ),
        )

    @classmethod
    def _validate(cls, value: Any, handler: core_schema.ValidatorFunctionWrapHandler) -> pd.DataFrame:
        """
        Convert ``value`` to a dict of column lists, validate it with ``handler`` and rebuild a frame.

        Raises:
            PydanticCustomError: ``dataframe_invalid`` if ``value`` is neither a
                ``dict`` nor a ``pandas.DataFrame``.
        """
        extra_data: dict[str, Any] = {}
        original_index = None

        if isinstance(value, pd.DataFrame):
            # Remember the row labels so validation does not silently drop them.
            original_index = value.index
            columns = {c: value[c].tolist() for c in value.columns}
            if cls._schema_cls is not None:
                known_cols = set(cls._column_types())
                # Only schema columns go through the handler; the rest are re-attached afterwards.
                extra_data = {c: data for c, data in columns.items() if c not in known_cols}
                value = {c: data for c, data in columns.items() if c in known_cols}
            else:
                value = columns
        elif not isinstance(value, dict):
            raise PydanticCustomError(
                'dataframe_invalid',
                'Value must be a dict or pandas DataFrame, got {type}',
                {'type': type(value).__name__},
            )

        validated = handler(value)
        result = pd.DataFrame(validated, index=original_index)
        for col, data in extra_data.items():
            result[col] = data
        return result
4 changes: 4 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,9 @@ dependencies = ['pydantic>=2.5.2','typing-extensions']
dynamic = ['version']

[project.optional-dependencies]
pandas = [
'pandas>=2.0.0',
]
all = [
'phonenumbers>=8,<10',
'pycountry>=23',
Expand All @@ -57,6 +60,7 @@ all = [
'tzdata>=2024.1',
"cron-converter>=1.2.2",
'uuid-utils>=0.6.0; python_version<"3.14"',
'pandas>=2.0.0',
]
phonenumbers = ['phonenumbers>=8,<10']
pycountry = ['pycountry>=23']
Expand Down
Loading
Loading