Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 9 additions & 6 deletions frictionless/detector/detector.py
Original file line number Diff line number Diff line change
Expand Up @@ -329,7 +329,7 @@ def detect_schema(

# Handle name/empty
for index, name in enumerate(names):
names[index] = name or f"field{index+1}"
names[index] = name or f"field{index + 1}"

# Deduplicate names
if len(names) != len(set(names)):
Expand All @@ -354,16 +354,19 @@ def detect_schema(
runner_fields: List[Field] = [] # we use shared fields
for candidate in field_candidates:
descriptor = candidate.copy()

if descriptor["type"] == "boolean":
if self.field_true_values != settings.DEFAULT_TRUE_VALUES:
descriptor["true_values"] = self.field_true_values # type: ignore
if self.field_false_values != settings.DEFAULT_FALSE_VALUES:
descriptor["false_values"] = self.field_false_values # type: ignore

descriptor["name"] = "shared"
field = Field.from_descriptor(descriptor)
if field.type == "number" and self.field_float_numbers:
field.float_number = True # type: ignore
elif field.type == "boolean":
if self.field_true_values != settings.DEFAULT_TRUE_VALUES:
field.true_values = self.field_true_values # type: ignore
if self.field_false_values != settings.DEFAULT_FALSE_VALUES:
field.false_values = self.field_false_values # type: ignore
runner_fields.append(field)

for index, name in enumerate(names):
runners.append([])
for field in runner_fields:
Expand Down
2 changes: 1 addition & 1 deletion frictionless/fields/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from .any import AnyField as AnyField
from .array import ArrayField as ArrayField
from .boolean import BooleanField as BooleanField
from .boolean import BooleanField
from .date import DateField as DateField
from .datetime import DatetimeField as DatetimeField
from .duration import DurationField as DurationField
Expand Down
21 changes: 21 additions & 0 deletions frictionless/fields/any_descriptor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
from typing import Any, Literal, Optional

from .base_field_descriptor import BaseFieldDescriptor
from .field_constraints import BaseConstraints


class AnyFieldDescriptor(BaseFieldDescriptor):
    """The field contains values of an unspecified or mixed type.

    No conversion happens in either direction: cells are handed back
    untouched both when reading and when writing.
    """

    type: Literal["any"] = "any"
    format: Optional[Literal["default"]] = None
    constraints: Optional[BaseConstraints[str]] = None

    def read_value(self, cell: Any) -> Any:
        """Return ``cell`` unchanged; every value is accepted as-is."""
        return cell

    def write_value(self, cell: Any) -> Any:
        """Return ``cell`` unchanged; no serialisation is applied."""
        return cell

29 changes: 0 additions & 29 deletions frictionless/fields/array.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
from __future__ import annotations

import json
from typing import Any, Dict, Optional

import attrs
Expand Down Expand Up @@ -55,34 +54,6 @@ def cell_reader(cell: Any):

return cell_reader

def create_value_reader(self):
    """Build the reader that normalises a cell into a Python list.

    The returned callable gives back:

    - the cell itself when it already is a list;
    - a list copy when the cell is a tuple;
    - the decoded value when the cell is a string holding a JSON array;
    - ``None`` for everything else, including strings that fail to decode
      or that decode to something other than a list.
    """

    def value_reader(cell: Any):  # type: ignore
        if isinstance(cell, list):
            return cell  # type: ignore
        if isinstance(cell, tuple):
            return list(cell)  # type: ignore
        if not isinstance(cell, str):
            return None
        try:
            decoded = json.loads(cell)
        except Exception:
            # Any decoding failure simply means "not an array"
            return None
        return decoded if isinstance(decoded, list) else None

    return value_reader

# Write

def create_value_writer(self):
    """Build the writer that serialises a cell to its JSON text."""

    def value_writer(cell: Any):
        encoded = json.dumps(cell)
        return encoded

    return value_writer

# Metadata

metadata_profile_patch = {
Expand Down
40 changes: 40 additions & 0 deletions frictionless/fields/array_descriptor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
from __future__ import annotations

import json
from typing import Any, Literal, Optional

from pydantic import Field as PydanticField

from .base_field_descriptor import BaseFieldDescriptor
from .field_constraints import JSONConstraints


class ArrayFieldDescriptor(BaseFieldDescriptor):
    """The field contains a valid JSON array."""

    type: Literal["array"] = "array"
    format: Optional[Literal["default"]] = None
    constraints: Optional[JSONConstraints] = None
    # TODO: check later:
    # arrayItem in Frictionless schemas is an unnamed field-like descriptor,
    # kept as a plain dict to prevent using a full FieldDescriptor with
    # "name" (backward compatibility)
    array_item: Optional[dict[str, Any]] = PydanticField(default=None, alias="arrayItem")

    def read_value(self, cell: Any) -> Optional[list[Any]]:
        """Normalise ``cell`` into a list, or return ``None`` if it is not one.

        Lists are returned as-is, tuples are copied into a list, and strings
        are decoded as JSON and accepted only when the result is a list.
        Any other input, or a string that fails to decode, yields ``None``.
        """
        if isinstance(cell, list):
            return cell  # type: ignore[return-value]
        if isinstance(cell, tuple):
            return list(cell)  # type: ignore[arg-type]
        if not isinstance(cell, str):
            return None
        try:
            decoded = json.loads(cell)
        except Exception:
            # Any decoding failure simply means "not an array"
            return None
        return decoded if isinstance(decoded, list) else None

    def write_value(self, cell: Any) -> str:
        """Serialise ``cell`` to its JSON text."""
        encoded = json.dumps(cell)
        return encoded


65 changes: 65 additions & 0 deletions frictionless/fields/base_field_descriptor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
"""base_field_descriptor.py provides the base Pydantic model for all field descriptors"""

from __future__ import annotations

from pydantic import BaseModel, Field as PydanticField, model_validator
from typing import Any, Dict, List, Optional
from typing_extensions import Self


class BaseFieldDescriptor(BaseModel):
    """Data model of a (unspecialised) field descriptor.

    Declares the properties common to every field type and two model
    validators: ``compat`` (legacy ``format`` handling, run on the raw
    input) and ``validate_example`` (checks ``example`` once the model
    has been built).
    """

    name: str
    """
    The field descriptor MUST contain a name property.
    """

    title: Optional[str] = None
    """
    A human readable label or title for the field
    """

    description: Optional[str] = None
    """
    A description for this field e.g. "The recipient of the funds"
    """

    missing_values: Optional[List[str]] = PydanticField(
        default=None, alias="missingValues"
    )
    """
    A list of field values to consider as null values
    """

    example: Optional[Any] = None
    """
    An example of a value for the field.
    """

    @model_validator(mode="before")
    @classmethod
    def compat(cls, data: Any) -> Any:
        """Strip the legacy ``fmt:`` prefix from the ``format`` property.

        Args:
            data: The raw input handed to the model before field validation.

        Returns:
            ``data`` itself when nothing needs rewriting, otherwise a new
            dict whose ``format`` has the ``fmt:`` prefix removed. The
            caller's dict is never modified.
        """
        # A "before" validator sees the raw input, which is not guaranteed
        # to be a dict; leave anything else for pydantic to handle.
        if not isinstance(data, dict):
            return data

        # Backward compatibility for field.format
        format_ = data.get("format")
        # A non-string format is passed through untouched so that regular
        # field validation reports it, instead of failing here on a
        # missing ``startswith`` attribute.
        if isinstance(format_, str) and format_.startswith("fmt:"):
            # Build a new dict so the caller's descriptor is not mutated
            data = {**data, "format": format_[4:]}

        return data

    @model_validator(mode="after")
    def validate_example(self) -> Self:
        """Validate that the example value can be converted using read_value() if available.

        Returns:
            The model instance itself.

        Raises:
            ValueError: If ``read_value`` exists and returns ``None`` for
                the example.
        """
        if self.example is None:
            return self

        # This base model defines no read_value(); the check only applies
        # when the attribute exists on the instance.
        reader = getattr(self, "read_value", None)
        if callable(reader) and reader(self.example) is None:
            raise ValueError(
                f'example value "{self.example}" for field "{self.name}" is not valid'
            )

        return self

68 changes: 5 additions & 63 deletions frictionless/fields/boolean.py
Original file line number Diff line number Diff line change
@@ -1,67 +1,9 @@
from __future__ import annotations
from ..schema.field import Field

from typing import Any, Dict, List

import attrs

from .. import settings
from ..schema import Field


@attrs.define(kw_only=True, repr=False)
class BooleanField(Field):
    """Field holding boolean data, read from configurable true/false strings."""

    ### TEMP Only required for Metadata compatibility
    ### This is required because "metadata_import" makes a distinction based
    ### on the "type" property (`is_typed_class`)
    type = "boolean"
    builtin = True
    supported_constraints = [
        "required",
        "enum",
    ]

    true_values: List[str] = attrs.field(factory=settings.DEFAULT_TRUE_VALUES.copy)
    """
    It defines the values to be read as true values while reading data. The default
    true values are ["true", "True", "TRUE", "1"].
    """

    false_values: List[str] = attrs.field(factory=settings.DEFAULT_FALSE_VALUES.copy)
    """
    It defines the values to be read as false values while reading data. The default
    false values are ["false", "False", "FALSE", "0"].
    """

    # Read

    def create_value_reader(self):
        """Build the reader that converts a cell into a bool.

        The returned callable gives back the cell itself when it is already
        a bool, the mapped bool when it is a string listed in
        ``true_values`` or ``false_values``, and ``None`` otherwise.
        """
        # Create mapping; false values are applied last, so a string listed
        # in both collections reads as False
        mapping: Dict[str, bool] = {
            **dict.fromkeys(self.true_values, True),
            **dict.fromkeys(self.false_values, False),
        }

        # Create reader
        def value_reader(cell: Any):
            if isinstance(cell, bool):
                return cell
            if isinstance(cell, str):
                return mapping.get(cell)
            return None

        return value_reader

    # Write

    def create_value_writer(self):
        """Build the writer that converts a cell into its string form.

        Truthy cells are written as the first entry of ``true_values``,
        all other cells as the first entry of ``false_values``.
        """

        # Create writer
        def value_writer(cell: Any):
            return self.true_values[0] if cell else self.false_values[0]

        return value_writer

    # Metadata

    metadata_profile_patch = {
        "properties": {
            "trueValues": {"type": "array", "items": {"type": "string"}},
            "falseValues": {"type": "array", "items": {"type": "string"}},
        }
    }

50 changes: 50 additions & 0 deletions frictionless/fields/boolean_descriptor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
from typing import Any, ClassVar, List, Literal, Optional

from pydantic import Field as PydanticField, AliasChoices

from .. import settings
from .base_field_descriptor import BaseFieldDescriptor
from .field_constraints import BaseConstraints

class BooleanFieldDescriptor(BaseFieldDescriptor):
    """The field contains boolean (true/false) data."""

    # NOTE(review): ``type`` is a ClassVar here, whereas the other descriptors
    # in this change declare it as a regular field - confirm this is intended.
    type: ClassVar[Literal["boolean"]] = "boolean"

    format: Optional[Literal["default"]] = None
    constraints: Optional[BaseConstraints[bool]] = None

    true_values: Optional[List[str]] = PydanticField(
        default=settings.DEFAULT_TRUE_VALUES,
        alias="trueValues",
        validation_alias=AliasChoices("trueValues", "true_values"),
    )
    """
    Values to be interpreted as "true" for boolean fields
    """

    false_values: Optional[List[str]] = PydanticField(
        default=settings.DEFAULT_FALSE_VALUES,
        alias="falseValues",
        validation_alias=AliasChoices("falseValues", "false_values"),
    )
    """
    Values to be interpreted as "false" for boolean fields
    """

    def read_value(self, cell: Any) -> Optional[bool]:
        """Convert ``cell`` into a bool, or return ``None`` if it is not one.

        Bools are returned as-is. Strings are looked up in ``true_values``
        first and ``false_values`` second. Anything else yields ``None``.
        """
        if isinstance(cell, bool):
            return cell
        if not isinstance(cell, str):
            return None

        # An unset or empty collection matches nothing
        if cell in (self.true_values or ()):
            return True
        if cell in (self.false_values or ()):
            return False
        return None

    def write_value(self, cell: Optional[bool]) -> Optional[str]:
        """Convert ``cell`` into its string form.

        Returns the first entry of ``true_values`` for a truthy cell and the
        first entry of ``false_values`` otherwise, or ``None`` when either
        collection is unset or empty.
        """
        if not (self.true_values and self.false_values):
            return None
        candidates = self.true_values if cell else self.false_values
        return candidates[0]
Loading
Loading