Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
63 commits
Select commit Hold shift + click to select a range
46d68a6
feat: Create a new validator with valid values fetched from a URL path
Sarrabah Apr 3, 2025
67145f3
test: Test a real case
Sarrabah Apr 3, 2025
1d6c038
refactor: Linting
Sarrabah Apr 3, 2025
ac68603
docs: Generate documentation to the testing validator
Sarrabah Apr 3, 2025
1659be0
refactor: Reduce the test file size
Sarrabah Apr 3, 2025
95c805d
refactor: Delete the local csv file
Sarrabah Apr 3, 2025
c425167
test: test get valid values function
Sarrabah Apr 3, 2025
0d2d290
refactor: Renaming
Sarrabah Apr 3, 2025
f86ae8e
refactor: Linting
Sarrabah Apr 3, 2025
f901028
refactor: Add comment
Sarrabah Apr 3, 2025
ff7e693
feat: Apply changes to canton
Sarrabah Apr 7, 2025
765288f
fix: Treat csv files
Sarrabah Apr 7, 2025
1e69356
test: Add more tests
Sarrabah Apr 7, 2025
ab96f18
refactor: Reorganize data files for testing
Sarrabah Apr 7, 2025
23ab4b9
test: Ignore this one
Sarrabah Apr 7, 2025
8960f68
test: Remove comment
Sarrabah Apr 7, 2025
645ce55
A void commit
Sarrabah Apr 7, 2025
601878d
test: Reorganization
Sarrabah Apr 7, 2025
fbffcc7
refactor: Linting
Sarrabah Apr 7, 2025
6f97b69
fix: Skip the csv verification
Sarrabah Apr 7, 2025
38f0410
Reorder imports
Sarrabah Apr 7, 2025
b84db05
refactor: Linting
Sarrabah Apr 7, 2025
a107581
fix: Add sleeping time
Sarrabah Apr 7, 2025
c28df74
fix: Return to the first canton code logic
Sarrabah Apr 7, 2025
cdf3d4e
refactor: Remove unused import
Sarrabah Apr 7, 2025
d7c467b
test: Use a simple incorrect url
Sarrabah Apr 7, 2025
aa50bb4
feat: Readapt canton
Sarrabah Apr 7, 2025
502d01f
fix: Correct the import
Sarrabah Apr 7, 2025
49a6525
refactor: Ignore type
Sarrabah Apr 7, 2025
4c917a4
fix: Add the forgotten import
Sarrabah Apr 7, 2025
c530b36
test: Remove the test code
Sarrabah Apr 7, 2025
3547351
refactor: Remove exceptions
Sarrabah Apr 7, 2025
42b443f
refactor: Linting
Sarrabah Apr 7, 2025
ef120be
feat: Ignore Canton changes
Sarrabah Apr 8, 2025
997b15d
refactor: Renaming function
Sarrabah Apr 9, 2025
9536e5d
test: Test a local csv file
Sarrabah Apr 9, 2025
cb51cc6
refactor: Implement private functions to be more readable
Sarrabah Apr 9, 2025
996f343
refactor: Use meaningful names
Sarrabah Apr 9, 2025
e899501
feat: Error handling when the csv file is not well formatted
Sarrabah Apr 10, 2025
45c056a
test: Test a non existing column
Sarrabah Apr 10, 2025
e7f5354
test: Test a missing file
Sarrabah Apr 10, 2025
27dc7f1
feat: Use guard clauses and start without error handling
Sarrabah Apr 14, 2025
5f5143a
test: Testing and error handling when the CSV file is not well formatted
Sarrabah Apr 14, 2025
c5f374e
test: Test a missing column
Sarrabah Apr 14, 2025
0cb318d
test: Test a non existing file
Sarrabah Apr 14, 2025
8f8ece0
test: Testing file and ftp schema
Sarrabah Apr 14, 2025
3f94dea
refactor: Add more errors handling
Sarrabah Apr 14, 2025
51e1514
docs: Add a docstring
Sarrabah Apr 14, 2025
dc3de41
test: WIP to use dependency inversion
Sarrabah Apr 14, 2025
d24e7f8
refactor: Remove a comment
Sarrabah Apr 15, 2025
2a8677b
refactor: Renaming and typing variables
Sarrabah Apr 15, 2025
c10900e
refactor: Raise exceptions without printing a message
Sarrabah Apr 15, 2025
6bb9399
feat: Implement dependency inversion WIP
Sarrabah Apr 16, 2025
bc3ee55
fix: Use Union term
Sarrabah Apr 16, 2025
9c6903d
test: Test a not well formatted csv file
Sarrabah Apr 16, 2025
d2c5082
docs: Update docstring
Sarrabah Apr 17, 2025
2cfe6bb
refactor: Renaming
Sarrabah Apr 17, 2025
755b638
refactor: Remove a non important error handling
Sarrabah Apr 17, 2025
ae61328
refactor: Refactoring conditions
Sarrabah Apr 22, 2025
7db2ca1
refactor: Using the same type
Sarrabah Apr 22, 2025
2fe5c16
test: Implement the dependency inversion
Sarrabah Apr 23, 2025
dc8a951
test: Reorganize tests and renaming
Sarrabah Apr 23, 2025
04abe80
refactor: Move get values function to set_format file
Sarrabah Apr 24, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/formats.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,5 +18,6 @@
| Region | Nom de région | Vérifie les régions françaises valides pour un Code Officiel Géographique donné. Depuis 2017, la liste des régions françaises n'a pas changé. |
| Siren | SIREN | Check french SIREN number validity, but does not check if SIREN number exists. |
| Siret | SIRET | Check french SIRET number validity, but does not check if SIRET number exists. |
| Validator | Nom de validateur | Vérification si la valeur donnée est bien valide selon des valeurs valides récupérées |

[If you want to add a new French Format, click here!](../CONTRIBUTING.md#implementing-a-new-french-format)
2 changes: 2 additions & 0 deletions src/frformat/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,12 @@
from .formats.region import Region as Region
from .formats.siren import Siren as Siren
from .formats.siret import Siret as Siret
from .formats.validator import Validator as Validator
from .options import Options as Options
from .versions import Millesime as Millesime

all_formats = [
Validator,
Canton,
CodeCommuneInsee,
CodeFantoir,
Expand Down
20 changes: 20 additions & 0 deletions src/frformat/formats/validator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
from frformat import set_format
from frformat.versioned_set import VersionedSet
from frformat.versions import Millesime

# Metadata passed positionally to set_format.new() at the bottom of this module.
name = "Nom de validateur"
description = "Vérification si la valeur donnée est bien valide selon des valeurs valides récupérées"
# NOTE(review): "source1" looks like a placeholder — confirm the real data source.
source = "source1"

# Container that receives one set of valid values per Millesime (see add_version below).
validator_versioned_data = VersionedSet[Millesime]()


# NOTE(review): this call runs at import time, and the CSV path is relative, so
# it is resolved against the current working directory. get_values_from_csv
# raises ValueError when the path has no supported scheme and is not an
# existing file, which means importing this module from another directory
# fails. The path also points into the test data tree — confirm whether this
# format is meant to ship with real data.
valid_values_2023 = set_format.SingleSetFormat.get_values_from_csv(
    "src/tests/test_files_data/values.csv", "Username"
)
validator_versioned_data.add_version(Millesime.M2023, valid_values_2023)


# Build the concrete format class from the metadata and the versioned data above.
Validator = set_format.new(
    "Validator", name, description, source, validator_versioned_data
)
79 changes: 79 additions & 0 deletions src/frformat/get_values.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
import csv
import io
import logging
import os
import urllib.parse
from typing import Protocol


class IFileReader(Protocol):
    """Structural interface for any object able to open a CSV source as text."""

    def read_file(self, path: str) -> io.TextIOBase:
        """Return a readable text stream for the resource located at ``path``."""
        ...


def get_values_from_csv(
    path: str, column: str, remote_reader: IFileReader, local_reader: IFileReader
) -> frozenset[str]:
    """
    Extract the distinct values of one column of a CSV file located either
    locally or remotely.

    Supported sources:
        - Plain filesystem paths pointing at an existing file; these are read
          with ``local_reader``.
        - URIs using the 'http', 'https' or 'file' scheme; these are all read
          with ``remote_reader``.

    Apart from an invalid path, every failure (unparsable URI, reader error,
    missing column, malformed CSV) is logged and reported as an empty
    frozenset instead of being raised.

    Args:
        path: The path or URL to the CSV file.
        column: The name of the column from which to extract values.
        remote_reader: Reader used when ``path`` carries a supported scheme.
        local_reader: Reader used when ``path`` is a plain local path.

    Raises:
        ValueError: If ``path`` has no supported scheme and is not an
            existing local file.

    Returns:
        A frozenset containing the values found in the specified column, or
        an empty frozenset when the values could not be extracted.
    """
    try:
        parsed_uri = urllib.parse.urlparse(path)
    except ValueError:
        logging.error(f"An error is occured while parsing url using this path: {path}")
        return frozenset()

    is_valid_scheme = parsed_uri.scheme in ("http", "https", "file")

    if not is_valid_scheme and not os.path.isfile(path):
        raise ValueError(
            f"Invalid path: {path}.The URI must use one of the following schemes: http, https, or file or it must be existing csv file."
        )

    file_reader = remote_reader if is_valid_scheme else local_reader
    try:
        csvfile = file_reader.read_file(path)
    except Exception as e:
        # Deliberate best-effort: readers are pluggable, so any failure while
        # opening the source is logged and turned into "no values".
        logging.error(
            f"While reading the file getted from this path: {path} there is this exception: {e}"
        )
        return frozenset()

    with csvfile:
        reader = csv.DictReader(csvfile)
        try:
            # The header decides whether the column exists, so check it once.
            # Reading ``fieldnames`` consumes the first line and may itself
            # fail, hence it lives inside the ``try``.
            if column not in (reader.fieldnames or ()):
                logging.error(f"CSV file is missing the {column} column.")
                return frozenset()
            # Rows shorter than the header are padded with None by DictReader;
            # those are not real values and are skipped.
            return frozenset(
                row[column] for row in reader if row[column] is not None
            )
        except (csv.Error, ValueError):
            # csv.Error covers malformed CSV; ValueError covers decoding
            # errors raised lazily while the stream is being read.
            logging.error(
                f"The file associated to this path: {path} is not well csv formatted"
            )
            return frozenset()
15 changes: 15 additions & 0 deletions src/frformat/infra_file_reader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
import io
import urllib.request


class RemoteReader:
    """Reads a resource through ``urllib.request.urlopen`` into in-memory text."""

    def read_file(self, path: str) -> io.TextIOBase:
        """Download ``path`` and return its content as a text stream.

        Args:
            path: URL handed unchanged to ``urllib.request.urlopen``.

        Returns:
            An ``io.StringIO`` holding the whole payload decoded as UTF-8.
        """
        # Close the response deterministically instead of leaving the
        # underlying handle to the garbage collector.
        with urllib.request.urlopen(path) as response:
            payload: bytes = response.read()
        return io.StringIO(payload.decode("utf-8"))


class LocalReader:
    """Opens a file from the local filesystem as UTF-8 text."""

    def read_file(self, path: str) -> io.TextIOBase:
        """Return an open text handle on ``path``; the caller must close it."""
        # newline="" disables newline translation so line endings reach the
        # consumer exactly as stored in the file.
        return open(path, encoding="utf-8", newline="")
92 changes: 91 additions & 1 deletion src/frformat/set_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,33 @@
- `new` creates specialized versions where data is tied to the class
"""

from typing import FrozenSet, Generic, Type, TypeVar, Union, overload
import csv
import io
import logging
import os
import urllib.parse
from typing import (
ClassVar,
FrozenSet,
Generic,
Protocol,
Type,
TypeVar,
Union,
overload,
)

from frformat import CustomStrFormat, Metadata
from frformat.common import normalize_value
from frformat.infra_file_reader import LocalReader, RemoteReader
from frformat.options import Options
from frformat.versioned_set import Version, VersionedSet


class IFileReader(Protocol):
    """Structural interface for objects that open a file as a text stream."""

    # Takes the path (or URL) of the file and returns a readable text stream;
    # any object exposing a compatible ``read_file`` method satisfies it.
    def read_file(self, path: str) -> io.TextIOBase: ...


class SingleSetFormat(CustomStrFormat):
"""This format defines a closed list of valid values"""

Expand All @@ -27,6 +46,11 @@ class SingleSetFormat(CustomStrFormat):
Beware, child classes may define an instance `_valid_values` attribute, which
will always take precedence over the class attribute for the validation.
"""
remote_reader: ClassVar[IFileReader] = RemoteReader()
""" RemoteReader is a class that contain a method to read file with remote path"""

local_reader: ClassVar[IFileReader] = LocalReader()
""" LocalReader is a class that contain a method to read file with local path"""

def __init__(self, options: Options = Options()):
self._options = options
Expand All @@ -47,6 +71,72 @@ def is_valid(self, value: str) -> bool:
normalized_value = normalize_value(value, self._options)
return normalized_value in self._normalized_values

@classmethod
def get_values_from_csv(cls, path: str, column: str) -> frozenset[str]:
    """
    Extract the distinct values of one column of a CSV file located either
    locally or remotely.

    Supported sources:
        - Plain filesystem paths pointing at an existing file; these are read
          with ``cls.local_reader``.
        - URIs using the 'http', 'https' or 'file' scheme; these are all read
          with ``cls.remote_reader``.

    Apart from an invalid path, every failure (unparsable URI, reader error,
    missing column, malformed CSV) is logged and reported as an empty
    frozenset instead of being raised.

    On success the extracted set is also stored on ``cls._valid_values``.

    Args:
        path: The path or URL to the CSV file.
        column: The name of the column from which to extract values.

    Raises:
        ValueError: If ``path`` has no supported scheme and is not an
            existing local file.

    Returns:
        A frozenset containing the values found in the specified column, or
        an empty frozenset when the values could not be extracted.
    """
    try:
        parsed_uri = urllib.parse.urlparse(path)
    except ValueError:
        logging.error(
            f"An error is occured while parsing url using this path: {path}"
        )
        return frozenset()

    is_valid_scheme = parsed_uri.scheme in ("http", "https", "file")

    if not is_valid_scheme and not os.path.isfile(path):
        raise ValueError(
            f"Invalid path: {path}.The URI must use one of the following schemes: http, https, or file or it must be existing csv file."
        )

    file_reader = cls.remote_reader if is_valid_scheme else cls.local_reader
    try:
        csvfile = file_reader.read_file(path)
    except Exception as e:
        # Deliberate best-effort: readers are pluggable, so any failure while
        # opening the source is logged and turned into "no values".
        logging.error(f"Error while reading the file at {path}: {e}")
        return frozenset()

    with csvfile:
        reader = csv.DictReader(csvfile)
        try:
            # The header decides whether the column exists, so check it once.
            # Reading ``fieldnames`` consumes the first line and may itself
            # fail, hence it lives inside the ``try``.
            if column not in (reader.fieldnames or ()):
                logging.error(f"CSV file is missing the {column} column.")
                return frozenset()
            # Rows shorter than the header are padded with None by DictReader;
            # those are not real values and are skipped.
            values = frozenset(
                row[column] for row in reader if row[column] is not None
            )
        except (csv.Error, ValueError):
            # csv.Error covers malformed CSV; ValueError covers decoding
            # errors raised lazily while the stream is being read.
            logging.error(
                f"The file associated to this path: {path} is not well csv formatted"
            )
            return frozenset()

    # NOTE(review): this rebinds ``_valid_values`` on whichever class the
    # method is called on (SingleSetFormat itself when called directly) —
    # confirm that this class-level side effect is intended.
    cls._valid_values = values
    return values

def get_valid_values_set(self) -> FrozenSet[str]:
"""Returns the canonical set of valid values.

Expand Down
3 changes: 3 additions & 0 deletions src/tests/test_files_data/values.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
"Username","Identifier","First name","Last name"
"booker12","9012","Rachel","Booker"
"grey07","2070","Laura","Grey"
109 changes: 109 additions & 0 deletions src/tests/test_validator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
from io import StringIO, TextIOBase

import pytest

from frformat import Millesime, Validator
from frformat.get_values import IFileReader, get_values_from_csv
from frformat.infra_file_reader import LocalReader, RemoteReader
from frformat.set_format import SingleSetFormat


def test_validator():
    """The 2023 Validator accepts a username listed in its CSV data."""
    validator = Validator(Millesime.M2023)
    assert validator.is_valid("booker12") is True


class FakeFileReader(IFileReader):
    """Test double that serves fixed CSV text whatever path is requested."""

    def __init__(self, data: str):
        self._content = data

    def read_file(self, path: str) -> TextIOBase:
        """Return a fresh in-memory stream over the canned content."""
        return StringIO(self._content)


def test_get_values_from_local_file(monkeypatch):
    """Values are read through the class-level local reader for a plain path.

    The readers are swapped with ``monkeypatch`` so the originals are
    restored on teardown; assigning them directly would leak the fake
    reader into every test that runs afterwards.
    """
    csv_data: str = (
        "Username,Email\nbooker1,booker12@example.com\ngrey7,grey07@example.com"
    )

    monkeypatch.setattr(SingleSetFormat, "local_reader", FakeFileReader(csv_data))
    monkeypatch.setattr(SingleSetFormat, "remote_reader", RemoteReader())
    # NOTE(review): get_values_from_csv also assigns
    # SingleSetFormat._valid_values; that side effect is not undone here.

    # The path must exist on disk to pass the os.path.isfile check, even
    # though the fake reader ignores its content.
    valid_values = SingleSetFormat.get_values_from_csv(
        "src/tests/test_files_data/values.csv", "Username"
    )

    assert valid_values == frozenset({"booker1", "grey7"})

    # A column absent from the header yields an empty set.
    values = SingleSetFormat.get_values_from_csv(
        "src/tests/test_files_data/values.csv", "Link"
    )
    assert values == frozenset({})


def test_get_values_from_not_well_formatted_local_file():
    """A garbled header with an unknown column name yields an empty set."""
    fake_local = FakeFileReader("xff�Name,Age\nJohn,30")

    extracted = get_values_from_csv(
        "src/tests/test_files_data/values.csv",
        "coucou",
        RemoteReader(),
        fake_local,
    )

    assert extracted == frozenset({})


def test_invalid_path_file():
    """A plain path that is not an existing file is rejected with ValueError."""
    missing_path = "src/tests/test_files_data/non_existed_file.csv"
    expected_message = "Invalid path: src/tests/test_files_data/non_existed_file.csv.The URI must use one of the following schemes: http, https, or file or it must be existing csv file."

    with pytest.raises(ValueError, match=expected_message):
        get_values_from_csv(missing_path, "DEP", RemoteReader(), LocalReader())


def test_get_values_from_remote_csv():
    """URIs with an http(s) or file scheme are served by the remote reader."""
    fake_remote = FakeFileReader("Age,RegionCode\n23,7653\n22,5498")
    real_local = LocalReader()
    https_url = "https://some.fake.url/values.csv"

    # Existing column over https.
    ages = get_values_from_csv(https_url, "Age", fake_remote, real_local)
    assert ages == frozenset({"23", "22"})

    # Column absent from the header.
    names = get_values_from_csv(https_url, "Name", fake_remote, real_local)
    assert names == frozenset({})

    # The file:// scheme is routed to the remote reader as well.
    region_codes = get_values_from_csv(
        "file:///fakedata/values.csv", "RegionCode", fake_remote, real_local
    )
    assert region_codes == frozenset({"7653", "5498"})