Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions .github/workflows/codespell.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Codespell configuration is within pyproject.toml
---
name: Codespell

on:
push:
branches: [master]
pull_request:
branches: [master]

permissions:
contents: read

jobs:
codespell:
name: Check for spelling errors
runs-on: ubuntu-latest

steps:
- name: Checkout
uses: actions/checkout@v4
- name: Codespell
uses: codespell-project/actions-codespell@8f01853be192eb0f849a5c7d721450e7a467c579 # v2.2
8 changes: 8 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -38,3 +38,11 @@ repos:
args: [--fix, --show-fixes, --exit-non-zero-on-fix]
- id: ruff-format
types: [python]

- repo: https://github.com/codespell-project/codespell
# Configuration for codespell is in pyproject.toml
rev: v2.4.1
hooks:
- id: codespell
additional_dependencies:
- tomli; python_version<'3.11'
2 changes: 1 addition & 1 deletion docs/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -321,7 +321,7 @@
# The format is a list of tuples containing the path and title.
# epub_pre_files = []

# HTML files shat should be inserted after the pages created by sphinx.
# HTML files that should be inserted after the pages created by sphinx.
# The format is a list of tuples containing the path and title.
# epub_post_files = []

Expand Down
2 changes: 1 addition & 1 deletion fuji_server/controllers/fair_object_controller.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ async def assess_by_id(body):
allow_remote_logging = False
# Request POST BODY has to be JSON
if connexion.request.content_type == "application/json":
# The client has to send this HTTP header (Allow-Remote-Logging:True) explicitely to enable remote logging
# The client has to send this HTTP header (Allow-Remote-Logging:True) explicitly to enable remote logging
# Useful for e.g. web clients..
allow_remote_logging = connexion.request.headers.get("Allow-Remote-Logging")
debug = True
Expand Down
2 changes: 1 addition & 1 deletion fuji_server/data/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
- [`creativeworktypes.txt`](./creativeworktypes.txt)
- [`default_namespaces.txt`](./default_namespaces.txt): Excluded during evaluation of the semantic vocabulary, FsF-I2-01M.
- [`file_formats.yaml`](./file_formats.yaml): Dictionary of scientific file formats. Used in evaluation of R1.3-02D to check the file format of the data.
- [`google_cache.db`](./google_cache.db): Used for evaluating FsF-F4-01M (searchability in major catalogues like DataCite registry, Google Dataset, Mendeley, ...). Google Data search is queried for a PID in column `google_links`. It's a dataset with metadata about datasets that have a DOI or persistent identifier from `identifer.org`.
- [`google_cache.db`](./google_cache.db): Used for evaluating FsF-F4-01M (searchability in major catalogues like DataCite registry, Google Dataset, Mendeley, ...). Google Data search is queried for a PID in column `google_links`. It's a dataset with metadata about datasets that have a DOI or persistent identifier from `identifiers.org`.
- [`identifiers_org_resolver_data.yaml`](./identifiers_org_resolver_data.yaml): Used in [`IdentifierHelper`](fuji_server/helper/identifier_helper.py).
- [`jsonldcontext.yaml`](./jsonldcontext.yaml)
- [`licenses.yaml`](./licenses.yaml): Used to populate `Preprocessor.license_names`, a list of SPDX licences. Used in evaluation of licenses, FsF-R1.1-01M.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -230,7 +230,7 @@ def retrieve_metadata_standards_from_apis(self):
self.retrieve_metadata_standards_from_sparql()
else:
self.logger.warning(
"{} : Skipped external ressources (e.g. OAI, re3data) checks since landing page could not be resolved".format(
"{} : Skipped external resources (e.g. OAI, re3data) checks since landing page could not be resolved".format(
"FsF-R1.3-01M"
)
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -289,7 +289,7 @@ def testSizeAndTypeOrProtocolMatchesMetadata(self, test_data_content_url):
if object_size == int(float(data_object.get("content_size"))):
size_matches = True
self.logger.info(
"{} : Sucessfully verified content size from downloaded file -: (expected: {}, found: {})".format(
"{} : Successfully verified content size from downloaded file -: (expected: {}, found: {})".format(
self.metric_identifier,
str(data_object.get("claimed_size")),
str(data_object.get("content_size")),
Expand Down Expand Up @@ -336,7 +336,7 @@ def testSizeAndTypeOrProtocolMatchesMetadata(self, test_data_content_url):
) in data_object.get("tika_content_type"):
type_matches = True
self.logger.info(
"{} : Sucessfully verified content type from downloaded file -: (expected: {}, found: via tika {})".format(
"{} : Successfully verified content type from downloaded file -: (expected: {}, found: via tika {})".format(
self.metric_identifier,
data_object.get("claimed_type"),
str(data_object.get("tika_content_type"))
Expand All @@ -361,7 +361,7 @@ def testSizeAndTypeOrProtocolMatchesMetadata(self, test_data_content_url):
if tika_type in protocol_mime_types:
protocol_matches = True
self.logger.info(
"{} : Sucessfully verified commonly used protocol mime type -: (expected: {}, found: via tika {})".format(
"{} : Successfully verified commonly used protocol mime type -: (expected: {}, found: via tika {})".format(
self.metric_identifier,
protocol_mime_types,
str(data_object.get("tika_content_type")),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ def testDataUrlOrPIDAvailable(self, datainfolist):
else:
self.logger.warning(
self.metric_identifier
+ f" : Object (content) url is empty or not identied as GUID -: {datainfo}"
+ f" : Object (content) url is empty or not identified as GUID -: {datainfo}"
)
if test_result:
self.score.earned += test_score
Expand Down
2 changes: 1 addition & 1 deletion fuji_server/evaluators/fair_evaluator_file_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -331,7 +331,7 @@ def evaluate(self):

if not mime_url_dict:
self.logger.warning(
f"{self.metric_identifier} : Could not perform file format checks as data content identifier(s) unavailable/inaccesible"
f"{self.metric_identifier} : Could not perform file format checks as data content identifier(s) unavailable/inaccessible"
)

self.output = self.data_file_list
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -242,7 +242,7 @@ def evaluate(self):
self.result.test_status = "pass"
"""else:
self.score.earned = 0
self.logger.warning(self.metric_identifier + ' : Could not identify a valid peristent identifier based on scheme and resolution')"""
self.logger.warning(self.metric_identifier + ' : Could not identify a valid persistent identifier based on scheme and resolution')"""

self.result.score = self.score
self.result.maturity = self.maturity
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ class FAIREvaluatorStandardisedProtocolData(FAIREvaluator):
Methods
------
evaluate()
This method will evaluate the accesibility of the data on whether the URI's scheme is based on
This method will evaluate the accessibility of the data on whether the URI's scheme is based on
a shared application protocol.
"""

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ class FAIREvaluatorUniqueIdentifierData(FAIREvaluator):
Methods
------
evaluate()
This method will evaluate whether the data is assigned to a unique identifier (UUID/HASH) that folows a proper syntax or
This method will evaluate whether the data is assigned to a unique identifier (UUID/HASH) that follows a proper syntax or
identifier is resolvable and follows a defined unique identifier syntax (URL, IRI).
"""

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ class FAIREvaluatorUniqueIdentifierMetadata(FAIREvaluator):
Methods
------
evaluate()
This method will evaluate whether the data is assigned to a unique identifier (UUID/HASH) that folows a proper syntax or
This method will evaluate whether the data is assigned to a unique identifier (UUID/HASH) that follows a proper syntax or
identifier is resolvable and follows a defined unique identifier syntax (URL, IRI).
"""

Expand All @@ -27,7 +27,7 @@ def __init__(self, fuji_instance):
metric = "FsF-F1-01MD"
else:
metric = "FsF-F1-01D"
# after 0.5 seperate metrics for metadata and data
# after 0.5 separate metrics for metadata and data
self.set_metric(metric)

def testMetadataIdentifierCompliesWithIdutilsScheme(self):
Expand Down
2 changes: 1 addition & 1 deletion fuji_server/harvester/data_harvester.py
Original file line number Diff line number Diff line change
Expand Up @@ -305,5 +305,5 @@ def tika(self, file_buffer_object, url):

# Escape any slash # test_data_content_text = parsed_content.replace('\\', '\\\\').replace('"', '\\"')
if fileinfo["test_data_content_text"]:
self.logger.info(f"FsF-R1-01MD : Succesfully parsed data file(s) -: {url}")
self.logger.info(f"FsF-R1-01MD : Successfully parsed data file(s) -: {url}")
return fileinfo
6 changes: 3 additions & 3 deletions fuji_server/harvester/metadata_harvester.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ def __init__(
self.landing_page_status = None
self.isLandingPageAccessible = False
self.landing_redirect_list = [] # URLs visited during redirects
self.landing_redirect_status_list = [] # list with stati
self.landing_redirect_status_list = [] # list with statuses
self.landing_content_type = None
self.origin_url = None
self.pid_url = None
Expand Down Expand Up @@ -182,7 +182,7 @@ def merge_metadata(self, metadict, url, method, format, mimetype, schema="", nam
allow_merge = False
self.logger.warning(
self.logger_target.get("metadata_properties")
+ " : Harvesting of this metadata is explicitely disabled in the metric configuration-:"
+ " : Harvesting of this metadata is explicitly disabled in the metric configuration-:"
+ str(metadata_standard)
)
if isinstance(metadict, dict) and allow_merge is True:
Expand Down Expand Up @@ -1265,7 +1265,7 @@ def retrieve_metadata_external_rdf_negotiated(self, target_url_list=[]):
neg_rdf_collector.set_auth_token(self.auth_token, self.auth_token_type)
if neg_rdf_collector is not None:
source_rdf, rdf_dict = neg_rdf_collector.parse_metadata()
# in case F-UJi was redirected and the landing page content negotiation doesnt return anything try the origin URL
# in case F-UJi was redirected and the landing page content negotiation doesn't return anything try the origin URL
if not rdf_dict:
if self.origin_url is not None and self.origin_url != targeturl:
neg_rdf_collector.target_url = self.origin_url
Expand Down
8 changes: 4 additions & 4 deletions fuji_server/helper/metadata_collector.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,7 @@ class MetaDataCollector:
Sources : enum.Enum
Enum class to enumerate metadata sources
source_metadata : dict
Metadata souce in a dictionary.
Metadata source in a dictionary.
metadata_mapping : metadata_mapper.Mapper
Metadata mapping to metadata sources
logger : logging.Logger
Expand All @@ -214,15 +214,15 @@ class MetaDataCollector:
getLogger()
Get/return the logger object.
setLogger(l)
Set the logger according to inpur paramter l.
Set the logger according to input parameter l.
getSourceMetadata()
Get source metadata.
setSourceMetadata(em)
Set the source metadata according to input parameter em.
setTargetMetadata(tm)
Set the target metadata according to input parameter tm.
getTargetMetadata()
Returm the target metadata.
Return the target metadata.
getNamespaces()
Return the namespaces of the metadata.
getNamespacesfromIRIs(meta_source)
Expand All @@ -241,7 +241,7 @@ def __init__(
Parameters
----------
sourcemetadata : dict, optional
Metadata souce in a dictionary, default is None
Metadata source in a dictionary, default is None
mapping : metadata_mapper.Mapper, optional
Metadata mapping to metadata sources, default is None
logger : logging.Logger, optional
Expand Down
2 changes: 1 addition & 1 deletion fuji_server/helper/metadata_collector_dublincore.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ def parse_metadata(self):
try:
self.metadata_format = MetadataFormats.XHTML
# self.logger.info('FsF-F2-01M : Trying to extract DublinCore metadata from html page')
# get core metadat from dublin core meta tags:
# get core metadata from dublin core meta tags:
# < meta name = "DCTERMS.element" content = "Value" / >
# meta_dc_matches = re.findall('<meta\s+([^\>]*)name=\"(DC|DCTERMS)?\.([a-z]+)\"(.*?)content=\"(.*?)\"',self.landing_html)
# exp = '<\s*meta\s*([^\>]*)name\s*=\s*\"(DC|DCTERMS)?\.([A-Za-z]+)(\.[A-Za-z]+)?\"(.*?)content\s*=\s*\"(.*?)\"'
Expand Down
6 changes: 3 additions & 3 deletions fuji_server/helper/metadata_collector_rdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,7 @@ def get_metadata_from_graph(self, rdf_response_graph):
rdflib.term.URIRef("http://www.w3.org/2002/07/owl#"),
]
if isinstance(rdf_response_graph, rdflib.graph.Graph) or isinstance(rdflib.graph.ConjunctiveGraph):
self.logger.info("FsF-F2-01M : Found RDF Graph which was sucessfully parsed")
self.logger.info("FsF-F2-01M : Found RDF Graph which was successfully parsed")
self.logger.info("FsF-F2-01M : Trying to identify namespaces in RDF Graph")
graph_namespaces = self.set_namespaces(rdf_response_graph)
# self.getNamespacesfromIRIs(graph_text)
Expand Down Expand Up @@ -840,8 +840,8 @@ def get_schemaorg_metadata(self, graph):
schema_metadata = {}
SMA = Namespace("http://schema.org/")
# use only schema.org properties and create graph using these.
# is e.g. important in case schema.org is encoded as RDFa and variuos namespaces are used
# this is tested by namepace elsewhere
# is e.g. important in case schema.org is encoded as RDFa and various namespaces are used
# this is tested by namespace elsewhere
if "schema.org" in str(main_entity_namespace):
self.main_entity_format = str(SDO)
schema_metadata = self.get_core_metadata(graph, creative_work, type=creative_work_type)
Expand Down
4 changes: 2 additions & 2 deletions fuji_server/helper/metadata_mapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,7 @@ def flip_dict(dict_to_flip):
"summary": ["abstract", "description"],
"keywords": "subject",
"object_type": "type",
"object_size": "exent",
"object_size": "extent",
"modified_date": "modified",
"created_date": "created",
"license": "license",
Expand Down Expand Up @@ -329,7 +329,7 @@ def flip_dict(dict_to_flip):
"""

################# XML Mappings ###############
# relations: indicate type using: related_resource_[opional relation type] alternative: define a list 'related_resource_type'
# relations: indicate type using: related_resource_[optional relation type] alternative: define a list 'related_resource_type'
# content identifiers: object_content_identifier_url, object_content_identifier_size, object_content_identifier_type (should have same length)
# otherwise take a look at the ISO/GCMD mapping
# attributes: must be indicated like this: tag@@attribute
Expand Down
2 changes: 1 addition & 1 deletion fuji_server/helper/metadata_provider_sparql.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ class SPARQLMetadataProvider(MetadataProvider):
"""

def getMetadataStandards(self):
"""Method will return the matadata standards in the namespaces
"""Method will return the metadata standards in the namespaces

Returns
-------
Expand Down
2 changes: 1 addition & 1 deletion fuji_server/helper/repository_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ def lookup_re3data(self):
re3link = root.xpath("//link")[0].attrib["href"]
if re3link is not None:
self.logger.info("FsF-R1.3-01M : Found match re3data metadata record -: " + str(re3link))
# query reposiroty metadata
# query repository metadata
q2 = RequestHelper(url=re3link)
q2.setAcceptType(AcceptTypes.xml)
_re3_source, re3_response = q2.content_negotiate(metric_id="FsF-R1.3-01M")
Expand Down
2 changes: 1 addition & 1 deletion fuji_server/helper/request_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,7 @@ def request_content(self, metric_id="", ignore_html=True):
)
elif e.code == 400:
try:
# browsers automatically redirect to https in case a 400 occured for a http URL
# browsers automatically redirect to https in case a 400 occurred for a http URL
if redirect_handler.redirect_list:
last_redirect_url = redirect_handler.redirect_list[-1]
if "http://" in last_redirect_url:
Expand Down
4 changes: 2 additions & 2 deletions fuji_server/models/body.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,7 +241,7 @@ def use_github(self, use_github: bool):
def metric_version(self) -> str:
"""Gets the metric_version of this Body.

The FAIRsFAIR metric version be used fo rthe assessment # noqa: E501
The FAIRsFAIR metric version to be used for the assessment # noqa: E501

:return: The metric_version of this Body.
:rtype: str
Expand All @@ -252,7 +252,7 @@ def metric_version(self) -> str:
def metric_version(self, metric_version: str):
"""Sets the metric_version of this Body.

The FAIRsFAIR metric version be used fo rthe assessment # noqa: E501
The FAIRsFAIR metric version to be used for the assessment # noqa: E501

:param metric_version: The metric_version of this Body.
:type metric_version: str
Expand Down
2 changes: 1 addition & 1 deletion fuji_server/models/core_metadata_output.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ def core_metadata_status(self, core_metadata_status: str):
:param core_metadata_status: The core_metadata_status of this CoreMetadataOutput.
:type core_metadata_status: str
"""
allowed_values = ["insufficent metadata", "partial metadata", "all metadata"]
allowed_values = ["insufficient metadata", "partial metadata", "all metadata"]
Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

potential functional bugfix

if core_metadata_status not in allowed_values:
raise ValueError(
f"Invalid value for `core_metadata_status` ({core_metadata_status}), must be one of {allowed_values}"
Expand Down
Loading