pangaea-data-publisher · yarikoptic · Apr 20, 2026 · Apr 20, 2026 · Apr 20, 2026 · Apr 20, 2026
diff --git a/.github/workflows/codespell.yml b/.github/workflows/codespell.yml
@@ -0,0 +1,23 @@
+# Codespell configuration is within pyproject.toml
+---
+name: Codespell
+
+on:
+  push:
+    branches: [master]
+  pull_request:
+    branches: [master]
+
+permissions:
+  contents: read
+
+jobs:
+  codespell:
+    name: Check for spelling errors
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+      - name: Codespell
+        uses: codespell-project/actions-codespell@8f01853be192eb0f849a5c7d721450e7a467c579  # v2.2
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -38,3 +38,11 @@ repos:
     args: [--fix, --show-fixes, --exit-non-zero-on-fix]
   - id: ruff-format
     types: [python]
+
+- repo: https://github.com/codespell-project/codespell
+  # Configuration for codespell is in pyproject.toml
+  rev: v2.4.1
+  hooks:
+  - id: codespell
+    additional_dependencies:
+    - tomli; python_version<'3.11'
diff --git a/docs/source/conf.py b/docs/source/conf.py
@@ -321,7 +321,7 @@
 # The format is a list of tuples containing the path and title.
 # epub_pre_files = []
 
-# HTML files shat should be inserted after the pages created by sphinx.
+# HTML files that should be inserted after the pages created by sphinx.
 # The format is a list of tuples containing the path and title.
 # epub_post_files = []
 

diff --git a/fuji_server/controllers/fair_object_controller.py b/fuji_server/controllers/fair_object_controller.py
@@ -28,7 +28,7 @@ async def assess_by_id(body):
     allow_remote_logging = False
     # Request POST BODY has to be JSON
     if connexion.request.content_type == "application/json":
-        # The client has to send this HTTP header (Allow-Remote-Logging:True) explicitely to enable remote logging
+        # The client has to send this HTTP header (Allow-Remote-Logging:True) explicitly to enable remote logging
         # Useful for e.g. web clients..
         allow_remote_logging = connexion.request.headers.get("Allow-Remote-Logging")
         debug = True

diff --git a/fuji_server/data/README.md b/fuji_server/data/README.md
@@ -6,7 +6,7 @@
 - [`creativeworktypes.txt`](./creativeworktypes.txt)
 - [`default_namespaces.txt`](./default_namespaces.txt): Excluded during evaluation of the semantic vocabulary, FsF-I2-01M.
 - [`file_formats.yaml`](./file_formats.yaml): Dictionary of scientific file formats. Used in evaluation of R1.3-02D to check the file format of the data.
-- [`google_cache.db`](./google_cache.db): Used for evaluating FsF-F4-01M (searchability in major catalogues like DataCite registry, Google Dataset, Mendeley, ...). Google Data search is queried for a PID in column `google_links`. It's a dataset with metadata about datasets that have a DOI or persistent identifier from `identifer.org`.
+- [`google_cache.db`](./google_cache.db): Used for evaluating FsF-F4-01M (searchability in major catalogues like DataCite registry, Google Dataset, Mendeley, ...). Google Data search is queried for a PID in column `google_links`. It's a dataset with metadata about datasets that have a DOI or persistent identifier from `identifiers.org`.
 - [`identifiers_org__data.yaml`](./identifiers_org_resolver_data.yaml): Used in [`IdentifierHelper`](fuji_server/helper/identifier_helper.py).
 - [`jsonldcontext.yaml`](./jsonldcontext.yaml)
 - [`licenses.yaml`](./licenses.yaml): Used to populate `Preprocessor.license_names`, a list of SPDX licences. Used in evaluation of licenses, FsF-R1.1-01M.

diff --git a/fuji_server/evaluators/fair_evaluator_community_metadata.py b/fuji_server/evaluators/fair_evaluator_community_metadata.py
@@ -230,7 +230,7 @@ def retrieve_metadata_standards_from_apis(self):
             self.retrieve_metadata_standards_from_sparql()
         else:
             self.logger.warning(
-                "{} : Skipped external ressources (e.g. OAI, re3data) checks since landing page could not be resolved".format(
+                "{} : Skipped external resources (e.g. OAI, re3data) checks since landing page could not be resolved".format(
                     "FsF-R1.3-01M"
                 )
             )

diff --git a/fuji_server/evaluators/fair_evaluator_data_content_metadata.py b/fuji_server/evaluators/fair_evaluator_data_content_metadata.py
@@ -289,7 +289,7 @@ def testSizeAndTypeOrProtocolMatchesMetadata(self, test_data_content_url):
                             if object_size == int(float(data_object.get("content_size"))):
                                 size_matches = True
                                 self.logger.info(
-                                    "{} : Sucessfully verified content size from downloaded file -: (expected: {}, found: {})".format(
+                                    "{} : Successfully verified content size from downloaded file -: (expected: {}, found: {})".format(
                                         self.metric_identifier,
                                         str(data_object.get("claimed_size")),
                                         str(data_object.get("content_size")),
@@ -336,7 +336,7 @@ def testSizeAndTypeOrProtocolMatchesMetadata(self, test_data_content_url):
                     ) in data_object.get("tika_content_type"):
                         type_matches = True
                         self.logger.info(
-                            "{} : Sucessfully verified content type from downloaded file -: (expected: {}, found: via tika {})".format(
+                            "{} : Successfully verified content type from downloaded file -: (expected: {}, found: via tika {})".format(
                                 self.metric_identifier,
                                 data_object.get("claimed_type"),
                                 str(data_object.get("tika_content_type"))
@@ -361,7 +361,7 @@ def testSizeAndTypeOrProtocolMatchesMetadata(self, test_data_content_url):
                             if tika_type in protocol_mime_types:
                                 protocol_matches = True
                                 self.logger.info(
-                                    "{} : Sucessfully verified commonly used protocol mime type -: (expected: {}, found: via tika {})".format(
+                                    "{} : Successfully verified commonly used protocol mime type -: (expected: {}, found: via tika {})".format(
                                         self.metric_identifier,
                                         protocol_mime_types,
                                         str(data_object.get("tika_content_type")),

diff --git a/fuji_server/evaluators/fair_evaluator_data_identifier_included.py b/fuji_server/evaluators/fair_evaluator_data_identifier_included.py
@@ -100,7 +100,7 @@ def testDataUrlOrPIDAvailable(self, datainfolist):
                         else:
                             self.logger.warning(
                                 self.metric_identifier
-                                + f" : Object (content) url is empty or not identied as GUID -: {datainfo}"
+                                + f" : Object (content) url is empty or not identified as GUID -: {datainfo}"
                             )
             if test_result:
                 self.score.earned += test_score

diff --git a/fuji_server/evaluators/fair_evaluator_file_format.py b/fuji_server/evaluators/fair_evaluator_file_format.py
@@ -331,7 +331,7 @@ def evaluate(self):
 
         if not mime_url_dict:
             self.logger.warning(
-                f"{self.metric_identifier} : Could not perform file format checks as data content identifier(s) unavailable/inaccesible"
+                f"{self.metric_identifier} : Could not perform file format checks as data content identifier(s) unavailable/inaccessible"
             )
 
         self.output = self.data_file_list

diff --git a/fuji_server/evaluators/fair_evaluator_persistent_identifier_metadata_data.py b/fuji_server/evaluators/fair_evaluator_persistent_identifier_metadata_data.py
@@ -242,7 +242,7 @@ def evaluate(self):
                 self.result.test_status = "pass"
         """else:
             self.score.earned = 0
-            self.logger.warning(self.metric_identifier + ' : Could not identify a valid peristent identifier based on scheme and resolution')"""
+            self.logger.warning(self.metric_identifier + ' : Could not identify a valid persistent identifier based on scheme and resolution')"""
 
         self.result.score = self.score
         self.result.maturity = self.maturity

diff --git a/fuji_server/evaluators/fair_evaluator_standardised_protocol_data.py b/fuji_server/evaluators/fair_evaluator_standardised_protocol_data.py
@@ -19,7 +19,7 @@ class FAIREvaluatorStandardisedProtocolData(FAIREvaluator):
     Methods
     ------
     evaluate()
-        This method will evaluate the accesibility of the data on whether the URI's scheme is based on
+        This method will evaluate the accessibility of the data on whether the URI's scheme is based on
         a shared application protocol.
     """
 

diff --git a/fuji_server/evaluators/fair_evaluator_unique_identifier_data.py b/fuji_server/evaluators/fair_evaluator_unique_identifier_data.py
@@ -16,7 +16,7 @@ class FAIREvaluatorUniqueIdentifierData(FAIREvaluator):
     Methods
     ------
     evaluate()
-        This method will evaluate whether the data is assigned to a unique identifier (UUID/HASH) that folows a proper syntax or
+        This method will evaluate whether the data is assigned to a unique identifier (UUID/HASH) that follows a proper syntax or
         identifier is resolvable and follows a defined unique identifier syntax (URL, IRI).
     """
 

diff --git a/fuji_server/evaluators/fair_evaluator_unique_identifier_metadata.py b/fuji_server/evaluators/fair_evaluator_unique_identifier_metadata.py
@@ -17,7 +17,7 @@ class FAIREvaluatorUniqueIdentifierMetadata(FAIREvaluator):
     Methods
     ------
     evaluate()
-        This method will evaluate whether the data is assigned to a unique identifier (UUID/HASH) that folows a proper syntax or
+        This method will evaluate whether the data is assigned to a unique identifier (UUID/HASH) that follows a proper syntax or
         identifier is resolvable and follows a defined unique identifier syntax (URL, IRI).
     """
 
@@ -27,7 +27,7 @@ def __init__(self, fuji_instance):
             metric = "FsF-F1-01MD"
         else:
             metric = "FsF-F1-01D"
-            # after 0.5 seperate metrics for metadata and data
+            # after 0.5 separate metrics for metadata and data
         self.set_metric(metric)
 
     def testMetadataIdentifierCompliesWithIdutilsScheme(self):

diff --git a/fuji_server/harvester/data_harvester.py b/fuji_server/harvester/data_harvester.py
@@ -305,5 +305,5 @@ def tika(self, file_buffer_object, url):
 
         # Escape any slash # test_data_content_text = parsed_content.replace('\\', '\\\\').replace('"', '\\"')
         if fileinfo["test_data_content_text"]:
-            self.logger.info(f"FsF-R1-01MD : Succesfully parsed data file(s) -: {url}")
+            self.logger.info(f"FsF-R1-01MD : Successfully parsed data file(s) -: {url}")
         return fileinfo
diff --git a/fuji_server/harvester/metadata_harvester.py b/fuji_server/harvester/metadata_harvester.py
@@ -108,7 +108,7 @@ def __init__(
         self.landing_page_status = None
         self.isLandingPageAccessible = False
         self.landing_redirect_list = []  # urlsvisited during redirects
-        self.landing_redirect_status_list = []  # list with stati
+        self.landing_redirect_status_list = []  # list with statuses
         self.landing_content_type = None
         self.origin_url = None
         self.pid_url = None
@@ -182,7 +182,7 @@ def merge_metadata(self, metadict, url, method, format, mimetype, schema="", nam
                     allow_merge = False
                     self.logger.warning(
                         self.logger_target.get("metadata_properties")
-                        + " : Harvesting of this metadata is explicitely disabled in the metric configuration-:"
+                        + " : Harvesting of this metadata is explicitly disabled in the metric configuration-:"
                         + str(metadata_standard)
                     )
             if isinstance(metadict, dict) and allow_merge is True:
@@ -1265,7 +1265,7 @@ def retrieve_metadata_external_rdf_negotiated(self, target_url_list=[]):
                 neg_rdf_collector.set_auth_token(self.auth_token, self.auth_token_type)
                 if neg_rdf_collector is not None:
                     source_rdf, rdf_dict = neg_rdf_collector.parse_metadata()
-                    # in case F-UJi was redirected and the landing page content negotiation doesnt return anything try the origin URL
+                    # in case F-UJi was redirected and the landing page content negotiation doesn't return anything try the origin URL
                     if not rdf_dict:
                         if self.origin_url is not None and self.origin_url != targeturl:
                             neg_rdf_collector.target_url = self.origin_url

diff --git a/fuji_server/helper/metadata_collector.py b/fuji_server/helper/metadata_collector.py
@@ -197,7 +197,7 @@ class MetaDataCollector:
     Sources : enum.Enum
         Enum class to enumerate metadata sources
     source_metadata : dict
-        Metadata souce in a dictionary.
+        Metadata source in a dictionary.
     metadata_mapping : metadata_mapper.Mapper
         Metadata mapping to metadata sources
     logger : logging.Logger
@@ -214,15 +214,15 @@ class MetaDataCollector:
     getLogger()
         Get/return the logger object.
     setLogger(l)
-        Set the logger according to inpur paramter l.
+        Set the logger according to input parameter l.
     getSourceMetadata()
         Get source metadata.
     setSourceMetadata(em)
         Set the source metadata according to input parameter em.
     setTargetMetadata(tm)
         Set the target metadata according to input parameter tm.
     getTargetMetadata()
-        Returm the target metadata.
+        Return the target metadata.
     getNamespaces()
         Return the namespaces of the metadata.
     getNamespacesfromIRIs(meta_source)
@@ -241,7 +241,7 @@ def __init__(
         Parameters
         ----------
         sourcemetadata : dict, optional
-            Metadata souce in a dictionary, default is None
+            Metadata source in a dictionary, default is None
         mapping : metadata_mapper.Mapper, optional
             Metadata mapping to metadata sources, default is None
         logger : logging.Logger, optional

diff --git a/fuji_server/helper/metadata_collector_dublincore.py b/fuji_server/helper/metadata_collector_dublincore.py
@@ -105,7 +105,7 @@ def parse_metadata(self):
             try:
                 self.metadata_format = MetadataFormats.XHTML
                 # self.logger.info('FsF-F2-01M : Trying to extract DublinCore metadata from html page')
-                # get core metadat from dublin core meta tags:
+                # get core metadata from dublin core meta tags:
                 # < meta name = "DCTERMS.element" content = "Value" / >
                 # meta_dc_matches = re.findall('<meta\s+([^\>]*)name=\"(DC|DCTERMS)?\.([a-z]+)\"(.*?)content=\"(.*?)\"',self.landing_html)
                 # exp = '<\s*meta\s*([^\>]*)name\s*=\s*\"(DC|DCTERMS)?\.([A-Za-z]+)(\.[A-Za-z]+)?\"(.*?)content\s*=\s*\"(.*?)\"'

diff --git a/fuji_server/helper/metadata_collector_rdf.py b/fuji_server/helper/metadata_collector_rdf.py
@@ -156,7 +156,7 @@ def get_metadata_from_graph(self, rdf_response_graph):
                 rdflib.term.URIRef("http://www.w3.org/2002/07/owl#"),
             ]
             if isinstance(rdf_response_graph, rdflib.graph.Graph) or isinstance(rdflib.graph.ConjunctiveGraph):
-                self.logger.info("FsF-F2-01M : Found RDF Graph which was sucessfully parsed")
+                self.logger.info("FsF-F2-01M : Found RDF Graph which was successfully parsed")
                 self.logger.info("FsF-F2-01M : Trying to identify namespaces in RDF Graph")
                 graph_namespaces = self.set_namespaces(rdf_response_graph)
                 # self.getNamespacesfromIRIs(graph_text)
@@ -840,8 +840,8 @@ def get_schemaorg_metadata(self, graph):
         schema_metadata = {}
         SMA = Namespace("http://schema.org/")
         # use only schema.org properties and create graph using these.
-        # is e.g. important in case schema.org is encoded as RDFa and variuos namespaces are used
-        # this is tested by namepace elsewhere
+        # is e.g. important in case schema.org is encoded as RDFa and various namespaces are used
+        # this is tested by namespace elsewhere
         if "schema.org" in str(main_entity_namespace):
             self.main_entity_format = str(SDO)
             schema_metadata = self.get_core_metadata(graph, creative_work, type=creative_work_type)

diff --git a/fuji_server/helper/metadata_mapper.py b/fuji_server/helper/metadata_mapper.py
@@ -161,7 +161,7 @@ def flip_dict(dict_to_flip):
         "summary": ["abstract", "description"],
         "keywords": "subject",
         "object_type": "type",
-        "object_size": "exent",
+        "object_size": "extent",
         "modified_date": "modified",
         "created_date": "created",
         "license": "license",
@@ -329,7 +329,7 @@ def flip_dict(dict_to_flip):
             """
 
     #################  XML Mappings ###############
-    # relations: indicate type using: related_resource_[opional relation type] alternative: define a list 'related_resource_type'
+    # relations: indicate type using: related_resource_[optional relation type] alternative: define a list 'related_resource_type'
     # content identifiers: object_content_identifier_url, object_content_identifier_size, object_content_identifier_type (should have same length)
     # otherwise take a look at the ISO/GCMD mapping
     # attributes: must be indicated like this: tag@@attribute

diff --git a/fuji_server/helper/metadata_provider_sparql.py b/fuji_server/helper/metadata_provider_sparql.py
@@ -27,7 +27,7 @@ class SPARQLMetadataProvider(MetadataProvider):
     """
 
     def getMetadataStandards(self):
-        """Method will return the matadata standards in the namespaces
+        """Method will return the metadata standards in the namespaces
 
         Returns
         -------

diff --git a/fuji_server/helper/repository_helper.py b/fuji_server/helper/repository_helper.py
@@ -55,7 +55,7 @@ def lookup_re3data(self):
                     re3link = root.xpath("//link")[0].attrib["href"]
                     if re3link is not None:
                         self.logger.info("FsF-R1.3-01M : Found match re3data metadata record -: " + str(re3link))
-                        # query reposiroty metadata
+                        # query repository metadata
                         q2 = RequestHelper(url=re3link)
                         q2.setAcceptType(AcceptTypes.xml)
                         _re3_source, re3_response = q2.content_negotiate(metric_id="FsF-R1.3-01M")

diff --git a/fuji_server/helper/request_helper.py b/fuji_server/helper/request_helper.py
@@ -204,7 +204,7 @@ def request_content(self, metric_id="", ignore_html=True):
                             )
                     elif e.code == 400:
                         try:
-                            # browsers automatically redirect to https in case a 400 occured for a http URL
+                            # browsers automatically redirect to https in case a 400 occurred for a http URL
                             if redirect_handler.redirect_list:
                                 last_redirect_url = redirect_handler.redirect_list[-1]
                                 if "http://" in last_redirect_url:

diff --git a/fuji_server/models/body.py b/fuji_server/models/body.py
@@ -241,7 +241,7 @@ def use_github(self, use_github: bool):
     def metric_version(self) -> str:
         """Gets the metric_version of this Body.
 
-        The FAIRsFAIR metric version be used fo rthe assessment  # noqa: E501
+        The FAIRsFAIR metric version to be used for the assessment  # noqa: E501
 
         :return: The metric_version of this Body.
         :rtype: str
@@ -252,7 +252,7 @@ def metric_version(self) -> str:
     def metric_version(self, metric_version: str):
         """Sets the metric_version of this Body.
 
-        The FAIRsFAIR metric version be used fo rthe assessment  # noqa: E501
+        The FAIRsFAIR metric version to be used for the assessment  # noqa: E501
 
         :param metric_version: The metric_version of this Body.
         :type metric_version: str

diff --git a/fuji_server/models/core_metadata_output.py b/fuji_server/models/core_metadata_output.py
@@ -76,7 +76,7 @@ def core_metadata_status(self, core_metadata_status: str):
         :param core_metadata_status: The core_metadata_status of this CoreMetadataOutput.
         :type core_metadata_status: str
         """
-        allowed_values = ["insufficent metadata", "partial metadata", "all metadata"]
+        allowed_values = ["insufficient metadata", "partial metadata", "all metadata"]
         if core_metadata_status not in allowed_values:
             raise ValueError(
                 f"Invalid value for `core_metadata_status` ({core_metadata_status}), must be one of {allowed_values}"
-Original file line number
+Diff line change
@@ Expand Up / @@ -27,7 +27,7 @@ class SPARQLMetadataProvider(MetadataProvider): @@
         """
         def getMetadataStandards(self):
-            """Method will return the matadata standards in the namespaces
+            """Method will return the metadata standards in the namespaces
             Returns
             -------
@@ Expand Down @@