-
Notifications
You must be signed in to change notification settings - Fork 32
improve logging and page validator #1359
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
19b3032
759ba52
8cc98b0
3372bd2
fae27c5
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -121,7 +121,7 @@ | |
| :py:meth:`Processor.process()` was not overridden. | ||
| """ | ||
|
|
||
| class DummyFuture: | ||
|
Check failure on line 124 in src/ocrd/processor/base.py
|
||
| """ | ||
| Mimics some of `concurrent.futures.Future` but runs immediately. | ||
| """ | ||
|
|
@@ -708,7 +708,7 @@ | |
| nr_errors = dict(nr_errors) | ||
| nr_all = nr_succeeded + nr_failed | ||
| if nr_failed > 0: | ||
| if config.OCRD_MAX_MISSING_OUTPUTS > 0 and nr_failed / nr_all > config.OCRD_MAX_MISSING_OUTPUTS: | ||
| if config.OCRD_MAX_MISSING_OUTPUTS >= 0 and nr_failed / nr_all > config.OCRD_MAX_MISSING_OUTPUTS: | ||
| raise Exception(f"too many failures with {reason} output ({nr_failed} of {nr_all}, {str(nr_errors)})") | ||
| self._base_logger.warning("%s %d of %d pages due to %s", reason, nr_failed, nr_all, str(nr_errors)) | ||
| self._base_logger.debug("succeeded %d, missed %d of %d pages due to %s", | ||
|
|
@@ -941,7 +941,7 @@ | |
| value=self.version), | ||
| LabelType(type_='ocrd/core', | ||
| value=OCRD_VERSION)]) | ||
| ]) | ||
|
Check failure on line 944 in src/ocrd/processor/base.py
|
||
| metadata_obj.add_MetadataItem(metadata_item) | ||
|
|
||
| def resolve_resource(self, val): | ||
|
|
@@ -1199,6 +1199,9 @@ | |
| if log_queue: | ||
| # replace all log handlers with just one queue handler | ||
| logging.root.handlers = [logging.handlers.QueueHandler(log_queue)] | ||
| logging.root.handlers[0].setFormatter( | ||
| # insert pageId before actual message | ||
| logging.Formatter(fmt='[%(pageId)s] %(message)s')) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Very useful for debugging. An eventual (yet another) refactoring of the logging to provide such metadata per-message throughout would be tremendous. "Wenn mal Zeit ist..." ("When there's time...") |
||
|
|
||
|
|
||
| def _page_worker(*input_files, timeout=0): | ||
|
|
@@ -1209,6 +1212,11 @@ | |
| #_page_worker_processor.process_page_file(*input_files) | ||
| page_id = next((file.pageId for file in input_files | ||
| if hasattr(file, 'pageId')), "") | ||
| # update log records for QueueHandler formatter | ||
| def log_filter(record: logging.LogRecord): | ||
|
Check failure on line 1216 in src/ocrd/processor/base.py
|
||
| record.pageId = page_id | ||
| return record | ||
| logging.root.handlers[0].filters = [log_filter] | ||
| if timeout: | ||
| if threading.current_thread() is not threading.main_thread(): | ||
| # does not work outside of main thread | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Good for consistency and useful for doing targeted validation of experimental workspaces with lots of groups.