1- #!/usr/bin/env python
2- # -*- coding: utf-8 -*-
3- # vim: ai ts=4 sts=4 et sw=4 nu
4-
51import datetime
62import queue
73import threading
8- from typing import Callable
4+ from collections . abc import Callable
95
10- from . shared import logger
6+ from zimscraperlib import logger
117
# Module-wide flag: submitting a task is refused once this is set.
_shutdown = False
# Guards _shutdown so that no new work is accepted while the interpreter is
# shutting down. Hold it whenever _shutdown is read or changed.
_global_shutdown_lock = threading.Lock()
16- thread_deadline_sec = 60
1712
1813
19- def excepthook (args ):
14+ def excepthook (args ): # pragma: no cover
2015 logger .error (f"UNHANDLED Exception in { args .thread .name } : { args .exc_type } " )
2116 logger .exception (args .exc_value )
2217
2318
2419threading .excepthook = excepthook
2520
2621
27- class SotokiExecutor (queue .Queue ):
22+ class ScraperExecutor (queue .Queue ):
2823 """Custom FIFO queue based Executor that's less generic than ThreadPoolExec one
2924
3025 Providing more flexibility for the use cases we're interested about:
@@ -34,12 +29,19 @@ class SotokiExecutor(queue.Queue):
3429 See: https://github.com/python/cpython/blob/3.8/Lib/concurrent/futures/thread.py
3530 """
3631
def __init__(
    self,
    queue_size: int = 10,
    nb_workers: int = 1,
    executor_name: str = "executor",
    thread_deadline_sec: int = 60,
):
    """Set up the executor; no worker thread is started here.

    Args:
        queue_size: maximum size handed to the underlying ``queue.Queue``.
        nb_workers: number of worker threads ``start()`` creates.
        executor_name: name used in log messages and as the prefix of
            worker thread names.
        thread_deadline_sec: seconds ``join()`` grants each worker thread
            before giving up on it.
    """
    super().__init__(maxsize=queue_size)
    self.nb_workers = nb_workers
    self.executor_name = executor_name
    self.thread_deadline_sec = thread_deadline_sec
    # exceptions collected from failed tasks
    self.exceptions = []
    self._shutdown_lock = threading.Lock()
4345
4446 @property
4547 def exception (self ):
@@ -59,30 +61,38 @@ def submit(self, task: Callable, **kwargs):
5961 with self ._shutdown_lock , _global_shutdown_lock :
6062 if not self .alive :
6163 raise RuntimeError ("cannot submit task to dead executor" )
64+ if self .no_more :
65+ raise RuntimeError (
66+ "cannot submit task to a joined executor, restart it first"
67+ )
6268 if _shutdown :
63- raise RuntimeError ("cannot submit task after " "interpreter shutdown" )
69+ raise RuntimeError ( # pragma: no cover
70+ "cannot submit task after interpreter shutdown"
71+ )
6472
6573 while True :
6674 try :
6775 self .put ((task , kwargs ), block = True , timeout = 3.0 )
6876 except queue .Full :
6977 if self .no_more :
70- break
78+ # rarely happens except if submit and join are done in different
79+ # threads, but we need this to escape the while loop
80+ break # pragma: no cover
7181 else :
7282 break
7383
def start(self):
    """Reset the executor's state and launch its worker threads.

    Calls ``drain()``, clears the ``no_more`` and ``_shutdown`` flags, forgets
    previously recorded exceptions, then starts ``nb_workers`` daemon threads
    running ``self.worker``. Workers are always started up front, never
    provisioned dynamically.
    """
    self.drain()
    self._workers: set[threading.Thread] = set()
    self.no_more = False
    self._shutdown = False
    # emptied in place so existing references to the list stay valid
    self.exceptions.clear()

    for index in range(self.nb_workers):
        thread = threading.Thread(
            target=self.worker,
            name=f"{self.executor_name}-{index}",
            daemon=True,
        )
        thread.start()
        self._workers.add(thread)
@@ -95,7 +105,7 @@ def worker(self):
95105 if self .no_more :
96106 break
97107 continue
98- except TypeError :
108+ except TypeError : # pragma: no cover
99109 # received None from the queue. most likely shuting down
100110 return
101111
@@ -108,7 +118,7 @@ def worker(self):
108118 except Exception as exc :
109119 logger .error (f"Error processing { func } with { kwargs = } " )
110120 logger .exception (exc )
111- if raises :
121+ if raises : # to cover when raises = False
112122 self .exceptions .append (exc )
113123 self .shutdown ()
114124 finally :
@@ -129,30 +139,30 @@ def drain(self):
129139
130140 def join (self ):
131141 """Await completion of workers, requesting them to stop taking new task"""
132- logger .debug (f"joining all threads for { self .prefix } " )
142+ logger .debug (f"joining all threads for { self .executor_name } " )
133143 self .no_more = True
134144 for num , t in enumerate (self ._workers ):
135- deadline = datetime .datetime .now () + datetime .timedelta (
136- seconds = thread_deadline_sec
145+ deadline = datetime .datetime .now (tz = datetime .UTC ) + datetime .timedelta (
146+ seconds = self .thread_deadline_sec
147+ )
148+ logger .debug (
149+ f"Giving { self .executor_name } -{ num } { self .thread_deadline_sec } s to join"
137150 )
138- logger .debug (f"Giving { self .prefix } { num } { thread_deadline_sec } s to join" )
139151 e = threading .Event ()
140- while t .is_alive () and datetime .datetime .now () < deadline :
152+ while t .is_alive () and datetime .datetime .now (tz = datetime . UTC ) < deadline :
141153 t .join (1 )
142154 e .wait (timeout = 2 )
143155 if t .is_alive ():
144- logger .debug (f"Thread { self .prefix } { num } is not joining. Skipping…" )
156+ logger .debug (
157+ f"Thread { self .executor_name } -{ num } is not joining. Skipping…"
158+ )
145159 else :
146- logger .debug (f"Thread { self .prefix } { num } joined" )
147- logger .debug (f"all threads joined for { self .prefix } " )
148-
149- def release_halt (self ):
150- """release the `no_more` flag preventing workers from taking up tasks"""
151- self .no_more = False
160+ logger .debug (f"Thread { self .executor_name } -{ num } joined" )
161+ logger .debug (f"all threads joined for { self .executor_name } " )
152162
153- def shutdown (self , wait = True ):
163+ def shutdown (self , * , wait = True ):
154164 """stop the executor, either somewhat immediately or awaiting completion"""
155- logger .debug (f"shutting down executor { self .prefix } with { wait = } " )
165+ logger .debug (f"shutting down { self .executor_name } with { wait = } " )
156166 with self ._shutdown_lock :
157167 self ._shutdown = True
158168
0 commit comments