Skip to content

Commit d2a9d34

Browse files
fix models not persisting between restarts
1 parent f022fba commit d2a9d34

4 files changed

Lines changed: 9 additions & 18 deletions

File tree

gradia/backend/ocr.py

Lines changed: 9 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -26,7 +26,7 @@
2626
from gradia.backend.logger import Logger
2727
from gradia.backend.settings import Settings
2828
from gradia.constants import app_id
29-
from gradia.constants import ocr_tesseract_cmd, ocr_original_tessdata, ocr_user_tessdata
29+
from gradia.constants import ocr_tesseract_cmd, ocr_original_tessdata
3030

3131

3232
logger = Logger()
@@ -63,7 +63,7 @@ class OCR:
6363
def __init__(self, window=None):
6464
self.tesseract_cmd = ocr_tesseract_cmd
6565
self.original_tessdata_dir = ocr_original_tessdata
66-
self.user_tessdata_dir = ocr_user_tessdata
66+
self.user_tessdata_dir = os.path.expanduser(f"~/.var/app/{app_id}/data/tessdata")
6767
self.window = window
6868

6969
pytesseract.pytesseract.tesseract_cmd = self.tesseract_cmd
@@ -90,27 +90,21 @@ def set_current_model(self, model_code: str):
9090
logger.warning(f"Cannot set model {model_code}: not installed")
9191
raise ValueError(f"Model {model_code} is not installed")
9292

93-
def extract_text(self, image, primary_lang="eng", secondary_lang="eng"):
93+
def extract_text(self, image, primary_lang):
9494
if not self.get_installed_models():
9595
raise RuntimeError("No OCR language models are available")
9696

9797
if not self.is_model_installed(primary_lang):
98-
available_models = self.get_installed_models()
99-
if available_models:
100-
primary_lang = available_models[0]
101-
logger.warning(f"Requested language not available, using {primary_lang}")
102-
else:
103-
raise RuntimeError("No OCR language models are available")
98+
raise RuntimeError(f"OCR language model '{primary_lang}' is not installed")
10499

105100
self.set_current_model(primary_lang)
101+
106102
try:
107103
tessdata_dir = self._get_tessdata_dir_for_lang(primary_lang)
108104
config = f'--tessdata-dir "{tessdata_dir}"'
109-
110-
if self.is_model_installed(secondary_lang) and secondary_lang != primary_lang:
111-
lang = f"{primary_lang}+{secondary_lang}"
112-
else:
113-
lang = primary_lang
105+
lang = primary_lang
106+
if self.is_model_installed("eng") and primary_lang != "eng":
107+
lang = f"{primary_lang}+eng"
114108

115109
extracted_text = pytesseract.image_to_string(
116110
image,
@@ -168,6 +162,7 @@ def on_download_complete(session, result, user_data):
168162

169163
with open(output_path, 'wb') as f:
170164
f.write(raw_bytes)
165+
logger.info(f"saving to {output_path} ")
171166

172167
logger.info(f"Downloaded OCR model: {model_code}")
173168
self.set_current_model(model_code)

gradia/constants.in

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -33,4 +33,3 @@ help_url = '@HELP_URL@'
3333
# OCR paths
3434
ocr_tesseract_cmd = '@OCR_TESSERACT_CMD@'
3535
ocr_original_tessdata = '@OCR_ORIGINAL_TESSDATA_DIR@'
36-
ocr_user_tessdata = '@OCR_USER_TESSDATA_DIR@'

gradia/ui/dialog/ocr_dialog.py

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -37,7 +37,6 @@ def __init__(self, image=None, **kwargs):
3737
self.image = image
3838
self.ocr = OCR()
3939
self.primary_lang = "eng"
40-
self.secondary_lang = None
4140
self._setup_language_button()
4241
self._start_ocr()
4342
self.ocr_text_view.remove_css_class("view")

meson.build

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -64,7 +64,6 @@ endif
6464
# OCR Directories
6565
OCR_TESSERACT_CMD = '/app/bin/tesseract'
6666
OCR_ORIGINAL_TESSDATA_DIR = '/app/share/tessdata'
67-
OCR_USER_TESSDATA_DIR = '~/.var/app' / APPLICATION_ID / 'data' / 'tessdata'
6867

6968
# Install configuration data
7069
conf = configuration_data()
@@ -82,7 +81,6 @@ conf.set('BUILD_DIR', meson.current_build_dir())
8281

8382
conf.set('OCR_TESSERACT_CMD', OCR_TESSERACT_CMD)
8483
conf.set('OCR_ORIGINAL_TESSDATA_DIR', OCR_ORIGINAL_TESSDATA_DIR)
85-
conf.set('OCR_USER_TESSDATA_DIR', OCR_USER_TESSDATA_DIR)
8684

8785
# Install project information
8886
conf.set('RELEASE_VER', meson.project_version())

0 commit comments

Comments
 (0)