py-pdf · otreci4sgelt0nas · Sep 4, 2025 · Jan 31, 2026 · Jan 31, 2026 · andersonhc
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -21,6 +21,7 @@ This can also be enabled programmatically with `warnings.simplefilter('default',
 * support for SVG `<linearGradient>` and `<radialGradient>` elements - _cf._ [issue #1580](https://github.com/py-pdf/fpdf2/issues/1580) - thanks to @Ani07-05
 * mypy and pyright checks in the CI pipeline to enforce strict typing
 * support WOFF and WOFF2 fonts - thanks to @BharathPESU
+* automatic Unicode font recommendations in encoding error messages: when text in a non-Latin script (Cyrillic, Arabic, Chinese, etc.) cannot be encoded with the current font, the new `UnicodeFontManager` class detects the script and suggests a suitable font installed on the system - _cf._ [PR #1563](https://github.com/py-pdf/fpdf2/pull/1563) - thanks to @otrepid4github
 ### Fixed
 * the `A5` value that could be specified as page `format` to the `FPDF` constructor was slightly incorrect, and the corresponding page dimensions have been fixed. This could lead to a minor change in your documents dimensions if you used this `A5` page format. - _cf._ [issue #1699](https://github.com/py-pdf/fpdf2/issues/1699)
 * a bug when rendering empty tables with `INTERNAL` layout, that caused an extra border to be rendered due to an erroneous use of `list.index()` - _cf._ [issue #1669](https://github.com/py-pdf/fpdf2/issues/1669)

diff --git a/fpdf/errors.py b/fpdf/errors.py
@@ -35,22 +35,29 @@ def __str__(self) -> str:
 class FPDFUnicodeEncodingException(FPDFException):
     """Error is thrown when a character that cannot be encoded by the chosen encoder is provided"""
 
-    def __init__(self, text_index: int, character: str, font_name: str) -> None:
+    def __init__(
+        self,
+        text_index: int,
+        character: str,
+        font_name: str,
+        suggestion: "str | None" = None,
+    ) -> None:
+        # text_index: position of the offending character in the text
+        # character: the character that could not be encoded
+        # font_name: name of the font in use when encoding failed
+        # suggestion: optional ready-to-display hint; when set, __str__ shows it
+        #   in place of the generic "Please consider using a Unicode font." advice.
+        #   Its annotation is quoted so that it is never evaluated at runtime
+        #   and does not rely on any typing import.
         super().__init__()
         self.text_index = text_index
         self.character = character
         self.font_name = font_name
+        self.suggestion = suggestion
 
     def __repr__(self) -> str:
-        return f"{self.__class__.__name__}({repr(self.text_index), repr(self.character), repr(self.font_name)})"
+        return (
+            f"{self.__class__.__name__}({self.text_index!r}, {self.character!r},"
+            f" {self.font_name!r}, {self.suggestion!r})"
+        )
 
     def __str__(self) -> str:
-        return (
+        base_message = (
             f'Character "{self.character}" at index {self.text_index} in text is outside the range of characters'
             f' supported by the font used: "{self.font_name}".'
-            " Please consider using a Unicode font."
         )
 
+        # A specific suggestion replaces the generic advice rather than adding to it
+        if self.suggestion:
+            return f"{base_message}\n\n{self.suggestion}"
+        return f"{base_message} Please consider using a Unicode font."
+
 
 class ComplianceError(FPDFException):
     """Base class for standards-compliance violations (PDF/A, PDF/X, etc.)."""

diff --git a/fpdf/fpdf.py b/fpdf/fpdf.py
@@ -123,6 +123,7 @@ class Image:  # type: ignore[no-redef]
     FPDFUnicodeEncodingException,
     PDFAComplianceError,
 )
+from .unicode_font_utils import suggest_unicode_font_for_error
 from .fonts import CORE_FONTS, CoreFont, FontFace, TextStyle, TitleStyle, TTFFont
 from .graphics_state import GraphicsStateMixin, StateStackType
 from .html import HTML2FPDF
@@ -5639,10 +5640,13 @@ def normalize_text(self, text: str) -> str:
             try:
                 return text.encode(self.core_fonts_encoding).decode("latin-1")
             except UnicodeEncodeError as error:
+                font_name = self.font_family + self.font_style
+                suggestion = suggest_unicode_font_for_error(text, font_name)
                 raise FPDFUnicodeEncodingException(
                     text_index=error.start,
                     character=text[error.start],
-                    font_name=self.font_family + self.font_style,
+                    font_name=font_name,
+                    suggestion=suggestion,
                 ) from error
         return text
 

diff --git a/fpdf/unicode_font_utils.py b/fpdf/unicode_font_utils.py
@@ -0,0 +1,292 @@
+"""
+Unicode font utilities for fpdf2.
+
+This module provides utilities for automatic Unicode font detection and management,
+helping users work with non-Latin scripts like Cyrillic, Arabic, Chinese, etc.
+
+The contents of this module are internal to fpdf2, and not part of the public API.
+They may change at any time without prior warning or any deprecation period,
+in non-backward-compatible ways.
+"""
+
+import os
+import platform
+from pathlib import Path
+from typing import List, Optional, Tuple, Dict
+import logging
+
+from .unicode_script import get_unicode_script, UnicodeScript
+
+LOGGER = logging.getLogger(__name__)
+
+
+class UnicodeFontManager:
+    """
+    Finds fonts installed on the system and recommends some for a given Unicode script.
+
+    The font directories are resolved when the manager is created, but they are only
+    walked the first time `available_fonts` is read. The result of a walk is shared
+    between instances (keyed by the directories walked), so that creating one manager
+    per encoding error does not scan the filesystem again each time.
+    Fonts installed while the process is running are therefore not picked up.
+    """
+
+    # Font file extensions looked for. When several files found under the same
+    # directory share a stem, the extension listed first wins.
+    _FONT_SUFFIXES = (".ttf", ".otf", ".ttc")
+
+    # Fonts tried for scripts that have no entry in _SCRIPT_FONTS:
+    _DEFAULT_FONTS = ("dejavusans", "dejavuserif")
+
+    # Candidates for the single last-resort suggestion made when none of the
+    # preferred fonts is installed:
+    _GENERIC_FALLBACKS = ("dejavusans", "dejavuserif", "arial", "helvetica")
+
+    # Hiragana and Katakana share the same Japanese fonts:
+    _KANA_FONTS = (
+        "notosansjp",
+        "notoserifjp",
+        "sourcehansans",
+        "sourcehanserif",
+    )
+
+    # Preferred fonts per script, most preferred first.
+    # DejaVu is deliberately absent from the Han, Hangul, Kana, Devanagari and Thai
+    # entries: it has no glyphs for those scripts, so suggesting it would only lead
+    # the user to another rendering failure.
+    _SCRIPT_FONTS: Dict[UnicodeScript, Tuple[str, ...]] = {
+        UnicodeScript.CYRILLIC: (
+            "dejavusans",
+            "dejavuserif",
+            "dejavusanscondensed",
+            "arial",
+            "helvetica",
+            "liberationsans",
+            "liberationserif",
+            "notosans",
+            "notoserif",
+            "roboto",
+            "opensans",
+        ),
+        UnicodeScript.ARABIC: (
+            "dejavusans",
+            "dejavuserif",
+            "notosansarabic",
+            "notoserifarabic",
+            "amiri",
+            "scheherazade",
+            "lateef",
+        ),
+        UnicodeScript.HAN: (
+            "notosanscjk",
+            "notoserifcjk",
+            "sourcehansans",
+            "sourcehanserif",
+            "fireflysung",
+        ),
+        UnicodeScript.HANGUL: (
+            "notosanskr",
+            "notoserifkr",
+            "nanumgothic",
+            "nanummyeongjo",
+        ),
+        UnicodeScript.HIRAGANA: _KANA_FONTS,
+        UnicodeScript.KATAKANA: _KANA_FONTS,
+        UnicodeScript.DEVANAGARI: (
+            "notosansdevanagari",
+            "notoserifdevanagari",
+            "gargi",
+            "lohitdevanagari",
+        ),
+        UnicodeScript.THAI: (
+            "notosansthai",
+            "notoserifthai",
+            "waree",
+            "garuda",
+        ),
+        UnicodeScript.HEBREW: (
+            "dejavusans",
+            "dejavuserif",
+            "notosanshebrew",
+            "notoserifhebrew",
+            "frankruehl",
+            "david",
+        ),
+    }
+
+    # Scripts that none of the _GENERIC_FALLBACKS can render:
+    # for those, making no suggestion is better than making a wrong one.
+    _SCRIPTS_NEEDING_DEDICATED_FONT = frozenset(
+        {
+            UnicodeScript.HAN,
+            UnicodeScript.HANGUL,
+            UnicodeScript.HIRAGANA,
+            UnicodeScript.KATAKANA,
+            UnicodeScript.DEVANAGARI,
+            UnicodeScript.THAI,
+        }
+    )
+
+    # Scan results shared by all instances: directories walked -> fonts found
+    _SCAN_CACHE: Dict[Tuple[Path, ...], Dict[str, Path]] = {}
+
+    # Per-instance view of the scan result; None until `available_fonts` is first read
+    _available_fonts: Optional[Dict[str, Path]] = None
+
+    def __init__(self) -> None:
+        self.system = platform.system().lower()
+        self.font_paths = self._get_system_font_paths()
+
+    @property
+    def available_fonts(self) -> Dict[str, Path]:
+        """
+        Fonts found in `font_paths`, as a mapping of lowercase font name to file path.
+
+        The directories are walked on first access only, and at most once per process
+        for a given list of directories.
+        """
+        if self._available_fonts is None:
+            cache_key = tuple(self.font_paths)
+            scanned = self._SCAN_CACHE.get(cache_key)
+            if scanned is None:
+                scanned = self._SCAN_CACHE[cache_key] = self._scan_available_fonts()
+            # Copy, so that altering one manager's mapping does not affect the others
+            self._available_fonts = dict(scanned)
+        return self._available_fonts
+
+    @available_fonts.setter
+    def available_fonts(self, fonts: Dict[str, Path]) -> None:
+        # Kept assignable, as it was when this was a plain attribute
+        self._available_fonts = fonts
+
+    def _get_system_font_paths(self) -> List[Path]:
+        """Return the usual font directories of the current OS that exist on disk."""
+        candidates: List[Path] = []
+
+        if self.system == "darwin":  # macOS
+            candidates = [
+                Path("/System/Library/Fonts"),
+                Path("/Library/Fonts"),
+                Path.home() / "Library/Fonts",
+            ]
+        elif self.system == "linux":
+            candidates = [
+                Path("/usr/share/fonts"),
+                Path("/usr/local/share/fonts"),
+                Path.home() / ".fonts",
+                Path.home() / ".local/share/fonts",
+            ]
+        elif self.system == "windows":
+            candidates = [
+                Path("C:/Windows/Fonts"),
+                Path.home() / "AppData/Local/Microsoft/Windows/Fonts",
+            ]
+
+        return [path for path in candidates if path.exists()]
+
+    @staticmethod
+    def _normalize_font_name(stem: str) -> str:
+        """
+        Reduce a font file stem to the compact form used in the recommendation tables:
+        lowercase alphanumeric characters only, without a trailing "regular".
+        For example "NotoSansArabic-Regular" becomes "notosansarabic".
+        """
+        name = "".join(char for char in stem.lower() if char.isalnum())
+        if name.endswith("regular"):
+            name = name[: -len("regular")]
+        return name
+
+    def _scan_available_fonts(self) -> Dict[str, Path]:
+        """
+        Walk `font_paths` and collect the TrueType/OpenType font files found.
+
+        Returns:
+            Dictionary mapping font names to file paths. Every file is registered
+            under its lowercase stem; it is also registered under its normalized name
+            (see `_normalize_font_name`) when that key is not already taken.
+            For a given key, the first file found is kept.
+        """
+        fonts: Dict[str, Path] = {}
+        aliases: Dict[str, Path] = {}
+
+        for font_dir in self.font_paths:
+            found: List[Path] = []
+            try:
+                # A single walk of the tree, instead of one per extension
+                for entry in font_dir.rglob("*"):
+                    if entry.suffix.lower() in self._FONT_SUFFIXES:
+                        found.append(entry)
+            except OSError as error:
+                # Best effort: this runs while an encoding error is being reported,
+                # so a filesystem problem must not take over. Keep what was found.
+                LOGGER.debug(
+                    "Could not fully scan font directory %s: %s", font_dir, error
+                )
+            # Stable sort: .ttf before .otf before .ttc, walk order otherwise
+            found.sort(key=lambda path: self._FONT_SUFFIXES.index(path.suffix.lower()))
+            for font_file in found:
+                stem = font_file.stem.lower()
+                fonts.setdefault(stem, font_file)
+                alias = self._normalize_font_name(stem)
+                if alias:
+                    aliases.setdefault(alias, font_file)
+
+        # Aliases only fill the gaps: they never replace a file registered by its stem
+        for alias, font_file in aliases.items():
+            fonts.setdefault(alias, font_file)
+
+        return fonts
+
+    def get_recommended_fonts_for_script(
+        self, script: UnicodeScript
+    ) -> List[Tuple[str, str]]:
+        """
+        Get recommended font names and their file paths for a specific Unicode script.
+
+        Args:
+            script: The Unicode script that the font must be able to render.
+
+        Returns:
+            List of (font_name, file_path) tuples, ordered by preference.
+            The list is empty when no suitable font is installed.
+        """
+        available = self.available_fonts
+        preferred = self._SCRIPT_FONTS.get(script, self._DEFAULT_FONTS)
+        recommendations = [
+            (font_name, str(available[font_name]))
+            for font_name in preferred
+            if font_name in available
+        ]
+
+        if recommendations or script in self._SCRIPTS_NEEDING_DEDICATED_FONT:
+            return recommendations
+
+        # None of the preferred fonts is installed: suggest one broad-coverage font
+        for fallback in self._GENERIC_FALLBACKS:
+            if fallback in available:
+                return [(fallback, str(available[fallback]))]
+
+        return []
+
+    def detect_script_in_text(self, text: str) -> Optional[UnicodeScript]:
+        """
+        Detect the primary Unicode script in the given text.
+
+        Args:
+            text: The text to analyze.
+
+        Returns:
+            The most common non-Latin Unicode script in the text (the one met first
+            in case of a tie), or None if only Latin/Common scripts are found.
+        """
+        ignored = (UnicodeScript.COMMON, UnicodeScript.LATIN)
+        script_counts: Dict[UnicodeScript, int] = {}
+
+        for char in text:
+            script = get_unicode_script(char)
+            if script not in ignored:
+                script_counts[script] = script_counts.get(script, 0) + 1
+
+        if not script_counts:
+            return None
+
+        return max(script_counts.items(), key=lambda item: item[1])[0]
+
+    def get_font_recommendation_for_text(self, text: str) -> Optional[Tuple[str, str]]:
+        """
+        Get a font recommendation for the given text based on its Unicode script.
+
+        Args:
+            text: The text to analyze.
+
+        Returns:
+            (font_name, file_path) tuple for the recommended font, or None if no recommendation.
+        """
+        script = self.detect_script_in_text(text)
+        # Compare to None explicitly: an enum member is allowed to be falsy
+        if script is None:
+            return None
+
+        recommendations = self.get_recommended_fonts_for_script(script)
+        return recommendations[0] if recommendations else None
+
+    def list_available_unicode_fonts(self) -> Dict[str, str]:
+        """
+        List every font file found on the system.
+        The fonts are not inspected, so nothing is known about the scripts they cover.
+
+        Returns:
+            Dictionary mapping font names to file paths.
+        """
+        return {name: str(path) for name, path in self.available_fonts.items()}
+
+
+def get_unicode_font_recommendation(text: str) -> Optional[Tuple[str, str]]:
+    """
+    Shortcut to get a font recommendation without handling a `UnicodeFontManager`:
+    a new manager is created and asked for a recommendation for the text.
+
+    Args:
+        text: The text whose Unicode script decides which font is recommended.
+
+    Returns:
+        (font_name, file_path) tuple for the recommended font, or None if no recommendation.
+    """
+    return UnicodeFontManager().get_font_recommendation_for_text(text)
+
+
+def suggest_unicode_font_for_error(error_text: str, font_name: str) -> str:
+    """
+    Generate a helpful error message suggesting Unicode fonts when encoding errors occur.
+
+    This is meant to be called while an encoding error is being reported: the font
+    lookup is best effort, and a filesystem problem met during that lookup only
+    results in a less specific message, never in another exception.
+
+    Args:
+        error_text: The text that caused the encoding error.
+        font_name: The name of the font that failed.
+
+    Returns:
+        A helpful error message with font suggestions.
+    """
+    generic_advice = "Consider using a Unicode font like DejaVu Sans or Arial."
+    script: Optional[UnicodeScript] = None
+    recommendations: List[Tuple[str, str]] = []
+
+    try:
+        manager = UnicodeFontManager()
+        script = manager.detect_script_in_text(error_text)
+        # Compare to None explicitly: an enum member is allowed to be falsy
+        if script is not None:
+            recommendations = manager.get_recommended_fonts_for_script(script)
+    except (OSError, RuntimeError) as error:
+        # OSError: unreadable font directory - RuntimeError: home directory not resolvable
+        LOGGER.debug("Could not look up installed Unicode fonts: %s", error)
+
+    if script is None:
+        return (
+            f"The text contains characters that cannot be encoded with the '{font_name}' font. "
+            f"{generic_advice}"
+        )
+
+    script_name = script.name.replace("_", " ").title()
+    problem = (
+        f"The text contains {script_name} characters that cannot be encoded "
+        f"with the '{font_name}' font. "
+    )
+
+    if not recommendations:
+        return problem + generic_advice
+
+    font_name_rec, font_path = recommendations[0]
+    # !r renders valid Python string literals, so that the snippet can be pasted as is
+    # even when the path contains backslashes (Windows) or quotes
+    return (
+        f"{problem}Consider using a Unicode font like '{font_name_rec}' instead.\n"
+        f"To use it, add the font with: pdf.add_font({font_name_rec!r}, '', {font_path!r})"
+    )