We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent d78d4b4, commit 4a5b8aa (Copy full SHA for 4a5b8aa)
1 file changed
pdftext/pdf/pages.py
@@ -107,16 +107,11 @@ def span_break():
107
span_break()
108
continue
109
110
- # we also break on hyphenation
+ # we break on hyphenation or newline
111
if span['text'].endswith("\x02") or span['text'].endswith("\n"):
112
113
114
115
- # sometimes pdfium doesn't inject a linebreak, so we check the span positions
116
- if char["bbox"].y_start > span["bbox"].y_end:
117
- span_break()
118
- continue
119
-
120
# Character is likely a superscript
121
if all([
122
char["bbox"][1] < (span["bbox"][1] - span["bbox"].height * line_distance_threshold), # char top is above span
0 commit comments