Skip to content

Commit 40a0be0

Browse files
committed
Cleanup implementation
1 parent 50aaf06 commit 40a0be0

2 files changed

Lines changed: 4 additions & 15 deletions

File tree

pdftext/pdf/chars.py

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -88,10 +88,7 @@ def word_break():
8888
# we break on any change in font info - optimized comparison
8989
char_font = char['font']
9090
word_font = word['font']
91-
if (char_font['name'] != word_font['name'] or
92-
char_font['flags'] != word_font['flags'] or
93-
char_font['size'] != word_font['size'] or
94-
char_font['weight'] != word_font['weight']):
91+
if any(char_font[k] != word_font[k] for k in ['name', 'flags', 'size', 'weight']):
9592
word_break()
9693
continue
9794

pdftext/pdf/pages.py

Lines changed: 3 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -132,11 +132,8 @@ def span_break():
132132
def get_lines(spans: Spans) -> Lines:
133133
lines: Lines = []
134134
line: Line = None
135-
line_text: str = ""
136135

137136
def line_break():
138-
global line_text
139-
line_text = ""
140137
lines.append({"spans": [span], "bbox": span["bbox"], "rotation": span["rotation"]})
141138

142139
for span in spans:
@@ -147,13 +144,9 @@ def line_break():
147144
line_break()
148145
continue
149146

150-
# we break if the previous span ends with a linebreak, and the line has text
151-
if line["spans"][-1]["text"].endswith("\n") and line_text.strip():
152-
line_break()
153-
continue
154-
155-
# we break if the current line ends with a hyphen
156-
if line["spans"][-1]["text"].endswith("\x02"):
147+
# we break if the previous span ends with a linebreak ("\n") or a hyphen marker ("\x02")
148+
last_text = line["spans"][-1]["text"]
149+
if any(last_text.endswith(suffix) for suffix in ["\n", "\x02"]):
157150
line_break()
158151
continue
159152

@@ -167,7 +160,6 @@ def line_break():
167160
continue
168161

169162
line["spans"].append(span)
170-
line_text += span["text"]
171163
line["bbox"] = line["bbox"].merge(span["bbox"])
172164

173165
return lines

0 commit comments

Comments
 (0)