We read every piece of feedback and take your input very seriously.
To see all available qualifiers, see our documentation.
2 parents a002c7f + 4a5b8aa · commit 4021f6e · Copy full SHA for 4021f6e
2 files changed
pdftext/pdf/pages.py
@@ -107,8 +107,8 @@ def span_break():
107
span_break()
108
continue
109
110
- # we also break on hyphenation
111
- if span['text'].endswith("\x02"):
+ # we break on hyphenation or newline
+ if span['text'].endswith("\x02") or span['text'].endswith("\n"):
112
113
114
pyproject.toml
@@ -1,6 +1,6 @@
1
[tool.poetry]
2
name = "pdftext"
3
-version = "0.6.1"
+version = "0.6.2"
4
description = "Extract structured text from pdfs quickly"
5
authors = ["Vik Paruchuri <vik.paruchuri@gmail.com>"]
6
license = "Apache-2.0"
0 commit comments