Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion gensim/summarization/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@

# bring model classes directly into package namespace, to save some typing
from .summarizer import summarize, summarize_corpus
from .keywords import keywords
from .keywords import keywords
6 changes: 3 additions & 3 deletions gensim/summarization/bm25.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,16 +40,16 @@ def initialize(self):
self.df[word] += 1

for word, freq in iteritems(self.df):
self.idf[word] = math.log(self.corpus_size-freq+0.5) - math.log(freq+0.5)
self.idf[word] = math.log(self.corpus_size - freq + 0.5) - math.log(freq + 0.5)

def get_score(self, document, index, average_idf):
score = 0
for word in document:
if word not in self.f[index]:
continue
idf = self.idf[word] if self.idf[word] >= 0 else EPSILON * average_idf
score += (idf*self.f[index][word]*(PARAM_K1+1)
/ (self.f[index][word] + PARAM_K1*(1 - PARAM_B+PARAM_B*self.corpus_size / self.avgdl)))
score += (idf * self.f[index][word] * (PARAM_K1 + 1)
/ (self.f[index][word] + PARAM_K1 * (1 - PARAM_B + PARAM_B * self.corpus_size / self.avgdl)))
return score

def get_scores(self, document, average_idf):
Expand Down
2 changes: 1 addition & 1 deletion gensim/summarization/keywords.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,7 @@ def _get_combined_keywords(_keywords, split_text):
result.append(word) # appends last word if keyword and doesn't iterate
for j in xrange(i + 1, len_text):
other_word = _strip_word(split_text[j])
if other_word in _keywords and other_word == split_text[j] and not other_word in combined_word:
if other_word in _keywords and other_word == split_text[j] and other_word not in combined_word:
combined_word.append(other_word)
else:
for keyword in combined_word:
Expand Down
4 changes: 2 additions & 2 deletions gensim/summarization/summarizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,10 +198,10 @@ def summarize(text, ratio=0.2, word_count=None, split=False):
logger.warning("Input text is empty.")
return

# If only one sentence is present, the function raises an error (Avoids ZeroDivisionError).
# If only one sentence is present, the function raises an error (Avoids ZeroDivisionError).
if len(sentences) == 1:
raise ValueError("input must have more than one sentence")

# Warns if the text is too short.
if len(sentences) < INPUT_MIN_LENGTH:
logger.warning("Input text is expected to have at least " + str(INPUT_MIN_LENGTH) + " sentences.")
Expand Down