From fd42abde530cc6ba136c1f000044e49d7464d993 Mon Sep 17 00:00:00 2001 From: SamriddhiJain Date: Fri, 10 Mar 2017 14:39:31 +0530 Subject: [PATCH 1/4] Initial fixes with autopep8 --- gensim/summarization/__init__.py | 2 +- gensim/summarization/bm25.py | 6 +++--- gensim/summarization/summarizer.py | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/gensim/summarization/__init__.py b/gensim/summarization/__init__.py index 57c9a7c815..c7efb84d4a 100644 --- a/gensim/summarization/__init__.py +++ b/gensim/summarization/__init__.py @@ -1,4 +1,4 @@ # bring model classes directly into package namespace, to save some typing from .summarizer import summarize, summarize_corpus -from .keywords import keywords \ No newline at end of file +from .keywords import keywords diff --git a/gensim/summarization/bm25.py b/gensim/summarization/bm25.py index 6704146d54..d634a32b54 100644 --- a/gensim/summarization/bm25.py +++ b/gensim/summarization/bm25.py @@ -40,7 +40,7 @@ def initialize(self): self.df[word] += 1 for word, freq in iteritems(self.df): - self.idf[word] = math.log(self.corpus_size-freq+0.5) - math.log(freq+0.5) + self.idf[word] = math.log(self.corpus_size - freq + 0.5) - math.log(freq + 0.5) def get_score(self, document, index, average_idf): score = 0 @@ -48,8 +48,8 @@ def get_score(self, document, index, average_idf): if word not in self.f[index]: continue idf = self.idf[word] if self.idf[word] >= 0 else EPSILON * average_idf - score += (idf*self.f[index][word]*(PARAM_K1+1) - / (self.f[index][word] + PARAM_K1*(1 - PARAM_B+PARAM_B*self.corpus_size / self.avgdl))) + score += (idf * self.f[index][word] * (PARAM_K1 + 1) + / (self.f[index][word] + PARAM_K1 * (1 - PARAM_B + PARAM_B * self.corpus_size / self.avgdl))) return score def get_scores(self, document, average_idf): diff --git a/gensim/summarization/summarizer.py b/gensim/summarization/summarizer.py index 0779011999..e749b4cc66 100644 --- a/gensim/summarization/summarizer.py +++ 
b/gensim/summarization/summarizer.py @@ -198,10 +198,10 @@ def summarize(text, ratio=0.2, word_count=None, split=False): logger.warning("Input text is empty.") return - # If only one sentence is present, the function raises an error (Avoids ZeroDivisionError). + # If only one sentence is present, the function raises an error (Avoids ZeroDivisionError). if len(sentences) == 1: raise ValueError("input must have more than one sentence") - + # Warns if the text is too short. if len(sentences) < INPUT_MIN_LENGTH: logger.warning("Input text is expected to have at least " + str(INPUT_MIN_LENGTH) + " sentences.") From b1dd5416768cc3d8aeb61a739ef93a99551db10e Mon Sep 17 00:00:00 2001 From: SamriddhiJain Date: Fri, 10 Mar 2017 15:56:11 +0530 Subject: [PATCH 2/4] updated for E713 --- gensim/summarization/keywords.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gensim/summarization/keywords.py b/gensim/summarization/keywords.py index fe09ae1947..b24e6f1f04 100644 --- a/gensim/summarization/keywords.py +++ b/gensim/summarization/keywords.py @@ -164,7 +164,7 @@ def _get_combined_keywords(_keywords, split_text): result.append(word) # appends last word if keyword and doesn't iterate for j in xrange(i + 1, len_text): other_word = _strip_word(split_text[j]) - if other_word in _keywords and other_word == split_text[j] and not other_word in combined_word: + if other_word in _keywords and other_word == split_text[j] and other_word not in combined_word: combined_word.append(other_word) else: for keyword in combined_word: From 382453ab9aee7ad1392021deba1f872a52bcf242 Mon Sep 17 00:00:00 2001 From: SamriddhiJain Date: Fri, 10 Mar 2017 16:55:25 +0530 Subject: [PATCH 3/4] Updated to hanging indents --- gensim/summarization/bm25.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/gensim/summarization/bm25.py b/gensim/summarization/bm25.py index d634a32b54..16f054bc9c 100644 --- a/gensim/summarization/bm25.py +++ b/gensim/summarization/bm25.py @@ 
-48,8 +48,9 @@ def get_score(self, document, index, average_idf): if word not in self.f[index]: continue idf = self.idf[word] if self.idf[word] >= 0 else EPSILON * average_idf - score += (idf * self.f[index][word] * (PARAM_K1 + 1) - / (self.f[index][word] + PARAM_K1 * (1 - PARAM_B + PARAM_B * self.corpus_size / self.avgdl))) + score += ( + idf * self.f[index][word] * (PARAM_K1 + 1) / + (self.f[index][word] + PARAM_K1 * (1 - PARAM_B + PARAM_B * self.corpus_size / self.avgdl))) return score def get_scores(self, document, average_idf): From c0b1774d606d29f822e21a9a5d74ab0ff8efc9c2 Mon Sep 17 00:00:00 2001 From: SamriddhiJain Date: Mon, 20 Mar 2017 17:09:44 +0530 Subject: [PATCH 4/4] added line break before operator --- gensim/summarization/bm25.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/gensim/summarization/bm25.py b/gensim/summarization/bm25.py index 16f054bc9c..d634a32b54 100644 --- a/gensim/summarization/bm25.py +++ b/gensim/summarization/bm25.py @@ -48,9 +48,8 @@ def get_score(self, document, index, average_idf): if word not in self.f[index]: continue idf = self.idf[word] if self.idf[word] >= 0 else EPSILON * average_idf - score += ( - idf * self.f[index][word] * (PARAM_K1 + 1) / - (self.f[index][word] + PARAM_K1 * (1 - PARAM_B + PARAM_B * self.corpus_size / self.avgdl))) + score += (idf * self.f[index][word] * (PARAM_K1 + 1) + / (self.f[index][word] + PARAM_K1 * (1 - PARAM_B + PARAM_B * self.corpus_size / self.avgdl))) return score def get_scores(self, document, average_idf):