fixed DisambiguationError crash and limited time prints to verbose=True

flowun · flowun · commit 1545d443fda2 · 2024-05-07T17:10:02.000+02:00
diff --git a/contextplus/main.py b/contextplus/main.py
@@ -16,55 +16,63 @@ def context(query, n_wiki_pages=5, n_top_chunks=8, min_summary_length=100, max_s
     :return: summarized facts from the wikipedia pages as a string
     """
     # todo: finding optimal default values for the parameters
-    # todo: remove time measurements
+    time1, time2, time3, time4, time5, time6, time7, time8, time9 = 0, 0, 0, 0, 0, 0, 0, 0, 0
     if verbose:
         print("Query:", query)
-    time1 = time.time()
+        time1 = time.time()
     # create wikipedia search prompt
     wiki_search_prompt = model.create_wiki_search_prompt(query, verbose=verbose)
-    time2 = time.time()
-    print("Time taken to get wiki search prompt:", time2 - time1, "seconds")
+    if verbose:
+        time2 = time.time()
+        print("Time taken to get wiki search prompt:", time2 - time1, "seconds")
 
     # get relevant wikipedia pages
     page_titles = wiki.get_pages(wiki_search_prompt, n_results=n_wiki_pages)
     if verbose:
         print("Page titles:", page_titles)
     # get the content of the wikipedia pages and split it into chunks
-    time3 = time.time()
-    print("Time taken to get wiki pages:", time3 - time2, "seconds")
+    if verbose:
+        time3 = time.time()
+        print("Time taken to get wiki pages:", time3 - time2, "seconds")
     wiki_chunks = wiki.get_text_chunks(page_titles, chunk_length=512, verbose=verbose)
-    time4 = time.time()
-    print("Time taken to get wiki chunks:", time4 - time3, "seconds")
+    if verbose:
+        time4 = time.time()
+        print("Time taken to get wiki chunks:", time4 - time3, "seconds")
 
     # get the embeddings for the query and the wiki chunks
     query_embedding = model.get_embeddings([query])
-    time5 = time.time()
-    print("Time taken to get query embedding:", time5 - time4, "seconds")
+    if verbose:
+        time5 = time.time()
+        print("Time taken to get query embedding:", time5 - time4, "seconds")
     wiki_embeddings = model.get_embeddings(wiki_chunks)
-    time6 = time.time()
-    print("Time taken to get wiki embeddings:", time6 - time5, "seconds")
+    if verbose:
+        time6 = time.time()
+        print("Time taken to get wiki embeddings:", time6 - time5, "seconds")
     # calculate the similarity between the query and the wiki chunks
     similarities = model.calculate_similarity(query_embedding, wiki_embeddings, top_k=n_top_chunks)
-    time7 = time.time()
-    print("Time taken to calculate similarity:", time7 - time6, "seconds")
+    if verbose:
+        time7 = time.time()
+        print("Time taken to calculate similarity:", time7 - time6, "seconds")
     top_chunks = ""
 
     for i, similarity in enumerate(similarities):
         top_chunks += "<" + str(i + 1) + "> " + wiki_chunks[similarity['corpus_id']] + " </" + str(i + 1) + ">\n\n"
         if verbose:
             print("Chunk" + str(i + 1) + ":", wiki_chunks[similarity['corpus_id']], "\t\t\tscore:", similarity['score'])
 
-    time8 = time.time()
-    print("Time taken to get concatenated top chunk string:", time8 - time7, "seconds")
+    if verbose:
+        time8 = time.time()
+        print("Time taken to get concatenated top chunk string:", time8 - time7, "seconds")
     # summarize facts from the top wiki chunks
     summarized_facts = model.summarize_facts(top_chunks, min_length=min_summary_length, max_length=max_summary_length)
-    time9 = time.time()
-    print("Time taken to summarize facts:", time9 - time8, "seconds")
-    print("Total time taken:", time9 - time1, "seconds")
+    if verbose:
+        time9 = time.time()
+        print("Time taken to summarize facts:", time9 - time8, "seconds")
+        print("Total time taken:", time9 - time1, "seconds")
     return summarized_facts
 
 
 if __name__ == "__main__":
     user_query = "What are the names of Barack Obamas children?"
-    context = context(user_query, verbose=True)
+    context = context(user_query, verbose=False)
     print(context)
diff --git a/contextplus/wiki.py b/contextplus/wiki.py
@@ -31,9 +31,9 @@ def get_text_chunks(page_titles, chunk_length=512, verbose=False):
                 if verbose:
                     print(f"getting content of page {page_title}")
                 wiki_chunks.extend(wiki_content)
-            except wikipedia.exceptions.PageError or wikipedia.exceptions.DisambiguationError as e:
+            except (wikipedia.exceptions.PageError, wikipedia.exceptions.DisambiguationError):
                 if verbose:
-                    print(f"page {page_title} not found, {e}")
+                    print(f"page {page_title} not found")
                 continue  # skip the page if it is not available
     return wiki_chunks