Skip to content

Commit 1545d44

Browse files
committed
fixed DisambiguationError crash and suppressed time-measurement prints unless verbose=True
1 parent 7b2a382 commit 1545d44

2 files changed

Lines changed: 30 additions & 22 deletions

File tree

contextplus/main.py

Lines changed: 28 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -16,55 +16,63 @@ def context(query, n_wiki_pages=5, n_top_chunks=8, min_summary_length=100, max_s
1616
:return: summarized facts from the wikipedia pages as a string
1717
"""
1818
# todo: finding optimal default values for the parameters
19-
# todo: remove time measurements
19+
time1, time2, time3, time4, time5, time6, time7, time8, time9 = 0, 0, 0, 0, 0, 0, 0, 0, 0
2020
if verbose:
2121
print("Query:", query)
22-
time1 = time.time()
22+
time1 = time.time()
2323
# create wikipedia search prompt
2424
wiki_search_prompt = model.create_wiki_search_prompt(query, verbose=verbose)
25-
time2 = time.time()
26-
print("Time taken to get wiki search prompt:", time2 - time1, "seconds")
25+
if verbose:
26+
time2 = time.time()
27+
print("Time taken to get wiki search prompt:", time2 - time1, "seconds")
2728

2829
# get relevant wikipedia pages
2930
page_titles = wiki.get_pages(wiki_search_prompt, n_results=n_wiki_pages)
3031
if verbose:
3132
print("Page titles:", page_titles)
3233
# get the content of the wikipedia pages and split it into chunks
33-
time3 = time.time()
34-
print("Time taken to get wiki pages:", time3 - time2, "seconds")
34+
if verbose:
35+
time3 = time.time()
36+
print("Time taken to get wiki pages:", time3 - time2, "seconds")
3537
wiki_chunks = wiki.get_text_chunks(page_titles, chunk_length=512, verbose=verbose)
36-
time4 = time.time()
37-
print("Time taken to get wiki chunks:", time4 - time3, "seconds")
38+
if verbose:
39+
time4 = time.time()
40+
print("Time taken to get wiki chunks:", time4 - time3, "seconds")
3841

3942
# get the embeddings for the query and the wiki chunks
4043
query_embedding = model.get_embeddings([query])
41-
time5 = time.time()
42-
print("Time taken to get query embedding:", time5 - time4, "seconds")
44+
if verbose:
45+
time5 = time.time()
46+
print("Time taken to get query embedding:", time5 - time4, "seconds")
4347
wiki_embeddings = model.get_embeddings(wiki_chunks)
44-
time6 = time.time()
45-
print("Time taken to get wiki embeddings:", time6 - time5, "seconds")
48+
if verbose:
49+
time6 = time.time()
50+
print("Time taken to get wiki embeddings:", time6 - time5, "seconds")
4651
# calculate the similarity between the query and the wiki chunks
4752
similarities = model.calculate_similarity(query_embedding, wiki_embeddings, top_k=n_top_chunks)
48-
time7 = time.time()
49-
print("Time taken to calculate similarity:", time7 - time6, "seconds")
53+
if verbose:
54+
time7 = time.time()
55+
print("Time taken to calculate similarity:", time7 - time6, "seconds")
5056
top_chunks = ""
5157

5258
for i, similarity in enumerate(similarities):
5359
top_chunks += "<" + str(i + 1) + "> " + wiki_chunks[similarity['corpus_id']] + " </" + str(i + 1) + ">\n\n"
5460
if verbose:
5561
print("Chunk" + str(i + 1) + ":", wiki_chunks[similarity['corpus_id']], "\t\t\tscore:", similarity['score'])
5662

57-
time8 = time.time()
58-
print("Time taken to get concatenated top chunk string:", time8 - time7, "seconds")
63+
if verbose:
64+
time8 = time.time()
65+
print("Time taken to get concatenated top chunk string:", time8 - time7, "seconds")
5966
# summarize facts from the top wiki chunks
6067
summarized_facts = model.summarize_facts(top_chunks, min_length=min_summary_length, max_length=max_summary_length)
61-
time9 = time.time()
62-
print("Time taken to summarize facts:", time9 - time8, "seconds")
63-
print("Total time taken:", time9 - time1, "seconds")
68+
if verbose:
69+
time9 = time.time()
70+
print("Time taken to summarize facts:", time9 - time8, "seconds")
71+
print("Total time taken:", time9 - time1, "seconds")
6472
return summarized_facts
6573

6674

6775
if __name__ == "__main__":
6876
user_query = "What are the names of Barack Obamas children?"
69-
context = context(user_query, verbose=True)
77+
context = context(user_query, verbose=False)
7078
print(context)

contextplus/wiki.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,9 +31,9 @@ def get_text_chunks(page_titles, chunk_length=512, verbose=False):
3131
if verbose:
3232
print(f"getting content of page {page_title}")
3333
wiki_chunks.extend(wiki_content)
34-
except wikipedia.exceptions.PageError or wikipedia.exceptions.DisambiguationError as e:
34+
except (wikipedia.exceptions.PageError, wikipedia.exceptions.DisambiguationError):
3535
if verbose:
36-
print(f"page {page_title} not found, {e}")
36+
print(f"page {page_title} not found")
3737
continue # skip the page if it is not available
3838
return wiki_chunks
3939

0 commit comments

Comments (0)