@@ -16,55 +16,63 @@ def context(query, n_wiki_pages=5, n_top_chunks=8, min_summary_length=100, max_s
1616 :return: summarized facts from the wikipedia pages as a string
1717 """
1818 # todo: finding optimal default values for the parameters
19- # todo: remove time measurements
19+ time1 , time2 , time3 , time4 , time5 , time6 , time7 , time8 , time9 = 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0
2020 if verbose :
2121 print ("Query:" , query )
22- time1 = time .time ()
22+ time1 = time .time ()
2323 # create wikipedia search prompt
2424 wiki_search_prompt = model .create_wiki_search_prompt (query , verbose = verbose )
25- time2 = time .time ()
26- print ("Time taken to get wiki search prompt:" , time2 - time1 , "seconds" )
25+ if verbose :
26+ time2 = time .time ()
27+ print ("Time taken to get wiki search prompt:" , time2 - time1 , "seconds" )
2728
2829 # get relevant wikipedia pages
2930 page_titles = wiki .get_pages (wiki_search_prompt , n_results = n_wiki_pages )
3031 if verbose :
3132 print ("Page titles:" , page_titles )
3233 # get the content of the wikipedia pages and split it into chunks
33- time3 = time .time ()
34- print ("Time taken to get wiki pages:" , time3 - time2 , "seconds" )
34+ if verbose :
35+ time3 = time .time ()
36+ print ("Time taken to get wiki pages:" , time3 - time2 , "seconds" )
3537 wiki_chunks = wiki .get_text_chunks (page_titles , chunk_length = 512 , verbose = verbose )
36- time4 = time .time ()
37- print ("Time taken to get wiki chunks:" , time4 - time3 , "seconds" )
38+ if verbose :
39+ time4 = time .time ()
40+ print ("Time taken to get wiki chunks:" , time4 - time3 , "seconds" )
3841
3942 # get the embeddings for the query and the wiki chunks
4043 query_embedding = model .get_embeddings ([query ])
41- time5 = time .time ()
42- print ("Time taken to get query embedding:" , time5 - time4 , "seconds" )
44+ if verbose :
45+ time5 = time .time ()
46+ print ("Time taken to get query embedding:" , time5 - time4 , "seconds" )
4347 wiki_embeddings = model .get_embeddings (wiki_chunks )
44- time6 = time .time ()
45- print ("Time taken to get wiki embeddings:" , time6 - time5 , "seconds" )
48+ if verbose :
49+ time6 = time .time ()
50+ print ("Time taken to get wiki embeddings:" , time6 - time5 , "seconds" )
4651 # calculate the similarity between the query and the wiki chunks
4752 similarities = model .calculate_similarity (query_embedding , wiki_embeddings , top_k = n_top_chunks )
48- time7 = time .time ()
49- print ("Time taken to calculate similarity:" , time7 - time6 , "seconds" )
53+ if verbose :
54+ time7 = time .time ()
55+ print ("Time taken to calculate similarity:" , time7 - time6 , "seconds" )
5056 top_chunks = ""
5157
5258 for i , similarity in enumerate (similarities ):
5359 top_chunks += "<" + str (i + 1 ) + "> " + wiki_chunks [similarity ['corpus_id' ]] + " </" + str (i + 1 ) + ">\n \n "
5460 if verbose :
5561 print ("Chunk" + str (i + 1 ) + ":" , wiki_chunks [similarity ['corpus_id' ]], "\t \t \t score:" , similarity ['score' ])
5662
57- time8 = time .time ()
58- print ("Time taken to get concatenated top chunk string:" , time8 - time7 , "seconds" )
63+ if verbose :
64+ time8 = time .time ()
65+ print ("Time taken to get concatenated top chunk string:" , time8 - time7 , "seconds" )
5966 # summarize facts from the top wiki chunks
6067 summarized_facts = model .summarize_facts (top_chunks , min_length = min_summary_length , max_length = max_summary_length )
61- time9 = time .time ()
62- print ("Time taken to summarize facts:" , time9 - time8 , "seconds" )
63- print ("Total time taken:" , time9 - time1 , "seconds" )
68+ if verbose :
69+ time9 = time .time ()
70+ print ("Time taken to summarize facts:" , time9 - time8 , "seconds" )
71+ print ("Total time taken:" , time9 - time1 , "seconds" )
6472 return summarized_facts
6573
6674
if __name__ == "__main__":
    # Manual smoke test: run the whole retrieval/summarization pipeline for a
    # single sample question and print the resulting summary.
    user_query = "What are the names of Barack Obamas children?"
    # Bind the result to its own name; assigning it to `context` would replace
    # the module-level function with a string and break any later call.
    summarized_context = context(user_query, verbose=False)
    print(summarized_context)
0 commit comments