33import duckdb
44import json
55from collections import Counter
6+ from concurrent .futures import ThreadPoolExecutor
7+ from functools import partial
68
79app = Flask (__name__ )
810# Configure CORS to allow all origins and methods
@@ -23,8 +25,9 @@ def process_topics():
2325 else : # GET request
2426 search_term = request .args .get ('searchTerm' , '' ).lower ()
2527
26- # Step 1: Load JSON into a DuckDB temp table
28+ # Step 1: Load JSON into a DuckDB temp table with parallel processing enabled
2729 con = duckdb .connect (database = ':memory:' )
30+ con .execute ("SET threads TO 16;" ) # Adjust number based on your CPU cores
2831 con .execute ("""
2932 CREATE TEMP TABLE repo AS
3033 SELECT * FROM read_json_auto('../public/data/repo_metadata.json');
@@ -49,11 +52,11 @@ def extract_names(item_ls):
4952 all_topics = [topic for topics in filtered_df ["topics" ] for topic in topics ]
5053 topic_counts = Counter (all_topics )
5154
52- # Optional: Remove the searched topic itself
55+ # Remove the searched topic itself
5356 topic_counts .pop (search_term , None )
5457
55- # Step 6: Convert to list of dicts and sort
56- topics = [{"name" : name , "count" : count } for name , count in topic_counts .items ()]
58+ # Step 6: Convert to list of dicts and sort, only including topics with count > 2
59+ topics = [{"name" : name , "count" : count } for name , count in topic_counts .items () if count > 2 ]
5760 topics = sorted (topics , key = lambda x : x ["count" ], reverse = True )
5861
5962 return jsonify ({
0 commit comments