11from flask import Flask , jsonify , request
22from flask_cors import CORS
3- import psutil
4- import socket
5- import signal
6- import sys
7- import time
3+ import duckdb
4+ import json
5+ from collections import Counter
86
97app = Flask (__name__ )
108# Configure CORS to allow all origins and methods
@@ -25,34 +23,43 @@ def process_topics():
2523 else : # GET request
2624 search_term = request .args .get ('searchTerm' , '' ).lower ()
2725
28- # Updated topic data to match frontend mock data
29- all_topics = [
30- {"name" : "visual-programming" , "count" : 342 },
31- {"name" : "graph-theory" , "count" : 289 },
32- {"name" : "network-analysis" , "count" : 256 },
33- {"name" : "scientific-computing" , "count" : 198 },
34- {"name" : "python" , "count" : 187 },
35- {"name" : "javascript" , "count" : 165 },
36- {"name" : "d3" , "count" : 142 },
37- {"name" : "typescript" , "count" : 128 },
38- {"name" : "react" , "count" : 112 },
39- {"name" : "machine-learning" , "count" : 98 },
40- {"name" : "data-science" , "count" : 87 },
41- {"name" : "visualization" , "count" : 76 },
42- {"name" : "neo4j" , "count" : 65 },
43- {"name" : "graphql" , "count" : 54 },
44- {"name" : "sigma-js" , "count" : 43 },
45- ]
26+ # Step 1: Load JSON into a DuckDB temp table
27+ con = duckdb .connect (database = ':memory:' )
28+ con .execute ("""
29+ CREATE TEMP TABLE repo AS
30+ SELECT * FROM read_json_auto('../public/data/repo_metadata.json');
31+ """ )
4632
47- filtered_topics = [
48- topic for topic in all_topics
49- if search_term in topic ["name" ].lower ()
50- ]
33+ # Step 2: Get nameWithOwner and topics into a pandas DataFrame
34+ query = "SELECT nameWithOwner, topics FROM repo"
35+ df = con .execute (query ).fetchdf ()
36+
37+ # Step 3: Normalize each repo's topics into a list of topic names
38+ def extract_names (item_ls ):
39+ if item_ls is not None and len (item_ls ) > 0 :
40+ return [item ["name" ] for item in item_ls if "name" in item ]
41+ return []
42+
43+ df ["topics" ] = df ["topics" ].apply (extract_names )
44+
45+ # Step 4: Keep only repos that have a topic exactly equal to the search term (case-insensitive, not a substring match)
46+ filtered_df = df [df ["topics" ].apply (lambda x : search_term in [t .lower () for t in x ])]
47+
48+ # Step 5: Count all co-occurring topics
49+ all_topics = [topic for topics in filtered_df ["topics" ] for topic in topics ]
50+ topic_counts = Counter (all_topics )
51+
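52+ # Remove the searched topic itself so only co-occurring topics remain (only the key spelled exactly like the lower-cased search term is removed)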
53+ topic_counts .pop (search_term , None )
54+
55+ # Step 6: Convert to list of dicts and sort
56+ topics = [{"name" : name , "count" : count } for name , count in topic_counts .items ()]
57+ topics = sorted (topics , key = lambda x : x ["count" ], reverse = True )
5158
5259 return jsonify ({
5360 "success" : True ,
54- "data" : filtered_topics ,
55- "total" : len (filtered_topics )
61+ "data" : topics ,
62+ "total" : len (topics )
5663 })
5764
5865 except Exception as e :
@@ -66,12 +73,7 @@ def process_topics():
6673def home ():
6774 return "Hello World!"
6875
69- def signal_handler (sig , frame ):
70- print ('\n Shutting down the server...' )
71- sys .exit (0 )
72-
7376if __name__ == '__main__' :
74- signal .signal (signal .SIGINT , signal_handler )
7577 print ("Starting Flask server..." )
7678 port = 5002
7779
0 commit comments