Skip to content

Commit 4c33c13

Browse files
committed
search works
1 parent c7a1793 commit 4c33c13

1 file changed

Lines changed: 36 additions & 34 deletions

File tree

backend/topic_processor.py

Lines changed: 36 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,8 @@
11
from flask import Flask, jsonify, request
22
from flask_cors import CORS
3-
import psutil
4-
import socket
5-
import signal
6-
import sys
7-
import time
3+
import duckdb
4+
import json
5+
from collections import Counter
86

97
app = Flask(__name__)
108
# Configure CORS to allow all origins and methods
@@ -25,34 +23,43 @@ def process_topics():
2523
else: # GET request
2624
search_term = request.args.get('searchTerm', '').lower()
2725

28-
# Updated topic data to match frontend mock data
29-
all_topics = [
30-
{"name": "visual-programming", "count": 342},
31-
{"name": "graph-theory", "count": 289},
32-
{"name": "network-analysis", "count": 256},
33-
{"name": "scientific-computing", "count": 198},
34-
{"name": "python", "count": 187},
35-
{"name": "javascript", "count": 165},
36-
{"name": "d3", "count": 142},
37-
{"name": "typescript", "count": 128},
38-
{"name": "react", "count": 112},
39-
{"name": "machine-learning", "count": 98},
40-
{"name": "data-science", "count": 87},
41-
{"name": "visualization", "count": 76},
42-
{"name": "neo4j", "count": 65},
43-
{"name": "graphql", "count": 54},
44-
{"name": "sigma-js", "count": 43},
45-
]
26+
# Step 1: Load JSON into a DuckDB temp table
27+
con = duckdb.connect(database=':memory:')
28+
con.execute("""
29+
CREATE TEMP TABLE repo AS
30+
SELECT * FROM read_json_auto('../public/data/repo_metadata.json');
31+
""")
4632

47-
filtered_topics = [
48-
topic for topic in all_topics
49-
if search_term in topic["name"].lower()
50-
]
33+
# Step 2: Get nameWithOwner and topics into a pandas DataFrame
34+
query = "SELECT nameWithOwner, topics FROM repo"
35+
df = con.execute(query).fetchdf()
36+
37+
# Step 3: Normalize topics into list of names
38+
def extract_names(item_ls):
    """Return the ``"name"`` value of every topic entry in *item_ls*.

    Args:
        item_ls: One cell of the ``topics`` column. Presumably a list (or
            array) of dicts such as ``{"name": "python"}`` -- TODO confirm
            against the JSON schema. Missing cells may show up as ``None``
            or as a non-iterable placeholder such as float NaN.

    Returns:
        A list with the ``"name"`` of each dict entry that has one, in the
        original order. Empty when the cell is missing, empty, or not
        iterable.
    """
    if item_ls is None:
        return []
    try:
        entries = iter(item_ls)
    except TypeError:
        # Non-iterable placeholder for a missing value (e.g. float NaN).
        return []
    # Only dict entries can carry a "name" key; on a string, `"name" in item`
    # would be a substring test and the subsequent lookup would raise.
    return [
        entry["name"]
        for entry in entries
        if isinstance(entry, dict) and "name" in entry
    ]
42+
43+
df["topics"] = df["topics"].apply(extract_names)
44+
45+
# Step 4: Keep only repos that have a topic exactly equal to the search term (case-insensitive, not a substring match)
46+
filtered_df = df[df["topics"].apply(lambda x: search_term in [t.lower() for t in x])]
47+
48+
# Step 5: Count all co-occurring topics
49+
all_topics = [topic for topics in filtered_df["topics"] for topic in topics]
50+
topic_counts = Counter(all_topics)
51+
52+
# Optional: Remove the searched topic itself
53+
topic_counts.pop(search_term, None)
54+
55+
# Step 6: Convert to list of dicts and sort
56+
topics = [{"name": name, "count": count} for name, count in topic_counts.items()]
57+
topics = sorted(topics, key=lambda x: x["count"], reverse=True)
5158

5259
return jsonify({
5360
"success": True,
54-
"data": filtered_topics,
55-
"total": len(filtered_topics)
61+
"data": topics,
62+
"total": len(topics)
5663
})
5764

5865
except Exception as e:
@@ -66,12 +73,7 @@ def process_topics():
6673
def home():
6774
return "Hello World!"
6875

69-
def signal_handler(sig, frame):
70-
print('\nShutting down the server...')
71-
sys.exit(0)
72-
7376
if __name__ == '__main__':
74-
signal.signal(signal.SIGINT, signal_handler)
7577
print("Starting Flask server...")
7678
port = 5002
7779

0 commit comments

Comments
 (0)