1+ import os
2+ import duckdb
3+ import psutil
4+ import networkx as nx
5+
class GexfNodeGenerator:
    """Export repositories that carry given topics as nodes of a GEXF file.

    On construction the generator makes sure the output directory exists and
    opens a read-only DuckDB connection to ``github_meta.duckdb``, capping the
    memory and thread usage of that connection.

    The instance owns the connection: call :meth:`close` when done, or use the
    generator as a context manager (``with GexfNodeGenerator() as gen: ...``).
    """

    # Columns read from ``repos``, in SELECT order. The first one is used as
    # the node id; every other column becomes a node attribute.
    _COLUMNS = (
        "nameWithOwner",
        "stars",
        "forks",
        "watchers",
        "isFork",
        "isArchived",
        "languageCount",
        "pullRequests",
        "issues",
        "primaryLanguage",
        "createdAt",
        "license",
        "codeOfConduct",
    )

    # Value substituted for SQL NULL, per attribute column.
    _DEFAULTS = {
        "stars": 0,
        "forks": 0,
        "watchers": 0,
        "isFork": False,
        "isArchived": False,
        "languageCount": 0,
        "pullRequests": 0,
        "issues": 0,
        "primaryLanguage": "",
        "createdAt": "",
        "license": "",
        "codeOfConduct": "",
    }

    # Value types handed to the graph unchanged; anything else is stringified
    # because the GEXF writer rejects attribute types it does not know.
    _PLAIN_TYPES = (bool, int, float, str)

    def __init__(self):
        """Prepare the output location and open the database connection.

        Raises:
            FileNotFoundError: if the DuckDB database file does not exist.
        """
        module_dir = os.path.dirname(__file__)

        # Output goes to a ``gexf`` directory next to this module's directory.
        self.save_dir = os.path.join(os.path.dirname(module_dir), 'gexf')
        os.makedirs(self.save_dir, exist_ok=True)
        self.gexf_path = os.path.join(self.save_dir, 'generated_nodes.gexf')

        # The database lives under ``public/data`` four directory levels above
        # this file (one level was already taken for ``module_dir``).
        base_dir = module_dir
        for _ in range(3):
            base_dir = os.path.dirname(base_dir)
        db_path = os.path.join(base_dir, 'public', 'data', 'github_meta.duckdb')

        if not os.path.exists(db_path):
            raise FileNotFoundError(f"Database not found at {db_path} . Please ensure the database file exists before running the application.")

        # DuckDB connection (setup originally copied from TopicService).
        self.con = duckdb.connect(database=db_path, read_only=True)
        try:
            self._apply_resource_limits()
        except Exception:
            # Do not leave a half-configured connection open behind a failed
            # constructor; release it and let the caller see the error.
            self.close()
            raise

    def __enter__(self):
        """Return the generator itself so it can be used in a ``with`` block."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the database connection; exceptions are not suppressed."""
        self.close()
        return False

    def close(self):
        """Close the DuckDB connection. Safe to call more than once."""
        con = getattr(self, 'con', None)
        if con is not None:
            con.close()
        self.con = None

    def _apply_resource_limits(self):
        """Cap the connection's memory and thread usage."""
        # At most 30% of currently available memory, and never above 0.5 GiB.
        available_memory = psutil.virtual_memory().available
        memory_limit = min(available_memory * 0.3, 0.5 * 1024 * 1024 * 1024)
        self.con.execute(f"SET memory_limit TO '{int(memory_limit)} B'")

        # Between 1 and 2 threads; cpu_count() may return None, hence ``or 1``.
        cpu_count = psutil.cpu_count(logical=False) or 1
        thread_count = max(1, min(cpu_count, 2))
        self.con.execute(f"SET threads TO {thread_count} ")

    @staticmethod
    def _normalize_topics(topics):
        """Return the topics as a list of lower-cased strings.

        A single string is treated as one topic instead of being iterated
        character by character. Empty or missing input yields ``[]``.
        """
        if not topics:
            return []
        if isinstance(topics, str):
            topics = [topics]
        return [topic.lower() for topic in topics]

    @classmethod
    def _row_to_node(cls, row):
        """Split a result row into ``(node_id, attributes)``.

        NULL values are replaced by the column's default. Values that are not
        bool/int/float/str are converted to text (``isoformat()`` when the
        value offers it, ``str()`` otherwise) so the GEXF writer accepts them.
        """
        repo_name, *values = row
        attrs = {}
        for column, value in zip(cls._COLUMNS[1:], values):
            if value is None:
                value = cls._DEFAULTS[column]
            elif not isinstance(value, cls._PLAIN_TYPES):
                to_iso = getattr(value, 'isoformat', None)
                value = to_iso() if callable(to_iso) else str(value)
            attrs[column] = value
        return repo_name, attrs

    def generate_gexf_nodes_for_topics(self, topics):
        """Generate and store a GEXF file for all repos having any given topic.

        Args:
            topics: iterable of topic names (a single string is accepted as
                one topic). Matching is case-insensitive.

        Returns:
            The path of the written GEXF file, or ``None`` when no topics were
            given (nothing is written in that case).
        """
        wanted = self._normalize_topics(topics)
        if not wanted:
            return None

        # The select list is derived from _COLUMNS so that the query and the
        # row decoding cannot drift apart. Topic values are bound parameters.
        select_list = ', '.join(f'r.{column}' for column in self._COLUMNS)
        placeholders = ','.join(['?'] * len(wanted))
        query = f'''
            SELECT DISTINCT {select_list}
            FROM repos r
            JOIN repo_topics t ON r.nameWithOwner = t.repo
            WHERE LOWER(t.topic) IN ({placeholders})
        '''
        rows = self.con.execute(query, wanted).fetchall()

        graph = nx.Graph()
        for row in rows:
            repo_name, attrs = self._row_to_node(row)
            graph.add_node(repo_name, **attrs)

        nx.write_gexf(graph, self.gexf_path)
        return self.gexf_path  # Return the file path
0 commit comments