Skip to content

Commit 3f09da6

Browse files
committed
restructure the backend app
1 parent e4763ac commit 3f09da6

13 files changed

Lines changed: 374 additions & 223 deletions

File tree

.gitignore

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,4 +9,5 @@ node_modules
99
dist
1010
.DS_Store
1111
*temp
12-
*repo_metadata.json
12+
*repo_metadata.json
13+
__pycache__

backend/app/__init__.py

Whitespace-only changes.

backend/app/main.py

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
from flask import Flask, jsonify, request
2+
from flask_cors import CORS
3+
from app.services.topic_service import TopicService
4+
from app.services.ai_service import AITopicProcessor
5+
6+
# Flask application; CORS is wide open so the frontend dev server
# (any origin) can hit the API directly.
app = Flask(__name__)

_cors_rules = {
    r"/*": {
        "origins": "*",
        "methods": ["GET", "POST", "OPTIONS"],
        "allow_headers": ["Content-Type", "Authorization"],
    }
}
CORS(app, resources=_cors_rules)

# Shared service singletons used by the route handlers below.
topic_service = TopicService()
ai_processor = AITopicProcessor()
17+
18+
@app.route('/process-topics', methods=['GET', 'POST'])
def process_topics():
    """Look up topics related to a search term.

    The term arrives as JSON body field ``searchTerm`` on POST, or as a
    query parameter of the same name on GET; the lookup itself is
    delegated to TopicService.
    """
    try:
        if request.method == 'POST':
            payload = request.get_json()
            term = payload.get('searchTerm', '')
        else:
            term = request.args.get('searchTerm', '')
        return jsonify(topic_service.process_topics(term))

    except Exception as exc:
        # Structured error payload so the frontend can surface the cause.
        return jsonify({
            "success": False,
            "error": str(exc),
            "message": "An error occurred while processing the request"
        }), 500
35+
36+
@app.route('/ai-process', methods=['GET', 'POST'])
def ai_process():
    """Run the AI topic processor over a set of selected topics.

    Expects a POST whose JSON body carries the frontend field names:
    ``selectedModel``, ``apiKey``, ``customPrompt``, ``selectedTopics``.
    GET is still routed (interface unchanged) but carries no payload, so it
    is rejected with 405 instead of falling through with undefined locals
    as the previous version did.
    """
    import asyncio  # local import: only this handler drives the async service

    if request.method != 'POST':
        return jsonify({
            "success": False,
            "message": "Use POST with a JSON body"
        }), 405

    try:
        data = request.get_json() or {}

        # Extract parameters using the frontend's field names.
        model = data.get('selectedModel', 'gpt-3.5-turbo')
        api_key = data.get('apiKey', '')
        prompt = data.get('customPrompt', '')
        selected_topics = data.get('selectedTopics', [])

        # AITopicProcessor.process_topics is an async method whose
        # signature is (model, api_key, prompt, topics).  The previous
        # call used the wrong keyword names (api_token / selected_topics)
        # and never awaited the coroutine, so nothing was ever processed.
        ai_result = asyncio.run(ai_processor.process_topics(
            model=model,
            api_key=api_key,
            prompt=prompt,
            topics=selected_topics,
        ))

        return jsonify({
            "success": True,
            "result": ai_result
        })

    except Exception as e:
        # Structured error payload, consistent with /process-topics.
        # (The previous handler returned a placeholder list.)
        return jsonify({
            "success": False,
            "error": str(e),
            "message": "An error occurred while processing the request"
        }), 500
71+
72+
@app.route('/')
def home():
    """Trivial liveness check for the API root."""
    greeting = "Hello World!"
    return greeting
75+
76+
if __name__ == '__main__':
    # Development entry point; debug=True enables the reloader and
    # interactive debugger — not for production use.
    port = 5002
    print("Starting Flask server...")
    print(f"Server running on: http://127.0.0.1:{port}")
    app.run(host='127.0.0.1', port=port, debug=True)

backend/app/services/__init__.py

Whitespace-only changes.

backend/app/services/ai_service.py

Lines changed: 136 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,136 @@
1+
import logging
2+
from typing import List
3+
import google.generativeai as genai
4+
from openai import OpenAI
5+
from fastapi import HTTPException
6+
7+
# Configure logging for this module's request-validation traces.
# NOTE(review): DEBUG level is very verbose (logs every request field) —
# confirm this is intended outside local development.
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)
10+
11+
class AITopicProcessor:
    """Dispatches topic-suggestion prompts to OpenAI (gpt-*) or Gemini (gemini-*) models.

    Clients are created lazily per request via initialize_client(); errors
    are reported as fastapi.HTTPException with a meaningful status code.
    """

    def __init__(self):
        # Lazily initialized; only the client matching the requested model
        # family is ever created.
        self.openai_client = None
        self.gemini_client = None

    def initialize_client(self, model: str, api_key: str):
        """Create the API client matching *model*; unknown prefixes leave both unset."""
        if model.startswith("gpt"):
            self.openai_client = OpenAI(api_key=api_key)
        elif model.startswith("gemini"):
            genai.configure(api_key=api_key)
            self.gemini_client = genai.GenerativeModel(model)

    @staticmethod
    def _build_prompt(prompt: str, topics: List[str]) -> str:
        """Assemble the full prompt sent to either provider (shared wording)."""
        return f"""Current topics: {", ".join(topics)}

{prompt}

Please provide suggestions as a simple list, one per line. Keep each suggestion concise."""

    @staticmethod
    def _clean_suggestions(raw: str) -> List[str]:
        """Split a model response into one suggestion per line, stripping list markers."""
        suggestions = [
            s.lstrip("- ").lstrip("* ").lstrip("1234567890. ")
            for s in raw.strip().split("\n")
        ]
        return [s for s in suggestions if s]  # drop empty lines

    async def process_with_openai(
        self, prompt: str, topics: List[str], model: str
    ) -> List[str]:
        """Query the OpenAI chat API and return cleaned suggestion lines."""
        try:
            response = self.openai_client.chat.completions.create(
                model=model,
                messages=[
                    {"role": "user", "content": self._build_prompt(prompt, topics)}
                ],
                temperature=0.7,
                max_tokens=500,
            )
            return self._clean_suggestions(response.choices[0].message.content)
        except Exception as e:
            raise HTTPException(status_code=500, detail=f"OpenAI API error: {str(e)}")

    async def process_with_gemini(self, prompt: str, topics: List[str]) -> List[str]:
        """Query the Gemini API and return cleaned suggestion lines ([] if empty)."""
        try:
            response = self.gemini_client.generate_content(
                self._build_prompt(prompt, topics)
            )
            if response.text:
                return self._clean_suggestions(response.text)
            return []
        except Exception as e:
            raise HTTPException(status_code=500, detail=f"Gemini API error: {str(e)}")

    async def process_topics(
        self, model: str, api_key: str, prompt: str, topics: List[str]
    ) -> List[str]:
        """
        Process topics using the specified AI model and return suggestions.

        Raises HTTPException 400 for invalid input or an unsupported model,
        and 500 for client-initialization or provider failures.
        """
        logger.debug("=== Incoming Request Validation ===")
        logger.debug("Model: %s", model if model else "NOT PROVIDED")
        logger.debug("API Key: %s", "[PROVIDED]" if api_key else "NOT PROVIDED")
        logger.debug("Prompt: %s", prompt if prompt else "NOT PROVIDED")
        logger.debug("Topics: %s", topics if topics else "NOT PROVIDED")
        logger.debug("Topics length: %d", len(topics) if topics else 0)
        logger.debug(
            "Types: model=%s api_key=%s prompt=%s topics=%s",
            type(model), type(api_key), type(prompt), type(topics),
        )

        # Collect every validation failure so the caller sees them all at once.
        validation_errors = []
        if not model or not isinstance(model, str):
            validation_errors.append("Invalid or missing model")
        if not api_key or not isinstance(api_key, str):
            validation_errors.append("Invalid or missing API key")
        if not isinstance(topics, list):
            validation_errors.append("Topics must be a list")
        elif len(topics) == 0:
            validation_errors.append("Topics list cannot be empty")
        if not prompt or not isinstance(prompt, str):
            validation_errors.append("Invalid or missing prompt")

        if validation_errors:
            error_message = "; ".join(validation_errors)
            logger.error(f"Validation failed: {error_message}")
            raise HTTPException(status_code=400, detail=error_message)

        try:
            self.initialize_client(model, api_key)

            if model.startswith("gpt"):
                if not self.openai_client:
                    raise HTTPException(
                        status_code=500, detail="Failed to initialize OpenAI client"
                    )
                return await self.process_with_openai(prompt, topics, model)
            if model.startswith("gemini"):
                if not self.gemini_client:
                    raise HTTPException(
                        status_code=500, detail="Failed to initialize Gemini client"
                    )
                return await self.process_with_gemini(prompt, topics)
            raise HTTPException(status_code=400, detail=f"Unsupported model: {model}")

        except HTTPException:
            # BUG FIX: previously every HTTPException raised above (including
            # the deliberate 400 "Unsupported model") was swallowed by the
            # generic handler below and re-wrapped as a 500, losing the
            # intended status code. Re-raise it untouched.
            raise
        except Exception as e:
            raise HTTPException(status_code=500, detail=str(e))
Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
from collections import Counter
2+
import duckdb
3+
from app.utils.cache import get_cached_topics, save_cached_topics
4+
5+
class TopicService:
    """Answers "topics co-occurring with X" queries over the repo metadata dump."""

    def __init__(self, data_path: str = '../public/data/repo_metadata.json'):
        """Load the metadata JSON into an in-memory DuckDB table once at startup.

        data_path: location of the metadata file (default preserves the
        original hard-coded path, so existing callers are unaffected).
        """
        self.con = duckdb.connect(database=':memory:')
        self.con.execute("SET threads TO 16;")
        # Bind the path as a parameter rather than interpolating it into SQL.
        self.con.execute(
            "CREATE TEMP TABLE repo AS SELECT * FROM read_json_auto(?);",
            [data_path],
        )

    def process_topics(self, search_term: str, min_count: int = 2):
        """Return topics that co-occur with *search_term*, most frequent first.

        min_count: topics must appear strictly more than this many times to
        be included (default keeps the original ``count > 2`` behaviour).
        Returns a dict with success/data/total/cached keys; on failure a
        dict with success=False and the error message (interface unchanged).
        """
        try:
            search_term = search_term.lower()

            # Serve from the on-disk cache when available.
            cached_result = get_cached_topics(search_term)
            if cached_result:
                return {
                    "success": True,
                    "data": cached_result,
                    "total": len(cached_result),
                    "cached": True
                }

            df = self.con.execute(
                "SELECT nameWithOwner, topics FROM repo"
            ).fetchdf()

            def extract_names(item_ls):
                # Topics arrive as a list of {"name": ...} structs (or None).
                if item_ls is not None and len(item_ls) > 0:
                    return [item["name"] for item in item_ls if "name" in item]
                return []

            df["topics"] = df["topics"].apply(extract_names)
            # Keep repos whose topic list contains the term (case-insensitive).
            filtered_df = df[df["topics"].apply(
                lambda ts: search_term in [t.lower() for t in ts]
            )]

            all_topics = [t for ts in filtered_df["topics"] for t in ts]
            topic_counts = Counter(all_topics)
            # BUG FIX: remove the searched topic case-insensitively.  The old
            # pop(search_term) only matched the lowercase spelling, so e.g.
            # "Python" survived a "python" search and dominated the results.
            for name in [n for n in topic_counts if n.lower() == search_term]:
                del topic_counts[name]

            topics = [
                {"name": name, "count": count}
                for name, count in topic_counts.items()
                if count > min_count
            ]
            topics.sort(key=lambda x: x["count"], reverse=True)

            # Best-effort cache write; errors fall through to the handler below.
            save_cached_topics(search_term, topics)

            return {
                "success": True,
                "data": topics,
                "total": len(topics),
                "cached": False
            }

        except Exception as e:
            return {
                "success": False,
                "error": str(e),
                "message": "An error occurred while processing the request"
            }

backend/app/utils/__init__.py

Whitespace-only changes.

backend/app/utils/cache.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
from pathlib import Path
2+
import json
3+
4+
def get_cached_topics(search_term):
    """Return the cached topic list for *search_term*, or None on a miss.

    The cache is best-effort: a missing, unreadable, or corrupt file is
    treated as a cache miss rather than an error (the old exists()+open
    pattern raised on corrupt JSON and was race-prone between the check
    and the open).
    """
    cache_file = Path('../public/data/cached_topics') / f"{search_term}.json"
    try:
        with open(cache_file, 'r', encoding='utf-8') as f:
            return json.load(f)
    except (OSError, json.JSONDecodeError):
        return None
10+
11+
def save_cached_topics(search_term, topics_data):
    """Persist *topics_data* as JSON in the topic cache directory.

    BUG FIX: mkdir now uses parents=True — exist_ok alone only tolerates an
    existing leaf directory and raised FileNotFoundError when the parent
    '../public/data' path did not exist yet.
    """
    cache_dir = Path('../public/data/cached_topics')
    cache_dir.mkdir(parents=True, exist_ok=True)
    cache_file = cache_dir / f"{search_term}.json"
    with open(cache_file, 'w', encoding='utf-8') as f:
        json.dump(topics_data, f)

backend/requirements.txt

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
flask
2+
flask-cors
3+
duckdb
4+
google-generativeai
5+
openai
6+
fastapi
7+
uvicorn

0 commit comments

Comments
 (0)