diff --git a/backend/internal/api/handlers/processing.go b/backend/internal/api/handlers/processing.go index c701514..3994541 100644 --- a/backend/internal/api/handlers/processing.go +++ b/backend/internal/api/handlers/processing.go @@ -18,6 +18,7 @@ import ( "github.com/nesposito/frfr/internal/services/extraction" "github.com/nesposito/frfr/internal/services/pdf" "github.com/nesposito/frfr/internal/services/session" + slackext "github.com/nesposito/frfr/internal/services/slack" ) // ProcessingHandler handles processing-related API requests @@ -249,9 +250,9 @@ func (h *ProcessingHandler) processDocuments(sessionID string, documents []strin Progress: float64(i) / float64(totalDocs), }) - // Step 1: Extract text from document + // Step 1: Extract text based on document source textFile := filepath.Join(sessionDir, "text", docName+".txt") - textContent, err := h.extractFileText(ctx, sessionID, docName, docInfo, textFile) + textContent, err := h.extractText(ctx, sessionID, docName, docInfo, textFile) if err != nil { continue } @@ -375,6 +376,84 @@ func (h *ProcessingHandler) processDocuments(sessionID string, documents []strin }) } +// extractText extracts text content from a document, dispatching to the appropriate +// source-specific method. On failure it updates the document status and broadcasts +// an error event, so callers can simply `continue` on error. +func (h *ProcessingHandler) extractText(ctx context.Context, sessionID, docName string, docInfo models.DocumentInfo, textFile string) (string, error) { + if docInfo.Source == models.DocumentSourceSlack { + return h.extractSlackText(ctx, sessionID, docName, docInfo, textFile) + } + return h.extractFileText(ctx, sessionID, docName, docInfo, textFile) +} + +// extractSlackText fetches messages from a Slack channel and returns the text content. +func (h *ProcessingHandler) extractSlackText(ctx context.Context, sessionID, docName string, docInfo models.DocumentInfo, textFile string) (string, error) { + if docInfo.SlackMeta == nil { + h.store.UpdateDocumentStatus(sessionID, docName, models.DocumentStatusFailed, "missing slack metadata") + h.broadcast(sessionID, models.ProcessingEvent{ + Type: models.EventTypeError, + Timestamp: time.Now(), + Document: docName, + Message: "Slack document missing metadata", + }) + return "", fmt.Errorf("missing slack metadata") + } + + h.broadcast(sessionID, models.ProcessingEvent{ + Type: "slack_extraction_start", + Timestamp: time.Now(), + Document: docName, + Message: fmt.Sprintf("Fetching messages from Slack channel #%s...", docInfo.SlackMeta.ChannelName), + }) + + slackExtractor := slackext.NewExtractor(h.config.SlackBotToken, h.config.SlackMaxMessages, h.config.SlackLookbackDays) + + opts := slackext.ExtractOptions{IncludeThreads: true} + if docInfo.SlackMeta.Since != "" { + if t, err := time.Parse("2006-01-02", docInfo.SlackMeta.Since); err == nil { + opts.Since = t + } + } + if docInfo.SlackMeta.Until != "" { + if t, err := time.Parse("2006-01-02", docInfo.SlackMeta.Until); err == nil { + opts.Until = t + } + } + + result, err := slackExtractor.Extract(ctx, docInfo.SlackMeta.ChannelID, textFile, opts) + if err != nil { + h.store.UpdateDocumentStatus(sessionID, docName, models.DocumentStatusFailed, err.Error()) + h.broadcast(sessionID, models.ProcessingEvent{ + Type: models.EventTypeError, + Timestamp: time.Now(), + Document: docName, + Message: fmt.Sprintf("Slack extraction failed: %v", err), + }) + return "", err + } + + h.broadcast(sessionID, models.ProcessingEvent{ + Type: "slack_extraction_complete", + Timestamp: time.Now(), + Document: docName, + Message: fmt.Sprintf("Extracted %d messages (%d threads), %d characters from #%s", + result.MessageCount, result.ThreadCount, result.TotalChars, result.ChannelName), + }) + + data, err := os.ReadFile(textFile) + if err != nil { + h.store.UpdateDocumentStatus(sessionID, docName, models.DocumentStatusFailed, err.Error()) + h.broadcast(sessionID, models.ProcessingEvent{ + Type: models.EventTypeError, + Timestamp: time.Now(), + Document: docName, + Message: fmt.Sprintf("Failed to read extracted text: %v", err), + }) + return "", err + } + return string(data), nil +} + // extractFileText extracts text from a PDF or reads a plain text/markdown file. func (h *ProcessingHandler) extractFileText(ctx context.Context, sessionID, docName string, docInfo models.DocumentInfo, textFile string) (string, error) { pdfPath := expandTilde(docInfo.OriginalPDFPath) diff --git a/backend/internal/api/handlers/slack.go b/backend/internal/api/handlers/slack.go new file mode 100644 index 0000000..3e5c188 --- /dev/null +++ b/backend/internal/api/handlers/slack.go @@ -0,0 +1,98 @@ +package handlers + +import ( + "encoding/json" + "net/http" + "strings" + "time" + + "github.com/nesposito/frfr/internal/config" + "github.com/nesposito/frfr/internal/domain/models" + "github.com/nesposito/frfr/internal/services/session" +) + +// SlackHandler handles Slack-related API requests +type SlackHandler struct { + store *session.Store + config *config.Config +} + +// NewSlackHandler creates a new Slack handler +func NewSlackHandler(store *session.Store, cfg *config.Config) *SlackHandler { + return &SlackHandler{store: store, config: cfg} +} + +// AddSlackChannelRequest is the request body for importing a Slack channel +type AddSlackChannelRequest struct { + ChannelID string `json:"channel_id"` + Token string `json:"token,omitempty"` // Optional; falls back to SLACK_BOT_TOKEN env + Since string `json:"since,omitempty"` // Date string: "2025-01-01" + Until string `json:"until,omitempty"` // Date string: "2025-03-01" +} + +// Add imports a Slack channel as a document source in a session +func (h *SlackHandler) Add(w http.ResponseWriter, r *http.Request) { + sessionID := r.PathValue("id") + if sessionID == "" { + writeError(w, http.StatusBadRequest, "Session ID is required") + return + } + + var req AddSlackChannelRequest + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + writeError(w, http.StatusBadRequest, "Invalid request body: "+err.Error()) + return + } + + if req.ChannelID == "" { + writeError(w, http.StatusBadRequest, "channel_id is required") + return + } + + // Check that we have a token somewhere + token := req.Token + if token == "" { + token = h.config.SlackBotToken + } + if token == "" { + writeError(w, http.StatusBadRequest, "No Slack token provided. Set SLACK_BOT_TOKEN env or pass 'token' in request.") + return + } + + // Get session + sess, err := h.store.Get(sessionID) + if err != nil { + if strings.Contains(err.Error(), "not found") { + writeError(w, http.StatusNotFound, err.Error()) + } else { + writeError(w, http.StatusInternalServerError, "Failed to get session: "+err.Error()) + } + return + } + + // Create document name from channel ID + docName := "slack-" + req.ChannelID + + // Register as a document in the session + if sess.DocumentRegistry == nil { + sess.DocumentRegistry = make(map[string]models.DocumentInfo) + } + + sess.DocumentRegistry[docName] = models.DocumentInfo{ + Status: models.DocumentStatusPending, + AddedAt: models.FlexibleTime{Time: time.Now()}, + Source: models.DocumentSourceSlack, + SlackMeta: &models.SlackDocumentMeta{ + ChannelID: req.ChannelID, + Since: req.Since, + Until: req.Until, + }, + } + + if err := h.store.Update(sess); err != nil { + writeError(w, http.StatusInternalServerError, "Failed to update session: "+err.Error()) + return + } + + writeJSON(w, http.StatusCreated, sess.DocumentRegistry[docName]) +} diff --git a/backend/internal/api/router.go b/backend/internal/api/router.go index 1e8d80b..aab1a97 100644 --- a/backend/internal/api/router.go +++ b/backend/internal/api/router.go @@ -36,6 +36,7 @@ func (s *Server) registerRoutes() { factsHandler := handlers.NewFactsHandler(s.sessionStore) processingHandler := handlers.NewProcessingHandler(s.sessionStore, s.config) queryHandler := handlers.NewQueryHandler(s.sessionStore, s.config) + slackHandler := handlers.NewSlackHandler(s.sessionStore, s.config) filePickerHandler := handlers.NewFilePickerHandler() claudeHandler := handlers.NewClaudeHandler(s.config) @@ -60,6 +61,9 @@ func (s *Server) registerRoutes() { s.mux.HandleFunc("POST /api/sessions/{id}/query/stream", queryHandler.SubmitStream) s.mux.HandleFunc("GET /api/sessions/{id}/query/history", queryHandler.History) + // Slack + s.mux.HandleFunc("POST /api/sessions/{id}/slack", slackHandler.Add) + // Processing s.mux.HandleFunc("POST /api/sessions/{id}/process", processingHandler.Start) s.mux.HandleFunc("GET /api/sessions/{id}/process/events", processingHandler.Events) diff --git a/backend/internal/config/config.go b/backend/internal/config/config.go index 0c466c0..3ce192d 100644 --- a/backend/internal/config/config.go +++ b/backend/internal/config/config.go @@ -30,6 +30,11 @@ type Config struct { AnthropicAPIKey string MaxWorkers int MaxRetries int + + // Slack settings + SlackBotToken string + SlackMaxMessages int + SlackLookbackDays int } // DefaultConfig returns the default configuration @@ -59,6 +64,11 @@ func DefaultConfig() *Config { AnthropicAPIKey: getAnthropicAPIKey(), MaxWorkers: getEnvInt("FRFR_MAX_WORKERS", 20), MaxRetries: getEnvInt("FRFR_MAX_RETRIES", 3), + + // Slack + SlackBotToken: os.Getenv("SLACK_BOT_TOKEN"), + SlackMaxMessages: getEnvInt("FRFR_SLACK_MAX_MESSAGES", 1000), + SlackLookbackDays: getEnvInt("FRFR_SLACK_LOOKBACK_DAYS", 90), } } diff --git a/backend/internal/domain/models/document.go b/backend/internal/domain/models/document.go index ade7d63..50d0915 100644 --- a/backend/internal/domain/models/document.go +++ b/backend/internal/domain/models/document.go @@ -10,6 +10,22 @@ const ( DocumentStatusFailed DocumentStatus = "failed" ) +// DocumentSource indicates where a document came from +type DocumentSource string + +const ( + DocumentSourceFile DocumentSource = "" // Default: local file (PDF/Markdown) + DocumentSourceSlack DocumentSource = "slack" +) + +// SlackDocumentMeta contains Slack-specific metadata for a document +type SlackDocumentMeta struct { + ChannelID string `json:"channel_id"` + ChannelName string `json:"channel_name,omitempty"` + Since string `json:"since,omitempty"` + Until string `json:"until,omitempty"` +} + // DocumentInfo contains metadata about a document in a session type DocumentInfo struct { OriginalPDFPath string `json:"original_pdf_path"` @@ -20,6 +36,8 @@ type DocumentInfo struct { AddedAt FlexibleTime `json:"added_at"` CompletedAt *FlexibleTime `json:"completed_at,omitempty"` ErrorMessage string `json:"error_message,omitempty"` + Source DocumentSource `json:"source,omitempty"` + SlackMeta *SlackDocumentMeta `json:"slack_meta,omitempty"` } // DocumentSummary contains the LLM-generated summary of a document diff --git a/backend/internal/services/slack/client.go b/backend/internal/services/slack/client.go new file mode 100644 index 0000000..bf9b1d2 --- /dev/null +++ b/backend/internal/services/slack/client.go @@ -0,0 +1,299 @@ +package slack + +import ( + "context" + "encoding/json" + "fmt" + "io" + "net/http" + "net/url" + "time" +) + +// Client is a thin Slack Web API client. +type Client struct { + token string + httpClient *http.Client +} + +// NewClient creates a new Slack API client with the given bot token. +func NewClient(token string) *Client { + return &Client{ + token: token, + httpClient: &http.Client{ + Timeout: 30 * time.Second, + }, + } +} + +// Message represents a Slack message. +type Message struct { + User string `json:"user"` + Text string `json:"text"` + Timestamp string `json:"ts"` + ThreadTS string `json:"thread_ts,omitempty"` + ReplyCount int `json:"reply_count,omitempty"` + SubType string `json:"subtype,omitempty"` +} + +// User represents a Slack user profile. +type User struct { + ID string `json:"id"` + DisplayName string `json:"display_name"` + RealName string `json:"real_name"` +} + +// ChannelInfo represents basic Slack channel metadata. +type ChannelInfo struct { + ID string `json:"id"` + Name string `json:"name"` + Topic string `json:"topic"` + Purpose string `json:"purpose"` + MemberCount int `json:"num_members"` +} + +// conversationsHistoryResponse is the Slack API response for conversations.history. +type conversationsHistoryResponse struct { + OK bool `json:"ok"` + Error string `json:"error,omitempty"` + Messages []Message `json:"messages"` + HasMore bool `json:"has_more"` + ResponseMetadata struct { + NextCursor string `json:"next_cursor"` + } `json:"response_metadata"` +} + +// conversationsRepliesResponse is the Slack API response for conversations.replies. +type conversationsRepliesResponse struct { + OK bool `json:"ok"` + Error string `json:"error,omitempty"` + Messages []Message `json:"messages"` + HasMore bool `json:"has_more"` + ResponseMetadata struct { + NextCursor string `json:"next_cursor"` + } `json:"response_metadata"` +} + +// conversationsInfoResponse is the Slack API response for conversations.info. +type conversationsInfoResponse struct { + OK bool `json:"ok"` + Error string `json:"error,omitempty"` + Channel struct { + ID string `json:"id"` + Name string `json:"name"` + Topic struct { + Value string `json:"value"` + } `json:"topic"` + Purpose struct { + Value string `json:"value"` + } `json:"purpose"` + NumMembers int `json:"num_members"` + } `json:"channel"` +} + +// usersInfoResponse is the Slack API response for users.info. +type usersInfoResponse struct { + OK bool `json:"ok"` + Error string `json:"error,omitempty"` + User struct { + ID string `json:"id"` + Profile struct { + DisplayName string `json:"display_name"` + RealName string `json:"real_name"` + } `json:"profile"` + } `json:"user"` +} + +// GetChannelInfo fetches metadata about a channel. +func (c *Client) GetChannelInfo(ctx context.Context, channelID string) (*ChannelInfo, error) { + params := url.Values{"channel": {channelID}} + body, err := c.apiGet(ctx, "conversations.info", params) + if err != nil { + return nil, err + } + + var resp conversationsInfoResponse + if err := json.Unmarshal(body, &resp); err != nil { + return nil, fmt.Errorf("failed to parse conversations.info response: %w", err) + } + if !resp.OK { + return nil, fmt.Errorf("slack API error: %s", resp.Error) + } + + return &ChannelInfo{ + ID: resp.Channel.ID, + Name: resp.Channel.Name, + Topic: resp.Channel.Topic.Value, + Purpose: resp.Channel.Purpose.Value, + MemberCount: resp.Channel.NumMembers, + }, nil +} + +// ProbeVolume fetches a single page of history to check if the channel has more +// than `limit` messages. Returns (messageCount, hasMore, error). +func (c *Client) ProbeVolume(ctx context.Context, channelID string, limit int) (int, bool, error) { + params := url.Values{ + "channel": {channelID}, + "limit": {fmt.Sprintf("%d", limit)}, + } + + body, err := c.apiGet(ctx, "conversations.history", params) + if err != nil { + return 0, false, err + } + + var resp conversationsHistoryResponse + if err := json.Unmarshal(body, &resp); err != nil { + return 0, false, fmt.Errorf("failed to parse response: %w", err) + } + if !resp.OK { + return 0, false, fmt.Errorf("slack API error: %s", resp.Error) + } + + return len(resp.Messages), resp.HasMore, nil +} + +// GetHistory fetches channel message history with pagination. +// If since is non-zero, only messages after that time are returned. +func (c *Client) GetHistory(ctx context.Context, channelID string, since, until time.Time) ([]Message, error) { + var allMessages []Message + cursor := "" + + for { + params := url.Values{ + "channel": {channelID}, + "limit": {"200"}, + } + if !since.IsZero() { + params.Set("oldest", fmt.Sprintf("%d.000000", since.Unix())) + } + if !until.IsZero() { + params.Set("latest", fmt.Sprintf("%d.000000", until.Unix())) + } + if cursor != "" { + params.Set("cursor", cursor) + } + + body, err := c.apiGet(ctx, "conversations.history", params) + if err != nil { + return nil, err + } + + var resp conversationsHistoryResponse + if err := json.Unmarshal(body, &resp); err != nil { + return nil, fmt.Errorf("failed to parse conversations.history response: %w", err) + } + if !resp.OK { + return nil, fmt.Errorf("slack API error: %s", resp.Error) + } + + allMessages = append(allMessages, resp.Messages...) + + if !resp.HasMore || resp.ResponseMetadata.NextCursor == "" { + break + } + cursor = resp.ResponseMetadata.NextCursor + } + + return allMessages, nil +} + +// GetThreadReplies fetches all replies in a thread. +func (c *Client) GetThreadReplies(ctx context.Context, channelID, threadTS string) ([]Message, error) { + var allReplies []Message + cursor := "" + + for { + params := url.Values{ + "channel": {channelID}, + "ts": {threadTS}, + "limit": {"200"}, + } + if cursor != "" { + params.Set("cursor", cursor) + } + + body, err := c.apiGet(ctx, "conversations.replies", params) + if err != nil { + return nil, err + } + + var resp conversationsRepliesResponse + if err := json.Unmarshal(body, &resp); err != nil { + return nil, fmt.Errorf("failed to parse conversations.replies response: %w", err) + } + if !resp.OK { + return nil, fmt.Errorf("slack API error: %s", resp.Error) + } + + // Skip the first message (parent) — it's already in history + for _, msg := range resp.Messages { + if msg.Timestamp != threadTS { + allReplies = append(allReplies, msg) + } + } + + if !resp.HasMore || resp.ResponseMetadata.NextCursor == "" { + break + } + cursor = resp.ResponseMetadata.NextCursor + } + + return allReplies, nil +} + +// GetUserInfo fetches a user's profile. +func (c *Client) GetUserInfo(ctx context.Context, userID string) (*User, error) { + params := url.Values{"user": {userID}} + body, err := c.apiGet(ctx, "users.info", params) + if err != nil { + return nil, err + } + + var resp usersInfoResponse + if err := json.Unmarshal(body, &resp); err != nil { + return nil, fmt.Errorf("failed to parse users.info response: %w", err) + } + if !resp.OK { + return nil, fmt.Errorf("slack API error: %s", resp.Error) + } + + name := resp.User.Profile.DisplayName + if name == "" { + name = resp.User.Profile.RealName + } + + return &User{ + ID: resp.User.ID, + DisplayName: name, + RealName: resp.User.Profile.RealName, + }, nil +} + +// apiGet makes an authenticated GET request to a Slack API method. +func (c *Client) apiGet(ctx context.Context, method string, params url.Values) ([]byte, error) { + u := fmt.Sprintf("https://slack.com/api/%s?%s", method, params.Encode()) + + req, err := http.NewRequestWithContext(ctx, "GET", u, nil) + if err != nil { + return nil, fmt.Errorf("failed to create request: %w", err) + } + req.Header.Set("Authorization", "Bearer "+c.token) + + resp, err := c.httpClient.Do(req) + if err != nil { + return nil, fmt.Errorf("slack API request failed: %w", err) + } + defer resp.Body.Close() + + body, err := io.ReadAll(resp.Body) + if err != nil { + return nil, fmt.Errorf("failed to read response body: %w", err) + } + + if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("slack API returned status %d: %s", resp.StatusCode, string(body)) + } + + return body, nil +} diff --git a/backend/internal/services/slack/extractor.go b/backend/internal/services/slack/extractor.go new file mode 100644 index 0000000..eb72db6 --- /dev/null +++ b/backend/internal/services/slack/extractor.go @@ -0,0 +1,310 @@ +package slack + +import ( + "context" + "fmt" + "os" + "path/filepath" + "regexp" + "sort" + "strings" + "sync" + "time" +) + +// ExtractionResult contains the result of Slack channel text extraction. +type ExtractionResult struct { + Status string `json:"status"` + Method string `json:"method"` + ChannelID string `json:"channel_id"` + ChannelName string `json:"channel_name"` + MessageCount int `json:"message_count"` + ThreadCount int `json:"thread_count"` + TotalChars int `json:"total_chars"` + OutputFile string `json:"output_file"` + Error string `json:"error,omitempty"` + ErrorType string `json:"error_type,omitempty"` +} + +// ExtractOptions configures what to extract from a Slack channel. +type ExtractOptions struct { + Since time.Time + Until time.Time + IncludeThreads bool +} + +// Extractor handles text extraction from Slack channels. +type Extractor struct { + client *Client + userCache map[string]string // userID -> display name + mu sync.Mutex + maxMessages int + lookbackDays int +} + +// NewExtractor creates a new Slack extractor with the given bot token. +func NewExtractor(token string, maxMessages, lookbackDays int) *Extractor { + return &Extractor{ + client: NewClient(token), + userCache: make(map[string]string), + maxMessages: maxMessages, + lookbackDays: lookbackDays, + } +} + +// GetInfo fetches metadata about a Slack channel. +func (e *Extractor) GetInfo(ctx context.Context, channelID string) (*ChannelInfo, error) { + return e.client.GetChannelInfo(ctx, channelID) +} + +// Extract fetches messages from a Slack channel and writes them as text to outputPath. +func (e *Extractor) Extract(ctx context.Context, channelID, outputPath string, opts ExtractOptions) (*ExtractionResult, error) { + // Fetch channel info + info, err := e.client.GetChannelInfo(ctx, channelID) + if err != nil { + return &ExtractionResult{ + Status: "error", + Method: "slack_api", + ChannelID: channelID, + Error: fmt.Sprintf("failed to get channel info: %v", err), + ErrorType: "channel_not_found", + }, fmt.Errorf("failed to get channel info: %w", err) + } + + // If no date range specified, probe volume to decide whether to limit + if opts.Since.IsZero() && opts.Until.IsZero() { + _, hasMore, err := e.client.ProbeVolume(ctx, channelID, e.maxMessages) + if err == nil && hasMore { + // High-volume channel — apply lookback + opts.Since = time.Now().AddDate(0, 0, -e.lookbackDays) + } + // Otherwise fetch everything + } + + // Fetch message history + messages, err := e.client.GetHistory(ctx, channelID, opts.Since, opts.Until) + if err != nil { + return &ExtractionResult{ + Status: "error", + Method: "slack_api", + ChannelID: channelID, + ChannelName: info.Name, + Error: fmt.Sprintf("failed to fetch history: %v", err), + ErrorType: "fetch_failed", + }, fmt.Errorf("failed to fetch channel history: %w", err) + } + + // Sort messages chronologically (Slack returns newest first) + sort.Slice(messages, func(i, j int) bool { + return messages[i].Timestamp < messages[j].Timestamp + }) + + // Filter out subtypes (joins, leaves, etc.) — keep only real messages + var realMessages []Message + for _, msg := range messages { + if msg.SubType == "" || msg.SubType == "file_share" || msg.SubType == "me_message" { + realMessages = append(realMessages, msg) + } + } + + // Render messages to text + var buf strings.Builder + threadCount := 0 + + // Write header + fmt.Fprintf(&buf, "# Slack Channel: #%s\n", info.Name) + if info.Topic != "" { + fmt.Fprintf(&buf, "# Topic: %s\n", info.Topic) + } + if !opts.Since.IsZero() || !opts.Until.IsZero() { + fmt.Fprintf(&buf, "# Date range: %s to %s\n", + formatDateOrOpen(opts.Since, "beginning"), + formatDateOrOpen(opts.Until, "now")) + } + fmt.Fprintf(&buf, "# Messages: %d\n\n", len(realMessages)) + + for _, msg := range realMessages { + userName := e.resolveUser(ctx, msg.User) + ts := parseSlackTimestamp(msg.Timestamp) + permalink := buildPermalink(info.Name, channelID, msg.Timestamp) + + fmt.Fprintf(&buf, "[%s] @%s (%s):\n%s\n\n", + ts.Format("2006-01-02 15:04"), + userName, + permalink, + e.cleanSlackMarkup(ctx, msg.Text), + ) + + // Fetch thread replies if this message has them + if opts.IncludeThreads && msg.ReplyCount > 0 { + replies, err := e.client.GetThreadReplies(ctx, channelID, msg.Timestamp) + if err != nil { + // Log but continue — don't fail the whole extraction for one thread + fmt.Fprintf(&buf, " [thread: failed to fetch %d replies]\n\n", msg.ReplyCount) + continue + } + + threadCount++ + for _, reply := range replies { + replyUser := e.resolveUser(ctx, reply.User) + replyTS := parseSlackTimestamp(reply.Timestamp) + replyPermalink := buildThreadPermalink(info.Name, channelID, reply.Timestamp, msg.Timestamp) + cleanText := e.cleanSlackMarkup(ctx, reply.Text) + + fmt.Fprintf(&buf, " [thread reply %s] @%s (%s):\n %s\n\n", + replyTS.Format("2006-01-02 15:04"), + replyUser, + replyPermalink, + strings.ReplaceAll(cleanText, "\n", "\n "), + ) + } + } + } + + text := buf.String() + + // Ensure output directory exists + if err := os.MkdirAll(filepath.Dir(outputPath), 0755); err != nil { + return &ExtractionResult{ + Status: "error", + Method: "slack_api", + ChannelID: channelID, + ChannelName: info.Name, + Error: fmt.Sprintf("failed to create output directory: %v", err), + ErrorType: "io_error", + }, fmt.Errorf("failed to create output directory: %w", err) + } + + if err := os.WriteFile(outputPath, []byte(text), 0644); err != nil { + return &ExtractionResult{ + Status: "error", + Method: "slack_api", + ChannelID: channelID, + ChannelName: info.Name, + Error: fmt.Sprintf("failed to write output file: %v", err), + ErrorType: "io_error", + }, fmt.Errorf("failed to write output file: %w", err) + } + + absOutputPath, _ := filepath.Abs(outputPath) + + return &ExtractionResult{ + Status: "success", + Method: "slack_api", + ChannelID: channelID, + ChannelName: info.Name, + MessageCount: len(realMessages), + ThreadCount: threadCount, + TotalChars: len(text), + OutputFile: absOutputPath, + }, nil +} + +// ExtractToSessionDir extracts Slack channel text and saves it to the session's text directory. +func (e *Extractor) ExtractToSessionDir(ctx context.Context, channelID, sessionDir, docName string, opts ExtractOptions) (*ExtractionResult, error) { + textDir := filepath.Join(sessionDir, "text") + outputPath := filepath.Join(textDir, docName+".txt") + return e.Extract(ctx, channelID, outputPath, opts) +} + +// Slack markup patterns +var ( + // <@U123ABC> user mentions + slackUserMentionRe = regexp.MustCompile(`<@(U[A-Z0-9]+)>`) + // <#C123ABC|channel-name> channel references + slackChannelRefRe = regexp.MustCompile(`<#[A-Z0-9]+\|([^>]+)>`) + // <#C123ABC> channel references without label + slackChannelRefBareRe = regexp.MustCompile(`<#([A-Z0-9]+)>`) + // links with display text + slackLinkLabelRe = regexp.MustCompile(`<(https?://[^|>]+)\|([^>]+)>`) + // bare links + slackLinkBareRe = regexp.MustCompile(`<(https?://[^>]+)>`) +) + +// cleanSlackMarkup converts Slack mrkdwn to plain readable text. +// Keeps emoji shortcodes (e.g. :wave:) intact since they carry meaning. +func (e *Extractor) cleanSlackMarkup(ctx context.Context, text string) string { + // Resolve user mentions: <@U123> → @displayname + text = slackUserMentionRe.ReplaceAllStringFunc(text, func(match string) string { + userID := slackUserMentionRe.FindStringSubmatch(match)[1] + name := e.resolveUser(ctx, userID) + return "@" + name + }) + + // Channel references: <#C123|channel-name> → #channel-name + text = slackChannelRefRe.ReplaceAllString(text, "#$1") + text = slackChannelRefBareRe.ReplaceAllString(text, "#$1") + + // Links with labels: → Example (https://example.com) + text = slackLinkLabelRe.ReplaceAllString(text, "$2 ($1)") + + // Bare links: → https://example.com + text = slackLinkBareRe.ReplaceAllString(text, "$1") + + // HTML entities + text = strings.ReplaceAll(text, "&", "&") + text = strings.ReplaceAll(text, "<", "<") + text = strings.ReplaceAll(text, ">", ">") + + return text +} + +// resolveUser looks up a user's display name, caching results. +func (e *Extractor) resolveUser(ctx context.Context, userID string) string { + if userID == "" { + return "unknown" + } + + e.mu.Lock() + if name, ok := e.userCache[userID]; ok { + e.mu.Unlock() + return name + } + e.mu.Unlock() + + user, err := e.client.GetUserInfo(ctx, userID) + if err != nil { + return userID // Fall back to raw ID + } + + e.mu.Lock() + e.userCache[userID] = user.DisplayName + e.mu.Unlock() + + return user.DisplayName +} + +// parseSlackTimestamp converts a Slack timestamp (e.g., "1234567890.123456") to time.Time. +func parseSlackTimestamp(ts string) time.Time { + parts := strings.SplitN(ts, ".", 2) + if len(parts) == 0 { + return time.Time{} + } + var sec int64 + for _, c := range parts[0] { + sec = sec*10 + int64(c-'0') + } + return time.Unix(sec, 0) +} + +// buildPermalink builds a Slack message permalink. +func buildPermalink(channelName, channelID, messageTS string) string { + // Slack permalinks use the timestamp without the dot + tsNoDot := strings.ReplaceAll(messageTS, ".", "") + return fmt.Sprintf("https://slack.com/archives/%s/p%s", channelID, tsNoDot) +} + +// buildThreadPermalink builds a Slack thread reply permalink. +func buildThreadPermalink(channelName, channelID, replyTS, threadTS string) string { + tsNoDot := strings.ReplaceAll(replyTS, ".", "") + return fmt.Sprintf("https://slack.com/archives/%s/p%s?thread_ts=%s&cid=%s", + channelID, tsNoDot, threadTS, channelID) +} + +// formatDateOrOpen formats a time, returning fallback if zero. +func formatDateOrOpen(t time.Time, fallback string) string { + if t.IsZero() { + return fallback + } + return t.Format("2006-01-02") +} diff --git a/backend/internal/services/slack/extractor_test.go b/backend/internal/services/slack/extractor_test.go new file mode 100644 index 0000000..396edba --- /dev/null +++ b/backend/internal/services/slack/extractor_test.go @@ -0,0 +1,195 @@ +package slack + +import ( + "context" + "testing" +) + +// newTestExtractor creates an extractor with a pre-populated user cache and no real API client. +func newTestExtractor(users map[string]string) *Extractor { + e := NewExtractor("xoxb-fake", 1000, 90) + for k, v := range users { + e.userCache[k] = v + } + return e +} + +func TestCleanSlackMarkup_UserMentions(t *testing.T) { + e := newTestExtractor(map[string]string{"U123ABC": "alice"}) + ctx := context.Background() + + tests := []struct { + name string + input string + want string + }{ + {"resolved mention", "<@U123ABC> said hello", "@alice said hello"}, + {"cached mention", "<@U123ABC> and <@U123ABC>", "@alice and @alice"}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := e.cleanSlackMarkup(ctx, tt.input) + if got != tt.want { + t.Errorf("got %q, want %q", got, tt.want) + } + }) + } +} + +func TestCleanSlackMarkup_Links(t *testing.T) { + e := newTestExtractor(nil) + ctx := context.Background() + + tests := []struct { + name string + input string + want string + }{ + {"labeled link", "", "Example Site (https://example.com)"}, + {"bare link", "", "https://example.com/path"}, + {"link with query params", "", "click here (https://example.com?foo=bar)"}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := e.cleanSlackMarkup(ctx, tt.input) + if got != tt.want { + t.Errorf("got %q, want %q", got, tt.want) + } + }) + } +} + +func TestCleanSlackMarkup_Channels(t *testing.T) { + e := newTestExtractor(nil) + ctx := context.Background() + + tests := []struct { + name string + input string + want string + }{ + {"labeled channel", "<#C0123ABCDEF|general>", "#general"}, + {"bare channel", "<#C0123ABCDEF>", "#C0123ABCDEF"}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := e.cleanSlackMarkup(ctx, tt.input) + if got != tt.want { + t.Errorf("got %q, want %q", got, tt.want) + } + }) + } +} + +func TestCleanSlackMarkup_HTMLEntities(t *testing.T) { + e := newTestExtractor(nil) + ctx := context.Background() + + tests := []struct { + name string + input string + want string + }{ + {"ampersand", "A & B", "A & B"}, + {"less than", "x < y", "x < y"}, + {"greater than", "x > y", "x > y"}, + {"all entities", "<div> & stuff", "
& stuff"}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := e.cleanSlackMarkup(ctx, tt.input) + if got != tt.want { + t.Errorf("got %q, want %q", got, tt.want) + } + }) + } +} + +func TestCleanSlackMarkup_PreservesEmoji(t *testing.T) { + e := newTestExtractor(nil) + ctx := context.Background() + + tests := []struct { + name string + input string + want string + }{ + {"wave emoji", ":wave: hello", ":wave: hello"}, + {"thumbsup", "looks good :+1:", "looks good :+1:"}, + {"multiple emoji", ":fire: :rocket: shipped", ":fire: :rocket: shipped"}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := e.cleanSlackMarkup(ctx, tt.input) + if got != tt.want { + t.Errorf("got %q, want %q", got, tt.want) + } + }) + } +} + +func TestCleanSlackMarkup_Combined(t *testing.T) { + e := newTestExtractor(map[string]string{"U999TESTID": "alice"}) + ctx := context.Background() + + input := ":wave: Hey <@U999TESTID>, check out & <#C0123ABCDEF|data-team>" + want := ":wave: Hey @alice, check out project metrics (https://example.com/metrics) & #data-team" + + got := e.cleanSlackMarkup(ctx, input) + if got != want { + t.Errorf("got %q, want %q", got, want) + } +} + +func TestParseSlackTimestamp(t *testing.T) { + tests := []struct { + name string + ts string + unix int64 + }{ + {"standard", "1762447128.235749", 1762447128}, + {"round", "1700000000.000000", 1700000000}, + {"no decimal", "1234567890", 1234567890}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := parseSlackTimestamp(tt.ts) + if got.Unix() != tt.unix { + t.Errorf("got unix %d, want %d", got.Unix(), tt.unix) + } + }) + } +} + +func TestBuildPermalink(t *testing.T) { + got := buildPermalink("general", "C0123ABCDEF", "1762447128.235749") + want := "https://slack.com/archives/C0123ABCDEF/p1762447128235749" + if got != want { + t.Errorf("got %q, want %q", got, want) + } +} + +func TestBuildThreadPermalink(t *testing.T) { + got := buildThreadPermalink("general", "C0123ABCDEF", "1762448407.712439", "1762447128.235749") + want := "https://slack.com/archives/C0123ABCDEF/p1762448407712439?thread_ts=1762447128.235749&cid=C0123ABCDEF" + if got != want { + t.Errorf("got %q, want %q", got, want) + } +} + +func TestExtractOptions_VolumeProbeDefaults(t *testing.T) { + // Verify that NewExtractor stores config values correctly + e := NewExtractor("xoxb-fake", 500, 30) + if e.maxMessages != 500 { + t.Errorf("maxMessages: got %d, want 500", e.maxMessages) + } + if e.lookbackDays != 30 { + t.Errorf("lookbackDays: got %d, want 30", e.lookbackDays) + } +} diff --git a/docs/screenshots/slack-import-modal.png b/docs/screenshots/slack-import-modal.png new file mode 100644 index 0000000..22fcc3b Binary files /dev/null and b/docs/screenshots/slack-import-modal.png differ diff --git a/frontend/src/api/client.ts b/frontend/src/api/client.ts index 26ddac0..7540226 100644 --- a/frontend/src/api/client.ts +++ b/frontend/src/api/client.ts @@ -10,6 +10,7 @@ import type { ProcessingEvent, CreateSessionRequest, AddDocumentRequest, + AddSlackChannelRequest, APIError, BatchProgress, QueryStreamCallbacks, @@ -111,6 +112,17 @@ class APIClient { ); } + async addSlackChannel( + sessionId: string, + req: AddSlackChannelRequest + ): Promise { + return this.request( + 'POST', + `/sessions/${sessionId}/slack`, + req + ); + } + async reprocessDocument( sessionId: string, docName: string diff --git a/frontend/src/api/types.ts b/frontend/src/api/types.ts index 6a34ee4..1f901be 100644 --- a/frontend/src/api/types.ts +++ b/frontend/src/api/types.ts @@ -160,6 +160,13 @@ export interface AddDocumentRequest { name?: string; } +export interface AddSlackChannelRequest { + channel_id: string; + token?: string; + since?: string; + until?: string; +} + // Claude status export interface ClaudeStatusResponse { available: boolean; diff --git a/frontend/src/components/documents/AddSlackChannelModal.tsx b/frontend/src/components/documents/AddSlackChannelModal.tsx new file mode 100644 index 0000000..7cf2731 --- /dev/null +++ b/frontend/src/components/documents/AddSlackChannelModal.tsx @@ -0,0 +1,141 @@ +import { useState } from 'react'; +import { api } from '../../api/client'; + +interface Props { + sessionId: string; + onClose: () => void; + onAdded: () => void; +} + +function AddSlackChannelModal({ sessionId, onClose, onAdded }: Props) { + const [channelId, setChannelId] = useState(''); + const [since, setSince] = useState(''); + const [until, setUntil] = useState(''); + const [loading, setLoading] = useState(false); + const [error, setError] = useState(null); + + const handleSubmit = async (e: React.FormEvent) => { + e.preventDefault(); + + const id = parseChannelInput(channelId.trim()); + if (!id) { + setError('Please enter a valid Slack channel ID or URL'); + return; + } + + try { + setLoading(true); + setError(null); + + await api.addSlackChannel(sessionId, { + channel_id: id, + since: since || undefined, + until: until || undefined, + }); + + onAdded(); + } catch (e) { + setError(e instanceof Error ? e.message : 'Failed to add Slack channel'); + } finally { + setLoading(false); + } + }; + + return ( +
+
e.stopPropagation()}> +
+

Import from Slack

+ +
+ +
+
+ + setChannelId(e.target.value)} + disabled={loading} + autoFocus + /> +

+ Find the channel ID in Slack's channel details +

+
+ +
+
+ + setSince(e.target.value)} + disabled={loading} + /> +
+
+ + setUntil(e.target.value)} + disabled={loading} + /> +
+
+ + {error && ( +

+ {error} +

+ )} + +
+ + +
+
+
+
+ ); +} + +// parseChannelInput extracts a channel ID from a raw ID or Slack URL +function parseChannelInput(input: string): string | null { + // Direct channel ID (starts with C, D, or G) + if (/^[CDG][A-Z0-9]{8,}$/.test(input)) { + return input; + } + // Slack archive URL: https://slack.com/archives/C0123ABCDEF or similar + const match = input.match(/\/archives\/([CDG][A-Z0-9]+)/); + if (match) { + return match[1]; + } + // If it looks like just an ID without the right prefix, still accept it + if (/^[A-Z0-9]{9,}$/.test(input)) { + return input; + } + return null; +} + +export default AddSlackChannelModal; diff --git a/frontend/src/pages/SessionPage.tsx b/frontend/src/pages/SessionPage.tsx index ca491c4..261620b 100644 --- a/frontend/src/pages/SessionPage.tsx +++ b/frontend/src/pages/SessionPage.tsx @@ -4,6 +4,7 @@ import { api } from '../api/client'; import type { Session, DocumentListItem, ProcessingEvent } from '../api/types'; import DocumentsTable from '../components/documents/DocumentsTable'; import AddDocumentModal from '../components/documents/AddDocumentModal'; +import AddSlackChannelModal from '../components/documents/AddSlackChannelModal'; import ProcessingView from '../components/processing/ProcessingView'; function SessionPage() { @@ -13,6 +14,7 @@ function SessionPage() { const [loading, setLoading] = useState(true); const [error, setError] = useState(null); const [showAddDoc, setShowAddDoc] = useState(false); + const [showAddSlack, setShowAddSlack] = useState(false); const [processing, setProcessing] = useState(false); const [events, setEvents] = useState([]); const subscriptionRef = useRef<(() => void) | null>(null); @@ -203,6 +205,12 @@ function SessionPage() { Process {pendingDocs.length} Document{pendingDocs.length !== 1 ? 's' : ''} )} +
); }