diff --git a/hospexplorer/ask/admin.py b/hospexplorer/ask/admin.py
index cd99b3e..6ee1a97 100644
--- a/hospexplorer/ask/admin.py
+++ b/hospexplorer/ask/admin.py
@@ -5,8 +5,22 @@
 from django.contrib.auth.admin import UserAdmin
 from django.contrib.auth.models import User
 from django.db import transaction
-
-from ask.models import Conversation, TermsAcceptance, QARecord, SimWorkflow, WebsiteResource, PDFResource
+from django.http import HttpResponseRedirect
+from django.shortcuts import render
+from django.urls import path, reverse
+
+from ask.models import (
+    Conversation,
+    TermsAcceptance,
+    QARecord,
+    SimWorkflow,
+    WebsiteResource,
+    PDFResource,
+    DocumentType,
+    DocumentAuthorInstitution,
+    InstitutionType,
+)
+from ask.admin_csv import import_names_csv
 from ask.kb_connector import delete_kb_document
 from ask.tasks import run_kb_resource_upload
 
@@ -181,16 +195,98 @@ def delete_queryset(self, request, queryset):
         return
 
 
+class LookupCSVImportMixin:
+    """Adds an Import CSV button + upload view to a lookup ModelAdmin.
+
+    CSV is single-column name. Duplicates are skipped, header row optional.
+    """
+
+    change_list_template = "admin/ask/lookup_change_list.html"
+
+    def get_urls(self):
+        urls = super().get_urls()
+        info = (self.model._meta.app_label, self.model._meta.model_name)
+        return [
+            path(
+                "import-csv/",
+                self.admin_site.admin_view(self.import_csv_view),
+                name=f"{info[0]}_{info[1]}_import_csv",
+            ),
+        ] + urls
+
+    def import_csv_view(self, request):
+        info = (self.model._meta.app_label, self.model._meta.model_name)
+        changelist_url = reverse(f"admin:{info[0]}_{info[1]}_changelist")
+
+        if request.method == "POST":
+            file_obj = request.FILES.get("csv_file")
+            if file_obj is None:
+                self.message_user(request, "No file provided.", level="error")
+            elif not file_obj.name.lower().endswith(".csv"):
+                self.message_user(request, "File must have a .csv extension.", level="error")
+            else:
+                try:
+                    created, skipped = import_names_csv(self.model, file_obj)
+                except Exception as e:
+                    logger.exception("CSV import failed for %s", self.model.__name__)
+                    self.message_user(request, f"Import failed: {e}", level="error")
+                else:
+                    self.message_user(
+                        request,
+                        f"Imported {created} new {self.model._meta.verbose_name_plural} "
+                        f"(skipped {skipped} duplicate or empty rows).",
+                    )
+            return HttpResponseRedirect(changelist_url)
+
+        context = {
+            **self.admin_site.each_context(request),
+            "title": f"Import {self.model._meta.verbose_name_plural} from CSV",
+            "opts": self.model._meta,
+            "changelist_url": changelist_url,
+        }
+        return render(request, "admin/ask/lookup_csv_import.html", context)
+
+
+@admin.register(DocumentType)
+class DocumentTypeAdmin(LookupCSVImportMixin, admin.ModelAdmin):
+    list_display = ("name",)
+    search_fields = ("name",)
+
+
+@admin.register(DocumentAuthorInstitution)
+class DocumentAuthorInstitutionAdmin(LookupCSVImportMixin, admin.ModelAdmin):
+    list_display = ("name",)
+    search_fields = ("name",)
+
+
+@admin.register(InstitutionType)
+class InstitutionTypeAdmin(LookupCSVImportMixin, admin.ModelAdmin):
+    list_display = ("name",)
+    search_fields = ("name",)
+
+
 @admin.register(WebsiteResource)
 class WebsiteResourceAdmin(KBDeleteAdminMixin, admin.ModelAdmin):
     list_display = ("title", "url", "creator", "status", "modified_at")
     list_filter = ("status",)
     search_fields = ("title", "url")
     readonly_fields = ("created_at", "modified_at", "creator", "modifier", "mcp_kb_document_id", "status", "status_message")
+    fieldsets = (
+        (None, {"fields": ("title", "description", "url")}),
+        ("Metadata", {"fields": (
+            "date_published", "date_published_precision",
+            "document_type", "document_author_institution", "institution_type",
+        )}),
+        ("Status", {"fields": (
+            "status", "status_message", "mcp_kb_document_id",
+            "created_at", "modified_at", "creator", "modifier",
+        )}),
+    )
     help_texts = {
         "title": "A short name to identify this website resource.",
         "description": "Optional details about what this website covers.",
         "url": "The URL the LLM will use as context when answering questions.",
+        "date_published_precision": "Granularity of the date above (year / month / day). Leave blank if unknown.",
     }
 
     def get_form(self, request, obj=None, **kwargs):
@@ -230,10 +326,22 @@ class PDFResourceAdmin(KBDeleteAdminMixin, admin.ModelAdmin):
     list_filter = ("status",)
     search_fields = ("title",)
     readonly_fields = ("created_at", "modified_at", "creator", "modifier", "mcp_kb_document_id", "status", "status_message")
+    fieldsets = (
+        (None, {"fields": ("title", "description", "file")}),
+        ("Metadata", {"fields": (
+            "date_published", "date_published_precision",
+            "document_type", "document_author_institution", "institution_type",
+        )}),
+        ("Status", {"fields": (
+            "status", "status_message", "mcp_kb_document_id",
+            "created_at", "modified_at", "creator", "modifier",
+        )}),
+    )
     help_texts = {
         "title": "A short name to identify this PDF resource.",
         "description": "Optional details about what this PDF covers.",
         "file": "The PDF file the LLM will use as context when answering questions.",
+        "date_published_precision": "Granularity of the date above (year / month / day). Leave blank if unknown.",
     }
 
     def get_form(self, request, obj=None, **kwargs):
diff --git a/hospexplorer/ask/admin_csv.py b/hospexplorer/ask/admin_csv.py
new file mode 100644
index 0000000..beebfd6
--- /dev/null
+++ b/hospexplorer/ask/admin_csv.py
@@ -0,0 +1,35 @@
+import csv
+import io
+
+
+def import_names_csv(model, file_obj):
+    """Import a one-column CSV into a model with a ``name`` field.
+
+    Returns ``(created, skipped)``. Blank rows, a leading header row of ``name``,
+    and rows whose name already exists in the table are all counted as skipped.
+    Only the first non-blank row is treated as a possible header, so a later
+    row whose value happens to be "name" is imported like any other value.
+    """
+    text = file_obj.read().decode("utf-8-sig", errors="replace")
+    reader = csv.reader(io.StringIO(text))
+
+    created = 0
+    skipped = 0
+    header_possible = True
+    for row in reader:
+        name = row[0].strip() if row else ""
+        if not name:
+            skipped += 1
+            continue
+        if header_possible:
+            # Only the first non-blank row can be the optional header.
+            header_possible = False
+            if name.lower() == "name":
+                skipped += 1
+                continue
+        _, was_created = model.objects.get_or_create(name=name)
+        if was_created:
+            created += 1
+        else:
+            skipped += 1
+    return created, skipped
diff --git a/hospexplorer/ask/kb_connector.py b/hospexplorer/ask/kb_connector.py
index 94bdf40..39b6ea0 100644
--- a/hospexplorer/ask/kb_connector.py
+++ b/hospexplorer/ask/kb_connector.py
@@ -1,3 +1,4 @@
+import json
 import logging
 
 import httpx
@@ -30,15 +31,16 @@ def list_kb_documents(page=1, page_size=10):
     return response.json()
 
 
-def add_website_to_kb(url):
+def add_website_to_kb(url, metadata=None):
     """Send a website URL to the MCP KB server for ingestion.
 
     Calls POST /docs/website/add?url={url} on the MCP KB server.
-    The KB server fetches the page, chunks it, generates embeddings,
-    and stores it for semantic search.
+    ``metadata`` (if provided) is sent as a JSON body ``{"metadata": ...}`` so
+    the KB server can store it on the Document row.
     """
     headers = {
         "Authorization": f"Bearer {settings.KB_MCP_JWT_TOKEN}",
+        "Content-Type": "application/json",
     }
     endpoint = f"{settings.KB_MCP_HOST}/docs/website/add"
 
@@ -46,6 +48,7 @@ def add_website_to_kb(url):
         response = client.post(
             endpoint,
             params={"url": url},
+            json={"metadata": metadata} if metadata is not None else {},
             headers=headers,
             timeout=settings.KB_MCP_TIMEOUT,
         )
@@ -54,12 +57,12 @@ def add_website_to_kb(url):
     return response.json()
 
 
-def add_pdf_to_kb(file_bytes, filename, title, url=None):
+def add_pdf_to_kb(file_bytes, filename, title, url=None, metadata=None):
     """Upload a PDF to the MCP KB server for ingestion.
 
     Calls POST /docs/pdf/add on the MCP KB server with multipart form data.
-    The KB server extracts text, chunks it, generates embeddings,
-    and stores it for semantic search.
+    metadata (if provided) is JSON-encoded into a metadata form field so
+    it can travel alongside the file.
     """
     headers = {
         "Authorization": f"Bearer {settings.KB_MCP_JWT_TOKEN}",
@@ -70,6 +73,8 @@ def add_pdf_to_kb(file_bytes, filename, title, url=None):
     data = {"title": title}
     if url:
         data["url"] = url
+    if metadata is not None:
+        data["metadata"] = json.dumps(metadata)
 
     with httpx.Client() as client:
         response = client.post(
diff --git a/hospexplorer/ask/migrations/0013_documentauthorinstitution_documenttype_and_more.py b/hospexplorer/ask/migrations/0013_documentauthorinstitution_documenttype_and_more.py
new file mode 100644
index 0000000..dfe1411
--- /dev/null
+++ b/hospexplorer/ask/migrations/0013_documentauthorinstitution_documenttype_and_more.py
@@ -0,0 +1,94 @@
+# Generated by Django 6.0.2 on 2026-05-11 23:30
+
+import django.db.models.deletion
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('ask', '0012_pdfresource_status_pdfresource_status_message_and_more'),
+    ]
+
+    operations = [
+        migrations.CreateModel(
+            name='DocumentAuthorInstitution',
+            fields=[
+                ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
+                ('name', models.CharField(max_length=255, unique=True)),
+            ],
+            options={
+                'ordering': ['name'],
+            },
+        ),
+        migrations.CreateModel(
+            name='DocumentType',
+            fields=[
+                ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
+                ('name', models.CharField(max_length=255, unique=True)),
+            ],
+            options={
+                'ordering': ['name'],
+            },
+        ),
+        migrations.CreateModel(
+            name='InstitutionType',
+            fields=[
+                ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
+                ('name', models.CharField(max_length=255, unique=True)),
+            ],
+            options={
+                'ordering': ['name'],
+            },
+        ),
+        migrations.AddField(
+            model_name='pdfresource',
+            name='date_published',
+            field=models.DateField(blank=True, null=True),
+        ),
+        migrations.AddField(
+            model_name='pdfresource',
+            name='date_published_precision',
+            field=models.CharField(blank=True, choices=[('year', 'Year'), ('month', 'Month'), ('day', 'Day')], default='', max_length=10),
+        ),
+        migrations.AddField(
+            model_name='websiteresource',
+            name='date_published',
+            field=models.DateField(blank=True, null=True),
+        ),
+        migrations.AddField(
+            model_name='websiteresource',
+            name='date_published_precision',
+            field=models.CharField(blank=True, choices=[('year', 'Year'), ('month', 'Month'), ('day', 'Day')], default='', max_length=10),
+        ),
+        migrations.AddField(
+            model_name='pdfresource',
+            name='document_author_institution',
+            field=models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='%(class)s_resources', to='ask.documentauthorinstitution'),
+        ),
+        migrations.AddField(
+            model_name='websiteresource',
+            name='document_author_institution',
+            field=models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='%(class)s_resources', to='ask.documentauthorinstitution'),
+        ),
+        migrations.AddField(
+            model_name='pdfresource',
+            name='document_type',
+            field=models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='%(class)s_resources', to='ask.documenttype'),
+        ),
+        migrations.AddField(
+            model_name='websiteresource',
+            name='document_type',
+            field=models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='%(class)s_resources', to='ask.documenttype'),
+        ),
+        migrations.AddField(
+            model_name='pdfresource',
+            name='institution_type',
+            field=models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='%(class)s_resources', to='ask.institutiontype'),
+        ),
+        migrations.AddField(
+            model_name='websiteresource',
+            name='institution_type',
+            field=models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='%(class)s_resources', to='ask.institutiontype'),
+        ),
+    ]
diff --git a/hospexplorer/ask/models.py b/hospexplorer/ask/models.py
index 70fa94b..e51d420 100644
--- a/hospexplorer/ask/models.py
+++ b/hospexplorer/ask/models.py
@@ -3,6 +3,36 @@
 from django.conf import settings
 from django.db import models
 
+class DocumentType(models.Model):
+    name = models.CharField(max_length=255, unique=True)
+
+    class Meta:
+        ordering = ["name"]
+
+    def __str__(self):
+        return self.name
+
+
+class DocumentAuthorInstitution(models.Model):
+    name = models.CharField(max_length=255, unique=True)
+
+    class Meta:
+        ordering = ["name"]
+
+    def __str__(self):
+        return self.name
+
+
+class InstitutionType(models.Model):
+    name = models.CharField(max_length=255, unique=True)
+
+    class Meta:
+        ordering = ["name"]
+
+    def __str__(self):
+        return self.name
+
+
 # Abstract Model, fields are inherited by subclasses
 class Resource(models.Model):
     class Status(models.TextChoices):
@@ -11,6 +41,11 @@ class Status(models.TextChoices):
         ERROR = "error", "Error"
         WARNING = "warning", "Warning"
 
+    class DatePrecision(models.TextChoices):
+        YEAR = "year", "Year"
+        MONTH = "month", "Month"
+        DAY = "day", "Day"
+
     title = models.CharField(max_length=255)
     description = models.TextField(blank=True, default="")
     creator = models.ForeignKey(
@@ -34,6 +69,35 @@ class Status(models.TextChoices):
     )
     status_message = models.TextField(blank=True, default="")
 
+    date_published = models.DateField(null=True, blank=True)
+    date_published_precision = models.CharField(
+        max_length=10,
+        choices=DatePrecision.choices,
+        blank=True,
+        default="",
+    )
+    document_type = models.ForeignKey(
+        "DocumentType",
+        on_delete=models.SET_NULL,
+        null=True,
+        blank=True,
+        related_name="%(class)s_resources",
+    )
+    document_author_institution = models.ForeignKey(
+        "DocumentAuthorInstitution",
+        on_delete=models.SET_NULL,
+        null=True,
+        blank=True,
+        related_name="%(class)s_resources",
+    )
+    institution_type = models.ForeignKey(
+        "InstitutionType",
+        on_delete=models.SET_NULL,
+        null=True,
+        blank=True,
+        related_name="%(class)s_resources",
+    )
+
     class Meta:
         abstract = True
 
diff --git a/hospexplorer/ask/tasks.py b/hospexplorer/ask/tasks.py
index e290310..89b53b3 100644
--- a/hospexplorer/ask/tasks.py
+++ b/hospexplorer/ask/tasks.py
@@ -144,6 +144,23 @@ def run_llm_task(task_id, record_id, conversation_id):
         close_old_connections()
 
 
+def _build_resource_metadata(obj):
+    """Serialize a Resource's metadata fields into a JSON-safe dict.
+
+    FK lookups are flattened to their ``name`` so the MCP payload is
+    self-describing and doesn't depend on hosp-explorer's local IDs.
+    """
+    return {
+        "date_published": obj.date_published.isoformat() if obj.date_published else None,
+        "date_published_precision": obj.date_published_precision or None,
+        "document_type": obj.document_type.name if obj.document_type_id else None,
+        "document_author_institution": (
+            obj.document_author_institution.name if obj.document_author_institution_id else None
+        ),
+        "institution_type": obj.institution_type.name if obj.institution_type_id else None,
+    }
+
+
 def run_kb_resource_upload(model_label, resource_id):
     """Background thread: push a resource to the MCP KB and record its doc_id.
 
@@ -169,15 +186,18 @@ def run_kb_resource_upload(model_label, resource_id): return try: + metadata = _build_resource_metadata(obj) if model_label == "pdf": obj.file.open("rb") try: file_bytes = obj.file.read() finally: obj.file.close() - result = add_pdf_to_kb(file_bytes, obj.file.name.split("/")[-1], obj.title) + result = add_pdf_to_kb( + file_bytes, obj.file.name.split("/")[-1], obj.title, metadata=metadata, + ) else: - result = add_website_to_kb(obj.url) + result = add_website_to_kb(obj.url, metadata=metadata) obj.mcp_kb_document_id = result.get("doc_id") obj.status = Resource.Status.SUCCESS diff --git a/hospexplorer/ask/templates/admin/ask/lookup_change_list.html b/hospexplorer/ask/templates/admin/ask/lookup_change_list.html new file mode 100644 index 0000000..766c505 --- /dev/null +++ b/hospexplorer/ask/templates/admin/ask/lookup_change_list.html @@ -0,0 +1,9 @@ +{% extends "admin/change_list.html" %} +{% load i18n %} + +{% block object-tools-items %} +