Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
112 changes: 110 additions & 2 deletions hospexplorer/ask/admin.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,22 @@
from django.contrib.auth.admin import UserAdmin
from django.contrib.auth.models import User
from django.db import transaction

from ask.models import Conversation, TermsAcceptance, QARecord, SimWorkflow, WebsiteResource, PDFResource
from django.http import HttpResponseRedirect
from django.shortcuts import render
from django.urls import path, reverse

from ask.models import (
Conversation,
TermsAcceptance,
QARecord,
SimWorkflow,
WebsiteResource,
PDFResource,
DocumentType,
DocumentAuthorInstitution,
InstitutionType,
)
from ask.admin_csv import import_names_csv
from ask.kb_connector import delete_kb_document
from ask.tasks import run_kb_resource_upload

Expand Down Expand Up @@ -181,16 +195,98 @@ def delete_queryset(self, request, queryset):
return


class LookupCSVImportMixin:
    """Add an "Import CSV" button and upload view to a lookup ModelAdmin.

    The CSV is a single column of names. Parsing, duplicate handling and the
    optional header row are delegated to ``import_names_csv``.

    List this mixin *before* ``admin.ModelAdmin`` in the bases so that
    ``get_urls`` can extend the stock admin URLs through ``super()``.
    """

    change_list_template = "admin/ask/lookup_change_list.html"

    def get_urls(self):
        """Return the model's admin URLs with an ``import-csv/`` route first.

        The route is named ``<app_label>_<model_name>_import_csv`` and is
        wrapped in ``admin_site.admin_view``.
        """
        urls = super().get_urls()
        opts = self.model._meta
        return [
            path(
                "import-csv/",
                self.admin_site.admin_view(self.import_csv_view),
                name=f"{opts.app_label}_{opts.model_name}_import_csv",
            ),
        ] + urls

    def import_csv_view(self, request):
        """Render the CSV upload form and process an uploaded file.

        GET renders ``admin/ask/lookup_csv_import.html``. POST validates the
        ``csv_file`` upload and imports it; on success the user is redirected
        to the changelist with a summary message. On any validation or import
        failure an error message is queued and the form is rendered again.

        Raises:
            PermissionDenied: if the user lacks the add permission for
                ``self.model``.
        """
        # Local import keeps this block free of a new file-level dependency.
        from django.core.exceptions import PermissionDenied

        # admin_view() wraps the view in the admin site's own access check,
        # not a per-model one. Importing creates rows, so require the same
        # permission as the regular "add" form.
        if not self.has_add_permission(request):
            raise PermissionDenied

        opts = self.model._meta
        changelist_url = reverse(
            f"admin:{opts.app_label}_{opts.model_name}_changelist",
            current_app=self.admin_site.name,
        )

        if request.method == "POST":
            file_obj = request.FILES.get("csv_file")
            if file_obj is None:
                self.message_user(request, "No file provided.", level="error")
            elif not file_obj.name.lower().endswith(".csv"):
                self.message_user(request, "File must have a .csv extension.", level="error")
            else:
                try:
                    # All-or-nothing: if a row fails part-way through, roll
                    # back the rows already inserted so "Import failed" really
                    # means nothing was imported.
                    with transaction.atomic():
                        created, skipped = import_names_csv(self.model, file_obj)
                except Exception as e:
                    logger.exception("CSV import failed for %s", self.model.__name__)
                    self.message_user(request, f"Import failed: {e}", level="error")
                else:
                    self.message_user(
                        request,
                        f"Imported {created} new {self.model._meta.verbose_name_plural} "
                        f"(skipped {skipped} duplicate or empty rows).",
                    )
                    return HttpResponseRedirect(changelist_url)

        # GET, or a POST that failed: (re)display the upload form.
        context = {
            **self.admin_site.each_context(request),
            "title": f"Import {self.model._meta.verbose_name_plural} from CSV",
            "opts": self.model._meta,
            "changelist_url": changelist_url,
        }
        return render(request, "admin/ask/lookup_csv_import.html", context)


@admin.register(DocumentType)
class DocumentTypeAdmin(LookupCSVImportMixin, admin.ModelAdmin):
    """Admin for the ``DocumentType`` lookup model, with CSV import enabled."""

    search_fields = ("name",)
    list_display = ("name",)


@admin.register(DocumentAuthorInstitution)
class DocumentAuthorInstitutionAdmin(LookupCSVImportMixin, admin.ModelAdmin):
    """Admin for the ``DocumentAuthorInstitution`` lookup model, with CSV import enabled."""

    search_fields = ("name",)
    list_display = ("name",)


@admin.register(InstitutionType)
class InstitutionTypeAdmin(LookupCSVImportMixin, admin.ModelAdmin):
    """Admin for the ``InstitutionType`` lookup model, with CSV import enabled."""

    search_fields = ("name",)
    list_display = ("name",)


@admin.register(WebsiteResource)
class WebsiteResourceAdmin(KBDeleteAdminMixin, admin.ModelAdmin):
list_display = ("title", "url", "creator", "status", "modified_at")
list_filter = ("status",)
search_fields = ("title", "url")
readonly_fields = ("created_at", "modified_at", "creator", "modifier", "mcp_kb_document_id", "status", "status_message")
fieldsets = (
(None, {"fields": ("title", "description", "url")}),
("Metadata", {"fields": (
"date_published", "date_published_precision",
"document_type", "document_author_institution", "institution_type",
)}),
("Status", {"fields": (
"status", "status_message", "mcp_kb_document_id",
"created_at", "modified_at", "creator", "modifier",
)}),
)
help_texts = {
"title": "A short name to identify this website resource.",
"description": "Optional details about what this website covers.",
"url": "The URL the LLM will use as context when answering questions.",
"date_published_precision": "Granularity of the date above (year / month / day). Leave blank if unknown.",
}

def get_form(self, request, obj=None, **kwargs):
Expand Down Expand Up @@ -230,10 +326,22 @@ class PDFResourceAdmin(KBDeleteAdminMixin, admin.ModelAdmin):
list_filter = ("status",)
search_fields = ("title",)
readonly_fields = ("created_at", "modified_at", "creator", "modifier", "mcp_kb_document_id", "status", "status_message")
fieldsets = (
(None, {"fields": ("title", "description", "file")}),
("Metadata", {"fields": (
"date_published", "date_published_precision",
"document_type", "document_author_institution", "institution_type",
)}),
("Status", {"fields": (
"status", "status_message", "mcp_kb_document_id",
"created_at", "modified_at", "creator", "modifier",
)}),
)
help_texts = {
"title": "A short name to identify this PDF resource.",
"description": "Optional details about what this PDF covers.",
"file": "The PDF file the LLM will use as context when answering questions.",
"date_published_precision": "Granularity of the date above (year / month / day). Leave blank if unknown.",
}

def get_form(self, request, obj=None, **kwargs):
Expand Down
26 changes: 26 additions & 0 deletions hospexplorer/ask/admin_csv.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
import csv
import io


def import_names_csv(model, file_obj):
    """Import a one-column CSV into a model with a ``name`` field.

    Args:
        model: A class exposing ``objects.get_or_create(name=...)`` that
            returns an ``(instance, created)`` pair.
        file_obj: A file-like object whose ``read()`` returns the CSV content,
            normally as UTF-8 bytes (a leading BOM is tolerated). Content that
            is already a ``str`` is used as-is.

    Returns:
        ``(created, skipped)``. Blank rows, a leading header row of ``name``
        (case-insensitive), and rows whose name already exists in the table
        are all counted as skipped.

    Only the first column of each row is read, and surrounding whitespace is
    stripped from it.
    """
    raw = file_obj.read()
    if isinstance(raw, str):
        text = raw
    else:
        # utf-8-sig drops the BOM that spreadsheet exports commonly prepend.
        text = raw.decode("utf-8-sig", errors="replace")
    reader = csv.reader(io.StringIO(text))

    created = 0
    skipped = 0
    # Only the first non-blank row may be a header. Checking every row would
    # silently drop a legitimate value that happens to be spelled "Name".
    header_possible = True
    for row in reader:
        name = row[0].strip() if row else ""
        if not name:
            skipped += 1
            continue
        if header_possible:
            header_possible = False
            if name.lower() == "name":
                skipped += 1
                continue
        _, was_created = model.objects.get_or_create(name=name)
        if was_created:
            created += 1
        else:
            skipped += 1
    return created, skipped
17 changes: 11 additions & 6 deletions hospexplorer/ask/kb_connector.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import json
import logging

import httpx
Expand Down Expand Up @@ -30,22 +31,24 @@ def list_kb_documents(page=1, page_size=10):
return response.json()


def add_website_to_kb(url):
def add_website_to_kb(url, metadata=None):
"""Send a website URL to the MCP KB server for ingestion.

Calls POST /docs/website/add?url={url} on the MCP KB server.
The KB server fetches the page, chunks it, generates embeddings,
and stores it for semantic search.
``metadata`` (if provided) is sent as a JSON body ``{"metadata": ...}`` so
the KB server can store it on the Document row.
"""
headers = {
"Authorization": f"Bearer {settings.KB_MCP_JWT_TOKEN}",
"Content-Type": "application/json",
}
endpoint = f"{settings.KB_MCP_HOST}/docs/website/add"

with httpx.Client() as client:
response = client.post(
endpoint,
params={"url": url},
json={"metadata": metadata} if metadata is not None else {},
headers=headers,
timeout=settings.KB_MCP_TIMEOUT,
)
Expand All @@ -54,12 +57,12 @@ def add_website_to_kb(url):
return response.json()


def add_pdf_to_kb(file_bytes, filename, title, url=None):
def add_pdf_to_kb(file_bytes, filename, title, url=None, metadata=None):
"""Upload a PDF to the MCP KB server for ingestion.

Calls POST /docs/pdf/add on the MCP KB server with multipart form data.
The KB server extracts text, chunks it, generates embeddings,
and stores it for semantic search.
metadata (if provided) is JSON-encoded into a metadata form field so
it can travel alongside the file.
"""
headers = {
"Authorization": f"Bearer {settings.KB_MCP_JWT_TOKEN}",
Expand All @@ -70,6 +73,8 @@ def add_pdf_to_kb(file_bytes, filename, title, url=None):
data = {"title": title}
if url:
data["url"] = url
if metadata is not None:
data["metadata"] = json.dumps(metadata)

with httpx.Client() as client:
response = client.post(
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
# Generated by Django 6.0.2 on 2026-05-11 23:30

import django.db.models.deletion
from django.db import migrations, models


class Migration(migrations.Migration):
    """Add document-metadata lookup tables and link them to both resource models.

    Creates the ``DocumentAuthorInstitution``, ``DocumentType`` and
    ``InstitutionType`` models, then adds publication-date fields and nullable
    foreign keys to those lookups on ``pdfresource`` and ``websiteresource``.
    """

    dependencies = [
        ('ask', '0012_pdfresource_status_pdfresource_status_message_and_more'),
    ]

    operations = [
        # --- Lookup tables: each is an id plus a unique name, ordered by name.
        migrations.CreateModel(
            name='DocumentAuthorInstitution',
            fields=[
                ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                ('name', models.CharField(max_length=255, unique=True)),
            ],
            options={
                'ordering': ['name'],
            },
        ),
        migrations.CreateModel(
            name='DocumentType',
            fields=[
                ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                ('name', models.CharField(max_length=255, unique=True)),
            ],
            options={
                'ordering': ['name'],
            },
        ),
        migrations.CreateModel(
            name='InstitutionType',
            fields=[
                ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                ('name', models.CharField(max_length=255, unique=True)),
            ],
            options={
                'ordering': ['name'],
            },
        ),
        # --- Publication date: an optional date plus an optional precision
        # choice (year / month / day), added to both resource models.
        migrations.AddField(
            model_name='pdfresource',
            name='date_published',
            field=models.DateField(blank=True, null=True),
        ),
        migrations.AddField(
            model_name='pdfresource',
            name='date_published_precision',
            field=models.CharField(blank=True, choices=[('year', 'Year'), ('month', 'Month'), ('day', 'Day')], default='', max_length=10),
        ),
        migrations.AddField(
            model_name='websiteresource',
            name='date_published',
            field=models.DateField(blank=True, null=True),
        ),
        migrations.AddField(
            model_name='websiteresource',
            name='date_published_precision',
            field=models.CharField(blank=True, choices=[('year', 'Year'), ('month', 'Month'), ('day', 'Day')], default='', max_length=10),
        ),
        # --- Optional foreign keys to the lookup tables. SET_NULL means that
        # deleting a lookup row clears the reference instead of deleting the
        # resource.
        migrations.AddField(
            model_name='pdfresource',
            name='document_author_institution',
            field=models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='%(class)s_resources', to='ask.documentauthorinstitution'),
        ),
        migrations.AddField(
            model_name='websiteresource',
            name='document_author_institution',
            field=models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='%(class)s_resources', to='ask.documentauthorinstitution'),
        ),
        migrations.AddField(
            model_name='pdfresource',
            name='document_type',
            field=models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='%(class)s_resources', to='ask.documenttype'),
        ),
        migrations.AddField(
            model_name='websiteresource',
            name='document_type',
            field=models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='%(class)s_resources', to='ask.documenttype'),
        ),
        migrations.AddField(
            model_name='pdfresource',
            name='institution_type',
            field=models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='%(class)s_resources', to='ask.institutiontype'),
        ),
        migrations.AddField(
            model_name='websiteresource',
            name='institution_type',
            field=models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='%(class)s_resources', to='ask.institutiontype'),
        ),
    ]
Loading