Skip to content

Commit a546054

Browse files
feat(SolidStorage): robust Pod discovery via Link header, profile storage, path walk
1 parent 3b677f0 commit a546054

3 files changed

Lines changed: 97 additions & 25 deletions

File tree

api/package.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@
7171
"firebase": "^11.0.2",
7272
"form-data": "^4.0.4",
7373
"handlebars": "^4.7.7",
74+
"http-link-header": "^1.1.3",
7475
"https-proxy-agent": "^7.0.6",
7576
"ioredis": "^5.3.2",
7677
"js-yaml": "^4.1.1",

api/server/services/SolidStorage.js

Lines changed: 95 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
const { logger } = require('@librechat/data-schemas');
22
const { DataFactory, Writer, Parser } = require('n3');
3+
const LinkHeader = require('http-link-header');
34

45
// LDP (Linked Data Platform) namespace for parsing container Turtle responses
56
const LDP_NS = 'http://www.w3.org/ns/ldp#';
@@ -27,12 +28,23 @@ const {
2728
deleteFile,
2829
getPodUrlAll,
2930
createContainerAt,
31+
getSolidDataset,
32+
getThing,
33+
getUrl,
3034
} = require('@inrupt/solid-client');
3135

3236
// ACL/ACP namespaces (RDF and FOAF are the general-purpose vocabularies declared alongside ACL)
3337
const ACL_NS = 'http://www.w3.org/ns/auth/acl#';
3438
const RDF_NS = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#';
3539
const FOAF_NS = 'http://xmlns.com/foaf/0.1/';
40+
/**
 * PIM Space vocabulary IRIs used by getPodRoot:
 * - PIM_SPACE_STORAGE_TYPE is compared against Link header refs whose rel is "type".
 * - PIM_SPACE_STORAGE_PREDICATE is the predicate read from the profile subject via getUrl().
 */
41+
const PIM_SPACE_STORAGE_TYPE = 'http://www.w3.org/ns/pim/space#Storage';
42+
const PIM_SPACE_STORAGE_PREDICATE = 'http://www.w3.org/ns/pim/space#storage';
43+
44+
/**
 * Normalise a URL string so that it ends with exactly the slash it already has, or one added slash.
 *
 * @param {string} url - URL string to normalise
 * @returns {string} The same string when it already ends with "/", otherwise the string with "/" appended
 */
function ensureTrailingSlash(url) {
  if (url.endsWith('/')) {
    return url;
  }
  return `${url}/`;
}
3648

3749
/**
3850
* Solid Storage Utility Module
@@ -206,6 +218,67 @@ async function getSolidFetch(req) {
206218
}
207219
}
208220

221+
/**
 * Discover Pod root from a resource URL using Solid conventions (Link header, profile storage, path walk).
 * Based on https://github.com/SolidLabResearch/Bashlib/blob/80de25cbb4b3ed057f95e25bc057f1be9b00cef3/src/utils/util.ts getPodRoot.
 * Uses http-link-header (same as Bashlib) for Link header parsing.
 *
 * For the given URL, and then for each ancestor container in turn, the checks run in this order:
 *   1. HEAD the URL. If the Link header has a rel="type" ref pointing at pim:Storage, the URL
 *      itself (with a trailing slash) is the Pod root.
 *   2. Otherwise load the URL as a Solid dataset and read space:storage from the WebID subject
 *      (or from the URL subject when no WebID was passed).
 *   3. Otherwise step up to the parent container and repeat.
 *
 * Real servers (e.g. solidcommunity.net): profile document has Link headers but no Storage type;
 * the Pod root (e.g. https://user.solidcommunity.net/) returns Link: <http://www.w3.org/ns/pim/space#Storage>; rel="type".
 * The profile Turtle has space:storage on the #me subject (WebID), not on the document URL.
 *
 * The walk stops with null as soon as a HEAD response is not OK, or once the origin root
 * (path "/") has been checked without a match. It never requests anything above the origin.
 *
 * @param {string} url - Resource URL (e.g. WebID document URL without fragment) for HEAD/path walk
 * @param {Function} fetch - Authenticated fetch function
 * @param {string} [webId] - Full WebID (with #me) so we can read space:storage from the profile subject
 * @returns {Promise<string|null>} Pod root URL with trailing slash, or null if not found
 */
async function getPodRoot(url, fetch, webId) {
  if (!url || !fetch) return null;
  try {
    const res = await fetch(url, { method: 'HEAD' });
    if (!res.ok) return null;

    // 1) Link header: the resource declares itself to be a pim:Storage
    const linkHeaders = res.headers.get('Link');
    if (linkHeaders) {
      const { refs } = LinkHeader.parse(linkHeaders);
      const isStorage = refs.some(
        (ref) =>
          ref.rel === 'type' &&
          (ref.uri === PIM_SPACE_STORAGE_TYPE || ref.type === PIM_SPACE_STORAGE_TYPE),
      );
      if (isStorage) {
        const podUrl = ensureTrailingSlash(url);
        logger.debug('[SolidStorage] Pod root from Link header', { url, podUrl });
        return podUrl;
      }
    }

    // 2) Profile data: space:storage predicate
    try {
      const ds = await getSolidDataset(url, { fetch });
      // space:storage is on the WebID subject (#me), not the document URL (see solidcommunity.net profile/card)
      const thing = webId ? getThing(ds, webId) : getThing(ds, url);
      const storageUrl = thing ? getUrl(thing, PIM_SPACE_STORAGE_PREDICATE) : null;
      if (storageUrl) {
        const podUrl = ensureTrailingSlash(storageUrl);
        logger.debug('[SolidStorage] Pod root from profile storage predicate', { url, podUrl });
        return podUrl;
      }
    } catch (_ignored) {
      // Deliberate best-effort: the resource is not a Solid dataset or has no storage pointer,
      // so fall through to the path walk instead of aborting discovery.
    }

    // 3) Path walk: retry on the parent container; null parent means the origin root was reached
    const parentUrl = getParentContainerUrl(url);
    if (!parentUrl) return null;
    return getPodRoot(parentUrl, fetch, webId);
  } catch (err) {
    logger.debug('[SolidStorage] getPodRoot failed for url', { url, error: err?.message });
    return null;
  }
}

/**
 * Compute the parent container URL of a resource or container URL.
 * Query string and fragment are dropped; only scheme, host and path are kept.
 *
 * @param {string} url - Absolute resource or container URL
 * @returns {string|null} Parent container URL (always ending in "/"), or null when `url` is
 *   already the origin root or cannot be parsed as an absolute URL
 */
function getParentContainerUrl(url) {
  let parsed;
  try {
    parsed = new URL(url);
  } catch (_invalid) {
    return null;
  }

  const { pathname } = parsed;
  // "/" (or an empty path) is the top of the origin: there is nothing above it to probe
  if (pathname === '' || pathname === '/') return null;

  // Drop one trailing slash so a container ("/a/b/") and a resource ("/a/b") both resolve to "/a/"
  const trimmed = pathname.endsWith('/') ? pathname.slice(0, -1) : pathname;
  const parentPath = trimmed.slice(0, trimmed.lastIndexOf('/') + 1);
  return `${parsed.protocol}//${parsed.host}${parentPath}`;
}
281+
209282
/**
210283
* Get user's Pod URL from their WebID
211284
*
@@ -243,50 +316,48 @@ async function getPodUrl(webId, fetch) {
243316
});
244317
}
245318

246-
// If no Pod URLs found, derive from WebID as fallback
319+
// If no Pod URLs found, use Solid discovery (Link header + profile storage + path walk), then path heuristic
247320
if (!podUrls || podUrls.length === 0) {
248321
logger.info('[SolidStorage] No Pod URLs found in profile, deriving from WebID', { webId });
249322

250-
// Extract base URL from WebID
251-
// WebID format: http://localhost:3000/bisi/profile/card#me
252-
// Pod URL format: http://localhost:3000/bisi/
253323
try {
254324
const webIdUrl = new URL(webId);
255-
// Remove the fragment (#me) and path segments after the pod identifier
256-
// For most Solid servers, the Pod is at the root or one level deep
257-
// Pattern: http://host:port/podId/ -> Pod URL
258-
const pathParts = webIdUrl.pathname.split('/').filter((p) => p);
325+
// Resource URL = WebID without fragment (e.g. http://localhost:3000/bisi/profile/card)
326+
const resourceUrl = webIdUrl.hash
327+
? webIdUrl.href.replace(webIdUrl.hash, '')
328+
: webIdUrl.href;
329+
330+
// 1) Solid discovery: Link header, then profile storage predicate, then path walk (Bashlib getPodRoot)
331+
// Pass webId so we can read space:storage from the #me subject in the profile document
332+
let derivedPodUrl = await getPodRoot(resourceUrl, fetch, webId);
333+
if (derivedPodUrl) {
334+
logger.info('[SolidStorage] Derived Pod URL via Solid discovery', {
335+
webId,
336+
derivedPodUrl,
337+
});
338+
return derivedPodUrl;
339+
}
259340

260-
// If path contains 'profile', 'card', or similar, remove them
261-
// The Pod is usually at the base or one level up
341+
// 2) Fallback: path-based heuristic (WebID format: .../podId/profile/card#me -> Pod at .../podId/)
342+
const pathParts = webIdUrl.pathname.split('/').filter((p) => p);
262343
let podPath = '/';
263344
if (pathParts.length > 0) {
264-
// For pattern like /bisi/profile/card, Pod is at /bisi/
265-
// For pattern like /profile/card, Pod is at /
266345
const podIdentifier = pathParts[0];
267346
if (podIdentifier && podIdentifier !== 'profile' && podIdentifier !== 'card') {
268347
podPath = `/${podIdentifier}/`;
269348
}
270349
}
271-
272-
const derivedPodUrl = `${webIdUrl.protocol}//${webIdUrl.host}${podPath}`;
273-
logger.info('[SolidStorage] Derived Pod URL from WebID', {
350+
derivedPodUrl = `${webIdUrl.protocol}//${webIdUrl.host}${podPath}`;
351+
logger.info('[SolidStorage] Derived Pod URL from path heuristic', {
274352
webId,
275353
derivedPodUrl,
276354
pathParts,
277355
});
278356

279-
// Verify the Pod URL is accessible by trying to fetch the root
357+
// Verify the derived Pod URL is accessible
280358
try {
281-
const response = await fetch(derivedPodUrl, {
282-
method: 'HEAD',
283-
});
359+
const response = await fetch(derivedPodUrl, { method: 'HEAD' });
284360
if (response.ok || response.status === 401 || response.status === 403) {
285-
// 401/403 means the Pod exists but we need auth (which is expected)
286-
logger.info('[SolidStorage] Derived Pod URL is accessible', {
287-
derivedPodUrl,
288-
status: response.status,
289-
});
290361
return derivedPodUrl;
291362
}
292363
} catch (verifyError) {
@@ -295,7 +366,6 @@ async function getPodUrl(webId, fetch) {
295366
error: verifyError.message,
296367
});
297368
}
298-
299369
return derivedPodUrl;
300370
} catch (urlError) {
301371
logger.error('[SolidStorage] Failed to derive Pod URL from WebID', {

package-lock.json

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)