11const { logger } = require ( '@librechat/data-schemas' ) ;
22const { DataFactory, Writer, Parser } = require ( 'n3' ) ;
3+ const LinkHeader = require ( 'http-link-header' ) ;
34
45// LDP (Linked Data Platform) namespace for parsing container Turtle responses
56const LDP_NS = 'http://www.w3.org/ns/ldp#' ;
@@ -27,12 +28,23 @@ const {
2728 deleteFile,
2829 getPodUrlAll,
2930 createContainerAt,
31+ getSolidDataset,
32+ getThing,
33+ getUrl,
3034} = require ( '@inrupt/solid-client' ) ;
3135
3236// ACL/ACP namespaces
// NOTE(review): ACL_NS, RDF_NS and FOAF_NS are namespace IRI prefixes; their use sites are outside this view — confirm before changing.
3337const ACL_NS = 'http://www.w3.org/ns/auth/acl#' ;
3438const RDF_NS = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#' ;
3539const FOAF_NS = 'http://xmlns.com/foaf/0.1/' ;
40+ /** PIM Space vocabulary: Storage type (Link header) and storage predicate (profile data) */
// Class IRI: getPodRoot compares it against rel="type" entries parsed from a resource's Link header.
41+ const PIM_SPACE_STORAGE_TYPE = 'http://www.w3.org/ns/pim/space#Storage' ;
// Predicate IRI: getPodRoot passes it to getUrl() to read the storage pointer from the profile subject.
42+ const PIM_SPACE_STORAGE_PREDICATE = 'http://www.w3.org/ns/pim/space#storage' ;
43+
/**
 * Normalize a URL so that it ends with exactly the slash it needs to denote a container.
 *
 * A URL that already ends with `/` is returned unchanged; otherwise a single `/`
 * (and nothing else) is appended.
 *
 * @param {string} url - URL to normalize
 * @returns {string} URL with trailing slash
 */
function ensureTrailingSlash(url) {
  // No whitespace inside the template: the suffix must be "/" only, never " /".
  return url.endsWith('/') ? url : `${url}/`;
}
3648
3749/**
3850 * Solid Storage Utility Module
@@ -206,6 +218,67 @@ async function getSolidFetch(req) {
206218 }
207219}
208220
/**
 * Discover Pod root from a resource URL using Solid conventions (Link header, profile storage, path walk).
 * Based on https://github.com/SolidLabResearch/Bashlib/blob/80de25cbb4b3ed057f95e25bc057f1be9b00cef3/src/utils/util.ts getPodRoot.
 * Uses http-link-header (same as Bashlib) for Link header parsing.
 *
 * Steps performed for `url`, in order:
 *  1. HEAD the URL; if its Link header advertises pim:Storage with rel="type", `url` is the Pod root.
 *  2. Load `url` as a Solid dataset and read space:storage from the WebID subject
 *     (or from `url` itself when no WebID is given).
 *  3. Otherwise repeat on the parent container, stopping once the server root has been checked.
 *
 * Real servers (e.g. solidcommunity.net): profile document has Link headers but no Storage type;
 * the Pod root (e.g. https://user.solidcommunity.net/) returns Link: <http://www.w3.org/ns/pim/space#Storage>; rel="type".
 * The profile Turtle has space:storage on the #me subject (WebID), not on the document URL.
 *
 * @param {string} url - Resource URL (e.g. WebID document URL without fragment) for HEAD/path walk
 * @param {Function} fetch - Authenticated fetch function
 * @param {string} [webId] - Full WebID (with #me) so we can read space:storage from the profile subject
 * @returns {Promise<string|null>} Pod root URL with trailing slash, or null if not found
 *   (also null when the HEAD request is not ok or any step throws)
 */
async function getPodRoot(url, fetch, webId) {
  if (!url || !fetch) return null;
  try {
    const res = await fetch(url, { method: 'HEAD' });
    if (!res.ok) return null;

    const linkHeaders = res.headers.get('Link');
    if (linkHeaders) {
      const { refs } = LinkHeader.parse(linkHeaders);
      const advertisesStorage = refs.some(
        (ref) =>
          ref.rel === 'type' &&
          (ref.uri === PIM_SPACE_STORAGE_TYPE || ref.type === PIM_SPACE_STORAGE_TYPE),
      );
      if (advertisesStorage) {
        const podUrl = ensureTrailingSlash(url);
        logger.debug('[SolidStorage] Pod root from Link header', { url, podUrl });
        return podUrl;
      }
    }

    try {
      const ds = await getSolidDataset(url, { fetch });
      // space:storage is on the WebID subject (#me), not the document URL (see solidcommunity.net profile/card)
      const thing = webId ? getThing(ds, webId) : getThing(ds, url);
      const storageUrl = thing ? getUrl(thing, PIM_SPACE_STORAGE_PREDICATE) : null;
      if (storageUrl) {
        const podUrl = ensureTrailingSlash(storageUrl);
        logger.debug('[SolidStorage] Pod root from profile storage predicate', { url, podUrl });
        return podUrl;
      }
    } catch (datasetError) {
      // Best effort: not a Solid dataset or no storage pointer. Keep walking, but leave a trace.
      logger.debug('[SolidStorage] No storage pointer readable from resource', {
        url,
        error: datasetError?.message,
      });
    }

    // Path walk: drop the last path segment (or the empty segment after a trailing slash).
    const segments = url.split('/');
    const keep = url.endsWith('/') ? segments.length - 2 : segments.length - 1;
    const nextUrl = `${segments.slice(0, keep).join('/')}/`;
    if (nextUrl === url) return null; // avoid infinite loop when at server root

    // Once the host has been stripped only "scheme://" remains; that is not a
    // fetchable URL, so the server root was the last candidate.
    const walkedPastOrigin = /^[a-z][a-z\d+.-]*:\/*$/i.test(nextUrl);
    if (walkedPastOrigin) return null;

    return getPodRoot(nextUrl, fetch, webId);
  } catch (err) {
    logger.debug('[SolidStorage] getPodRoot failed for url', { url, error: err?.message });
    return null;
  }
}
281+
209282/**
210283 * Get user's Pod URL from their WebID
211284 *
@@ -243,50 +316,48 @@ async function getPodUrl(webId, fetch) {
243316 } ) ;
244317 }
245318
246- // If no Pod URLs found, derive from WebID as fallback
319+ // If no Pod URLs found, use Solid discovery (Link header + profile storage + path walk), then path heuristic
247320 if ( ! podUrls || podUrls . length === 0 ) {
248321 logger . info ( '[SolidStorage] No Pod URLs found in profile, deriving from WebID' , { webId } ) ;
249322
250- // Extract base URL from WebID
251- // WebID format: http://localhost:3000/bisi/profile/card#me
252- // Pod URL format: http://localhost:3000/bisi/
253323 try {
254324 const webIdUrl = new URL ( webId ) ;
255- // Remove the fragment (#me) and path segments after the pod identifier
256- // For most Solid servers, the Pod is at the root or one level deep
257- // Pattern: http://host:port/podId/ -> Pod URL
258- const pathParts = webIdUrl . pathname . split ( '/' ) . filter ( ( p ) => p ) ;
325+ // Resource URL = WebID without fragment (e.g. http://localhost:3000/bisi/profile/card)
326+ const resourceUrl = webIdUrl . hash
327+ ? webIdUrl . href . replace ( webIdUrl . hash , '' )
328+ : webIdUrl . href ;
329+
330+ // 1) Solid discovery: Link header, then profile storage predicate, then path walk (Bashlib getPodRoot)
331+ // Pass webId so we can read space:storage from the #me subject in the profile document
332+ let derivedPodUrl = await getPodRoot ( resourceUrl , fetch , webId ) ;
333+ if ( derivedPodUrl ) {
334+ logger . info ( '[SolidStorage] Derived Pod URL via Solid discovery' , {
335+ webId,
336+ derivedPodUrl,
337+ } ) ;
338+ return derivedPodUrl ;
339+ }
259340
260- // If path contains 'profile', ' card', or similar, remove them
261- // The Pod is usually at the base or one level up
341+ // 2) Fallback: path-based heuristic (WebID format: .../podId/profile/ card#me -> Pod at .../podId/)
342+ const pathParts = webIdUrl . pathname . split ( '/' ) . filter ( ( p ) => p ) ;
262343 let podPath = '/' ;
263344 if ( pathParts . length > 0 ) {
264- // For pattern like /bisi/profile/card, Pod is at /bisi/
265- // For pattern like /profile/card, Pod is at /
266345 const podIdentifier = pathParts [ 0 ] ;
267346 if ( podIdentifier && podIdentifier !== 'profile' && podIdentifier !== 'card' ) {
268347 podPath = `/${ podIdentifier } /` ;
269348 }
270349 }
271-
272- const derivedPodUrl = `${ webIdUrl . protocol } //${ webIdUrl . host } ${ podPath } ` ;
273- logger . info ( '[SolidStorage] Derived Pod URL from WebID' , {
350+ derivedPodUrl = `${ webIdUrl . protocol } //${ webIdUrl . host } ${ podPath } ` ;
351+ logger . info ( '[SolidStorage] Derived Pod URL from path heuristic' , {
274352 webId,
275353 derivedPodUrl,
276354 pathParts,
277355 } ) ;
278356
279- // Verify the Pod URL is accessible by trying to fetch the root
357+ // Verify the derived Pod URL is accessible
280358 try {
281- const response = await fetch ( derivedPodUrl , {
282- method : 'HEAD' ,
283- } ) ;
359+ const response = await fetch ( derivedPodUrl , { method : 'HEAD' } ) ;
284360 if ( response . ok || response . status === 401 || response . status === 403 ) {
285- // 401/403 means the Pod exists but we need auth (which is expected)
286- logger . info ( '[SolidStorage] Derived Pod URL is accessible' , {
287- derivedPodUrl,
288- status : response . status ,
289- } ) ;
290361 return derivedPodUrl ;
291362 }
292363 } catch ( verifyError ) {
@@ -295,7 +366,6 @@ async function getPodUrl(webId, fetch) {
295366 error : verifyError . message ,
296367 } ) ;
297368 }
298-
299369 return derivedPodUrl ;
300370 } catch ( urlError ) {
301371 logger . error ( '[SolidStorage] Failed to derive Pod URL from WebID' , {
0 commit comments