From de0c62283f6ababf7de5bb1a882b0b282f293333 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Biron?= Date: Tue, 21 Apr 2026 18:29:49 +0200 Subject: [PATCH 01/21] Install xml-js --- package-lock.json | 33 ++++++++++++++++----------------- package.json | 3 ++- 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/package-lock.json b/package-lock.json index 3e11f1cf9..2292b55ce 100644 --- a/package-lock.json +++ b/package-lock.json @@ -58,7 +58,8 @@ "swagger-ui-express": "^5.0.1", "turndown": "^7.2.1", "winston": "^3.17.0", - "winston-mail": "^2.0.0" + "winston-mail": "^2.0.0", + "xml-js": "^1.6.11" }, "bin": { "ota": "bin/ota.js" @@ -1273,7 +1274,6 @@ } ], "license": "MIT", - "peer": true, "engines": { "node": ">=20.19.0" }, @@ -1320,7 +1320,6 @@ } ], "license": "MIT", - "peer": true, "engines": { "node": ">=20.19.0" } @@ -1639,7 +1638,6 @@ "integrity": "sha512-jCs9ldd7NwzpgXDIf6P3+NrHh9/sD6CQdxHyjQI+h/6rDNo88ypBxxz45UDuZHz9r3tNz7N/VInSVoVdtXEI4A==", "devOptional": true, "license": "MIT", - "peer": true, "engines": { "node": "^14.21.3 || >=16" }, @@ -1793,7 +1791,6 @@ "resolved": "https://registry.npmjs.org/@octokit/core/-/core-7.0.4.tgz", "integrity": "sha512-jOT8V1Ba5BdC79sKrRWDdMT5l1R+XNHTPR6CPWzUP2EcfAcvIHZWF0eAbmRcpOOP5gVIwnqNg0C4nvh6Abc3OA==", "license": "MIT", - "peer": true, "dependencies": { "@octokit/auth-token": "^6.0.0", "@octokit/graphql": "^9.0.1", @@ -2294,8 +2291,7 @@ "version": "20.7.0", "resolved": "https://registry.npmjs.org/@types/node/-/node-20.7.0.tgz", "integrity": "sha512-zI22/pJW2wUZOVyguFaUL1HABdmSVxpXrzIqkjsHmyUjNhPoWM1CKfvVuXfetHhIok4RY573cqS0mZ1SJEnoTg==", - "license": "MIT", - "peer": true + "license": "MIT" }, "node_modules/@types/triple-beam": { "version": "1.3.5", @@ -2386,7 +2382,6 @@ "resolved": "https://registry.npmjs.org/acorn/-/acorn-8.15.0.tgz", "integrity": "sha512-NZyJarBfL7nWwIq+FDL6Zp/yHEhePMNnnJ0y3qfieCrmNvYct8uvtiV41UvlSe6apAfk0fY1FbWx+NwfmpvtTg==", "license": "MIT", - "peer": true, "bin": { "acorn": 
"bin/acorn" }, @@ -3108,7 +3103,6 @@ "resolved": "https://registry.npmjs.org/chai/-/chai-6.0.1.tgz", "integrity": "sha512-/JOoU2//6p5vCXh00FpNgtlw0LjvhGttaWc+y7wpW9yjBm3ys0dI8tSKZxIOgNruz5J0RleccatSIC3uxEZP0g==", "license": "MIT", - "peer": true, "engines": { "node": ">=18" } @@ -3569,7 +3563,6 @@ "resolved": "https://registry.npmjs.org/cosmiconfig/-/cosmiconfig-9.0.0.tgz", "integrity": "sha512-itvL5h8RETACmOTFc4UfIyB2RfEHi71Ax6E/PivVxq9NseKbOWpeyHEOIbmAw1rs8Ak0VursQNww7lf7YtUwzg==", "license": "MIT", - "peer": true, "dependencies": { "env-paths": "^2.2.1", "import-fresh": "^3.3.0", @@ -3989,8 +3982,7 @@ "version": "0.0.1495869", "resolved": "https://registry.npmjs.org/devtools-protocol/-/devtools-protocol-0.0.1495869.tgz", "integrity": "sha512-i+bkd9UYFis40RcnkW7XrOprCujXRAHg62IVh/Ah3G8MmNXpCGt1m0dTFhSdx/AVs8XEMbdOGRwdkR1Bcta8AA==", - "license": "BSD-3-Clause", - "peer": true + "license": "BSD-3-Clause" }, "node_modules/dezalgo": { "version": "1.0.4", @@ -4482,7 +4474,6 @@ "integrity": "sha512-ypowyDxpVSYpkXr9WPv2PAZCtNip1Mv5KTW0SCurXv/9iOpcrH9PaqUElksqEB6pChqHGDRCFTyrZlGhnLNGiA==", "deprecated": "This version is no longer supported. 
Please see https://eslint.org/version-support for other options.", "license": "MIT", - "peer": true, "dependencies": { "@eslint-community/eslint-utils": "^4.2.0", "@eslint-community/regexpp": "^4.6.1", @@ -4624,7 +4615,6 @@ "resolved": "https://registry.npmjs.org/eslint-plugin-import/-/eslint-plugin-import-2.32.0.tgz", "integrity": "sha512-whOE1HFo/qJDyX4SnXzP4N6zOWn79WhnCUY/iDR0mPfQZO8wcYE4JClzI2oZrhBnnMUCBCHZhO6VQyoBU95mZA==", "license": "MIT", - "peer": true, "dependencies": { "@rtsao/scc": "^1.1.0", "array-includes": "^3.1.9", @@ -8870,7 +8860,6 @@ "integrity": "sha512-QabGIvu7F0hAMiKGHZCIRHMb6UoH0QAJA2OaqxEU2tL5noXPrxUcotg2l3ttOA4p1PFnVIGkr6PXRAWlM2evVQ==", "hasInstallScript": true, "license": "Apache-2.0", - "peer": true, "dependencies": { "@puppeteer/browsers": "2.10.10", "chromium-bidi": "8.0.0", @@ -8926,7 +8915,6 @@ "resolved": "https://registry.npmjs.org/puppeteer-extra/-/puppeteer-extra-3.3.6.tgz", "integrity": "sha512-rsLBE/6mMxAjlLd06LuGacrukP2bqbzKCLzV1vrhHFavqQE/taQ2UXv3H5P0Ls7nsrASa+6x3bDbXHpqMwq+7A==", "license": "MIT", - "peer": true, "dependencies": { "@types/debug": "^4.1.0", "debug": "^4.1.1", @@ -9887,7 +9875,6 @@ "resolved": "https://registry.npmjs.org/socks/-/socks-2.8.7.tgz", "integrity": "sha512-HLpt+uLy/pxB+bum/9DzAgiKS8CX1EvbWxI4zlmgGCExImLdiad2iCwXT5Z4c9c3Eq8rP2318mPW2c+QbtjK8A==", "license": "MIT", - "peer": true, "dependencies": { "ip-address": "^10.0.1", "smart-buffer": "^4.2.0" @@ -11334,6 +11321,18 @@ } } }, + "node_modules/xml-js": { + "version": "1.6.11", + "resolved": "https://registry.npmjs.org/xml-js/-/xml-js-1.6.11.tgz", + "integrity": "sha512-7rVi2KMfwfWFl+GpPg6m80IVMWXLRjO+PxTq7V2CDhoGak0wzYzFgUY2m4XJ47OGdXd8eLE8EmwfAmdjw7lC1g==", + "license": "MIT", + "dependencies": { + "sax": "^1.2.4" + }, + "bin": { + "xml-js": "bin/cli.js" + } + }, "node_modules/xml-name-validator": { "version": "5.0.0", "resolved": "https://registry.npmjs.org/xml-name-validator/-/xml-name-validator-5.0.0.tgz", diff --git a/package.json b/package.json 
index f21c5f107..db0fbd146 100644 --- a/package.json +++ b/package.json @@ -100,7 +100,8 @@ "swagger-ui-express": "^5.0.1", "turndown": "^7.2.1", "winston": "^3.17.0", - "winston-mail": "^2.0.0" + "winston-mail": "^2.0.0", + "xml-js": "^1.6.11" }, "devDependencies": { "@commitlint/cli": "^19.8.1", From 78ff82e9f42b416c9a84619c0b38877f408bf9d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Biron?= Date: Tue, 21 Apr 2026 18:30:03 +0200 Subject: [PATCH 02/21] Add findRecent method --- .../recorder/repositories/git/index.js | 20 +++ .../recorder/repositories/git/index.test.js | 142 ++++++++++++++++++ .../recorder/repositories/interface.js | 14 ++ .../recorder/repositories/mongo/index.js | 16 ++ .../recorder/repositories/mongo/index.test.js | 140 +++++++++++++++++ 5 files changed, 332 insertions(+) diff --git a/src/archivist/recorder/repositories/git/index.js b/src/archivist/recorder/repositories/git/index.js index 5caf59948..32904cad3 100644 --- a/src/archivist/recorder/repositories/git/index.js +++ b/src/archivist/recorder/repositories/git/index.js @@ -92,6 +92,26 @@ export default class GitRepository extends RepositoryInterface { return Promise.all((await this.#getCommits()).map(commit => this.#toDomain(commit, { deferContentLoading: true }))); } + async findRecent(limit, { serviceId, termsType } = {}) { + const commits = (await this.#getCommits()).reverse(); + const records = []; + + for (const commit of commits) { + if (records.length >= limit) break; + + const record = await this.#toDomain(commit, { deferContentLoading: true }); + + if (!record) continue; + + if (serviceId !== undefined && record.serviceId !== serviceId) continue; + if (termsType !== undefined && record.termsType !== termsType) continue; + + records.push(record); + } + + return records; + } + async count() { return (await this.git.log(Object.values(DataMapper.COMMIT_MESSAGE_PREFIXES).map(prefix => `--grep=${prefix}`))).length; } diff --git 
a/src/archivist/recorder/repositories/git/index.test.js b/src/archivist/recorder/repositories/git/index.test.js index 6c7e1dea0..ee8d8b6e6 100644 --- a/src/archivist/recorder/repositories/git/index.test.js +++ b/src/archivist/recorder/repositories/git/index.test.js @@ -584,6 +584,148 @@ describe('GitRepository', () => { }); }); + describe('#findRecent', () => { + const OTHER_SERVICE = 'other_service'; + const OTHER_TERMS = 'Privacy Policy'; + + before(async function () { + this.timeout(5000); + + await subject.save(new Version({ + serviceId: SERVICE_PROVIDER_ID, + termsType: TERMS_TYPE, + content: CONTENT, + fetchDate: FETCH_DATE_EARLIER, + snapshotIds: [SNAPSHOT_ID], + })); + await subject.save(new Version({ + serviceId: SERVICE_PROVIDER_ID, + termsType: TERMS_TYPE, + content: `${CONTENT} - updated`, + fetchDate: FETCH_DATE, + snapshotIds: [SNAPSHOT_ID], + })); + await subject.save(new Version({ + serviceId: SERVICE_PROVIDER_ID, + termsType: OTHER_TERMS, + content: CONTENT, + fetchDate: FETCH_DATE_LATER, + snapshotIds: [SNAPSHOT_ID], + })); + await subject.save(new Version({ + serviceId: OTHER_SERVICE, + termsType: TERMS_TYPE, + content: CONTENT, + fetchDate: FETCH_DATE_LATER, + snapshotIds: [SNAPSHOT_ID], + })); + }); + + after(() => subject.removeAll()); + + context('without filters', () => { + let records; + + before(async () => { + records = await subject.findRecent(10); + }); + + it('returns records in descending chronological order', () => { + const dates = records.map(record => record.fetchDate.getTime()); + + expect(dates).to.deep.equal([...dates].sort((a, b) => b - a)); + }); + + it('returns all matching records', () => { + expect(records).to.have.length(4); + }); + + it('does not load content eagerly', () => { + for (const record of records) { + expect(() => record.content).to.throw('Content not defined'); + } + }); + + it('exposes the metadata needed for feed entries', () => { + const [record] = records; + + expect(record.id).to.be.a('string'); + 
expect(record.serviceId).to.be.a('string'); + expect(record.termsType).to.be.a('string'); + expect(record.fetchDate).to.be.an.instanceof(Date); + expect(record.isFirstRecord).to.be.a('boolean'); + expect(record.isTechnicalUpgrade).to.be.a('boolean'); + }); + }); + + context('when limit is smaller than the number of matching records', () => { + let records; + + before(async () => { + records = await subject.findRecent(2); + }); + + it('returns at most limit records', () => { + expect(records).to.have.length(2); + }); + + it('returns the most recent records', () => { + for (const record of records) { + expect(record.fetchDate.getTime()).to.be.at.least(FETCH_DATE.getTime()); + } + }); + }); + + context('when a serviceId filter is given', () => { + let records; + + before(async () => { + records = await subject.findRecent(10, { serviceId: SERVICE_PROVIDER_ID }); + }); + + it('returns only records for that service', () => { + for (const record of records) { + expect(record.serviceId).to.equal(SERVICE_PROVIDER_ID); + } + }); + + it('returns all records that match', () => { + expect(records).to.have.length(3); + }); + }); + + context('when both serviceId and termsType filters are given', () => { + let records; + + before(async () => { + records = await subject.findRecent(10, { serviceId: SERVICE_PROVIDER_ID, termsType: TERMS_TYPE }); + }); + + it('returns only records for that service and terms type', () => { + for (const record of records) { + expect(record.serviceId).to.equal(SERVICE_PROVIDER_ID); + expect(record.termsType).to.equal(TERMS_TYPE); + } + }); + + it('returns all records that match', () => { + expect(records).to.have.length(2); + }); + }); + + context('when filters match no record', () => { + let records; + + before(async () => { + records = await subject.findRecent(10, { serviceId: 'unknown' }); + }); + + it('returns an empty array', () => { + expect(records).to.deep.equal([]); + }); + }); + }); + describe('#findLatest', () => { context('when there are 
records for the given service', () => { let lastSnapshotId; diff --git a/src/archivist/recorder/repositories/interface.js b/src/archivist/recorder/repositories/interface.js index 1d9270944..1c1cfd7d6 100644 --- a/src/archivist/recorder/repositories/interface.js +++ b/src/archivist/recorder/repositories/interface.js @@ -79,6 +79,20 @@ class RepositoryInterface { throw new Error(`#findAll method is not implemented in ${this.constructor.name}`); } + /** + * Find the most recent records in the repository, optionally filtered by service ID and terms type + * For performance reasons, the content of the records will not be loaded. Use #loadRecordContent to load the content of individual records + * @see RepositoryInterface#loadRecordContent + * @param {number} limit - Maximum number of records to return + * @param {object} [filters] - Optional filters + * @param {string} [filters.serviceId] - Restrict results to this service ID + * @param {string} [filters.termsType] - Restrict results to this terms type + * @returns {Promise>} Promise that will be resolved with an array of records in descending chronological order + */ + async findRecent(limit, filters) { + throw new Error(`#findRecent method is not implemented in ${this.constructor.name}`); + } + /** * Count the total number of records in the repository * For performance reasons, use this method rather than counting the number of entries returned by #findAll if you only need the size of a repository diff --git a/src/archivist/recorder/repositories/mongo/index.js b/src/archivist/recorder/repositories/mongo/index.js index 2a4abb18c..b9cf9437c 100644 --- a/src/archivist/recorder/repositories/mongo/index.js +++ b/src/archivist/recorder/repositories/mongo/index.js @@ -93,6 +93,22 @@ export default class MongoRepository extends RepositoryInterface { .map(mongoDocument => this.#toDomain(mongoDocument, { deferContentLoading: true }))); } + async findRecent(limit, { serviceId, termsType } = {}) { + const query = {}; + + if 
(serviceId !== undefined) query.serviceId = serviceId; + if (termsType !== undefined) query.termsType = termsType; + + const mongoDocuments = await this.collection + .find(query) + .project({ content: 0 }) + .sort({ fetchDate: -1 }) + .limit(limit) + .toArray(); + + return Promise.all(mongoDocuments.map(mongoDocument => this.#toDomain(mongoDocument, { deferContentLoading: true }))); + } + count() { return this.collection.countDocuments(); } diff --git a/src/archivist/recorder/repositories/mongo/index.test.js b/src/archivist/recorder/repositories/mongo/index.test.js index 61ecfd1d0..880c9b2e3 100644 --- a/src/archivist/recorder/repositories/mongo/index.test.js +++ b/src/archivist/recorder/repositories/mongo/index.test.js @@ -671,6 +671,146 @@ describe('MongoRepository', () => { }); }); + describe('#findRecent', () => { + const OTHER_SERVICE = 'other_service'; + const OTHER_TERMS = 'Privacy Policy'; + + before(async () => { + await subject.save(new Version({ + serviceId: SERVICE_PROVIDER_ID, + termsType: TERMS_TYPE, + content: CONTENT, + fetchDate: FETCH_DATE_EARLIER, + snapshotIds: [SNAPSHOT_ID], + })); + await subject.save(new Version({ + serviceId: SERVICE_PROVIDER_ID, + termsType: TERMS_TYPE, + content: `${CONTENT} - updated`, + fetchDate: FETCH_DATE, + snapshotIds: [SNAPSHOT_ID], + })); + await subject.save(new Version({ + serviceId: SERVICE_PROVIDER_ID, + termsType: OTHER_TERMS, + content: CONTENT, + fetchDate: FETCH_DATE_LATER, + snapshotIds: [SNAPSHOT_ID], + })); + await subject.save(new Version({ + serviceId: OTHER_SERVICE, + termsType: TERMS_TYPE, + content: CONTENT, + fetchDate: FETCH_DATE_LATER, + snapshotIds: [SNAPSHOT_ID], + })); + }); + + after(() => subject.removeAll()); + + context('without filters', () => { + let records; + + before(async () => { + records = await subject.findRecent(10); + }); + + it('returns records in descending chronological order', () => { + const dates = records.map(record => record.fetchDate.getTime()); + + 
expect(dates).to.deep.equal([...dates].sort((a, b) => b - a)); + }); + + it('returns all matching records', () => { + expect(records).to.have.length(4); + }); + + it('does not load content eagerly', () => { + for (const record of records) { + expect(() => record.content).to.throw('Content not defined'); + } + }); + + it('exposes the metadata needed for feed entries', () => { + const [record] = records; + + expect(record.id).to.be.a('string'); + expect(record.serviceId).to.be.a('string'); + expect(record.termsType).to.be.a('string'); + expect(record.fetchDate).to.be.an.instanceof(Date); + expect(record.isFirstRecord).to.be.a('boolean'); + expect(record.isTechnicalUpgrade).to.be.a('boolean'); + }); + }); + + context('when limit is smaller than the number of matching records', () => { + let records; + + before(async () => { + records = await subject.findRecent(2); + }); + + it('returns at most limit records', () => { + expect(records).to.have.length(2); + }); + + it('returns the most recent records', () => { + for (const record of records) { + expect(record.fetchDate.getTime()).to.be.at.least(FETCH_DATE.getTime()); + } + }); + }); + + context('when a serviceId filter is given', () => { + let records; + + before(async () => { + records = await subject.findRecent(10, { serviceId: SERVICE_PROVIDER_ID }); + }); + + it('returns only records for that service', () => { + for (const record of records) { + expect(record.serviceId).to.equal(SERVICE_PROVIDER_ID); + } + }); + + it('returns all records that match', () => { + expect(records).to.have.length(3); + }); + }); + + context('when both serviceId and termsType filters are given', () => { + let records; + + before(async () => { + records = await subject.findRecent(10, { serviceId: SERVICE_PROVIDER_ID, termsType: TERMS_TYPE }); + }); + + it('returns only records for that service and terms type', () => { + for (const record of records) { + expect(record.serviceId).to.equal(SERVICE_PROVIDER_ID); + 
expect(record.termsType).to.equal(TERMS_TYPE); + } + }); + + it('returns all records that match', () => { + expect(records).to.have.length(2); + }); + }); + + context('when filters match no record', () => { + let records; + + before(async () => { + records = await subject.findRecent(10, { serviceId: 'unknown' }); + }); + + it('returns an empty array', () => { + expect(records).to.deep.equal([]); + }); + }); + }); + describe('#findLatest', () => { context('when there are records for the given service', () => { let lastSnapshotId; From 18a518161b0b3c08fd952690cf3a59d03426bb78 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Biron?= Date: Wed, 22 Apr 2026 08:42:09 +0200 Subject: [PATCH 03/21] Add collection Atom feed endpoint --- src/collection-api/routes/feed.js | 149 +++++++++++ src/collection-api/routes/feed.test.js | 231 ++++++++++++++++++ src/collection-api/routes/index.js | 2 + src/collection-api/routes/versions.js | 6 +- .../routes/versionsRepository.js | 9 + 5 files changed, 393 insertions(+), 4 deletions(-) create mode 100644 src/collection-api/routes/feed.js create mode 100644 src/collection-api/routes/feed.test.js create mode 100644 src/collection-api/routes/versionsRepository.js diff --git a/src/collection-api/routes/feed.js b/src/collection-api/routes/feed.js new file mode 100644 index 000000000..7758b473c --- /dev/null +++ b/src/collection-api/routes/feed.js @@ -0,0 +1,149 @@ +import express from 'express'; +import { js2xml } from 'xml-js'; + +import { getCollection } from '../../archivist/collection/index.js'; +import { COMMIT_MESSAGE_PREFIXES } from '../../archivist/recorder/repositories/git/dataMapper.js'; +import { toISODateWithoutMilliseconds } from '../../archivist/utils/date.js'; + +import versionsRepository, { storageConfig } from './versionsRepository.js'; + +const TAG_AUTHORITY = 'opentermsarchive.org,2026'; +const FEED_AUTHOR_NAME = 'OTA-Bot'; +const DEFAULT_LIMIT = 100; + +const RECORD_TYPES = { + firstRecord: 'First record', + 
technicalUpgrade: 'Technical upgrade', + change: 'Change', +}; + +const SCHEMES = { + service: `tag:${TAG_AUTHORITY}:scheme:service`, + termsType: `tag:${TAG_AUTHORITY}:scheme:terms-type`, + recordType: `tag:${TAG_AUTHORITY}:scheme:record-type`, +}; + +function buildAbsoluteBaseUrl(req) { + return `${req.protocol}://${req.get('host')}${req.baseUrl}`; +} + +function classifyRecordType(version) { + if (version.isFirstRecord) return RECORD_TYPES.firstRecord; + if (version.isTechnicalUpgrade) return RECORD_TYPES.technicalUpgrade; + + return RECORD_TYPES.change; +} + +function buildEntryTitle(version) { + let prefix = COMMIT_MESSAGE_PREFIXES.update; + + if (version.isFirstRecord) prefix = COMMIT_MESSAGE_PREFIXES.startTracking; + else if (version.isTechnicalUpgrade) prefix = COMMIT_MESSAGE_PREFIXES.technicalUpgrade; + + return `${prefix} ${version.serviceId} ${version.termsType}`; +} + +function buildVersionLink(baseUrl, version) { + const encodedDate = encodeURIComponent(toISODateWithoutMilliseconds(version.fetchDate)); + const encodedService = encodeURIComponent(version.serviceId); + const encodedTermsType = encodeURIComponent(version.termsType); + + return `${baseUrl}/version/${encodedService}/${encodedTermsType}/${encodedDate}`; +} + +function buildEntryId(collection, version) { + return `tag:${TAG_AUTHORITY}:version:${collection.metadata?.id}:${storageConfig.type}:${version.id}`; +} + +function buildEntry(collection, baseUrl, version) { + return { + id: { _text: buildEntryId(collection, version) }, + link: { _attributes: { + rel: 'alternate', + type: 'text/html', + href: buildVersionLink(baseUrl, version), + } }, + title: { _text: buildEntryTitle(version) }, + updated: { _text: version.fetchDate.toISOString() }, + category: [ + { _attributes: { term: version.serviceId, scheme: SCHEMES.service } }, + { _attributes: { term: version.termsType, scheme: SCHEMES.termsType } }, + { _attributes: { term: classifyRecordType(version), scheme: SCHEMES.recordType } }, + ], + }; 
+} + +function buildFeedDocument({ collection, selfHref, feedId, versions, baseUrl }) { + const latestFetchDate = versions.length > 0 ? versions[0].fetchDate : new Date(); + + const feed = { + _attributes: { xmlns: 'http://www.w3.org/2005/Atom' }, + title: { _text: collection.metadata?.name || '' }, + subtitle: { _text: collection.metadata?.tagline || '' }, + id: { _text: feedId }, + updated: { _text: latestFetchDate.toISOString() }, + link: { _attributes: { rel: 'self', href: selfHref } }, + author: { name: { _text: FEED_AUTHOR_NAME } }, + }; + + if (collection.metadata?.logo) { + feed.logo = { _text: collection.metadata.logo }; + } + + feed.entry = versions.map(version => buildEntry(collection, baseUrl, version)); + + return { + _declaration: { _attributes: { version: '1.0', encoding: 'utf-8' } }, + feed, + }; +} + +function sendAtom(res, xml) { + res.set('Content-Type', 'application/atom+xml; charset=utf-8'); + res.status(200).send(xml); +} + +function render(document) { + return js2xml(document, { compact: true, spaces: 2 }); +} + +/** + * @returns {express.Router} The router instance + * @swagger + * tags: + * name: Feeds + * description: Atom feeds of version changes + */ +export default function feedRouter() { + const router = express.Router(); + + /** + * @swagger + * /feed: + * get: + * summary: Atom feed of the latest version changes across the whole collection. + * tags: [Feeds] + * produces: + * - application/atom+xml + * responses: + * 200: + * description: An Atom 1.0 feed listing the latest version records, newest first. 
+ * content: + * application/atom+xml: + * schema: + * type: string + */ + router.get('/feed', async (req, res) => { + const collection = await getCollection(); + const baseUrl = buildAbsoluteBaseUrl(req); + const selfHref = `${baseUrl}/feed`; + const feedId = `tag:${TAG_AUTHORITY}:feed:${collection.metadata?.id}`; + + const versions = await versionsRepository.findRecent(DEFAULT_LIMIT); + const document = buildFeedDocument({ collection, selfHref, feedId, versions, baseUrl }); + + sendAtom(res, render(document)); + }); + + return router; +} diff --git a/src/collection-api/routes/feed.test.js b/src/collection-api/routes/feed.test.js new file mode 100644 index 000000000..5f8e7325e --- /dev/null +++ b/src/collection-api/routes/feed.test.js @@ -0,0 +1,231 @@ +import { expect } from 'chai'; +import config from 'config'; +import supertest from 'supertest'; + +import { getCollection } from '../../archivist/collection/index.js'; +import RepositoryFactory from '../../archivist/recorder/repositories/factory.js'; +import Version from '../../archivist/recorder/version.js'; +import { toISODateWithoutMilliseconds } from '../../archivist/utils/date.js'; +import app from '../server.js'; + +const basePath = config.get('@opentermsarchive/engine.collection-api.basePath'); +const request = supertest(app); +const storageConfig = config.get('@opentermsarchive/engine.recorder.versions.storage'); + +function extractTag(xml, tag) { + const match = xml.match(new RegExp(`<${tag}>([\\s\\S]*?)`)); + + return match ? 
match[1] : null; +} + +describe('Feed API', () => { + describe('GET /feed', () => { + let response; + let collection; + + before(async () => { + collection = await getCollection(); + response = await request.get(`${basePath}/v1/feed`); + }); + + it('responds with 200 status code', () => { + expect(response.status).to.equal(200); + }); + + it('responds with Content-Type application/atom+xml', () => { + expect(response.headers['content-type']).to.match(/^application\/atom\+xml/); + }); + + it('is a valid Atom feed root', () => { + expect(response.text).to.match(/^<\?xml version="1\.0"/); + expect(response.text).to.include(' { + it('has a title matching the collection name', () => { + expect(extractTag(response.text, 'title')).to.equal(collection.metadata.name); + }); + + it('has a subtitle matching the collection tagline', () => { + expect(extractTag(response.text, 'subtitle')).to.equal(collection.metadata.tagline); + }); + + it('has a tag URI id based on the collection id', () => { + expect(extractTag(response.text, 'id')).to.equal(`tag:opentermsarchive.org,2026:feed:${collection.metadata.id}`); + }); + + it('has an updated element with a valid ISO 8601 datetime', () => { + const updated = extractTag(response.text, 'updated'); + + expect(updated).to.be.a('string'); + expect(new Date(updated).toString()).to.not.equal('Invalid Date'); + }); + + it('has a self link pointing to the feed endpoint', () => { + const selfHrefMatch = response.text.match(/]*rel="self"[^>]*href="([^"]+)"/); + + expect(selfHrefMatch).to.not.be.null; + expect(selfHrefMatch[1]).to.match(new RegExp(`${basePath}/v1/feed$`)); + }); + + it('has an author named OTA-Bot', () => { + expect(response.text).to.match(/[\s\S]*OTA-Bot<\/name>[\s\S]*<\/author>/); + }); + + it('has a logo matching the collection logo', () => { + expect(extractTag(response.text, 'logo')).to.equal(collection.metadata.logo); + }); + }); + }); + + describe('GET /feed — entries', () => { + const FETCH_DATE_FIRST = new 
Date('2023-01-01T12:00:00Z'); + const FETCH_DATE_CHANGE = new Date('2023-06-15T08:30:00Z'); + const FETCH_DATE_UPGRADE = new Date('2024-02-10T16:45:00Z'); + + let response; + let repository; + let savedVersions; + + before(async function () { + this.timeout(5000); + repository = RepositoryFactory.create(storageConfig); + await repository.initialize(); + + const firstRecord = await repository.save(new Version({ + serviceId: 'service-1', + termsType: 'Terms of Service', + content: 'first content', + fetchDate: FETCH_DATE_FIRST, + snapshotIds: ['snapshot_1'], + })); + + const changeRecord = await repository.save(new Version({ + serviceId: 'service-1', + termsType: 'Terms of Service', + content: 'changed content', + fetchDate: FETCH_DATE_CHANGE, + snapshotIds: ['snapshot_2'], + })); + + const upgradeRecord = await repository.save(new Version({ + serviceId: 'service-2', + termsType: 'Privacy Policy', + content: 'initial privacy', + fetchDate: new Date('2024-01-01T00:00:00Z'), + snapshotIds: ['snapshot_3'], + })); + + const technicalUpgradeRecord = await repository.save(new Version({ + serviceId: 'service-2', + termsType: 'Privacy Policy', + content: 'upgraded privacy', + fetchDate: FETCH_DATE_UPGRADE, + snapshotIds: ['snapshot_4'], + isTechnicalUpgrade: true, + })); + + savedVersions = { firstRecord, changeRecord, upgradeRecord, technicalUpgradeRecord }; + response = await request.get(`${basePath}/v1/feed`); + }); + + after(() => repository.removeAll()); + + it('orders entries newest-first', () => { + const updates = [...response.text.matchAll(/[\s\S]*?([^<]+)<\/updated>[\s\S]*?<\/entry>/g)].map(match => match[1]); + + expect(updates).to.deep.equal([...updates].sort().reverse()); + }); + + describe('entry metadata', () => { + let firstEntry; + + before(() => { + firstEntry = response.text.match(/[\s\S]*?<\/entry>/)[0]; + }); + + it('has an id tag URI including storage type and record id', () => { + const collectionId = 'test'; + const expected = 
`tag:opentermsarchive.org,2026:version:${collectionId}:${storageConfig.type}:${savedVersions.technicalUpgradeRecord.id}`; + + expect(firstEntry).to.include(`${expected}`); + }); + + it('has an alternate link to the version API endpoint', () => { + const href = firstEntry.match(/]*rel="alternate"[^>]*href="([^"]+)"/)[1]; + const expectedPathFragment = `/version/${encodeURIComponent('service-2')}/${encodeURIComponent('Privacy Policy')}/${encodeURIComponent(toISODateWithoutMilliseconds(FETCH_DATE_UPGRADE))}`; + + expect(href).to.include(expectedPathFragment); + }); + + it('has a type="text/html" on the alternate link', () => { + expect(firstEntry).to.match(/]*rel="alternate"[^>]*type="text\/html"/); + }); + + it('has a title reconstructed from commit prefix + serviceId + termsType', () => { + const title = firstEntry.match(/]*>([\s\S]*?)<\/title>/)[1]; + + expect(title).to.include('Apply technical or declaration upgrade on'); + expect(title).to.include('service-2'); + expect(title).to.include('Privacy Policy'); + }); + + it('has an updated element matching the fetch date', () => { + const updated = firstEntry.match(/([^<]+)<\/updated>/)[1]; + + expect(new Date(updated).toISOString()).to.equal(FETCH_DATE_UPGRADE.toISOString()); + }); + + it('has three categories with the expected schemes', () => { + const categories = [...firstEntry.matchAll(//g)].map(match => match[1]); + + expect(categories).to.have.length(3); + + const schemes = categories.map(attrs => attrs.match(/scheme="([^"]+)"/)[1]); + + expect(schemes).to.include('tag:opentermsarchive.org,2026:scheme:service'); + expect(schemes).to.include('tag:opentermsarchive.org,2026:scheme:terms-type'); + expect(schemes).to.include('tag:opentermsarchive.org,2026:scheme:record-type'); + }); + + it('has category terms for service, terms type and record type', () => { + const categories = [...firstEntry.matchAll(//g)].map(match => match[1]); + const terms = categories.map(attrs => attrs.match(/term="([^"]+)"/)[1]); + + 
expect(terms).to.include('service-2'); + expect(terms).to.include('Privacy Policy'); + expect(terms).to.include('Technical upgrade'); + }); + }); + + describe('record-type classification', () => { + function findEntryById(xml, recordId) { + const match = [...xml.matchAll(/[\s\S]*?<\/entry>/g)].find(entry => entry[0].includes(`:${recordId}`)); + + return match && match[0]; + } + + it('classifies a first record as "First record"', () => { + const entry = findEntryById(response.text, savedVersions.upgradeRecord.id); + + expect(entry).to.not.be.undefined; + expect(entry).to.match(/term="First record"/); + }); + + it('classifies a content change as "Change"', () => { + const entry = findEntryById(response.text, savedVersions.changeRecord.id); + + expect(entry).to.not.be.undefined; + expect(entry).to.match(/term="Change"/); + }); + + it('classifies a technical upgrade as "Technical upgrade"', () => { + const entry = findEntryById(response.text, savedVersions.technicalUpgradeRecord.id); + + expect(entry).to.not.be.undefined; + expect(entry).to.match(/term="Technical upgrade"/); + }); + }); + }); +}); diff --git a/src/collection-api/routes/index.js b/src/collection-api/routes/index.js index b99636b90..a334b1691 100644 --- a/src/collection-api/routes/index.js +++ b/src/collection-api/routes/index.js @@ -5,6 +5,7 @@ import { getCollection } from '../../archivist/collection/index.js'; import * as Services from '../../archivist/services/index.js'; import docsRouter from './docs.js'; +import feedRouter from './feed.js'; import metadataRouter from './metadata.js'; import servicesRouter from './services.js'; import versionsRouter from './versions.js'; @@ -37,6 +38,7 @@ export default async function apiRouter(basePath) { router.use(await metadataRouter(collection, services)); router.use(servicesRouter(services)); router.use(versionsRouter); + router.use(feedRouter()); return router; } diff --git a/src/collection-api/routes/versions.js b/src/collection-api/routes/versions.js index 
e420f8998..555f74c56 100644 --- a/src/collection-api/routes/versions.js +++ b/src/collection-api/routes/versions.js @@ -1,9 +1,9 @@ -import config from 'config'; import express from 'express'; -import RepositoryFactory from '../../archivist/recorder/repositories/factory.js'; import { toISODateWithoutMilliseconds } from '../../archivist/utils/date.js'; +import versionsRepository from './versionsRepository.js'; + /** * @private * @swagger @@ -29,8 +29,6 @@ import { toISODateWithoutMilliseconds } from '../../archivist/utils/date.js'; */ const router = express.Router(); -const versionsRepository = await RepositoryFactory.create(config.get('@opentermsarchive/engine.recorder.versions.storage')).initialize(); - /** * @private * @swagger diff --git a/src/collection-api/routes/versionsRepository.js b/src/collection-api/routes/versionsRepository.js new file mode 100644 index 000000000..d76d06ce1 --- /dev/null +++ b/src/collection-api/routes/versionsRepository.js @@ -0,0 +1,9 @@ +import config from 'config'; + +import RepositoryFactory from '../../archivist/recorder/repositories/factory.js'; + +export const storageConfig = config.get('@opentermsarchive/engine.recorder.versions.storage'); + +const versionsRepository = await RepositoryFactory.create(storageConfig).initialize(); + +export default versionsRepository; From e73d36222c4c86f34b2acd3722c728c9ceabdcf6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Biron?= Date: Wed, 22 Apr 2026 08:57:52 +0200 Subject: [PATCH 04/21] Add service-scoped feed endpoint --- src/collection-api/routes/feed.js | 49 ++++++++++- src/collection-api/routes/feed.test.js | 116 +++++++++++++++++++++++++ src/collection-api/routes/index.js | 2 +- src/collection-api/routes/services.js | 5 +- src/collection-api/routes/utils.js | 5 ++ 5 files changed, 172 insertions(+), 5 deletions(-) create mode 100644 src/collection-api/routes/utils.js diff --git a/src/collection-api/routes/feed.js b/src/collection-api/routes/feed.js index 7758b473c..fee9fdccd 
100644 --- a/src/collection-api/routes/feed.js +++ b/src/collection-api/routes/feed.js @@ -6,6 +6,7 @@ import { COMMIT_MESSAGE_PREFIXES } from '../../archivist/recorder/repositories/g import { toISODateWithoutMilliseconds } from '../../archivist/utils/date.js'; import versionsRepository, { storageConfig } from './versionsRepository.js'; +import { findServiceCaseInsensitive } from './utils.js'; const TAG_AUTHORITY = 'opentermsarchive.org,2026'; const FEED_AUTHOR_NAME = 'OTA-Bot'; @@ -108,13 +109,14 @@ function render(document) { } /** - * @returns {express.Router} The router instance + * @param {object} services The services to be exposed by the API + * @returns {express.Router} The router instance * @swagger * tags: * name: Feeds * description: Atom feeds of version changes */ -export default function feedRouter() { +export default function feedRouter(services) { const router = express.Router(); /** @@ -145,5 +147,48 @@ export default function feedRouter() { sendAtom(res, render(document)); }); + /** + * @swagger + * /feed/{serviceId}: + * get: + * summary: Atom feed of the latest version changes scoped to a single service. + * tags: [Feeds] + * produces: + * - application/atom+xml + * parameters: + * - in: path + * name: serviceId + * description: The ID of the service. Case-insensitive. + * schema: + * type: string + * required: true + * responses: + * 200: + * description: An Atom 1.0 feed listing the latest version records for the given service, newest first. + * content: + * application/atom+xml: + * schema: + * type: string + * 404: + * description: No service matching the provided ID is found. 
+ */ + router.get('/feed/:serviceId', async (req, res) => { + const service = findServiceCaseInsensitive(services, req.params.serviceId); + + if (!service) { + return res.status(404).send('Service not found'); + } + + const collection = await getCollection(); + const baseUrl = buildAbsoluteBaseUrl(req); + const selfHref = `${baseUrl}/feed/${encodeURIComponent(service.id)}`; + const feedId = `tag:${TAG_AUTHORITY}:feed:${collection.metadata?.id}:${service.id}`; + + const versions = await versionsRepository.findRecent(DEFAULT_LIMIT, { serviceId: service.id }); + const document = buildFeedDocument({ collection, selfHref, feedId, versions, baseUrl }); + + return sendAtom(res, render(document)); + }); + return router; } diff --git a/src/collection-api/routes/feed.test.js b/src/collection-api/routes/feed.test.js index 5f8e7325e..4a40f27f7 100644 --- a/src/collection-api/routes/feed.test.js +++ b/src/collection-api/routes/feed.test.js @@ -228,4 +228,120 @@ describe('Feed API', () => { }); }); }); + + describe('GET /feed/:serviceId', () => { + const SERVICE = 'service_without_history'; + const OTHER_SERVICE = 'service_with_history'; + const TERMS = 'Terms of Service'; + + let repository; + + before(async function () { + this.timeout(5000); + repository = RepositoryFactory.create(storageConfig); + await repository.initialize(); + + await repository.save(new Version({ + serviceId: SERVICE, + termsType: TERMS, + content: 'c1', + fetchDate: new Date('2024-01-01T00:00:00Z'), + snapshotIds: ['s1'], + })); + await repository.save(new Version({ + serviceId: SERVICE, + termsType: TERMS, + content: 'c2', + fetchDate: new Date('2024-02-01T00:00:00Z'), + snapshotIds: ['s2'], + })); + await repository.save(new Version({ + serviceId: OTHER_SERVICE, + termsType: TERMS, + content: 'c3', + fetchDate: new Date('2024-03-01T00:00:00Z'), + snapshotIds: ['s3'], + })); + }); + + after(() => repository.removeAll()); + + context('when the service exists and has versions', () => { + let response; + 
+ before(async () => { + response = await request.get(`${basePath}/v1/feed/${encodeURIComponent(SERVICE)}`); + }); + + it('responds with 200', () => { + expect(response.status).to.equal(200); + }); + + it('responds with Content-Type application/atom+xml', () => { + expect(response.headers['content-type']).to.match(/^application\/atom\+xml/); + }); + + it('includes only entries for that service', () => { + const serviceTerms = [...response.text.matchAll(/scheme="tag:opentermsarchive.org,2026:scheme:service"[^/]*term="([^"]+)"/g)] + .concat([...response.text.matchAll(/term="([^"]+)"[^/]*scheme="tag:opentermsarchive.org,2026:scheme:service"/g)]) + .map(match => match[1]); + + expect(serviceTerms).to.not.be.empty; + + for (const term of serviceTerms) { + expect(term).to.equal(SERVICE); + } + }); + + it('has a feed id including the service id', () => { + expect(extractTag(response.text, 'id')).to.equal(`tag:opentermsarchive.org,2026:feed:test:${SERVICE}`); + }); + + it('has a self link pointing to the service-scoped feed endpoint', () => { + const href = response.text.match(/]*rel="self"[^>]*href="([^"]+)"/)[1]; + + expect(href).to.match(new RegExp(`/feed/${SERVICE}$`)); + }); + }); + + context('when the service exists but has no versions', () => { + let response; + + before(async () => { + response = await request.get(`${basePath}/v1/feed/${encodeURIComponent('service_with_filters_history')}`); + }); + + it('responds with 200', () => { + expect(response.status).to.equal(200); + }); + + it('returns an empty feed (no entries)', () => { + expect(response.text).to.not.include(''); + }); + }); + + context('when the service does not exist', () => { + let response; + + before(async () => { + response = await request.get(`${basePath}/v1/feed/DoesNotExist`); + }); + + it('responds with 404', () => { + expect(response.status).to.equal(404); + }); + }); + + context('when the serviceId uses different casing', () => { + let response; + + before(async () => { + response = await 
request.get(`${basePath}/v1/feed/${encodeURIComponent(SERVICE.toUpperCase())}`); + }); + + it('still resolves to the service (case-insensitive)', () => { + expect(response.status).to.equal(200); + }); + }); + }); }); diff --git a/src/collection-api/routes/index.js b/src/collection-api/routes/index.js index a334b1691..f492593a4 100644 --- a/src/collection-api/routes/index.js +++ b/src/collection-api/routes/index.js @@ -38,7 +38,7 @@ export default async function apiRouter(basePath) { router.use(await metadataRouter(collection, services)); router.use(servicesRouter(services)); router.use(versionsRouter); - router.use(feedRouter()); + router.use(feedRouter(services)); return router; } diff --git a/src/collection-api/routes/services.js b/src/collection-api/routes/services.js index f13879d2d..95febb6a2 100644 --- a/src/collection-api/routes/services.js +++ b/src/collection-api/routes/services.js @@ -1,5 +1,7 @@ import express from 'express'; +import { findServiceCaseInsensitive } from './utils.js'; + /** * @param {object} services The services to be exposed by the API * @returns {express.Router} The router instance @@ -130,8 +132,7 @@ export default function servicesRouter(services) { * description: No service matching the provided ID is found. 
*/ router.get('/service/:serviceId', (req, res) => { - const matchedServiceID = Object.keys(services).find(key => key.toLowerCase() === req.params.serviceId?.toLowerCase()); - const service = services[matchedServiceID]; + const service = findServiceCaseInsensitive(services, req.params.serviceId); if (!service) { res.status(404).send('Service not found'); diff --git a/src/collection-api/routes/utils.js b/src/collection-api/routes/utils.js new file mode 100644 index 000000000..18728e445 --- /dev/null +++ b/src/collection-api/routes/utils.js @@ -0,0 +1,5 @@ +export function findServiceCaseInsensitive(services, serviceId) { + const matched = Object.keys(services).find(key => key.toLowerCase() === serviceId?.toLowerCase()); + + return matched ? services[matched] : null; +} From 891151ab11b6e1bb85357ac46ce24ed97dd1d9cb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Biron?= Date: Wed, 22 Apr 2026 09:09:16 +0200 Subject: [PATCH 05/21] Add service and terms type scoped feed endpoint --- src/collection-api/routes/feed.js | 55 ++++++++++ src/collection-api/routes/feed.test.js | 139 +++++++++++++++++++++++++ 2 files changed, 194 insertions(+) diff --git a/src/collection-api/routes/feed.js b/src/collection-api/routes/feed.js index fee9fdccd..35490a74c 100644 --- a/src/collection-api/routes/feed.js +++ b/src/collection-api/routes/feed.js @@ -190,5 +190,60 @@ export default function feedRouter(services) { return sendAtom(res, render(document)); }); + /** + * @swagger + * /feed/{serviceId}/{termsType}: + * get: + * summary: Atom feed of the latest version changes scoped to a service and terms type. + * tags: [Feeds] + * produces: + * - application/atom+xml + * parameters: + * - in: path + * name: serviceId + * description: The ID of the service. Case-insensitive. + * schema: + * type: string + * required: true + * - in: path + * name: termsType + * description: The terms type declared by the service (e.g. "Terms of Service", "Privacy Policy"). 
+ * schema: + * type: string + * required: true + * responses: + * 200: + * description: An Atom 1.0 feed listing the latest version records for the given service and terms type, newest first. + * content: + * application/atom+xml: + * schema: + * type: string + * 404: + * description: Either the service ID does not match any service or the terms type is not declared by that service. + */ + router.get('/feed/:serviceId/:termsType', async (req, res) => { + const service = findServiceCaseInsensitive(services, req.params.serviceId); + + if (!service) { + return res.status(404).send('Service not found'); + } + + const { termsType } = req.params; + + if (!service.getTermsTypes().includes(termsType)) { + return res.status(404).send('Terms type not found for this service'); + } + + const collection = await getCollection(); + const baseUrl = buildAbsoluteBaseUrl(req); + const selfHref = `${baseUrl}/feed/${encodeURIComponent(service.id)}/${encodeURIComponent(termsType)}`; + const feedId = `tag:${TAG_AUTHORITY}:feed:${collection.metadata?.id}:${service.id}:${termsType}`; + + const versions = await versionsRepository.findRecent(DEFAULT_LIMIT, { serviceId: service.id, termsType }); + const document = buildFeedDocument({ collection, selfHref, feedId, versions, baseUrl }); + + return sendAtom(res, render(document)); + }); + return router; } diff --git a/src/collection-api/routes/feed.test.js b/src/collection-api/routes/feed.test.js index 4a40f27f7..1d1d1ac95 100644 --- a/src/collection-api/routes/feed.test.js +++ b/src/collection-api/routes/feed.test.js @@ -344,4 +344,143 @@ describe('Feed API', () => { }); }); }); + + describe('XML escaping and URL encoding', () => { + const SERVICE = 'Service B!'; + const TERMS = 'Privacy Policy'; + const FETCH_DATE = new Date('2024-05-15T10:00:00Z'); + + let response; + let repository; + + before(async function () { + this.timeout(5000); + repository = RepositoryFactory.create(storageConfig); + await repository.initialize(); + + await 
repository.save(new Version({ + serviceId: SERVICE, + termsType: TERMS, + content: 'content with & and ', + fetchDate: FETCH_DATE, + snapshotIds: ['s_escape'], + })); + + response = await request.get(`${basePath}/v1/feed/${encodeURIComponent(SERVICE)}/${encodeURIComponent(TERMS)}`); + }); + + after(() => repository.removeAll()); + + it('responds with 200', () => { + expect(response.status).to.equal(200); + }); + + it('URL-encodes spaces and special characters in the self link href', () => { + const href = response.text.match(/]*rel="self"[^>]*href="([^"]+)"/)[1]; + + expect(href).to.include('Service%20B!'); + expect(href).to.include('Privacy%20Policy'); + expect(href).to.not.include('Service B!'); + }); + + it('URL-encodes spaces and special characters in entry alternate links', () => { + const href = response.text.match(/]*rel="alternate"[^>]*href="([^"]+)"/)[1]; + + expect(href).to.include('Service%20B!'); + expect(href).to.include('Privacy%20Policy'); + }); + }); + + describe('GET /feed/:serviceId/:termsType', () => { + const SERVICE = 'service_without_history'; + const TERMS = 'Terms of Service'; + const UNKNOWN_TERMS = 'Imprint'; + + let repository; + + before(async function () { + this.timeout(5000); + repository = RepositoryFactory.create(storageConfig); + await repository.initialize(); + + await repository.save(new Version({ + serviceId: SERVICE, + termsType: TERMS, + content: 'first', + fetchDate: new Date('2024-01-01T00:00:00Z'), + snapshotIds: ['s1'], + })); + await repository.save(new Version({ + serviceId: SERVICE, + termsType: TERMS, + content: 'updated', + fetchDate: new Date('2024-02-01T00:00:00Z'), + snapshotIds: ['s2'], + })); + }); + + after(() => repository.removeAll()); + + context('when the service and terms type match', () => { + let response; + + before(async () => { + response = await request.get(`${basePath}/v1/feed/${encodeURIComponent(SERVICE)}/${encodeURIComponent(TERMS)}`); + }); + + it('responds with 200', () => { + 
expect(response.status).to.equal(200); + }); + + it('includes entries for the combination', () => { + const entries = response.text.match(//g) || []; + + expect(entries.length).to.be.at.least(1); + }); + + it('entries only have the expected terms type', () => { + const termsTypeTerms = [...response.text.matchAll(/ match[1]); + + for (const term of termsTypeTerms) { + expect(term).to.equal(TERMS); + } + }); + + it('has a feed id that includes both service and terms type', () => { + expect(extractTag(response.text, 'id')).to.equal(`tag:opentermsarchive.org,2026:feed:test:${SERVICE}:${TERMS}`); + }); + + it('has a self link pointing to the combination endpoint', () => { + const href = response.text.match(/]*rel="self"[^>]*href="([^"]+)"/)[1]; + + expect(href).to.match(new RegExp(`/feed/${SERVICE}/${encodeURIComponent(TERMS)}$`)); + }); + }); + + context('when the service exists but does not declare the terms type', () => { + let response; + + before(async () => { + response = await request.get(`${basePath}/v1/feed/${encodeURIComponent(SERVICE)}/${encodeURIComponent(UNKNOWN_TERMS)}`); + }); + + it('responds with 404', () => { + expect(response.status).to.equal(404); + }); + }); + + context('when the service does not exist', () => { + let response; + + before(async () => { + response = await request.get(`${basePath}/v1/feed/DoesNotExist/${encodeURIComponent(TERMS)}`); + }); + + it('responds with 404', () => { + expect(response.status).to.equal(404); + }); + }); + }); }); From 54e0e1d54d01438142af6a30c852af1d5b9f71f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Biron?= Date: Wed, 22 Apr 2026 09:29:09 +0200 Subject: [PATCH 06/21] Cap feed entries with configurable limit --- config/default.json | 5 +++++ config/test.json | 5 ++++- src/collection-api/routes/feed.js | 17 +++++++++++++---- src/collection-api/routes/feed.test.js | 16 ++++++++++++++++ 4 files changed, 38 insertions(+), 5 deletions(-) diff --git a/config/default.json b/config/default.json index 
c044f2939..96309b6fb 100644 --- a/config/default.json +++ b/config/default.json @@ -47,6 +47,11 @@ }, "dataset": { "publishingSchedule": "30 8 * * MON" + }, + "collection-api": { + "feed": { + "limit": 100 + } } } } diff --git a/config/test.json b/config/test.json index cf14b8be3..050fd5b79 100644 --- a/config/test.json +++ b/config/test.json @@ -47,7 +47,10 @@ }, "collection-api": { "port": 3000, - "basePath": "/collection-api" + "basePath": "/collection-api", + "feed": { + "limit": 3 + } } } } diff --git a/src/collection-api/routes/feed.js b/src/collection-api/routes/feed.js index 35490a74c..050bf163b 100644 --- a/src/collection-api/routes/feed.js +++ b/src/collection-api/routes/feed.js @@ -1,3 +1,4 @@ +import config from 'config'; import express from 'express'; import { js2xml } from 'xml-js'; @@ -12,6 +13,14 @@ const TAG_AUTHORITY = 'opentermsarchive.org,2026'; const FEED_AUTHOR_NAME = 'OTA-Bot'; const DEFAULT_LIMIT = 100; +function getFeedLimit() { + if (config.has('@opentermsarchive/engine.collection-api.feed.limit')) { + return config.get('@opentermsarchive/engine.collection-api.feed.limit'); + } + + return DEFAULT_LIMIT; +} + const RECORD_TYPES = { firstRecord: 'First record', technicalUpgrade: 'Technical upgrade', @@ -129,7 +138,7 @@ export default function feedRouter(services) { * - application/atom+xml * responses: * 200: - * description: An Atom 1.0 feed listing the latest version records, newest first. + * description: An Atom 1.0 feed listing the latest version records, newest first. The maximum number of entries is server-configured. 
* content: * application/atom+xml: * schema: @@ -141,7 +150,7 @@ export default function feedRouter(services) { const selfHref = `${baseUrl}/feed`; const feedId = `tag:${TAG_AUTHORITY}:feed:${collection.metadata?.id}`; - const versions = await versionsRepository.findRecent(DEFAULT_LIMIT); + const versions = await versionsRepository.findRecent(getFeedLimit()); const document = buildFeedDocument({ collection, selfHref, feedId, versions, baseUrl }); sendAtom(res, render(document)); @@ -184,7 +193,7 @@ export default function feedRouter(services) { const selfHref = `${baseUrl}/feed/${encodeURIComponent(service.id)}`; const feedId = `tag:${TAG_AUTHORITY}:feed:${collection.metadata?.id}:${service.id}`; - const versions = await versionsRepository.findRecent(DEFAULT_LIMIT, { serviceId: service.id }); + const versions = await versionsRepository.findRecent(getFeedLimit(), { serviceId: service.id }); const document = buildFeedDocument({ collection, selfHref, feedId, versions, baseUrl }); return sendAtom(res, render(document)); @@ -239,7 +248,7 @@ export default function feedRouter(services) { const selfHref = `${baseUrl}/feed/${encodeURIComponent(service.id)}/${encodeURIComponent(termsType)}`; const feedId = `tag:${TAG_AUTHORITY}:feed:${collection.metadata?.id}:${service.id}:${termsType}`; - const versions = await versionsRepository.findRecent(DEFAULT_LIMIT, { serviceId: service.id, termsType }); + const versions = await versionsRepository.findRecent(getFeedLimit(), { serviceId: service.id, termsType }); const document = buildFeedDocument({ collection, selfHref, feedId, versions, baseUrl }); return sendAtom(res, render(document)); diff --git a/src/collection-api/routes/feed.test.js b/src/collection-api/routes/feed.test.js index 1d1d1ac95..d71278dd1 100644 --- a/src/collection-api/routes/feed.test.js +++ b/src/collection-api/routes/feed.test.js @@ -132,6 +132,13 @@ describe('Feed API', () => { after(() => repository.removeAll()); + it('lists one entry per saved version up to 
the configured limit', () => { + const limit = config.get('@opentermsarchive/engine.collection-api.feed.limit'); + const entries = response.text.match(//g) || []; + + expect(entries).to.have.length(Math.min(4, limit)); + }); + it('orders entries newest-first', () => { const updates = [...response.text.matchAll(/[\s\S]*?([^<]+)<\/updated>[\s\S]*?<\/entry>/g)].map(match => match[1]); @@ -227,6 +234,15 @@ describe('Feed API', () => { expect(entry).to.match(/term="Technical upgrade"/); }); }); + + describe('configurable limit', () => { + it('returns at most the configured number of entries', () => { + const limit = config.get('@opentermsarchive/engine.collection-api.feed.limit'); + const entries = response.text.match(//g) || []; + + expect(entries.length).to.be.at.most(limit); + }); + }); }); describe('GET /feed/:serviceId', () => { From eb24e391a1c5a04b0ec91388397e4ad10c70fbd1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Biron?= Date: Wed, 22 Apr 2026 14:19:22 +0200 Subject: [PATCH 07/21] Link feed entries to GitHub commits --- src/collection-api/routes/feed.js | 15 ++++++++++----- src/collection-api/routes/feed.test.js | 13 ++++++++++--- 2 files changed, 20 insertions(+), 8 deletions(-) diff --git a/src/collection-api/routes/feed.js b/src/collection-api/routes/feed.js index 050bf163b..5cd0692cf 100644 --- a/src/collection-api/routes/feed.js +++ b/src/collection-api/routes/feed.js @@ -66,13 +66,18 @@ function buildEntryId(collection, version) { } function buildEntry(collection, baseUrl, version) { + const apiLink = buildVersionLink(baseUrl, version); + const githubCommitLink = collection.metadata?.versions && `${collection.metadata.versions}/commit/${version.id}`; + + const links = [{ _attributes: { rel: 'alternate', type: 'text/html', href: githubCommitLink || apiLink } }]; + + if (githubCommitLink) { + links.push({ _attributes: { rel: 'related', type: 'text/html', href: apiLink } }); + } + return { id: { _text: buildEntryId(collection, version) }, - link: 
{ _attributes: { - rel: 'alternate', - type: 'text/html', - href: buildVersionLink(baseUrl, version), - } }, + link: links, title: { _text: buildEntryTitle(version) }, updated: { _text: version.fetchDate.toISOString() }, category: [ diff --git a/src/collection-api/routes/feed.test.js b/src/collection-api/routes/feed.test.js index d71278dd1..0118e1f70 100644 --- a/src/collection-api/routes/feed.test.js +++ b/src/collection-api/routes/feed.test.js @@ -159,8 +159,15 @@ describe('Feed API', () => { expect(firstEntry).to.include(`${expected}`); }); - it('has an alternate link to the version API endpoint', () => { + it('has an alternate link to the GitHub commit', async () => { + const collection = await getCollection(); const href = firstEntry.match(/]*rel="alternate"[^>]*href="([^"]+)"/)[1]; + + expect(href).to.equal(`${collection.metadata.versions}/commit/${savedVersions.technicalUpgradeRecord.id}`); + }); + + it('has a related link to the version API endpoint', () => { + const href = firstEntry.match(/]*rel="related"[^>]*href="([^"]+)"/)[1]; const expectedPathFragment = `/version/${encodeURIComponent('service-2')}/${encodeURIComponent('Privacy Policy')}/${encodeURIComponent(toISODateWithoutMilliseconds(FETCH_DATE_UPGRADE))}`; expect(href).to.include(expectedPathFragment); @@ -399,8 +406,8 @@ describe('Feed API', () => { expect(href).to.not.include('Service B!'); }); - it('URL-encodes spaces and special characters in entry alternate links', () => { - const href = response.text.match(/]*rel="alternate"[^>]*href="([^"]+)"/)[1]; + it('URL-encodes spaces and special characters in entry related links', () => { + const href = response.text.match(/]*rel="related"[^>]*href="([^"]+)"/)[1]; expect(href).to.include('Service%20B!'); expect(href).to.include('Privacy%20Policy'); From c21bb463d458b14d3129e13cdb14aa52e7eb0487 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Biron?= Date: Wed, 22 Apr 2026 15:07:59 +0200 Subject: [PATCH 08/21] Resolve serviceId 
case-insensitively --- src/collection-api/routes/index.js | 2 +- src/collection-api/routes/utils.test.js | 44 +++++++++++++++++++++ src/collection-api/routes/versions.js | 45 +++++++++++++--------- src/collection-api/routes/versions.test.js | 33 ++++++++++++++-- 4 files changed, 101 insertions(+), 23 deletions(-) create mode 100644 src/collection-api/routes/utils.test.js diff --git a/src/collection-api/routes/index.js b/src/collection-api/routes/index.js index f492593a4..e02829fa2 100644 --- a/src/collection-api/routes/index.js +++ b/src/collection-api/routes/index.js @@ -37,7 +37,7 @@ export default async function apiRouter(basePath) { router.use(await metadataRouter(collection, services)); router.use(servicesRouter(services)); - router.use(versionsRouter); + router.use(versionsRouter(services)); router.use(feedRouter(services)); return router; diff --git a/src/collection-api/routes/utils.test.js b/src/collection-api/routes/utils.test.js new file mode 100644 index 000000000..b7bb137f0 --- /dev/null +++ b/src/collection-api/routes/utils.test.js @@ -0,0 +1,44 @@ +import { expect } from 'chai'; + +import { findServiceCaseInsensitive } from './utils.js'; + +describe('findServiceCaseInsensitive', () => { + const services = { + '42Corp': { id: '42Corp' }, + ACMEco: { id: 'ACMEco' }, + 'example.org': { id: 'example.org' }, + 'Foo Bar': { id: 'Foo Bar' }, + 'service-b': { id: 'service-b' }, + service·A: { id: 'service·A' }, + }; + + it('returns the service when the id matches exactly', () => { + expect(findServiceCaseInsensitive(services, '42Corp')).to.equal(services['42Corp']); + expect(findServiceCaseInsensitive(services, 'ACMEco')).to.equal(services.ACMEco); + expect(findServiceCaseInsensitive(services, 'example.org')).to.equal(services['example.org']); + expect(findServiceCaseInsensitive(services, 'Foo Bar')).to.equal(services['Foo Bar']); + expect(findServiceCaseInsensitive(services, 'service-b')).to.equal(services['service-b']); + 
expect(findServiceCaseInsensitive(services, 'service·A')).to.equal(services['service·A']); + }); + + it('returns the service when the id casing differs', () => { + expect(findServiceCaseInsensitive(services, '42CORP')).to.equal(services['42Corp']); + expect(findServiceCaseInsensitive(services, 'acmeco')).to.equal(services.ACMEco); + expect(findServiceCaseInsensitive(services, 'EXAMPLE.ORG')).to.equal(services['example.org']); + expect(findServiceCaseInsensitive(services, 'foo bar')).to.equal(services['Foo Bar']); + expect(findServiceCaseInsensitive(services, 'SERVICE-B')).to.equal(services['service-b']); + expect(findServiceCaseInsensitive(services, 'SERVICE·A')).to.equal(services['service·A']); + }); + + it('returns null when no service matches', () => { + expect(findServiceCaseInsensitive(services, 'Unknown')).to.be.null; + }); + + it('returns null when serviceId is undefined', () => { + expect(findServiceCaseInsensitive(services, undefined)).to.be.null; + }); + + it('returns null when services is empty', () => { + expect(findServiceCaseInsensitive({}, 'Foo Bar')).to.be.null; + }); +}); diff --git a/src/collection-api/routes/versions.js b/src/collection-api/routes/versions.js index 555f74c56..0cc412bd6 100644 --- a/src/collection-api/routes/versions.js +++ b/src/collection-api/routes/versions.js @@ -3,6 +3,7 @@ import express from 'express'; import { toISODateWithoutMilliseconds } from '../../archivist/utils/date.js'; import versionsRepository from './versionsRepository.js'; +import { findServiceCaseInsensitive } from './utils.js'; /** * @private @@ -27,9 +28,10 @@ import versionsRepository from './versionsRepository.js'; * type: string * description: The JSON-escaped Markdown content of the version */ -const router = express.Router(); +export default function versionsRouter(services) { + const router = express.Router(); -/** + /** * @private * @swagger * /version/{serviceId}/{termsType}/{date}: @@ -86,25 +88,32 @@ const router = express.Router(); * type: string 
* description: Error message indicating that the requested date is in the future. */ -router.get('/version/:serviceId/:termsType/:date', async (req, res) => { - const { serviceId, termsType, date } = req.params; - const requestedDate = new Date(date); + router.get('/version/:serviceId/:termsType/:date', async (req, res) => { + const { termsType, date } = req.params; + const requestedDate = new Date(date); + + if (requestedDate > new Date()) { + return res.status(416).json({ error: 'Requested version is in the future' }); + } + + const service = findServiceCaseInsensitive(services, req.params.serviceId); - if (requestedDate > new Date()) { - return res.status(416).json({ error: 'Requested version is in the future' }); - } + if (!service) { + return res.status(404).json({ error: 'Service not found' }); + } - const version = await versionsRepository.findByDate(serviceId, termsType, requestedDate); + const version = await versionsRepository.findByDate(service.id, termsType, requestedDate); - if (!version) { - return res.status(404).json({ error: `No version found for date ${date}` }); - } + if (!version) { + return res.status(404).json({ error: `No version found for date ${date}` }); + } - return res.status(200).json({ - id: version.id, - fetchDate: toISODateWithoutMilliseconds(version.fetchDate), - content: version.content, + return res.status(200).json({ + id: version.id, + fetchDate: toISODateWithoutMilliseconds(version.fetchDate), + content: version.content, + }); }); -}); -export default router; + return router; +} diff --git a/src/collection-api/routes/versions.test.js b/src/collection-api/routes/versions.test.js index aadcfe14b..1ec145854 100644 --- a/src/collection-api/routes/versions.test.js +++ b/src/collection-api/routes/versions.test.js @@ -17,7 +17,7 @@ describe('Versions API', () => { let versionsRepository; const FETCH_DATE = new Date('2023-01-01T12:00:00Z'); const VERSION_COMMON_ATTRIBUTES = { - serviceId: 'service-1', + serviceId: 'service·A', 
termsType: 'Terms of Service', snapshotId: ['snapshot_id'], }; @@ -62,7 +62,7 @@ describe('Versions API', () => { context('when a version is found', () => { before(async () => { - response = await request.get(`${basePath}/v1/version/service-1/Terms%20of%20Service/${encodeURIComponent(toISODateWithoutMilliseconds(FETCH_DATE))}`); + response = await request.get(`${basePath}/v1/version/service·A/Terms%20of%20Service/${encodeURIComponent(toISODateWithoutMilliseconds(FETCH_DATE))}`); }); it('responds with 200 status code', () => { @@ -80,7 +80,7 @@ describe('Versions API', () => { context('when the requested date is anterior to the first available version', () => { before(async () => { - response = await request.get(`${basePath}/v1/version/service-1/Terms%20of%20Service/2000-01-01T12:00:00Z`); + response = await request.get(`${basePath}/v1/version/service·A/Terms%20of%20Service/2000-01-01T12:00:00Z`); }); it('responds with 404 status code', () => { @@ -96,11 +96,36 @@ describe('Versions API', () => { }); }); + context('when the serviceId uses different casing', () => { + before(async () => { + response = await request.get(`${basePath}/v1/version/SERVICE·A/Terms%20of%20Service/${encodeURIComponent(toISODateWithoutMilliseconds(FETCH_DATE))}`); + }); + + it('still resolves to the service (case-insensitive)', () => { + expect(response.status).to.equal(200); + expect(response.body).to.deep.equal(expectedResult); + }); + }); + + context('when the service does not exist', () => { + before(async () => { + response = await request.get(`${basePath}/v1/version/DoesNotExist/Terms%20of%20Service/${encodeURIComponent(toISODateWithoutMilliseconds(FETCH_DATE))}`); + }); + + it('responds with 404 status code', () => { + expect(response.status).to.equal(404); + }); + + it('returns an error message', () => { + expect(response.body.error).to.equal('Service not found'); + }); + }); + context('when the requested date is in the future', () => { before(async () => { const dateInTheFuture = new 
Date(Date.now() + 60000); // 1 minute in the future - response = await request.get(`${basePath}/v1/version/service-1/Terms%20of%20Service/${encodeURIComponent(toISODateWithoutMilliseconds(dateInTheFuture))}`); + response = await request.get(`${basePath}/v1/version/service·A/Terms%20of%20Service/${encodeURIComponent(toISODateWithoutMilliseconds(dateInTheFuture))}`); }); it('responds with 416 status code', () => { From d2ed924089e505fab11b1816a73227a35f753b71 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Biron?= Date: Wed, 22 Apr 2026 09:29:16 +0200 Subject: [PATCH 09/21] Add changelog entry --- CHANGELOG.md | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index ad05dcd96..0c0c6adee 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,21 @@ All changes that impact users of this module are documented in this file, in the [Common Changelog](https://common-changelog.org) format with some additional specifications defined in the CONTRIBUTING file. This codebase adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## Unreleased [minor] + +> Development of this release was supported by [Reset Tech](https://www.reset.tech). 
+ +### Added + +- Add `GET /feed` endpoint on the Collection API exposing an Atom feed of the latest version changes across the whole collection +- Add `GET /feed/:serviceId` endpoint on the Collection API exposing an Atom feed scoped to a single service +- Add `GET /feed/:serviceId/:termsType` endpoint on the Collection API exposing an Atom feed scoped to a single service and terms type +- Add [`@opentermsarchive/engine.collection-api.feed.limit`](https://docs.opentermsarchive.org/collections/reference/configuration/) configuration option controlling the maximum number of entries returned by feed endpoints (default: `100`) + +### Changed + +- Resolve `serviceId` path parameter case-insensitively on the `GET /version/:serviceId/:termsType/:date` endpoint, consistent with other endpoints + ## 11.0.2 - 2026-04-14 > Development of this release was supported by [Reset Tech](https://www.reset.tech). From a6a4723bb254c0d0bb37e7635bf1f92930fe4ff7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Biron?= Date: Wed, 29 Apr 2026 15:42:41 +0200 Subject: [PATCH 10/21] Enforce consistent brace style --- .eslintrc.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.eslintrc.yaml b/.eslintrc.yaml index 85702b255..3731975fa 100644 --- a/.eslintrc.yaml +++ b/.eslintrc.yaml @@ -37,6 +37,9 @@ rules: - error - always-multiline consistent-return: 0 + curly: + - error + - all function-paren-newline: - error - multiline From 2582940d84449ed874eab62cf0ee9b0815de3c54 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Biron?= Date: Wed, 29 Apr 2026 15:43:09 +0200 Subject: [PATCH 11/21] Lint --- scripts/reporter/duplicate/index.js | 2 +- src/archivist/collection/index.test.js | 2 +- .../recorder/repositories/git/index.js | 8 +- .../recorder/repositories/interface.js | 10 +- .../recorder/repositories/mongo/index.js | 4 +- src/archivist/services/index.js | 2 +- src/collection-api/routes/feed.js | 9 +- src/collection-api/routes/feed.test.js | 2 +- 
src/collection-api/routes/versions.js | 116 +++++++++--------- src/reporter/gitlab/index.js | 2 +- 10 files changed, 79 insertions(+), 78 deletions(-) diff --git a/scripts/reporter/duplicate/index.js b/scripts/reporter/duplicate/index.js index d2b508770..22e13b1a0 100644 --- a/scripts/reporter/duplicate/index.js +++ b/scripts/reporter/duplicate/index.js @@ -39,7 +39,7 @@ async function removeDuplicateIssues() { } for (const [ title, duplicateIssues ] of issuesByTitle) { - if (duplicateIssues.length === 1) continue; + if (duplicateIssues.length === 1) { continue; } const originalIssue = duplicateIssues.reduce((oldest, current) => (new Date(current.created_at) < new Date(oldest.created_at) ? current : oldest)); diff --git a/src/archivist/collection/index.test.js b/src/archivist/collection/index.test.js index f7689384d..3b817e615 100644 --- a/src/archivist/collection/index.test.js +++ b/src/archivist/collection/index.test.js @@ -18,7 +18,7 @@ describe('Collection', () => { try { metadataBackup = await fs.readFile(metadataPath, 'utf8'); } catch (error) { - if (error.code !== 'ENOENT') throw error; + if (error.code !== 'ENOENT') { throw error; } } }); diff --git a/src/archivist/recorder/repositories/git/index.js b/src/archivist/recorder/repositories/git/index.js index 32904cad3..284a0340c 100644 --- a/src/archivist/recorder/repositories/git/index.js +++ b/src/archivist/recorder/repositories/git/index.js @@ -97,14 +97,14 @@ export default class GitRepository extends RepositoryInterface { const records = []; for (const commit of commits) { - if (records.length >= limit) break; + if (records.length >= limit) { break; } const record = await this.#toDomain(commit, { deferContentLoading: true }); - if (!record) continue; + if (!record) { continue; } - if (serviceId !== undefined && record.serviceId !== serviceId) continue; - if (termsType !== undefined && record.termsType !== termsType) continue; + if (serviceId !== undefined && record.serviceId !== serviceId) { continue; } + 
if (termsType !== undefined && record.termsType !== termsType) { continue; } records.push(record); } diff --git a/src/archivist/recorder/repositories/interface.js b/src/archivist/recorder/repositories/interface.js index 1c1cfd7d6..cf18e6a85 100644 --- a/src/archivist/recorder/repositories/interface.js +++ b/src/archivist/recorder/repositories/interface.js @@ -83,11 +83,11 @@ class RepositoryInterface { * Find the most recent records in the repository, optionally filtered by service ID and terms type * For performance reasons, the content of the records will not be loaded. Use #loadRecordContent to load the content of individual records * @see RepositoryInterface#loadRecordContent - * @param {number} limit - Maximum number of records to return - * @param {object} [filters] - Optional filters - * @param {string} [filters.serviceId] - Restrict results to this service ID - * @param {string} [filters.termsType] - Restrict results to this terms type - * @returns {Promise>} Promise that will be resolved with an array of records in descending chronological order + * @param {number} limit - Maximum number of records to return + * @param {object} [filters] - Optional filters + * @param {string} [filters.serviceId] - Restrict results to this service ID + * @param {string} [filters.termsType] - Restrict results to this terms type + * @returns {Promise>} Promise that will be resolved with an array of records in descending chronological order */ async findRecent(limit, filters) { throw new Error(`#findRecent method is not implemented in ${this.constructor.name}`); diff --git a/src/archivist/recorder/repositories/mongo/index.js b/src/archivist/recorder/repositories/mongo/index.js index b9cf9437c..cd64940f9 100644 --- a/src/archivist/recorder/repositories/mongo/index.js +++ b/src/archivist/recorder/repositories/mongo/index.js @@ -96,8 +96,8 @@ export default class MongoRepository extends RepositoryInterface { async findRecent(limit, { serviceId, termsType } = {}) { const query = 
{}; - if (serviceId !== undefined) query.serviceId = serviceId; - if (termsType !== undefined) query.termsType = termsType; + if (serviceId !== undefined) { query.serviceId = serviceId; } + if (termsType !== undefined) { query.termsType = termsType; } const mongoDocuments = await this.collection .find(query) diff --git a/src/archivist/services/index.js b/src/archivist/services/index.js index cdcc07bbf..980973379 100644 --- a/src/archivist/services/index.js +++ b/src/archivist/services/index.js @@ -281,7 +281,7 @@ function getHistoryFilePaths(serviceId) { } async function loadServiceHistory(historyFilePath) { - if (!(await fileExists(historyFilePath))) return {}; + if (!(await fileExists(historyFilePath))) { return {}; } try { return JSON.parse(await fs.readFile(historyFilePath)); diff --git a/src/collection-api/routes/feed.js b/src/collection-api/routes/feed.js index 5cd0692cf..bd4885c93 100644 --- a/src/collection-api/routes/feed.js +++ b/src/collection-api/routes/feed.js @@ -6,8 +6,8 @@ import { getCollection } from '../../archivist/collection/index.js'; import { COMMIT_MESSAGE_PREFIXES } from '../../archivist/recorder/repositories/git/dataMapper.js'; import { toISODateWithoutMilliseconds } from '../../archivist/utils/date.js'; -import versionsRepository, { storageConfig } from './versionsRepository.js'; import { findServiceCaseInsensitive } from './utils.js'; +import versionsRepository, { storageConfig } from './versionsRepository.js'; const TAG_AUTHORITY = 'opentermsarchive.org,2026'; const FEED_AUTHOR_NAME = 'OTA-Bot'; @@ -38,8 +38,8 @@ function buildAbsoluteBaseUrl(req) { } function classifyRecordType(version) { - if (version.isFirstRecord) return RECORD_TYPES.firstRecord; - if (version.isTechnicalUpgrade) return RECORD_TYPES.technicalUpgrade; + if (version.isFirstRecord) { return RECORD_TYPES.firstRecord; } + if (version.isTechnicalUpgrade) { return RECORD_TYPES.technicalUpgrade; } return RECORD_TYPES.change; } @@ -47,8 +47,7 @@ function 
classifyRecordType(version) { function buildEntryTitle(version) { let prefix = COMMIT_MESSAGE_PREFIXES.update; - if (version.isFirstRecord) prefix = COMMIT_MESSAGE_PREFIXES.startTracking; - else if (version.isTechnicalUpgrade) prefix = COMMIT_MESSAGE_PREFIXES.technicalUpgrade; + if (version.isFirstRecord) { prefix = COMMIT_MESSAGE_PREFIXES.startTracking; } else if (version.isTechnicalUpgrade) { prefix = COMMIT_MESSAGE_PREFIXES.technicalUpgrade; } return `${prefix} ${version.serviceId} ${version.termsType}`; } diff --git a/src/collection-api/routes/feed.test.js b/src/collection-api/routes/feed.test.js index 0118e1f70..b63d1fb04 100644 --- a/src/collection-api/routes/feed.test.js +++ b/src/collection-api/routes/feed.test.js @@ -149,7 +149,7 @@ describe('Feed API', () => { let firstEntry; before(() => { - firstEntry = response.text.match(/[\s\S]*?<\/entry>/)[0]; + [firstEntry] = response.text.match(/[\s\S]*?<\/entry>/); }); it('has an id tag URI including storage type and record id', () => { diff --git a/src/collection-api/routes/versions.js b/src/collection-api/routes/versions.js index 0cc412bd6..069cd7f7b 100644 --- a/src/collection-api/routes/versions.js +++ b/src/collection-api/routes/versions.js @@ -2,10 +2,12 @@ import express from 'express'; import { toISODateWithoutMilliseconds } from '../../archivist/utils/date.js'; -import versionsRepository from './versionsRepository.js'; import { findServiceCaseInsensitive } from './utils.js'; +import versionsRepository from './versionsRepository.js'; /** + * @param {object} services The services to be exposed by the API + * @returns {express.Router} The router instance * @private * @swagger * tags: @@ -32,62 +34,62 @@ export default function versionsRouter(services) { const router = express.Router(); /** - * @private - * @swagger - * /version/{serviceId}/{termsType}/{date}: - * get: - * summary: Get a specific version of some terms at a given date. 
- * tags: [Versions] - * produces: - * - application/json - * parameters: - * - in: path - * name: serviceId - * description: The ID of the service whose version will be returned. - * schema: - * type: string - * required: true - * - in: path - * name: termsType - * description: The type of terms whose version will be returned. - * schema: - * type: string - * required: true - * - in: path - * name: date - * description: The date and time for which the version is requested, in ISO 8601 format. - * schema: - * type: string - * format: date-time - * required: true - * responses: - * 200: - * description: A JSON object containing the version content and metadata. - * content: - * application/json: - * schema: - * $ref: '#/components/schemas/Version' - * 404: - * description: No version found for the specified combination of service ID, terms type and date. - * content: - * application/json: - * schema: - * type: object - * properties: - * error: - * type: string - * description: Error message indicating that no version is found. - * 416: - * description: The requested date is in the future. - * content: - * application/json: - * schema: - * type: object - * properties: - * error: - * type: string - * description: Error message indicating that the requested date is in the future. - */ + * @private + * @swagger + * /version/{serviceId}/{termsType}/{date}: + * get: + * summary: Get a specific version of some terms at a given date. + * tags: [Versions] + * produces: + * - application/json + * parameters: + * - in: path + * name: serviceId + * description: The ID of the service whose version will be returned. + * schema: + * type: string + * required: true + * - in: path + * name: termsType + * description: The type of terms whose version will be returned. + * schema: + * type: string + * required: true + * - in: path + * name: date + * description: The date and time for which the version is requested, in ISO 8601 format. 
+ * schema: + * type: string + * format: date-time + * required: true + * responses: + * 200: + * description: A JSON object containing the version content and metadata. + * content: + * application/json: + * schema: + * $ref: '#/components/schemas/Version' + * 404: + * description: No version found for the specified combination of service ID, terms type and date. + * content: + * application/json: + * schema: + * type: object + * properties: + * error: + * type: string + * description: Error message indicating that no version is found. + * 416: + * description: The requested date is in the future. + * content: + * application/json: + * schema: + * type: object + * properties: + * error: + * type: string + * description: Error message indicating that the requested date is in the future. + */ router.get('/version/:serviceId/:termsType/:date', async (req, res) => { const { termsType, date } = req.params; const requestedDate = new Date(date); diff --git a/src/reporter/gitlab/index.js b/src/reporter/gitlab/index.js index 431416768..55ea591f1 100644 --- a/src/reporter/gitlab/index.js +++ b/src/reporter/gitlab/index.js @@ -358,7 +358,7 @@ export default class GitLab { try { let apiUrl = `${this.apiBaseURL}/projects/${this.projectId}/issues?search=${encodeURIComponent(title)}&state=${searchParams.state}&per_page=100`; - if (searchParams.state == 'all') apiUrl = `${this.apiBaseURL}/projects/${this.projectId}/issues?search=${encodeURIComponent(title)}&per_page=100`; + if (searchParams.state == 'all') { apiUrl = `${this.apiBaseURL}/projects/${this.projectId}/issues?search=${encodeURIComponent(title)}&per_page=100`; } const options = GitLab.baseOptionsHttpReq(); From ef71d573ed10b896f35ee29b582cffdf9223be82 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Biron?= Date: Wed, 29 Apr 2026 16:20:29 +0200 Subject: [PATCH 12/21] Instantiate versions repository in API router --- src/collection-api/routes/feed.js | 27 ++++++++++--------- src/collection-api/routes/index.js | 8 
++++-- src/collection-api/routes/versions.js | 8 +++--- .../routes/versionsRepository.js | 9 ------- 4 files changed, 24 insertions(+), 28 deletions(-) delete mode 100644 src/collection-api/routes/versionsRepository.js diff --git a/src/collection-api/routes/feed.js b/src/collection-api/routes/feed.js index bd4885c93..dc73f20a3 100644 --- a/src/collection-api/routes/feed.js +++ b/src/collection-api/routes/feed.js @@ -7,7 +7,6 @@ import { COMMIT_MESSAGE_PREFIXES } from '../../archivist/recorder/repositories/g import { toISODateWithoutMilliseconds } from '../../archivist/utils/date.js'; import { findServiceCaseInsensitive } from './utils.js'; -import versionsRepository, { storageConfig } from './versionsRepository.js'; const TAG_AUTHORITY = 'opentermsarchive.org,2026'; const FEED_AUTHOR_NAME = 'OTA-Bot'; @@ -60,11 +59,11 @@ function buildVersionLink(baseUrl, version) { return `${baseUrl}/version/${encodedService}/${encodedTermsType}/${encodedDate}`; } -function buildEntryId(collection, version) { - return `tag:${TAG_AUTHORITY}:version:${collection.metadata?.id}:${storageConfig.type}:${version.id}`; +function buildEntryId(collection, storageType, version) { + return `tag:${TAG_AUTHORITY}:version:${collection.metadata?.id}:${storageType}:${version.id}`; } -function buildEntry(collection, baseUrl, version) { +function buildEntry(collection, storageType, baseUrl, version) { const apiLink = buildVersionLink(baseUrl, version); const githubCommitLink = collection.metadata?.versions && `${collection.metadata.versions}/commit/${version.id}`; @@ -75,7 +74,7 @@ function buildEntry(collection, baseUrl, version) { } return { - id: { _text: buildEntryId(collection, version) }, + id: { _text: buildEntryId(collection, storageType, version) }, link: links, title: { _text: buildEntryTitle(version) }, updated: { _text: version.fetchDate.toISOString() }, @@ -87,7 +86,7 @@ function buildEntry(collection, baseUrl, version) { }; } -function buildFeedDocument({ collection, selfHref, feedId, 
versions, baseUrl }) { +function buildFeedDocument({ collection, storageType, selfHref, feedId, versions, baseUrl }) { const latestFetchDate = versions.length > 0 ? versions[0].fetchDate : new Date(); const feed = { @@ -104,7 +103,7 @@ function buildFeedDocument({ collection, selfHref, feedId, versions, baseUrl }) feed.logo = { _text: collection.metadata.logo }; } - feed.entry = versions.map(version => buildEntry(collection, baseUrl, version)); + feed.entry = versions.map(version => buildEntry(collection, storageType, baseUrl, version)); return { _declaration: { _attributes: { version: '1.0', encoding: 'utf-8' } }, @@ -122,14 +121,16 @@ function render(document) { } /** - * @param {object} services The services to be exposed by the API - * @returns {express.Router} The router instance + * @param {object} services The services to be exposed by the API + * @param {object} versionsRepository The versions repository instance + * @param {string} storageType The storage type identifier of the versions repository + * @returns {express.Router} The router instance * @swagger * tags: * name: Feeds * description: Atom feeds of version changes */ -export default function feedRouter(services) { +export default function feedRouter(services, versionsRepository, storageType) { const router = express.Router(); /** @@ -155,7 +156,7 @@ export default function feedRouter(services) { const feedId = `tag:${TAG_AUTHORITY}:feed:${collection.metadata?.id}`; const versions = await versionsRepository.findRecent(getFeedLimit()); - const document = buildFeedDocument({ collection, selfHref, feedId, versions, baseUrl }); + const document = buildFeedDocument({ collection, storageType, selfHref, feedId, versions, baseUrl }); sendAtom(res, render(document)); }); @@ -198,7 +199,7 @@ export default function feedRouter(services) { const feedId = `tag:${TAG_AUTHORITY}:feed:${collection.metadata?.id}:${service.id}`; const versions = await versionsRepository.findRecent(getFeedLimit(), { serviceId: 
service.id }); - const document = buildFeedDocument({ collection, selfHref, feedId, versions, baseUrl }); + const document = buildFeedDocument({ collection, storageType, selfHref, feedId, versions, baseUrl }); return sendAtom(res, render(document)); }); @@ -253,7 +254,7 @@ export default function feedRouter(services) { const feedId = `tag:${TAG_AUTHORITY}:feed:${collection.metadata?.id}:${service.id}:${termsType}`; const versions = await versionsRepository.findRecent(getFeedLimit(), { serviceId: service.id, termsType }); - const document = buildFeedDocument({ collection, selfHref, feedId, versions, baseUrl }); + const document = buildFeedDocument({ collection, storageType, selfHref, feedId, versions, baseUrl }); return sendAtom(res, render(document)); }); diff --git a/src/collection-api/routes/index.js b/src/collection-api/routes/index.js index e02829fa2..ee235f495 100644 --- a/src/collection-api/routes/index.js +++ b/src/collection-api/routes/index.js @@ -1,7 +1,9 @@ +import config from 'config'; import express from 'express'; import helmet from 'helmet'; import { getCollection } from '../../archivist/collection/index.js'; +import RepositoryFactory from '../../archivist/recorder/repositories/factory.js'; import * as Services from '../../archivist/services/index.js'; import docsRouter from './docs.js'; @@ -34,11 +36,13 @@ export default async function apiRouter(basePath) { const services = await Services.load(); const collection = await getCollection(); + const versionsStorageConfig = config.get('@opentermsarchive/engine.recorder.versions.storage'); + const versionsRepository = await RepositoryFactory.create(versionsStorageConfig).initialize(); router.use(await metadataRouter(collection, services)); router.use(servicesRouter(services)); - router.use(versionsRouter(services)); - router.use(feedRouter(services)); + router.use(versionsRouter(services, versionsRepository)); + router.use(feedRouter(services, versionsRepository, versionsStorageConfig.type)); return 
router; } diff --git a/src/collection-api/routes/versions.js b/src/collection-api/routes/versions.js index 069cd7f7b..914790a9b 100644 --- a/src/collection-api/routes/versions.js +++ b/src/collection-api/routes/versions.js @@ -3,11 +3,11 @@ import express from 'express'; import { toISODateWithoutMilliseconds } from '../../archivist/utils/date.js'; import { findServiceCaseInsensitive } from './utils.js'; -import versionsRepository from './versionsRepository.js'; /** - * @param {object} services The services to be exposed by the API - * @returns {express.Router} The router instance + * @param {object} services The services to be exposed by the API + * @param {object} versionsRepository The versions repository instance + * @returns {express.Router} The router instance * @private * @swagger * tags: @@ -30,7 +30,7 @@ import versionsRepository from './versionsRepository.js'; * type: string * description: The JSON-escaped Markdown content of the version */ -export default function versionsRouter(services) { +export default function versionsRouter(services, versionsRepository) { const router = express.Router(); /** diff --git a/src/collection-api/routes/versionsRepository.js b/src/collection-api/routes/versionsRepository.js deleted file mode 100644 index d76d06ce1..000000000 --- a/src/collection-api/routes/versionsRepository.js +++ /dev/null @@ -1,9 +0,0 @@ -import config from 'config'; - -import RepositoryFactory from '../../archivist/recorder/repositories/factory.js'; - -export const storageConfig = config.get('@opentermsarchive/engine.recorder.versions.storage'); - -const versionsRepository = await RepositoryFactory.create(storageConfig).initialize(); - -export default versionsRepository; From 693dc2865d37299667bcd3b85008e59bac7ad6a0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Biron?= Date: Wed, 29 Apr 2026 17:34:06 +0200 Subject: [PATCH 13/21] Resolve serviceId case-sensitively #34335620 introduced case-insensitive matching on /service/:serviceId, 
contradicting the documented case-sensitive service ID format --- CHANGELOG.md | 4 +- src/collection-api/routes/feed.js | 10 ++--- src/collection-api/routes/feed.test.js | 6 +-- src/collection-api/routes/index.js | 2 +- src/collection-api/routes/services.js | 4 +- src/collection-api/routes/services.test.js | 45 ++-------------------- src/collection-api/routes/utils.js | 5 --- src/collection-api/routes/utils.test.js | 44 --------------------- src/collection-api/routes/versions.js | 15 ++------ src/collection-api/routes/versions.test.js | 25 ------------ 10 files changed, 18 insertions(+), 142 deletions(-) delete mode 100644 src/collection-api/routes/utils.js delete mode 100644 src/collection-api/routes/utils.test.js diff --git a/CHANGELOG.md b/CHANGELOG.md index 0c0c6adee..2b9a99268 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,7 +2,7 @@ All changes that impact users of this module are documented in this file, in the [Common Changelog](https://common-changelog.org) format with some additional specifications defined in the CONTRIBUTING file. This codebase adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## Unreleased [minor] +## Unreleased [major] > Development of this release was supported by [Reset Tech](https://www.reset.tech). 
@@ -15,7 +15,7 @@ All changes that impact users of this module are documented in this file, in the ### Changed -- Resolve `serviceId` path parameter case-insensitively on the `GET /version/:serviceId/:termsType/:date` endpoint, consistent with other endpoints +- **Breaking:** Resolve `serviceId` path parameter case-sensitively on the `GET /service/:serviceId` endpoint, in line with the documented service ID format; clients relying on case-insensitive matching must now use the exact ID casing ## 11.0.2 - 2026-04-14 diff --git a/src/collection-api/routes/feed.js b/src/collection-api/routes/feed.js index dc73f20a3..5d63eb88f 100644 --- a/src/collection-api/routes/feed.js +++ b/src/collection-api/routes/feed.js @@ -6,8 +6,6 @@ import { getCollection } from '../../archivist/collection/index.js'; import { COMMIT_MESSAGE_PREFIXES } from '../../archivist/recorder/repositories/git/dataMapper.js'; import { toISODateWithoutMilliseconds } from '../../archivist/utils/date.js'; -import { findServiceCaseInsensitive } from './utils.js'; - const TAG_AUTHORITY = 'opentermsarchive.org,2026'; const FEED_AUTHOR_NAME = 'OTA-Bot'; const DEFAULT_LIMIT = 100; @@ -172,7 +170,7 @@ export default function feedRouter(services, versionsRepository, storageType) { * parameters: * - in: path * name: serviceId - * description: The ID of the service. Case-insensitive. + * description: The ID of the service. * schema: * type: string * required: true @@ -187,7 +185,7 @@ export default function feedRouter(services, versionsRepository, storageType) { * description: No service matching the provided ID is found. */ router.get('/feed/:serviceId', async (req, res) => { - const service = findServiceCaseInsensitive(services, req.params.serviceId); + const service = Object.hasOwn(services, req.params.serviceId) ? 
services[req.params.serviceId] : null; if (!service) { return res.status(404).send('Service not found'); @@ -215,7 +213,7 @@ export default function feedRouter(services, versionsRepository, storageType) { * parameters: * - in: path * name: serviceId - * description: The ID of the service. Case-insensitive. + * description: The ID of the service. * schema: * type: string * required: true @@ -236,7 +234,7 @@ export default function feedRouter(services, versionsRepository, storageType) { * description: Either the service ID does not match any service or the terms type is not declared by that service. */ router.get('/feed/:serviceId/:termsType', async (req, res) => { - const service = findServiceCaseInsensitive(services, req.params.serviceId); + const service = Object.hasOwn(services, req.params.serviceId) ? services[req.params.serviceId] : null; if (!service) { return res.status(404).send('Service not found'); diff --git a/src/collection-api/routes/feed.test.js b/src/collection-api/routes/feed.test.js index b63d1fb04..8c60f3493 100644 --- a/src/collection-api/routes/feed.test.js +++ b/src/collection-api/routes/feed.test.js @@ -355,15 +355,15 @@ describe('Feed API', () => { }); }); - context('when the serviceId uses different casing', () => { + context('when the serviceId casing does not match', () => { let response; before(async () => { response = await request.get(`${basePath}/v1/feed/${encodeURIComponent(SERVICE.toUpperCase())}`); }); - it('still resolves to the service (case-insensitive)', () => { - expect(response.status).to.equal(200); + it('responds with 404', () => { + expect(response.status).to.equal(404); }); }); }); diff --git a/src/collection-api/routes/index.js b/src/collection-api/routes/index.js index ee235f495..c24b8f5dd 100644 --- a/src/collection-api/routes/index.js +++ b/src/collection-api/routes/index.js @@ -41,7 +41,7 @@ export default async function apiRouter(basePath) { router.use(await metadataRouter(collection, services)); 
router.use(servicesRouter(services)); - router.use(versionsRouter(services, versionsRepository)); + router.use(versionsRouter(versionsRepository)); router.use(feedRouter(services, versionsRepository, versionsStorageConfig.type)); return router; diff --git a/src/collection-api/routes/services.js b/src/collection-api/routes/services.js index 95febb6a2..9906b7152 100644 --- a/src/collection-api/routes/services.js +++ b/src/collection-api/routes/services.js @@ -1,7 +1,5 @@ import express from 'express'; -import { findServiceCaseInsensitive } from './utils.js'; - /** * @param {object} services The services to be exposed by the API * @returns {express.Router} The router instance @@ -132,7 +130,7 @@ export default function servicesRouter(services) { * description: No service matching the provided ID is found. */ router.get('/service/:serviceId', (req, res) => { - const service = findServiceCaseInsensitive(services, req.params.serviceId); + const service = Object.hasOwn(services, req.params.serviceId) ? 
services[req.params.serviceId] : null; if (!service) { res.status(404).send('Service not found'); diff --git a/src/collection-api/routes/services.test.js b/src/collection-api/routes/services.test.js index db6bdc16f..43dfed950 100644 --- a/src/collection-api/routes/services.test.js +++ b/src/collection-api/routes/services.test.js @@ -56,7 +56,6 @@ describe('Services API', () => { describe('GET /service/:serviceId', () => { let response; const SERVICE_ID = 'Service B!'; - const CASE_INSENSITIVE_SERVICE_ID = 'service b!'; before(async () => { response = await request(app).get(`${basePath}/v1/service/${encodeURI(SERVICE_ID)}`); @@ -106,49 +105,13 @@ describe('Services API', () => { }); }); - context('with a case-insensitive service ID parameter', () => { + context('when the service ID casing does not match', () => { before(async () => { - response = await request(app).get(`${basePath}/v1/service/${encodeURI(CASE_INSENSITIVE_SERVICE_ID)}`); + response = await request(app).get(`${basePath}/v1/service/${encodeURI(SERVICE_ID.toLowerCase())}`); }); - it('responds with 200 status code', () => { - expect(response.status).to.equal(200); - }); - - it('returns a service object with id', () => { - expect(response.body).to.have.property('id'); - }); - - it('returns the proper service object', () => { - expect(response.body.id).to.equal(SERVICE_ID); - }); - - it('returns a service object with name', () => { - expect(response.body).to.have.property('name'); - }); - - it('returns a service object with an array of terms', () => { - expect(response.body).to.have.property('terms').that.is.an('array'); - }); - - it('each terms should have a type property', () => { - response.body.terms.forEach(terms => { - expect(terms).to.have.property('type'); - }); - }); - - it('each terms should have an array of source documents', () => { - response.body.terms.forEach(terms => { - expect(terms).to.have.property('sourceDocuments').that.is.an('array'); - }); - }); - - it('each source document should 
have a location', () => { - response.body.terms.forEach(terms => { - terms.sourceDocuments.forEach(sourceDocument => { - expect(sourceDocument).to.have.property('location'); - }); - }); + it('responds with 404 status code', () => { + expect(response.status).to.equal(404); }); }); diff --git a/src/collection-api/routes/utils.js b/src/collection-api/routes/utils.js deleted file mode 100644 index 18728e445..000000000 --- a/src/collection-api/routes/utils.js +++ /dev/null @@ -1,5 +0,0 @@ -export function findServiceCaseInsensitive(services, serviceId) { - const matched = Object.keys(services).find(key => key.toLowerCase() === serviceId?.toLowerCase()); - - return matched ? services[matched] : null; -} diff --git a/src/collection-api/routes/utils.test.js b/src/collection-api/routes/utils.test.js deleted file mode 100644 index b7bb137f0..000000000 --- a/src/collection-api/routes/utils.test.js +++ /dev/null @@ -1,44 +0,0 @@ -import { expect } from 'chai'; - -import { findServiceCaseInsensitive } from './utils.js'; - -describe('findServiceCaseInsensitive', () => { - const services = { - '42Corp': { id: '42Corp' }, - ACMEco: { id: 'ACMEco' }, - 'example.org': { id: 'example.org' }, - 'Foo Bar': { id: 'Foo Bar' }, - 'service-b': { id: 'service-b' }, - service·A: { id: 'service·A' }, - }; - - it('returns the service when the id matches exactly', () => { - expect(findServiceCaseInsensitive(services, '42Corp')).to.equal(services['42Corp']); - expect(findServiceCaseInsensitive(services, 'ACMEco')).to.equal(services.ACMEco); - expect(findServiceCaseInsensitive(services, 'example.org')).to.equal(services['example.org']); - expect(findServiceCaseInsensitive(services, 'Foo Bar')).to.equal(services['Foo Bar']); - expect(findServiceCaseInsensitive(services, 'service-b')).to.equal(services['service-b']); - expect(findServiceCaseInsensitive(services, 'service·A')).to.equal(services['service·A']); - }); - - it('returns the service when the id casing differs', () => { - 
expect(findServiceCaseInsensitive(services, '42CORP')).to.equal(services['42Corp']); - expect(findServiceCaseInsensitive(services, 'acmeco')).to.equal(services.ACMEco); - expect(findServiceCaseInsensitive(services, 'EXAMPLE.ORG')).to.equal(services['example.org']); - expect(findServiceCaseInsensitive(services, 'foo bar')).to.equal(services['Foo Bar']); - expect(findServiceCaseInsensitive(services, 'SERVICE-B')).to.equal(services['service-b']); - expect(findServiceCaseInsensitive(services, 'SERVICE·A')).to.equal(services['service·A']); - }); - - it('returns null when no service matches', () => { - expect(findServiceCaseInsensitive(services, 'Unknown')).to.be.null; - }); - - it('returns null when serviceId is undefined', () => { - expect(findServiceCaseInsensitive(services, undefined)).to.be.null; - }); - - it('returns null when services is empty', () => { - expect(findServiceCaseInsensitive({}, 'Foo Bar')).to.be.null; - }); -}); diff --git a/src/collection-api/routes/versions.js b/src/collection-api/routes/versions.js index 914790a9b..176ba0c55 100644 --- a/src/collection-api/routes/versions.js +++ b/src/collection-api/routes/versions.js @@ -2,10 +2,7 @@ import express from 'express'; import { toISODateWithoutMilliseconds } from '../../archivist/utils/date.js'; -import { findServiceCaseInsensitive } from './utils.js'; - /** - * @param {object} services The services to be exposed by the API * @param {object} versionsRepository The versions repository instance * @returns {express.Router} The router instance * @private @@ -30,7 +27,7 @@ import { findServiceCaseInsensitive } from './utils.js'; * type: string * description: The JSON-escaped Markdown content of the version */ -export default function versionsRouter(services, versionsRepository) { +export default function versionsRouter(versionsRepository) { const router = express.Router(); /** @@ -91,20 +88,14 @@ export default function versionsRouter(services, versionsRepository) { * description: Error message indicating 
that the requested date is in the future. */ router.get('/version/:serviceId/:termsType/:date', async (req, res) => { - const { termsType, date } = req.params; + const { serviceId, termsType, date } = req.params; const requestedDate = new Date(date); if (requestedDate > new Date()) { return res.status(416).json({ error: 'Requested version is in the future' }); } - const service = findServiceCaseInsensitive(services, req.params.serviceId); - - if (!service) { - return res.status(404).json({ error: 'Service not found' }); - } - - const version = await versionsRepository.findByDate(service.id, termsType, requestedDate); + const version = await versionsRepository.findByDate(serviceId, termsType, requestedDate); if (!version) { return res.status(404).json({ error: `No version found for date ${date}` }); diff --git a/src/collection-api/routes/versions.test.js b/src/collection-api/routes/versions.test.js index 1ec145854..bfdff4e15 100644 --- a/src/collection-api/routes/versions.test.js +++ b/src/collection-api/routes/versions.test.js @@ -96,31 +96,6 @@ describe('Versions API', () => { }); }); - context('when the serviceId uses different casing', () => { - before(async () => { - response = await request.get(`${basePath}/v1/version/SERVICE·A/Terms%20of%20Service/${encodeURIComponent(toISODateWithoutMilliseconds(FETCH_DATE))}`); - }); - - it('still resolves to the service (case-insensitive)', () => { - expect(response.status).to.equal(200); - expect(response.body).to.deep.equal(expectedResult); - }); - }); - - context('when the service does not exist', () => { - before(async () => { - response = await request.get(`${basePath}/v1/version/DoesNotExist/Terms%20of%20Service/${encodeURIComponent(toISODateWithoutMilliseconds(FETCH_DATE))}`); - }); - - it('responds with 404 status code', () => { - expect(response.status).to.equal(404); - }); - - it('returns an error message', () => { - expect(response.body.error).to.equal('Service not found'); - }); - }); - context('when the 
requested date is in the future', () => { before(async () => { const dateInTheFuture = new Date(Date.now() + 60000); // 1 minute in the future From 793c0abf73590bbdd40fefebd805eb9d7a479a8c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Biron?= Date: Wed, 29 Apr 2026 17:50:21 +0200 Subject: [PATCH 14/21] Restructure repository query API with pagination --- .../recorder/repositories/git/dataMapper.js | 5 + .../recorder/repositories/git/git.js | 8 +- .../recorder/repositories/git/index.js | 84 ++++-- .../recorder/repositories/git/index.test.js | 208 ++++++-------- .../recorder/repositories/interface.js | 53 ++-- .../recorder/repositories/mongo/index.js | 64 +++-- .../recorder/repositories/mongo/index.test.js | 255 +++++++++--------- src/collection-api/routes/feed.js | 6 +- 8 files changed, 382 insertions(+), 301 deletions(-) diff --git a/src/archivist/recorder/repositories/git/dataMapper.js b/src/archivist/recorder/repositories/git/dataMapper.js index c9dadd267..8fcd3fafb 100644 --- a/src/archivist/recorder/repositories/git/dataMapper.js +++ b/src/archivist/recorder/repositories/git/dataMapper.js @@ -91,6 +91,11 @@ function generateFileName(termsType, documentId, extension) { } export function generateFilePath(serviceId, termsType, documentId, mimeType) { + // If only serviceId is provided, return a pattern to match all files for that service + if (termsType === undefined) { + return `${serviceId}/*`; + } + const extension = mime.getExtension(mimeType) || '*'; // If mime type is undefined, an asterisk is set as an extension. 
Used to match all files for the given service ID, terms type and document ID when mime type is unknown return `${serviceId}/${generateFileName(termsType, documentId, extension)}`; // Do not use `path.join` as even for Windows, the path should be with `/` and not `\` diff --git a/src/archivist/recorder/repositories/git/git.js b/src/archivist/recorder/repositories/git/git.js index 791c39310..364fdc72b 100644 --- a/src/archivist/recorder/repositories/git/git.js +++ b/src/archivist/recorder/repositories/git/git.js @@ -68,8 +68,12 @@ export default class Git { return this.git.push(); } - listCommits(options = []) { - return this.log([ '--reverse', '--no-merges', '--name-only', ...options ]); // Returns all commits in chronological order (`--reverse`), excluding merge commits (`--no-merges`), with modified files names (`--name-only`) + listCommits(options = [], { reverse = true, skip, maxCount } = {}) { + const reverseOption = reverse ? ['--reverse'] : []; + const skipOption = skip !== undefined ? [`--skip=${skip}`] : []; + const maxCountOption = maxCount !== undefined ? 
[`--max-count=${maxCount}`] : []; + + return this.log([ ...reverseOption, '--author-date-order', '--no-merges', '--name-only', ...skipOption, ...maxCountOption, ...options ]); // Returns commits in chronological order with `--reverse` (oldest first) or reverse chronological without it (newest first), sorted by author date (`--author-date-order`), excluding merge commits (`--no-merges`), with modified files names (`--name-only`), with optional pagination (`--skip`, `--max-count`) } async getCommit(options) { diff --git a/src/archivist/recorder/repositories/git/index.js b/src/archivist/recorder/repositories/git/index.js index 284a0340c..50da196ba 100644 --- a/src/archivist/recorder/repositories/git/index.js +++ b/src/archivist/recorder/repositories/git/index.js @@ -88,36 +88,45 @@ export default class GitRepository extends RepositoryInterface { return this.#toDomain(commit); } - async findAll() { - return Promise.all((await this.#getCommits()).map(commit => this.#toDomain(commit, { deferContentLoading: true }))); + async findAll({ limit, offset } = {}) { + return Promise.all((await this.#getCommits({ limit, offset })).map(commit => this.#toDomain(commit, { deferContentLoading: true }))); } - async findRecent(limit, { serviceId, termsType } = {}) { - const commits = (await this.#getCommits()).reverse(); - const records = []; + async findByService(serviceId, { limit, offset } = {}) { + const pathPattern = DataMapper.generateFilePath(serviceId); - for (const commit of commits) { - if (records.length >= limit) { break; } + return Promise.all((await this.#getCommits({ pathFilter: pathPattern, limit, offset })).map(commit => this.#toDomain(commit, { deferContentLoading: true }))); + } - const record = await this.#toDomain(commit, { deferContentLoading: true }); + async findByServiceAndTermsType(serviceId, termsType, { limit, offset } = {}) { + const pathPattern = DataMapper.generateFilePath(serviceId, termsType); - if (!record) { continue; } + return Promise.all((await 
this.#getCommits({ pathFilter: pathPattern, limit, offset })).map(commit => this.#toDomain(commit, { deferContentLoading: true }))); + } - if (serviceId !== undefined && record.serviceId !== serviceId) { continue; } - if (termsType !== undefined && record.termsType !== termsType) { continue; } + async count(serviceId, termsType) { + const grepOptions = Object.values(DataMapper.COMMIT_MESSAGE_PREFIXES).map(prefix => `--grep=${prefix}`); + const pathOptions = []; - records.push(record); - } + if (serviceId && termsType) { + const pathPattern = DataMapper.generateFilePath(serviceId, termsType); - return records; - } + pathOptions.push('--', pathPattern); + } else if (serviceId) { + // Count all records for a service (all terms types) + const pathPattern = DataMapper.generateFilePath(serviceId); - async count() { - return (await this.git.log(Object.values(DataMapper.COMMIT_MESSAGE_PREFIXES).map(prefix => `--grep=${prefix}`))).length; + pathOptions.push('--', pathPattern); + } else { + // Count all records (exclude root directory files) + pathOptions.push('--', '*/*'); + } + + return (await this.git.log([ ...grepOptions, ...pathOptions ])).length; } async* iterate() { - const commits = await this.#getCommits(); + const commits = await this.#getCommits({ reverse: true }); for (const commit of commits) { yield this.#toDomain(commit); @@ -151,12 +160,39 @@ export default class GitRepository extends RepositoryInterface { record.content = pdfBuffer; } - async #getCommits() { - return (await this.git.listCommits()) - .filter(commit => // Skip non-record commits (e.g., README or LICENSE updates) - DataMapper.COMMIT_MESSAGE_PREFIXES_REGEXP.test(commit.message) // Commits generated by the engine have messages that match predefined prefixes - && path.dirname(commit.diff.files[0].file) !== '.') // Assumes one record per commit; records must be in a serviceId folder, not root - .sort((commitA, commitB) => new Date(commitA.date) - new Date(commitB.date)); // Make sure that the 
commits are sorted in ascending chronological order + async #getCommits({ pathFilter, reverse = false, limit, offset } = {}) { + const grepOptions = Object.values(DataMapper.COMMIT_MESSAGE_PREFIXES).flatMap(prefix => [ '--grep', prefix ]); + const pathOptions = pathFilter + ? [ '--', pathFilter ] + : [ '--', '*/*' ]; // Exclude root directory files by only matching files in subdirectories + + const options = [ ...grepOptions, ...pathOptions ]; + + // Use git-level pagination when available + // Note: --skip and --max-count work in topological order, not chronological order + // This means pagination may not be strictly chronological, but it's acceptable for performance + const paginationOptions = {}; + + if (offset !== undefined) { + paginationOptions.skip = offset; + } + + if (limit !== undefined) { + paginationOptions.maxCount = limit; + } + + const commits = await this.git.listCommits(options, { reverse: false, ...paginationOptions }); // Get commits without git's --reverse for better performance, filtered at git level + + // Sort by date in JavaScript for accuracy - git's date ordering may not be reliable with backdated commits + // Default order is descending (newest to oldest), reverse gives ascending (oldest to newest) + commits.sort((commitA, commitB) => { + const dateA = new Date(commitA.date); + const dateB = new Date(commitB.date); + + return reverse ? 
dateA - dateB : dateB - dateA; + }); + + return commits; } static async writeFile({ filePath, content }) { diff --git a/src/archivist/recorder/repositories/git/index.test.js b/src/archivist/recorder/repositories/git/index.test.js index ee8d8b6e6..6ef24175f 100644 --- a/src/archivist/recorder/repositories/git/index.test.js +++ b/src/archivist/recorder/repositories/git/index.test.js @@ -540,53 +540,92 @@ describe('GitRepository', () => { } }); - it('returns records in ascending order', () => { - expect(records.map(record => record.fetchDate)).to.deep.equal([ FETCH_DATE_EARLIER, FETCH_DATE, FETCH_DATE_LATER ]); + it('returns records in descending order', () => { + expect(records.map(record => record.fetchDate)).to.deep.equal([ FETCH_DATE_LATER, FETCH_DATE, FETCH_DATE_EARLIER ]); }); }); - describe('#count', () => { - let count; + describe('#findByServiceAndTermsType', () => { + const expectedIds = []; + let records; before(async function () { this.timeout(5000); - await subject.save(new Version({ + const { id: id1 } = await subject.save(new Version({ serviceId: SERVICE_PROVIDER_ID, termsType: TERMS_TYPE, content: CONTENT, fetchDate: FETCH_DATE, snapshotIds: [SNAPSHOT_ID], })); - await subject.save(new Version({ + + expectedIds.push(id1); + + const { id: id2 } = await subject.save(new Version({ serviceId: SERVICE_PROVIDER_ID, termsType: TERMS_TYPE, content: `${CONTENT} - updated`, fetchDate: FETCH_DATE_LATER, snapshotIds: [SNAPSHOT_ID], })); + + expectedIds.push(id2); + await subject.save(new Version({ - serviceId: SERVICE_PROVIDER_ID, - termsType: TERMS_TYPE, - content: `${CONTENT} - updated 2`, - isTechnicalUpgrade: true, - fetchDate: FETCH_DATE_EARLIER, + serviceId: 'other_service', + termsType: 'Privacy Policy', + content: `${CONTENT} - other`, + fetchDate: FETCH_DATE, snapshotIds: [SNAPSHOT_ID], })); - (count = await subject.count()); + (records = await subject.findByServiceAndTermsType(SERVICE_PROVIDER_ID, TERMS_TYPE)); }); after(() => subject.removeAll()); - 
it('returns the proper count', () => { - expect(count).to.equal(3); + it('returns only matching records', () => { + expect(records.length).to.equal(2); + }); + + it('returns Version objects', () => { + for (const record of records) { + expect(record).to.be.an.instanceof(Version); + } + }); + + it('returns records with matching service ID', () => { + for (const record of records) { + expect(record.serviceId).to.equal(SERVICE_PROVIDER_ID); + } + }); + + it('returns records with matching terms type', () => { + for (const record of records) { + expect(record.termsType).to.equal(TERMS_TYPE); + } + }); + + it('returns records in descending order', () => { + expect(records.map(record => record.fetchDate)).to.deep.equal([ FETCH_DATE_LATER, FETCH_DATE ]); + }); + + it('returns records with correct IDs', () => { + expect(records.map(record => record.id)).to.have.members(expectedIds); + }); + + context('when no matching records exist', () => { + it('returns an empty array', async () => { + const result = await subject.findByServiceAndTermsType('non_existent_service', 'Non Existent Terms'); + + expect(result).to.be.an('array').that.is.empty; + }); }); }); - describe('#findRecent', () => { - const OTHER_SERVICE = 'other_service'; - const OTHER_TERMS = 'Privacy Policy'; + describe('#count', () => { + let count; before(async function () { this.timeout(5000); @@ -595,133 +634,62 @@ describe('GitRepository', () => { serviceId: SERVICE_PROVIDER_ID, termsType: TERMS_TYPE, content: CONTENT, - fetchDate: FETCH_DATE_EARLIER, + fetchDate: FETCH_DATE, snapshotIds: [SNAPSHOT_ID], })); await subject.save(new Version({ serviceId: SERVICE_PROVIDER_ID, termsType: TERMS_TYPE, content: `${CONTENT} - updated`, - fetchDate: FETCH_DATE, - snapshotIds: [SNAPSHOT_ID], - })); - await subject.save(new Version({ - serviceId: SERVICE_PROVIDER_ID, - termsType: OTHER_TERMS, - content: CONTENT, fetchDate: FETCH_DATE_LATER, snapshotIds: [SNAPSHOT_ID], })); await subject.save(new Version({ - serviceId: 
OTHER_SERVICE, + serviceId: SERVICE_PROVIDER_ID, termsType: TERMS_TYPE, - content: CONTENT, - fetchDate: FETCH_DATE_LATER, + content: `${CONTENT} - updated 2`, + isTechnicalUpgrade: true, + fetchDate: FETCH_DATE_EARLIER, snapshotIds: [SNAPSHOT_ID], })); - }); - - after(() => subject.removeAll()); - - context('without filters', () => { - let records; - - before(async () => { - records = await subject.findRecent(10); - }); - - it('returns records in descending chronological order', () => { - const dates = records.map(record => record.fetchDate.getTime()); - expect(dates).to.deep.equal([...dates].sort((a, b) => b - a)); - }); - - it('returns all matching records', () => { - expect(records).to.have.length(4); - }); - - it('does not load content eagerly', () => { - for (const record of records) { - expect(() => record.content).to.throw('Content not defined'); - } - }); - - it('exposes the metadata needed for feed entries', () => { - const [record] = records; - - expect(record.id).to.be.a('string'); - expect(record.serviceId).to.be.a('string'); - expect(record.termsType).to.be.a('string'); - expect(record.fetchDate).to.be.an.instanceof(Date); - expect(record.isFirstRecord).to.be.a('boolean'); - expect(record.isTechnicalUpgrade).to.be.a('boolean'); - }); - }); - - context('when limit is smaller than the number of matching records', () => { - let records; - - before(async () => { - records = await subject.findRecent(2); - }); - - it('returns at most limit records', () => { - expect(records).to.have.length(2); - }); - - it('returns the most recent records', () => { - for (const record of records) { - expect(record.fetchDate.getTime()).to.be.at.least(FETCH_DATE.getTime()); - } - }); + (count = await subject.count()); }); - context('when a serviceId filter is given', () => { - let records; - - before(async () => { - records = await subject.findRecent(10, { serviceId: SERVICE_PROVIDER_ID }); - }); - - it('returns only records for that service', () => { - for (const record of 
records) { - expect(record.serviceId).to.equal(SERVICE_PROVIDER_ID); - } - }); + after(() => subject.removeAll()); - it('returns all records that match', () => { - expect(records).to.have.length(3); - }); + it('returns the proper count', () => { + expect(count).to.equal(3); }); - context('when both serviceId and termsType filters are given', () => { - let records; + context('with serviceId and termsType filters', () => { + it('returns count for specific service and terms type', async () => { + const filteredCount = await subject.count(SERVICE_PROVIDER_ID, TERMS_TYPE); - before(async () => { - records = await subject.findRecent(10, { serviceId: SERVICE_PROVIDER_ID, termsType: TERMS_TYPE }); + expect(filteredCount).to.equal(3); }); - it('returns only records for that service and terms type', () => { - for (const record of records) { - expect(record.serviceId).to.equal(SERVICE_PROVIDER_ID); - expect(record.termsType).to.equal(TERMS_TYPE); - } - }); + it('returns zero for non-existent service', async () => { + const filteredCount = await subject.count('non-existent-service', TERMS_TYPE); - it('returns all records that match', () => { - expect(records).to.have.length(2); + expect(filteredCount).to.equal(0); }); }); - context('when filters match no record', () => { - let records; + context('with only serviceId filter', () => { + it('returns count for all terms types of a service', async () => { + // Add a version with different terms type + await subject.save(new Version({ + serviceId: SERVICE_PROVIDER_ID, + termsType: 'Different Terms', + content: CONTENT, + fetchDate: FETCH_DATE, + snapshotIds: [SNAPSHOT_ID], + })); - before(async () => { - records = await subject.findRecent(10, { serviceId: 'unknown' }); - }); + const filteredCount = await subject.count(SERVICE_PROVIDER_ID); - it('returns an empty array', () => { - expect(records).to.deep.equal([]); + expect(filteredCount).to.equal(4); // 3 from TERMS_TYPE + 1 from 'Different Terms' }); }); }); @@ -1243,8 +1211,8 @@ 
describe('GitRepository', () => { } }); - it('returns records in ascending order', () => { - expect(records.map(record => record.fetchDate)).to.deep.equal([ FETCH_DATE_EARLIER, FETCH_DATE, FETCH_DATE_LATER ]); + it('returns records in descending order', () => { + expect(records.map(record => record.fetchDate)).to.deep.equal([ FETCH_DATE_LATER, FETCH_DATE, FETCH_DATE_EARLIER ]); }); }); @@ -1604,8 +1572,8 @@ describe('GitRepository', () => { } }); - it('returns records in ascending order', () => { - expect(records.map(record => record.fetchDate)).to.deep.equal(expectedDates); + it('returns records in descending order', () => { + expect(records.map(record => record.fetchDate)).to.deep.equal([...expectedDates].reverse()); }); }); diff --git a/src/archivist/recorder/repositories/interface.js b/src/archivist/recorder/repositories/interface.js index cf18e6a85..ae0ffafcc 100644 --- a/src/archivist/recorder/repositories/interface.js +++ b/src/archivist/recorder/repositories/interface.js @@ -70,35 +70,56 @@ class RepositoryInterface { } /** - * Find all records + * Find all records, in descending chronological order (newest first; opposite of #iterate) * For performance reasons, the content of the records will not be loaded by default. 
Use #loadRecordContent to load the content of individual records - * @see RepositoryInterface#loadRecordContent - * @returns {Promise>} Promise that will be resolved with an array of all records + * @see RepositoryInterface#loadRecordContent + * @see RepositoryInterface#iterate + * @param {object} [options] - Pagination options + * @param {number} [options.limit] - Maximum number of records to return + * @param {number} [options.offset] - Number of records to skip + * @returns {Promise>} Promise that will be resolved with an array of records in descending chronological order */ - async findAll() { + async findAll(options = {}) { throw new Error(`#findAll method is not implemented in ${this.constructor.name}`); } /** - * Find the most recent records in the repository, optionally filtered by service ID and terms type - * For performance reasons, the content of the records will not be loaded. Use #loadRecordContent to load the content of individual records - * @see RepositoryInterface#loadRecordContent - * @param {number} limit - Maximum number of records to return - * @param {object} [filters] - Optional filters - * @param {string} [filters.serviceId] - Restrict results to this service ID - * @param {string} [filters.termsType] - Restrict results to this terms type - * @returns {Promise>} Promise that will be resolved with an array of records in descending chronological order + * Find all records for a specific service, in descending chronological order + * For performance reasons, the content of the records will not be loaded by default. 
Use #loadRecordContent to load the content of individual records + * @see RepositoryInterface#loadRecordContent + * @param {string} serviceId - Service ID of records to find + * @param {object} [options] - Pagination options + * @param {number} [options.limit] - Maximum number of records to return + * @param {number} [options.offset] - Number of records to skip + * @returns {Promise>} Promise that will be resolved with an array of matching records in descending chronological order + */ + async findByService(serviceId, options = {}) { + throw new Error(`#findByService method is not implemented in ${this.constructor.name}`); + } + + /** + * Find all records for a specific service and terms type, in descending chronological order + * For performance reasons, the content of the records will not be loaded by default. Use #loadRecordContent to load the content of individual records + * @see RepositoryInterface#loadRecordContent + * @param {string} serviceId - Service ID of records to find + * @param {string} termsType - Terms type of records to find + * @param {object} [options] - Pagination options + * @param {number} [options.limit] - Maximum number of records to return + * @param {number} [options.offset] - Number of records to skip + * @returns {Promise>} Promise that will be resolved with an array of matching records in descending chronological order */ - async findRecent(limit, filters) { - throw new Error(`#findRecent method is not implemented in ${this.constructor.name}`); + async findByServiceAndTermsType(serviceId, termsType, options = {}) { + throw new Error(`#findByServiceAndTermsType method is not implemented in ${this.constructor.name}`); } /** * Count the total number of records in the repository * For performance reasons, use this method rather than counting the number of entries returned by #findAll if you only need the size of a repository - * @returns {Promise} Promise that will be resolved with the total number of records + * @param {string} 
[serviceId] - Optional service ID to filter records + * @param {string} [termsType] - Optional terms type to filter records (requires serviceId) + * @returns {Promise} Promise that will be resolved with the total number of records */ - async count() { + async count(serviceId, termsType) { throw new Error(`#count method is not implemented in ${this.constructor.name}`); } diff --git a/src/archivist/recorder/repositories/mongo/index.js b/src/archivist/recorder/repositories/mongo/index.js index cd64940f9..fc1b860ca 100644 --- a/src/archivist/recorder/repositories/mongo/index.js +++ b/src/archivist/recorder/repositories/mongo/index.js @@ -88,29 +88,63 @@ export default class MongoRepository extends RepositoryInterface { return this.#toDomain(mongoDocument); } - async findAll() { - return Promise.all((await this.collection.find().project({ content: 0 }).sort({ fetchDate: 1 }).toArray()) + async findAll({ limit, offset } = {}) { + let query = this.collection.find().project({ content: 0 }).sort({ fetchDate: -1 }); + + if (offset !== undefined) { + query = query.skip(offset); + } + + if (limit !== undefined) { + query = query.limit(limit); + } + + return Promise.all((await query.toArray()) .map(mongoDocument => this.#toDomain(mongoDocument, { deferContentLoading: true }))); } - async findRecent(limit, { serviceId, termsType } = {}) { - const query = {}; + async findByServiceAndTermsType(serviceId, termsType, { limit, offset } = {}) { + let query = this.collection.find({ serviceId, termsType }).project({ content: 0 }).sort({ fetchDate: -1 }); - if (serviceId !== undefined) { query.serviceId = serviceId; } - if (termsType !== undefined) { query.termsType = termsType; } + if (offset !== undefined) { + query = query.skip(offset); + } - const mongoDocuments = await this.collection - .find(query) - .project({ content: 0 }) - .sort({ fetchDate: -1 }) - .limit(limit) - .toArray(); + if (limit !== undefined) { + query = query.limit(limit); + } - return 
Promise.all(mongoDocuments.map(mongoDocument => this.#toDomain(mongoDocument, { deferContentLoading: true }))); + return Promise.all((await query.toArray()) + .map(mongoDocument => this.#toDomain(mongoDocument, { deferContentLoading: true }))); } - count() { - return this.collection.countDocuments(); + async findByService(serviceId, { limit, offset } = {}) { + let query = this.collection.find({ serviceId }).project({ content: 0 }).sort({ fetchDate: -1 }); + + if (offset !== undefined) { + query = query.skip(offset); + } + + if (limit !== undefined) { + query = query.limit(limit); + } + + return Promise.all((await query.toArray()) + .map(mongoDocument => this.#toDomain(mongoDocument, { deferContentLoading: true }))); + } + + count(serviceId, termsType) { + const filter = {}; + + if (serviceId) { + filter.serviceId = serviceId; + } + + if (termsType) { + filter.termsType = termsType; + } + + return this.collection.countDocuments(filter); } async* iterate() { diff --git a/src/archivist/recorder/repositories/mongo/index.test.js b/src/archivist/recorder/repositories/mongo/index.test.js index 880c9b2e3..e2123cdfd 100644 --- a/src/archivist/recorder/repositories/mongo/index.test.js +++ b/src/archivist/recorder/repositories/mongo/index.test.js @@ -629,184 +629,197 @@ describe('MongoRepository', () => { } }); - it('returns records in ascending order', () => { - expect(records.map(record => record.fetchDate)).to.deep.equal([ FETCH_DATE_EARLIER, FETCH_DATE, FETCH_DATE_LATER ]); + it('returns records in descending order', () => { + expect(records.map(record => record.fetchDate)).to.deep.equal([ FETCH_DATE_LATER, FETCH_DATE, FETCH_DATE_EARLIER ]); }); }); - describe('#count', () => { - let count; + describe('#findByServiceAndTermsType', () => { + const expectedIds = []; + let records; before(async () => { - await subject.save(new Version({ + const { id: id1 } = await subject.save(new Version({ serviceId: SERVICE_PROVIDER_ID, termsType: TERMS_TYPE, content: CONTENT, fetchDate: 
FETCH_DATE, snapshotIds: [SNAPSHOT_ID], })); - await subject.save(new Version({ + + expectedIds.push(id1); + + const { id: id2 } = await subject.save(new Version({ serviceId: SERVICE_PROVIDER_ID, termsType: TERMS_TYPE, content: `${CONTENT} - updated`, fetchDate: FETCH_DATE_LATER, snapshotIds: [SNAPSHOT_ID], })); - await subject.save(new Version({ - serviceId: SERVICE_PROVIDER_ID, - termsType: TERMS_TYPE, - content: `${CONTENT} - updated 2`, - isTechnicalUpgrade: true, - fetchDate: FETCH_DATE_EARLIER, - snapshotIds: [SNAPSHOT_ID], - })); - (count = await subject.count()); - }); - - after(() => subject.removeAll()); - - it('returns the proper count', () => { - expect(count).to.equal(3); - }); - }); - - describe('#findRecent', () => { - const OTHER_SERVICE = 'other_service'; - const OTHER_TERMS = 'Privacy Policy'; + expectedIds.push(id2); - before(async () => { await subject.save(new Version({ - serviceId: SERVICE_PROVIDER_ID, - termsType: TERMS_TYPE, - content: CONTENT, - fetchDate: FETCH_DATE_EARLIER, - snapshotIds: [SNAPSHOT_ID], - })); - await subject.save(new Version({ - serviceId: SERVICE_PROVIDER_ID, - termsType: TERMS_TYPE, - content: `${CONTENT} - updated`, + serviceId: 'other_service', + termsType: 'Privacy Policy', + content: `${CONTENT} - other`, fetchDate: FETCH_DATE, snapshotIds: [SNAPSHOT_ID], })); - await subject.save(new Version({ - serviceId: SERVICE_PROVIDER_ID, - termsType: OTHER_TERMS, - content: CONTENT, - fetchDate: FETCH_DATE_LATER, - snapshotIds: [SNAPSHOT_ID], - })); - await subject.save(new Version({ - serviceId: OTHER_SERVICE, - termsType: TERMS_TYPE, - content: CONTENT, - fetchDate: FETCH_DATE_LATER, - snapshotIds: [SNAPSHOT_ID], - })); + + (records = await subject.findByServiceAndTermsType(SERVICE_PROVIDER_ID, TERMS_TYPE)); }); after(() => subject.removeAll()); - context('without filters', () => { - let records; + it('returns only matching records', () => { + expect(records.length).to.equal(2); + }); - before(async () => { - records = 
await subject.findRecent(10); - }); + it('returns Version objects', () => { + for (const record of records) { + expect(record).to.be.an.instanceof(Version); + } + }); - it('returns records in descending chronological order', () => { - const dates = records.map(record => record.fetchDate.getTime()); + it('returns records with matching service ID', () => { + for (const record of records) { + expect(record.serviceId).to.equal(SERVICE_PROVIDER_ID); + } + }); - expect(dates).to.deep.equal([...dates].sort((a, b) => b - a)); - }); + it('returns records with matching terms type', () => { + for (const record of records) { + expect(record.termsType).to.equal(TERMS_TYPE); + } + }); - it('returns all matching records', () => { - expect(records).to.have.length(4); - }); + it('returns records in descending order', () => { + expect(records.map(record => record.fetchDate)).to.deep.equal([ FETCH_DATE_LATER, FETCH_DATE ]); + }); - it('does not load content eagerly', () => { - for (const record of records) { - expect(() => record.content).to.throw('Content not defined'); - } - }); + it('returns records with correct IDs', () => { + expect(records.map(record => record.id)).to.have.members(expectedIds); + }); - it('exposes the metadata needed for feed entries', () => { - const [record] = records; + context('when no matching records exist', () => { + it('returns an empty array', async () => { + const result = await subject.findByServiceAndTermsType('non_existent_service', 'Non Existent Terms'); - expect(record.id).to.be.a('string'); - expect(record.serviceId).to.be.a('string'); - expect(record.termsType).to.be.a('string'); - expect(record.fetchDate).to.be.an.instanceof(Date); - expect(record.isFirstRecord).to.be.a('boolean'); - expect(record.isTechnicalUpgrade).to.be.a('boolean'); + expect(result).to.be.an('array').that.is.empty; }); }); + }); - context('when limit is smaller than the number of matching records', () => { - let records; + describe('#count', () => { + context('without 
filters', () => { + let count; before(async () => { - records = await subject.findRecent(2); - }); + await subject.save(new Version({ + serviceId: SERVICE_PROVIDER_ID, + termsType: TERMS_TYPE, + content: CONTENT, + fetchDate: FETCH_DATE, + snapshotIds: [SNAPSHOT_ID], + })); + await subject.save(new Version({ + serviceId: SERVICE_PROVIDER_ID, + termsType: TERMS_TYPE, + content: `${CONTENT} - updated`, + fetchDate: FETCH_DATE_LATER, + snapshotIds: [SNAPSHOT_ID], + })); + await subject.save(new Version({ + serviceId: SERVICE_PROVIDER_ID, + termsType: TERMS_TYPE, + content: `${CONTENT} - updated 2`, + isTechnicalUpgrade: true, + fetchDate: FETCH_DATE_EARLIER, + snapshotIds: [SNAPSHOT_ID], + })); - it('returns at most limit records', () => { - expect(records).to.have.length(2); + (count = await subject.count()); }); - it('returns the most recent records', () => { - for (const record of records) { - expect(record.fetchDate.getTime()).to.be.at.least(FETCH_DATE.getTime()); - } + after(() => subject.removeAll()); + + it('returns the proper count', () => { + expect(count).to.equal(3); }); }); - context('when a serviceId filter is given', () => { - let records; - + context('with serviceId and termsType filters', () => { before(async () => { - records = await subject.findRecent(10, { serviceId: SERVICE_PROVIDER_ID }); - }); - - it('returns only records for that service', () => { - for (const record of records) { - expect(record.serviceId).to.equal(SERVICE_PROVIDER_ID); - } + await subject.save(new Version({ + serviceId: SERVICE_PROVIDER_ID, + termsType: TERMS_TYPE, + content: CONTENT, + fetchDate: FETCH_DATE, + snapshotIds: [SNAPSHOT_ID], + })); + await subject.save(new Version({ + serviceId: SERVICE_PROVIDER_ID, + termsType: TERMS_TYPE, + content: `${CONTENT} - updated`, + fetchDate: FETCH_DATE_LATER, + snapshotIds: [SNAPSHOT_ID], + })); + await subject.save(new Version({ + serviceId: 'other_service', + termsType: 'Privacy Policy', + content: 'Other content', + fetchDate: 
FETCH_DATE, + snapshotIds: [SNAPSHOT_ID], + })); }); - it('returns all records that match', () => { - expect(records).to.have.length(3); - }); - }); + after(() => subject.removeAll()); - context('when both serviceId and termsType filters are given', () => { - let records; + it('returns count for specific service and terms type', async () => { + const filteredCount = await subject.count(SERVICE_PROVIDER_ID, TERMS_TYPE); - before(async () => { - records = await subject.findRecent(10, { serviceId: SERVICE_PROVIDER_ID, termsType: TERMS_TYPE }); + expect(filteredCount).to.equal(2); }); - it('returns only records for that service and terms type', () => { - for (const record of records) { - expect(record.serviceId).to.equal(SERVICE_PROVIDER_ID); - expect(record.termsType).to.equal(TERMS_TYPE); - } - }); + it('returns zero for non-existent service', async () => { + const filteredCount = await subject.count('non-existent-service', TERMS_TYPE); - it('returns all records that match', () => { - expect(records).to.have.length(2); + expect(filteredCount).to.equal(0); }); }); - context('when filters match no record', () => { - let records; - + context('with only serviceId filter', () => { before(async () => { - records = await subject.findRecent(10, { serviceId: 'unknown' }); + await subject.save(new Version({ + serviceId: SERVICE_PROVIDER_ID, + termsType: TERMS_TYPE, + content: CONTENT, + fetchDate: FETCH_DATE, + snapshotIds: [SNAPSHOT_ID], + })); + await subject.save(new Version({ + serviceId: SERVICE_PROVIDER_ID, + termsType: 'Different Terms', + content: 'Different content', + fetchDate: FETCH_DATE_LATER, + snapshotIds: [SNAPSHOT_ID], + })); + await subject.save(new Version({ + serviceId: 'other_service', + termsType: 'Privacy Policy', + content: 'Other content', + fetchDate: FETCH_DATE, + snapshotIds: [SNAPSHOT_ID], + })); }); - it('returns an empty array', () => { - expect(records).to.deep.equal([]); + after(() => subject.removeAll()); + + it('returns count for all terms 
types of a service', async () => { + const filteredCount = await subject.count(SERVICE_PROVIDER_ID); + + expect(filteredCount).to.equal(2); }); }); }); @@ -1337,8 +1350,8 @@ describe('MongoRepository', () => { } }); - it('returns records in ascending order', () => { - expect(records.map(record => record.fetchDate)).to.deep.equal([ FETCH_DATE_EARLIER, FETCH_DATE, FETCH_DATE_LATER ]); + it('returns records in descending order', () => { + expect(records.map(record => record.fetchDate)).to.deep.equal([ FETCH_DATE_LATER, FETCH_DATE, FETCH_DATE_EARLIER ]); }); }); diff --git a/src/collection-api/routes/feed.js b/src/collection-api/routes/feed.js index 5d63eb88f..97db22ebf 100644 --- a/src/collection-api/routes/feed.js +++ b/src/collection-api/routes/feed.js @@ -153,7 +153,7 @@ export default function feedRouter(services, versionsRepository, storageType) { const selfHref = `${baseUrl}/feed`; const feedId = `tag:${TAG_AUTHORITY}:feed:${collection.metadata?.id}`; - const versions = await versionsRepository.findRecent(getFeedLimit()); + const versions = await versionsRepository.findAll({ limit: getFeedLimit() }); const document = buildFeedDocument({ collection, storageType, selfHref, feedId, versions, baseUrl }); sendAtom(res, render(document)); @@ -196,7 +196,7 @@ export default function feedRouter(services, versionsRepository, storageType) { const selfHref = `${baseUrl}/feed/${encodeURIComponent(service.id)}`; const feedId = `tag:${TAG_AUTHORITY}:feed:${collection.metadata?.id}:${service.id}`; - const versions = await versionsRepository.findRecent(getFeedLimit(), { serviceId: service.id }); + const versions = await versionsRepository.findByService(service.id, { limit: getFeedLimit() }); const document = buildFeedDocument({ collection, storageType, selfHref, feedId, versions, baseUrl }); return sendAtom(res, render(document)); @@ -251,7 +251,7 @@ export default function feedRouter(services, versionsRepository, storageType) { const selfHref = 
`${baseUrl}/feed/${encodeURIComponent(service.id)}/${encodeURIComponent(termsType)}`; const feedId = `tag:${TAG_AUTHORITY}:feed:${collection.metadata?.id}:${service.id}:${termsType}`; - const versions = await versionsRepository.findRecent(getFeedLimit(), { serviceId: service.id, termsType }); + const versions = await versionsRepository.findByServiceAndTermsType(service.id, termsType, { limit: getFeedLimit() }); const document = buildFeedDocument({ collection, storageType, selfHref, feedId, versions, baseUrl }); return sendAtom(res, render(document)); From b0af4e6410933baa535fe2088d0ec2c343d97605 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Biron?= Date: Mon, 4 May 2026 12:04:55 +0200 Subject: [PATCH 15/21] Improve code readability --- src/collection-api/routes/feed.js | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/src/collection-api/routes/feed.js b/src/collection-api/routes/feed.js index 97db22ebf..aaf184288 100644 --- a/src/collection-api/routes/feed.js +++ b/src/collection-api/routes/feed.js @@ -35,16 +35,29 @@ function buildAbsoluteBaseUrl(req) { } function classifyRecordType(version) { - if (version.isFirstRecord) { return RECORD_TYPES.firstRecord; } - if (version.isTechnicalUpgrade) { return RECORD_TYPES.technicalUpgrade; } - - return RECORD_TYPES.change; + switch (true) { + case version.isFirstRecord: + return RECORD_TYPES.firstRecord; + case version.isTechnicalUpgrade: + return RECORD_TYPES.technicalUpgrade; + default: + return RECORD_TYPES.change; + } } function buildEntryTitle(version) { - let prefix = COMMIT_MESSAGE_PREFIXES.update; - - if (version.isFirstRecord) { prefix = COMMIT_MESSAGE_PREFIXES.startTracking; } else if (version.isTechnicalUpgrade) { prefix = COMMIT_MESSAGE_PREFIXES.technicalUpgrade; } + let prefix; + + switch (true) { + case version.isFirstRecord: + prefix = COMMIT_MESSAGE_PREFIXES.startTracking; + break; + case version.isTechnicalUpgrade: + prefix = 
COMMIT_MESSAGE_PREFIXES.technicalUpgrade; + break; + default: + prefix = COMMIT_MESSAGE_PREFIXES.update; + } return `${prefix} ${version.serviceId} ${version.termsType}`; } From 64aa0655d6c484545fe44922e3a7ec24b3d50add Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Biron?= Date: Mon, 4 May 2026 12:06:18 +0200 Subject: [PATCH 16/21] Factorize feed response --- src/collection-api/routes/feed.js | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/src/collection-api/routes/feed.js b/src/collection-api/routes/feed.js index aaf184288..907d5c30e 100644 --- a/src/collection-api/routes/feed.js +++ b/src/collection-api/routes/feed.js @@ -122,13 +122,11 @@ function buildFeedDocument({ collection, storageType, selfHref, feedId, versions }; } -function sendAtom(res, xml) { - res.set('Content-Type', 'application/atom+xml; charset=utf-8'); - res.status(200).send(xml); -} +function sendFeed(res, opts) { + const document = buildFeedDocument(opts); -function render(document) { - return js2xml(document, { compact: true, spaces: 2 }); + res.set('Content-Type', 'application/atom+xml; charset=utf-8'); + res.status(200).send(js2xml(document, { compact: true, spaces: 2 })); } /** @@ -167,9 +165,8 @@ export default function feedRouter(services, versionsRepository, storageType) { const feedId = `tag:${TAG_AUTHORITY}:feed:${collection.metadata?.id}`; const versions = await versionsRepository.findAll({ limit: getFeedLimit() }); - const document = buildFeedDocument({ collection, storageType, selfHref, feedId, versions, baseUrl }); - sendAtom(res, render(document)); + sendFeed(res, { collection, storageType, selfHref, feedId, versions, baseUrl }); }); /** @@ -210,9 +207,8 @@ export default function feedRouter(services, versionsRepository, storageType) { const feedId = `tag:${TAG_AUTHORITY}:feed:${collection.metadata?.id}:${service.id}`; const versions = await versionsRepository.findByService(service.id, { limit: getFeedLimit() }); - const document = 
buildFeedDocument({ collection, storageType, selfHref, feedId, versions, baseUrl }); - return sendAtom(res, render(document)); + return sendFeed(res, { collection, storageType, selfHref, feedId, versions, baseUrl }); }); /** @@ -265,9 +261,8 @@ export default function feedRouter(services, versionsRepository, storageType) { const feedId = `tag:${TAG_AUTHORITY}:feed:${collection.metadata?.id}:${service.id}:${termsType}`; const versions = await versionsRepository.findByServiceAndTermsType(service.id, termsType, { limit: getFeedLimit() }); - const document = buildFeedDocument({ collection, storageType, selfHref, feedId, versions, baseUrl }); - return sendAtom(res, render(document)); + return sendFeed(res, { collection, storageType, selfHref, feedId, versions, baseUrl }); }); return router; From 1128a584187242b35eae345381a0a6e8d35e9073 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Biron?= Date: Mon, 4 May 2026 13:54:23 +0200 Subject: [PATCH 17/21] Inject feed limit, author and tag authority --- config/default.json | 6 ++- src/collection-api/routes/feed.js | 62 ++++++++++---------------- src/collection-api/routes/feed.test.js | 6 ++- src/collection-api/routes/index.js | 3 +- 4 files changed, 34 insertions(+), 43 deletions(-) diff --git a/config/default.json b/config/default.json index 96309b6fb..c6cf33cad 100644 --- a/config/default.json +++ b/config/default.json @@ -50,7 +50,11 @@ }, "collection-api": { "feed": { - "limit": 100 + "limit": 100, + "author": { + "name": "Open Terms Archive Collection API" + }, + "tagAuthority": "opentermsarchive.org,2026" } } } diff --git a/src/collection-api/routes/feed.js b/src/collection-api/routes/feed.js index 907d5c30e..6d9ce9b89 100644 --- a/src/collection-api/routes/feed.js +++ b/src/collection-api/routes/feed.js @@ -1,4 +1,3 @@ -import config from 'config'; import express from 'express'; import { js2xml } from 'xml-js'; @@ -6,30 +5,12 @@ import { getCollection } from '../../archivist/collection/index.js'; import { 
COMMIT_MESSAGE_PREFIXES } from '../../archivist/recorder/repositories/git/dataMapper.js'; import { toISODateWithoutMilliseconds } from '../../archivist/utils/date.js'; -const TAG_AUTHORITY = 'opentermsarchive.org,2026'; -const FEED_AUTHOR_NAME = 'OTA-Bot'; -const DEFAULT_LIMIT = 100; - -function getFeedLimit() { - if (config.has('@opentermsarchive/engine.collection-api.feed.limit')) { - return config.get('@opentermsarchive/engine.collection-api.feed.limit'); - } - - return DEFAULT_LIMIT; -} - const RECORD_TYPES = { firstRecord: 'First record', technicalUpgrade: 'Technical upgrade', change: 'Change', }; -const SCHEMES = { - service: `tag:${TAG_AUTHORITY}:scheme:service`, - termsType: `tag:${TAG_AUTHORITY}:scheme:terms-type`, - recordType: `tag:${TAG_AUTHORITY}:scheme:record-type`, -}; - function buildAbsoluteBaseUrl(req) { return `${req.protocol}://${req.get('host')}${req.baseUrl}`; } @@ -70,11 +51,11 @@ function buildVersionLink(baseUrl, version) { return `${baseUrl}/version/${encodedService}/${encodedTermsType}/${encodedDate}`; } -function buildEntryId(collection, storageType, version) { - return `tag:${TAG_AUTHORITY}:version:${collection.metadata?.id}:${storageType}:${version.id}`; +function buildEntryId(tagAuthority, storageType, collection, version) { + return `tag:${tagAuthority}:version:${collection.metadata?.id}:${storageType}:${version.id}`; } -function buildEntry(collection, storageType, baseUrl, version) { +function buildEntry(tagAuthority, storageType, baseUrl, collection, version) { const apiLink = buildVersionLink(baseUrl, version); const githubCommitLink = collection.metadata?.versions && `${collection.metadata.versions}/commit/${version.id}`; @@ -85,19 +66,19 @@ function buildEntry(collection, storageType, baseUrl, version) { } return { - id: { _text: buildEntryId(collection, storageType, version) }, + id: { _text: buildEntryId(tagAuthority, storageType, collection, version) }, link: links, title: { _text: buildEntryTitle(version) }, updated: { 
_text: version.fetchDate.toISOString() }, category: [ - { _attributes: { term: version.serviceId, scheme: SCHEMES.service } }, - { _attributes: { term: version.termsType, scheme: SCHEMES.termsType } }, - { _attributes: { term: classifyRecordType(version), scheme: SCHEMES.recordType } }, + { _attributes: { term: version.serviceId, scheme: `tag:${tagAuthority}:scheme:service` } }, + { _attributes: { term: version.termsType, scheme: `tag:${tagAuthority}:scheme:terms-type` } }, + { _attributes: { term: classifyRecordType(version), scheme: `tag:${tagAuthority}:scheme:record-type` } }, ], }; } -function buildFeedDocument({ collection, storageType, selfHref, feedId, versions, baseUrl }) { +function buildFeedDocument({ tagAuthority, storageType, feedAuthorName, collection, selfHref, feedId, versions, baseUrl }) { const latestFetchDate = versions.length > 0 ? versions[0].fetchDate : new Date(); const feed = { @@ -107,14 +88,14 @@ function buildFeedDocument({ collection, storageType, selfHref, feedId, versions id: { _text: feedId }, updated: { _text: latestFetchDate.toISOString() }, link: { _attributes: { rel: 'self', href: selfHref } }, - author: { name: { _text: FEED_AUTHOR_NAME } }, + author: { name: { _text: feedAuthorName } }, }; if (collection.metadata?.logo) { feed.logo = { _text: collection.metadata.logo }; } - feed.entry = versions.map(version => buildEntry(collection, storageType, baseUrl, version)); + feed.entry = versions.map(version => buildEntry(tagAuthority, storageType, baseUrl, collection, version)); return { _declaration: { _attributes: { version: '1.0', encoding: 'utf-8' } }, @@ -133,13 +114,16 @@ function sendFeed(res, opts) { * @param {object} services The services to be exposed by the API * @param {object} versionsRepository The versions repository instance * @param {string} storageType The storage type identifier of the versions repository + * @param {number} feedLimit Maximum number of entries returned by feed endpoints + * @param {string} 
feedAuthorName Name used for the Atom feed-level author element + * @param {string} tagAuthority Tag URI authority used to mint feed and entry IDs (RFC 4151) * @returns {express.Router} The router instance * @swagger * tags: * name: Feeds * description: Atom feeds of version changes */ -export default function feedRouter(services, versionsRepository, storageType) { +export default function feedRouter(services, versionsRepository, storageType, feedLimit, feedAuthorName, tagAuthority) { const router = express.Router(); /** @@ -162,11 +146,11 @@ export default function feedRouter(services, versionsRepository, storageType) { const collection = await getCollection(); const baseUrl = buildAbsoluteBaseUrl(req); const selfHref = `${baseUrl}/feed`; - const feedId = `tag:${TAG_AUTHORITY}:feed:${collection.metadata?.id}`; + const feedId = `tag:${tagAuthority}:feed:${collection.metadata?.id}`; - const versions = await versionsRepository.findAll({ limit: getFeedLimit() }); + const versions = await versionsRepository.findAll({ limit: feedLimit }); - sendFeed(res, { collection, storageType, selfHref, feedId, versions, baseUrl }); + sendFeed(res, { tagAuthority, storageType, feedAuthorName, collection, selfHref, feedId, versions, baseUrl }); }); /** @@ -204,11 +188,11 @@ export default function feedRouter(services, versionsRepository, storageType) { const collection = await getCollection(); const baseUrl = buildAbsoluteBaseUrl(req); const selfHref = `${baseUrl}/feed/${encodeURIComponent(service.id)}`; - const feedId = `tag:${TAG_AUTHORITY}:feed:${collection.metadata?.id}:${service.id}`; + const feedId = `tag:${tagAuthority}:feed:${collection.metadata?.id}:${service.id}`; - const versions = await versionsRepository.findByService(service.id, { limit: getFeedLimit() }); + const versions = await versionsRepository.findByService(service.id, { limit: feedLimit }); - return sendFeed(res, { collection, storageType, selfHref, feedId, versions, baseUrl }); + return sendFeed(res, { 
tagAuthority, storageType, feedAuthorName, collection, selfHref, feedId, versions, baseUrl }); }); /** @@ -258,11 +242,11 @@ export default function feedRouter(services, versionsRepository, storageType) { const collection = await getCollection(); const baseUrl = buildAbsoluteBaseUrl(req); const selfHref = `${baseUrl}/feed/${encodeURIComponent(service.id)}/${encodeURIComponent(termsType)}`; - const feedId = `tag:${TAG_AUTHORITY}:feed:${collection.metadata?.id}:${service.id}:${termsType}`; + const feedId = `tag:${tagAuthority}:feed:${collection.metadata?.id}:${service.id}:${termsType}`; - const versions = await versionsRepository.findByServiceAndTermsType(service.id, termsType, { limit: getFeedLimit() }); + const versions = await versionsRepository.findByServiceAndTermsType(service.id, termsType, { limit: feedLimit }); - return sendFeed(res, { collection, storageType, selfHref, feedId, versions, baseUrl }); + return sendFeed(res, { tagAuthority, storageType, feedAuthorName, collection, selfHref, feedId, versions, baseUrl }); }); return router; diff --git a/src/collection-api/routes/feed.test.js b/src/collection-api/routes/feed.test.js index 8c60f3493..bd2e608b3 100644 --- a/src/collection-api/routes/feed.test.js +++ b/src/collection-api/routes/feed.test.js @@ -69,8 +69,10 @@ describe('Feed API', () => { expect(selfHrefMatch[1]).to.match(new RegExp(`${basePath}/v1/feed$`)); }); - it('has an author named OTA-Bot', () => { - expect(response.text).to.match(/[\s\S]*OTA-Bot<\/name>[\s\S]*<\/author>/); + it('has an author matching the configured feed author name', () => { + const expectedName = config.get('@opentermsarchive/engine.collection-api.feed.author.name'); + + expect(response.text).to.match(new RegExp(`[\\s\\S]*${expectedName}[\\s\\S]*`)); }); it('has a logo matching the collection logo', () => { diff --git a/src/collection-api/routes/index.js b/src/collection-api/routes/index.js index c24b8f5dd..b82274ae9 100644 --- a/src/collection-api/routes/index.js +++ 
b/src/collection-api/routes/index.js @@ -38,11 +38,12 @@ export default async function apiRouter(basePath) { const collection = await getCollection(); const versionsStorageConfig = config.get('@opentermsarchive/engine.recorder.versions.storage'); const versionsRepository = await RepositoryFactory.create(versionsStorageConfig).initialize(); + const feedConfig = config.get('@opentermsarchive/engine.collection-api.feed'); router.use(await metadataRouter(collection, services)); router.use(servicesRouter(services)); router.use(versionsRouter(versionsRepository)); - router.use(feedRouter(services, versionsRepository, versionsStorageConfig.type)); + router.use(feedRouter(services, versionsRepository, versionsStorageConfig.type, feedConfig.limit, feedConfig.author.name, feedConfig.tagAuthority)); return router; } From 6da0199464f25b647d1e45b5575bea76059d4296 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Biron?= Date: Mon, 4 May 2026 14:02:54 +0200 Subject: [PATCH 18/21] Factor out buildFeedId helper --- src/collection-api/routes/feed.js | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/collection-api/routes/feed.js b/src/collection-api/routes/feed.js index 6d9ce9b89..4d9b2fac4 100644 --- a/src/collection-api/routes/feed.js +++ b/src/collection-api/routes/feed.js @@ -55,6 +55,10 @@ function buildEntryId(tagAuthority, storageType, collection, version) { return `tag:${tagAuthority}:version:${collection.metadata?.id}:${storageType}:${version.id}`; } +function buildFeedId(tagAuthority, collection, ...suffix) { + return [ `tag:${tagAuthority}:feed`, collection.metadata?.id, ...suffix ].join(':'); +} + function buildEntry(tagAuthority, storageType, baseUrl, collection, version) { const apiLink = buildVersionLink(baseUrl, version); const githubCommitLink = collection.metadata?.versions && `${collection.metadata.versions}/commit/${version.id}`; @@ -146,7 +150,7 @@ export default function feedRouter(services, versionsRepository, storageType, 
fe const collection = await getCollection(); const baseUrl = buildAbsoluteBaseUrl(req); const selfHref = `${baseUrl}/feed`; - const feedId = `tag:${tagAuthority}:feed:${collection.metadata?.id}`; + const feedId = buildFeedId(tagAuthority, collection); const versions = await versionsRepository.findAll({ limit: feedLimit }); @@ -188,7 +192,7 @@ export default function feedRouter(services, versionsRepository, storageType, fe const collection = await getCollection(); const baseUrl = buildAbsoluteBaseUrl(req); const selfHref = `${baseUrl}/feed/${encodeURIComponent(service.id)}`; - const feedId = `tag:${tagAuthority}:feed:${collection.metadata?.id}:${service.id}`; + const feedId = buildFeedId(tagAuthority, collection, service.id); const versions = await versionsRepository.findByService(service.id, { limit: feedLimit }); @@ -242,7 +246,7 @@ export default function feedRouter(services, versionsRepository, storageType, fe const collection = await getCollection(); const baseUrl = buildAbsoluteBaseUrl(req); const selfHref = `${baseUrl}/feed/${encodeURIComponent(service.id)}/${encodeURIComponent(termsType)}`; - const feedId = `tag:${tagAuthority}:feed:${collection.metadata?.id}:${service.id}:${termsType}`; + const feedId = buildFeedId(tagAuthority, collection, service.id, termsType); const versions = await versionsRepository.findByServiceAndTermsType(service.id, termsType, { limit: feedLimit }); From 68cbcff3334c9a1d76dba29a6fe75082e7021004 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Biron?= Date: Mon, 4 May 2026 14:04:35 +0200 Subject: [PATCH 19/21] Factor out buildSchemes helper --- src/collection-api/routes/feed.js | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/src/collection-api/routes/feed.js b/src/collection-api/routes/feed.js index 4d9b2fac4..46d19013c 100644 --- a/src/collection-api/routes/feed.js +++ b/src/collection-api/routes/feed.js @@ -59,9 +59,18 @@ function buildFeedId(tagAuthority, collection, ...suffix) { return 
[ `tag:${tagAuthority}:feed`, collection.metadata?.id, ...suffix ].join(':'); } +function buildSchemes(tagAuthority) { + return { + service: `tag:${tagAuthority}:scheme:service`, + termsType: `tag:${tagAuthority}:scheme:terms-type`, + recordType: `tag:${tagAuthority}:scheme:record-type`, + }; +} + function buildEntry(tagAuthority, storageType, baseUrl, collection, version) { const apiLink = buildVersionLink(baseUrl, version); const githubCommitLink = collection.metadata?.versions && `${collection.metadata.versions}/commit/${version.id}`; + const schemes = buildSchemes(tagAuthority); const links = [{ _attributes: { rel: 'alternate', type: 'text/html', href: githubCommitLink || apiLink } }]; @@ -75,9 +84,9 @@ function buildEntry(tagAuthority, storageType, baseUrl, collection, version) { title: { _text: buildEntryTitle(version) }, updated: { _text: version.fetchDate.toISOString() }, category: [ - { _attributes: { term: version.serviceId, scheme: `tag:${tagAuthority}:scheme:service` } }, - { _attributes: { term: version.termsType, scheme: `tag:${tagAuthority}:scheme:terms-type` } }, - { _attributes: { term: classifyRecordType(version), scheme: `tag:${tagAuthority}:scheme:record-type` } }, + { _attributes: { term: version.serviceId, scheme: schemes.service } }, + { _attributes: { term: version.termsType, scheme: schemes.termsType } }, + { _attributes: { term: classifyRecordType(version), scheme: schemes.recordType } }, ], }; } From 7ae4e8d454431268b76bc45b14f981eb916c505e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Biron?= Date: Mon, 4 May 2026 15:54:16 +0200 Subject: [PATCH 20/21] Hardcode feed tag authority and author name --- config/default.json | 6 +--- src/collection-api/routes/feed.js | 47 +++++++++++++------------- src/collection-api/routes/feed.test.js | 6 ++-- src/collection-api/routes/index.js | 2 +- 4 files changed, 28 insertions(+), 33 deletions(-) diff --git a/config/default.json b/config/default.json index c6cf33cad..96309b6fb 100644 --- 
a/config/default.json +++ b/config/default.json @@ -50,11 +50,7 @@ }, "collection-api": { "feed": { - "limit": 100, - "author": { - "name": "Open Terms Archive Collection API" - }, - "tagAuthority": "opentermsarchive.org,2026" + "limit": 100 } } } diff --git a/src/collection-api/routes/feed.js b/src/collection-api/routes/feed.js index 46d19013c..eb3caa792 100644 --- a/src/collection-api/routes/feed.js +++ b/src/collection-api/routes/feed.js @@ -11,6 +11,9 @@ const RECORD_TYPES = { change: 'Change', }; +const TAG_AUTHORITY = 'opentermsarchive.org,2026'; +const FEED_AUTHOR_NAME = 'Open Terms Archive engine'; + function buildAbsoluteBaseUrl(req) { return `${req.protocol}://${req.get('host')}${req.baseUrl}`; } @@ -51,26 +54,26 @@ function buildVersionLink(baseUrl, version) { return `${baseUrl}/version/${encodedService}/${encodedTermsType}/${encodedDate}`; } -function buildEntryId(tagAuthority, storageType, collection, version) { - return `tag:${tagAuthority}:version:${collection.metadata?.id}:${storageType}:${version.id}`; +function buildEntryId(storageType, collection, version) { + return `tag:${TAG_AUTHORITY}:version:${collection.metadata?.id}:${storageType}:${version.id}`; } -function buildFeedId(tagAuthority, collection, ...suffix) { - return [ `tag:${tagAuthority}:feed`, collection.metadata?.id, ...suffix ].join(':'); +function buildFeedId(collection, ...suffix) { + return [ `tag:${TAG_AUTHORITY}:feed`, collection.metadata?.id, ...suffix ].join(':'); } -function buildSchemes(tagAuthority) { +function buildSchemes() { return { - service: `tag:${tagAuthority}:scheme:service`, - termsType: `tag:${tagAuthority}:scheme:terms-type`, - recordType: `tag:${tagAuthority}:scheme:record-type`, + service: `tag:${TAG_AUTHORITY}:scheme:service`, + termsType: `tag:${TAG_AUTHORITY}:scheme:terms-type`, + recordType: `tag:${TAG_AUTHORITY}:scheme:record-type`, }; } -function buildEntry(tagAuthority, storageType, baseUrl, collection, version) { +function buildEntry(storageType, 
baseUrl, collection, version) { const apiLink = buildVersionLink(baseUrl, version); const githubCommitLink = collection.metadata?.versions && `${collection.metadata.versions}/commit/${version.id}`; - const schemes = buildSchemes(tagAuthority); + const schemes = buildSchemes(); const links = [{ _attributes: { rel: 'alternate', type: 'text/html', href: githubCommitLink || apiLink } }]; @@ -79,7 +82,7 @@ function buildEntry(tagAuthority, storageType, baseUrl, collection, version) { } return { - id: { _text: buildEntryId(tagAuthority, storageType, collection, version) }, + id: { _text: buildEntryId(storageType, collection, version) }, link: links, title: { _text: buildEntryTitle(version) }, updated: { _text: version.fetchDate.toISOString() }, @@ -91,7 +94,7 @@ function buildEntry(tagAuthority, storageType, baseUrl, collection, version) { }; } -function buildFeedDocument({ tagAuthority, storageType, feedAuthorName, collection, selfHref, feedId, versions, baseUrl }) { +function buildFeedDocument({ storageType, collection, selfHref, feedId, versions, baseUrl }) { const latestFetchDate = versions.length > 0 ? 
versions[0].fetchDate : new Date(); const feed = { @@ -101,14 +104,14 @@ function buildFeedDocument({ tagAuthority, storageType, feedAuthorName, collecti id: { _text: feedId }, updated: { _text: latestFetchDate.toISOString() }, link: { _attributes: { rel: 'self', href: selfHref } }, - author: { name: { _text: feedAuthorName } }, + author: { name: { _text: FEED_AUTHOR_NAME } }, }; if (collection.metadata?.logo) { feed.logo = { _text: collection.metadata.logo }; } - feed.entry = versions.map(version => buildEntry(tagAuthority, storageType, baseUrl, collection, version)); + feed.entry = versions.map(version => buildEntry(storageType, baseUrl, collection, version)); return { _declaration: { _attributes: { version: '1.0', encoding: 'utf-8' } }, @@ -128,15 +131,13 @@ function sendFeed(res, opts) { * @param {object} versionsRepository The versions repository instance * @param {string} storageType The storage type identifier of the versions repository * @param {number} feedLimit Maximum number of entries returned by feed endpoints - * @param {string} feedAuthorName Name used for the Atom feed-level author element - * @param {string} tagAuthority Tag URI authority used to mint feed and entry IDs (RFC 4151) * @returns {express.Router} The router instance * @swagger * tags: * name: Feeds * description: Atom feeds of version changes */ -export default function feedRouter(services, versionsRepository, storageType, feedLimit, feedAuthorName, tagAuthority) { +export default function feedRouter(services, versionsRepository, storageType, feedLimit) { const router = express.Router(); /** @@ -159,11 +160,11 @@ export default function feedRouter(services, versionsRepository, storageType, fe const collection = await getCollection(); const baseUrl = buildAbsoluteBaseUrl(req); const selfHref = `${baseUrl}/feed`; - const feedId = buildFeedId(tagAuthority, collection); + const feedId = buildFeedId(collection); const versions = await versionsRepository.findAll({ limit: feedLimit }); - 
sendFeed(res, { tagAuthority, storageType, feedAuthorName, collection, selfHref, feedId, versions, baseUrl }); + sendFeed(res, { storageType, collection, selfHref, feedId, versions, baseUrl }); }); /** @@ -201,11 +202,11 @@ export default function feedRouter(services, versionsRepository, storageType, fe const collection = await getCollection(); const baseUrl = buildAbsoluteBaseUrl(req); const selfHref = `${baseUrl}/feed/${encodeURIComponent(service.id)}`; - const feedId = buildFeedId(tagAuthority, collection, service.id); + const feedId = buildFeedId(collection, service.id); const versions = await versionsRepository.findByService(service.id, { limit: feedLimit }); - return sendFeed(res, { tagAuthority, storageType, feedAuthorName, collection, selfHref, feedId, versions, baseUrl }); + return sendFeed(res, { storageType, collection, selfHref, feedId, versions, baseUrl }); }); /** @@ -255,11 +256,11 @@ export default function feedRouter(services, versionsRepository, storageType, fe const collection = await getCollection(); const baseUrl = buildAbsoluteBaseUrl(req); const selfHref = `${baseUrl}/feed/${encodeURIComponent(service.id)}/${encodeURIComponent(termsType)}`; - const feedId = buildFeedId(tagAuthority, collection, service.id, termsType); + const feedId = buildFeedId(collection, service.id, termsType); const versions = await versionsRepository.findByServiceAndTermsType(service.id, termsType, { limit: feedLimit }); - return sendFeed(res, { tagAuthority, storageType, feedAuthorName, collection, selfHref, feedId, versions, baseUrl }); + return sendFeed(res, { storageType, collection, selfHref, feedId, versions, baseUrl }); }); return router; diff --git a/src/collection-api/routes/feed.test.js b/src/collection-api/routes/feed.test.js index bd2e608b3..753f5e2ad 100644 --- a/src/collection-api/routes/feed.test.js +++ b/src/collection-api/routes/feed.test.js @@ -69,10 +69,8 @@ describe('Feed API', () => { expect(selfHrefMatch[1]).to.match(new 
RegExp(`${basePath}/v1/feed$`)); }); - it('has an author matching the configured feed author name', () => { - const expectedName = config.get('@opentermsarchive/engine.collection-api.feed.author.name'); - - expect(response.text).to.match(new RegExp(`[\\s\\S]*${expectedName}[\\s\\S]*`)); + it('has an author matching the feed author name', () => { + expect(response.text).to.match(/[\s\S]*Open Terms Archive engine<\/name>[\s\S]*<\/author>/); }); it('has a logo matching the collection logo', () => { diff --git a/src/collection-api/routes/index.js b/src/collection-api/routes/index.js index b82274ae9..16df0bbb8 100644 --- a/src/collection-api/routes/index.js +++ b/src/collection-api/routes/index.js @@ -43,7 +43,7 @@ export default async function apiRouter(basePath) { router.use(await metadataRouter(collection, services)); router.use(servicesRouter(services)); router.use(versionsRouter(versionsRepository)); - router.use(feedRouter(services, versionsRepository, versionsStorageConfig.type, feedConfig.limit, feedConfig.author.name, feedConfig.tagAuthority)); + router.use(feedRouter(services, versionsRepository, versionsStorageConfig.type, feedConfig.limit)); return router; } From e26b940cc25d2eb71948690a6f1c110c5b15b5d6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Biron?= Date: Mon, 4 May 2026 17:21:26 +0200 Subject: [PATCH 21/21] Replace feed dual links with URL template --- src/collection-api/routes/feed.js | 36 +++++------- src/collection-api/routes/feed.test.js | 77 +++++++++++++++++++++++--- src/collection-api/routes/index.js | 2 +- 3 files changed, 84 insertions(+), 31 deletions(-) diff --git a/src/collection-api/routes/feed.js b/src/collection-api/routes/feed.js index eb3caa792..e4ec32801 100644 --- a/src/collection-api/routes/feed.js +++ b/src/collection-api/routes/feed.js @@ -70,20 +70,13 @@ function buildSchemes() { }; } -function buildEntry(storageType, baseUrl, collection, version) { - const apiLink = buildVersionLink(baseUrl, version); - const 
githubCommitLink = collection.metadata?.versions && `${collection.metadata.versions}/commit/${version.id}`; +function buildEntry(storageType, versionUrlTemplate, baseUrl, collection, version) { + const href = versionUrlTemplate?.replace('%VERSION_ID', version.id) ?? buildVersionLink(baseUrl, version); const schemes = buildSchemes(); - const links = [{ _attributes: { rel: 'alternate', type: 'text/html', href: githubCommitLink || apiLink } }]; - - if (githubCommitLink) { - links.push({ _attributes: { rel: 'related', type: 'text/html', href: apiLink } }); - } - return { id: { _text: buildEntryId(storageType, collection, version) }, - link: links, + link: { _attributes: { rel: 'alternate', type: 'text/html', href } }, title: { _text: buildEntryTitle(version) }, updated: { _text: version.fetchDate.toISOString() }, category: [ @@ -94,7 +87,7 @@ function buildEntry(storageType, baseUrl, collection, version) { }; } -function buildFeedDocument({ storageType, collection, selfHref, feedId, versions, baseUrl }) { +function buildFeedDocument({ storageType, versionUrlTemplate, collection, selfHref, feedId, versions, baseUrl }) { const latestFetchDate = versions.length > 0 ? 
versions[0].fetchDate : new Date(); const feed = { @@ -111,7 +104,7 @@ function buildFeedDocument({ storageType, collection, selfHref, feedId, versions feed.logo = { _text: collection.metadata.logo }; } - feed.entry = versions.map(version => buildEntry(storageType, baseUrl, collection, version)); + feed.entry = versions.map(version => buildEntry(storageType, versionUrlTemplate, baseUrl, collection, version)); return { _declaration: { _attributes: { version: '1.0', encoding: 'utf-8' } }, @@ -127,17 +120,18 @@ function sendFeed(res, opts) { } /** - * @param {object} services The services to be exposed by the API - * @param {object} versionsRepository The versions repository instance - * @param {string} storageType The storage type identifier of the versions repository - * @param {number} feedLimit Maximum number of entries returned by feed endpoints - * @returns {express.Router} The router instance + * @param {object} services The services to be exposed by the API + * @param {object} versionsRepository The versions repository instance + * @param {string} storageType The storage type identifier of the versions repository + * @param {number} feedLimit Maximum number of entries returned by feed endpoints + * @param {string} [versionUrlTemplate] Optional URL template with %VERSION_ID placeholder; when set, replaces the API link as each entry's alternate href + * @returns {express.Router} The router instance * @swagger * tags: * name: Feeds * description: Atom feeds of version changes */ -export default function feedRouter(services, versionsRepository, storageType, feedLimit) { +export default function feedRouter(services, versionsRepository, storageType, feedLimit, versionUrlTemplate) { const router = express.Router(); /** @@ -164,7 +158,7 @@ export default function feedRouter(services, versionsRepository, storageType, fe const versions = await versionsRepository.findAll({ limit: feedLimit }); - sendFeed(res, { storageType, collection, selfHref, feedId, versions, baseUrl 
}); + sendFeed(res, { storageType, versionUrlTemplate, collection, selfHref, feedId, versions, baseUrl }); }); /** @@ -206,7 +200,7 @@ export default function feedRouter(services, versionsRepository, storageType, fe const versions = await versionsRepository.findByService(service.id, { limit: feedLimit }); - return sendFeed(res, { storageType, collection, selfHref, feedId, versions, baseUrl }); + return sendFeed(res, { storageType, versionUrlTemplate, collection, selfHref, feedId, versions, baseUrl }); }); /** @@ -260,7 +254,7 @@ export default function feedRouter(services, versionsRepository, storageType, fe const versions = await versionsRepository.findByServiceAndTermsType(service.id, termsType, { limit: feedLimit }); - return sendFeed(res, { storageType, collection, selfHref, feedId, versions, baseUrl }); + return sendFeed(res, { storageType, versionUrlTemplate, collection, selfHref, feedId, versions, baseUrl }); }); return router; diff --git a/src/collection-api/routes/feed.test.js b/src/collection-api/routes/feed.test.js index 753f5e2ad..5722128bf 100644 --- a/src/collection-api/routes/feed.test.js +++ b/src/collection-api/routes/feed.test.js @@ -1,13 +1,17 @@ import { expect } from 'chai'; import config from 'config'; +import express from 'express'; import supertest from 'supertest'; import { getCollection } from '../../archivist/collection/index.js'; import RepositoryFactory from '../../archivist/recorder/repositories/factory.js'; +import * as Services from '../../archivist/services/index.js'; import Version from '../../archivist/recorder/version.js'; import { toISODateWithoutMilliseconds } from '../../archivist/utils/date.js'; import app from '../server.js'; +import feedRouter from './feed.js'; + const basePath = config.get('@opentermsarchive/engine.collection-api.basePath'); const request = supertest(app); const storageConfig = config.get('@opentermsarchive/engine.recorder.versions.storage'); @@ -159,18 +163,17 @@ describe('Feed API', () => { 
expect(firstEntry).to.include(`${expected}`); }); - it('has an alternate link to the GitHub commit', async () => { - const collection = await getCollection(); + it('has an alternate link to the API version endpoint', () => { const href = firstEntry.match(/]*rel="alternate"[^>]*href="([^"]+)"/)[1]; + const expectedPathFragment = `/version/${encodeURIComponent('service-2')}/${encodeURIComponent('Privacy Policy')}/${encodeURIComponent(toISODateWithoutMilliseconds(FETCH_DATE_UPGRADE))}`; - expect(href).to.equal(`${collection.metadata.versions}/commit/${savedVersions.technicalUpgradeRecord.id}`); + expect(href).to.include(expectedPathFragment); }); - it('has a related link to the version API endpoint', () => { - const href = firstEntry.match(/]*rel="related"[^>]*href="([^"]+)"/)[1]; - const expectedPathFragment = `/version/${encodeURIComponent('service-2')}/${encodeURIComponent('Privacy Policy')}/${encodeURIComponent(toISODateWithoutMilliseconds(FETCH_DATE_UPGRADE))}`; + it('has exactly one link per entry', () => { + const links = firstEntry.match(/]*\/>/g) || []; - expect(href).to.include(expectedPathFragment); + expect(links).to.have.length(1); }); it('has a type="text/html" on the alternate link', () => { @@ -406,8 +409,9 @@ describe('Feed API', () => { expect(href).to.not.include('Service B!'); }); - it('URL-encodes spaces and special characters in entry related links', () => { - const href = response.text.match(/]*rel="related"[^>]*href="([^"]+)"/)[1]; + it('URL-encodes spaces and special characters in entry alternate links', () => { + const entry = response.text.match(/[\s\S]*?<\/entry>/)[0]; + const href = entry.match(/]*rel="alternate"[^>]*href="([^"]+)"/)[1]; expect(href).to.include('Service%20B!'); expect(href).to.include('Privacy%20Policy'); @@ -506,4 +510,59 @@ describe('Feed API', () => { }); }); }); + + describe('entry links with versionUrlTemplate configured', () => { + const TEMPLATE = 'https://example.test/v/%VERSION_ID'; + + let response; + let 
repository; + let savedVersion; + + before(async function () { + this.timeout(5000); + repository = RepositoryFactory.create(storageConfig); + await repository.initialize(); + + savedVersion = await repository.save(new Version({ + serviceId: 'service-1', + termsType: 'Terms of Service', + content: 'content', + fetchDate: new Date('2024-01-01T00:00:00Z'), + snapshotIds: ['s1'], + })); + + const services = await Services.load(); + const templatedApp = express(); + + templatedApp.use(feedRouter(services, repository, storageConfig.type, 10, TEMPLATE)); + + response = await supertest(templatedApp).get('/feed'); + }); + + after(() => repository.removeAll()); + + it('interpolates %VERSION_ID into the alternate link', () => { + const href = response.text.match(/[\s\S]*?]*rel="alternate"[^>]*href="([^"]+)"/)[1]; + + expect(href).to.equal(`https://example.test/v/${savedVersion.id}`); + }); + + it('does not point to the API for entry links', () => { + const entries = response.text.match(/[\s\S]*?<\/entry>/g) || []; + + for (const entry of entries) { + expect(entry).to.not.match(/]*href="[^"]*\/version\//); + } + }); + + it('still emits exactly one link per entry', () => { + const entries = response.text.match(/[\s\S]*?<\/entry>/g) || []; + + for (const entry of entries) { + const links = entry.match(/]*\/>/g) || []; + + expect(links).to.have.length(1); + } + }); + }); }); diff --git a/src/collection-api/routes/index.js b/src/collection-api/routes/index.js index 16df0bbb8..34e720470 100644 --- a/src/collection-api/routes/index.js +++ b/src/collection-api/routes/index.js @@ -43,7 +43,7 @@ export default async function apiRouter(basePath) { router.use(await metadataRouter(collection, services)); router.use(servicesRouter(services)); router.use(versionsRouter(versionsRepository)); - router.use(feedRouter(services, versionsRepository, versionsStorageConfig.type, feedConfig.limit)); + router.use(feedRouter(services, versionsRepository, versionsStorageConfig.type, feedConfig.limit, 
feedConfig.versionUrlTemplate)); return router; }