diff --git a/.eslintrc.yaml b/.eslintrc.yaml index 85702b255..3731975fa 100644 --- a/.eslintrc.yaml +++ b/.eslintrc.yaml @@ -37,6 +37,9 @@ rules: - error - always-multiline consistent-return: 0 + curly: + - error + - all function-paren-newline: - error - multiline diff --git a/CHANGELOG.md b/CHANGELOG.md index ad05dcd96..2b9a99268 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,21 @@ All changes that impact users of this module are documented in this file, in the [Common Changelog](https://common-changelog.org) format with some additional specifications defined in the CONTRIBUTING file. This codebase adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## Unreleased [major] + +> Development of this release was supported by [Reset Tech](https://www.reset.tech). + +### Added + +- Add `GET /feed` endpoint on the Collection API exposing an Atom feed of the latest version changes across the whole collection +- Add `GET /feed/:serviceId` endpoint on the Collection API exposing an Atom feed scoped to a single service +- Add `GET /feed/:serviceId/:termsType` endpoint on the Collection API exposing an Atom feed scoped to a single service and terms type +- Add [`@opentermsarchive/engine.collection-api.feed.limit`](https://docs.opentermsarchive.org/collections/reference/configuration/) configuration option controlling the maximum number of entries returned by feed endpoints (default: `100`) + +### Changed + +- **Breaking:** Resolve `serviceId` path parameter case-sensitively on the `GET /service/:serviceId` endpoint, in line with the documented service ID format; clients relying on case-insensitive matching must now use the exact ID casing + ## 11.0.2 - 2026-04-14 > Development of this release was supported by [Reset Tech](https://www.reset.tech). 
diff --git a/config/default.json b/config/default.json index c044f2939..96309b6fb 100644 --- a/config/default.json +++ b/config/default.json @@ -47,6 +47,11 @@ }, "dataset": { "publishingSchedule": "30 8 * * MON" + }, + "collection-api": { + "feed": { + "limit": 100 + } } } } diff --git a/config/test.json b/config/test.json index cf14b8be3..050fd5b79 100644 --- a/config/test.json +++ b/config/test.json @@ -47,7 +47,10 @@ }, "collection-api": { "port": 3000, - "basePath": "/collection-api" + "basePath": "/collection-api", + "feed": { + "limit": 3 + } } } } diff --git a/package-lock.json b/package-lock.json index 3e11f1cf9..2292b55ce 100644 --- a/package-lock.json +++ b/package-lock.json @@ -58,7 +58,8 @@ "swagger-ui-express": "^5.0.1", "turndown": "^7.2.1", "winston": "^3.17.0", - "winston-mail": "^2.0.0" + "winston-mail": "^2.0.0", + "xml-js": "^1.6.11" }, "bin": { "ota": "bin/ota.js" @@ -1273,7 +1274,6 @@ } ], "license": "MIT", - "peer": true, "engines": { "node": ">=20.19.0" }, @@ -1320,7 +1320,6 @@ } ], "license": "MIT", - "peer": true, "engines": { "node": ">=20.19.0" } @@ -1639,7 +1638,6 @@ "integrity": "sha512-jCs9ldd7NwzpgXDIf6P3+NrHh9/sD6CQdxHyjQI+h/6rDNo88ypBxxz45UDuZHz9r3tNz7N/VInSVoVdtXEI4A==", "devOptional": true, "license": "MIT", - "peer": true, "engines": { "node": "^14.21.3 || >=16" }, @@ -1793,7 +1791,6 @@ "resolved": "https://registry.npmjs.org/@octokit/core/-/core-7.0.4.tgz", "integrity": "sha512-jOT8V1Ba5BdC79sKrRWDdMT5l1R+XNHTPR6CPWzUP2EcfAcvIHZWF0eAbmRcpOOP5gVIwnqNg0C4nvh6Abc3OA==", "license": "MIT", - "peer": true, "dependencies": { "@octokit/auth-token": "^6.0.0", "@octokit/graphql": "^9.0.1", @@ -2294,8 +2291,7 @@ "version": "20.7.0", "resolved": "https://registry.npmjs.org/@types/node/-/node-20.7.0.tgz", "integrity": "sha512-zI22/pJW2wUZOVyguFaUL1HABdmSVxpXrzIqkjsHmyUjNhPoWM1CKfvVuXfetHhIok4RY573cqS0mZ1SJEnoTg==", - "license": "MIT", - "peer": true + "license": "MIT" }, "node_modules/@types/triple-beam": { "version": "1.3.5", @@ -2386,7 
+2382,6 @@ "resolved": "https://registry.npmjs.org/acorn/-/acorn-8.15.0.tgz", "integrity": "sha512-NZyJarBfL7nWwIq+FDL6Zp/yHEhePMNnnJ0y3qfieCrmNvYct8uvtiV41UvlSe6apAfk0fY1FbWx+NwfmpvtTg==", "license": "MIT", - "peer": true, "bin": { "acorn": "bin/acorn" }, @@ -3108,7 +3103,6 @@ "resolved": "https://registry.npmjs.org/chai/-/chai-6.0.1.tgz", "integrity": "sha512-/JOoU2//6p5vCXh00FpNgtlw0LjvhGttaWc+y7wpW9yjBm3ys0dI8tSKZxIOgNruz5J0RleccatSIC3uxEZP0g==", "license": "MIT", - "peer": true, "engines": { "node": ">=18" } @@ -3569,7 +3563,6 @@ "resolved": "https://registry.npmjs.org/cosmiconfig/-/cosmiconfig-9.0.0.tgz", "integrity": "sha512-itvL5h8RETACmOTFc4UfIyB2RfEHi71Ax6E/PivVxq9NseKbOWpeyHEOIbmAw1rs8Ak0VursQNww7lf7YtUwzg==", "license": "MIT", - "peer": true, "dependencies": { "env-paths": "^2.2.1", "import-fresh": "^3.3.0", @@ -3989,8 +3982,7 @@ "version": "0.0.1495869", "resolved": "https://registry.npmjs.org/devtools-protocol/-/devtools-protocol-0.0.1495869.tgz", "integrity": "sha512-i+bkd9UYFis40RcnkW7XrOprCujXRAHg62IVh/Ah3G8MmNXpCGt1m0dTFhSdx/AVs8XEMbdOGRwdkR1Bcta8AA==", - "license": "BSD-3-Clause", - "peer": true + "license": "BSD-3-Clause" }, "node_modules/dezalgo": { "version": "1.0.4", @@ -4482,7 +4474,6 @@ "integrity": "sha512-ypowyDxpVSYpkXr9WPv2PAZCtNip1Mv5KTW0SCurXv/9iOpcrH9PaqUElksqEB6pChqHGDRCFTyrZlGhnLNGiA==", "deprecated": "This version is no longer supported. 
Please see https://eslint.org/version-support for other options.", "license": "MIT", - "peer": true, "dependencies": { "@eslint-community/eslint-utils": "^4.2.0", "@eslint-community/regexpp": "^4.6.1", @@ -4624,7 +4615,6 @@ "resolved": "https://registry.npmjs.org/eslint-plugin-import/-/eslint-plugin-import-2.32.0.tgz", "integrity": "sha512-whOE1HFo/qJDyX4SnXzP4N6zOWn79WhnCUY/iDR0mPfQZO8wcYE4JClzI2oZrhBnnMUCBCHZhO6VQyoBU95mZA==", "license": "MIT", - "peer": true, "dependencies": { "@rtsao/scc": "^1.1.0", "array-includes": "^3.1.9", @@ -8870,7 +8860,6 @@ "integrity": "sha512-QabGIvu7F0hAMiKGHZCIRHMb6UoH0QAJA2OaqxEU2tL5noXPrxUcotg2l3ttOA4p1PFnVIGkr6PXRAWlM2evVQ==", "hasInstallScript": true, "license": "Apache-2.0", - "peer": true, "dependencies": { "@puppeteer/browsers": "2.10.10", "chromium-bidi": "8.0.0", @@ -8926,7 +8915,6 @@ "resolved": "https://registry.npmjs.org/puppeteer-extra/-/puppeteer-extra-3.3.6.tgz", "integrity": "sha512-rsLBE/6mMxAjlLd06LuGacrukP2bqbzKCLzV1vrhHFavqQE/taQ2UXv3H5P0Ls7nsrASa+6x3bDbXHpqMwq+7A==", "license": "MIT", - "peer": true, "dependencies": { "@types/debug": "^4.1.0", "debug": "^4.1.1", @@ -9887,7 +9875,6 @@ "resolved": "https://registry.npmjs.org/socks/-/socks-2.8.7.tgz", "integrity": "sha512-HLpt+uLy/pxB+bum/9DzAgiKS8CX1EvbWxI4zlmgGCExImLdiad2iCwXT5Z4c9c3Eq8rP2318mPW2c+QbtjK8A==", "license": "MIT", - "peer": true, "dependencies": { "ip-address": "^10.0.1", "smart-buffer": "^4.2.0" @@ -11334,6 +11321,18 @@ } } }, + "node_modules/xml-js": { + "version": "1.6.11", + "resolved": "https://registry.npmjs.org/xml-js/-/xml-js-1.6.11.tgz", + "integrity": "sha512-7rVi2KMfwfWFl+GpPg6m80IVMWXLRjO+PxTq7V2CDhoGak0wzYzFgUY2m4XJ47OGdXd8eLE8EmwfAmdjw7lC1g==", + "license": "MIT", + "dependencies": { + "sax": "^1.2.4" + }, + "bin": { + "xml-js": "bin/cli.js" + } + }, "node_modules/xml-name-validator": { "version": "5.0.0", "resolved": "https://registry.npmjs.org/xml-name-validator/-/xml-name-validator-5.0.0.tgz", diff --git a/package.json b/package.json 
index f21c5f107..db0fbd146 100644 --- a/package.json +++ b/package.json @@ -100,7 +100,8 @@ "swagger-ui-express": "^5.0.1", "turndown": "^7.2.1", "winston": "^3.17.0", - "winston-mail": "^2.0.0" + "winston-mail": "^2.0.0", + "xml-js": "^1.6.11" }, "devDependencies": { "@commitlint/cli": "^19.8.1", diff --git a/scripts/reporter/duplicate/index.js b/scripts/reporter/duplicate/index.js index d2b508770..22e13b1a0 100644 --- a/scripts/reporter/duplicate/index.js +++ b/scripts/reporter/duplicate/index.js @@ -39,7 +39,7 @@ async function removeDuplicateIssues() { } for (const [ title, duplicateIssues ] of issuesByTitle) { - if (duplicateIssues.length === 1) continue; + if (duplicateIssues.length === 1) { continue; } const originalIssue = duplicateIssues.reduce((oldest, current) => (new Date(current.created_at) < new Date(oldest.created_at) ? current : oldest)); diff --git a/src/archivist/collection/index.test.js b/src/archivist/collection/index.test.js index f7689384d..3b817e615 100644 --- a/src/archivist/collection/index.test.js +++ b/src/archivist/collection/index.test.js @@ -18,7 +18,7 @@ describe('Collection', () => { try { metadataBackup = await fs.readFile(metadataPath, 'utf8'); } catch (error) { - if (error.code !== 'ENOENT') throw error; + if (error.code !== 'ENOENT') { throw error; } } }); diff --git a/src/archivist/recorder/repositories/git/dataMapper.js b/src/archivist/recorder/repositories/git/dataMapper.js index c9dadd267..8fcd3fafb 100644 --- a/src/archivist/recorder/repositories/git/dataMapper.js +++ b/src/archivist/recorder/repositories/git/dataMapper.js @@ -91,6 +91,11 @@ function generateFileName(termsType, documentId, extension) { } export function generateFilePath(serviceId, termsType, documentId, mimeType) { + // If only serviceId is provided, return a pattern to match all files for that service + if (termsType === undefined) { + return `${serviceId}/*`; + } + const extension = mime.getExtension(mimeType) || '*'; // If mime type is undefined, an 
asterisk is set as an extension. Used to match all files for the given service ID, terms type and document ID when mime type is unknown return `${serviceId}/${generateFileName(termsType, documentId, extension)}`; // Do not use `path.join` as even for Windows, the path should be with `/` and not `\` diff --git a/src/archivist/recorder/repositories/git/git.js b/src/archivist/recorder/repositories/git/git.js index 791c39310..364fdc72b 100644 --- a/src/archivist/recorder/repositories/git/git.js +++ b/src/archivist/recorder/repositories/git/git.js @@ -68,8 +68,12 @@ export default class Git { return this.git.push(); } - listCommits(options = []) { - return this.log([ '--reverse', '--no-merges', '--name-only', ...options ]); // Returns all commits in chronological order (`--reverse`), excluding merge commits (`--no-merges`), with modified files names (`--name-only`) + listCommits(options = [], { reverse = true, skip, maxCount } = {}) { + const reverseOption = reverse ? ['--reverse'] : []; + const skipOption = skip !== undefined ? [`--skip=${skip}`] : []; + const maxCountOption = maxCount !== undefined ? 
[`--max-count=${maxCount}`] : []; + + return this.log([ ...reverseOption, '--author-date-order', '--no-merges', '--name-only', ...skipOption, ...maxCountOption, ...options ]); // Returns commits in chronological order with `--reverse` (oldest first) or reverse chronological without it (newest first), sorted by author date (`--author-date-order`), excluding merge commits (`--no-merges`), with modified files names (`--name-only`), with optional pagination (`--skip`, `--max-count`) } async getCommit(options) { diff --git a/src/archivist/recorder/repositories/git/index.js b/src/archivist/recorder/repositories/git/index.js index 5caf59948..50da196ba 100644 --- a/src/archivist/recorder/repositories/git/index.js +++ b/src/archivist/recorder/repositories/git/index.js @@ -88,16 +88,45 @@ export default class GitRepository extends RepositoryInterface { return this.#toDomain(commit); } - async findAll() { - return Promise.all((await this.#getCommits()).map(commit => this.#toDomain(commit, { deferContentLoading: true }))); + async findAll({ limit, offset } = {}) { + return Promise.all((await this.#getCommits({ limit, offset })).map(commit => this.#toDomain(commit, { deferContentLoading: true }))); } - async count() { - return (await this.git.log(Object.values(DataMapper.COMMIT_MESSAGE_PREFIXES).map(prefix => `--grep=${prefix}`))).length; + async findByService(serviceId, { limit, offset } = {}) { + const pathPattern = DataMapper.generateFilePath(serviceId); + + return Promise.all((await this.#getCommits({ pathFilter: pathPattern, limit, offset })).map(commit => this.#toDomain(commit, { deferContentLoading: true }))); + } + + async findByServiceAndTermsType(serviceId, termsType, { limit, offset } = {}) { + const pathPattern = DataMapper.generateFilePath(serviceId, termsType); + + return Promise.all((await this.#getCommits({ pathFilter: pathPattern, limit, offset })).map(commit => this.#toDomain(commit, { deferContentLoading: true }))); + } + + async count(serviceId, termsType) { 
+ const grepOptions = Object.values(DataMapper.COMMIT_MESSAGE_PREFIXES).map(prefix => `--grep=${prefix}`); + const pathOptions = []; + + if (serviceId && termsType) { + const pathPattern = DataMapper.generateFilePath(serviceId, termsType); + + pathOptions.push('--', pathPattern); + } else if (serviceId) { + // Count all records for a service (all terms types) + const pathPattern = DataMapper.generateFilePath(serviceId); + + pathOptions.push('--', pathPattern); + } else { + // Count all records (exclude root directory files) + pathOptions.push('--', '*/*'); + } + + return (await this.git.log([ ...grepOptions, ...pathOptions ])).length; } async* iterate() { - const commits = await this.#getCommits(); + const commits = await this.#getCommits({ reverse: true }); for (const commit of commits) { yield this.#toDomain(commit); @@ -131,12 +160,39 @@ export default class GitRepository extends RepositoryInterface { record.content = pdfBuffer; } - async #getCommits() { - return (await this.git.listCommits()) - .filter(commit => // Skip non-record commits (e.g., README or LICENSE updates) - DataMapper.COMMIT_MESSAGE_PREFIXES_REGEXP.test(commit.message) // Commits generated by the engine have messages that match predefined prefixes - && path.dirname(commit.diff.files[0].file) !== '.') // Assumes one record per commit; records must be in a serviceId folder, not root - .sort((commitA, commitB) => new Date(commitA.date) - new Date(commitB.date)); // Make sure that the commits are sorted in ascending chronological order + async #getCommits({ pathFilter, reverse = false, limit, offset } = {}) { + const grepOptions = Object.values(DataMapper.COMMIT_MESSAGE_PREFIXES).flatMap(prefix => [ '--grep', prefix ]); + const pathOptions = pathFilter + ? 
[ '--', pathFilter ] + : [ '--', '*/*' ]; // Exclude root directory files by only matching files in subdirectories + + const options = [ ...grepOptions, ...pathOptions ]; + + // Use git-level pagination when available + // Note: --skip and --max-count work in topological order, not chronological order + // This means pagination may not be strictly chronological, but it's acceptable for performance + const paginationOptions = {}; + + if (offset !== undefined) { + paginationOptions.skip = offset; + } + + if (limit !== undefined) { + paginationOptions.maxCount = limit; + } + + const commits = await this.git.listCommits(options, { reverse: false, ...paginationOptions }); // Get commits without git's --reverse for better performance, filtered at git level + + // Sort by date in JavaScript for accuracy - git's date ordering may not be reliable with backdated commits + // Default order is descending (newest to oldest), reverse gives ascending (oldest to newest) + commits.sort((commitA, commitB) => { + const dateA = new Date(commitA.date); + const dateB = new Date(commitB.date); + + return reverse ? 
dateA - dateB : dateB - dateA; + }); + + return commits; } static async writeFile({ filePath, content }) { diff --git a/src/archivist/recorder/repositories/git/index.test.js b/src/archivist/recorder/repositories/git/index.test.js index 6c7e1dea0..6ef24175f 100644 --- a/src/archivist/recorder/repositories/git/index.test.js +++ b/src/archivist/recorder/repositories/git/index.test.js @@ -540,8 +540,87 @@ describe('GitRepository', () => { } }); - it('returns records in ascending order', () => { - expect(records.map(record => record.fetchDate)).to.deep.equal([ FETCH_DATE_EARLIER, FETCH_DATE, FETCH_DATE_LATER ]); + it('returns records in descending order', () => { + expect(records.map(record => record.fetchDate)).to.deep.equal([ FETCH_DATE_LATER, FETCH_DATE, FETCH_DATE_EARLIER ]); + }); + }); + + describe('#findByServiceAndTermsType', () => { + const expectedIds = []; + let records; + + before(async function () { + this.timeout(5000); + + const { id: id1 } = await subject.save(new Version({ + serviceId: SERVICE_PROVIDER_ID, + termsType: TERMS_TYPE, + content: CONTENT, + fetchDate: FETCH_DATE, + snapshotIds: [SNAPSHOT_ID], + })); + + expectedIds.push(id1); + + const { id: id2 } = await subject.save(new Version({ + serviceId: SERVICE_PROVIDER_ID, + termsType: TERMS_TYPE, + content: `${CONTENT} - updated`, + fetchDate: FETCH_DATE_LATER, + snapshotIds: [SNAPSHOT_ID], + })); + + expectedIds.push(id2); + + await subject.save(new Version({ + serviceId: 'other_service', + termsType: 'Privacy Policy', + content: `${CONTENT} - other`, + fetchDate: FETCH_DATE, + snapshotIds: [SNAPSHOT_ID], + })); + + (records = await subject.findByServiceAndTermsType(SERVICE_PROVIDER_ID, TERMS_TYPE)); + }); + + after(() => subject.removeAll()); + + it('returns only matching records', () => { + expect(records.length).to.equal(2); + }); + + it('returns Version objects', () => { + for (const record of records) { + expect(record).to.be.an.instanceof(Version); + } + }); + + it('returns records with 
matching service ID', () => { + for (const record of records) { + expect(record.serviceId).to.equal(SERVICE_PROVIDER_ID); + } + }); + + it('returns records with matching terms type', () => { + for (const record of records) { + expect(record.termsType).to.equal(TERMS_TYPE); + } + }); + + it('returns records in descending order', () => { + expect(records.map(record => record.fetchDate)).to.deep.equal([ FETCH_DATE_LATER, FETCH_DATE ]); + }); + + it('returns records with correct IDs', () => { + expect(records.map(record => record.id)).to.have.members(expectedIds); + }); + + context('when no matching records exist', () => { + it('returns an empty array', async () => { + const result = await subject.findByServiceAndTermsType('non_existent_service', 'Non Existent Terms'); + + expect(result).to.be.an('array').that.is.empty; + }); }); }); @@ -582,6 +661,37 @@ describe('GitRepository', () => { it('returns the proper count', () => { expect(count).to.equal(3); }); + + context('with serviceId and termsType filters', () => { + it('returns count for specific service and terms type', async () => { + const filteredCount = await subject.count(SERVICE_PROVIDER_ID, TERMS_TYPE); + + expect(filteredCount).to.equal(3); + }); + + it('returns zero for non-existent service', async () => { + const filteredCount = await subject.count('non-existent-service', TERMS_TYPE); + + expect(filteredCount).to.equal(0); + }); + }); + + context('with only serviceId filter', () => { + it('returns count for all terms types of a service', async () => { + // Add a version with different terms type + await subject.save(new Version({ + serviceId: SERVICE_PROVIDER_ID, + termsType: 'Different Terms', + content: CONTENT, + fetchDate: FETCH_DATE, + snapshotIds: [SNAPSHOT_ID], + })); + + const filteredCount = await subject.count(SERVICE_PROVIDER_ID); + + expect(filteredCount).to.equal(4); // 3 from TERMS_TYPE + 1 from 'Different Terms' + }); + }); }); describe('#findLatest', () => { @@ -1101,8 +1211,8 @@ 
describe('GitRepository', () => { } }); - it('returns records in ascending order', () => { - expect(records.map(record => record.fetchDate)).to.deep.equal([ FETCH_DATE_EARLIER, FETCH_DATE, FETCH_DATE_LATER ]); + it('returns records in descending order', () => { + expect(records.map(record => record.fetchDate)).to.deep.equal([ FETCH_DATE_LATER, FETCH_DATE, FETCH_DATE_EARLIER ]); }); }); @@ -1462,8 +1572,8 @@ describe('GitRepository', () => { } }); - it('returns records in ascending order', () => { - expect(records.map(record => record.fetchDate)).to.deep.equal(expectedDates); + it('returns records in descending order', () => { + expect(records.map(record => record.fetchDate)).to.deep.equal([...expectedDates].reverse()); }); }); diff --git a/src/archivist/recorder/repositories/interface.js b/src/archivist/recorder/repositories/interface.js index 1d9270944..ae0ffafcc 100644 --- a/src/archivist/recorder/repositories/interface.js +++ b/src/archivist/recorder/repositories/interface.js @@ -70,21 +70,56 @@ class RepositoryInterface { } /** - * Find all records + * Find all records, in descending chronological order (newest first; opposite of #iterate) * For performance reasons, the content of the records will not be loaded by default. 
Use #loadRecordContent to load the content of individual records - * @see RepositoryInterface#loadRecordContent - * @returns {Promise<Array<Record>>} Promise that will be resolved with an array of all records + * @see RepositoryInterface#loadRecordContent + * @see RepositoryInterface#iterate + * @param {object} [options] - Pagination options + * @param {number} [options.limit] - Maximum number of records to return + * @param {number} [options.offset] - Number of records to skip + * @returns {Promise<Array<Record>>} Promise that will be resolved with an array of records in descending chronological order */ - async findAll() { + async findAll(options = {}) { throw new Error(`#findAll method is not implemented in ${this.constructor.name}`); } + /** + * Find all records for a specific service, in descending chronological order + * For performance reasons, the content of the records will not be loaded by default. Use #loadRecordContent to load the content of individual records + * @see RepositoryInterface#loadRecordContent + * @param {string} serviceId - Service ID of records to find + * @param {object} [options] - Pagination options + * @param {number} [options.limit] - Maximum number of records to return + * @param {number} [options.offset] - Number of records to skip + * @returns {Promise<Array<Record>>} Promise that will be resolved with an array of matching records in descending chronological order + */ + async findByService(serviceId, options = {}) { + throw new Error(`#findByService method is not implemented in ${this.constructor.name}`); + } + + /** + * Find all records for a specific service and terms type, in descending chronological order + * For performance reasons, the content of the records will not be loaded by default. 
Use #loadRecordContent to load the content of individual records + * @see RepositoryInterface#loadRecordContent + * @param {string} serviceId - Service ID of records to find + * @param {string} termsType - Terms type of records to find + * @param {object} [options] - Pagination options + * @param {number} [options.limit] - Maximum number of records to return + * @param {number} [options.offset] - Number of records to skip + * @returns {Promise<Array<Record>>} Promise that will be resolved with an array of matching records in descending chronological order + */ + async findByServiceAndTermsType(serviceId, termsType, options = {}) { + throw new Error(`#findByServiceAndTermsType method is not implemented in ${this.constructor.name}`); + } + /** * Count the total number of records in the repository * For performance reasons, use this method rather than counting the number of entries returned by #findAll if you only need the size of a repository - * @returns {Promise<number>} Promise that will be resolved with the total number of records + * @param {string} [serviceId] - Optional service ID to filter records + * @param {string} [termsType] - Optional terms type to filter records (requires serviceId) + * @returns {Promise<number>} Promise that will be resolved with the total number of records */ - async count() { + async count(serviceId, termsType) { throw new Error(`#count method is not implemented in ${this.constructor.name}`); } diff --git a/src/archivist/recorder/repositories/mongo/index.js b/src/archivist/recorder/repositories/mongo/index.js index 2a4abb18c..fc1b860ca 100644 --- a/src/archivist/recorder/repositories/mongo/index.js +++ b/src/archivist/recorder/repositories/mongo/index.js @@ -88,13 +88,63 @@ export default class MongoRepository extends RepositoryInterface { return this.#toDomain(mongoDocument); } - async findAll() { - return Promise.all((await this.collection.find().project({ content: 0 }).sort({ fetchDate: 1 }).toArray()) + async findAll({ limit, offset } = {}) { + let query = 
this.collection.find().project({ content: 0 }).sort({ fetchDate: -1 }); + + if (offset !== undefined) { + query = query.skip(offset); + } + + if (limit !== undefined) { + query = query.limit(limit); + } + + return Promise.all((await query.toArray()) + .map(mongoDocument => this.#toDomain(mongoDocument, { deferContentLoading: true }))); + } + + async findByServiceAndTermsType(serviceId, termsType, { limit, offset } = {}) { + let query = this.collection.find({ serviceId, termsType }).project({ content: 0 }).sort({ fetchDate: -1 }); + + if (offset !== undefined) { + query = query.skip(offset); + } + + if (limit !== undefined) { + query = query.limit(limit); + } + + return Promise.all((await query.toArray()) .map(mongoDocument => this.#toDomain(mongoDocument, { deferContentLoading: true }))); } - count() { - return this.collection.countDocuments(); + async findByService(serviceId, { limit, offset } = {}) { + let query = this.collection.find({ serviceId }).project({ content: 0 }).sort({ fetchDate: -1 }); + + if (offset !== undefined) { + query = query.skip(offset); + } + + if (limit !== undefined) { + query = query.limit(limit); + } + + return Promise.all((await query.toArray()) + .map(mongoDocument => this.#toDomain(mongoDocument, { deferContentLoading: true }))); + } + + count(serviceId, termsType) { + const filter = {}; + + if (serviceId) { + filter.serviceId = serviceId; + } + + if (termsType) { + filter.termsType = termsType; + } + + return this.collection.countDocuments(filter); } async* iterate() { diff --git a/src/archivist/recorder/repositories/mongo/index.test.js b/src/archivist/recorder/repositories/mongo/index.test.js index 61ecfd1d0..e2123cdfd 100644 --- a/src/archivist/recorder/repositories/mongo/index.test.js +++ b/src/archivist/recorder/repositories/mongo/index.test.js @@ -629,45 +629,198 @@ describe('MongoRepository', () => { } }); - it('returns records in ascending order', () => { - expect(records.map(record => record.fetchDate)).to.deep.equal([ 
FETCH_DATE_EARLIER, FETCH_DATE, FETCH_DATE_LATER ]); + it('returns records in descending order', () => { + expect(records.map(record => record.fetchDate)).to.deep.equal([ FETCH_DATE_LATER, FETCH_DATE, FETCH_DATE_EARLIER ]); }); }); - describe('#count', () => { - let count; + describe('#findByServiceAndTermsType', () => { + const expectedIds = []; + let records; before(async () => { - await subject.save(new Version({ + const { id: id1 } = await subject.save(new Version({ serviceId: SERVICE_PROVIDER_ID, termsType: TERMS_TYPE, content: CONTENT, fetchDate: FETCH_DATE, snapshotIds: [SNAPSHOT_ID], })); - await subject.save(new Version({ + + expectedIds.push(id1); + + const { id: id2 } = await subject.save(new Version({ serviceId: SERVICE_PROVIDER_ID, termsType: TERMS_TYPE, content: `${CONTENT} - updated`, fetchDate: FETCH_DATE_LATER, snapshotIds: [SNAPSHOT_ID], })); + + expectedIds.push(id2); + await subject.save(new Version({ - serviceId: SERVICE_PROVIDER_ID, - termsType: TERMS_TYPE, - content: `${CONTENT} - updated 2`, - isTechnicalUpgrade: true, - fetchDate: FETCH_DATE_EARLIER, + serviceId: 'other_service', + termsType: 'Privacy Policy', + content: `${CONTENT} - other`, + fetchDate: FETCH_DATE, snapshotIds: [SNAPSHOT_ID], })); - (count = await subject.count()); + (records = await subject.findByServiceAndTermsType(SERVICE_PROVIDER_ID, TERMS_TYPE)); }); after(() => subject.removeAll()); - it('returns the proper count', () => { - expect(count).to.equal(3); + it('returns only matching records', () => { + expect(records.length).to.equal(2); + }); + + it('returns Version objects', () => { + for (const record of records) { + expect(record).to.be.an.instanceof(Version); + } + }); + + it('returns records with matching service ID', () => { + for (const record of records) { + expect(record.serviceId).to.equal(SERVICE_PROVIDER_ID); + } + }); + + it('returns records with matching terms type', () => { + for (const record of records) { + 
expect(record.termsType).to.equal(TERMS_TYPE); + } + }); + + it('returns records in descending order', () => { + expect(records.map(record => record.fetchDate)).to.deep.equal([ FETCH_DATE_LATER, FETCH_DATE ]); + }); + + it('returns records with correct IDs', () => { + expect(records.map(record => record.id)).to.have.members(expectedIds); + }); + + context('when no matching records exist', () => { + it('returns an empty array', async () => { + const result = await subject.findByServiceAndTermsType('non_existent_service', 'Non Existent Terms'); + + expect(result).to.be.an('array').that.is.empty; + }); + }); + }); + + describe('#count', () => { + context('without filters', () => { + let count; + + before(async () => { + await subject.save(new Version({ + serviceId: SERVICE_PROVIDER_ID, + termsType: TERMS_TYPE, + content: CONTENT, + fetchDate: FETCH_DATE, + snapshotIds: [SNAPSHOT_ID], + })); + await subject.save(new Version({ + serviceId: SERVICE_PROVIDER_ID, + termsType: TERMS_TYPE, + content: `${CONTENT} - updated`, + fetchDate: FETCH_DATE_LATER, + snapshotIds: [SNAPSHOT_ID], + })); + await subject.save(new Version({ + serviceId: SERVICE_PROVIDER_ID, + termsType: TERMS_TYPE, + content: `${CONTENT} - updated 2`, + isTechnicalUpgrade: true, + fetchDate: FETCH_DATE_EARLIER, + snapshotIds: [SNAPSHOT_ID], + })); + + (count = await subject.count()); + }); + + after(() => subject.removeAll()); + + it('returns the proper count', () => { + expect(count).to.equal(3); + }); + }); + + context('with serviceId and termsType filters', () => { + before(async () => { + await subject.save(new Version({ + serviceId: SERVICE_PROVIDER_ID, + termsType: TERMS_TYPE, + content: CONTENT, + fetchDate: FETCH_DATE, + snapshotIds: [SNAPSHOT_ID], + })); + await subject.save(new Version({ + serviceId: SERVICE_PROVIDER_ID, + termsType: TERMS_TYPE, + content: `${CONTENT} - updated`, + fetchDate: FETCH_DATE_LATER, + snapshotIds: [SNAPSHOT_ID], + })); + await subject.save(new Version({ + serviceId: 
'other_service', + termsType: 'Privacy Policy', + content: 'Other content', + fetchDate: FETCH_DATE, + snapshotIds: [SNAPSHOT_ID], + })); + }); + + after(() => subject.removeAll()); + + it('returns count for specific service and terms type', async () => { + const filteredCount = await subject.count(SERVICE_PROVIDER_ID, TERMS_TYPE); + + expect(filteredCount).to.equal(2); + }); + + it('returns zero for non-existent service', async () => { + const filteredCount = await subject.count('non-existent-service', TERMS_TYPE); + + expect(filteredCount).to.equal(0); + }); + }); + + context('with only serviceId filter', () => { + before(async () => { + await subject.save(new Version({ + serviceId: SERVICE_PROVIDER_ID, + termsType: TERMS_TYPE, + content: CONTENT, + fetchDate: FETCH_DATE, + snapshotIds: [SNAPSHOT_ID], + })); + await subject.save(new Version({ + serviceId: SERVICE_PROVIDER_ID, + termsType: 'Different Terms', + content: 'Different content', + fetchDate: FETCH_DATE_LATER, + snapshotIds: [SNAPSHOT_ID], + })); + await subject.save(new Version({ + serviceId: 'other_service', + termsType: 'Privacy Policy', + content: 'Other content', + fetchDate: FETCH_DATE, + snapshotIds: [SNAPSHOT_ID], + })); + }); + + after(() => subject.removeAll()); + + it('returns count for all terms types of a service', async () => { + const filteredCount = await subject.count(SERVICE_PROVIDER_ID); + + expect(filteredCount).to.equal(2); + }); }); }); @@ -1197,8 +1350,8 @@ describe('MongoRepository', () => { } }); - it('returns records in ascending order', () => { - expect(records.map(record => record.fetchDate)).to.deep.equal([ FETCH_DATE_EARLIER, FETCH_DATE, FETCH_DATE_LATER ]); + it('returns records in descending order', () => { + expect(records.map(record => record.fetchDate)).to.deep.equal([ FETCH_DATE_LATER, FETCH_DATE, FETCH_DATE_EARLIER ]); }); }); diff --git a/src/archivist/services/index.js b/src/archivist/services/index.js index cdcc07bbf..980973379 100644 --- 
a/src/archivist/services/index.js +++ b/src/archivist/services/index.js @@ -281,7 +281,7 @@ function getHistoryFilePaths(serviceId) { } async function loadServiceHistory(historyFilePath) { - if (!(await fileExists(historyFilePath))) return {}; + if (!(await fileExists(historyFilePath))) { return {}; } try { return JSON.parse(await fs.readFile(historyFilePath)); diff --git a/src/collection-api/routes/feed.js b/src/collection-api/routes/feed.js new file mode 100644 index 000000000..e4ec32801 --- /dev/null +++ b/src/collection-api/routes/feed.js @@ -0,0 +1,261 @@ +import express from 'express'; +import { js2xml } from 'xml-js'; + +import { getCollection } from '../../archivist/collection/index.js'; +import { COMMIT_MESSAGE_PREFIXES } from '../../archivist/recorder/repositories/git/dataMapper.js'; +import { toISODateWithoutMilliseconds } from '../../archivist/utils/date.js'; + +const RECORD_TYPES = { + firstRecord: 'First record', + technicalUpgrade: 'Technical upgrade', + change: 'Change', +}; + +const TAG_AUTHORITY = 'opentermsarchive.org,2026'; +const FEED_AUTHOR_NAME = 'Open Terms Archive engine'; + +function buildAbsoluteBaseUrl(req) { + return `${req.protocol}://${req.get('host')}${req.baseUrl}`; +} + +function classifyRecordType(version) { + switch (true) { + case version.isFirstRecord: + return RECORD_TYPES.firstRecord; + case version.isTechnicalUpgrade: + return RECORD_TYPES.technicalUpgrade; + default: + return RECORD_TYPES.change; + } +} + +function buildEntryTitle(version) { + let prefix; + + switch (true) { + case version.isFirstRecord: + prefix = COMMIT_MESSAGE_PREFIXES.startTracking; + break; + case version.isTechnicalUpgrade: + prefix = COMMIT_MESSAGE_PREFIXES.technicalUpgrade; + break; + default: + prefix = COMMIT_MESSAGE_PREFIXES.update; + } + + return `${prefix} ${version.serviceId} ${version.termsType}`; +} + +function buildVersionLink(baseUrl, version) { + const encodedDate = encodeURIComponent(toISODateWithoutMilliseconds(version.fetchDate)); + 
const encodedService = encodeURIComponent(version.serviceId); + const encodedTermsType = encodeURIComponent(version.termsType); + + return `${baseUrl}/version/${encodedService}/${encodedTermsType}/${encodedDate}`; +} + +function buildEntryId(storageType, collection, version) { + return `tag:${TAG_AUTHORITY}:version:${collection.metadata?.id}:${storageType}:${version.id}`; +} + +function buildFeedId(collection, ...suffix) { + return [ `tag:${TAG_AUTHORITY}:feed`, collection.metadata?.id, ...suffix ].join(':'); +} + +function buildSchemes() { + return { + service: `tag:${TAG_AUTHORITY}:scheme:service`, + termsType: `tag:${TAG_AUTHORITY}:scheme:terms-type`, + recordType: `tag:${TAG_AUTHORITY}:scheme:record-type`, + }; +} + +function buildEntry(storageType, versionUrlTemplate, baseUrl, collection, version) { + const href = versionUrlTemplate?.replace('%VERSION_ID', version.id) ?? buildVersionLink(baseUrl, version); + const schemes = buildSchemes(); + + return { + id: { _text: buildEntryId(storageType, collection, version) }, + link: { _attributes: { rel: 'alternate', type: 'text/html', href } }, + title: { _text: buildEntryTitle(version) }, + updated: { _text: version.fetchDate.toISOString() }, + category: [ + { _attributes: { term: version.serviceId, scheme: schemes.service } }, + { _attributes: { term: version.termsType, scheme: schemes.termsType } }, + { _attributes: { term: classifyRecordType(version), scheme: schemes.recordType } }, + ], + }; +} + +function buildFeedDocument({ storageType, versionUrlTemplate, collection, selfHref, feedId, versions, baseUrl }) { + const latestFetchDate = versions.length > 0 ? 
versions[0].fetchDate : new Date(); + + const feed = { + _attributes: { xmlns: 'http://www.w3.org/2005/Atom' }, + title: { _text: collection.metadata?.name || '' }, + subtitle: { _text: collection.metadata?.tagline || '' }, + id: { _text: feedId }, + updated: { _text: latestFetchDate.toISOString() }, + link: { _attributes: { rel: 'self', href: selfHref } }, + author: { name: { _text: FEED_AUTHOR_NAME } }, + }; + + if (collection.metadata?.logo) { + feed.logo = { _text: collection.metadata.logo }; + } + + feed.entry = versions.map(version => buildEntry(storageType, versionUrlTemplate, baseUrl, collection, version)); + + return { + _declaration: { _attributes: { version: '1.0', encoding: 'utf-8' } }, + feed, + }; +} + +function sendFeed(res, opts) { + const document = buildFeedDocument(opts); + + res.set('Content-Type', 'application/atom+xml; charset=utf-8'); + res.status(200).send(js2xml(document, { compact: true, spaces: 2 })); +} + +/** + * @param {object} services The services to be exposed by the API + * @param {object} versionsRepository The versions repository instance + * @param {string} storageType The storage type identifier of the versions repository + * @param {number} feedLimit Maximum number of entries returned by feed endpoints + * @param {string} [versionUrlTemplate] Optional URL template with %VERSION_ID placeholder; when set, replaces the API link as each entry's alternate href + * @returns {express.Router} The router instance + * @swagger + * tags: + * name: Feeds + * description: Atom feeds of version changes + */ +export default function feedRouter(services, versionsRepository, storageType, feedLimit, versionUrlTemplate) { + const router = express.Router(); + + /** + * @swagger + * /feed: + * get: + * summary: Atom feed of the latest version changes across the whole collection. + * tags: [Feeds] + * produces: + * - application/atom+xml + * responses: + * 200: + * description: An Atom 1.0 feed listing the latest version records, newest first. 
The maximum number of entries is server-configured. + * content: + * application/atom+xml: + * schema: + * type: string + */ + router.get('/feed', async (req, res) => { + const collection = await getCollection(); + const baseUrl = buildAbsoluteBaseUrl(req); + const selfHref = `${baseUrl}/feed`; + const feedId = buildFeedId(collection); + + const versions = await versionsRepository.findAll({ limit: feedLimit }); + + sendFeed(res, { storageType, versionUrlTemplate, collection, selfHref, feedId, versions, baseUrl }); + }); + + /** + * @swagger + * /feed/{serviceId}: + * get: + * summary: Atom feed of the latest version changes scoped to a single service. + * tags: [Feeds] + * produces: + * - application/atom+xml + * parameters: + * - in: path + * name: serviceId + * description: The ID of the service. + * schema: + * type: string + * required: true + * responses: + * 200: + * description: An Atom 1.0 feed listing the latest version records for the given service, newest first. + * content: + * application/atom+xml: + * schema: + * type: string + * 404: + * description: No service matching the provided ID is found. + */ + router.get('/feed/:serviceId', async (req, res) => { + const service = Object.hasOwn(services, req.params.serviceId) ? services[req.params.serviceId] : null; + + if (!service) { + return res.status(404).send('Service not found'); + } + + const collection = await getCollection(); + const baseUrl = buildAbsoluteBaseUrl(req); + const selfHref = `${baseUrl}/feed/${encodeURIComponent(service.id)}`; + const feedId = buildFeedId(collection, service.id); + + const versions = await versionsRepository.findByService(service.id, { limit: feedLimit }); + + return sendFeed(res, { storageType, versionUrlTemplate, collection, selfHref, feedId, versions, baseUrl }); + }); + + /** + * @swagger + * /feed/{serviceId}/{termsType}: + * get: + * summary: Atom feed of the latest version changes scoped to a service and terms type. 
+ * tags: [Feeds] + * produces: + * - application/atom+xml + * parameters: + * - in: path + * name: serviceId + * description: The ID of the service. + * schema: + * type: string + * required: true + * - in: path + * name: termsType + * description: The terms type declared by the service (e.g. "Terms of Service", "Privacy Policy"). + * schema: + * type: string + * required: true + * responses: + * 200: + * description: An Atom 1.0 feed listing the latest version records for the given service and terms type, newest first. + * content: + * application/atom+xml: + * schema: + * type: string + * 404: + * description: Either the service ID does not match any service or the terms type is not declared by that service. + */ + router.get('/feed/:serviceId/:termsType', async (req, res) => { + const service = Object.hasOwn(services, req.params.serviceId) ? services[req.params.serviceId] : null; + + if (!service) { + return res.status(404).send('Service not found'); + } + + const { termsType } = req.params; + + if (!service.getTermsTypes().includes(termsType)) { + return res.status(404).send('Terms type not found for this service'); + } + + const collection = await getCollection(); + const baseUrl = buildAbsoluteBaseUrl(req); + const selfHref = `${baseUrl}/feed/${encodeURIComponent(service.id)}/${encodeURIComponent(termsType)}`; + const feedId = buildFeedId(collection, service.id, termsType); + + const versions = await versionsRepository.findByServiceAndTermsType(service.id, termsType, { limit: feedLimit }); + + return sendFeed(res, { storageType, versionUrlTemplate, collection, selfHref, feedId, versions, baseUrl }); + }); + + return router; +} diff --git a/src/collection-api/routes/feed.test.js b/src/collection-api/routes/feed.test.js new file mode 100644 index 000000000..5722128bf --- /dev/null +++ b/src/collection-api/routes/feed.test.js @@ -0,0 +1,568 @@ +import { expect } from 'chai'; +import config from 'config'; +import express from 'express'; +import supertest from 
'supertest'; + +import { getCollection } from '../../archivist/collection/index.js'; +import RepositoryFactory from '../../archivist/recorder/repositories/factory.js'; +import * as Services from '../../archivist/services/index.js'; +import Version from '../../archivist/recorder/version.js'; +import { toISODateWithoutMilliseconds } from '../../archivist/utils/date.js'; +import app from '../server.js'; + +import feedRouter from './feed.js'; + +const basePath = config.get('@opentermsarchive/engine.collection-api.basePath'); +const request = supertest(app); +const storageConfig = config.get('@opentermsarchive/engine.recorder.versions.storage'); + +function extractTag(xml, tag) { + const match = xml.match(new RegExp(`<${tag}>([\\s\\S]*?)`)); + + return match ? match[1] : null; +} + +describe('Feed API', () => { + describe('GET /feed', () => { + let response; + let collection; + + before(async () => { + collection = await getCollection(); + response = await request.get(`${basePath}/v1/feed`); + }); + + it('responds with 200 status code', () => { + expect(response.status).to.equal(200); + }); + + it('responds with Content-Type application/atom+xml', () => { + expect(response.headers['content-type']).to.match(/^application\/atom\+xml/); + }); + + it('is a valid Atom feed root', () => { + expect(response.text).to.match(/^<\?xml version="1\.0"/); + expect(response.text).to.include(' { + it('has a title matching the collection name', () => { + expect(extractTag(response.text, 'title')).to.equal(collection.metadata.name); + }); + + it('has a subtitle matching the collection tagline', () => { + expect(extractTag(response.text, 'subtitle')).to.equal(collection.metadata.tagline); + }); + + it('has a tag URI id based on the collection id', () => { + expect(extractTag(response.text, 'id')).to.equal(`tag:opentermsarchive.org,2026:feed:${collection.metadata.id}`); + }); + + it('has an updated element with a valid ISO 8601 datetime', () => { + const updated = extractTag(response.text, 
'updated'); + + expect(updated).to.be.a('string'); + expect(new Date(updated).toString()).to.not.equal('Invalid Date'); + }); + + it('has a self link pointing to the feed endpoint', () => { + const selfHrefMatch = response.text.match(/]*rel="self"[^>]*href="([^"]+)"/); + + expect(selfHrefMatch).to.not.be.null; + expect(selfHrefMatch[1]).to.match(new RegExp(`${basePath}/v1/feed$`)); + }); + + it('has an author matching the feed author name', () => { + expect(response.text).to.match(/[\s\S]*Open Terms Archive engine<\/name>[\s\S]*<\/author>/); + }); + + it('has a logo matching the collection logo', () => { + expect(extractTag(response.text, 'logo')).to.equal(collection.metadata.logo); + }); + }); + }); + + describe('GET /feed — entries', () => { + const FETCH_DATE_FIRST = new Date('2023-01-01T12:00:00Z'); + const FETCH_DATE_CHANGE = new Date('2023-06-15T08:30:00Z'); + const FETCH_DATE_UPGRADE = new Date('2024-02-10T16:45:00Z'); + + let response; + let repository; + let savedVersions; + + before(async function () { + this.timeout(5000); + repository = RepositoryFactory.create(storageConfig); + await repository.initialize(); + + const firstRecord = await repository.save(new Version({ + serviceId: 'service-1', + termsType: 'Terms of Service', + content: 'first content', + fetchDate: FETCH_DATE_FIRST, + snapshotIds: ['snapshot_1'], + })); + + const changeRecord = await repository.save(new Version({ + serviceId: 'service-1', + termsType: 'Terms of Service', + content: 'changed content', + fetchDate: FETCH_DATE_CHANGE, + snapshotIds: ['snapshot_2'], + })); + + const upgradeRecord = await repository.save(new Version({ + serviceId: 'service-2', + termsType: 'Privacy Policy', + content: 'initial privacy', + fetchDate: new Date('2024-01-01T00:00:00Z'), + snapshotIds: ['snapshot_3'], + })); + + const technicalUpgradeRecord = await repository.save(new Version({ + serviceId: 'service-2', + termsType: 'Privacy Policy', + content: 'upgraded privacy', + fetchDate: 
FETCH_DATE_UPGRADE, + snapshotIds: ['snapshot_4'], + isTechnicalUpgrade: true, + })); + + savedVersions = { firstRecord, changeRecord, upgradeRecord, technicalUpgradeRecord }; + response = await request.get(`${basePath}/v1/feed`); + }); + + after(() => repository.removeAll()); + + it('lists one entry per saved version up to the configured limit', () => { + const limit = config.get('@opentermsarchive/engine.collection-api.feed.limit'); + const entries = response.text.match(//g) || []; + + expect(entries).to.have.length(Math.min(4, limit)); + }); + + it('orders entries newest-first', () => { + const updates = [...response.text.matchAll(/[\s\S]*?([^<]+)<\/updated>[\s\S]*?<\/entry>/g)].map(match => match[1]); + + expect(updates).to.deep.equal([...updates].sort().reverse()); + }); + + describe('entry metadata', () => { + let firstEntry; + + before(() => { + [firstEntry] = response.text.match(/[\s\S]*?<\/entry>/); + }); + + it('has an id tag URI including storage type and record id', () => { + const collectionId = 'test'; + const expected = `tag:opentermsarchive.org,2026:version:${collectionId}:${storageConfig.type}:${savedVersions.technicalUpgradeRecord.id}`; + + expect(firstEntry).to.include(`${expected}`); + }); + + it('has an alternate link to the API version endpoint', () => { + const href = firstEntry.match(/]*rel="alternate"[^>]*href="([^"]+)"/)[1]; + const expectedPathFragment = `/version/${encodeURIComponent('service-2')}/${encodeURIComponent('Privacy Policy')}/${encodeURIComponent(toISODateWithoutMilliseconds(FETCH_DATE_UPGRADE))}`; + + expect(href).to.include(expectedPathFragment); + }); + + it('has exactly one link per entry', () => { + const links = firstEntry.match(/]*\/>/g) || []; + + expect(links).to.have.length(1); + }); + + it('has a type="text/html" on the alternate link', () => { + expect(firstEntry).to.match(/]*rel="alternate"[^>]*type="text\/html"/); + }); + + it('has a title reconstructed from commit prefix + serviceId + termsType', () => { + const 
title = firstEntry.match(/]*>([\s\S]*?)<\/title>/)[1]; + + expect(title).to.include('Apply technical or declaration upgrade on'); + expect(title).to.include('service-2'); + expect(title).to.include('Privacy Policy'); + }); + + it('has an updated element matching the fetch date', () => { + const updated = firstEntry.match(/([^<]+)<\/updated>/)[1]; + + expect(new Date(updated).toISOString()).to.equal(FETCH_DATE_UPGRADE.toISOString()); + }); + + it('has three categories with the expected schemes', () => { + const categories = [...firstEntry.matchAll(//g)].map(match => match[1]); + + expect(categories).to.have.length(3); + + const schemes = categories.map(attrs => attrs.match(/scheme="([^"]+)"/)[1]); + + expect(schemes).to.include('tag:opentermsarchive.org,2026:scheme:service'); + expect(schemes).to.include('tag:opentermsarchive.org,2026:scheme:terms-type'); + expect(schemes).to.include('tag:opentermsarchive.org,2026:scheme:record-type'); + }); + + it('has category terms for service, terms type and record type', () => { + const categories = [...firstEntry.matchAll(//g)].map(match => match[1]); + const terms = categories.map(attrs => attrs.match(/term="([^"]+)"/)[1]); + + expect(terms).to.include('service-2'); + expect(terms).to.include('Privacy Policy'); + expect(terms).to.include('Technical upgrade'); + }); + }); + + describe('record-type classification', () => { + function findEntryById(xml, recordId) { + const match = [...xml.matchAll(/[\s\S]*?<\/entry>/g)].find(entry => entry[0].includes(`:${recordId}`)); + + return match && match[0]; + } + + it('classifies a first record as "First record"', () => { + const entry = findEntryById(response.text, savedVersions.upgradeRecord.id); + + expect(entry).to.not.be.undefined; + expect(entry).to.match(/term="First record"/); + }); + + it('classifies a content change as "Change"', () => { + const entry = findEntryById(response.text, savedVersions.changeRecord.id); + + expect(entry).to.not.be.undefined; + 
expect(entry).to.match(/term="Change"/); + }); + + it('classifies a technical upgrade as "Technical upgrade"', () => { + const entry = findEntryById(response.text, savedVersions.technicalUpgradeRecord.id); + + expect(entry).to.not.be.undefined; + expect(entry).to.match(/term="Technical upgrade"/); + }); + }); + + describe('configurable limit', () => { + it('returns at most the configured number of entries', () => { + const limit = config.get('@opentermsarchive/engine.collection-api.feed.limit'); + const entries = response.text.match(//g) || []; + + expect(entries.length).to.be.at.most(limit); + }); + }); + }); + + describe('GET /feed/:serviceId', () => { + const SERVICE = 'service_without_history'; + const OTHER_SERVICE = 'service_with_history'; + const TERMS = 'Terms of Service'; + + let repository; + + before(async function () { + this.timeout(5000); + repository = RepositoryFactory.create(storageConfig); + await repository.initialize(); + + await repository.save(new Version({ + serviceId: SERVICE, + termsType: TERMS, + content: 'c1', + fetchDate: new Date('2024-01-01T00:00:00Z'), + snapshotIds: ['s1'], + })); + await repository.save(new Version({ + serviceId: SERVICE, + termsType: TERMS, + content: 'c2', + fetchDate: new Date('2024-02-01T00:00:00Z'), + snapshotIds: ['s2'], + })); + await repository.save(new Version({ + serviceId: OTHER_SERVICE, + termsType: TERMS, + content: 'c3', + fetchDate: new Date('2024-03-01T00:00:00Z'), + snapshotIds: ['s3'], + })); + }); + + after(() => repository.removeAll()); + + context('when the service exists and has versions', () => { + let response; + + before(async () => { + response = await request.get(`${basePath}/v1/feed/${encodeURIComponent(SERVICE)}`); + }); + + it('responds with 200', () => { + expect(response.status).to.equal(200); + }); + + it('responds with Content-Type application/atom+xml', () => { + expect(response.headers['content-type']).to.match(/^application\/atom\+xml/); + }); + + it('includes only entries for 
that service', () => { + const serviceTerms = [...response.text.matchAll(/scheme="tag:opentermsarchive.org,2026:scheme:service"[^/]*term="([^"]+)"/g)] + .concat([...response.text.matchAll(/term="([^"]+)"[^/]*scheme="tag:opentermsarchive.org,2026:scheme:service"/g)]) + .map(match => match[1]); + + expect(serviceTerms).to.not.be.empty; + + for (const term of serviceTerms) { + expect(term).to.equal(SERVICE); + } + }); + + it('has a feed id including the service id', () => { + expect(extractTag(response.text, 'id')).to.equal(`tag:opentermsarchive.org,2026:feed:test:${SERVICE}`); + }); + + it('has a self link pointing to the service-scoped feed endpoint', () => { + const href = response.text.match(/]*rel="self"[^>]*href="([^"]+)"/)[1]; + + expect(href).to.match(new RegExp(`/feed/${SERVICE}$`)); + }); + }); + + context('when the service exists but has no versions', () => { + let response; + + before(async () => { + response = await request.get(`${basePath}/v1/feed/${encodeURIComponent('service_with_filters_history')}`); + }); + + it('responds with 200', () => { + expect(response.status).to.equal(200); + }); + + it('returns an empty feed (no entries)', () => { + expect(response.text).to.not.include(''); + }); + }); + + context('when the service does not exist', () => { + let response; + + before(async () => { + response = await request.get(`${basePath}/v1/feed/DoesNotExist`); + }); + + it('responds with 404', () => { + expect(response.status).to.equal(404); + }); + }); + + context('when the serviceId casing does not match', () => { + let response; + + before(async () => { + response = await request.get(`${basePath}/v1/feed/${encodeURIComponent(SERVICE.toUpperCase())}`); + }); + + it('responds with 404', () => { + expect(response.status).to.equal(404); + }); + }); + }); + + describe('XML escaping and URL encoding', () => { + const SERVICE = 'Service B!'; + const TERMS = 'Privacy Policy'; + const FETCH_DATE = new Date('2024-05-15T10:00:00Z'); + + let response; + let 
repository; + + before(async function () { + this.timeout(5000); + repository = RepositoryFactory.create(storageConfig); + await repository.initialize(); + + await repository.save(new Version({ + serviceId: SERVICE, + termsType: TERMS, + content: 'content with & and ', + fetchDate: FETCH_DATE, + snapshotIds: ['s_escape'], + })); + + response = await request.get(`${basePath}/v1/feed/${encodeURIComponent(SERVICE)}/${encodeURIComponent(TERMS)}`); + }); + + after(() => repository.removeAll()); + + it('responds with 200', () => { + expect(response.status).to.equal(200); + }); + + it('URL-encodes spaces and special characters in the self link href', () => { + const href = response.text.match(/]*rel="self"[^>]*href="([^"]+)"/)[1]; + + expect(href).to.include('Service%20B!'); + expect(href).to.include('Privacy%20Policy'); + expect(href).to.not.include('Service B!'); + }); + + it('URL-encodes spaces and special characters in entry alternate links', () => { + const entry = response.text.match(/[\s\S]*?<\/entry>/)[0]; + const href = entry.match(/]*rel="alternate"[^>]*href="([^"]+)"/)[1]; + + expect(href).to.include('Service%20B!'); + expect(href).to.include('Privacy%20Policy'); + }); + }); + + describe('GET /feed/:serviceId/:termsType', () => { + const SERVICE = 'service_without_history'; + const TERMS = 'Terms of Service'; + const UNKNOWN_TERMS = 'Imprint'; + + let repository; + + before(async function () { + this.timeout(5000); + repository = RepositoryFactory.create(storageConfig); + await repository.initialize(); + + await repository.save(new Version({ + serviceId: SERVICE, + termsType: TERMS, + content: 'first', + fetchDate: new Date('2024-01-01T00:00:00Z'), + snapshotIds: ['s1'], + })); + await repository.save(new Version({ + serviceId: SERVICE, + termsType: TERMS, + content: 'updated', + fetchDate: new Date('2024-02-01T00:00:00Z'), + snapshotIds: ['s2'], + })); + }); + + after(() => repository.removeAll()); + + context('when the service and terms type match', () => { + 
let response; + + before(async () => { + response = await request.get(`${basePath}/v1/feed/${encodeURIComponent(SERVICE)}/${encodeURIComponent(TERMS)}`); + }); + + it('responds with 200', () => { + expect(response.status).to.equal(200); + }); + + it('includes entries for the combination', () => { + const entries = response.text.match(//g) || []; + + expect(entries.length).to.be.at.least(1); + }); + + it('entries only have the expected terms type', () => { + const termsTypeTerms = [...response.text.matchAll(/ match[1]); + + for (const term of termsTypeTerms) { + expect(term).to.equal(TERMS); + } + }); + + it('has a feed id that includes both service and terms type', () => { + expect(extractTag(response.text, 'id')).to.equal(`tag:opentermsarchive.org,2026:feed:test:${SERVICE}:${TERMS}`); + }); + + it('has a self link pointing to the combination endpoint', () => { + const href = response.text.match(/]*rel="self"[^>]*href="([^"]+)"/)[1]; + + expect(href).to.match(new RegExp(`/feed/${SERVICE}/${encodeURIComponent(TERMS)}$`)); + }); + }); + + context('when the service exists but does not declare the terms type', () => { + let response; + + before(async () => { + response = await request.get(`${basePath}/v1/feed/${encodeURIComponent(SERVICE)}/${encodeURIComponent(UNKNOWN_TERMS)}`); + }); + + it('responds with 404', () => { + expect(response.status).to.equal(404); + }); + }); + + context('when the service does not exist', () => { + let response; + + before(async () => { + response = await request.get(`${basePath}/v1/feed/DoesNotExist/${encodeURIComponent(TERMS)}`); + }); + + it('responds with 404', () => { + expect(response.status).to.equal(404); + }); + }); + }); + + describe('entry links with versionUrlTemplate configured', () => { + const TEMPLATE = 'https://example.test/v/%VERSION_ID'; + + let response; + let repository; + let savedVersion; + + before(async function () { + this.timeout(5000); + repository = RepositoryFactory.create(storageConfig); + await 
repository.initialize(); + + savedVersion = await repository.save(new Version({ + serviceId: 'service-1', + termsType: 'Terms of Service', + content: 'content', + fetchDate: new Date('2024-01-01T00:00:00Z'), + snapshotIds: ['s1'], + })); + + const services = await Services.load(); + const templatedApp = express(); + + templatedApp.use(feedRouter(services, repository, storageConfig.type, 10, TEMPLATE)); + + response = await supertest(templatedApp).get('/feed'); + }); + + after(() => repository.removeAll()); + + it('interpolates %VERSION_ID into the alternate link', () => { + const href = response.text.match(/[\s\S]*?]*rel="alternate"[^>]*href="([^"]+)"/)[1]; + + expect(href).to.equal(`https://example.test/v/${savedVersion.id}`); + }); + + it('does not point to the API for entry links', () => { + const entries = response.text.match(/[\s\S]*?<\/entry>/g) || []; + + for (const entry of entries) { + expect(entry).to.not.match(/]*href="[^"]*\/version\//); + } + }); + + it('still emits exactly one link per entry', () => { + const entries = response.text.match(/[\s\S]*?<\/entry>/g) || []; + + for (const entry of entries) { + const links = entry.match(/]*\/>/g) || []; + + expect(links).to.have.length(1); + } + }); + }); +}); diff --git a/src/collection-api/routes/index.js b/src/collection-api/routes/index.js index b99636b90..34e720470 100644 --- a/src/collection-api/routes/index.js +++ b/src/collection-api/routes/index.js @@ -1,10 +1,13 @@ +import config from 'config'; import express from 'express'; import helmet from 'helmet'; import { getCollection } from '../../archivist/collection/index.js'; +import RepositoryFactory from '../../archivist/recorder/repositories/factory.js'; import * as Services from '../../archivist/services/index.js'; import docsRouter from './docs.js'; +import feedRouter from './feed.js'; import metadataRouter from './metadata.js'; import servicesRouter from './services.js'; import versionsRouter from './versions.js'; @@ -33,10 +36,14 @@ export default 
async function apiRouter(basePath) { const services = await Services.load(); const collection = await getCollection(); + const versionsStorageConfig = config.get('@opentermsarchive/engine.recorder.versions.storage'); + const versionsRepository = await RepositoryFactory.create(versionsStorageConfig).initialize(); + const feedConfig = config.get('@opentermsarchive/engine.collection-api.feed'); router.use(await metadataRouter(collection, services)); router.use(servicesRouter(services)); - router.use(versionsRouter); + router.use(versionsRouter(versionsRepository)); + router.use(feedRouter(services, versionsRepository, versionsStorageConfig.type, feedConfig.limit, feedConfig.versionUrlTemplate)); return router; } diff --git a/src/collection-api/routes/services.js b/src/collection-api/routes/services.js index f13879d2d..9906b7152 100644 --- a/src/collection-api/routes/services.js +++ b/src/collection-api/routes/services.js @@ -130,8 +130,7 @@ export default function servicesRouter(services) { * description: No service matching the provided ID is found. */ router.get('/service/:serviceId', (req, res) => { - const matchedServiceID = Object.keys(services).find(key => key.toLowerCase() === req.params.serviceId?.toLowerCase()); - const service = services[matchedServiceID]; + const service = Object.hasOwn(services, req.params.serviceId) ? 
services[req.params.serviceId] : null; if (!service) { res.status(404).send('Service not found'); diff --git a/src/collection-api/routes/services.test.js b/src/collection-api/routes/services.test.js index db6bdc16f..43dfed950 100644 --- a/src/collection-api/routes/services.test.js +++ b/src/collection-api/routes/services.test.js @@ -56,7 +56,6 @@ describe('Services API', () => { describe('GET /service/:serviceId', () => { let response; const SERVICE_ID = 'Service B!'; - const CASE_INSENSITIVE_SERVICE_ID = 'service b!'; before(async () => { response = await request(app).get(`${basePath}/v1/service/${encodeURI(SERVICE_ID)}`); @@ -106,49 +105,13 @@ describe('Services API', () => { }); }); - context('with a case-insensitive service ID parameter', () => { + context('when the service ID casing does not match', () => { before(async () => { - response = await request(app).get(`${basePath}/v1/service/${encodeURI(CASE_INSENSITIVE_SERVICE_ID)}`); + response = await request(app).get(`${basePath}/v1/service/${encodeURI(SERVICE_ID.toLowerCase())}`); }); - it('responds with 200 status code', () => { - expect(response.status).to.equal(200); - }); - - it('returns a service object with id', () => { - expect(response.body).to.have.property('id'); - }); - - it('returns the proper service object', () => { - expect(response.body.id).to.equal(SERVICE_ID); - }); - - it('returns a service object with name', () => { - expect(response.body).to.have.property('name'); - }); - - it('returns a service object with an array of terms', () => { - expect(response.body).to.have.property('terms').that.is.an('array'); - }); - - it('each terms should have a type property', () => { - response.body.terms.forEach(terms => { - expect(terms).to.have.property('type'); - }); - }); - - it('each terms should have an array of source documents', () => { - response.body.terms.forEach(terms => { - expect(terms).to.have.property('sourceDocuments').that.is.an('array'); - }); - }); - - it('each source document should 
have a location', () => { - response.body.terms.forEach(terms => { - terms.sourceDocuments.forEach(sourceDocument => { - expect(sourceDocument).to.have.property('location'); - }); - }); + it('responds with 404 status code', () => { + expect(response.status).to.equal(404); }); }); diff --git a/src/collection-api/routes/versions.js b/src/collection-api/routes/versions.js index e420f8998..176ba0c55 100644 --- a/src/collection-api/routes/versions.js +++ b/src/collection-api/routes/versions.js @@ -1,10 +1,10 @@ -import config from 'config'; import express from 'express'; -import RepositoryFactory from '../../archivist/recorder/repositories/factory.js'; import { toISODateWithoutMilliseconds } from '../../archivist/utils/date.js'; /** + * @param {object} versionsRepository The versions repository instance + * @returns {express.Router} The router instance * @private * @swagger * tags: @@ -27,86 +27,86 @@ import { toISODateWithoutMilliseconds } from '../../archivist/utils/date.js'; * type: string * description: The JSON-escaped Markdown content of the version */ -const router = express.Router(); +export default function versionsRouter(versionsRepository) { + const router = express.Router(); -const versionsRepository = await RepositoryFactory.create(config.get('@opentermsarchive/engine.recorder.versions.storage')).initialize(); + /** + * @private + * @swagger + * /version/{serviceId}/{termsType}/{date}: + * get: + * summary: Get a specific version of some terms at a given date. + * tags: [Versions] + * produces: + * - application/json + * parameters: + * - in: path + * name: serviceId + * description: The ID of the service whose version will be returned. + * schema: + * type: string + * required: true + * - in: path + * name: termsType + * description: The type of terms whose version will be returned. + * schema: + * type: string + * required: true + * - in: path + * name: date + * description: The date and time for which the version is requested, in ISO 8601 format. 
+ * schema: + * type: string + * format: date-time + * required: true + * responses: + * 200: + * description: A JSON object containing the version content and metadata. + * content: + * application/json: + * schema: + * $ref: '#/components/schemas/Version' + * 404: + * description: No version found for the specified combination of service ID, terms type and date. + * content: + * application/json: + * schema: + * type: object + * properties: + * error: + * type: string + * description: Error message indicating that no version is found. + * 416: + * description: The requested date is in the future. + * content: + * application/json: + * schema: + * type: object + * properties: + * error: + * type: string + * description: Error message indicating that the requested date is in the future. + */ + router.get('/version/:serviceId/:termsType/:date', async (req, res) => { + const { serviceId, termsType, date } = req.params; + const requestedDate = new Date(date); -/** - * @private - * @swagger - * /version/{serviceId}/{termsType}/{date}: - * get: - * summary: Get a specific version of some terms at a given date. - * tags: [Versions] - * produces: - * - application/json - * parameters: - * - in: path - * name: serviceId - * description: The ID of the service whose version will be returned. - * schema: - * type: string - * required: true - * - in: path - * name: termsType - * description: The type of terms whose version will be returned. - * schema: - * type: string - * required: true - * - in: path - * name: date - * description: The date and time for which the version is requested, in ISO 8601 format. - * schema: - * type: string - * format: date-time - * required: true - * responses: - * 200: - * description: A JSON object containing the version content and metadata. - * content: - * application/json: - * schema: - * $ref: '#/components/schemas/Version' - * 404: - * description: No version found for the specified combination of service ID, terms type and date. 
- * content: - * application/json: - * schema: - * type: object - * properties: - * error: - * type: string - * description: Error message indicating that no version is found. - * 416: - * description: The requested date is in the future. - * content: - * application/json: - * schema: - * type: object - * properties: - * error: - * type: string - * description: Error message indicating that the requested date is in the future. - */ -router.get('/version/:serviceId/:termsType/:date', async (req, res) => { - const { serviceId, termsType, date } = req.params; - const requestedDate = new Date(date); - - if (requestedDate > new Date()) { - return res.status(416).json({ error: 'Requested version is in the future' }); - } + if (requestedDate > new Date()) { + return res.status(416).json({ error: 'Requested version is in the future' }); + } - const version = await versionsRepository.findByDate(serviceId, termsType, requestedDate); + const version = await versionsRepository.findByDate(serviceId, termsType, requestedDate); - if (!version) { - return res.status(404).json({ error: `No version found for date ${date}` }); - } + if (!version) { + return res.status(404).json({ error: `No version found for date ${date}` }); + } - return res.status(200).json({ - id: version.id, - fetchDate: toISODateWithoutMilliseconds(version.fetchDate), - content: version.content, + return res.status(200).json({ + id: version.id, + fetchDate: toISODateWithoutMilliseconds(version.fetchDate), + content: version.content, + }); }); -}); -export default router; + return router; +} diff --git a/src/collection-api/routes/versions.test.js b/src/collection-api/routes/versions.test.js index aadcfe14b..bfdff4e15 100644 --- a/src/collection-api/routes/versions.test.js +++ b/src/collection-api/routes/versions.test.js @@ -17,7 +17,7 @@ describe('Versions API', () => { let versionsRepository; const FETCH_DATE = new Date('2023-01-01T12:00:00Z'); const VERSION_COMMON_ATTRIBUTES = { - serviceId: 'service-1', + 
serviceId: 'service·A', termsType: 'Terms of Service', snapshotId: ['snapshot_id'], }; @@ -62,7 +62,7 @@ describe('Versions API', () => { context('when a version is found', () => { before(async () => { - response = await request.get(`${basePath}/v1/version/service-1/Terms%20of%20Service/${encodeURIComponent(toISODateWithoutMilliseconds(FETCH_DATE))}`); + response = await request.get(`${basePath}/v1/version/service·A/Terms%20of%20Service/${encodeURIComponent(toISODateWithoutMilliseconds(FETCH_DATE))}`); }); it('responds with 200 status code', () => { @@ -80,7 +80,7 @@ describe('Versions API', () => { context('when the requested date is anterior to the first available version', () => { before(async () => { - response = await request.get(`${basePath}/v1/version/service-1/Terms%20of%20Service/2000-01-01T12:00:00Z`); + response = await request.get(`${basePath}/v1/version/service·A/Terms%20of%20Service/2000-01-01T12:00:00Z`); }); it('responds with 404 status code', () => { @@ -100,7 +100,7 @@ describe('Versions API', () => { before(async () => { const dateInTheFuture = new Date(Date.now() + 60000); // 1 minute in the future - response = await request.get(`${basePath}/v1/version/service-1/Terms%20of%20Service/${encodeURIComponent(toISODateWithoutMilliseconds(dateInTheFuture))}`); + response = await request.get(`${basePath}/v1/version/service·A/Terms%20of%20Service/${encodeURIComponent(toISODateWithoutMilliseconds(dateInTheFuture))}`); }); it('responds with 416 status code', () => { diff --git a/src/reporter/gitlab/index.js b/src/reporter/gitlab/index.js index 431416768..55ea591f1 100644 --- a/src/reporter/gitlab/index.js +++ b/src/reporter/gitlab/index.js @@ -358,7 +358,7 @@ export default class GitLab { try { let apiUrl = `${this.apiBaseURL}/projects/${this.projectId}/issues?search=${encodeURIComponent(title)}&state=${searchParams.state}&per_page=100`; - if (searchParams.state == 'all') apiUrl = 
`${this.apiBaseURL}/projects/${this.projectId}/issues?search=${encodeURIComponent(title)}&per_page=100`; + if (searchParams.state == 'all') { apiUrl = `${this.apiBaseURL}/projects/${this.projectId}/issues?search=${encodeURIComponent(title)}&per_page=100`; } const options = GitLab.baseOptionsHttpReq();