diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 02b6235c..b93ab198 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -19,6 +19,17 @@ jobs: - name: Lint Dockerfile run: docker run --rm -i hadolint/hadolint < Dockerfile + quickstart: + needs: + - hadolint + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v6 + - name: Make quickstart verifier executable + run: chmod +x bin/quickstart-verify + - name: Verify quickstart compose contract + run: bin/quickstart-verify + ruby: runs-on: ubuntu-latest steps: @@ -86,12 +97,12 @@ jobs: run: | git config --global user.name "github-actions[bot]" git config --global user.email "github-actions[bot]@users.noreply.github.com" - + make openapi make openapi-client - + git add public/openapi.yaml frontend/src/api/generated/ - + if git diff --staged --quiet; then echo "No changes to commit" else diff --git a/README.md b/README.md index 3516a8b3..c89f5df0 100644 --- a/README.md +++ b/README.md @@ -2,106 +2,21 @@ # html2rss-web -`html2rss-web` serves RSS/JSON feeds from website sources using a Ruby (Roda) backend and a Preact frontend. +`html2rss-web` turns website sources into RSS/JSON feeds. -## Use This Repo For +## Quickstart -- Running a self-hosted `html2rss-web` instance with Docker Compose. -- Creating signed, per-account feed URLs through `POST /api/v1/feeds`. -- Local development inside the repository Dev Container. +Test drive the app with these steps: -## Quick Links +1. Download [docker-compose.quickstart.yml](./docker-compose.quickstart.yml) +2. `docker compose -f docker-compose.quickstart.yml up -d` +3. Open [`http://localhost:4000/`](http://localhost:4000/) in your browser +4. 
When prompted for a token, use `CHANGE_ME_ADMIN_TOKEN` -- Public docs + feed directory: https://html2rss.github.io -- Docker Hub image: https://hub.docker.com/r/html2rss/web -- OpenAPI file in this repo: [public/openapi.yaml](public/openapi.yaml) -- Contributor guide: [docs/README.md](docs/README.md) -- Discussions: https://github.com/orgs/html2rss/discussions -- Sponsor: https://github.com/sponsors/gildesmarais - -## Architecture Snapshot - -- Backend: Ruby + Roda (`app.rb`, `app/web/**`) -- Frontend: Preact + Vite (built assets served from `frontend/dist`) -- Feed extraction: `html2rss` gem -- Distribution baseline: `docker-compose.yml` - -For detailed architecture and contributor rules, see [docs/README.md](docs/README.md). - -## Trial Run (Docker Compose) - -Prerequisite: Docker Engine + Docker Compose. - -Run from the repository root: - -```bash -BUILD_TAG="$(date +%F)" \ -GIT_SHA="trial" \ -HTML2RSS_SECRET_KEY="$(openssl rand -hex 32)" \ -HEALTH_CHECK_TOKEN="$(openssl rand -hex 24)" \ -BROWSERLESS_IO_API_TOKEN="trial-browserless-token" \ -docker compose up -d -``` - -Then open: - -- `http://localhost:4000/` (UI) -- `http://localhost:4000/api/v1` (API metadata) -- `http://localhost:4000/openapi.yaml` (OpenAPI document) - -Stop with: - -```bash -docker compose down -``` - -## Deploy With Docker Compose +> [!IMPORTANT] +> This is a first-run demo path, not a production-ready setup. 
-The checked-in [`docker-compose.yml`](docker-compose.yml) requires these environment variables for `html2rss-web`: - -- `BUILD_TAG` -- `GIT_SHA` -- `HTML2RSS_SECRET_KEY` -- `HEALTH_CHECK_TOKEN` -- `BROWSERLESS_IO_API_TOKEN` - -Optional runtime variables: - -- `SENTRY_DSN` -- `SENTRY_ENABLE_LOGS` (defaults to `false`) - -Example: - -```bash -export HTML2RSS_SECRET_KEY="$(openssl rand -hex 32)" -export HEALTH_CHECK_TOKEN="replace-with-a-strong-token" -export BROWSERLESS_IO_API_TOKEN="replace-with-your-browserless-token" -export BUILD_TAG="local" -export GIT_SHA="$(git rev-parse --short HEAD 2>/dev/null || echo dev)" -export AUTO_SOURCE_ENABLED=true - -docker compose up -d -``` - -## Runtime Behavior That Affects Operations - -- In production, missing `HTML2RSS_SECRET_KEY` stops startup. -- `BUILD_TAG` and `GIT_SHA` are expected in production; missing values produce a startup warning. -- `POST /api/v1/feeds` requires a bearer token and only works when `AUTO_SOURCE_ENABLED=true`. -- `AUTO_SOURCE_ENABLED` defaults to `true` in development/test and `false` otherwise. -- Strategy support comes from `Html2rss::RequestService` (`faraday` and `browserless` availability is runtime-dependent). - -## Stable Integration Entry Points - -- OpenAPI: `/openapi.yaml` (or [`public/openapi.yaml`](public/openapi.yaml) in-repo) -- API metadata: `/api/v1` -- Feed creation endpoint: `POST /api/v1/feeds` -- Health endpoints: `/api/v1/health`, `/api/v1/health/ready`, `/api/v1/health/live` - -For feed config authoring/validation, use the `html2rss` schema: - -- https://github.com/html2rss/html2rss/blob/master/schema/html2rss-config.schema.json -- `html2rss schema` +Continue with the [Getting Started](https://html2rss.github.io/web-application/getting-started) and deployment guides for real setup. ## Development (Dev Container Only) @@ -113,9 +28,9 @@ make dev make ready ``` -See [docs/README.md](docs/README.md) for contributor workflows, verification gates, and architectural constraints. 
- -## Contributing +## Development and Contributing -- Project guidelines: https://html2rss.github.io/get-involved/contributing -- Repo contributor guide: [docs/README.md](docs/README.md) +- Contributing guidelines: https://html2rss.github.io/get-involved/contributing +- Docker image: https://hub.docker.com/r/html2rss/web +- Discussions: https://github.com/orgs/html2rss/discussions +- Sponsor: https://github.com/sponsors/gildesmarais diff --git a/Rakefile b/Rakefile index c2da4b61..c0163b50 100644 --- a/Rakefile +++ b/Rakefile @@ -46,6 +46,11 @@ desc 'Build and run docker image/container, and send requests to it' task :test do current_dir = ENV.fetch('GITHUB_WORKSPACE', __dir__) smoke_auto_source_enabled = ENV.fetch('SMOKE_AUTO_SOURCE_ENABLED', 'false') + default_smoke_health_token = 'docker-smoke-health-check-token-0123456789abcdef' + smoke_health_token = ENV.fetch('SMOKE_HEALTH_TOKEN', default_smoke_health_token) + default_smoke_api_token = + smoke_auto_source_enabled == 'true' ? 'docker-smoke-admin-token-0123456789abcdef' : 'CHANGE_ME_ADMIN_TOKEN' + smoke_api_token = ENV.fetch('SMOKE_API_TOKEN', default_smoke_api_token) smoke_build_tag = ENV.fetch('SMOKE_BUILD_TAG', ENV.fetch('BUILD_TAG', 'docker-smoke')) smoke_git_sha = ENV.fetch('SMOKE_GIT_SHA', ENV.fetch('GITHUB_SHA', ENV.fetch('GIT_SHA', 'docker-smoke'))) image_name = 'html2rss/web' @@ -64,8 +69,9 @@ task :test do '--env PUMA_LOG_CONFIG=1', "--env BUILD_TAG=#{smoke_build_tag}", "--env GIT_SHA=#{smoke_git_sha}", - '--env HEALTH_CHECK_TOKEN=CHANGE_ME_HEALTH_CHECK_TOKEN', + "--env HEALTH_CHECK_TOKEN=#{smoke_health_token}", '--env HTML2RSS_SECRET_KEY=0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef', + "--env HTML2RSS_ACCESS_TOKEN=#{smoke_api_token}", "--env AUTO_SOURCE_ENABLED=#{smoke_auto_source_enabled}", "--mount type=bind,source=#{current_dir}/config,target=/app/config", '--name html2rss-web-test', @@ -79,8 +85,8 @@ task :test do Output.describe 'Running RSpec smoke suite against container' 
smoke_env = { 'SMOKE_BASE_URL' => 'http://127.0.0.1:4000', - 'SMOKE_HEALTH_TOKEN' => 'CHANGE_ME_HEALTH_CHECK_TOKEN', - 'SMOKE_API_TOKEN' => 'CHANGE_ME_ADMIN_TOKEN', + 'SMOKE_HEALTH_TOKEN' => smoke_health_token, + 'SMOKE_API_TOKEN' => smoke_api_token, 'SMOKE_AUTO_SOURCE_ENABLED' => smoke_auto_source_enabled, 'RUN_DOCKER_SPECS' => 'true' } diff --git a/app/web/api/v1/health.rb b/app/web/api/v1/health.rb index 81f889ce..0e1effdc 100644 --- a/app/web/api/v1/health.rb +++ b/app/web/api/v1/health.rb @@ -94,7 +94,7 @@ def bearer_token(request) # @return [void] def verify_configuration! - LocalConfig.yaml + LocalConfig.load_snapshot rescue StandardError raise Html2rss::Web::HealthCheckFailedError end diff --git a/app/web/config/environment_validator.rb b/app/web/config/environment_validator.rb index f651ceb1..c4d9121a 100644 --- a/app/web/config/environment_validator.rb +++ b/app/web/config/environment_validator.rb @@ -29,7 +29,6 @@ def validate_production_security! validate_secret_key! validate_account_configuration! - validate_build_metadata! end # @return [Boolean] @@ -94,23 +93,79 @@ def validate_secret_key! exit 1 end - # @return [void] - def validate_build_metadata! - return unless missing_build_metadata? - - log_missing_build_metadata! - warn_lines(*missing_build_metadata_warning_lines) - nil - end - def validate_account_configuration! accounts = AccountManager.accounts + validate_account_token_shapes!(accounts) + validate_health_check_token!(accounts) + validate_create_feed_token!(accounts) weak_tokens = accounts.select { |acc| acc[:token].length < 16 } return unless weak_tokens.any? handle_weak_account_tokens!(weak_tokens) end + # @param accounts [Array<Hash{Symbol=>Object}>] + # @return [void] + def validate_account_token_shapes!(accounts) + malformed_accounts = accounts.reject { |acc| acc[:token].is_a?(String) && !acc[:token].empty? } + return unless malformed_accounts.any? 
+ + handle_malformed_account_tokens!(malformed_accounts) + end + + # @param accounts [Array<Hash{Symbol=>Object}>] + # @return [void] + def validate_create_feed_token!(accounts) + return unless invalid_placeholder_create_feed_token?(accounts) + + SecurityLogger.log_config_validation_failure( + 'access_token', + 'Placeholder create-feed token is not allowed when auto source is enabled' + ) + warn_lines( + 'CRITICAL: Placeholder create-feed token detected in production!', + 'Set HTML2RSS_ACCESS_TOKEN to a strong token before enabling automatic feed generation.' + ) + exit 1 + end + + # @param accounts [Array<Hash{Symbol=>Object}>] + # @return [void] + def validate_health_check_token!(accounts) + return unless placeholder_health_check_token?(accounts) + + SecurityLogger.log_config_validation_failure( + 'health_check_token', + 'Placeholder health-check token is not allowed in production' + ) + warn_lines( + 'CRITICAL: Placeholder health-check token detected in production!', + 'Set a real token for the health-check account or remove the account from production config.' + ) + exit 1 + end + + # @param accounts [Array<Hash{Symbol=>Object}>] + # @return [Boolean] + def invalid_placeholder_create_feed_token?(accounts) + auto_source_enabled? && placeholder_create_feed_token?(accounts) + end + + # @param accounts [Array<Hash{Symbol=>Object}>] + # @return [Boolean] + def placeholder_create_feed_token?(accounts) + accounts.any? { |account| account[:token] == RuntimeEnv::ADMIN_ACCESS_TOKEN_PLACEHOLDER } + end + + # @param accounts [Array<Hash{Symbol=>Object}>] + # @return [Boolean] + def placeholder_health_check_token?(accounts) + accounts.any? do |account| + account[:username] == 'health-check' && + account[:token] == RuntimeEnv::HEALTH_CHECK_TOKEN_PLACEHOLDER + end + end + # @param lines [Array] # @return [void] def warn_lines(*lines) @@ -140,31 +195,18 @@ def handle_weak_account_tokens!(weak_tokens) exit 1 end - # @return [Boolean] - def missing_build_metadata? - build_metadata_values.any?(&:empty?) 
- end - - # @return [Array] - def build_metadata_values - %w[BUILD_TAG GIT_SHA].map { |key| ENV.fetch(key, '').strip } - end - + # @param malformed_accounts [Array<Hash{Symbol=>Object}>] # @return [void] - def log_missing_build_metadata! - SecurityLogger.log_config_validation_failure( - 'build_metadata', - 'Missing BUILD_TAG or GIT_SHA', - severity: :warn + def handle_malformed_account_tokens!(malformed_accounts) + malformed_usernames = malformed_accounts.map { |acc| acc[:username] || '(unknown)' }.join(', ') + SecurityLogger.log_config_validation_failure('account_tokens', + "Invalid token configuration for users: #{malformed_usernames}") + warn_lines( + 'CRITICAL: Invalid account token configuration detected in production!', + 'Each account token must be a non-empty string.', + "Invalid token configuration found for users: #{malformed_usernames}" ) - end - - # @return [Array] - def missing_build_metadata_warning_lines - [ - 'WARNING: Missing build metadata for production deployment.', - 'Set BUILD_TAG and GIT_SHA to improve release traceability.' - ] + exit 1 end end # rubocop:enable Metrics/ClassLength diff --git a/app/web/config/local_config.rb b/app/web/config/local_config.rb index 8810ccb5..038f83a2 100644 --- a/app/web/config/local_config.rb +++ b/app/web/config/local_config.rb @@ -1,6 +1,8 @@ # frozen_string_literal: true +require 'erb' require 'yaml' +require_relative 'runtime_env' begin require 'html2rss/configs' rescue LoadError => error @@ -56,17 +58,33 @@ def global end ## + # @return [Html2rss::Web::ConfigSnapshot::Snapshot] + def snapshot + @mutex.synchronize { @snapshot ||= load_snapshot } + rescue KeyError, TypeError, ArgumentError => error + raise InvalidConfig, "Invalid local config: #{error.message}" + end + + ## + # Reparses the current config file without touching memoized runtime + # state. Health checks use this path so config drift shows up without + # forcing live request handlers onto a reload path. 
+ # # @return [Hash] - def yaml - YAML.safe_load_file(CONFIG_FILE, symbolize_names: true).freeze + def load_yaml + template = File.read(CONFIG_FILE) + YAML.safe_load(ERB.new(template, trim_mode: '-').result, symbolize_names: true).freeze rescue Errno::ENOENT => error raise NotFound, "Configuration file not found: #{error.message}" end ## + # Reparses and normalizes the current config file without mutating the + # memoized runtime snapshot. + # # @return [Html2rss::Web::ConfigSnapshot::Snapshot] - def snapshot - @mutex.synchronize { @snapshot ||= ConfigSnapshot.load(yaml) } + def load_snapshot + ConfigSnapshot.load(load_yaml) rescue KeyError, TypeError, ArgumentError => error raise InvalidConfig, "Invalid local config: #{error.message}" end diff --git a/app/web/config/runtime_env.rb b/app/web/config/runtime_env.rb index 6a2c6d5c..8718e645 100644 --- a/app/web/config/runtime_env.rb +++ b/app/web/config/runtime_env.rb @@ -6,7 +6,9 @@ module Web # Captures boot-time environment configuration and scrubs selected secrets # from the process environment after validation. module RuntimeEnv - SENSITIVE_KEYS = %w[HTML2RSS_SECRET_KEY HEALTH_CHECK_TOKEN SENTRY_DSN].freeze + ADMIN_ACCESS_TOKEN_PLACEHOLDER = 'CHANGE_ME_ADMIN_TOKEN' + HEALTH_CHECK_TOKEN_PLACEHOLDER = 'CHANGE_ME_HEALTH_CHECK_TOKEN' + SENSITIVE_KEYS = %w[HTML2RSS_SECRET_KEY HTML2RSS_ACCESS_TOKEN HEALTH_CHECK_TOKEN SENTRY_DSN].freeze BOOT_METADATA_KEYS = %w[BUILD_TAG GIT_SHA RACK_ENV SENTRY_ENABLE_LOGS].freeze @mutex = Mutex.new @values = nil @@ -31,7 +33,19 @@ def secret_key # @return [String] def health_check_token - fetch('HEALTH_CHECK_TOKEN', '') + token = fetch('HEALTH_CHECK_TOKEN', '').to_s.strip + token.empty? ? HEALTH_CHECK_TOKEN_PLACEHOLDER : token + end + + # @return [String] + def access_token + fetch('HTML2RSS_ACCESS_TOKEN', '') + end + + # @return [String] + def admin_access_token + token = access_token.to_s.strip + token.empty? ? 
ADMIN_ACCESS_TOKEN_PLACEHOLDER : token end # @return [String, nil] diff --git a/bin/quickstart-down b/bin/quickstart-down new file mode 100755 index 00000000..1b9b685d --- /dev/null +++ b/bin/quickstart-down @@ -0,0 +1,11 @@ +#!/usr/bin/env bash +# frozen_string_literal: true + +set -euo pipefail + +COMPOSE_PROJECT_NAME="${COMPOSE_PROJECT_NAME:-html2rss-quickstart}" + +docker compose \ + -p "$COMPOSE_PROJECT_NAME" \ + -f docker-compose.quickstart.yml \ + down diff --git a/bin/quickstart-up b/bin/quickstart-up new file mode 100755 index 00000000..4d7607e7 --- /dev/null +++ b/bin/quickstart-up @@ -0,0 +1,24 @@ +#!/usr/bin/env bash +# frozen_string_literal: true + +set -euo pipefail + +IMAGE_TAG="${HTML2RSS_WEB_IMAGE:-html2rss/web:quickstart-local}" +BUILD_TAG="${BUILD_TAG:-quickstart-local}" +GIT_SHA_VALUE="${GIT_SHA:-$(git rev-parse --short HEAD 2>/dev/null || echo dev)}" +COMPOSE_PROJECT_NAME="${COMPOSE_PROJECT_NAME:-html2rss-quickstart}" + +docker build \ + -t "$IMAGE_TAG" \ + --build-arg "BUILD_TAG=$BUILD_TAG" \ + --build-arg "GIT_SHA=$GIT_SHA_VALUE" \ + . 
+ +HTML2RSS_WEB_IMAGE="$IMAGE_TAG" \ +docker compose \ + -p "$COMPOSE_PROJECT_NAME" \ + -f docker-compose.quickstart.yml \ + up -d + +echo "quickstart up: http://localhost:4000/" +echo 'quickstart token: CHANGE_ME_ADMIN_TOKEN' diff --git a/bin/quickstart-verify b/bin/quickstart-verify new file mode 100755 index 00000000..f18c1113 --- /dev/null +++ b/bin/quickstart-verify @@ -0,0 +1,64 @@ +#!/usr/bin/env ruby +# frozen_string_literal: true + +require 'json' +require 'open3' + +COMPOSE_FILE = 'docker-compose.quickstart.yml' +EXPECTED_WEB_IMAGE = 'html2rss/web:1' +EXPECTED_RACK_ENV = 'development' +EXPECTED_ACCESS_TOKEN = 'CHANGE_ME_ADMIN_TOKEN' +EXPECTED_SCRAPER_URL = 'http://botasaurus:4010' +EXPECTED_HOST_IP = '127.0.0.1' +EXPECTED_PUBLISHED_PORT = '4000' +EXPECTED_TARGET_PORT = 4000 + +def compose_config + output, status = Open3.capture2e('docker', 'compose', '-f', COMPOSE_FILE, 'config', '--format', 'json') + abort("quickstart verify failed: docker compose config exited #{status.exitstatus}\n#{output}") unless status.success? + + JSON.parse(output) +rescue JSON::ParserError => error + abort("quickstart verify failed: invalid compose json output: #{error.message}") +end + +def fetch_service!(services, name) + service = services[name] + abort("quickstart verify failed: missing service #{name.inspect}") unless service + + service +end + +def assert_equal!(actual, expected, label) + return if actual == expected + + abort("quickstart verify failed: #{label} expected #{expected.inspect}, got #{actual.inspect}") +end + +def assert_port_mapping!(ports) + match = Array(ports).any? 
do |port| + port['host_ip'] == EXPECTED_HOST_IP && + port['published'] == EXPECTED_PUBLISHED_PORT && + port['target'] == EXPECTED_TARGET_PORT + end + return if match + + abort( + 'quickstart verify failed: html2rss-web ports must include ' \ + "#{EXPECTED_HOST_IP}:#{EXPECTED_PUBLISHED_PORT}:#{EXPECTED_TARGET_PORT}" + ) +end + +config = compose_config +services = config.fetch('services') { abort('quickstart verify failed: missing top-level services map') } +web = fetch_service!(services, 'html2rss-web') +fetch_service!(services, 'botasaurus') + +assert_equal!(web['image'], EXPECTED_WEB_IMAGE, 'html2rss-web image') +environment = web.fetch('environment') { abort('quickstart verify failed: html2rss-web missing environment map') } +assert_equal!(environment['RACK_ENV'], EXPECTED_RACK_ENV, 'html2rss-web RACK_ENV') +assert_equal!(environment['HTML2RSS_ACCESS_TOKEN'], EXPECTED_ACCESS_TOKEN, 'html2rss-web HTML2RSS_ACCESS_TOKEN') +assert_equal!(environment['BOTASAURUS_SCRAPER_URL'], EXPECTED_SCRAPER_URL, 'html2rss-web BOTASAURUS_SCRAPER_URL') +assert_port_mapping!(web['ports']) + +puts 'quickstart verify passed' diff --git a/config/feeds.yml b/config/feeds.yml index c49a7422..d9639e67 100644 --- a/config/feeds.yml +++ b/config/feeds.yml @@ -1,7 +1,7 @@ auth: accounts: - username: "admin" - token: "CHANGE_ME_ADMIN_TOKEN" + token: "<%= Html2rss::Web::RuntimeEnv.admin_access_token %>" allowed_urls: - "*" # Full access - username: "demo" @@ -11,7 +11,7 @@ auth: - "https://news.ycombinator.com" - "https://github.com/trending" - username: "health-check" - token: "CHANGE_ME_HEALTH_CHECK_TOKEN" + token: "<%= Html2rss::Web::RuntimeEnv.health_check_token %>" allowed_urls: [] # Health check doesn't need URL access stylesheets: diff --git a/docker-compose.quickstart.yml b/docker-compose.quickstart.yml new file mode 100644 index 00000000..b21c9cf9 --- /dev/null +++ b/docker-compose.quickstart.yml @@ -0,0 +1,12 @@ +services: + html2rss-web: + image: ${HTML2RSS_WEB_IMAGE:-html2rss/web:1} 
+ ports: + - "127.0.0.1:4000:4000" + environment: + RACK_ENV: development + HTML2RSS_ACCESS_TOKEN: CHANGE_ME_ADMIN_TOKEN + BOTASAURUS_SCRAPER_URL: http://botasaurus:4010 + + botasaurus: + image: html2rss/botasaurus-scrape-api:latest diff --git a/spec/html2rss/web/api/v1_spec.rb b/spec/html2rss/web/api/v1_spec.rb index a67a2f0a..4ff875e4 100644 --- a/spec/html2rss/web/api/v1_spec.rb +++ b/spec/html2rss/web/api/v1_spec.rb @@ -288,7 +288,7 @@ def expected_featured_feeds it 'returns error when configuration fails', :aggregate_failures do allow(Html2rss::Web::Auth).to receive(:authenticate).and_return({ username: 'health-check' }) - allow(Html2rss::Web::LocalConfig).to receive(:yaml).and_raise(StandardError, 'boom') + allow(Html2rss::Web::LocalConfig).to receive(:load_snapshot).and_raise(StandardError, 'boom') header 'Authorization', "Bearer #{health_token}" get '/api/v1/health' diff --git a/spec/html2rss/web/app_integration_spec.rb b/spec/html2rss/web/app_integration_spec.rb index 0766185c..e993a4bf 100644 --- a/spec/html2rss/web/app_integration_spec.rb +++ b/spec/html2rss/web/app_integration_spec.rb @@ -34,6 +34,7 @@ } } end + let(:config_snapshot) { Html2rss::Web::ConfigSnapshot.load(accounts_config) } let(:json_headers) { { 'CONTENT_TYPE' => 'application/json' } } let(:auth_headers) { json_headers.merge('HTTP_AUTHORIZATION' => "Bearer #{account[:token]}") } @@ -56,7 +57,7 @@ end before do - allow(Html2rss::Web::LocalConfig).to receive(:yaml).and_return(accounts_config) + allow(Html2rss::Web::LocalConfig).to receive_messages(global: config_snapshot.global, snapshot: config_snapshot) stub_const('Html2rss::FeedChannel', Class.new { attr_reader :ttl }) stub_const('Html2rss::Feed', Class.new { attr_reader :channel }) allow(Html2rss::Web::AutoSource).to receive(:enabled?).and_return(true) diff --git a/spec/html2rss/web/boot/setup_spec.rb b/spec/html2rss/web/boot/setup_spec.rb index 433b10aa..5d0ffa3e 100644 --- a/spec/html2rss/web/boot/setup_spec.rb +++ 
b/spec/html2rss/web/boot/setup_spec.rb @@ -47,13 +47,7 @@ end it 'captures and scrubs sensitive env vars after validation', :aggregate_failures do - allow(Html2rss::Web::EnvironmentValidator).to receive(:validate_environment!).ordered do - expect(ENV.fetch('HTML2RSS_SECRET_KEY', nil)).to eq(boot_secret_key) - expect(ENV.fetch('HEALTH_CHECK_TOKEN', nil)).to eq('health-token') - end - allow(Html2rss::Web::EnvironmentValidator).to receive(:validate_production_security!).ordered do - expect(ENV.fetch('SENTRY_DSN', nil)).to eq(sentry_dsn) - end + expect_sensitive_env_during_validation ClimateControl.modify(scrubbed_env) do described_class.call! @@ -130,19 +124,37 @@ def stub_environment_validation def scrubbed_env boot_env.merge( + 'HTML2RSS_ACCESS_TOKEN' => 'access-token', 'HEALTH_CHECK_TOKEN' => 'health-token', 'SENTRY_DSN' => sentry_dsn ) end def expect_runtime_env_to_match_boot_values - expect(Html2rss::Web::RuntimeEnv.secret_key).to eq(boot_secret_key) - expect(Html2rss::Web::RuntimeEnv.health_check_token).to eq('health-token') - expect(Html2rss::Web::RuntimeEnv.sentry_dsn).to eq(sentry_dsn) + { + secret_key: boot_secret_key, + access_token: 'access-token', + health_check_token: 'health-token', + sentry_dsn: sentry_dsn + }.each do |attribute, value| + expect(Html2rss::Web::RuntimeEnv.public_send(attribute)).to eq(value) + end + end + + def expect_sensitive_env_during_validation # rubocop:disable Metrics/AbcSize + allow(Html2rss::Web::EnvironmentValidator).to receive(:validate_environment!).ordered do + expect(ENV.fetch('HTML2RSS_SECRET_KEY', nil)).to eq(boot_secret_key) + expect(ENV.fetch('HTML2RSS_ACCESS_TOKEN', nil)).to eq('access-token') + expect(ENV.fetch('HEALTH_CHECK_TOKEN', nil)).to eq('health-token') + end + allow(Html2rss::Web::EnvironmentValidator).to receive(:validate_production_security!).ordered do + expect(ENV.fetch('SENTRY_DSN', nil)).to eq(sentry_dsn) + end end def expect_sensitive_env_to_be_scrubbed expect(ENV.fetch('HTML2RSS_SECRET_KEY', nil)).to 
be_nil + expect(ENV.fetch('HTML2RSS_ACCESS_TOKEN', nil)).to be_nil expect(ENV.fetch('HEALTH_CHECK_TOKEN', nil)).to be_nil expect(ENV.fetch('SENTRY_DSN', nil)).to be_nil end diff --git a/spec/html2rss/web/environment_validator_spec.rb b/spec/html2rss/web/environment_validator_spec.rb index 1d4cae25..87860d88 100644 --- a/spec/html2rss/web/environment_validator_spec.rb +++ b/spec/html2rss/web/environment_validator_spec.rb @@ -53,9 +53,7 @@ def stub_validation_logging ClimateControl.modify( 'RACK_ENV' => 'production', - 'HTML2RSS_SECRET_KEY' => 'short-secret', - 'BUILD_TAG' => '2026-03-27', - 'GIT_SHA' => 'abc1234' + 'HTML2RSS_SECRET_KEY' => 'short-secret' ) do expect { described_class.validate_production_security! }.to raise_error(SystemExit) end @@ -64,21 +62,70 @@ def stub_validation_logging .with('secret_key', 'Invalid or weak secret key') end - it 'logs missing build metadata as a warning' do + it 'fails boot when auto source is enabled with the placeholder create-feed token in production' do stub_validation_logging - allow(Html2rss::Web::AccountManager).to receive(:accounts).and_return([]) + stub_placeholder_account_with_auto_source - ClimateControl.modify( - 'RACK_ENV' => 'production', - 'HTML2RSS_SECRET_KEY' => '0123456789abcdef0123456789abcdef', - 'BUILD_TAG' => nil, - 'GIT_SHA' => nil - ) do - expect { described_class.validate_production_security! }.not_to raise_error + ClimateControl.modify(production_env) do + expect { described_class.validate_production_security! 
}.to raise_error(SystemExit) + end + + expect(Html2rss::Web::SecurityLogger).to have_received(:log_config_validation_failure) + .with('access_token', 'Placeholder create-feed token is not allowed when auto source is enabled') + end + + it 'fails boot when a scoped account keeps the placeholder create-feed token in production' do + stub_validation_logging + allow(Html2rss::Web::AccountManager).to receive(:accounts).and_return( + [{ username: 'scoped-admin', token: 'CHANGE_ME_ADMIN_TOKEN', allowed_urls: ['https://example.com/*'] }] + ) + allow(Html2rss::Web::Flags).to receive(:auto_source_enabled?).and_return(true) + + ClimateControl.modify(production_env) do + expect { described_class.validate_production_security! }.to raise_error(SystemExit) end expect(Html2rss::Web::SecurityLogger).to have_received(:log_config_validation_failure) - .with('build_metadata', 'Missing BUILD_TAG or GIT_SHA', severity: :warn) + .with('access_token', 'Placeholder create-feed token is not allowed when auto source is enabled') + end + + it 'fails boot with a clear validation error when an account token is malformed' do + stub_validation_logging + allow(Html2rss::Web::AccountManager).to receive(:accounts).and_return( + [{ username: 'admin', token: nil, allowed_urls: ['*'] }] + ) + + ClimateControl.modify(production_env) do + expect { described_class.validate_production_security! }.to raise_error(SystemExit) + end + + expect(Html2rss::Web::SecurityLogger).to have_received(:log_config_validation_failure) + .with('account_tokens', 'Invalid token configuration for users: admin') + end + + it 'fails boot when the health-check account keeps the placeholder token in production' do + stub_validation_logging + allow(Html2rss::Web::AccountManager).to receive(:accounts).and_return( + [{ username: 'health-check', token: 'CHANGE_ME_HEALTH_CHECK_TOKEN', allowed_urls: [] }] + ) + + ClimateControl.modify(production_env) do + expect { described_class.validate_production_security! 
}.to raise_error(SystemExit) + end + + expect(Html2rss::Web::SecurityLogger).to have_received(:log_config_validation_failure) + .with('health_check_token', 'Placeholder health-check token is not allowed in production') + end + + it 'allows production boot when the health-check account uses a non-placeholder token' do + stub_validation_logging + allow(Html2rss::Web::AccountManager).to receive(:accounts).and_return( + [{ username: 'health-check', token: 'strong-health-token-012345', allowed_urls: [] }] + ) + + ClimateControl.modify(production_env) do + expect { described_class.validate_production_security! }.not_to raise_error + end end end @@ -111,4 +158,18 @@ def stub_validation_logging end end end + + def stub_placeholder_account_with_auto_source + allow(Html2rss::Web::AccountManager).to receive(:accounts).and_return( + [{ username: 'admin', token: 'CHANGE_ME_ADMIN_TOKEN', allowed_urls: ['*'] }] + ) + allow(Html2rss::Web::Flags).to receive(:auto_source_enabled?).and_return(true) + end + + def production_env + { + 'RACK_ENV' => 'production', + 'HTML2RSS_SECRET_KEY' => '0123456789abcdef0123456789abcdef' + } + end end diff --git a/spec/html2rss/web/local_config_spec.rb b/spec/html2rss/web/local_config_spec.rb index ca9f59e0..aa0a10d3 100644 --- a/spec/html2rss/web/local_config_spec.rb +++ b/spec/html2rss/web/local_config_spec.rb @@ -1,22 +1,51 @@ # frozen_string_literal: true require 'spec_helper' +require 'climate_control' +require 'tempfile' -require_relative '../../../app/web/config/local_config' +require_relative '../../../app' RSpec.describe Html2rss::Web::LocalConfig do + let(:empty_snapshot) { Html2rss::Web::ConfigSnapshot::Snapshot.new(global: {}, feeds: {}, accounts: []) } + def titles_for(*names) names.map { |name| described_class.find(name)[:title] } end + def with_config_file(contents) + Tempfile.create(['feeds', '.yml']) do |file| + write_config(file, contents) + stub_const("#{described_class}::CONFIG_FILE", file.path) + yield file + end + end + + def 
write_config(file, contents) + file.rewind + file.truncate(0) + file.write(contents) + file.flush + end + + def account_token(snapshot) + snapshot.accounts.first.token + end + before do described_class.reload! end describe '.find' do it 'strips feed extensions before lookup' do - allow(described_class).to receive(:yaml).and_return( - { feeds: { example: { title: 'Example' } } } + allow(described_class).to receive(:snapshot).and_return( + Html2rss::Web::ConfigSnapshot::Snapshot.new( + global: {}, + feeds: { + example: Html2rss::Web::ConfigSnapshot::FeedConfig.new(name: :example, raw: { title: 'Example' }) + }, + accounts: [] + ) ) expect(titles_for('example.json', 'example.rss', 'example.xml')).to eq(%w[Example Example Example]) @@ -31,8 +60,7 @@ def self.find_by_name(_name); end .to receive(:find_by_name) .with('support.apple.com/en_gb_ht201222') .and_return({ channel: { title: 'Apple security releases' } }) - allow(described_class).to receive(:snapshot) - .and_return(Html2rss::Web::ConfigSnapshot::Snapshot.new(global: {}, feeds: {}, accounts: [])) + allow(described_class).to receive(:snapshot).and_return(empty_snapshot) config = described_class.find('support.apple.com/en_gb_ht201222.rss') @@ -45,8 +73,7 @@ def self.find_by_name(_name); end end) stub_const('Html2rss::Configs::ConfigNotFound', Class.new(StandardError)) allow(Html2rss::Configs).to receive(:find_by_name) - allow(described_class).to receive(:snapshot) - .and_return(Html2rss::Web::ConfigSnapshot::Snapshot.new(global: {}, feeds: {}, accounts: [])) + allow(described_class).to receive(:snapshot).and_return(empty_snapshot) expect { described_class.find('/broken-name.rss') } .to raise_error(described_class::NotFound, "Did not find local feed config at 'broken-name'") @@ -63,10 +90,61 @@ def self.find_by_name(_name); end end it 'builds typed account models from configuration' do - allow(described_class).to receive(:yaml).and_return(yaml_fixture) + allow(described_class).to 
receive(:load_yaml).and_return(yaml_fixture) described_class.reload! expect(described_class.snapshot.accounts.first.username).to eq('alice') end + + it 'evaluates ERB before parsing YAML so runtime helpers become effective config' do + with_config_file(erb_backed_config) do + ClimateControl.modify('HTML2RSS_ACCESS_TOKEN' => 'runtime-access-token') do + described_class.reload! + expect(account_token(described_class.snapshot)).to eq('runtime-access-token') + end + end + end + end + + describe '.load_snapshot' do + it 'reparses current config without mutating the memoized runtime snapshot', :aggregate_failures do + with_config_file(config_for('cached-token')) do |file| + described_class.reload! + + cached_snapshot = described_class.snapshot + + write_config(file, config_for('fresh-token')) + + expect(described_class.load_snapshot).to have_attributes( + accounts: contain_exactly(have_attributes(token: 'fresh-token')) + ) + expect(account_token(described_class.snapshot)).to eq('cached-token') + expect(account_token(cached_snapshot)).to eq('cached-token') + end + end + end + + def erb_backed_config + <<~YAML + auth: + accounts: + - username: admin + token: <%= Html2rss::Web::RuntimeEnv.admin_access_token %> + allowed_urls: + - "*" + feeds: {} + YAML + end + + def config_for(token) + <<~YAML + auth: + accounts: + - username: admin + token: #{token} + allowed_urls: + - "*" + feeds: {} + YAML end end diff --git a/spec/html2rss/web/runtime_env_spec.rb b/spec/html2rss/web/runtime_env_spec.rb new file mode 100644 index 00000000..7092ef20 --- /dev/null +++ b/spec/html2rss/web/runtime_env_spec.rb @@ -0,0 +1,36 @@ +# frozen_string_literal: true + +require 'spec_helper' +require 'climate_control' + +require_relative '../../../app/web/config/runtime_env' + +RSpec.describe Html2rss::Web::RuntimeEnv do + describe '.admin_access_token' do + it 'returns the configured access token when present' do + ClimateControl.modify('HTML2RSS_ACCESS_TOKEN' => 'admin-token') do + 
expect(described_class.admin_access_token).to eq('admin-token') + end + end + + it 'falls back to the quickstart placeholder when the access token is blank' do + ClimateControl.modify('HTML2RSS_ACCESS_TOKEN' => ' ') do + expect(described_class.admin_access_token).to eq('CHANGE_ME_ADMIN_TOKEN') + end + end + end + + describe '.health_check_token' do + it 'returns the configured health-check token when present' do + ClimateControl.modify('HEALTH_CHECK_TOKEN' => 'health-token') do + expect(described_class.health_check_token).to eq('health-token') + end + end + + it 'falls back to the documented placeholder when the health-check token is blank' do + ClimateControl.modify('HEALTH_CHECK_TOKEN' => ' ') do + expect(described_class.health_check_token).to eq('CHANGE_ME_HEALTH_CHECK_TOKEN') + end + end + end +end