diff --git a/.github/workflows/nightly-stress-test.yml b/.github/workflows/nightly-stress-test.yml
new file mode 100644
index 000000000000..21098a54872f
--- /dev/null
+++ b/.github/workflows/nightly-stress-test.yml
@@ -0,0 +1,103 @@
+name: Nightly Stress Test
+
+on:
+  schedule:
+    - cron: '0 2 * * *' # 2 AM UTC
+  workflow_dispatch:
+    inputs:
+      duration:
+        description: 'Duration of the stress test (e.g., 30m, 1h)'
+        required: true
+        default: '30m'
+
+jobs:
+  stress-test:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      # NOTE(review): no step visible in this workflow reads from or writes to
+      # /tmp/.buildx-cache (`docker compose build` below is not pointed at it),
+      # so this cache may never be used -- confirm, or drop the step.
+      - name: Cache Docker layers
+        uses: actions/cache@v4
+        with:
+          path: /tmp/.buildx-cache
+          key: ${{ runner.os }}-buildx-${{ github.sha }}
+          restore-keys: |
+            ${{ runner.os }}-buildx-
+
+      - name: Build and Start Environment
+        run: |
+          cd tests/monitoring
+          docker compose build
+          docker compose up -d
+          sleep 30 # Wait for initialization
+
+      - name: Verify Connections
+        run: |
+          docker exec salt-master salt '*' test.ping
+
+      - name: Run Aggressive Stress Test
+        env:
+          # Hand the user-supplied input to the script through the environment
+          # rather than expanding it into the script text, where it could inject
+          # shell code. Scheduled runs carry no inputs, so default to 30m.
+          DURATION: ${{ github.event.inputs.duration || '30m' }}
+        run: |
+          cd tests/monitoring
+          chmod +x stress_test.sh stress_api.sh
+          # Run in background and wait for defined duration
+          ./stress_test.sh &
+          STRESS_PID=$!
+
+          echo "Running stress test for $DURATION..."
+
+          # Use sleep with suffix support (m, h)
+          sleep "$DURATION"
+
+          echo "Stopping stress test..."
+          pkill -P $STRESS_PID || true
+          kill $STRESS_PID || true
+
+      - name: Analyze Results
+        run: |
+          cd tests/monitoring
+          # Give Prometheus a moment to finish scraping the final points
+          sleep 30
+          python3 analyze_stats.py
+
+      - name: Snapshot Metrics
+        if: always()
+        run: |
+          # Stop containers to ensure data is flushed to disk
+          cd tests/monitoring
+          docker compose stop prometheus
+          sudo tar -czf ../../prometheus-data.tar.gz ./prometheus_data
+
+      - name: Collect Logs on Failure
+        if: failure()
+        run: |
+          mkdir -p artifacts
+          # Best-effort: capture stderr as well as stdout, and do not let one
+          # missing container abort collection of the remaining logs.
+          docker logs salt-master > artifacts/salt-master.log 2>&1 || true
+          docker logs salt-minion-1 > artifacts/salt-minion-1.log 2>&1 || true
+          # This step runs from the repository root, but the stress test runs in
+          # tests/monitoring. Presumably event_log.txt is written there (TODO
+          # confirm); the original path is kept as a fallback.
+          cp tests/monitoring/event_log.txt artifacts/ \
+            || cp monitoring/event_log.txt artifacts/ \
+            || true
+
+      - name: Upload Artifacts
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: stress-test-results
+          path: |
+            artifacts/
+            prometheus-data.tar.gz
diff --git a/CHANGELOG.md b/CHANGELOG.md index f83e3d44088a..266a5f391168 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,325 @@ Versions are `MAJOR.PATCH`. # Changelog ## 3007.14 (2026-04-29) +## 3006.26 (2026-06-24) + + +### Removed + +- Removed the unmaintained `linode-python` package dependency to stop SyntaxWarnings during install for retired Linode API v3. [#68992](https://github.com/saltstack/salt/issues/68992) + + +### Changed + +- Changed `salt.returners.redis_return` to enumerate the Redis keyspace + with `SCAN` instead of the blocking `KEYS pattern` command in both + `get_jids` and `clean_old_jobs`. `KEYS` walks the entire keyspace + synchronously and stalls the Redis server for the duration; on a + master with hundreds of thousands of jobs this can block all clients + of that Redis instance for seconds. `SCAN` is incremental and + non-blocking. Order of returned keys is no longer guaranteed (the + returner does not rely on order); operators with custom scripts that + read `ret:*` or `load:*` directly may see them in a different order. 
[#69037](https://github.com/saltstack/salt/issues/69037) + + +### Fixed + +- Fixed multi-line scalar variables loaded via `import_yaml` (or `load_yaml`) being rendered as literal `\n` instead of actual newlines when the loaded data is interpolated into a YAML state file (e.g. `- context: {{ data }}`). `PrintableDict.__str__`/`__repr__` now emit string values containing newlines as YAML-safe double-quoted scalars rather than Python `repr()` so they round-trip correctly through the subsequent YAML render pass. [#30690](https://github.com/saltstack/salt/issues/30690) +- Handle requisites correctly for empty SLS files [#30971](https://github.com/saltstack/salt/issues/30971) +- Fixed ``win_pkg`` functions ignoring the ``saltenv`` setting in minion configuration. All public functions (``refresh_db``, ``genrepo``, ``install``, ``remove``, ``list_pkgs``, ``latest_version``, ``upgrade_available``, ``list_upgrades``, ``list_available``, ``version``, ``get_repo_data``, ``get_package_info``) now fall back to ``__opts__["saltenv"]`` when ``saltenv`` is not passed explicitly, instead of always defaulting to ``base``. [#38551](https://github.com/saltstack/salt/issues/38551) +- ``dpkg_lowpkg`` no longer reads ``/var/lib/dpkg/available`` or ``/var/lib/dpkg/info/<package>.list`` directly. It now uses ``dpkg-query`` exclusively, addressing the lintian ``uses-dpkg-database-directly`` warning reported in #52605. ``lowpkg.info`` derives the package install time from dpkg's ``${db-fsys:Last-Modified}`` field instead of the ``.list`` file mtime. [#52605](https://github.com/saltstack/salt/issues/52605) +- Added ``encoding`` parameter to ``file.replace`` execution module and state to support UTF-16, UTF-32, and other multi-byte encoded files that would otherwise be incorrectly treated as binary. 
[#52793](https://github.com/saltstack/salt/issues/52793) +- Fixed `postgres._find_pg_binary` ignoring `postgres.bins_dir` when a `psql` binary is also present on the system PATH, ensuring the configured `bins_dir` is always preferred over the system PATH. [#53190](https://github.com/saltstack/salt/issues/53190) +- Percent-encode the user and password when adding HTTP basic auth to a URL so reserved characters no longer corrupt the result [#55561](https://github.com/saltstack/salt/issues/55561) +- Fixed a ``SaltCacheError`` ("maximum recursion depth exceeded") raised by the + etcd data cache when listing an empty folder, which etcd reports as a child of + itself. The directory walk now stops at the self-referential entry instead of + recursing indefinitely. [#57377](https://github.com/saltstack/salt/issues/57377) +- Fixed `timezone.system` state always returning `result=False` with "Failed to set UTC to True" on Windows. The hardware clock on Windows is always localtime and cannot be changed, so the UTC/hwclock block is now skipped entirely on Windows. [#57754](https://github.com/saltstack/salt/issues/57754) +- Fixed `archive.tar` placing the `-C <dir>` option after the source/member operands, where tar ignores it. The directory-change option is now emitted before the operands so it takes effect in both create and extract modes. [#57847](https://github.com/saltstack/salt/issues/57847) +- Fixed `OSError: The operation completed successfully` raised by `CreateProcessWithTokenW` on Windows when the underlying advapi32 call fails. The error code is now read from `ctypes.get_last_error()` (the ctypes-saved slot) instead of `win32api.GetLastError()` (the live Windows slot, which may be reset to 0 before it is read). [#57848](https://github.com/saltstack/salt/issues/57848) +- Improved documentation for the `runas` and `password` parameters in `cmd.run`, `cmd.script`, and all `salt.modules.cmdmod` execution functions on Windows. 
The docs now accurately describe when a password is required: only when the salt-minion is **not** running as SYSTEM or as an elevated Administrator. Removed the inaccurate claim that the target user account must be in the Administrators group. Also changed `cmd.script` to log a warning instead of hard-failing when `runas` is used without a password on Windows, since a password is not always required. [#57951](https://github.com/saltstack/salt/issues/57951) +- Fixed ``pkg.group_installed``/``pkg.group_info`` failing to expand a dnf environment group whose member groups have multi-word names (e.g. ``Group '@Common NetworkManager submodules' not found`` when installing ``Workstation`` on RHEL/AlmaLinux 8, 9 and 10). The member group is now resolved by its bare name when the ``@``-prefixed lookup fails. This affects dnf4 only; dnf5 group handling is unchanged. [#60276](https://github.com/saltstack/salt/issues/60276) +- Fix `tls.create_csr` log message path to use `os.path.join` instead of f-string interpolation so paths render correctly when csr_path has a trailing slash. [#60877](https://github.com/saltstack/salt/issues/60877) +- Fixed the LDAP eauth group-membership lookup re-binding the user on every job + payload when ``auth.ldap.freeipa`` is enabled. The user is now only re-bound on + the first payload of a job, matching the standard LDAP code path, so single-use + 2FA credentials (such as a FreeIPA OTP) are no longer consumed more than once. [#61974](https://github.com/saltstack/salt/issues/61974) +- Fixed `SSL: DECRYPTION_FAILED_OR_BAD_RECORD_MAC` errors in the VMware cloud driver by reconnecting when a cached vCenter service instance is found to be stale or corrupted (for example when inherited across a fork by salt-cloud's parallel provider queries). 
[#61983](https://github.com/saltstack/salt/issues/61983) +- Fix metadata grain so EC2 ``user-data`` is returned verbatim instead of being mangled by the ``=`` line-splitter, which previously corrupted any user-data payload containing ``=`` (e.g. cloud-init ``#cloud-config`` blocks). [#62061](https://github.com/saltstack/salt/issues/62061) +- Fixed LGPO ``get_policy_info`` incorrectly returning a "multiple policies" error when duplicate ADMX policy definitions (e.g. ``TerminalServer.admx`` and ``TerminalServer-Server.admx``) resolve to the same full path. [#62732](https://github.com/saltstack/salt/issues/62732) +- Re-enable test_interrupt_on_long_running_job by removing the initial-onedir-rollout skip marker. [#63627](https://github.com/saltstack/salt/issues/63627) +- Fix missing `dns_plugin_propagate_seconds` arg in acme state/module so DNS propagation timeout is actually forwarded to certbot. [#63700](https://github.com/saltstack/salt/issues/63700) +- Improve PAM eauth diagnostics when ``salt-master`` runs as a non-root user. Previously, ``salt-master``/``salt-api`` running as the ``salt`` user (the 3006.x packaging default) silently failed every PAM authentication with only ``Pam auth failed for <user>:`` in the log; the cause is that the helper subprocess inherits the master's uid and PAM's ``unix_chkpwd`` refuses to validate other users without ``/etc/shadow`` access. The master now emits a one-shot CRITICAL log entry that names the cause and the two standard remediations (run as ``root``, or add the master user to the ``shadow`` group on Debian-derived distributions), and the module documentation describes the constraint. [#64275](https://github.com/saltstack/salt/issues/64275) +- Fixed incorrect minion presence events being sent out on hourly ``Maintenance`` process restarts [#64505](https://github.com/saltstack/salt/issues/64505) +- Catch StrictUndefined in salt jinja custom filters. 
[#64915](https://github.com/saltstack/salt/issues/64915) +- Stopped logging the misleading "An extra return was detected from minion ... this could be a replay attack" ERROR for benign duplicate returns (also fixes #65516). The local_cache returner now compares a duplicate return to the cached one and logs at DEBUG when the payloads match (the common retry-after-timeout or syndic re-forward case) and at WARNING -- without the "replay attack" wording -- when the payloads differ. [#65301](https://github.com/saltstack/salt/issues/65301) +- Fixed non-root salt CLI access when ``publisher_acl`` or ``external_auth`` is configured. Since 3006.3 the master defaults to running as the ``salt`` user, which left ``sock_dir`` and ``cachedir`` mode ``0o750`` and blocked authorised non-root users from traversing into them to reach ``master_event_pub.ipc`` / ``publish_pull.ipc`` and their per-user ``.<user>_key``. The master now adds the world-execute bit to those two directories when ACLs are configured, without exposing directory listings. [#65317](https://github.com/saltstack/salt/issues/65317) +- Fixed ``salt.ext.tornado.netutil`` import on Python 3.12+ where ``ssl.match_hostname`` was removed and the unmaintained ``backports.ssl_match_hostname`` package is unavailable, which previously broke any Salt master-initiated job (e.g. ``test.ping``, ``state.apply``) on Fedora 39+/Ubuntu 24.04 masters. [#65360](https://github.com/saltstack/salt/issues/65360) +- See #65301 -- the same fix to ``salt/returners/local_cache.py`` quiets the spurious "extra return ... replay attack" ERROR that appeared in multimaster and master-of-masters/syndic setups when the same return arrived more than once. [#65516](https://github.com/saltstack/salt/issues/65516) +- Fix deadlock in parallel `cmd.script` states when the script is served by the master. 
+ + Same fork-inherited ZeroMQ socket race as the `file.managed` fix: a + `cmd.script` state with `parallel: True` downloads the script via + `cp.cache_file` in a forked child that inherited the parent's ZeroMQ + REQ socket, deadlocking the asyncio loop at ~100% CPU. Resolved by the + same `os.register_at_fork` handlers that drop inherited channel/socket + references in forked children. [#65709](https://github.com/saltstack/salt/issues/65709) +- Fixed pip.uninstall rejecting the extra_args keyword argument, matching the behavior of pip.install. [#65870](https://github.com/saltstack/salt/issues/65870) +- Fixed salt-ssh failing to fetch ``gitfs_remotes``. ``salt.config.master_config`` + sets ``__fs_update = True`` to suppress fileserver refreshes done by ``FSChan`` + (the master daemon's maintenance thread handles them). salt-ssh inherits the + master config but has no maintenance thread, so its ``FSClient`` never refreshed + the fileserver backends and wrappers such as ``cp.list_states`` saw no gitfs + content until the user ran ``salt-run fileserver.update`` or manually + ``git fetch``ed the cached repos. ``salt.client.ssh.SSH.__init__`` now removes + the suppression flag before instantiating ``FSClient`` so gitfs is refreshed + once at startup. [#66148](https://github.com/saltstack/salt/issues/66148) +- Fixed ``salt/version.py`` reporting the wrong major version on the 3006.x branch when built from a checkout that has no ``salt/_version.txt`` and no usable ``.git`` directory. ``SaltVersionsInfo.current_release()`` now returns the branch's own codename (``Sulfur``) instead of the next un-released codename in the table, so source builds and other tooling no longer leak ``3007.0`` into the reported version. 
[#67061](https://github.com/saltstack/salt/issues/67061) +- Fixed ``saltutil.runner`` and ``saltutil.wheel`` running master-side functions + as the minion's user (typically ``root``) instead of the master's configured + user (the packaged default since 3006 is ``salt``). Running as the wrong user + left root-owned files in, and tripped git's ``safe.directory`` check on, the + salt-owned master cache -- breaking, for example, ``git_pillar.update`` invoked + via ``saltutil.runner``. These functions now drop to the master's configured + user before executing when invoked from a more-privileged process. [#67716](https://github.com/saltstack/salt/issues/67716) +- Fixed `LocalClient.cmd_subset` raising `TypeError: argument of type 'bool' is not iterable` when one or more targeted minions failed to respond to the `sys.list_functions` probe. Failed minions are now skipped during subset selection. [#68103](https://github.com/saltstack/salt/issues/68103) +- Fixed ``slack_bolt`` engine crashing with ``UnboundLocalError`` when a Slack workflow or other bot posts a message to a monitored channel. Bot messages (``subtype: bot_message``) carry ``bot_id`` and ``username`` instead of a ``user`` field, and these are now used as fallbacks so the engine continues running. [#68105](https://github.com/saltstack/salt/issues/68105) +- Fixed `user.present` to not fail with `result: False` in test mode when a referenced group does not yet exist; the state now reports the pending changes so users can preview states that depend on groups created by a `group.present` requisite in the same run. [#68110](https://github.com/saltstack/salt/issues/68110) +- Fixed ``salt-minion`` and ``salt-proxy`` leaving a privileged (root) keepalive supervisor process at the head of an otherwise unprivileged minion process tree when ``user`` is set to a non-root account. The supervisor now drops privileges to the configured user once the keepalive child has been spawned. 
[#68115](https://github.com/saltstack/salt/issues/68115) +- Fixed ``ValueError: Formatting field not found in record: 'colorlevel'`` errors when ``log_fmt_console`` uses custom color attributes such as ``%(colorlevel)s`` or ``%(colormsg)s``. ``SaltLogRecord`` now always provides the ``color*`` attributes (uncolored by default) so that log records buffered by the temporary deferred stream handler can be formatted by a colorized console formatter once it is installed. [#68129](https://github.com/saltstack/salt/issues/68129) +- Fixed ``salt-call`` silently ignoring ``--file-root``, ``--pillar-root``, and ``--states-dir`` when ``--local`` was not passed. These overrides only affect the local minion config and are clobbered by the master's values via the remote file client, so ``salt-call`` now emits a warning explaining that ``--local`` is required for the override to take effect. [#68137](https://github.com/saltstack/salt/issues/68137) +- Fixed event signature verification failing under ``minion_sign_messages``. The minion was signing the return load before ``salt.channel.client.AsyncReqChannel._package_load`` attached transport metadata (``nonce``, ``ts``, ``tok``, ``id``), so the bytes the master re-serialized to verify did not match what was signed and every signed return was dropped. Signing is now performed inside ``_package_load`` after the metadata is attached, against the same bytes the master verifies. [#68181](https://github.com/saltstack/salt/issues/68181) +- Fixed ``pkgrepo.managed`` not honouring ``clean_file: True`` when the desired + repo line is already present in the managed file alongside unrelated stale + lines. Previously the state returned "already configured" and silently + skipped both the file truncation and the re-write, leaving the stale + entries (for example an obsolete ``bullseye-backports`` line in a file + managed for ``bookworm-backports``) in place. 
The clean + reconfigure + path now runs whenever the managed file contains any non-comment, + non-blank content other than the desired repo line; when the file already + contains only the desired line the state remains idempotent. [#68208](https://github.com/saltstack/salt/issues/68208) +- Fixed ``pkg.group_installed`` reporting failure on RPM-based systems when a package group's default or optional members are not available in any enabled repository. The state now only considers mandatory group members and explicitly requested ``include`` packages when checking for install failures, matching the behavior of ``yum/dnf group install`` (which reports "No match for group package" but still exits 0). [#68210](https://github.com/saltstack/salt/issues/68210) +- Pass ``--disable-pip-version-check`` when ``pip.list``, ``pip.freeze``, ``pip.list_upgrades``, ``pip.upgrade``, and ``pip.list_all_versions`` invoke pip, so these calls no longer hang for ~20s per invocation on airgapped minions while pip tries to reach PyPI for its self-version check. [#68214](https://github.com/saltstack/salt/issues/68214) +- Fixed ``archive.extracted`` failing to enforce ``user``/``group`` ownership on archives whose tar/zip members include no explicit directory entries (e.g. Oracle's GraalVM JDK tarballs). ``archive.list`` now derives the top-level directory from the common prefix of file and link members in addition to dir members, so ownership is applied to the extracted top-level directory in all cases. [#68227](https://github.com/saltstack/salt/issues/68227) +- Fixed deltaproxy sub-proxies returning identical grain data for every controlled minion. ``subproxy_post_master_init`` now re-packs each sub-proxy's freshly loaded per-minion grains into its execution-module, returner, executor and proxy LazyLoaders so ``__grains__`` inside loaded modules reflects that sub-proxy's device instead of the placeholder values captured during the first-pass grains load through the control proxy. 
[#68248](https://github.com/saltstack/salt/issues/68248) +- Fixed the salt-minion (and salt-api, salt-cloud, salt-master, salt-syndic) Debian postinst scripts hanging or erroring with "Bad file descriptor" when run from a non-interactive Debian preseed late_command chroot, by tearing down the debconf protocol with ``db_stop`` and explicitly closing file descriptor 3 before the auto-generated ``#DEBHELPER#`` section runs. [#68269](https://github.com/saltstack/salt/issues/68269) +- Fixed ``file.managed`` failing with ``WinError 123`` on Windows when caching a remote URL whose path embeds another URL (e.g. an archive.org snapshot of an ``https://...`` resource). The URL-path portion of the ``extrn_files`` cache path is now sanitised the same way the network location already is. [#68273](https://github.com/saltstack/salt/issues/68273) +- Fixed ``logrotate.set`` dropping the second ``endscript`` (and turning + embedded shell commands into bogus setting keys) when a stanza contained + multiple script blocks such as both ``prerotate`` and ``postrotate``. Script + directives are now parsed as opaque multi-line bodies and round-trip with + their own ``endscript`` terminator each. [#68293](https://github.com/saltstack/salt/issues/68293) +- Fixed the `salt.state` orchestrate state silently reporting only `Run failed on minions: ` when a targeted minion returned `False`, no return at all, or a list of error strings. The orchestrate comment now includes the per-minion failure detail (the minion's actual return value or "did not return a state result") so operators can diagnose `salt-run state.orchestrate` failures without re-running with extra logging. [#68326](https://github.com/saltstack/salt/issues/68326) +- Fixed worker process crash when salt is used outside CLI tools. 
[#68332](https://github.com/saltstack/salt/issues/68332) +- Fixed ``clean_old_jobs`` in the default local job cache returner to use the jid file's modification time (``st_mtime``) instead of the inode change time (``st_ctime``). A package upgrade's ``chown -R /var/cache/salt/master`` resets ``st_ctime`` on every existing jid file, which previously made the maintenance process treat every pre-upgrade job as freshly created and prevented cleanup until ``keep_jobs_seconds`` had elapsed. On busy masters this exhausted the partition's inodes within a day. [#68351](https://github.com/saltstack/salt/issues/68351) +- Fixed the ``proxmox`` salt-cloud driver raising ``Could not determine an IP address to use`` before the VM was created and started. The IP address is now determined after the VM is running, and the running VM's address reported by Proxmox is used as a fallback when neither a static ``ip_address`` nor ``agent_get_ip`` is configured. [#68353](https://github.com/saltstack/salt/issues/68353) +- Changed ``KillMode`` in the shipped ``salt-minion.service`` systemd unit from ``process`` to ``mixed`` so that ``systemctl stop`` / ``systemctl restart salt-minion`` no longer leaves orphaned ``Minion._thread_return`` worker processes outside the cgroup. SIGTERM is still sent only to the main PID (so the job return scheduled by ``service.restart salt-minion`` from #68183 has time to finish), but any remaining children are reaped with SIGKILL after the main process exits or ``TimeoutStopSec`` elapses. [#68406](https://github.com/saltstack/salt/issues/68406) +- Fixed `task.edit_task` on Windows rejecting `restart_count=999` even though the documented and error-message-stated maximum is 999. The validation now accepts the full 1..999 range. [#68419](https://github.com/saltstack/salt/issues/68419) +- Fixed ``win_task.add_trigger`` so that ``repeat_duration="Indefinitely"`` actually produces an indefinite repetition pattern. 
Previously the empty string from the internal duration lookup was assigned to ``Repetition.Duration``, which the Windows Task Scheduler treats as "0 seconds" and silently disables repetition. The Duration property is now left at its default for the "Indefinitely" case, which is the documented way to repeat forever. [#68420](https://github.com/saltstack/salt/issues/68420) +- Fixed ``user.setpassword`` on Windows reporting success (``retcode: 0``) when the target user does not exist. The execution module now returns ``False`` and logs an error in that case, so callers and the ``user.present`` state correctly detect the failure instead of swallowing the Win32 "user name could not be found" message as a successful return. [#68428](https://github.com/saltstack/salt/issues/68428) +- Fixed ``user.present`` on Windows so it actually updates a user's password + when the existing password differs from the one specified in the state. + Previously the state reported "User is already present and up to date" and + left the password unchanged. [#68429](https://github.com/saltstack/salt/issues/68429) +- Stop salt-ssh state runs from clobbering the master-side fileclient ``cachedir`` with the on-target ``thin_dir`` cachedir. The state fileserver cache for salt-ssh state runs is now written under the configured master ``cachedir`` (e.g. ``/var/cache/salt/master/``) instead of under the minion's thin_dir path on the master filesystem. [#68458](https://github.com/saltstack/salt/issues/68458) +- Fixed ``pkg.add_repo_key`` and ``pkgrepo.managed`` so APT keyring files that target an ``.asc`` destination keep their ASCII armor instead of being dearmored, matching the apt-secure(8) convention and allowing armored keyfiles that bundle multiple keys to be installed even when the ``gpg`` binary is not available on the minion. 
[#68464](https://github.com/saltstack/salt/issues/68464) +- Fixed ``jobs.list_jobs search_metadata`` so it matches jobs whose metadata + was passed as a CLI keyword argument (e.g. ``state.apply metadata={...}``) + and is therefore carried inside the job's ``Arguments`` rather than at the + top of the job payload. [#68481](https://github.com/saltstack/salt/issues/68481) +- Fixed `lgpo.set` state reporting "Failed to set the following policies" on subsequent runs of policies with sub-elements (e.g. Storage Sense thresholds). The state compared a user-supplied dict keyed by element id with a current dict keyed by the ADML display name; both forms now normalize to the canonical element id before comparison so the state is idempotent. [#68489](https://github.com/saltstack/salt/issues/68489) +- Fixed minion rejecting the master with "Invalid master key" after restart when the cached `minion_master.pub` differs from the master's payload pub_key only in trailing whitespace. `AsyncAuth.verify_master` now normalizes both sides through `clean_key` before comparing and caches the normalized form on first contact. [#68493](https://github.com/saltstack/salt/issues/68493) +- Fixed ``TypeError: 'NoneType' object is not iterable`` raised from ``AsyncReqMessageClient._send_recv`` when a per-message timeout completes the future before the send/receive coroutine catches a transient transport exception, which aborted the minion's connect loop and prevented it from connecting to the master. [#68506](https://github.com/saltstack/salt/issues/68506) +- Fixed ``docker_network.present`` recreating networks on every run against Docker 29+. Docker 29 added an empty ``IPRange`` field to every IPAM Config entry; ``docker.compare_networks`` now drops empty/None placeholder values before comparing pools, and the state's default-pool short-circuit treats the empty field as absent. 
[#68518](https://github.com/saltstack/salt/issues/68518) +- Fixed `pkg.installed` verification on x86_64 hosts that mix `x86_64` and `x86_64_v2` packages (e.g. AlmaLinux 10.1). `salt.utils.pkg.rpm.resolve_name` and `salt.modules.yumpkg.normalize_name` now treat `x86_64_v2` as compatible with `x86_64` instead of appending the arch suffix, so installed packages match the names Salt records. [#68540](https://github.com/saltstack/salt/issues/68540) +- Fixed ``mysql_grants.present`` reporting "Failed to execute" when granting ``ALL PRIVILEGES`` on ``*.*`` against MySQL 8.4+, where the server's privilege set drifted from Salt's hard-coded list (``SET_USER_ID`` removed, many dynamic privileges added). ``grant_exists`` now derives the expected privilege set from the connected server's ``SHOW PRIVILEGES`` output instead of a static list. [#68567](https://github.com/saltstack/salt/issues/68567) +- Fixed ``cp.get_template`` raising ``AttributeError: 'NoneType' object has no attribute 'get'`` when the Jinja template uses ``{% from '...' import ... with context %}``. The cp module's loader-backed ``__opts__`` is now unwrapped to a plain dict before the SaltCacheLoader instantiates the file client and channel that fetch the imported template. [#68572](https://github.com/saltstack/salt/issues/68572) +- Fixed `ImportError: cannot import name 'wait' from partially initialized module 'multiprocessing.connection'` raised during salt-master/minion shutdown when a reentrant SIGTERM hit `ProcessManager.kill_children()` mid `Process.join(0)`. `salt.utils.process` now eagerly imports `multiprocessing.connection` so the module is fully initialised before any signal handler can trigger its lazy import. [#68573](https://github.com/saltstack/salt/issues/68573) +- Fixed `cmd.script` on Windows raising `Invalid user: ` when `runas` is a domain account (`DOMAIN\user`, `user@DOMAIN`, or a SID). 
The pre-execution `user.info` check is backed by `NetUserGetInfo` which only resolves local-machine accounts and returns empty for many valid domain users; the missing lookup is now logged as a warning and execution continues so the underlying `win_runas` machinery can authenticate the account. [#68578](https://github.com/saltstack/salt/issues/68578) +- Fixed `pkg.install` on Windows silently downgrading the salt-minion when a numeric `version=` argument was passed (e.g. `version=3007.10` was YAML-parsed to the float `3007.1` and then matched the wrong winrepo entry). When the numeric version uniquely matches a string-keyed winrepo entry it is now resolved to that entry; when it is ambiguous (e.g. both `3007.1` and `3007.10` are in the winrepo) the install is refused with a clear error pointing the user at the quoted-version syntax. [#68620](https://github.com/saltstack/salt/issues/68620) +- Fixed the loader masking failure reasons when multiple modules declare the same `__virtualname__` and each `__virtual__()` returns False, so users now see every reason (e.g. both x509 v1's "Superseded, using x509_v2" and x509_v2's "Could not load cryptography") instead of only the first one recorded. [#68625](https://github.com/saltstack/salt/issues/68625) +- Fix `NetapiClient.runner` raising `TypeError` when `timeout` arrives as a string from the salt-api HTTP form. [#68653](https://github.com/saltstack/salt/issues/68653) +- Fixed `master_job_cache: redis_return` raising `KeyError: 'redis_return.prep_jid'` by registering the `redis` returner under both `redis` and `redis_return` virtual names, matching the documented `--return redis_return` usage and the module's file name. [#68663](https://github.com/saltstack/salt/issues/68663) +- Fixed ``ini.options_present`` with ``strict: True`` to remove sections that are present in the ini file but absent from the supplied ``sections`` mapping. 
[#68673](https://github.com/saltstack/salt/issues/68673) +- Handle `SaltDeserializationError` in grains cache loading so a corrupted cache file no longer propagates as CRITICAL during minion startup. [#68678](https://github.com/saltstack/salt/issues/68678) +- Fixed ``network.interfaces`` on Windows systems falling back to WMI (i.e. .NET older than 4.7.2): the default gateway is now reported under ``gateway`` instead of being mistakenly emitted as ``broadcast``. [#68692](https://github.com/saltstack/salt/issues/68692) +- Fixed ``file.managed`` (and other template-rendering callers) silently overwriting user-supplied ``slspath``, ``sls_path``, ``slsdotpath`` and ``slscolonpath`` values in ``defaults``/``context`` with values regenerated from the caller's ``sls`` key. [#68754](https://github.com/saltstack/salt/issues/68754) +- Fixed ``env_order`` not being honored when merging pillar data across environments. ``Pillar.render_pillar`` now iterates matched environments in the configured ``env_order`` so that, with ``top_file_merging_strategy: merge_all``, the last environment in ``env_order`` wins on conflicting pillar keys instead of the result depending on dict insertion order. [#68785](https://github.com/saltstack/salt/issues/68785) +- Improved the "Malformed topfile" error from ``HighState.verify_tops`` to name the saltenv and the matcher whose state declarations were not formed as a list, so users can locate the offending entry in their ``top.sls``. [#68792](https://github.com/saltstack/salt/issues/68792) +- Removed orphaned GnuPG dotlock files (``.#lk<addr>.<host>.<pid>``) from ``gpg_keydir`` before each decrypt in the ``gpg`` renderer so they no longer accumulate when a gpg subprocess is killed mid-operation. 
[#68869](https://github.com/saltstack/salt/issues/68869) +- Fix `pkg.installed` idempotency on FreeBSD when `with_origin=True` causes + `pkg.list_pkgs` to return per-package dicts instead of version lists; extract + the version list before version-string comparison so a second state run no + longer falsely reports packages as changed. [#68886](https://github.com/saltstack/salt/issues/68886) +- Fix gen_signature() signing raw pub key content instead of clean_key'd content, causing master_use_pubkey_signature verification to always fail. [#68930](https://github.com/saltstack/salt/issues/68930) +- Fixed spurious ``FileLockError: lock_fn ... exists and is not a file`` raised by ``salt.utils.files.wait_lock`` and ``salt.utils.files.await_lock`` (and therefore by ``state.apply`` queue locking) when another process removed the lock file between the two separate ``os.path.exists`` / ``os.path.isfile`` stats. The pre-check now uses a single ``os.stat`` call so a transient regular-file lock no longer trips the "not a file" branch. [#68931](https://github.com/saltstack/salt/issues/68931) +- Fixed pkg.installed(update_holds=True) for APT multiarch packages by preserving arch-qualified package names through install target parsing and verification. [#68932](https://github.com/saltstack/salt/issues/68932) +- Fix deadlock in parallel `file.managed` states when source is served by the master. + + Forked parallel-state children previously inherited the parent's ZeroMQ + REQ socket and asyncio loop from `salt.fileclient.RemoteClient`, + `salt.crypt.AsyncAuth/SAuth`, and `salt.utils.event.SaltEvent`. Multiple + sibling children racing those handles deadlocked the asyncio loop with + ~98% CPU and never completed. Salt now registers `os.register_at_fork` + handlers on those classes that drop inherited channel/socket references + in any forked child; the next use rebuilds them fresh. 
[#68940](https://github.com/saltstack/salt/issues/68940) +- Fixed grain and pillar targeting matching minions whose data cache entry was missing. ``CkMinions._check_cache_minions`` now excludes accepted minions that have no cached grains/pillar data from greedy target results, instead of silently including them as candidates. [#68976](https://github.com/saltstack/salt/issues/68976) +- Avoid AttributeError on a closed IPCClient when the connect coroutine resolves after close(). [#68993](https://github.com/saltstack/salt/issues/68993) +- Fixed `salt.utils.network.sanitize_host` stripping colons from IPv6 addresses, which broke `network.ping` and any other caller that passed an IPv6 host. [#68995](https://github.com/saltstack/salt/issues/68995) +- Added support for MAINTAIN (m) privilege introduced in PostgreSQL 17 to salt.modules.postgres and salt.states.postgres_privileges [#69003](https://github.com/saltstack/salt/issues/69003) +- Fixed `redis.get_master_ip` silently dropping the `password` argument. The + function was forwarding its arguments positionally to `_connect`, but + `_connect`'s third positional slot is `db`, not `password`, so the + caller's password landed in the database-index argument and the actual + password fell through to `config.option("redis.password")`. Arguments + are now passed by keyword. [#69029](https://github.com/saltstack/salt/issues/69029) +- Fixed `salt.modules.redismod._connect` rejecting valid `db=0`. The helper + used a truthy check (`if not db`) to decide whether to fall back to + `config.option("redis.db")`, but `not 0` is `True`, so an explicitly + supplied `db=0` was silently replaced by the configured value. The check + is now `if db is None`, matching the pattern already used by the sibling + `_sconnect` helper in the same module. Other arguments keep their + truthy-check semantics on purpose. 
[#69030](https://github.com/saltstack/salt/issues/69030) +- Fixed two distinct bugs in the `salt.engines.redis_sentinel` engine that + together prevented it from being usable. `start()` no longer raises + `AttributeError: 'dict_values' object has no attribute 'pop'` on Python 3 + (the dict.values() result is now wrapped in `list(...)`). `Listener` and + `start()` now accept an optional `password` argument and forward it to + the redis client, allowing the engine to authenticate against a Sentinel + that requires AUTH; the default of `None` keeps existing configurations + working unchanged. [#69031](https://github.com/saltstack/salt/issues/69031) +- Fixed `salt.returners.redis_return` silently ignoring the documented + `redis.password` configuration option. The returner now reads + `redis.password` from config (in both regular and proxy modes) and + forwards it to both the single-server `redis.StrictRedis` and the + `StrictRedisCluster` constructors. Operators with auth-protected Redis + no longer lose every job return to a hidden `NOAUTH Authentication + required` failure; deployments without a password are unaffected. [#69032](https://github.com/saltstack/salt/issues/69032) +- Fixed three closely-related bugs in `salt.cache.redis_cache` that + together broke hierarchical-bank semantics: + `_build_bank_hier` now registers each child bank name in both the + parent's `$BANK_` set (consumed by `flush()` tree traversal) and the + parent's `$BANKEYS_` set (consumed by `list_()`); `_get_banks_to_remove` + now decodes the bytes returned by `smembers` and skips the `"."` + placeholder, so recursive `flush()` of a parent bank actually descends + into sub-banks instead of corrupting the path; and `flush(bank)` of a + sub-bank now removes the flushed bank's own reference from its + parent's index sets so `list_(parent)` no longer reports it as + present. 
Together these fixes restore `cache.list("minions")`, + `salt-run manage.present` and `salt-run manage.up` for masters + configured with `cache: redis`. [#69033](https://github.com/saltstack/salt/issues/69033) +- Fixed `salt.tokens.rediscluster` being unable to retrieve any eauth + token. The cluster client was created with `decode_responses=True`, + which caused `redis_client.get()` to return `str` and broke + `salt.payload.loads` (msgpack rejects `str`); it also caused + `redis_client.keys()` to return `str` and broke + `[k.decode("utf8") for k in ...]` (`str` has no `.decode`). Both + errors were swallowed by broad `except Exception` handlers, so eauth + appeared to silently reject every token. `decode_responses=True` is + removed; values now round-trip as bytes through msgpack as the rest + of the module already expected. [#69035](https://github.com/saltstack/salt/issues/69035) +- Fixed `salt.returners.redis_return` leaking `:` last-jid + pointer keys indefinitely. The pointer was written with `pipeline.set` + and no `ex=` TTL, so any (minion, fun) pair that stopped running stuck + in Redis forever -- O(minions × distinct funcs) keys accumulating over + the lifetime of the master. The pointer now expires on the same TTL + as the rest of the returner data (`keep_jobs_seconds`). Operators with + external scripts reading these keys directly may observe them + expiring; the documentation never promised they would not. [#69038](https://github.com/saltstack/salt/issues/69038) +- Fixed `salt.returners.redis_return.get_fun` always returning an + empty dict. The function read return data from a `:` + key that no other code in the module ever wrote -- a leftover from + an older storage schema. It now reads from the canonical + `ret:` hash via `HGET ret: `, matching the + storage layout that `returner` actually produces and the read + pattern that `get_jid` already uses. 
[#69039](https://github.com/saltstack/salt/issues/69039) +- Fixed `salt.returners.pgjsonb` writing database errors to `sys.stderr` + instead of Salt's logger. Errors from `_get_serv`, `_purge_jobs` and + `_archive_jobs` are now reported via `log.exception`, so they reach + the configured `log_file` / syslog destination on a daemonized master, + including a full traceback. The unused `import sys` is also dropped. [#69048](https://github.com/saltstack/salt/issues/69048) +- Fixed `salt.returners.pgjsonb.returner` letting any non-connection + `psycopg2.DatabaseError` propagate to the caller — including the + syndic-aggregate publish path in `salt/master.py` which had no outer + catch — so a single bad row could escape into a master subprocess. + `event_return` had no error handling at all and a database failure + during a flush propagated similarly. Both functions now catch + `SaltMasterError` and `psycopg2.DatabaseError` locally, log a + contextual message (jid/id for returns, batch size for events), and + drop the affected payload. While here, fix `event_return` passing + the events list as the positional `ret` argument to `_get_serv`, + which was a copy-paste leftover from `returner(ret)`. [#69058](https://github.com/saltstack/salt/issues/69058) +- Fixed `salt-api`'s `/events` endpoint accepting eauth tokens via query + string (``?token=...`` or ``?salt_token=...``). Tokens supplied that + way end up in HTTP access logs, the browser ``Referer`` header, log- + aggregation systems and shell history; the token retains validity for + ``token_expire`` (12h by default), so any party reading those logs can + replay the token. The endpoint now rejects query-string tokens with a + 400 error pointing at the ``X-Auth-Token`` header (for non-browser + clients) or the session cookie established by ``/login`` (for browser + ``EventSource`` clients) as the supported channels. ``X-Auth-Token`` + header support is added; cookie-based auth continues to work + unchanged. 
[#69071](https://github.com/saltstack/salt/issues/69071) +- ``LoadAuth.get_tok`` now distinguishes between corrupt token blobs (removed from the store) and transient backend errors such as Redis connection drops or NFS hangs (token kept, request treated as not-authenticated). Previously a single backend hiccup could log every authenticated user out by deleting valid tokens. [#69073](https://github.com/saltstack/salt/issues/69073) +- ``cmd.run`` and friends no longer include the ``env`` and ``stdin`` arguments in the ``CommandExecutionError`` raised when the underlying subprocess fails to start (typically ``ENOENT`` / binary not found). Both fields routinely carry credentials passed in by the caller (``env={"DB_PASSWORD": "..."}``, password piped via ``stdin``), and the error message ends up in master/minion logs and in event-bus return data visible to the API caller. [#69075](https://github.com/saltstack/salt/issues/69075) +- Lowered the "Cache version mismatch clearing" log message in ``salt.utils.cache.verify_cache_version`` from ``WARNING`` to ``DEBUG``; the cache is rebuilt as part of normal operation after upgrades or when an ephemeral cache directory has been removed, and does not warrant user attention. [#69106](https://github.com/saltstack/salt/issues/69106) +- * Relenv 0.22.14 + - Update sqlite to 3.53.2.0 + - Update openssl to 3.5.7 [#69129](https://github.com/saltstack/salt/issues/69129) +- Surface the real cause of a proxymodule load failure in salt-proxy's abort message. The misleading "Proxymodule X is missing an init() or a shutdown() or both" wording is now only used when init/shutdown really are missing from a loaded module; if the module failed to load (for example because its ``__virtual__`` returned False), the underlying reason is included in the error. [#69139](https://github.com/saltstack/salt/issues/69139) +- Fixed ``pkg.hold`` and ``pkg.list_holds`` on dnf5 systems (e.g. 
Fedora 42+): + ``pkg.hold`` now calls ``dnf5 versionlock add `` (the bare + ``versionlock `` form was rejected by dnf5), and ``pkg.list_holds`` + reads ``/etc/dnf/versionlock.toml`` directly so ``pkg.installed`` with + ``hold: true`` is again idempotent. [#69181](https://github.com/saltstack/salt/issues/69181) +- Fixed Salt-SSH syncing internal modules as extmods [#69199](https://github.com/saltstack/salt/issues/69199) +- Fixed ``lgpo_reg.value_absent`` failing when the Registry.pol entry was already absent but the registry value still existed. ``lgpo_reg.delete_value`` was returning early before reaching the registry cleanup code, causing the state to see no changes and report failure. The registry value is now removed regardless of whether the pol entry was present. [#69203](https://github.com/saltstack/salt/issues/69203) +- Fixed `postgres_local_cache.save_load` raising `psycopg2.errors.UniqueViolation` when more than one master in an active-active multi-master cluster persists the same JID; the INSERT is now idempotent via `ON CONFLICT (jid) DO NOTHING` on PostgreSQL >= 9.5, and the duplicate is tolerated on older servers. [#69214](https://github.com/saltstack/salt/issues/69214) +- Fixed Windows MSI self-upgrade via ``pkg.install`` failing with error 1603. The old product's ``DeleteConfig_DECAC`` custom action was unconditionally deleting ``ROOTDIR\var`` during ``RemoveExistingProducts``, destroying the MSI that ``pkg.install`` had cached to ``ROOTDIR\var\cache`` before launching the upgrade. Users who had ``REMOVE_CONFIG=1`` persisted in the registry (from checking "On uninstall" at install time) hit a worse variant where the entire ``ROOTDIR`` was deleted. The fix checks ``UPGRADINGPRODUCTCODE`` — set by Windows Installer whenever an uninstall is triggered by a major upgrade — and skips all ``ROOTDIR`` deletion during upgrades, matching the behaviour of the NSIS installer which has always preserved ``ROOTDIR`` during upgrades. 
[#69219](https://github.com/saltstack/salt/issues/69219) +- Fixed `TypeError: string indices must be integers` in the minion when the master returns a bare string error response (e.g. `"bad load"`, `"Some exception handling minion payload"`) for a pillar request. The minion now raises a clean `AuthenticationError` instead of crashing, allowing the caller to retry or fail gracefully. [#69228](https://github.com/saltstack/salt/issues/69228) +- pkg.list_patches in yumpkg.py parses tdnf output on Photon OS [#69229](https://github.com/saltstack/salt/issues/69229) +- Fix `git.tag` so that the documented `message` argument is actually forwarded to `git tag`, creating an annotated tag with the supplied message instead of silently producing a lightweight tag. [#69298](https://github.com/saltstack/salt/issues/69298) +- Fixed `salt.auth.pam` conversation callback so it answers `PAM_PROMPT_ECHO_ON` prompts with the supplied username; previously only `PAM_PROMPT_ECHO_OFF` prompts were answered, which caused `pam_authenticate` to silently fail (and salt-api to return 401) against PAM stacks that re-prompt for the user. [#69304](https://github.com/saltstack/salt/issues/69304) +- Ensure multiple masters have their own job/state queues [#69308](https://github.com/saltstack/salt/issues/69308) +- Fixed loading private keys from PKCS#12 containers with x509_v2 [#69312](https://github.com/saltstack/salt/issues/69312) +- Fixed creating self-signed PKCS#12-encoded certificates [#69319](https://github.com/saltstack/salt/issues/69319) +- Fixed minion state queue replacing the master-assigned JID on queued state runs, so returns now come back tagged with the JID the master actually published. [#69386](https://github.com/saltstack/salt/issues/69386) +- Made the salt user's home directory and the relenv ``extras-`` directory configurable in the Linux packaging. 
The DEB preinst scripts now source ``/etc/default/salt-setup`` (and ``/etc/sysconfig/salt-minion-setup`` for cross-distro parity with RPM) before applying the ``SALT_HOME``/``SALT_USER``/``SALT_GROUP``/``SALT_NAME`` defaults, mirroring the long-standing RPM behavior. A new ``SALT_EXTRAS_DIR`` override is honored by both stacks so the extras tree can be relocated outside ``/opt/saltstack/salt`` and its ownership is correctly restored on upgrade. [#69402](https://github.com/saltstack/salt/issues/69402) +- Fixed minion worker threads hanging or crashing when returning job results + to the master. The main process now fires an error event back to the worker + when ``req_channel.send()`` times out, so workers wake up immediately rather + than waiting out their full timeout. Replaced the bare ``TimeoutError`` raised + in ``_send_req_sync`` with ``SaltReqTimeoutError`` so ``_return_pub``'s existing + handler catches it correctly. The worker's wait timeout is now derived from + ``return_retry_timer_max * return_retry_tries`` to ensure it always outlasts + the main process's retry budget. [#69416](https://github.com/saltstack/salt/issues/69416) +- Fixed zsh completion by using the proper python3 instead of python2. [#69419](https://github.com/saltstack/salt/issues/69419) +- Fixed Photon OS Arm64 FIPS CI by re-enabling the OpenSSL default provider after installing openssl-fips-provider, working around the disabled-default-provider bug in `openssl-fips-provider <= 3.1.2-3.ph5` on the lagging Photon aarch64 mirror. [#69449](https://github.com/saltstack/salt/issues/69449) +- Add regression test for changelog template multi-line rendering and harden template with indent filter so continuation lines are correctly indented under the bullet (defensive backport of #69458 to 3006.x). [#69454](https://github.com/saltstack/salt/issues/69454) +- Fixed minion not honoring SIGTERM while stuck in the master DNS retry loop, which caused systemd to escalate to SIGKILL after 90 seconds. 
[#69466](https://github.com/saltstack/salt/issues/69466) +- Fixed ``lgpo_reg`` module and state functions failing on Windows Domain Controllers with ``Access is denied`` when writing to ``HKLM\SOFTWARE\Policies\`` subkeys. The ``set_value``, ``disable_value``, and ``delete_value`` execution module functions now accept a ``write_registry`` parameter (default ``None``) that auto-detects Domain Controllers via the ``ProductType`` registry key and skips the direct registry write when one is detected, instead relying on the Group Policy engine to apply the policy on the next refresh. An explicit ``True`` or ``False`` overrides auto-detection. A ``refresh_policy`` parameter (default ``False``) has been added to all three functions to trigger an in-process ``userenv.RefreshPolicy`` call immediately after the ``Registry.pol`` file is updated. The corresponding state functions ``value_present``, ``value_disabled``, and ``value_absent`` expose the same parameters. A standalone ``lgpo_reg.refresh_policy`` execution function and ``lgpo_reg.refresh_policy`` state have been added to allow a single Group Policy refresh to be issued after a batch of policy writes. ``is_domain_controller`` has been added to ``salt.utils.win_functions`` and ``refresh_policy`` has been added to ``salt.utils.win_lgpo_reg``. [#69468](https://github.com/saltstack/salt/issues/69468) +- Fixed 3006.x Windows nightly CI by pinning the runner-host Python to 3.14.6 (OpenSSL 3.5.7); the setup-python default `3.14` was resolving to a cached 3.14.5 build whose OpenSSL 3.0.20 rejected the cert pypi.org currently serves. 
[#69486](https://github.com/saltstack/salt/issues/69486) +- Fixed 3006.x Windows nightly CI Deps by dropping a sitecustomize hook into the salt onedir's `Lib/site-packages` that applies the cpython#104135 iter-and-skip patch before pip touches TLS; the prior runner-host Python pin in #69486 targeted the wrong interpreter (the failing pip runs in a venv created from the relenv-bundled Python 3.10) and is reverted. [#69490](https://github.com/saltstack/salt/issues/69490) +- Fixed ``lgpo_reg`` failures on Windows when ``Registry.pol`` is temporarily locked by the Group Policy service or other processes. Salt now uses ``EnterCriticalPolicySection`` / ``LeaveCriticalPolicySection`` from ``userenv.dll`` — the same synchronization primitive used by the GP engine — to serialize read-modify-write access to ``Registry.pol``. A retry loop with configurable attempts and delay is also applied for non-GP lockers such as antivirus scanners or VSS snapshots that do not participate in the GP critical section handshake. [#69492](https://github.com/saltstack/salt/issues/69492) + + +### Added + +- Added ``shadow.verify_password`` to ``salt.modules.win_shadow``, which + validates a Windows user's password via ``LogonUser`` with + ``LOGON32_LOGON_NETWORK`` (Microsoft's recommended approach per + `KB180548 `_) without + creating an interactive session. If the check causes an account lockout, + the account is automatically unlocked. Updated ``user.present`` on Windows + to use ``shadow.verify_password`` so the password is only changed when it + differs from the current value, matching the idempotent behaviour on other + platforms. [#41347](https://github.com/saltstack/salt/issues/41347) +- Added ability to configure the pillar destination for the `netbox` ext_pillar via `destination_pillar_key` [#65531](https://github.com/saltstack/salt/issues/65531) +- Migrate Salt documentation to the PyData Sphinx theme. 
This update modernizes the documentation UI, improves navigation with a persistent sidebar tree, and fixes issues with embedded video playback. [#69185](https://github.com/saltstack/salt/issues/69185) +- fix etcdv3 module authentication when using etcd3-py lib [#69202](https://github.com/saltstack/salt/issues/69202) +- Added ``lgpo_reg.get_rsop_value`` to query the Resultant Set of Policy (RSoP) for a registry key/value and detect whether it is managed by a Domain Group Policy Object. The ``lgpo_reg`` module functions ``set_value``, ``disable_value``, and ``delete_value`` now log a warning when a Domain GPO is detected for the target value. The ``lgpo_reg`` state functions ``value_present``, ``value_disabled``, and ``value_absent`` append the same warning to the state comment so it is visible in state output. [#69205](https://github.com/saltstack/salt/issues/69205) + ## 3006.25 (2026-05-13) diff --git a/changelog/30690.fixed.md b/changelog/30690.fixed.md deleted file mode 100644 index a972f6089c7c..000000000000 --- a/changelog/30690.fixed.md +++ /dev/null @@ -1 +0,0 @@ -Fixed multi-line scalar variables loaded via `import_yaml` (or `load_yaml`) being rendered as literal `\n` instead of actual newlines when the loaded data is interpolated into a YAML state file (e.g. `- context: {{ data }}`). `PrintableDict.__str__`/`__repr__` now emit string values containing newlines as YAML-safe double-quoted scalars rather than Python `repr()` so they round-trip correctly through the subsequent YAML render pass.
diff --git a/changelog/30971.fixed.md b/changelog/30971.fixed.md deleted file mode 100644 index 495726f1718f..000000000000 --- a/changelog/30971.fixed.md +++ /dev/null @@ -1 +0,0 @@ -Handle requisites correctly for empty SLS files diff --git a/changelog/38551.fixed.md b/changelog/38551.fixed.md deleted file mode 100644 index 5d4d192f899e..000000000000 --- a/changelog/38551.fixed.md +++ /dev/null @@ -1 +0,0 @@ -Fixed ``win_pkg`` functions ignoring the ``saltenv`` setting in minion configuration. All public functions (``refresh_db``, ``genrepo``, ``install``, ``remove``, ``list_pkgs``, ``latest_version``, ``upgrade_available``, ``list_upgrades``, ``list_available``, ``version``, ``get_repo_data``, ``get_package_info``) now fall back to ``__opts__["saltenv"]`` when ``saltenv`` is not passed explicitly, instead of always defaulting to ``base``. diff --git a/changelog/41347.added.md b/changelog/41347.added.md deleted file mode 100644 index 18c65f55806c..000000000000 --- a/changelog/41347.added.md +++ /dev/null @@ -1,9 +0,0 @@ -Added ``shadow.verify_password`` to ``salt.modules.win_shadow``, which -validates a Windows user's password via ``LogonUser`` with -``LOGON32_LOGON_NETWORK`` (Microsoft's recommended approach per -`KB180548 `_) without -creating an interactive session. If the check causes an account lockout, -the account is automatically unlocked. Updated ``user.present`` on Windows -to use ``shadow.verify_password`` so the password is only changed when it -differs from the current value, matching the idempotent behaviour on other -platforms. diff --git a/changelog/52605.fixed.md b/changelog/52605.fixed.md deleted file mode 100644 index 61e07ca76a18..000000000000 --- a/changelog/52605.fixed.md +++ /dev/null @@ -1 +0,0 @@ -``dpkg_lowpkg`` no longer reads ``/var/lib/dpkg/available`` or ``/var/lib/dpkg/info/.list`` directly. It now uses ``dpkg-query`` exclusively, addressing the lintian ``uses-dpkg-database-directly`` warning reported in #52605. 
``lowpkg.info`` derives the package install time from dpkg's ``${db-fsys:Last-Modified}`` field instead of the ``.list`` file mtime. diff --git a/changelog/52793.fixed.md b/changelog/52793.fixed.md deleted file mode 100644 index e2c004cd9587..000000000000 --- a/changelog/52793.fixed.md +++ /dev/null @@ -1 +0,0 @@ -Added ``encoding`` parameter to ``file.replace`` execution module and state to support UTF-16, UTF-32, and other multi-byte encoded files that would otherwise be incorrectly treated as binary. diff --git a/changelog/53190.fixed.md b/changelog/53190.fixed.md deleted file mode 100644 index 64f8773ff689..000000000000 --- a/changelog/53190.fixed.md +++ /dev/null @@ -1 +0,0 @@ -Fixed `postgres._find_pg_binary` ignoring `postgres.bins_dir` when a `psql` binary is also present on the system PATH, ensuring the configured `bins_dir` is always preferred over the system PATH. diff --git a/changelog/55561.fixed.md b/changelog/55561.fixed.md deleted file mode 100644 index 25ddf3fbdac6..000000000000 --- a/changelog/55561.fixed.md +++ /dev/null @@ -1 +0,0 @@ -Percent-encode the user and password when adding HTTP basic auth to a URL so reserved characters no longer corrupt the result diff --git a/changelog/57377.fixed.md b/changelog/57377.fixed.md deleted file mode 100644 index 9c9e077221a4..000000000000 --- a/changelog/57377.fixed.md +++ /dev/null @@ -1,4 +0,0 @@ -Fixed a ``SaltCacheError`` ("maximum recursion depth exceeded") raised by the -etcd data cache when listing an empty folder, which etcd reports as a child of -itself. The directory walk now stops at the self-referential entry instead of -recursing indefinitely. diff --git a/changelog/57754.fixed.md b/changelog/57754.fixed.md deleted file mode 100644 index 338cbb829f3a..000000000000 --- a/changelog/57754.fixed.md +++ /dev/null @@ -1 +0,0 @@ -Fixed `timezone.system` state always returning `result=False` with "Failed to set UTC to True" on Windows. 
The hardware clock on Windows is always localtime and cannot be changed, so the UTC/hwclock block is now skipped entirely on Windows. diff --git a/changelog/57847.fixed.md b/changelog/57847.fixed.md deleted file mode 100644 index a81f98ad5270..000000000000 --- a/changelog/57847.fixed.md +++ /dev/null @@ -1 +0,0 @@ -Fixed `archive.tar` placing the `-C ` option after the source/member operands, where tar ignores it. The directory-change option is now emitted before the operands so it takes effect in both create and extract modes. diff --git a/changelog/57848.fixed.md b/changelog/57848.fixed.md deleted file mode 100644 index 051d5e51e308..000000000000 --- a/changelog/57848.fixed.md +++ /dev/null @@ -1 +0,0 @@ -Fixed `OSError: The operation completed successfully` raised by `CreateProcessWithTokenW` on Windows when the underlying advapi32 call fails. The error code is now read from `ctypes.get_last_error()` (the ctypes-saved slot) instead of `win32api.GetLastError()` (the live Windows slot, which may be reset to 0 before it is read). diff --git a/changelog/57951.fixed.md b/changelog/57951.fixed.md deleted file mode 100644 index 5fc751f5650c..000000000000 --- a/changelog/57951.fixed.md +++ /dev/null @@ -1 +0,0 @@ -Improved documentation for the `runas` and `password` parameters in `cmd.run`, `cmd.script`, and all `salt.modules.cmdmod` execution functions on Windows. The docs now accurately describe when a password is required: only when the salt-minion is **not** running as SYSTEM or as an elevated Administrator. Removed the inaccurate claim that the target user account must be in the Administrators group. Also changed `cmd.script` to log a warning instead of hard-failing when `runas` is used without a password on Windows, since a password is not always required. 
diff --git a/changelog/60276.fixed.md b/changelog/60276.fixed.md deleted file mode 100644 index aa45ff5922aa..000000000000 --- a/changelog/60276.fixed.md +++ /dev/null @@ -1 +0,0 @@ -Fixed ``pkg.group_installed``/``pkg.group_info`` failing to expand a dnf environment group whose member groups have multi-word names (e.g. ``Group '@Common NetworkManager submodules' not found`` when installing ``Workstation`` on RHEL/AlmaLinux 8, 9 and 10). The member group is now resolved by its bare name when the ``@``-prefixed lookup fails. This affects dnf4 only; dnf5 group handling is unchanged. diff --git a/changelog/60877.fixed.md b/changelog/60877.fixed.md deleted file mode 100644 index 6dab6a62aad2..000000000000 --- a/changelog/60877.fixed.md +++ /dev/null @@ -1 +0,0 @@ -Fix `tls.create_csr` log message path to use `os.path.join` instead of f-string interpolation so paths render correctly when csr_path has a trailing slash. diff --git a/changelog/61974.fixed.md b/changelog/61974.fixed.md deleted file mode 100644 index 98659d536a12..000000000000 --- a/changelog/61974.fixed.md +++ /dev/null @@ -1,4 +0,0 @@ -Fixed the LDAP eauth group-membership lookup re-binding the user on every job -payload when ``auth.ldap.freeipa`` is enabled. The user is now only re-bound on -the first payload of a job, matching the standard LDAP code path, so single-use -2FA credentials (such as a FreeIPA OTP) are no longer consumed more than once. diff --git a/changelog/61983.fixed.md b/changelog/61983.fixed.md deleted file mode 100644 index ea2484535e3d..000000000000 --- a/changelog/61983.fixed.md +++ /dev/null @@ -1 +0,0 @@ -Fixed `SSL: DECRYPTION_FAILED_OR_BAD_RECORD_MAC` errors in the VMware cloud driver by reconnecting when a cached vCenter service instance is found to be stale or corrupted (for example when inherited across a fork by salt-cloud's parallel provider queries). 
diff --git a/changelog/62061.fixed.md b/changelog/62061.fixed.md deleted file mode 100644 index e37187b3b106..000000000000 --- a/changelog/62061.fixed.md +++ /dev/null @@ -1 +0,0 @@ -Fix metadata grain so EC2 ``user-data`` is returned verbatim instead of being mangled by the ``=`` line-splitter, which previously corrupted any user-data payload containing ``=`` (e.g. cloud-init ``#cloud-config`` blocks). diff --git a/changelog/62732.fixed.md b/changelog/62732.fixed.md deleted file mode 100644 index a000b21955af..000000000000 --- a/changelog/62732.fixed.md +++ /dev/null @@ -1 +0,0 @@ -Fixed LGPO ``get_policy_info`` incorrectly returning a "multiple policies" error when duplicate ADMX policy definitions (e.g. ``TerminalServer.admx`` and ``TerminalServer-Server.admx``) resolve to the same full path. diff --git a/changelog/63700.fixed.md b/changelog/63700.fixed.md deleted file mode 100644 index 216dfe375d12..000000000000 --- a/changelog/63700.fixed.md +++ /dev/null @@ -1 +0,0 @@ -Fix missing `dns_plugin_propagate_seconds` arg in acme state/module so DNS propagation timeout is actually forwarded to certbot. diff --git a/changelog/64505.fixed.md b/changelog/64505.fixed.md deleted file mode 100644 index b851c15712f1..000000000000 --- a/changelog/64505.fixed.md +++ /dev/null @@ -1 +0,0 @@ -Fixed incorrect minion presence events being sent out on hourly ``Maintenance`` process restarts diff --git a/changelog/64915.fixed.md b/changelog/64915.fixed.md deleted file mode 100644 index 44a599c7eea7..000000000000 --- a/changelog/64915.fixed.md +++ /dev/null @@ -1 +0,0 @@ -Catch StrictUndefined in salt jinja custom filters. diff --git a/changelog/65184.fixed.md b/changelog/65184.fixed.md new file mode 100644 index 000000000000..e0ff0d9c528b --- /dev/null +++ b/changelog/65184.fixed.md @@ -0,0 +1 @@ +Fixed the EC2/cloud metadata grain crashing with ``KeyError: 'headers'`` when ``salt.utils.http.query`` returns an error response (4xx/5xx with a body, e.g. 
when the IMDS rejects a recursive sub-path lookup). Since 3006.3 the tornado backend has populated ``body`` on HTTPError without also populating ``headers``; the grain now treats the missing ``headers`` key as "no Content-Type information" instead of letting the lookup blow up the whole grain load. diff --git a/changelog/65301.fixed.md b/changelog/65301.fixed.md deleted file mode 100644 index e6d0cdc25b25..000000000000 --- a/changelog/65301.fixed.md +++ /dev/null @@ -1 +0,0 @@ -Stopped logging the misleading "An extra return was detected from minion ... this could be a replay attack" ERROR for benign duplicate returns (also fixes #65516). The local_cache returner now compares a duplicate return to the cached one and logs at DEBUG when the payloads match (the common retry-after-timeout or syndic re-forward case) and at WARNING -- without the "replay attack" wording -- when the payloads differ. diff --git a/changelog/65317.fixed.md b/changelog/65317.fixed.md deleted file mode 100644 index ab6cc10604e2..000000000000 --- a/changelog/65317.fixed.md +++ /dev/null @@ -1 +0,0 @@ -Fixed non-root salt CLI access when ``publisher_acl`` or ``external_auth`` is configured. Since 3006.3 the master defaults to running as the ``salt`` user, which left ``sock_dir`` and ``cachedir`` mode ``0o750`` and blocked authorised non-root users from traversing into them to reach ``master_event_pub.ipc`` / ``publish_pull.ipc`` and their per-user ``._key``. The master now adds the world-execute bit to those two directories when ACLs are configured, without exposing directory listings. 
diff --git a/changelog/65360.fixed.md b/changelog/65360.fixed.md deleted file mode 100644 index 111801becae9..000000000000 --- a/changelog/65360.fixed.md +++ /dev/null @@ -1 +0,0 @@ -Fixed ``salt.ext.tornado.netutil`` import on Python 3.12+ where ``ssl.match_hostname`` was removed and the unmaintained ``backports.ssl_match_hostname`` package is unavailable, which previously broke any Salt master-initiated job (e.g. ``test.ping``, ``state.apply``) on Fedora 39+/Ubuntu 24.04 masters. diff --git a/changelog/65516.fixed.md b/changelog/65516.fixed.md deleted file mode 100644 index 5bf32ff12c79..000000000000 --- a/changelog/65516.fixed.md +++ /dev/null @@ -1 +0,0 @@ -See #65301 -- the same fix to ``salt/returners/local_cache.py`` quiets the spurious "extra return ... replay attack" ERROR that appeared in multimaster and master-of-masters/syndic setups when the same return arrived more than once. diff --git a/changelog/65531.added.md b/changelog/65531.added.md deleted file mode 100644 index 615764ba2417..000000000000 --- a/changelog/65531.added.md +++ /dev/null @@ -1 +0,0 @@ -Added ability to configure the pillar destination for the `netbox` ext_pillar via `destination_pillar_key` diff --git a/changelog/65709.fixed.md b/changelog/65709.fixed.md deleted file mode 100644 index 7d072e727874..000000000000 --- a/changelog/65709.fixed.md +++ /dev/null @@ -1,8 +0,0 @@ -Fix deadlock in parallel `cmd.script` states when the script is served by the master. - -Same fork-inherited ZeroMQ socket race as the `file.managed` fix: a -`cmd.script` state with `parallel: True` downloads the script via -`cp.cache_file` in a forked child that inherited the parent's ZeroMQ -REQ socket, deadlocking the asyncio loop at ~100% CPU. Resolved by the -same `os.register_at_fork` handlers that drop inherited channel/socket -references in forked children. 
diff --git a/changelog/65870.fixed.md b/changelog/65870.fixed.md deleted file mode 100644 index 8fdee0ff3b2d..000000000000 --- a/changelog/65870.fixed.md +++ /dev/null @@ -1 +0,0 @@ -Fixed pip.uninstall rejecting the extra_args keyword argument, matching the behavior of pip.install. diff --git a/changelog/66148.fixed.md b/changelog/66148.fixed.md deleted file mode 100644 index 144a132e53d0..000000000000 --- a/changelog/66148.fixed.md +++ /dev/null @@ -1,9 +0,0 @@ -Fixed salt-ssh failing to fetch ``gitfs_remotes``. ``salt.config.master_config`` -sets ``__fs_update = True`` to suppress fileserver refreshes done by ``FSChan`` -(the master daemon's maintenance thread handles them). salt-ssh inherits the -master config but has no maintenance thread, so its ``FSClient`` never refreshed -the fileserver backends and wrappers such as ``cp.list_states`` saw no gitfs -content until the user ran ``salt-run fileserver.update`` or manually -``git fetch``ed the cached repos. ``salt.client.ssh.SSH.__init__`` now removes -the suppression flag before instantiating ``FSClient`` so gitfs is refreshed -once at startup. diff --git a/changelog/67061.fixed.md b/changelog/67061.fixed.md deleted file mode 100644 index 1ffd111976d1..000000000000 --- a/changelog/67061.fixed.md +++ /dev/null @@ -1 +0,0 @@ -Fixed ``salt/version.py`` reporting the wrong major version on the 3006.x branch when built from a checkout that has no ``salt/_version.txt`` and no usable ``.git`` directory. ``SaltVersionsInfo.current_release()`` now returns the branch's own codename (``Sulfur``) instead of the next un-released codename in the table, so source builds and other tooling no longer leak ``3007.0`` into the reported version. 
diff --git a/changelog/67716.fixed.md b/changelog/67716.fixed.md deleted file mode 100644 index 77fea663d9d8..000000000000 --- a/changelog/67716.fixed.md +++ /dev/null @@ -1,7 +0,0 @@ -Fixed ``saltutil.runner`` and ``saltutil.wheel`` running master-side functions -as the minion's user (typically ``root``) instead of the master's configured -user (the packaged default since 3006 is ``salt``). Running as the wrong user -left root-owned files in, and tripped git's ``safe.directory`` check on, the -salt-owned master cache -- breaking, for example, ``git_pillar.update`` invoked -via ``saltutil.runner``. These functions now drop to the master's configured -user before executing when invoked from a more-privileged process. diff --git a/changelog/68103.fixed.md b/changelog/68103.fixed.md deleted file mode 100644 index d69a8ef21bd7..000000000000 --- a/changelog/68103.fixed.md +++ /dev/null @@ -1 +0,0 @@ -Fixed `LocalClient.cmd_subset` raising `TypeError: argument of type 'bool' is not iterable` when one or more targeted minions failed to respond to the `sys.list_functions` probe. Failed minions are now skipped during subset selection. diff --git a/changelog/68105.fixed.md b/changelog/68105.fixed.md deleted file mode 100644 index 5321617335bf..000000000000 --- a/changelog/68105.fixed.md +++ /dev/null @@ -1 +0,0 @@ -Fixed ``slack_bolt`` engine crashing with ``UnboundLocalError`` when a Slack workflow or other bot posts a message to a monitored channel. Bot messages (``subtype: bot_message``) carry ``bot_id`` and ``username`` instead of a ``user`` field, and these are now used as fallbacks so the engine continues running. 
diff --git a/changelog/68110.fixed.md b/changelog/68110.fixed.md deleted file mode 100644 index 3e456539619e..000000000000 --- a/changelog/68110.fixed.md +++ /dev/null @@ -1 +0,0 @@ -Fixed `user.present` to not fail with `result: False` in test mode when a referenced group does not yet exist; the state now reports the pending changes so users can preview states that depend on groups created by a `group.present` requisite in the same run. diff --git a/changelog/68115.fixed.md b/changelog/68115.fixed.md deleted file mode 100644 index 5dfa7311a99e..000000000000 --- a/changelog/68115.fixed.md +++ /dev/null @@ -1 +0,0 @@ -Fixed ``salt-minion`` and ``salt-proxy`` leaving a privileged (root) keepalive supervisor process at the head of an otherwise unprivileged minion process tree when ``user`` is set to a non-root account. The supervisor now drops privileges to the configured user once the keepalive child has been spawned. diff --git a/changelog/68129.fixed.md b/changelog/68129.fixed.md deleted file mode 100644 index a3f4ac2eb5bf..000000000000 --- a/changelog/68129.fixed.md +++ /dev/null @@ -1 +0,0 @@ -Fixed ``ValueError: Formatting field not found in record: 'colorlevel'`` errors when ``log_fmt_console`` uses custom color attributes such as ``%(colorlevel)s`` or ``%(colormsg)s``. ``SaltLogRecord`` now always provides the ``color*`` attributes (uncolored by default) so that log records buffered by the temporary deferred stream handler can be formatted by a colorized console formatter once it is installed. diff --git a/changelog/68137.fixed.md b/changelog/68137.fixed.md deleted file mode 100644 index d8d3d808af49..000000000000 --- a/changelog/68137.fixed.md +++ /dev/null @@ -1 +0,0 @@ -Fixed ``salt-call`` silently ignoring ``--file-root``, ``--pillar-root``, and ``--states-dir`` when ``--local`` was not passed. 
These overrides only affect the local minion config and are clobbered by the master's values via the remote file client, so ``salt-call`` now emits a warning explaining that ``--local`` is required for the override to take effect. diff --git a/changelog/68181.fixed.md b/changelog/68181.fixed.md deleted file mode 100644 index 7995e8feb55b..000000000000 --- a/changelog/68181.fixed.md +++ /dev/null @@ -1 +0,0 @@ -Fixed event signature verification failing under ``minion_sign_messages``. The minion was signing the return load before ``salt.channel.client.AsyncReqChannel._package_load`` attached transport metadata (``nonce``, ``ts``, ``tok``, ``id``), so the bytes the master re-serialized to verify did not match what was signed and every signed return was dropped. Signing is now performed inside ``_package_load`` after the metadata is attached, against the same bytes the master verifies. diff --git a/changelog/68208.fixed.md b/changelog/68208.fixed.md deleted file mode 100644 index 46441c2ca2fb..000000000000 --- a/changelog/68208.fixed.md +++ /dev/null @@ -1,9 +0,0 @@ -Fixed ``pkgrepo.managed`` honouring ``clean_file: True`` when the desired -repo line is already present in the managed file alongside unrelated stale -lines. Previously the state returned "already configured" and silently -skipped both the file truncation and the re-write, leaving the stale -entries (for example an obsolete ``bullseye-backports`` line in a file -managed for ``bookworm-backports``) in place. The clean + reconfigure -path now runs whenever the managed file contains any non-comment, -non-blank content other than the desired repo line; when the file already -contains only the desired line the state remains idempotent. 
diff --git a/changelog/68210.fixed.md b/changelog/68210.fixed.md deleted file mode 100644 index 61ee3baf1cb8..000000000000 --- a/changelog/68210.fixed.md +++ /dev/null @@ -1 +0,0 @@ -Fixed ``pkg.group_installed`` reporting failure on RPM-based systems when a package group's default or optional members are not available in any enabled repository. The state now only considers mandatory group members and explicitly requested ``include`` packages when checking for install failures, matching the behavior of ``yum/dnf group install`` (which reports "No match for group package" but still exits 0). diff --git a/changelog/68214.fixed.md b/changelog/68214.fixed.md deleted file mode 100644 index b2a2d6469bf0..000000000000 --- a/changelog/68214.fixed.md +++ /dev/null @@ -1 +0,0 @@ -Pass ``--disable-pip-version-check`` when ``pip.list``, ``pip.freeze``, ``pip.list_upgrades``, ``pip.upgrade``, and ``pip.list_all_versions`` invoke pip, so these calls no longer hang for ~20s per invocation on airgapped minions while pip tries to reach PyPI for its self-version check. diff --git a/changelog/68227.fixed.md b/changelog/68227.fixed.md deleted file mode 100644 index 8a93f6afc7f6..000000000000 --- a/changelog/68227.fixed.md +++ /dev/null @@ -1 +0,0 @@ -Fixed ``archive.extracted`` failing to enforce ``user``/``group`` ownership on archives whose tar/zip members include no explicit directory entries (e.g. Oracle's GraalVM JDK tarballs). ``archive.list`` now derives the top-level directory from the common prefix of file and link members in addition to dir members, so ownership is applied to the extracted top-level directory in all cases. diff --git a/changelog/68248.fixed.md b/changelog/68248.fixed.md deleted file mode 100644 index e98d492f1964..000000000000 --- a/changelog/68248.fixed.md +++ /dev/null @@ -1 +0,0 @@ -Fixed deltaproxy sub-proxies returning identical grain data for every controlled minion. 
``subproxy_post_master_init`` now re-packs each sub-proxy's freshly loaded per-minion grains into its execution-module, returner, executor and proxy LazyLoaders so ``__grains__`` inside loaded modules reflects that sub-proxy's device instead of the placeholder values captured during the first-pass grains load through the control proxy. diff --git a/changelog/68269.fixed.md b/changelog/68269.fixed.md deleted file mode 100644 index 230abebf0022..000000000000 --- a/changelog/68269.fixed.md +++ /dev/null @@ -1 +0,0 @@ -Fixed the salt-minion (and salt-api, salt-cloud, salt-master, salt-syndic) Debian postinst scripts hanging or erroring with "Bad file descriptor" when run from a non-interactive Debian preseed late_command chroot, by tearing down the debconf protocol with ``db_stop`` and explicitly closing file descriptor 3 before the auto-generated ``#DEBHELPER#`` section runs. diff --git a/changelog/68273.fixed.md b/changelog/68273.fixed.md deleted file mode 100644 index c960a2eb4827..000000000000 --- a/changelog/68273.fixed.md +++ /dev/null @@ -1 +0,0 @@ -Fixed ``file.managed`` failing with ``WinError 123`` on Windows when caching a remote URL whose path embeds another URL (e.g. an archive.org snapshot of an ``https://...`` resource). The URL-path portion of the ``extrn_files`` cache path is now sanitised the same way the network location already is. diff --git a/changelog/68293.fixed.md b/changelog/68293.fixed.md deleted file mode 100644 index 732e804ed243..000000000000 --- a/changelog/68293.fixed.md +++ /dev/null @@ -1,5 +0,0 @@ -Fixed ``logrotate.set`` dropping the second ``endscript`` (and turning -embedded shell commands into bogus setting keys) when a stanza contained -multiple script blocks such as both ``prerotate`` and ``postrotate``. Script -directives are now parsed as opaque multi-line bodies and round-trip with -their own ``endscript`` terminator each. 
diff --git a/changelog/68326.fixed.md b/changelog/68326.fixed.md deleted file mode 100644 index 59817e576fea..000000000000 --- a/changelog/68326.fixed.md +++ /dev/null @@ -1 +0,0 @@ -Fixed the `salt.state` orchestrate state silently reporting only `Run failed on minions: ` when a targeted minion returned `False`, no return at all, or a list of error strings. The orchestrate comment now includes the per-minion failure detail (the minion's actual return value or "did not return a state result") so operators can diagnose `salt-run state.orchestrate` failures without re-running with extra logging. diff --git a/changelog/68351.fixed.md b/changelog/68351.fixed.md deleted file mode 100644 index d142f10b3e3e..000000000000 --- a/changelog/68351.fixed.md +++ /dev/null @@ -1 +0,0 @@ -Fixed ``clean_old_jobs`` in the default local job cache returner to use the jid file's modification time (``st_mtime``) instead of the inode change time (``st_ctime``). A package upgrade's ``chown -R /var/cache/salt/master`` resets ``st_ctime`` on every existing jid file, which previously made the maintenance process treat every pre-upgrade job as freshly created and prevented cleanup until ``keep_jobs_seconds`` had elapsed. On busy masters this exhausted the partition's inodes within a day. diff --git a/changelog/68353.fixed.md b/changelog/68353.fixed.md deleted file mode 100644 index d6282cbc293c..000000000000 --- a/changelog/68353.fixed.md +++ /dev/null @@ -1 +0,0 @@ -Fixed the ``proxmox`` salt-cloud driver raising ``Could not determine an IP address to use`` before the VM was created and started. The IP address is now determined after the VM is running, and the running VM's address reported by Proxmox is used as a fallback when neither a static ``ip_address`` nor ``agent_get_ip`` is configured. 
diff --git a/changelog/68419.fixed.md b/changelog/68419.fixed.md deleted file mode 100644 index 981855faed2f..000000000000 --- a/changelog/68419.fixed.md +++ /dev/null @@ -1 +0,0 @@ -Fixed `task.edit_task` on Windows rejecting `restart_count=999` even though the documented and error-message-stated maximum is 999. The validation now accepts the full 1..999 range. diff --git a/changelog/68420.fixed.md b/changelog/68420.fixed.md deleted file mode 100644 index 25f498d61886..000000000000 --- a/changelog/68420.fixed.md +++ /dev/null @@ -1 +0,0 @@ -Fixed ``win_task.add_trigger`` so that ``repeat_duration="Indefinitely"`` actually produces an indefinite repetition pattern. Previously the empty string from the internal duration lookup was assigned to ``Repetition.Duration``, which the Windows Task Scheduler treats as "0 seconds" and silently disables repetition. The Duration property is now left at its default for the "Indefinitely" case, which is the documented way to repeat forever. diff --git a/changelog/68428.fixed.md b/changelog/68428.fixed.md deleted file mode 100644 index 2b8bc106ff8e..000000000000 --- a/changelog/68428.fixed.md +++ /dev/null @@ -1 +0,0 @@ -Fixed ``user.setpassword`` on Windows reporting success (``retcode: 0``) when the target user does not exist. The execution module now returns ``False`` and logs an error in that case, so callers and the ``user.present`` state correctly detect the failure instead of swallowing the Win32 "user name could not be found" message as a successful return. diff --git a/changelog/68429.fixed.md b/changelog/68429.fixed.md deleted file mode 100644 index da82431a9b62..000000000000 --- a/changelog/68429.fixed.md +++ /dev/null @@ -1,4 +0,0 @@ -Fixed ``user.present`` on Windows so it actually updates a user's password -when the existing password differs from the one specified in the state. -Previously the state reported "User is already present and up to date" and -left the password unchanged. 
diff --git a/changelog/68458.fixed.md b/changelog/68458.fixed.md deleted file mode 100644 index 2816017b2064..000000000000 --- a/changelog/68458.fixed.md +++ /dev/null @@ -1 +0,0 @@ -Stop salt-ssh state runs from clobbering the master-side fileclient ``cachedir`` with the on-target ``thin_dir`` cachedir. The state fileserver cache for salt-ssh state runs is now written under the configured master ``cachedir`` (e.g. ``/var/cache/salt/master/``) instead of under the minion's thin_dir path on the master filesystem. diff --git a/changelog/68464.fixed.md b/changelog/68464.fixed.md deleted file mode 100644 index 98a07de3a00d..000000000000 --- a/changelog/68464.fixed.md +++ /dev/null @@ -1 +0,0 @@ -Fixed ``pkg.add_repo_key`` and ``pkgrepo.managed`` so APT keyring files that target an ``.asc`` destination keep their ASCII armor instead of being dearmored, matching the apt-secure(8) convention and allowing armored keyfiles that bundle multiple keys to be installed even when the ``gpg`` binary is not available on the minion. diff --git a/changelog/68481.fixed.md b/changelog/68481.fixed.md deleted file mode 100644 index 7d96dea88671..000000000000 --- a/changelog/68481.fixed.md +++ /dev/null @@ -1,4 +0,0 @@ -Fixed ``jobs.list_jobs search_metadata`` so it matches jobs whose metadata -was passed as a CLI keyword argument (e.g. ``state.apply metadata={...}``) -and is therefore carried inside the job's ``Arguments`` rather than at the -top of the job payload. diff --git a/changelog/68489.fixed.md b/changelog/68489.fixed.md deleted file mode 100644 index c176662744ea..000000000000 --- a/changelog/68489.fixed.md +++ /dev/null @@ -1 +0,0 @@ -Fixed `lgpo.set` state reporting "Failed to set the following policies" on subsequent runs of policies with sub-elements (e.g. Storage Sense thresholds). 
The state compared a user-supplied dict keyed by element id with a current dict keyed by the ADML display name; both forms now normalize to the canonical element id before comparison so the state is idempotent. diff --git a/changelog/68493.fixed.md b/changelog/68493.fixed.md deleted file mode 100644 index 3e4978bced67..000000000000 --- a/changelog/68493.fixed.md +++ /dev/null @@ -1 +0,0 @@ -Fixed minion rejecting the master with "Invalid master key" after restart when the cached `minion_master.pub` differs from the master's payload pub_key only in trailing whitespace. `AsyncAuth.verify_master` now normalizes both sides through `clean_key` before comparing and caches the normalized form on first contact. diff --git a/changelog/68506.fixed.md b/changelog/68506.fixed.md deleted file mode 100644 index 9a2571c3571f..000000000000 --- a/changelog/68506.fixed.md +++ /dev/null @@ -1 +0,0 @@ -Fixed ``TypeError: 'NoneType' object is not iterable`` raised from ``AsyncReqMessageClient._send_recv`` when a per-message timeout completes the future before the send/receive coroutine catches a transient transport exception, which aborted the minion's connect loop and prevented it from connecting to the master. diff --git a/changelog/68518.fixed.md b/changelog/68518.fixed.md deleted file mode 100644 index a415b9ad0bed..000000000000 --- a/changelog/68518.fixed.md +++ /dev/null @@ -1 +0,0 @@ -Fixed ``docker_network.present`` recreating networks on every run against Docker 29+. Docker 29 added an empty ``IPRange`` field to every IPAM Config entry; ``docker.compare_networks`` now drops empty/None placeholder values before comparing pools, and the state's default-pool short-circuit treats the empty field as absent. 
diff --git a/changelog/68540.fixed.md b/changelog/68540.fixed.md deleted file mode 100644 index b0db68f4dd61..000000000000 --- a/changelog/68540.fixed.md +++ /dev/null @@ -1 +0,0 @@ -Fixed `pkg.installed` verification on x86_64 hosts that mix `x86_64` and `x86_64_v2` packages (e.g. AlmaLinux 10.1). `salt.utils.pkg.rpm.resolve_name` and `salt.modules.yumpkg.normalize_name` now treat `x86_64_v2` as compatible with `x86_64` instead of appending the arch suffix, so installed packages match the names Salt records. diff --git a/changelog/68567.fixed.md b/changelog/68567.fixed.md deleted file mode 100644 index b87045decf7c..000000000000 --- a/changelog/68567.fixed.md +++ /dev/null @@ -1 +0,0 @@ -Fixed ``mysql_grants.present`` reporting "Failed to execute" when granting ``ALL PRIVILEGES`` on ``*.*`` against MySQL 8.4+, where the server's privilege set drifted from Salt's hard-coded list (``SET_USER_ID`` removed, many dynamic privileges added). ``grant_exists`` now derives the expected privilege set from the connected server's ``SHOW PRIVILEGES`` output instead of a static list. diff --git a/changelog/68572.fixed.md b/changelog/68572.fixed.md deleted file mode 100644 index f11ca2908ac2..000000000000 --- a/changelog/68572.fixed.md +++ /dev/null @@ -1 +0,0 @@ -Fixed ``cp.get_template`` raising ``AttributeError: 'NoneType' object has no attribute 'get'`` when the Jinja template uses ``{% from '...' import ... with context %}``. The cp module's loader-backed ``__opts__`` is now unwrapped to a plain dict before the SaltCacheLoader instantiates the file client and channel that fetch the imported template. 
diff --git a/changelog/68573.fixed.md b/changelog/68573.fixed.md deleted file mode 100644 index a786a52dad97..000000000000 --- a/changelog/68573.fixed.md +++ /dev/null @@ -1 +0,0 @@ -Fixed `ImportError: cannot import name 'wait' from partially initialized module 'multiprocessing.connection'` raised during salt-master/minion shutdown when a reentrant SIGTERM hit `ProcessManager.kill_children()` mid `Process.join(0)`. `salt.utils.process` now eagerly imports `multiprocessing.connection` so the module is fully initialised before any signal handler can trigger its lazy import. diff --git a/changelog/68578.fixed.md b/changelog/68578.fixed.md deleted file mode 100644 index f1beea2f418f..000000000000 --- a/changelog/68578.fixed.md +++ /dev/null @@ -1 +0,0 @@ -Fixed `cmd.script` on Windows raising `Invalid user: ` when `runas` is a domain account (`DOMAIN\user`, `user@DOMAIN`, or a SID). The pre-execution `user.info` check is backed by `NetUserGetInfo` which only resolves local-machine accounts and returns empty for many valid domain users; the missing lookup is now logged as a warning and execution continues so the underlying `win_runas` machinery can authenticate the account. diff --git a/changelog/68620.fixed.md b/changelog/68620.fixed.md deleted file mode 100644 index 771db9b562ec..000000000000 --- a/changelog/68620.fixed.md +++ /dev/null @@ -1 +0,0 @@ -Fixed `pkg.install` on Windows silently downgrading the salt-minion when a numeric `version=` argument was passed (e.g. `version=3007.10` was YAML-parsed to the float `3007.1` and then matched the wrong winrepo entry). When the numeric version uniquely matches a string-keyed winrepo entry it is now resolved to that entry; when it is ambiguous (e.g. both `3007.1` and `3007.10` are in the winrepo) the install is refused with a clear error pointing the user at the quoted-version syntax. 
diff --git a/changelog/68625.fixed.md b/changelog/68625.fixed.md deleted file mode 100644 index 3194240c92e8..000000000000 --- a/changelog/68625.fixed.md +++ /dev/null @@ -1 +0,0 @@ -Fixed the loader masking failure reasons when multiple modules declare the same `__virtualname__` and each `__virtual__()` returns False, so users now see every reason (e.g. both x509 v1's "Superseded, using x509_v2" and x509_v2's "Could not load cryptography") instead of only the first one recorded. diff --git a/changelog/68653.fixed.md b/changelog/68653.fixed.md deleted file mode 100644 index eceed554c197..000000000000 --- a/changelog/68653.fixed.md +++ /dev/null @@ -1 +0,0 @@ -Fix `NetapiClient.runner` raising `TypeError` when `timeout` arrives as a string from the salt-api HTTP form. diff --git a/changelog/68663.fixed.md b/changelog/68663.fixed.md deleted file mode 100644 index 1f6d1479dd1d..000000000000 --- a/changelog/68663.fixed.md +++ /dev/null @@ -1 +0,0 @@ -Fixed `master_job_cache: redis_return` raising `KeyError: 'redis_return.prep_jid'` by registering the `redis` returner under both `redis` and `redis_return` virtual names, matching the documented `--return redis_return` usage and the module's file name. diff --git a/changelog/68673.fixed.md b/changelog/68673.fixed.md deleted file mode 100644 index e649328595d0..000000000000 --- a/changelog/68673.fixed.md +++ /dev/null @@ -1 +0,0 @@ -Fixed ``ini.options_present`` with ``strict: True`` to remove sections that are present in the ini file but absent from the supplied ``sections`` mapping. diff --git a/changelog/68678.fixed.md b/changelog/68678.fixed.md deleted file mode 100644 index 622a3a814636..000000000000 --- a/changelog/68678.fixed.md +++ /dev/null @@ -1 +0,0 @@ -Handle `SaltDeserializationError` in grains cache loading so a corrupted cache file no longer propagates as CRITICAL during minion startup. 
diff --git a/changelog/68692.fixed.md b/changelog/68692.fixed.md deleted file mode 100644 index a20a0a043b13..000000000000 --- a/changelog/68692.fixed.md +++ /dev/null @@ -1 +0,0 @@ -Fixed ``network.interfaces`` on Windows systems falling back to WMI (i.e. .NET older than 4.7.2): the default gateway is now reported under ``gateway`` instead of being mistakenly emitted as ``broadcast``. diff --git a/changelog/68754.fixed.md b/changelog/68754.fixed.md deleted file mode 100644 index e5e2bc27c80f..000000000000 --- a/changelog/68754.fixed.md +++ /dev/null @@ -1 +0,0 @@ -Fixed ``file.managed`` (and other template-rendering callers) silently overwriting user-supplied ``slspath``, ``sls_path``, ``slsdotpath`` and ``slscolonpath`` values in ``defaults``/``context`` with values regenerated from the caller's ``sls`` key. diff --git a/changelog/68785.fixed.md b/changelog/68785.fixed.md deleted file mode 100644 index 60056fc66391..000000000000 --- a/changelog/68785.fixed.md +++ /dev/null @@ -1 +0,0 @@ -Fixed ``env_order`` not being honored when merging pillar data across environments. ``Pillar.render_pillar`` now iterates matched environments in the configured ``env_order`` so that, with ``top_file_merging_strategy: merge_all``, the last environment in ``env_order`` wins on conflicting pillar keys instead of the result depending on dict insertion order. diff --git a/changelog/68792.fixed.md b/changelog/68792.fixed.md deleted file mode 100644 index 37330f0d2045..000000000000 --- a/changelog/68792.fixed.md +++ /dev/null @@ -1 +0,0 @@ -Improved the "Malformed topfile" error from ``HighState.verify_tops`` to name the saltenv and the matcher whose state declarations were not formed as a list, so users can locate the offending entry in their ``top.sls``. 
diff --git a/changelog/68869.fixed.md b/changelog/68869.fixed.md deleted file mode 100644 index 40bda5118488..000000000000 --- a/changelog/68869.fixed.md +++ /dev/null @@ -1 +0,0 @@ -Removed orphaned GnuPG dotlock files (``.#lk..``) from ``gpg_keydir`` before each decrypt in the ``gpg`` renderer so they no longer accumulate when a gpg subprocess is killed mid-operation. diff --git a/changelog/68886.fixed.md b/changelog/68886.fixed.md deleted file mode 100644 index 43dc41bad898..000000000000 --- a/changelog/68886.fixed.md +++ /dev/null @@ -1,4 +0,0 @@ -Fix `pkg.installed` idempotency on FreeBSD when `with_origin=True` causes -`pkg.list_pkgs` to return per-package dicts instead of version lists; extract -the version list before version-string comparison so a second state run no -longer falsely reports packages as changed. diff --git a/changelog/68901.fixed.md b/changelog/68901.fixed.md new file mode 100644 index 000000000000..ed62fbc9ad7f --- /dev/null +++ b/changelog/68901.fixed.md @@ -0,0 +1 @@ +Fixed a file descriptor leak in the Salt minion: when the single-master sign-in path in ``Minion.eval_master`` raised any exception other than ``SaltClientError`` (for example ``OSError`` from the underlying transport), or when ``transport: detect`` rejected a candidate transport because it could not authenticate, the ``AsyncPubChannel`` that had been created was not closed, leaking its socket. Minions with unstable network connectivity could exhaust the per-process file descriptor limit. The channel is now always closed on failure via a ``try/finally``. diff --git a/changelog/68930.fixed.md b/changelog/68930.fixed.md deleted file mode 100644 index 297fa7a69ec2..000000000000 --- a/changelog/68930.fixed.md +++ /dev/null @@ -1 +0,0 @@ -Fix gen_signature() signing raw pub key content instead of clean_key'd content, causing master_use_pubkey_signature verification to always fail. 
diff --git a/changelog/68931.fixed.md b/changelog/68931.fixed.md deleted file mode 100644 index 0ac9e8a8d7e9..000000000000 --- a/changelog/68931.fixed.md +++ /dev/null @@ -1 +0,0 @@ -Fixed spurious ``FileLockError: lock_fn ... exists and is not a file`` raised by ``salt.utils.files.wait_lock`` and ``salt.utils.files.await_lock`` (and therefore by ``state.apply`` queue locking) when another process removed the lock file between the two separate ``os.path.exists`` / ``os.path.isfile`` stats. The pre-check now uses a single ``os.stat`` call so a transient regular-file lock no longer trips the "not a file" branch. diff --git a/changelog/68932.fixed.md b/changelog/68932.fixed.md deleted file mode 100644 index 773870ebe56b..000000000000 --- a/changelog/68932.fixed.md +++ /dev/null @@ -1 +0,0 @@ -Fixed pkg.installed(update_holds=True) for APT multiarch packages by preserving arch-qualified package names through install target parsing and verification. diff --git a/changelog/68940.fixed.md b/changelog/68940.fixed.md deleted file mode 100644 index e09d8685b968..000000000000 --- a/changelog/68940.fixed.md +++ /dev/null @@ -1,9 +0,0 @@ -Fix deadlock in parallel `file.managed` states when source is served by the master. - -Forked parallel-state children previously inherited the parent's ZeroMQ -REQ socket and asyncio loop from `salt.fileclient.RemoteClient`, -`salt.crypt.AsyncAuth/SAuth`, and `salt.utils.event.SaltEvent`. Multiple -sibling children racing those handles deadlocked the asyncio loop with -~98% CPU and never completed. Salt now registers `os.register_at_fork` -handlers on those classes that drop inherited channel/socket references -in any forked child; the next use rebuilds them fresh. diff --git a/changelog/68976.fixed.md b/changelog/68976.fixed.md deleted file mode 100644 index fe482faa5cd3..000000000000 --- a/changelog/68976.fixed.md +++ /dev/null @@ -1 +0,0 @@ -Fixed grain and pillar targeting matching minions whose data cache entry was missing. 
``CkMinions._check_cache_minions`` now excludes accepted minions that have no cached grains/pillar data from greedy target results, instead of silently including them as candidates. diff --git a/changelog/68992.removed.md b/changelog/68992.removed.md deleted file mode 100644 index bbbfe4a94fa0..000000000000 --- a/changelog/68992.removed.md +++ /dev/null @@ -1 +0,0 @@ -Removed the unmaintained `linode-python` package dependency to stop SyntaxWarnings during install for retired Linode API v3. diff --git a/changelog/68993.fixed.md b/changelog/68993.fixed.md deleted file mode 100644 index 7671953d58b9..000000000000 --- a/changelog/68993.fixed.md +++ /dev/null @@ -1 +0,0 @@ -Avoid AttributeError on a closed IPCClient when the connect coroutine resolves after close(). diff --git a/changelog/68995.fixed.md b/changelog/68995.fixed.md deleted file mode 100644 index 54ae8c74fd76..000000000000 --- a/changelog/68995.fixed.md +++ /dev/null @@ -1 +0,0 @@ -Fixed `salt.utils.network.sanitize_host` stripping colons from IPv6 addresses, which broke `network.ping` and any other caller that passed an IPv6 host. diff --git a/changelog/69003.fixed.md b/changelog/69003.fixed.md deleted file mode 100644 index 45053dcb6669..000000000000 --- a/changelog/69003.fixed.md +++ /dev/null @@ -1 +0,0 @@ -Added support for MAINTAIN (m) privilege introduced in PostgreSQL 17 to salt.modules.postgres and salt.states.postgres_privileges diff --git a/changelog/69029.fixed.md b/changelog/69029.fixed.md deleted file mode 100644 index 162fe4e2331e..000000000000 --- a/changelog/69029.fixed.md +++ /dev/null @@ -1,6 +0,0 @@ -Fixed `redis.get_master_ip` silently dropping the `password` argument. The -function was forwarding its arguments positionally to `_connect`, but -`_connect`'s third positional slot is `db`, not `password`, so the -caller's password landed in the database-index argument and the actual -password fell through to `config.option("redis.password")`. Arguments -are now passed by keyword. 
diff --git a/changelog/69030.fixed.md b/changelog/69030.fixed.md deleted file mode 100644 index e061d6764fa6..000000000000 --- a/changelog/69030.fixed.md +++ /dev/null @@ -1,7 +0,0 @@ -Fixed `salt.modules.redismod._connect` rejecting valid `db=0`. The helper -used a truthy check (`if not db`) to decide whether to fall back to -`config.option("redis.db")`, but `not 0` is `True`, so an explicitly -supplied `db=0` was silently replaced by the configured value. The check -is now `if db is None`, matching the pattern already used by the sibling -`_sconnect` helper in the same module. Other arguments keep their -truthy-check semantics on purpose. diff --git a/changelog/69031.fixed.md b/changelog/69031.fixed.md deleted file mode 100644 index e42b2f69a414..000000000000 --- a/changelog/69031.fixed.md +++ /dev/null @@ -1,8 +0,0 @@ -Fixed two distinct bugs in the `salt.engines.redis_sentinel` engine that -together prevented it from being usable. `start()` no longer raises -`AttributeError: 'dict_values' object has no attribute 'pop'` on Python 3 -(the dict.values() result is now wrapped in `list(...)`). `Listener` and -`start()` now accept an optional `password` argument and forward it to -the redis client, allowing the engine to authenticate against a Sentinel -that requires AUTH; the default of `None` keeps existing configurations -working unchanged. diff --git a/changelog/69032.fixed.md b/changelog/69032.fixed.md deleted file mode 100644 index d17b03bbf548..000000000000 --- a/changelog/69032.fixed.md +++ /dev/null @@ -1,7 +0,0 @@ -Fixed `salt.returners.redis_return` silently ignoring the documented -`redis.password` configuration option. The returner now reads -`redis.password` from config (in both regular and proxy modes) and -forwards it to both the single-server `redis.StrictRedis` and the -`StrictRedisCluster` constructors. 
Operators with auth-protected Redis -no longer lose every job return to a hidden `NOAUTH Authentication -required` failure; deployments without a password are unaffected. diff --git a/changelog/69033.fixed.md b/changelog/69033.fixed.md deleted file mode 100644 index 81cf51d4face..000000000000 --- a/changelog/69033.fixed.md +++ /dev/null @@ -1,13 +0,0 @@ -Fixed three closely-related bugs in `salt.cache.redis_cache` that -together broke hierarchical-bank semantics: -`_build_bank_hier` now registers each child bank name in both the -parent's `$BANK_` set (consumed by `flush()` tree traversal) and the -parent's `$BANKEYS_` set (consumed by `list_()`); `_get_banks_to_remove` -now decodes the bytes returned by `smembers` and skips the `"."` -placeholder, so recursive `flush()` of a parent bank actually descends -into sub-banks instead of corrupting the path; and `flush(bank)` of a -sub-bank now removes the flushed bank's own reference from its -parent's index sets so `list_(parent)` no longer reports it as -present. Together these fixes restore `cache.list("minions")`, -`salt-run manage.present` and `salt-run manage.up` for masters -configured with `cache: redis`. diff --git a/changelog/69035.fixed.md b/changelog/69035.fixed.md deleted file mode 100644 index f1a320d46d17..000000000000 --- a/changelog/69035.fixed.md +++ /dev/null @@ -1,10 +0,0 @@ -Fixed `salt.tokens.rediscluster` being unable to retrieve any eauth -token. The cluster client was created with `decode_responses=True`, -which caused `redis_client.get()` to return `str` and broke -`salt.payload.loads` (msgpack rejects `str`); it also caused -`redis_client.keys()` to return `str` and broke -`[k.decode("utf8") for k in ...]` (`str` has no `.decode`). Both -errors were swallowed by broad `except Exception` handlers, so eauth -appeared to silently reject every token. `decode_responses=True` is -removed; values now round-trip as bytes through msgpack as the rest -of the module already expected. 
diff --git a/changelog/69037.changed.md b/changelog/69037.changed.md deleted file mode 100644 index 70e2ee59b537..000000000000 --- a/changelog/69037.changed.md +++ /dev/null @@ -1,9 +0,0 @@ -Changed `salt.returners.redis_return` to enumerate the Redis keyspace -with `SCAN` instead of the blocking `KEYS pattern` command in both -`get_jids` and `clean_old_jobs`. `KEYS` walks the entire keyspace -synchronously and stalls the Redis server for the duration; on a -master with hundreds of thousands of jobs this can block all clients -of that Redis instance for seconds. `SCAN` is incremental and -non-blocking. Order of returned keys is no longer guaranteed (the -returner does not rely on order); operators with custom scripts that -read `ret:*` or `load:*` directly may see them in a different order. diff --git a/changelog/69038.fixed.md b/changelog/69038.fixed.md deleted file mode 100644 index a6df603fed5e..000000000000 --- a/changelog/69038.fixed.md +++ /dev/null @@ -1,8 +0,0 @@ -Fixed `salt.returners.redis_return` leaking `<minion>:<fun>` last-jid -pointer keys indefinitely. The pointer was written with `pipeline.set` -and no `ex=` TTL, so any (minion, fun) pair that stopped running stuck -in Redis forever -- O(minions × distinct funcs) keys accumulating over -the lifetime of the master. The pointer now expires on the same TTL -as the rest of the returner data (`keep_jobs_seconds`). Operators with -external scripts reading these keys directly may observe them -expiring; the documentation never promised they would not. diff --git a/changelog/69039.fixed.md b/changelog/69039.fixed.md deleted file mode 100644 index 81b788e18c28..000000000000 --- a/changelog/69039.fixed.md +++ /dev/null @@ -1,7 +0,0 @@ -Fixed `salt.returners.redis_return.get_fun` always returning an -empty dict. The function read return data from a `<minion>:<jid>` -key that no other code in the module ever wrote -- a leftover from -an older storage schema. 
It now reads from the canonical -`ret:<jid>` hash via `HGET ret:<jid> <minion>`, matching the -storage layout that `returner` actually produces and the read -pattern that `get_jid` already uses. diff --git a/changelog/69048.fixed.md b/changelog/69048.fixed.md deleted file mode 100644 index 0b2b9ec18052..000000000000 --- a/changelog/69048.fixed.md +++ /dev/null @@ -1,5 +0,0 @@ -Fixed `salt.returners.pgjsonb` writing database errors to `sys.stderr` -instead of Salt's logger. Errors from `_get_serv`, `_purge_jobs` and -`_archive_jobs` are now reported via `log.exception`, so they reach -the configured `log_file` / syslog destination on a daemonized master, -including a full traceback. The unused `import sys` is also dropped. diff --git a/changelog/69058.fixed.md b/changelog/69058.fixed.md deleted file mode 100644 index 45492f583890..000000000000 --- a/changelog/69058.fixed.md +++ /dev/null @@ -1,11 +0,0 @@ -Fixed `salt.returners.pgjsonb.returner` letting any non-connection -`psycopg2.DatabaseError` propagate to the caller — including the -syndic-aggregate publish path in `salt/master.py` which had no outer -catch — so a single bad row could escape into a master subprocess. -`event_return` had no error handling at all and a database failure -during a flush propagated similarly. Both functions now catch -`SaltMasterError` and `psycopg2.DatabaseError` locally, log a -contextual message (jid/id for returns, batch size for events), and -drop the affected payload. While here, fix `event_return` passing -the events list as the positional `ret` argument to `_get_serv`, -which was a copy-paste leftover from `returner(ret)`. diff --git a/changelog/69071.fixed.md b/changelog/69071.fixed.md deleted file mode 100644 index 5b6362f32e63..000000000000 --- a/changelog/69071.fixed.md +++ /dev/null @@ -1,11 +0,0 @@ -Fixed `salt-api`'s `/events` endpoint accepting eauth tokens via query -string (``?token=...`` or ``?salt_token=...``). 
Tokens supplied that -way end up in HTTP access logs, the browser ``Referer`` header, log- -aggregation systems and shell history; the token retains validity for -``token_expire`` (12h by default), so any party reading those logs can -replay the token. The endpoint now rejects query-string tokens with a -400 error pointing at the ``X-Auth-Token`` header (for non-browser -clients) or the session cookie established by ``/login`` (for browser -``EventSource`` clients) as the supported channels. ``X-Auth-Token`` -header support is added; cookie-based auth continues to work -unchanged. diff --git a/changelog/69073.fixed.md b/changelog/69073.fixed.md deleted file mode 100644 index d610309edd7b..000000000000 --- a/changelog/69073.fixed.md +++ /dev/null @@ -1 +0,0 @@ -``LoadAuth.get_tok`` now distinguishes between corrupt token blobs (removed from the store) and transient backend errors such as Redis connection drops or NFS hangs (token kept, request treated as not-authenticated). Previously a single backend hiccup could log every authenticated user out by deleting valid tokens. diff --git a/changelog/69075.fixed.md b/changelog/69075.fixed.md deleted file mode 100644 index 9e404dce5426..000000000000 --- a/changelog/69075.fixed.md +++ /dev/null @@ -1 +0,0 @@ -``cmd.run`` and friends no longer include the ``env`` and ``stdin`` arguments in the ``CommandExecutionError`` raised when the underlying subprocess fails to start (typically ``ENOENT`` / binary not found). Both fields routinely carry credentials passed in by the caller (``env={"DB_PASSWORD": "..."}``, password piped via ``stdin``), and the error message ends up in master/minion logs and in event-bus return data visible to the API caller. 
diff --git a/changelog/69106.fixed.md b/changelog/69106.fixed.md deleted file mode 100644 index 479e0d229763..000000000000 --- a/changelog/69106.fixed.md +++ /dev/null @@ -1 +0,0 @@ -Lowered the "Cache version mismatch clearing" log message in ``salt.utils.cache.verify_cache_version`` from ``WARNING`` to ``DEBUG``; the cache is rebuilt as part of normal operation after upgrades or when an ephemeral cache directory has been removed, and does not warrant user attention. diff --git a/changelog/69129.fixed.md b/changelog/69129.fixed.md deleted file mode 100644 index 7333017387ab..000000000000 --- a/changelog/69129.fixed.md +++ /dev/null @@ -1,3 +0,0 @@ -* Relenv 0.22.14 - - Update sqlite to 3.53.2.0 - - Update openssl to 3.5.7 diff --git a/changelog/69139.fixed.md b/changelog/69139.fixed.md deleted file mode 100644 index 77b70b8077fe..000000000000 --- a/changelog/69139.fixed.md +++ /dev/null @@ -1 +0,0 @@ -Surface the real cause of a proxymodule load failure in salt-proxy's abort message. The misleading "Proxymodule X is missing an init() or a shutdown() or both" wording is now only used when init/shutdown really are missing from a loaded module; if the module failed to load (for example because its ``__virtual__`` returned False), the underlying reason is included in the error. diff --git a/changelog/69181.fixed.md b/changelog/69181.fixed.md deleted file mode 100644 index 45140cab3864..000000000000 --- a/changelog/69181.fixed.md +++ /dev/null @@ -1,5 +0,0 @@ -Fixed ``pkg.hold`` and ``pkg.list_holds`` on dnf5 systems (e.g. Fedora 42+): -``pkg.hold`` now calls ``dnf5 versionlock add <pkg>`` (the bare -``versionlock <pkg>`` form was rejected by dnf5), and ``pkg.list_holds`` -reads ``/etc/dnf/versionlock.toml`` directly so ``pkg.installed`` with -``hold: true`` is again idempotent. 
diff --git a/changelog/69185.added.md b/changelog/69185.added.md deleted file mode 100644 index 67bb195a6076..000000000000 --- a/changelog/69185.added.md +++ /dev/null @@ -1 +0,0 @@ -Migrate Salt documentation to the PyData Sphinx theme. This update modernizes the documentation UI, improves navigation with a persistent sidebar tree, and fixes issues with embedded video playback. diff --git a/changelog/69199.fixed.md b/changelog/69199.fixed.md deleted file mode 100644 index 90e31cbdba3d..000000000000 --- a/changelog/69199.fixed.md +++ /dev/null @@ -1 +0,0 @@ -Fixed Salt-SSH syncing internal modules as extmods diff --git a/changelog/69202.added.md b/changelog/69202.added.md deleted file mode 100644 index ab5176f23455..000000000000 --- a/changelog/69202.added.md +++ /dev/null @@ -1 +0,0 @@ -fix etcdv3 module authentication when using etcd3-py lib diff --git a/changelog/69203.fixed.md b/changelog/69203.fixed.md deleted file mode 100644 index 2de58c5f96ca..000000000000 --- a/changelog/69203.fixed.md +++ /dev/null @@ -1 +0,0 @@ -Fixed ``lgpo_reg.value_absent`` failing when the Registry.pol entry was already absent but the registry value still existed. ``lgpo_reg.delete_value`` was returning early before reaching the registry cleanup code, causing the state to see no changes and report failure. The registry value is now removed regardless of whether the pol entry was present. diff --git a/changelog/69205.added.md b/changelog/69205.added.md deleted file mode 100644 index 5877011c114a..000000000000 --- a/changelog/69205.added.md +++ /dev/null @@ -1 +0,0 @@ -Added ``lgpo_reg.get_rsop_value`` to query the Resultant Set of Policy (RSoP) for a registry key/value and detect whether it is managed by a Domain Group Policy Object. The ``lgpo_reg`` module functions ``set_value``, ``disable_value``, and ``delete_value`` now log a warning when a Domain GPO is detected for the target value. 
The ``lgpo_reg`` state functions ``value_present``, ``value_disabled``, and ``value_absent`` append the same warning to the state comment so it is visible in state output. diff --git a/changelog/69214.fixed.md b/changelog/69214.fixed.md deleted file mode 100644 index 0bf36c58df32..000000000000 --- a/changelog/69214.fixed.md +++ /dev/null @@ -1 +0,0 @@ -Fixed `postgres_local_cache.save_load` raising `psycopg2.errors.UniqueViolation` when more than one master in an active-active multi-master cluster persists the same JID; the INSERT is now idempotent via `ON CONFLICT (jid) DO NOTHING` on PostgreSQL >= 9.5, and the duplicate is tolerated on older servers. diff --git a/changelog/69219.fixed.md b/changelog/69219.fixed.md deleted file mode 100644 index 2ef0c656eae4..000000000000 --- a/changelog/69219.fixed.md +++ /dev/null @@ -1 +0,0 @@ -Fixed Windows MSI self-upgrade via ``pkg.install`` failing with error 1603. The old product's ``DeleteConfig_DECAC`` custom action was unconditionally deleting ``ROOTDIR\var`` during ``RemoveExistingProducts``, destroying the MSI that ``pkg.install`` had cached to ``ROOTDIR\var\cache`` before launching the upgrade. Users who had ``REMOVE_CONFIG=1`` persisted in the registry (from checking "On uninstall" at install time) hit a worse variant where the entire ``ROOTDIR`` was deleted. The fix checks ``UPGRADINGPRODUCTCODE`` — set by Windows Installer whenever an uninstall is triggered by a major upgrade — and skips all ``ROOTDIR`` deletion during upgrades, matching the behaviour of the NSIS installer which has always preserved ``ROOTDIR`` during upgrades. diff --git a/changelog/69228.fixed.md b/changelog/69228.fixed.md deleted file mode 100644 index dc8e24ff6146..000000000000 --- a/changelog/69228.fixed.md +++ /dev/null @@ -1 +0,0 @@ -Fixed `TypeError: string indices must be integers` in the minion when the master returns a bare string error response (e.g. `"bad load"`, `"Some exception handling minion payload"`) for a pillar request. 
The minion now raises a clean `AuthenticationError` instead of crashing, allowing the caller to retry or fail gracefully. diff --git a/changelog/69229.fixed.md b/changelog/69229.fixed.md deleted file mode 100644 index ebbd89648d0d..000000000000 --- a/changelog/69229.fixed.md +++ /dev/null @@ -1 +0,0 @@ -pkg.list_patches in yumpkg.py parses tdnf output on Photon OS diff --git a/changelog/69298.fixed.md b/changelog/69298.fixed.md deleted file mode 100644 index bfa69462e03c..000000000000 --- a/changelog/69298.fixed.md +++ /dev/null @@ -1 +0,0 @@ -Fix `git.tag` so that the documented `message` argument is actually forwarded to `git tag`, creating an annotated tag with the supplied message instead of silently producing a lightweight tag. diff --git a/changelog/69304.fixed.md b/changelog/69304.fixed.md deleted file mode 100644 index 84add06e9d28..000000000000 --- a/changelog/69304.fixed.md +++ /dev/null @@ -1 +0,0 @@ -Fixed `salt.auth.pam` conversation callback so it answers `PAM_PROMPT_ECHO_ON` prompts with the supplied username; previously only `PAM_PROMPT_ECHO_OFF` prompts were answered, which caused `pam_authenticate` to silently fail (and salt-api to return 401) against PAM stacks that re-prompt for the user. 
diff --git a/changelog/69308.fixed.md b/changelog/69308.fixed.md deleted file mode 100644 index 252b53503b80..000000000000 --- a/changelog/69308.fixed.md +++ /dev/null @@ -1 +0,0 @@ -Ensure multiple masters have their own job/state queues diff --git a/changelog/69312.fixed.md b/changelog/69312.fixed.md deleted file mode 100644 index b50b91259186..000000000000 --- a/changelog/69312.fixed.md +++ /dev/null @@ -1 +0,0 @@ -Fixed loading private keys from PKCS#12 containers with x509_v2 diff --git a/changelog/69319.fixed.md b/changelog/69319.fixed.md deleted file mode 100644 index 05e22148e9ea..000000000000 --- a/changelog/69319.fixed.md +++ /dev/null @@ -1 +0,0 @@ -Fixed creating self-signed PKCS#12-encoded certificates diff --git a/changelog/69386.fixed.md b/changelog/69386.fixed.md deleted file mode 100644 index 563dec29c3a6..000000000000 --- a/changelog/69386.fixed.md +++ /dev/null @@ -1 +0,0 @@ -Fixed minion state queue replacing the master-assigned JID on queued state runs, so returns now come back tagged with the JID the master actually published. diff --git a/changelog/69402.fixed.md b/changelog/69402.fixed.md deleted file mode 100644 index 5b461bed634a..000000000000 --- a/changelog/69402.fixed.md +++ /dev/null @@ -1 +0,0 @@ -Made the salt user's home directory and the relenv ``extras-`` directory configurable in the Linux packaging. The DEB preinst scripts now source ``/etc/default/salt-setup`` (and ``/etc/sysconfig/salt-minion-setup`` for cross-distro parity with RPM) before applying the ``SALT_HOME``/``SALT_USER``/``SALT_GROUP``/``SALT_NAME`` defaults, mirroring the long-standing RPM behavior. A new ``SALT_EXTRAS_DIR`` override is honored by both stacks so the extras tree can be relocated outside ``/opt/saltstack/salt`` and its ownership is correctly restored on upgrade. 
diff --git a/changelog/69416.fixed.md b/changelog/69416.fixed.md deleted file mode 100644 index d7920e122d61..000000000000 --- a/changelog/69416.fixed.md +++ /dev/null @@ -1,8 +0,0 @@ -Fixed minion worker threads hanging or crashing when returning job results -to the master. The main process now fires an error event back to the worker -when ``req_channel.send()`` times out, so workers wake up immediately rather -than waiting out their full timeout. Replaced the bare ``TimeoutError`` raised -in ``_send_req_sync`` with ``SaltReqTimeoutError`` so ``_return_pub``'s existing -handler catches it correctly. The worker's wait timeout is now derived from -``return_retry_timer_max * return_retry_tries`` to ensure it always outlasts -the main process's retry budget. diff --git a/changelog/69419.fixed.md b/changelog/69419.fixed.md deleted file mode 100644 index 95f10be91f5f..000000000000 --- a/changelog/69419.fixed.md +++ /dev/null @@ -1 +0,0 @@ -Fixed zsh completion by using the proper python3 instead of python2. diff --git a/changelog/69449.fixed.md b/changelog/69449.fixed.md deleted file mode 100644 index 08a40b56d4de..000000000000 --- a/changelog/69449.fixed.md +++ /dev/null @@ -1 +0,0 @@ -Fixed Photon OS Arm64 FIPS CI by re-enabling the OpenSSL default provider after installing openssl-fips-provider, working around the disabled-default-provider bug in `openssl-fips-provider <= 3.1.2-3.ph5` on the lagging Photon aarch64 mirror. diff --git a/changelog/69454.fixed.md b/changelog/69454.fixed.md deleted file mode 100644 index 8a4e193ba615..000000000000 --- a/changelog/69454.fixed.md +++ /dev/null @@ -1 +0,0 @@ -Add regression test for changelog template multi-line rendering and harden template with indent filter so continuation lines are correctly indented under the bullet (defensive backport of #69458 to 3006.x). 
diff --git a/changelog/69468.fixed.md b/changelog/69468.fixed.md deleted file mode 100644 index a37d9d7f9bb2..000000000000 --- a/changelog/69468.fixed.md +++ /dev/null @@ -1 +0,0 @@ -Fixed ``lgpo_reg`` module and state functions failing on Windows Domain Controllers with ``Access is denied`` when writing to ``HKLM\SOFTWARE\Policies\`` subkeys. The ``set_value``, ``disable_value``, and ``delete_value`` execution module functions now accept a ``write_registry`` parameter (default ``None``) that auto-detects Domain Controllers via the ``ProductType`` registry key and skips the direct registry write when one is detected, instead relying on the Group Policy engine to apply the policy on the next refresh. An explicit ``True`` or ``False`` overrides auto-detection. A ``refresh_policy`` parameter (default ``False``) has been added to all three functions to trigger an in-process ``userenv.RefreshPolicy`` call immediately after the ``Registry.pol`` file is updated. The corresponding state functions ``value_present``, ``value_disabled``, and ``value_absent`` expose the same parameters. A standalone ``lgpo_reg.refresh_policy`` execution function and ``lgpo_reg.refresh_policy`` state have been added to allow a single Group Policy refresh to be issued after a batch of policy writes. ``is_domain_controller`` has been added to ``salt.utils.win_functions`` and ``refresh_policy`` has been added to ``salt.utils.win_lgpo_reg``. diff --git a/changelog/69486.fixed.md b/changelog/69486.fixed.md deleted file mode 100644 index 1b4085fa89e2..000000000000 --- a/changelog/69486.fixed.md +++ /dev/null @@ -1 +0,0 @@ -Fixed 3006.x Windows nightly CI by pinning the runner-host Python to 3.14.6 (OpenSSL 3.5.7); the setup-python default `3.14` was resolving to a cached 3.14.5 build whose OpenSSL 3.0.20 rejected the cert pypi.org currently serves. 
diff --git a/changelog/69490.fixed.md b/changelog/69490.fixed.md deleted file mode 100644 index 9579a05c8d6a..000000000000 --- a/changelog/69490.fixed.md +++ /dev/null @@ -1 +0,0 @@ -Fixed 3006.x Windows nightly CI Deps by dropping a sitecustomize hook into the salt onedir's `Lib/site-packages` that applies the cpython#104135 iter-and-skip patch before pip touches TLS; the prior runner-host Python pin in #69486 targeted the wrong interpreter (the failing pip runs in a venv created from the relenv-bundled Python 3.10) and is reverted. diff --git a/changelog/69492.fixed.md b/changelog/69492.fixed.md deleted file mode 100644 index 43a4b3e9f2e3..000000000000 --- a/changelog/69492.fixed.md +++ /dev/null @@ -1 +0,0 @@ -Fixed ``lgpo_reg`` failures on Windows when ``Registry.pol`` is temporarily locked by the Group Policy service or other processes. Salt now uses ``EnterCriticalPolicySection`` / ``LeaveCriticalPolicySection`` from ``userenv.dll`` — the same synchronization primitive used by the GP engine — to serialize read-modify-write access to ``Registry.pol``. A retry loop with configurable attempts and delay is also applied for non-GP lockers such as antivirus scanners or VSS snapshots that do not participate in the GP critical section handshake. diff --git a/doc/topics/releases/3006.26.md b/doc/topics/releases/3006.26.md new file mode 100644 index 000000000000..841e515b50a4 --- /dev/null +++ b/doc/topics/releases/3006.26.md @@ -0,0 +1,335 @@ +(release-3006.26)= +# Salt 3006.26 release notes + + + + + + + +## Changelog + +### Removed + +- Removed the unmaintained `linode-python` package dependency to stop SyntaxWarnings during install for retired Linode API v3. [#68992](https://github.com/saltstack/salt/issues/68992) + + +### Changed + +- Changed `salt.returners.redis_return` to enumerate the Redis keyspace + with `SCAN` instead of the blocking `KEYS pattern` command in both + `get_jids` and `clean_old_jobs`. 
`KEYS` walks the entire keyspace + synchronously and stalls the Redis server for the duration; on a + master with hundreds of thousands of jobs this can block all clients + of that Redis instance for seconds. `SCAN` is incremental and + non-blocking. Order of returned keys is no longer guaranteed (the + returner does not rely on order); operators with custom scripts that + read `ret:*` or `load:*` directly may see them in a different order. [#69037](https://github.com/saltstack/salt/issues/69037) + + +### Fixed + +- Fixed multi-line scalar variables loaded via `import_yaml` (or `load_yaml`) being rendered as literal `\n` instead of actual newlines when the loaded data is interpolated into a YAML state file (e.g. `- context: {{ data }}`). `PrintableDict.__str__`/`__repr__` now emit string values containing newlines as YAML-safe double-quoted scalars rather than Python `repr()` so they round-trip correctly through the subsequent YAML render pass. [#30690](https://github.com/saltstack/salt/issues/30690) +- Handle requisites correctly for empty SLS files [#30971](https://github.com/saltstack/salt/issues/30971) +- Fixed ``win_pkg`` functions ignoring the ``saltenv`` setting in minion configuration. All public functions (``refresh_db``, ``genrepo``, ``install``, ``remove``, ``list_pkgs``, ``latest_version``, ``upgrade_available``, ``list_upgrades``, ``list_available``, ``version``, ``get_repo_data``, ``get_package_info``) now fall back to ``__opts__["saltenv"]`` when ``saltenv`` is not passed explicitly, instead of always defaulting to ``base``. [#38551](https://github.com/saltstack/salt/issues/38551) +- ``dpkg_lowpkg`` no longer reads ``/var/lib/dpkg/available`` or ``/var/lib/dpkg/info/<pkg>.list`` directly. It now uses ``dpkg-query`` exclusively, addressing the lintian ``uses-dpkg-database-directly`` warning reported in #52605. ``lowpkg.info`` derives the package install time from dpkg's ``${db-fsys:Last-Modified}`` field instead of the ``.list`` file mtime. 
[#52605](https://github.com/saltstack/salt/issues/52605) +- Added ``encoding`` parameter to ``file.replace`` execution module and state to support UTF-16, UTF-32, and other multi-byte encoded files that would otherwise be incorrectly treated as binary. [#52793](https://github.com/saltstack/salt/issues/52793) +- Fixed `postgres._find_pg_binary` ignoring `postgres.bins_dir` when a `psql` binary is also present on the system PATH, ensuring the configured `bins_dir` is always preferred over the system PATH. [#53190](https://github.com/saltstack/salt/issues/53190) +- Percent-encode the user and password when adding HTTP basic auth to a URL so reserved characters no longer corrupt the result [#55561](https://github.com/saltstack/salt/issues/55561) +- Fixed a ``SaltCacheError`` ("maximum recursion depth exceeded") raised by the + etcd data cache when listing an empty folder, which etcd reports as a child of + itself. The directory walk now stops at the self-referential entry instead of + recursing indefinitely. [#57377](https://github.com/saltstack/salt/issues/57377) +- Fixed `timezone.system` state always returning `result=False` with "Failed to set UTC to True" on Windows. The hardware clock on Windows is always localtime and cannot be changed, so the UTC/hwclock block is now skipped entirely on Windows. [#57754](https://github.com/saltstack/salt/issues/57754) +- Fixed `archive.tar` placing the `-C <dir>` option after the source/member operands, where tar ignores it. The directory-change option is now emitted before the operands so it takes effect in both create and extract modes. [#57847](https://github.com/saltstack/salt/issues/57847) +- Fixed `OSError: The operation completed successfully` raised by `CreateProcessWithTokenW` on Windows when the underlying advapi32 call fails. The error code is now read from `ctypes.get_last_error()` (the ctypes-saved slot) instead of `win32api.GetLastError()` (the live Windows slot, which may be reset to 0 before it is read). 
[#57848](https://github.com/saltstack/salt/issues/57848) +- Improved documentation for the `runas` and `password` parameters in `cmd.run`, `cmd.script`, and all `salt.modules.cmdmod` execution functions on Windows. The docs now accurately describe when a password is required: only when the salt-minion is **not** running as SYSTEM or as an elevated Administrator. Removed the inaccurate claim that the target user account must be in the Administrators group. Also changed `cmd.script` to log a warning instead of hard-failing when `runas` is used without a password on Windows, since a password is not always required. [#57951](https://github.com/saltstack/salt/issues/57951) +- Fixed ``pkg.group_installed``/``pkg.group_info`` failing to expand a dnf environment group whose member groups have multi-word names (e.g. ``Group '@Common NetworkManager submodules' not found`` when installing ``Workstation`` on RHEL/AlmaLinux 8, 9 and 10). The member group is now resolved by its bare name when the ``@``-prefixed lookup fails. This affects dnf4 only; dnf5 group handling is unchanged. [#60276](https://github.com/saltstack/salt/issues/60276) +- Fix `tls.create_csr` log message path to use `os.path.join` instead of f-string interpolation so paths render correctly when csr_path has a trailing slash. [#60877](https://github.com/saltstack/salt/issues/60877) +- Fixed the LDAP eauth group-membership lookup re-binding the user on every job + payload when ``auth.ldap.freeipa`` is enabled. The user is now only re-bound on + the first payload of a job, matching the standard LDAP code path, so single-use + 2FA credentials (such as a FreeIPA OTP) are no longer consumed more than once. 
[#61974](https://github.com/saltstack/salt/issues/61974) +- Fixed `SSL: DECRYPTION_FAILED_OR_BAD_RECORD_MAC` errors in the VMware cloud driver by reconnecting when a cached vCenter service instance is found to be stale or corrupted (for example when inherited across a fork by salt-cloud's parallel provider queries). [#61983](https://github.com/saltstack/salt/issues/61983) +- Fix metadata grain so EC2 ``user-data`` is returned verbatim instead of being mangled by the ``=`` line-splitter, which previously corrupted any user-data payload containing ``=`` (e.g. cloud-init ``#cloud-config`` blocks). [#62061](https://github.com/saltstack/salt/issues/62061) +- Fixed LGPO ``get_policy_info`` incorrectly returning a "multiple policies" error when duplicate ADMX policy definitions (e.g. ``TerminalServer.admx`` and ``TerminalServer-Server.admx``) resolve to the same full path. [#62732](https://github.com/saltstack/salt/issues/62732) +- Re-enable test_interrupt_on_long_running_job by removing the initial-onedir-rollout skip marker. [#63627](https://github.com/saltstack/salt/issues/63627) +- Fix missing `dns_plugin_propagate_seconds` arg in acme state/module so DNS propagation timeout is actually forwarded to certbot. [#63700](https://github.com/saltstack/salt/issues/63700) +- Improve PAM eauth diagnostics when ``salt-master`` runs as a non-root user. Previously, ``salt-master``/``salt-api`` running as the ``salt`` user (the 3006.x packaging default) silently failed every PAM authentication with only ``Pam auth failed for <user>:`` in the log; the cause is that the helper subprocess inherits the master's uid and PAM's ``unix_chkpwd`` refuses to validate other users without ``/etc/shadow`` access. The master now emits a one-shot CRITICAL log entry that names the cause and the two standard remediations (run as ``root``, or add the master user to the ``shadow`` group on Debian-derived distributions), and the module documentation describes the constraint. 
[#64275](https://github.com/saltstack/salt/issues/64275) +- Fixed incorrect minion presence events being sent out on hourly ``Maintenance`` process restarts [#64505](https://github.com/saltstack/salt/issues/64505) +- Catch StrictUndefined in salt jinja custom filters. [#64915](https://github.com/saltstack/salt/issues/64915) +- Stopped logging the misleading "An extra return was detected from minion ... this could be a replay attack" ERROR for benign duplicate returns (also fixes #65516). The local_cache returner now compares a duplicate return to the cached one and logs at DEBUG when the payloads match (the common retry-after-timeout or syndic re-forward case) and at WARNING -- without the "replay attack" wording -- when the payloads differ. [#65301](https://github.com/saltstack/salt/issues/65301) +- Fixed non-root salt CLI access when ``publisher_acl`` or ``external_auth`` is configured. Since 3006.3 the master defaults to running as the ``salt`` user, which left ``sock_dir`` and ``cachedir`` mode ``0o750`` and blocked authorised non-root users from traversing into them to reach ``master_event_pub.ipc`` / ``publish_pull.ipc`` and their per-user ``.<user>_key``. The master now adds the world-execute bit to those two directories when ACLs are configured, without exposing directory listings. [#65317](https://github.com/saltstack/salt/issues/65317) +- Fixed ``salt.ext.tornado.netutil`` import on Python 3.12+ where ``ssl.match_hostname`` was removed and the unmaintained ``backports.ssl_match_hostname`` package is unavailable, which previously broke any Salt master-initiated job (e.g. ``test.ping``, ``state.apply``) on Fedora 39+/Ubuntu 24.04 masters. [#65360](https://github.com/saltstack/salt/issues/65360) +- See #65301 -- the same fix to ``salt/returners/local_cache.py`` quiets the spurious "extra return ... replay attack" ERROR that appeared in multimaster and master-of-masters/syndic setups when the same return arrived more than once. 
[#65516](https://github.com/saltstack/salt/issues/65516) +- Fix deadlock in parallel `cmd.script` states when the script is served by the master. + + Same fork-inherited ZeroMQ socket race as the `file.managed` fix: a + `cmd.script` state with `parallel: True` downloads the script via + `cp.cache_file` in a forked child that inherited the parent's ZeroMQ + REQ socket, deadlocking the asyncio loop at ~100% CPU. Resolved by the + same `os.register_at_fork` handlers that drop inherited channel/socket + references in forked children. [#65709](https://github.com/saltstack/salt/issues/65709) +- Fixed pip.uninstall rejecting the extra_args keyword argument, matching the behavior of pip.install. [#65870](https://github.com/saltstack/salt/issues/65870) +- Fixed salt-ssh failing to fetch ``gitfs_remotes``. ``salt.config.master_config`` + sets ``__fs_update = True`` to suppress fileserver refreshes done by ``FSChan`` + (the master daemon's maintenance thread handles them). salt-ssh inherits the + master config but has no maintenance thread, so its ``FSClient`` never refreshed + the fileserver backends and wrappers such as ``cp.list_states`` saw no gitfs + content until the user ran ``salt-run fileserver.update`` or manually + ``git fetch``ed the cached repos. ``salt.client.ssh.SSH.__init__`` now removes + the suppression flag before instantiating ``FSClient`` so gitfs is refreshed + once at startup. [#66148](https://github.com/saltstack/salt/issues/66148) +- Fixed ``salt/version.py`` reporting the wrong major version on the 3006.x branch when built from a checkout that has no ``salt/_version.txt`` and no usable ``.git`` directory. ``SaltVersionsInfo.current_release()`` now returns the branch's own codename (``Sulfur``) instead of the next un-released codename in the table, so source builds and other tooling no longer leak ``3007.0`` into the reported version. 
[#67061](https://github.com/saltstack/salt/issues/67061) +- Fixed ``saltutil.runner`` and ``saltutil.wheel`` running master-side functions + as the minion's user (typically ``root``) instead of the master's configured + user (the packaged default since 3006 is ``salt``). Running as the wrong user + left root-owned files in, and tripped git's ``safe.directory`` check on, the + salt-owned master cache -- breaking, for example, ``git_pillar.update`` invoked + via ``saltutil.runner``. These functions now drop to the master's configured + user before executing when invoked from a more-privileged process. [#67716](https://github.com/saltstack/salt/issues/67716) +- Fixed `LocalClient.cmd_subset` raising `TypeError: argument of type 'bool' is not iterable` when one or more targeted minions failed to respond to the `sys.list_functions` probe. Failed minions are now skipped during subset selection. [#68103](https://github.com/saltstack/salt/issues/68103) +- Fixed ``slack_bolt`` engine crashing with ``UnboundLocalError`` when a Slack workflow or other bot posts a message to a monitored channel. Bot messages (``subtype: bot_message``) carry ``bot_id`` and ``username`` instead of a ``user`` field, and these are now used as fallbacks so the engine continues running. [#68105](https://github.com/saltstack/salt/issues/68105) +- Fixed `user.present` to not fail with `result: False` in test mode when a referenced group does not yet exist; the state now reports the pending changes so users can preview states that depend on groups created by a `group.present` requisite in the same run. [#68110](https://github.com/saltstack/salt/issues/68110) +- Fixed ``salt-minion`` and ``salt-proxy`` leaving a privileged (root) keepalive supervisor process at the head of an otherwise unprivileged minion process tree when ``user`` is set to a non-root account. The supervisor now drops privileges to the configured user once the keepalive child has been spawned. 
[#68115](https://github.com/saltstack/salt/issues/68115) +- Fixed ``ValueError: Formatting field not found in record: 'colorlevel'`` errors when ``log_fmt_console`` uses custom color attributes such as ``%(colorlevel)s`` or ``%(colormsg)s``. ``SaltLogRecord`` now always provides the ``color*`` attributes (uncolored by default) so that log records buffered by the temporary deferred stream handler can be formatted by a colorized console formatter once it is installed. [#68129](https://github.com/saltstack/salt/issues/68129) +- Fixed ``salt-call`` silently ignoring ``--file-root``, ``--pillar-root``, and ``--states-dir`` when ``--local`` was not passed. These overrides only affect the local minion config and are clobbered by the master's values via the remote file client, so ``salt-call`` now emits a warning explaining that ``--local`` is required for the override to take effect. [#68137](https://github.com/saltstack/salt/issues/68137) +- Fixed event signature verification failing under ``minion_sign_messages``. The minion was signing the return load before ``salt.channel.client.AsyncReqChannel._package_load`` attached transport metadata (``nonce``, ``ts``, ``tok``, ``id``), so the bytes the master re-serialized to verify did not match what was signed and every signed return was dropped. Signing is now performed inside ``_package_load`` after the metadata is attached, against the same bytes the master verifies. [#68181](https://github.com/saltstack/salt/issues/68181) +- Fixed ``pkgrepo.managed`` not honouring ``clean_file: True`` when the desired + repo line is already present in the managed file alongside unrelated stale + lines. Previously the state returned "already configured" and silently + skipped both the file truncation and the re-write, leaving the stale + entries (for example an obsolete ``bullseye-backports`` line in a file + managed for ``bookworm-backports``) in place.
The clean + reconfigure + path now runs whenever the managed file contains any non-comment, + non-blank content other than the desired repo line; when the file already + contains only the desired line the state remains idempotent. [#68208](https://github.com/saltstack/salt/issues/68208) +- Fixed ``pkg.group_installed`` reporting failure on RPM-based systems when a package group's default or optional members are not available in any enabled repository. The state now only considers mandatory group members and explicitly requested ``include`` packages when checking for install failures, matching the behavior of ``yum/dnf group install`` (which reports "No match for group package" but still exits 0). [#68210](https://github.com/saltstack/salt/issues/68210) +- Pass ``--disable-pip-version-check`` when ``pip.list``, ``pip.freeze``, ``pip.list_upgrades``, ``pip.upgrade``, and ``pip.list_all_versions`` invoke pip, so these calls no longer hang for ~20s per invocation on airgapped minions while pip tries to reach PyPI for its self-version check. [#68214](https://github.com/saltstack/salt/issues/68214) +- Fixed ``archive.extracted`` failing to enforce ``user``/``group`` ownership on archives whose tar/zip members include no explicit directory entries (e.g. Oracle's GraalVM JDK tarballs). ``archive.list`` now derives the top-level directory from the common prefix of file and link members in addition to dir members, so ownership is applied to the extracted top-level directory in all cases. [#68227](https://github.com/saltstack/salt/issues/68227) +- Fixed deltaproxy sub-proxies returning identical grain data for every controlled minion. ``subproxy_post_master_init`` now re-packs each sub-proxy's freshly loaded per-minion grains into its execution-module, returner, executor and proxy LazyLoaders so ``__grains__`` inside loaded modules reflects that sub-proxy's device instead of the placeholder values captured during the first-pass grains load through the control proxy. 
[#68248](https://github.com/saltstack/salt/issues/68248) +- Fixed the salt-minion (and salt-api, salt-cloud, salt-master, salt-syndic) Debian postinst scripts hanging or erroring with "Bad file descriptor" when run from a non-interactive Debian preseed late_command chroot, by tearing down the debconf protocol with ``db_stop`` and explicitly closing file descriptor 3 before the auto-generated ``#DEBHELPER#`` section runs. [#68269](https://github.com/saltstack/salt/issues/68269) +- Fixed ``file.managed`` failing with ``WinError 123`` on Windows when caching a remote URL whose path embeds another URL (e.g. an archive.org snapshot of an ``https://...`` resource). The URL-path portion of the ``extrn_files`` cache path is now sanitised the same way the network location already is. [#68273](https://github.com/saltstack/salt/issues/68273) +- Fixed ``logrotate.set`` dropping the second ``endscript`` (and turning + embedded shell commands into bogus setting keys) when a stanza contained + multiple script blocks such as both ``prerotate`` and ``postrotate``. Script + directives are now parsed as opaque multi-line bodies and round-trip with + their own ``endscript`` terminator each. [#68293](https://github.com/saltstack/salt/issues/68293) +- Fixed the `salt.state` orchestrate state silently reporting only `Run failed on minions: ` when a targeted minion returned `False`, no return at all, or a list of error strings. The orchestrate comment now includes the per-minion failure detail (the minion's actual return value or "did not return a state result") so operators can diagnose `salt-run state.orchestrate` failures without re-running with extra logging. [#68326](https://github.com/saltstack/salt/issues/68326) +- Fixed worker process crash when salt is used outside CLI tools. 
[#68332](https://github.com/saltstack/salt/issues/68332) +- Fixed ``clean_old_jobs`` in the default local job cache returner to use the jid file's modification time (``st_mtime``) instead of the inode change time (``st_ctime``). A package upgrade's ``chown -R /var/cache/salt/master`` resets ``st_ctime`` on every existing jid file, which previously made the maintenance process treat every pre-upgrade job as freshly created and prevented cleanup until ``keep_jobs_seconds`` had elapsed. On busy masters this exhausted the partition's inodes within a day. [#68351](https://github.com/saltstack/salt/issues/68351) +- Fixed the ``proxmox`` salt-cloud driver raising ``Could not determine an IP address to use`` before the VM was created and started. The IP address is now determined after the VM is running, and the running VM's address reported by Proxmox is used as a fallback when neither a static ``ip_address`` nor ``agent_get_ip`` is configured. [#68353](https://github.com/saltstack/salt/issues/68353) +- Changed ``KillMode`` in the shipped ``salt-minion.service`` systemd unit from ``process`` to ``mixed`` so that ``systemctl stop`` / ``systemctl restart salt-minion`` no longer leaves orphaned ``Minion._thread_return`` worker processes outside the cgroup. SIGTERM is still sent only to the main PID (so the job return scheduled by ``service.restart salt-minion`` from #68183 has time to finish), but any remaining children are reaped with SIGKILL after the main process exits or ``TimeoutStopSec`` elapses. [#68406](https://github.com/saltstack/salt/issues/68406) +- Fixed `task.edit_task` on Windows rejecting `restart_count=999` even though the documented and error-message-stated maximum is 999. The validation now accepts the full 1..999 range. [#68419](https://github.com/saltstack/salt/issues/68419) +- Fixed ``win_task.add_trigger`` so that ``repeat_duration="Indefinitely"`` actually produces an indefinite repetition pattern. 
Previously the empty string from the internal duration lookup was assigned to ``Repetition.Duration``, which the Windows Task Scheduler treats as "0 seconds" and silently disables repetition. The Duration property is now left at its default for the "Indefinitely" case, which is the documented way to repeat forever. [#68420](https://github.com/saltstack/salt/issues/68420) +- Fixed ``user.setpassword`` on Windows reporting success (``retcode: 0``) when the target user does not exist. The execution module now returns ``False`` and logs an error in that case, so callers and the ``user.present`` state correctly detect the failure instead of swallowing the Win32 "user name could not be found" message as a successful return. [#68428](https://github.com/saltstack/salt/issues/68428) +- Fixed ``user.present`` on Windows so it actually updates a user's password + when the existing password differs from the one specified in the state. + Previously the state reported "User is already present and up to date" and + left the password unchanged. [#68429](https://github.com/saltstack/salt/issues/68429) +- Stop salt-ssh state runs from clobbering the master-side fileclient ``cachedir`` with the on-target ``thin_dir`` cachedir. The state fileserver cache for salt-ssh state runs is now written under the configured master ``cachedir`` (e.g. ``/var/cache/salt/master/``) instead of under the minion's thin_dir path on the master filesystem. [#68458](https://github.com/saltstack/salt/issues/68458) +- Fixed ``pkg.add_repo_key`` and ``pkgrepo.managed`` so APT keyring files that target an ``.asc`` destination keep their ASCII armor instead of being dearmored, matching the apt-secure(8) convention and allowing armored keyfiles that bundle multiple keys to be installed even when the ``gpg`` binary is not available on the minion. 
[#68464](https://github.com/saltstack/salt/issues/68464) +- Fixed ``jobs.list_jobs search_metadata`` so it matches jobs whose metadata + was passed as a CLI keyword argument (e.g. ``state.apply metadata={...}``) + and is therefore carried inside the job's ``Arguments`` rather than at the + top of the job payload. [#68481](https://github.com/saltstack/salt/issues/68481) +- Fixed `lgpo.set` state reporting "Failed to set the following policies" on subsequent runs of policies with sub-elements (e.g. Storage Sense thresholds). The state compared a user-supplied dict keyed by element id with a current dict keyed by the ADML display name; both forms now normalize to the canonical element id before comparison so the state is idempotent. [#68489](https://github.com/saltstack/salt/issues/68489) +- Fixed minion rejecting the master with "Invalid master key" after restart when the cached `minion_master.pub` differs from the master's payload pub_key only in trailing whitespace. `AsyncAuth.verify_master` now normalizes both sides through `clean_key` before comparing and caches the normalized form on first contact. [#68493](https://github.com/saltstack/salt/issues/68493) +- Fixed ``TypeError: 'NoneType' object is not iterable`` raised from ``AsyncReqMessageClient._send_recv`` when a per-message timeout completes the future before the send/receive coroutine catches a transient transport exception, which aborted the minion's connect loop and prevented it from connecting to the master. [#68506](https://github.com/saltstack/salt/issues/68506) +- Fixed ``docker_network.present`` recreating networks on every run against Docker 29+. Docker 29 added an empty ``IPRange`` field to every IPAM Config entry; ``docker.compare_networks`` now drops empty/None placeholder values before comparing pools, and the state's default-pool short-circuit treats the empty field as absent. 
[#68518](https://github.com/saltstack/salt/issues/68518) +- Fixed `pkg.installed` verification on x86_64 hosts that mix `x86_64` and `x86_64_v2` packages (e.g. AlmaLinux 10.1). `salt.utils.pkg.rpm.resolve_name` and `salt.modules.yumpkg.normalize_name` now treat `x86_64_v2` as compatible with `x86_64` instead of appending the arch suffix, so installed packages match the names Salt records. [#68540](https://github.com/saltstack/salt/issues/68540) +- Fixed ``mysql_grants.present`` reporting "Failed to execute" when granting ``ALL PRIVILEGES`` on ``*.*`` against MySQL 8.4+, where the server's privilege set drifted from Salt's hard-coded list (``SET_USER_ID`` removed, many dynamic privileges added). ``grant_exists`` now derives the expected privilege set from the connected server's ``SHOW PRIVILEGES`` output instead of a static list. [#68567](https://github.com/saltstack/salt/issues/68567) +- Fixed ``cp.get_template`` raising ``AttributeError: 'NoneType' object has no attribute 'get'`` when the Jinja template uses ``{% from '...' import ... with context %}``. The cp module's loader-backed ``__opts__`` is now unwrapped to a plain dict before the SaltCacheLoader instantiates the file client and channel that fetch the imported template. [#68572](https://github.com/saltstack/salt/issues/68572) +- Fixed `ImportError: cannot import name 'wait' from partially initialized module 'multiprocessing.connection'` raised during salt-master/minion shutdown when a reentrant SIGTERM hit `ProcessManager.kill_children()` mid `Process.join(0)`. `salt.utils.process` now eagerly imports `multiprocessing.connection` so the module is fully initialised before any signal handler can trigger its lazy import. [#68573](https://github.com/saltstack/salt/issues/68573) +- Fixed `cmd.script` on Windows raising `Invalid user: ` when `runas` is a domain account (`DOMAIN\user`, `user@DOMAIN`, or a SID). 
The pre-execution `user.info` check is backed by `NetUserGetInfo` which only resolves local-machine accounts and returns empty for many valid domain users; the missing lookup is now logged as a warning and execution continues so the underlying `win_runas` machinery can authenticate the account. [#68578](https://github.com/saltstack/salt/issues/68578) +- Fixed `pkg.install` on Windows silently downgrading the salt-minion when a numeric `version=` argument was passed (e.g. `version=3007.10` was YAML-parsed to the float `3007.1` and then matched the wrong winrepo entry). When the numeric version uniquely matches a string-keyed winrepo entry it is now resolved to that entry; when it is ambiguous (e.g. both `3007.1` and `3007.10` are in the winrepo) the install is refused with a clear error pointing the user at the quoted-version syntax. [#68620](https://github.com/saltstack/salt/issues/68620) +- Fixed the loader masking failure reasons when multiple modules declare the same `__virtualname__` and each `__virtual__()` returns False, so users now see every reason (e.g. both x509 v1's "Superseded, using x509_v2" and x509_v2's "Could not load cryptography") instead of only the first one recorded. [#68625](https://github.com/saltstack/salt/issues/68625) +- Fix `NetapiClient.runner` raising `TypeError` when `timeout` arrives as a string from the salt-api HTTP form. [#68653](https://github.com/saltstack/salt/issues/68653) +- Fixed `master_job_cache: redis_return` raising `KeyError: 'redis_return.prep_jid'` by registering the `redis` returner under both `redis` and `redis_return` virtual names, matching the documented `--return redis_return` usage and the module's file name. [#68663](https://github.com/saltstack/salt/issues/68663) +- Fixed ``ini.options_present`` with ``strict: True`` to remove sections that are present in the ini file but absent from the supplied ``sections`` mapping. 
[#68673](https://github.com/saltstack/salt/issues/68673) +- Handle `SaltDeserializationError` in grains cache loading so a corrupted cache file no longer propagates as CRITICAL during minion startup. [#68678](https://github.com/saltstack/salt/issues/68678) +- Fixed ``network.interfaces`` on Windows systems falling back to WMI (i.e. .NET older than 4.7.2): the default gateway is now reported under ``gateway`` instead of being mistakenly emitted as ``broadcast``. [#68692](https://github.com/saltstack/salt/issues/68692) +- Fixed ``file.managed`` (and other template-rendering callers) silently overwriting user-supplied ``slspath``, ``sls_path``, ``slsdotpath`` and ``slscolonpath`` values in ``defaults``/``context`` with values regenerated from the caller's ``sls`` key. [#68754](https://github.com/saltstack/salt/issues/68754) +- Fixed ``env_order`` not being honored when merging pillar data across environments. ``Pillar.render_pillar`` now iterates matched environments in the configured ``env_order`` so that, with ``top_file_merging_strategy: merge_all``, the last environment in ``env_order`` wins on conflicting pillar keys instead of the result depending on dict insertion order. [#68785](https://github.com/saltstack/salt/issues/68785) +- Improved the "Malformed topfile" error from ``HighState.verify_tops`` to name the saltenv and the matcher whose state declarations were not formed as a list, so users can locate the offending entry in their ``top.sls``. [#68792](https://github.com/saltstack/salt/issues/68792) +- Removed orphaned GnuPG dotlock files (``.#lk..``) from ``gpg_keydir`` before each decrypt in the ``gpg`` renderer so they no longer accumulate when a gpg subprocess is killed mid-operation. 
[#68869](https://github.com/saltstack/salt/issues/68869) +- Fix `pkg.installed` idempotency on FreeBSD when `with_origin=True` causes + `pkg.list_pkgs` to return per-package dicts instead of version lists; extract + the version list before version-string comparison so a second state run no + longer falsely reports packages as changed. [#68886](https://github.com/saltstack/salt/issues/68886) +- Fix gen_signature() signing raw pub key content instead of clean_key'd content, causing master_use_pubkey_signature verification to always fail. [#68930](https://github.com/saltstack/salt/issues/68930) +- Fixed spurious ``FileLockError: lock_fn ... exists and is not a file`` raised by ``salt.utils.files.wait_lock`` and ``salt.utils.files.await_lock`` (and therefore by ``state.apply`` queue locking) when another process removed the lock file between the two separate ``os.path.exists`` / ``os.path.isfile`` stats. The pre-check now uses a single ``os.stat`` call so a transient regular-file lock no longer trips the "not a file" branch. [#68931](https://github.com/saltstack/salt/issues/68931) +- Fixed pkg.installed(update_holds=True) for APT multiarch packages by preserving arch-qualified package names through install target parsing and verification. [#68932](https://github.com/saltstack/salt/issues/68932) +- Fix deadlock in parallel `file.managed` states when source is served by the master. + + Forked parallel-state children previously inherited the parent's ZeroMQ + REQ socket and asyncio loop from `salt.fileclient.RemoteClient`, + `salt.crypt.AsyncAuth/SAuth`, and `salt.utils.event.SaltEvent`. Multiple + sibling children racing those handles deadlocked the asyncio loop with + ~98% CPU and never completed. Salt now registers `os.register_at_fork` + handlers on those classes that drop inherited channel/socket references + in any forked child; the next use rebuilds them fresh. 
[#68940](https://github.com/saltstack/salt/issues/68940) +- Fixed grain and pillar targeting matching minions whose data cache entry was missing. ``CkMinions._check_cache_minions`` now excludes accepted minions that have no cached grains/pillar data from greedy target results, instead of silently including them as candidates. [#68976](https://github.com/saltstack/salt/issues/68976) +- Avoid AttributeError on a closed IPCClient when the connect coroutine resolves after close(). [#68993](https://github.com/saltstack/salt/issues/68993) +- Fixed `salt.utils.network.sanitize_host` stripping colons from IPv6 addresses, which broke `network.ping` and any other caller that passed an IPv6 host. [#68995](https://github.com/saltstack/salt/issues/68995) +- Added support for MAINTAIN (m) privilege introduced in PostgreSQL 17 to salt.modules.postgres and salt.states.postgres_privileges [#69003](https://github.com/saltstack/salt/issues/69003) +- Fixed `redis.get_master_ip` silently dropping the `password` argument. The + function was forwarding its arguments positionally to `_connect`, but + `_connect`'s third positional slot is `db`, not `password`, so the + caller's password landed in the database-index argument and the actual + password fell through to `config.option("redis.password")`. Arguments + are now passed by keyword. [#69029](https://github.com/saltstack/salt/issues/69029) +- Fixed `salt.modules.redismod._connect` rejecting valid `db=0`. The helper + used a truthy check (`if not db`) to decide whether to fall back to + `config.option("redis.db")`, but `not 0` is `True`, so an explicitly + supplied `db=0` was silently replaced by the configured value. The check + is now `if db is None`, matching the pattern already used by the sibling + `_sconnect` helper in the same module. Other arguments keep their + truthy-check semantics on purpose. 
[#69030](https://github.com/saltstack/salt/issues/69030) +- Fixed two distinct bugs in the `salt.engines.redis_sentinel` engine that + together prevented it from being usable. `start()` no longer raises + `AttributeError: 'dict_values' object has no attribute 'pop'` on Python 3 + (the dict.values() result is now wrapped in `list(...)`). `Listener` and + `start()` now accept an optional `password` argument and forward it to + the redis client, allowing the engine to authenticate against a Sentinel + that requires AUTH; the default of `None` keeps existing configurations + working unchanged. [#69031](https://github.com/saltstack/salt/issues/69031) +- Fixed `salt.returners.redis_return` silently ignoring the documented + `redis.password` configuration option. The returner now reads + `redis.password` from config (in both regular and proxy modes) and + forwards it to both the single-server `redis.StrictRedis` and the + `StrictRedisCluster` constructors. Operators with auth-protected Redis + no longer lose every job return to a hidden `NOAUTH Authentication + required` failure; deployments without a password are unaffected. [#69032](https://github.com/saltstack/salt/issues/69032) +- Fixed three closely-related bugs in `salt.cache.redis_cache` that + together broke hierarchical-bank semantics: + `_build_bank_hier` now registers each child bank name in both the + parent's `$BANK_` set (consumed by `flush()` tree traversal) and the + parent's `$BANKEYS_` set (consumed by `list_()`); `_get_banks_to_remove` + now decodes the bytes returned by `smembers` and skips the `"."` + placeholder, so recursive `flush()` of a parent bank actually descends + into sub-banks instead of corrupting the path; and `flush(bank)` of a + sub-bank now removes the flushed bank's own reference from its + parent's index sets so `list_(parent)` no longer reports it as + present. 
Together these fixes restore `cache.list("minions")`, + `salt-run manage.present` and `salt-run manage.up` for masters + configured with `cache: redis`. [#69033](https://github.com/saltstack/salt/issues/69033) +- Fixed `salt.tokens.rediscluster` being unable to retrieve any eauth + token. The cluster client was created with `decode_responses=True`, + which caused `redis_client.get()` to return `str` and broke + `salt.payload.loads` (msgpack rejects `str`); it also caused + `redis_client.keys()` to return `str` and broke + `[k.decode("utf8") for k in ...]` (`str` has no `.decode`). Both + errors were swallowed by broad `except Exception` handlers, so eauth + appeared to silently reject every token. `decode_responses=True` is + removed; values now round-trip as bytes through msgpack as the rest + of the module already expected. [#69035](https://github.com/saltstack/salt/issues/69035) +- Fixed `salt.returners.redis_return` leaking `<minion>:<fun>` last-jid + pointer keys indefinitely. The pointer was written with `pipeline.set` + and no `ex=` TTL, so any (minion, fun) pair that stopped running stuck + in Redis forever -- O(minions × distinct funcs) keys accumulating over + the lifetime of the master. The pointer now expires on the same TTL + as the rest of the returner data (`keep_jobs_seconds`). Operators with + external scripts reading these keys directly may observe them + expiring; the documentation never promised they would not. [#69038](https://github.com/saltstack/salt/issues/69038) +- Fixed `salt.returners.redis_return.get_fun` always returning an + empty dict. The function read return data from a `<minion>:<jid>` + key that no other code in the module ever wrote -- a leftover from + an older storage schema. It now reads from the canonical + `ret:<jid>` hash via `HGET ret:<jid> <minion>`, matching the + storage layout that `returner` actually produces and the read + pattern that `get_jid` already uses.
[#69039](https://github.com/saltstack/salt/issues/69039) +- Fixed `salt.returners.pgjsonb` writing database errors to `sys.stderr` + instead of Salt's logger. Errors from `_get_serv`, `_purge_jobs` and + `_archive_jobs` are now reported via `log.exception`, so they reach + the configured `log_file` / syslog destination on a daemonized master, + including a full traceback. The unused `import sys` is also dropped. [#69048](https://github.com/saltstack/salt/issues/69048) +- Fixed `salt.returners.pgjsonb.returner` letting any non-connection + `psycopg2.DatabaseError` propagate to the caller — including the + syndic-aggregate publish path in `salt/master.py` which had no outer + catch — so a single bad row could escape into a master subprocess. + `event_return` had no error handling at all and a database failure + during a flush propagated similarly. Both functions now catch + `SaltMasterError` and `psycopg2.DatabaseError` locally, log a + contextual message (jid/id for returns, batch size for events), and + drop the affected payload. While here, fix `event_return` passing + the events list as the positional `ret` argument to `_get_serv`, + which was a copy-paste leftover from `returner(ret)`. [#69058](https://github.com/saltstack/salt/issues/69058) +- Fixed `salt-api`'s `/events` endpoint accepting eauth tokens via query + string (``?token=...`` or ``?salt_token=...``). Tokens supplied that + way end up in HTTP access logs, the browser ``Referer`` header, log- + aggregation systems and shell history; the token retains validity for + ``token_expire`` (12h by default), so any party reading those logs can + replay the token. The endpoint now rejects query-string tokens with a + 400 error pointing at the ``X-Auth-Token`` header (for non-browser + clients) or the session cookie established by ``/login`` (for browser + ``EventSource`` clients) as the supported channels. ``X-Auth-Token`` + header support is added; cookie-based auth continues to work + unchanged. 
[#69071](https://github.com/saltstack/salt/issues/69071) +- ``LoadAuth.get_tok`` now distinguishes between corrupt token blobs (removed from the store) and transient backend errors such as Redis connection drops or NFS hangs (token kept, request treated as not-authenticated). Previously a single backend hiccup could log every authenticated user out by deleting valid tokens. [#69073](https://github.com/saltstack/salt/issues/69073) +- ``cmd.run`` and friends no longer include the ``env`` and ``stdin`` arguments in the ``CommandExecutionError`` raised when the underlying subprocess fails to start (typically ``ENOENT`` / binary not found). Both fields routinely carry credentials passed in by the caller (``env={"DB_PASSWORD": "..."}``, password piped via ``stdin``), and the error message ends up in master/minion logs and in event-bus return data visible to the API caller. [#69075](https://github.com/saltstack/salt/issues/69075) +- Lowered the "Cache version mismatch clearing" log message in ``salt.utils.cache.verify_cache_version`` from ``WARNING`` to ``DEBUG``; the cache is rebuilt as part of normal operation after upgrades or when an ephemeral cache directory has been removed, and does not warrant user attention. [#69106](https://github.com/saltstack/salt/issues/69106) +- * Relenv 0.22.14 + - Update sqlite to 3.53.2.0 + - Update openssl to 3.5.7 [#69129](https://github.com/saltstack/salt/issues/69129) +- Surface the real cause of a proxymodule load failure in salt-proxy's abort message. The misleading "Proxymodule X is missing an init() or a shutdown() or both" wording is now only used when init/shutdown really are missing from a loaded module; if the module failed to load (for example because its ``__virtual__`` returned False), the underlying reason is included in the error. [#69139](https://github.com/saltstack/salt/issues/69139) +- Fixed ``pkg.hold`` and ``pkg.list_holds`` on dnf5 systems (e.g. 
Fedora 42+): + ``pkg.hold`` now calls ``dnf5 versionlock add `` (the bare + ``versionlock `` form was rejected by dnf5), and ``pkg.list_holds`` + reads ``/etc/dnf/versionlock.toml`` directly so ``pkg.installed`` with + ``hold: true`` is again idempotent. [#69181](https://github.com/saltstack/salt/issues/69181) +- Fixed Salt-SSH syncing internal modules as extmods [#69199](https://github.com/saltstack/salt/issues/69199) +- Fixed ``lgpo_reg.value_absent`` failing when the Registry.pol entry was already absent but the registry value still existed. ``lgpo_reg.delete_value`` was returning early before reaching the registry cleanup code, causing the state to see no changes and report failure. The registry value is now removed regardless of whether the pol entry was present. [#69203](https://github.com/saltstack/salt/issues/69203) +- Fixed `postgres_local_cache.save_load` raising `psycopg2.errors.UniqueViolation` when more than one master in an active-active multi-master cluster persists the same JID; the INSERT is now idempotent via `ON CONFLICT (jid) DO NOTHING` on PostgreSQL >= 9.5, and the duplicate is tolerated on older servers. [#69214](https://github.com/saltstack/salt/issues/69214) +- Fixed Windows MSI self-upgrade via ``pkg.install`` failing with error 1603. The old product's ``DeleteConfig_DECAC`` custom action was unconditionally deleting ``ROOTDIR\var`` during ``RemoveExistingProducts``, destroying the MSI that ``pkg.install`` had cached to ``ROOTDIR\var\cache`` before launching the upgrade. Users who had ``REMOVE_CONFIG=1`` persisted in the registry (from checking "On uninstall" at install time) hit a worse variant where the entire ``ROOTDIR`` was deleted. The fix checks ``UPGRADINGPRODUCTCODE`` — set by Windows Installer whenever an uninstall is triggered by a major upgrade — and skips all ``ROOTDIR`` deletion during upgrades, matching the behaviour of the NSIS installer which has always preserved ``ROOTDIR`` during upgrades. 
[#69219](https://github.com/saltstack/salt/issues/69219) +- Fixed `TypeError: string indices must be integers` in the minion when the master returns a bare string error response (e.g. `"bad load"`, `"Some exception handling minion payload"`) for a pillar request. The minion now raises a clean `AuthenticationError` instead of crashing, allowing the caller to retry or fail gracefully. [#69228](https://github.com/saltstack/salt/issues/69228) +- pkg.list_patches in yumpkg.py parses tdnf output on Photon OS [#69229](https://github.com/saltstack/salt/issues/69229) +- Fix `git.tag` so that the documented `message` argument is actually forwarded to `git tag`, creating an annotated tag with the supplied message instead of silently producing a lightweight tag. [#69298](https://github.com/saltstack/salt/issues/69298) +- Fixed `salt.auth.pam` conversation callback so it answers `PAM_PROMPT_ECHO_ON` prompts with the supplied username; previously only `PAM_PROMPT_ECHO_OFF` prompts were answered, which caused `pam_authenticate` to silently fail (and salt-api to return 401) against PAM stacks that re-prompt for the user. [#69304](https://github.com/saltstack/salt/issues/69304) +- Ensure multiple masters have their own job/state queues [#69308](https://github.com/saltstack/salt/issues/69308) +- Fixed loading private keys from PKCS#12 containers with x509_v2 [#69312](https://github.com/saltstack/salt/issues/69312) +- Fixed creating self-signed PKCS#12-encoded certificates [#69319](https://github.com/saltstack/salt/issues/69319) +- Fixed minion state queue replacing the master-assigned JID on queued state runs, so returns now come back tagged with the JID the master actually published. [#69386](https://github.com/saltstack/salt/issues/69386) +- Made the salt user's home directory and the relenv ``extras-`` directory configurable in the Linux packaging. 
The DEB preinst scripts now source ``/etc/default/salt-setup`` (and ``/etc/sysconfig/salt-minion-setup`` for cross-distro parity with RPM) before applying the ``SALT_HOME``/``SALT_USER``/``SALT_GROUP``/``SALT_NAME`` defaults, mirroring the long-standing RPM behavior. A new ``SALT_EXTRAS_DIR`` override is honored by both stacks so the extras tree can be relocated outside ``/opt/saltstack/salt`` and its ownership is correctly restored on upgrade. [#69402](https://github.com/saltstack/salt/issues/69402) +- Fixed minion worker threads hanging or crashing when returning job results + to the master. The main process now fires an error event back to the worker + when ``req_channel.send()`` times out, so workers wake up immediately rather + than waiting out their full timeout. Replaced the bare ``TimeoutError`` raised + in ``_send_req_sync`` with ``SaltReqTimeoutError`` so ``_return_pub``'s existing + handler catches it correctly. The worker's wait timeout is now derived from + ``return_retry_timer_max * return_retry_tries`` to ensure it always outlasts + the main process's retry budget. [#69416](https://github.com/saltstack/salt/issues/69416) +- Fixed zsh completion by using the proper python3 instead of python2. [#69419](https://github.com/saltstack/salt/issues/69419) +- Fixed Photon OS Arm64 FIPS CI by re-enabling the OpenSSL default provider after installing openssl-fips-provider, working around the disabled-default-provider bug in `openssl-fips-provider <= 3.1.2-3.ph5` on the lagging Photon aarch64 mirror. [#69449](https://github.com/saltstack/salt/issues/69449) +- Add regression test for changelog template multi-line rendering and harden template with indent filter so continuation lines are correctly indented under the bullet (defensive backport of #69458 to 3006.x). [#69454](https://github.com/saltstack/salt/issues/69454) +- Fixed minion not honoring SIGTERM while stuck in the master DNS retry loop, which caused systemd to escalate to SIGKILL after 90 seconds. 
[#69466](https://github.com/saltstack/salt/issues/69466) +- Fixed ``lgpo_reg`` module and state functions failing on Windows Domain Controllers with ``Access is denied`` when writing to ``HKLM\SOFTWARE\Policies\`` subkeys. The ``set_value``, ``disable_value``, and ``delete_value`` execution module functions now accept a ``write_registry`` parameter (default ``None``) that auto-detects Domain Controllers via the ``ProductType`` registry key and skips the direct registry write when one is detected, instead relying on the Group Policy engine to apply the policy on the next refresh. An explicit ``True`` or ``False`` overrides auto-detection. A ``refresh_policy`` parameter (default ``False``) has been added to all three functions to trigger an in-process ``userenv.RefreshPolicy`` call immediately after the ``Registry.pol`` file is updated. The corresponding state functions ``value_present``, ``value_disabled``, and ``value_absent`` expose the same parameters. A standalone ``lgpo_reg.refresh_policy`` execution function and ``lgpo_reg.refresh_policy`` state have been added to allow a single Group Policy refresh to be issued after a batch of policy writes. ``is_domain_controller`` has been added to ``salt.utils.win_functions`` and ``refresh_policy`` has been added to ``salt.utils.win_lgpo_reg``. [#69468](https://github.com/saltstack/salt/issues/69468) +- Fixed 3006.x Windows nightly CI by pinning the runner-host Python to 3.14.6 (OpenSSL 3.5.7); the setup-python default `3.14` was resolving to a cached 3.14.5 build whose OpenSSL 3.0.20 rejected the cert pypi.org currently serves. 
[#69486](https://github.com/saltstack/salt/issues/69486) +- Fixed 3006.x Windows nightly CI Deps by dropping a sitecustomize hook into the salt onedir's `Lib/site-packages` that applies the cpython#104135 iter-and-skip patch before pip touches TLS; the prior runner-host Python pin in #69486 targeted the wrong interpreter (the failing pip runs in a venv created from the relenv-bundled Python 3.10) and is reverted. [#69490](https://github.com/saltstack/salt/issues/69490) +- Fixed ``lgpo_reg`` failures on Windows when ``Registry.pol`` is temporarily locked by the Group Policy service or other processes. Salt now uses ``EnterCriticalPolicySection`` / ``LeaveCriticalPolicySection`` from ``userenv.dll`` — the same synchronization primitive used by the GP engine — to serialize read-modify-write access to ``Registry.pol``. A retry loop with configurable attempts and delay is also applied for non-GP lockers such as antivirus scanners or VSS snapshots that do not participate in the GP critical section handshake. [#69492](https://github.com/saltstack/salt/issues/69492) + + +### Added + +- Added ``shadow.verify_password`` to ``salt.modules.win_shadow``, which + validates a Windows user's password via ``LogonUser`` with + ``LOGON32_LOGON_NETWORK`` (Microsoft's recommended approach per + `KB180548 `_) without + creating an interactive session. If the check causes an account lockout, + the account is automatically unlocked. Updated ``user.present`` on Windows + to use ``shadow.verify_password`` so the password is only changed when it + differs from the current value, matching the idempotent behaviour on other + platforms. [#41347](https://github.com/saltstack/salt/issues/41347) +- Added ability to configure the pillar destination for the `netbox` ext_pillar via `destination_pillar_key` [#65531](https://github.com/saltstack/salt/issues/65531) +- Migrate Salt documentation to the PyData Sphinx theme. 
This update modernizes the documentation UI, improves navigation with a persistent sidebar tree, and fixes issues with embedded video playback. [#69185](https://github.com/saltstack/salt/issues/69185) +- fix etcdv3 module authentication when using etcd3-py lib [#69202](https://github.com/saltstack/salt/issues/69202) +- Added ``lgpo_reg.get_rsop_value`` to query the Resultant Set of Policy (RSoP) for a registry key/value and detect whether it is managed by a Domain Group Policy Object. The ``lgpo_reg`` module functions ``set_value``, ``disable_value``, and ``delete_value`` now log a warning when a Domain GPO is detected for the target value. The ``lgpo_reg`` state functions ``value_present``, ``value_disabled``, and ``value_absent`` append the same warning to the state comment so it is visible in state output. [#69205](https://github.com/saltstack/salt/issues/69205) diff --git a/doc/topics/releases/templates/3006.26.md.template b/doc/topics/releases/templates/3006.26.md.template new file mode 100644 index 000000000000..c13e0940db9d --- /dev/null +++ b/doc/topics/releases/templates/3006.26.md.template @@ -0,0 +1,14 @@ +(release-3006.26)= +# Salt 3006.26 release notes{{ unreleased }} +{{ warning }} + + + + +## Changelog +{{ changelog }} diff --git a/pkg/common/salt-minion.service b/pkg/common/salt-minion.service index 69aff18c5835..d5c1113edcd5 100644 --- a/pkg/common/salt-minion.service +++ b/pkg/common/salt-minion.service @@ -4,7 +4,7 @@ Documentation=man:salt-minion(1) file:///usr/share/doc/salt/html/contents.html h After=network.target salt-master.service [Service] -KillMode=process +KillMode=mixed Type=notify NotifyAccess=all LimitNOFILE=8192 diff --git a/pkg/debian/changelog b/pkg/debian/changelog index d21e603a1290..61ebb1aaf534 100644 --- a/pkg/debian/changelog +++ b/pkg/debian/changelog @@ -26,6 +26,325 @@ salt (3007.13) stable; urgency=medium -- Salt Project Packaging Wed, 11 Feb 2026 19:46:35 +0000 +salt (3006.26) stable; urgency=medium + + + # Removed + 
+ * Removed the unmaintained `linode-python` package dependency to stop SyntaxWarnings during install for retired Linode API v3. [#68992](https://github.com/saltstack/salt/issues/68992) + + # Changed + + * Changed `salt.returners.redis_return` to enumerate the Redis keyspace + with `SCAN` instead of the blocking `KEYS pattern` command in both + `get_jids` and `clean_old_jobs`. `KEYS` walks the entire keyspace + synchronously and stalls the Redis server for the duration; on a + master with hundreds of thousands of jobs this can block all clients + of that Redis instance for seconds. `SCAN` is incremental and + non-blocking. Order of returned keys is no longer guaranteed (the + returner does not rely on order); operators with custom scripts that + read `ret:*` or `load:*` directly may see them in a different order. [#69037](https://github.com/saltstack/salt/issues/69037) + + # Fixed + + * Fixed multi-line scalar variables loaded via `import_yaml` (or `load_yaml`) being rendered as literal `\n` instead of actual newlines when the loaded data is interpolated into a YAML state file (e.g. `- context: {{ data }}`). `PrintableDict.__str__`/`__repr__` now emit string values containing newlines as YAML-safe double-quoted scalars rather than Python `repr()` so they round-trip correctly through the subsequent YAML render pass. [#30690](https://github.com/saltstack/salt/issues/30690) + * Handle requisites correctly for empty SLS files [#30971](https://github.com/saltstack/salt/issues/30971) + * Fixed ``win_pkg`` functions ignoring the ``saltenv`` setting in minion configuration. All public functions (``refresh_db``, ``genrepo``, ``install``, ``remove``, ``list_pkgs``, ``latest_version``, ``upgrade_available``, ``list_upgrades``, ``list_available``, ``version``, ``get_repo_data``, ``get_package_info``) now fall back to ``__opts__["saltenv"]`` when ``saltenv`` is not passed explicitly, instead of always defaulting to ``base``. 
[#38551](https://github.com/saltstack/salt/issues/38551) + * ``dpkg_lowpkg`` no longer reads ``/var/lib/dpkg/available`` or ``/var/lib/dpkg/info/.list`` directly. It now uses ``dpkg-query`` exclusively, addressing the lintian ``uses-dpkg-database-directly`` warning reported in #52605. ``lowpkg.info`` derives the package install time from dpkg's ``${db-fsys:Last-Modified}`` field instead of the ``.list`` file mtime. [#52605](https://github.com/saltstack/salt/issues/52605) + * Added ``encoding`` parameter to ``file.replace`` execution module and state to support UTF-16, UTF-32, and other multi-byte encoded files that would otherwise be incorrectly treated as binary. [#52793](https://github.com/saltstack/salt/issues/52793) + * Fixed `postgres._find_pg_binary` ignoring `postgres.bins_dir` when a `psql` binary is also present on the system PATH, ensuring the configured `bins_dir` is always preferred over the system PATH. [#53190](https://github.com/saltstack/salt/issues/53190) + * Percent-encode the user and password when adding HTTP basic auth to a URL so reserved characters no longer corrupt the result [#55561](https://github.com/saltstack/salt/issues/55561) + * Fixed a ``SaltCacheError`` ("maximum recursion depth exceeded") raised by the + etcd data cache when listing an empty folder, which etcd reports as a child of + itself. The directory walk now stops at the self-referential entry instead of + recursing indefinitely. [#57377](https://github.com/saltstack/salt/issues/57377) + * Fixed `timezone.system` state always returning `result=False` with "Failed to set UTC to True" on Windows. The hardware clock on Windows is always localtime and cannot be changed, so the UTC/hwclock block is now skipped entirely on Windows. [#57754](https://github.com/saltstack/salt/issues/57754) + * Fixed `archive.tar` placing the `-C ` option after the source/member operands, where tar ignores it. 
The directory-change option is now emitted before the operands so it takes effect in both create and extract modes. [#57847](https://github.com/saltstack/salt/issues/57847) + * Fixed `OSError: The operation completed successfully` raised by `CreateProcessWithTokenW` on Windows when the underlying advapi32 call fails. The error code is now read from `ctypes.get_last_error()` (the ctypes-saved slot) instead of `win32api.GetLastError()` (the live Windows slot, which may be reset to 0 before it is read). [#57848](https://github.com/saltstack/salt/issues/57848) + * Improved documentation for the `runas` and `password` parameters in `cmd.run`, `cmd.script`, and all `salt.modules.cmdmod` execution functions on Windows. The docs now accurately describe when a password is required: only when the salt-minion is **not** running as SYSTEM or as an elevated Administrator. Removed the inaccurate claim that the target user account must be in the Administrators group. Also changed `cmd.script` to log a warning instead of hard-failing when `runas` is used without a password on Windows, since a password is not always required. [#57951](https://github.com/saltstack/salt/issues/57951) + * Fixed ``pkg.group_installed``/``pkg.group_info`` failing to expand a dnf environment group whose member groups have multi-word names (e.g. ``Group '@Common NetworkManager submodules' not found`` when installing ``Workstation`` on RHEL/AlmaLinux 8, 9 and 10). The member group is now resolved by its bare name when the ``@``-prefixed lookup fails. This affects dnf4 only; dnf5 group handling is unchanged. [#60276](https://github.com/saltstack/salt/issues/60276) + * Fix `tls.create_csr` log message path to use `os.path.join` instead of f-string interpolation so paths render correctly when csr_path has a trailing slash. [#60877](https://github.com/saltstack/salt/issues/60877) + * Fixed the LDAP eauth group-membership lookup re-binding the user on every job + payload when ``auth.ldap.freeipa`` is enabled. 
The user is now only re-bound on + the first payload of a job, matching the standard LDAP code path, so single-use + 2FA credentials (such as a FreeIPA OTP) are no longer consumed more than once. [#61974](https://github.com/saltstack/salt/issues/61974) + * Fixed `SSL: DECRYPTION_FAILED_OR_BAD_RECORD_MAC` errors in the VMware cloud driver by reconnecting when a cached vCenter service instance is found to be stale or corrupted (for example when inherited across a fork by salt-cloud's parallel provider queries). [#61983](https://github.com/saltstack/salt/issues/61983) + * Fix metadata grain so EC2 ``user-data`` is returned verbatim instead of being mangled by the ``=`` line-splitter, which previously corrupted any user-data payload containing ``=`` (e.g. cloud-init ``#cloud-config`` blocks). [#62061](https://github.com/saltstack/salt/issues/62061) + * Fixed LGPO ``get_policy_info`` incorrectly returning a "multiple policies" error when duplicate ADMX policy definitions (e.g. ``TerminalServer.admx`` and ``TerminalServer-Server.admx``) resolve to the same full path. [#62732](https://github.com/saltstack/salt/issues/62732) + * Re-enable test_interrupt_on_long_running_job by removing the initial-onedir-rollout skip marker. [#63627](https://github.com/saltstack/salt/issues/63627) + * Fix missing `dns_plugin_propagate_seconds` arg in acme state/module so DNS propagation timeout is actually forwarded to certbot. [#63700](https://github.com/saltstack/salt/issues/63700) + * Improve PAM eauth diagnostics when ``salt-master`` runs as a non-root user. Previously, ``salt-master``/``salt-api`` running as the ``salt`` user (the 3006.x packaging default) silently failed every PAM authentication with only ``Pam auth failed for :`` in the log; the cause is that the helper subprocess inherits the master's uid and PAM's ``unix_chkpwd`` refuses to validate other users without ``/etc/shadow`` access. 
The master now emits a one-shot CRITICAL log entry that names the cause and the two standard remediations (run as ``root``, or add the master user to the ``shadow`` group on Debian-derived distributions), and the module documentation describes the constraint. [#64275](https://github.com/saltstack/salt/issues/64275) + * Fixed incorrect minion presence events being sent out on hourly ``Maintenance`` process restarts [#64505](https://github.com/saltstack/salt/issues/64505) + * Catch StrictUndefined in salt jinja custom filters. [#64915](https://github.com/saltstack/salt/issues/64915) + * Stopped logging the misleading "An extra return was detected from minion ... this could be a replay attack" ERROR for benign duplicate returns (also fixes #65516). The local_cache returner now compares a duplicate return to the cached one and logs at DEBUG when the payloads match (the common retry-after-timeout or syndic re-forward case) and at WARNING -- without the "replay attack" wording -- when the payloads differ. [#65301](https://github.com/saltstack/salt/issues/65301) + * Fixed non-root salt CLI access when ``publisher_acl`` or ``external_auth`` is configured. Since 3006.3 the master defaults to running as the ``salt`` user, which left ``sock_dir`` and ``cachedir`` mode ``0o750`` and blocked authorised non-root users from traversing into them to reach ``master_event_pub.ipc`` / ``publish_pull.ipc`` and their per-user ``._key``. The master now adds the world-execute bit to those two directories when ACLs are configured, without exposing directory listings. [#65317](https://github.com/saltstack/salt/issues/65317) + * Fixed ``salt.ext.tornado.netutil`` import on Python 3.12+ where ``ssl.match_hostname`` was removed and the unmaintained ``backports.ssl_match_hostname`` package is unavailable, which previously broke any Salt master-initiated job (e.g. ``test.ping``, ``state.apply``) on Fedora 39+/Ubuntu 24.04 masters. 
[#65360](https://github.com/saltstack/salt/issues/65360) + * See #65301 -- the same fix to ``salt/returners/local_cache.py`` quiets the spurious "extra return ... replay attack" ERROR that appeared in multimaster and master-of-masters/syndic setups when the same return arrived more than once. [#65516](https://github.com/saltstack/salt/issues/65516) + * Fix deadlock in parallel `cmd.script` states when the script is served by the master. + + Same fork-inherited ZeroMQ socket race as the `file.managed` fix: a + `cmd.script` state with `parallel: True` downloads the script via + `cp.cache_file` in a forked child that inherited the parent's ZeroMQ + REQ socket, deadlocking the asyncio loop at ~100% CPU. Resolved by the + same `os.register_at_fork` handlers that drop inherited channel/socket + references in forked children. [#65709](https://github.com/saltstack/salt/issues/65709) + * Fixed pip.uninstall rejecting the extra_args keyword argument, matching the behavior of pip.install. [#65870](https://github.com/saltstack/salt/issues/65870) + * Fixed salt-ssh failing to fetch ``gitfs_remotes``. ``salt.config.master_config`` + sets ``__fs_update = True`` to suppress fileserver refreshes done by ``FSChan`` + (the master daemon's maintenance thread handles them). salt-ssh inherits the + master config but has no maintenance thread, so its ``FSClient`` never refreshed + the fileserver backends and wrappers such as ``cp.list_states`` saw no gitfs + content until the user ran ``salt-run fileserver.update`` or manually + ``git fetch``ed the cached repos. ``salt.client.ssh.SSH.__init__`` now removes + the suppression flag before instantiating ``FSClient`` so gitfs is refreshed + once at startup. [#66148](https://github.com/saltstack/salt/issues/66148) + * Fixed ``salt/version.py`` reporting the wrong major version on the 3006.x branch when built from a checkout that has no ``salt/_version.txt`` and no usable ``.git`` directory. 
``SaltVersionsInfo.current_release()`` now returns the branch's own codename (``Sulfur``) instead of the next un-released codename in the table, so source builds and other tooling no longer leak ``3007.0`` into the reported version. [#67061](https://github.com/saltstack/salt/issues/67061) + * Fixed ``saltutil.runner`` and ``saltutil.wheel`` running master-side functions + as the minion's user (typically ``root``) instead of the master's configured + user (the packaged default since 3006 is ``salt``). Running as the wrong user + left root-owned files in, and tripped git's ``safe.directory`` check on, the + salt-owned master cache -- breaking, for example, ``git_pillar.update`` invoked + via ``saltutil.runner``. These functions now drop to the master's configured + user before executing when invoked from a more-privileged process. [#67716](https://github.com/saltstack/salt/issues/67716) + * Fixed `LocalClient.cmd_subset` raising `TypeError: argument of type 'bool' is not iterable` when one or more targeted minions failed to respond to the `sys.list_functions` probe. Failed minions are now skipped during subset selection. [#68103](https://github.com/saltstack/salt/issues/68103) + * Fixed ``slack_bolt`` engine crashing with ``UnboundLocalError`` when a Slack workflow or other bot posts a message to a monitored channel. Bot messages (``subtype: bot_message``) carry ``bot_id`` and ``username`` instead of a ``user`` field, and these are now used as fallbacks so the engine continues running. [#68105](https://github.com/saltstack/salt/issues/68105) + * Fixed `user.present` to not fail with `result: False` in test mode when a referenced group does not yet exist; the state now reports the pending changes so users can preview states that depend on groups created by a `group.present` requisite in the same run. 
[#68110](https://github.com/saltstack/salt/issues/68110) + * Fixed ``salt-minion`` and ``salt-proxy`` leaving a privileged (root) keepalive supervisor process at the head of an otherwise unprivileged minion process tree when ``user`` is set to a non-root account. The supervisor now drops privileges to the configured user once the keepalive child has been spawned. [#68115](https://github.com/saltstack/salt/issues/68115) + * Fixed ``ValueError: Formatting field not found in record: 'colorlevel'`` errors when ``log_fmt_console`` uses custom color attributes such as ``%(colorlevel)s`` or ``%(colormsg)s``. ``SaltLogRecord`` now always provides the ``color*`` attributes (uncolored by default) so that log records buffered by the temporary deferred stream handler can be formatted by a colorized console formatter once it is installed. [#68129](https://github.com/saltstack/salt/issues/68129) + * Fixed ``salt-call`` silently ignoring ``--file-root``, ``--pillar-root``, and ``--states-dir`` when ``--local`` was not passed. These overrides only affect the local minion config and are clobbered by the master's values via the remote file client, so ``salt-call`` now emits a warning explaining that ``--local`` is required for the override to take effect. [#68137](https://github.com/saltstack/salt/issues/68137) + * Fixed event signature verification failing under ``minion_sign_messages``. The minion was signing the return load before ``salt.channel.client.AsyncReqChannel._package_load`` attached transport metadata (``nonce``, ``ts``, ``tok``, ``id``), so the bytes the master re-serialized to verify did not match what was signed and every signed return was dropped. Signing is now performed inside ``_package_load`` after the metadata is attached, against the same bytes the master verifies. 
[#68181](https://github.com/saltstack/salt/issues/68181) + * Fixed ``pkgrepo.managed`` not honouring ``clean_file: True`` when the desired + repo line is already present in the managed file alongside unrelated stale + lines. Previously the state returned "already configured" and silently + skipped both the file truncation and the re-write, leaving the stale + entries (for example an obsolete ``bullseye-backports`` line in a file + managed for ``bookworm-backports``) in place. The clean + reconfigure + path now runs whenever the managed file contains any non-comment, + non-blank content other than the desired repo line; when the file already + contains only the desired line the state remains idempotent. [#68208](https://github.com/saltstack/salt/issues/68208) + * Fixed ``pkg.group_installed`` reporting failure on RPM-based systems when a package group's default or optional members are not available in any enabled repository. The state now only considers mandatory group members and explicitly requested ``include`` packages when checking for install failures, matching the behavior of ``yum/dnf group install`` (which reports "No match for group package" but still exits 0). [#68210](https://github.com/saltstack/salt/issues/68210) + * Pass ``--disable-pip-version-check`` when ``pip.list``, ``pip.freeze``, ``pip.list_upgrades``, ``pip.upgrade``, and ``pip.list_all_versions`` invoke pip, so these calls no longer hang for ~20s per invocation on airgapped minions while pip tries to reach PyPI for its self-version check. [#68214](https://github.com/saltstack/salt/issues/68214) + * Fixed ``archive.extracted`` failing to enforce ``user``/``group`` ownership on archives whose tar/zip members include no explicit directory entries (e.g. Oracle's GraalVM JDK tarballs). ``archive.list`` now derives the top-level directory from the common prefix of file and link members in addition to dir members, so ownership is applied to the extracted top-level directory in all cases. 
[#68227](https://github.com/saltstack/salt/issues/68227) + * Fixed deltaproxy sub-proxies returning identical grain data for every controlled minion. ``subproxy_post_master_init`` now re-packs each sub-proxy's freshly loaded per-minion grains into its execution-module, returner, executor and proxy LazyLoaders so ``__grains__`` inside loaded modules reflects that sub-proxy's device instead of the placeholder values captured during the first-pass grains load through the control proxy. [#68248](https://github.com/saltstack/salt/issues/68248) + * Fixed the salt-minion (and salt-api, salt-cloud, salt-master, salt-syndic) Debian postinst scripts hanging or erroring with "Bad file descriptor" when run from a non-interactive Debian preseed late_command chroot, by tearing down the debconf protocol with ``db_stop`` and explicitly closing file descriptor 3 before the auto-generated ``#DEBHELPER#`` section runs. [#68269](https://github.com/saltstack/salt/issues/68269) + * Fixed ``file.managed`` failing with ``WinError 123`` on Windows when caching a remote URL whose path embeds another URL (e.g. an archive.org snapshot of an ``https://...`` resource). The URL-path portion of the ``extrn_files`` cache path is now sanitised the same way the network location already is. [#68273](https://github.com/saltstack/salt/issues/68273) + * Fixed ``logrotate.set`` dropping the second ``endscript`` (and turning + embedded shell commands into bogus setting keys) when a stanza contained + multiple script blocks such as both ``prerotate`` and ``postrotate``. Script + directives are now parsed as opaque multi-line bodies and round-trip with + their own ``endscript`` terminator each. [#68293](https://github.com/saltstack/salt/issues/68293) + * Fixed the `salt.state` orchestrate state silently reporting only `Run failed on minions: ` when a targeted minion returned `False`, no return at all, or a list of error strings. 
The orchestrate comment now includes the per-minion failure detail (the minion's actual return value or "did not return a state result") so operators can diagnose `salt-run state.orchestrate` failures without re-running with extra logging. [#68326](https://github.com/saltstack/salt/issues/68326) + * Fixed worker process crash when salt is used outside CLI tools. [#68332](https://github.com/saltstack/salt/issues/68332) + * Fixed ``clean_old_jobs`` in the default local job cache returner to use the jid file's modification time (``st_mtime``) instead of the inode change time (``st_ctime``). A package upgrade's ``chown -R /var/cache/salt/master`` resets ``st_ctime`` on every existing jid file, which previously made the maintenance process treat every pre-upgrade job as freshly created and prevented cleanup until ``keep_jobs_seconds`` had elapsed. On busy masters this exhausted the partition's inodes within a day. [#68351](https://github.com/saltstack/salt/issues/68351) + * Fixed the ``proxmox`` salt-cloud driver raising ``Could not determine an IP address to use`` before the VM was created and started. The IP address is now determined after the VM is running, and the running VM's address reported by Proxmox is used as a fallback when neither a static ``ip_address`` nor ``agent_get_ip`` is configured. [#68353](https://github.com/saltstack/salt/issues/68353) + * Changed ``KillMode`` in the shipped ``salt-minion.service`` systemd unit from ``process`` to ``mixed`` so that ``systemctl stop`` / ``systemctl restart salt-minion`` no longer leaves orphaned ``Minion._thread_return`` worker processes outside the cgroup. SIGTERM is still sent only to the main PID (so the job return scheduled by ``service.restart salt-minion`` from #68183 has time to finish), but any remaining children are reaped with SIGKILL after the main process exits or ``TimeoutStopSec`` elapses. 
[#68406](https://github.com/saltstack/salt/issues/68406) + * Fixed `task.edit_task` on Windows rejecting `restart_count=999` even though the documented and error-message-stated maximum is 999. The validation now accepts the full 1..999 range. [#68419](https://github.com/saltstack/salt/issues/68419) + * Fixed ``win_task.add_trigger`` so that ``repeat_duration="Indefinitely"`` actually produces an indefinite repetition pattern. Previously the empty string from the internal duration lookup was assigned to ``Repetition.Duration``, which the Windows Task Scheduler treats as "0 seconds" and silently disables repetition. The Duration property is now left at its default for the "Indefinitely" case, which is the documented way to repeat forever. [#68420](https://github.com/saltstack/salt/issues/68420) + * Fixed ``user.setpassword`` on Windows reporting success (``retcode: 0``) when the target user does not exist. The execution module now returns ``False`` and logs an error in that case, so callers and the ``user.present`` state correctly detect the failure instead of swallowing the Win32 "user name could not be found" message as a successful return. [#68428](https://github.com/saltstack/salt/issues/68428) + * Fixed ``user.present`` on Windows so it actually updates a user's password + when the existing password differs from the one specified in the state. + Previously the state reported "User is already present and up to date" and + left the password unchanged. [#68429](https://github.com/saltstack/salt/issues/68429) + * Stop salt-ssh state runs from clobbering the master-side fileclient ``cachedir`` with the on-target ``thin_dir`` cachedir. The state fileserver cache for salt-ssh state runs is now written under the configured master ``cachedir`` (e.g. ``/var/cache/salt/master/``) instead of under the minion's thin_dir path on the master filesystem. 
[#68458](https://github.com/saltstack/salt/issues/68458) + * Fixed ``pkg.add_repo_key`` and ``pkgrepo.managed`` so APT keyring files that target an ``.asc`` destination keep their ASCII armor instead of being dearmored, matching the apt-secure(8) convention and allowing armored keyfiles that bundle multiple keys to be installed even when the ``gpg`` binary is not available on the minion. [#68464](https://github.com/saltstack/salt/issues/68464) + * Fixed ``jobs.list_jobs search_metadata`` so it matches jobs whose metadata + was passed as a CLI keyword argument (e.g. ``state.apply metadata={...}``) + and is therefore carried inside the job's ``Arguments`` rather than at the + top of the job payload. [#68481](https://github.com/saltstack/salt/issues/68481) + * Fixed `lgpo.set` state reporting "Failed to set the following policies" on subsequent runs of policies with sub-elements (e.g. Storage Sense thresholds). The state compared a user-supplied dict keyed by element id with a current dict keyed by the ADML display name; both forms now normalize to the canonical element id before comparison so the state is idempotent. [#68489](https://github.com/saltstack/salt/issues/68489) + * Fixed minion rejecting the master with "Invalid master key" after restart when the cached `minion_master.pub` differs from the master's payload pub_key only in trailing whitespace. `AsyncAuth.verify_master` now normalizes both sides through `clean_key` before comparing and caches the normalized form on first contact. [#68493](https://github.com/saltstack/salt/issues/68493) + * Fixed ``TypeError: 'NoneType' object is not iterable`` raised from ``AsyncReqMessageClient._send_recv`` when a per-message timeout completes the future before the send/receive coroutine catches a transient transport exception, which aborted the minion's connect loop and prevented it from connecting to the master. 
[#68506](https://github.com/saltstack/salt/issues/68506) + * Fixed ``docker_network.present`` recreating networks on every run against Docker 29+. Docker 29 added an empty ``IPRange`` field to every IPAM Config entry; ``docker.compare_networks`` now drops empty/None placeholder values before comparing pools, and the state's default-pool short-circuit treats the empty field as absent. [#68518](https://github.com/saltstack/salt/issues/68518) + * Fixed `pkg.installed` verification on x86_64 hosts that mix `x86_64` and `x86_64_v2` packages (e.g. AlmaLinux 10.1). `salt.utils.pkg.rpm.resolve_name` and `salt.modules.yumpkg.normalize_name` now treat `x86_64_v2` as compatible with `x86_64` instead of appending the arch suffix, so installed packages match the names Salt records. [#68540](https://github.com/saltstack/salt/issues/68540) + * Fixed ``mysql_grants.present`` reporting "Failed to execute" when granting ``ALL PRIVILEGES`` on ``*.*`` against MySQL 8.4+, where the server's privilege set drifted from Salt's hard-coded list (``SET_USER_ID`` removed, many dynamic privileges added). ``grant_exists`` now derives the expected privilege set from the connected server's ``SHOW PRIVILEGES`` output instead of a static list. [#68567](https://github.com/saltstack/salt/issues/68567) + * Fixed ``cp.get_template`` raising ``AttributeError: 'NoneType' object has no attribute 'get'`` when the Jinja template uses ``{% from '...' import ... with context %}``. The cp module's loader-backed ``__opts__`` is now unwrapped to a plain dict before the SaltCacheLoader instantiates the file client and channel that fetch the imported template. [#68572](https://github.com/saltstack/salt/issues/68572) + * Fixed `ImportError: cannot import name 'wait' from partially initialized module 'multiprocessing.connection'` raised during salt-master/minion shutdown when a reentrant SIGTERM hit `ProcessManager.kill_children()` mid `Process.join(0)`. 
`salt.utils.process` now eagerly imports `multiprocessing.connection` so the module is fully initialised before any signal handler can trigger its lazy import. [#68573](https://github.com/saltstack/salt/issues/68573) + * Fixed `cmd.script` on Windows raising `Invalid user: ` when `runas` is a domain account (`DOMAIN\user`, `user@DOMAIN`, or a SID). The pre-execution `user.info` check is backed by `NetUserGetInfo` which only resolves local-machine accounts and returns empty for many valid domain users; the missing lookup is now logged as a warning and execution continues so the underlying `win_runas` machinery can authenticate the account. [#68578](https://github.com/saltstack/salt/issues/68578) + * Fixed `pkg.install` on Windows silently downgrading the salt-minion when a numeric `version=` argument was passed (e.g. `version=3007.10` was YAML-parsed to the float `3007.1` and then matched the wrong winrepo entry). When the numeric version uniquely matches a string-keyed winrepo entry it is now resolved to that entry; when it is ambiguous (e.g. both `3007.1` and `3007.10` are in the winrepo) the install is refused with a clear error pointing the user at the quoted-version syntax. [#68620](https://github.com/saltstack/salt/issues/68620) + * Fixed the loader masking failure reasons when multiple modules declare the same `__virtualname__` and each `__virtual__()` returns False, so users now see every reason (e.g. both x509 v1's "Superseded, using x509_v2" and x509_v2's "Could not load cryptography") instead of only the first one recorded. [#68625](https://github.com/saltstack/salt/issues/68625) + * Fix `NetapiClient.runner` raising `TypeError` when `timeout` arrives as a string from the salt-api HTTP form. 
[#68653](https://github.com/saltstack/salt/issues/68653) + * Fixed `master_job_cache: redis_return` raising `KeyError: 'redis_return.prep_jid'` by registering the `redis` returner under both `redis` and `redis_return` virtual names, matching the documented `--return redis_return` usage and the module's file name. [#68663](https://github.com/saltstack/salt/issues/68663) + * Fixed ``ini.options_present`` with ``strict: True`` to remove sections that are present in the ini file but absent from the supplied ``sections`` mapping. [#68673](https://github.com/saltstack/salt/issues/68673) + * Handle `SaltDeserializationError` in grains cache loading so a corrupted cache file no longer propagates as CRITICAL during minion startup. [#68678](https://github.com/saltstack/salt/issues/68678) + * Fixed ``network.interfaces`` on Windows systems falling back to WMI (i.e. .NET older than 4.7.2): the default gateway is now reported under ``gateway`` instead of being mistakenly emitted as ``broadcast``. [#68692](https://github.com/saltstack/salt/issues/68692) + * Fixed ``file.managed`` (and other template-rendering callers) silently overwriting user-supplied ``slspath``, ``sls_path``, ``slsdotpath`` and ``slscolonpath`` values in ``defaults``/``context`` with values regenerated from the caller's ``sls`` key. [#68754](https://github.com/saltstack/salt/issues/68754) + * Fixed ``env_order`` not being honored when merging pillar data across environments. ``Pillar.render_pillar`` now iterates matched environments in the configured ``env_order`` so that, with ``top_file_merging_strategy: merge_all``, the last environment in ``env_order`` wins on conflicting pillar keys instead of the result depending on dict insertion order. 
[#68785](https://github.com/saltstack/salt/issues/68785) + * Improved the "Malformed topfile" error from ``HighState.verify_tops`` to name the saltenv and the matcher whose state declarations were not formed as a list, so users can locate the offending entry in their ``top.sls``. [#68792](https://github.com/saltstack/salt/issues/68792) + * Removed orphaned GnuPG dotlock files (``.#lk..``) from ``gpg_keydir`` before each decrypt in the ``gpg`` renderer so they no longer accumulate when a gpg subprocess is killed mid-operation. [#68869](https://github.com/saltstack/salt/issues/68869) + * Fix `pkg.installed` idempotency on FreeBSD when `with_origin=True` causes + `pkg.list_pkgs` to return per-package dicts instead of version lists; extract + the version list before version-string comparison so a second state run no + longer falsely reports packages as changed. [#68886](https://github.com/saltstack/salt/issues/68886) + * Fix gen_signature() signing raw pub key content instead of clean_key'd content, causing master_use_pubkey_signature verification to always fail. [#68930](https://github.com/saltstack/salt/issues/68930) + * Fixed spurious ``FileLockError: lock_fn ... exists and is not a file`` raised by ``salt.utils.files.wait_lock`` and ``salt.utils.files.await_lock`` (and therefore by ``state.apply`` queue locking) when another process removed the lock file between the two separate ``os.path.exists`` / ``os.path.isfile`` stats. The pre-check now uses a single ``os.stat`` call so a transient regular-file lock no longer trips the "not a file" branch. [#68931](https://github.com/saltstack/salt/issues/68931) + * Fixed pkg.installed(update_holds=True) for APT multiarch packages by preserving arch-qualified package names through install target parsing and verification. [#68932](https://github.com/saltstack/salt/issues/68932) + * Fix deadlock in parallel `file.managed` states when source is served by the master.
+ + Forked parallel-state children previously inherited the parent's ZeroMQ + REQ socket and asyncio loop from `salt.fileclient.RemoteClient`, + `salt.crypt.AsyncAuth/SAuth`, and `salt.utils.event.SaltEvent`. Multiple + sibling children racing those handles deadlocked the asyncio loop with + ~98% CPU and never completed. Salt now registers `os.register_at_fork` + handlers on those classes that drop inherited channel/socket references + in any forked child; the next use rebuilds them fresh. [#68940](https://github.com/saltstack/salt/issues/68940) + * Fixed grain and pillar targeting matching minions whose data cache entry was missing. ``CkMinions._check_cache_minions`` now excludes accepted minions that have no cached grains/pillar data from greedy target results, instead of silently including them as candidates. [#68976](https://github.com/saltstack/salt/issues/68976) + * Avoid AttributeError on a closed IPCClient when the connect coroutine resolves after close(). [#68993](https://github.com/saltstack/salt/issues/68993) + * Fixed `salt.utils.network.sanitize_host` stripping colons from IPv6 addresses, which broke `network.ping` and any other caller that passed an IPv6 host. [#68995](https://github.com/saltstack/salt/issues/68995) + * Added support for MAINTAIN (m) privilege introduced in PostgreSQL 17 to salt.modules.postgres and salt.states.postgres_privileges [#69003](https://github.com/saltstack/salt/issues/69003) + * Fixed `redis.get_master_ip` silently dropping the `password` argument. The + function was forwarding its arguments positionally to `_connect`, but + `_connect`'s third positional slot is `db`, not `password`, so the + caller's password landed in the database-index argument and the actual + password fell through to `config.option("redis.password")`. Arguments + are now passed by keyword. [#69029](https://github.com/saltstack/salt/issues/69029) + * Fixed `salt.modules.redismod._connect` rejecting valid `db=0`.
The helper + used a truthy check (`if not db`) to decide whether to fall back to + `config.option("redis.db")`, but `not 0` is `True`, so an explicitly + supplied `db=0` was silently replaced by the configured value. The check + is now `if db is None`, matching the pattern already used by the sibling + `_sconnect` helper in the same module. Other arguments keep their + truthy-check semantics on purpose. [#69030](https://github.com/saltstack/salt/issues/69030) + * Fixed two distinct bugs in the `salt.engines.redis_sentinel` engine that + together prevented it from being usable. `start()` no longer raises + `AttributeError: 'dict_values' object has no attribute 'pop'` on Python 3 + (the dict.values() result is now wrapped in `list(...)`). `Listener` and + `start()` now accept an optional `password` argument and forward it to + the redis client, allowing the engine to authenticate against a Sentinel + that requires AUTH; the default of `None` keeps existing configurations + working unchanged. [#69031](https://github.com/saltstack/salt/issues/69031) + * Fixed `salt.returners.redis_return` silently ignoring the documented + `redis.password` configuration option. The returner now reads + `redis.password` from config (in both regular and proxy modes) and + forwards it to both the single-server `redis.StrictRedis` and the + `StrictRedisCluster` constructors. Operators with auth-protected Redis + no longer lose every job return to a hidden `NOAUTH Authentication + required` failure; deployments without a password are unaffected.
[#69032](https://github.com/saltstack/salt/issues/69032) + * Fixed three closely-related bugs in `salt.cache.redis_cache` that + together broke hierarchical-bank semantics: + `_build_bank_hier` now registers each child bank name in both the + parent's `$BANK_` set (consumed by `flush()` tree traversal) and the + parent's `$BANKEYS_` set (consumed by `list_()`); `_get_banks_to_remove` + now decodes the bytes returned by `smembers` and skips the `"."` + placeholder, so recursive `flush()` of a parent bank actually descends + into sub-banks instead of corrupting the path; and `flush(bank)` of a + sub-bank now removes the flushed bank's own reference from its + parent's index sets so `list_(parent)` no longer reports it as + present. Together these fixes restore `cache.list("minions")`, + `salt-run manage.present` and `salt-run manage.up` for masters + configured with `cache: redis`. [#69033](https://github.com/saltstack/salt/issues/69033) + * Fixed `salt.tokens.rediscluster` being unable to retrieve any eauth + token. The cluster client was created with `decode_responses=True`, + which caused `redis_client.get()` to return `str` and broke + `salt.payload.loads` (msgpack rejects `str`); it also caused + `redis_client.keys()` to return `str` and broke + `[k.decode("utf8") for k in ...]` (`str` has no `.decode`). Both + errors were swallowed by broad `except Exception` handlers, so eauth + appeared to silently reject every token. `decode_responses=True` is + removed; values now round-trip as bytes through msgpack as the rest + of the module already expected. [#69035](https://github.com/saltstack/salt/issues/69035) + * Fixed `salt.returners.redis_return` leaking `:` last-jid + pointer keys indefinitely. The pointer was written with `pipeline.set` + and no `ex=` TTL, so any (minion, fun) pair that stopped running stuck + in Redis forever -- O(minions × distinct funcs) keys accumulating over + the lifetime of the master.
The pointer now expires on the same TTL + as the rest of the returner data (`keep_jobs_seconds`). Operators with + external scripts reading these keys directly may observe them + expiring; the documentation never promised they would not. [#69038](https://github.com/saltstack/salt/issues/69038) + * Fixed `salt.returners.redis_return.get_fun` always returning an + empty dict. The function read return data from a `:` + key that no other code in the module ever wrote -- a leftover from + an older storage schema. It now reads from the canonical + `ret:` hash via `HGET ret: `, matching the + storage layout that `returner` actually produces and the read + pattern that `get_jid` already uses. [#69039](https://github.com/saltstack/salt/issues/69039) + * Fixed `salt.returners.pgjsonb` writing database errors to `sys.stderr` + instead of Salt's logger. Errors from `_get_serv`, `_purge_jobs` and + `_archive_jobs` are now reported via `log.exception`, so they reach + the configured `log_file` / syslog destination on a daemonized master, + including a full traceback. The unused `import sys` is also dropped. [#69048](https://github.com/saltstack/salt/issues/69048) + * Fixed `salt.returners.pgjsonb.returner` letting any non-connection + `psycopg2.DatabaseError` propagate to the caller — including the + syndic-aggregate publish path in `salt/master.py` which had no outer + catch — so a single bad row could escape into a master subprocess. + `event_return` had no error handling at all and a database failure + during a flush propagated similarly. Both functions now catch + `SaltMasterError` and `psycopg2.DatabaseError` locally, log a + contextual message (jid/id for returns, batch size for events), and + drop the affected payload. While here, fix `event_return` passing + the events list as the positional `ret` argument to `_get_serv`, + which was a copy-paste leftover from `returner(ret)`.
[#69058](https://github.com/saltstack/salt/issues/69058) + * Fixed `salt-api`'s `/events` endpoint accepting eauth tokens via query + string (``?token=...`` or ``?salt_token=...``). Tokens supplied that + way end up in HTTP access logs, the browser ``Referer`` header, log- + aggregation systems and shell history; the token retains validity for + ``token_expire`` (12h by default), so any party reading those logs can + replay the token. The endpoint now rejects query-string tokens with a + 400 error pointing at the ``X-Auth-Token`` header (for non-browser + clients) or the session cookie established by ``/login`` (for browser + ``EventSource`` clients) as the supported channels. ``X-Auth-Token`` + header support is added; cookie-based auth continues to work + unchanged. [#69071](https://github.com/saltstack/salt/issues/69071) + * ``LoadAuth.get_tok`` now distinguishes between corrupt token blobs (removed from the store) and transient backend errors such as Redis connection drops or NFS hangs (token kept, request treated as not-authenticated). Previously a single backend hiccup could log every authenticated user out by deleting valid tokens. [#69073](https://github.com/saltstack/salt/issues/69073) + * ``cmd.run`` and friends no longer include the ``env`` and ``stdin`` arguments in the ``CommandExecutionError`` raised when the underlying subprocess fails to start (typically ``ENOENT`` / binary not found). Both fields routinely carry credentials passed in by the caller (``env={"DB_PASSWORD": "..."}``, password piped via ``stdin``), and the error message ends up in master/minion logs and in event-bus return data visible to the API caller.
[#69075](https://github.com/saltstack/salt/issues/69075) + * Lowered the "Cache version mismatch clearing" log message in ``salt.utils.cache.verify_cache_version`` from ``WARNING`` to ``DEBUG``; the cache is rebuilt as part of normal operation after upgrades or when an ephemeral cache directory has been removed, and does not warrant user attention. [#69106](https://github.com/saltstack/salt/issues/69106) + * * Relenv 0.22.14 + * Update sqlite to 3.53.2.0 + * Update openssl to 3.5.7 [#69129](https://github.com/saltstack/salt/issues/69129) + * Surface the real cause of a proxymodule load failure in salt-proxy's abort message. The misleading "Proxymodule X is missing an init() or a shutdown() or both" wording is now only used when init/shutdown really are missing from a loaded module; if the module failed to load (for example because its ``__virtual__`` returned False), the underlying reason is included in the error. [#69139](https://github.com/saltstack/salt/issues/69139) + * Fixed ``pkg.hold`` and ``pkg.list_holds`` on dnf5 systems (e.g. Fedora 42+): + ``pkg.hold`` now calls ``dnf5 versionlock add `` (the bare + ``versionlock `` form was rejected by dnf5), and ``pkg.list_holds`` + reads ``/etc/dnf/versionlock.toml`` directly so ``pkg.installed`` with + ``hold: true`` is again idempotent. [#69181](https://github.com/saltstack/salt/issues/69181) + * Fixed Salt-SSH syncing internal modules as extmods [#69199](https://github.com/saltstack/salt/issues/69199) + * Fixed ``lgpo_reg.value_absent`` failing when the Registry.pol entry was already absent but the registry value still existed. ``lgpo_reg.delete_value`` was returning early before reaching the registry cleanup code, causing the state to see no changes and report failure. The registry value is now removed regardless of whether the pol entry was present. 
[#69203](https://github.com/saltstack/salt/issues/69203) + * Fixed `postgres_local_cache.save_load` raising `psycopg2.errors.UniqueViolation` when more than one master in an active-active multi-master cluster persists the same JID; the INSERT is now idempotent via `ON CONFLICT (jid) DO NOTHING` on PostgreSQL >= 9.5, and the duplicate is tolerated on older servers. [#69214](https://github.com/saltstack/salt/issues/69214) + * Fixed Windows MSI self-upgrade via ``pkg.install`` failing with error 1603. The old product's ``DeleteConfig_DECAC`` custom action was unconditionally deleting ``ROOTDIR\var`` during ``RemoveExistingProducts``, destroying the MSI that ``pkg.install`` had cached to ``ROOTDIR\var\cache`` before launching the upgrade. Users who had ``REMOVE_CONFIG=1`` persisted in the registry (from checking "On uninstall" at install time) hit a worse variant where the entire ``ROOTDIR`` was deleted. The fix checks ``UPGRADINGPRODUCTCODE`` — set by Windows Installer whenever an uninstall is triggered by a major upgrade — and skips all ``ROOTDIR`` deletion during upgrades, matching the behaviour of the NSIS installer which has always preserved ``ROOTDIR`` during upgrades. [#69219](https://github.com/saltstack/salt/issues/69219) + * Fixed `TypeError: string indices must be integers` in the minion when the master returns a bare string error response (e.g. `"bad load"`, `"Some exception handling minion payload"`) for a pillar request. The minion now raises a clean `AuthenticationError` instead of crashing, allowing the caller to retry or fail gracefully. [#69228](https://github.com/saltstack/salt/issues/69228) + * pkg.list_patches in yumpkg.py parses tdnf output on Photon OS [#69229](https://github.com/saltstack/salt/issues/69229) + * Fix `git.tag` so that the documented `message` argument is actually forwarded to `git tag`, creating an annotated tag with the supplied message instead of silently producing a lightweight tag. 
[#69298](https://github.com/saltstack/salt/issues/69298) + * Fixed `salt.auth.pam` conversation callback so it answers `PAM_PROMPT_ECHO_ON` prompts with the supplied username; previously only `PAM_PROMPT_ECHO_OFF` prompts were answered, which caused `pam_authenticate` to silently fail (and salt-api to return 401) against PAM stacks that re-prompt for the user. [#69304](https://github.com/saltstack/salt/issues/69304) + * Ensure multiple masters have their own job/state queues [#69308](https://github.com/saltstack/salt/issues/69308) + * Fixed loading private keys from PKCS#12 containers with x509_v2 [#69312](https://github.com/saltstack/salt/issues/69312) + * Fixed creating self-signed PKCS#12-encoded certificates [#69319](https://github.com/saltstack/salt/issues/69319) + * Fixed minion state queue replacing the master-assigned JID on queued state runs, so returns now come back tagged with the JID the master actually published. [#69386](https://github.com/saltstack/salt/issues/69386) + * Made the salt user's home directory and the relenv ``extras-`` directory configurable in the Linux packaging. The DEB preinst scripts now source ``/etc/default/salt-setup`` (and ``/etc/sysconfig/salt-minion-setup`` for cross-distro parity with RPM) before applying the ``SALT_HOME``/``SALT_USER``/``SALT_GROUP``/``SALT_NAME`` defaults, mirroring the long-standing RPM behavior. A new ``SALT_EXTRAS_DIR`` override is honored by both stacks so the extras tree can be relocated outside ``/opt/saltstack/salt`` and its ownership is correctly restored on upgrade. [#69402](https://github.com/saltstack/salt/issues/69402) + * Fixed minion worker threads hanging or crashing when returning job results + to the master. The main process now fires an error event back to the worker + when ``req_channel.send()`` times out, so workers wake up immediately rather + than waiting out their full timeout. 
Replaced the bare ``TimeoutError`` raised + in ``_send_req_sync`` with ``SaltReqTimeoutError`` so ``_return_pub``'s existing + handler catches it correctly. The worker's wait timeout is now derived from + ``return_retry_timer_max * return_retry_tries`` to ensure it always outlasts + the main process's retry budget. [#69416](https://github.com/saltstack/salt/issues/69416) + * Fixed zsh completion by using the proper python3 instead of python2. [#69419](https://github.com/saltstack/salt/issues/69419) + * Fixed Photon OS Arm64 FIPS CI by re-enabling the OpenSSL default provider after installing openssl-fips-provider, working around the disabled-default-provider bug in `openssl-fips-provider <= 3.1.2-3.ph5` on the lagging Photon aarch64 mirror. [#69449](https://github.com/saltstack/salt/issues/69449) + * Add regression test for changelog template multi-line rendering and harden template with indent filter so continuation lines are correctly indented under the bullet (defensive backport of #69458 to 3006.x). [#69454](https://github.com/saltstack/salt/issues/69454) + * Fixed minion not honoring SIGTERM while stuck in the master DNS retry loop, which caused systemd to escalate to SIGKILL after 90 seconds. [#69466](https://github.com/saltstack/salt/issues/69466) + * Fixed ``lgpo_reg`` module and state functions failing on Windows Domain Controllers with ``Access is denied`` when writing to ``HKLM\SOFTWARE\Policies\`` subkeys. The ``set_value``, ``disable_value``, and ``delete_value`` execution module functions now accept a ``write_registry`` parameter (default ``None``) that auto-detects Domain Controllers via the ``ProductType`` registry key and skips the direct registry write when one is detected, instead relying on the Group Policy engine to apply the policy on the next refresh. An explicit ``True`` or ``False`` overrides auto-detection. 
A ``refresh_policy`` parameter (default ``False``) has been added to all three functions to trigger an in-process ``userenv.RefreshPolicy`` call immediately after the ``Registry.pol`` file is updated. The corresponding state functions ``value_present``, ``value_disabled``, and ``value_absent`` expose the same parameters. A standalone ``lgpo_reg.refresh_policy`` execution function and ``lgpo_reg.refresh_policy`` state have been added to allow a single Group Policy refresh to be issued after a batch of policy writes. ``is_domain_controller`` has been added to ``salt.utils.win_functions`` and ``refresh_policy`` has been added to ``salt.utils.win_lgpo_reg``. [#69468](https://github.com/saltstack/salt/issues/69468) + * Fixed 3006.x Windows nightly CI by pinning the runner-host Python to 3.14.6 (OpenSSL 3.5.7); the setup-python default `3.14` was resolving to a cached 3.14.5 build whose OpenSSL 3.0.20 rejected the cert pypi.org currently serves. [#69486](https://github.com/saltstack/salt/issues/69486) + * Fixed 3006.x Windows nightly CI Deps by dropping a sitecustomize hook into the salt onedir's `Lib/site-packages` that applies the cpython#104135 iter-and-skip patch before pip touches TLS; the prior runner-host Python pin in #69486 targeted the wrong interpreter (the failing pip runs in a venv created from the relenv-bundled Python 3.10) and is reverted. [#69490](https://github.com/saltstack/salt/issues/69490) + * Fixed ``lgpo_reg`` failures on Windows when ``Registry.pol`` is temporarily locked by the Group Policy service or other processes. Salt now uses ``EnterCriticalPolicySection`` / ``LeaveCriticalPolicySection`` from ``userenv.dll`` — the same synchronization primitive used by the GP engine — to serialize read-modify-write access to ``Registry.pol``. A retry loop with configurable attempts and delay is also applied for non-GP lockers such as antivirus scanners or VSS snapshots that do not participate in the GP critical section handshake. 
[#69492](https://github.com/saltstack/salt/issues/69492) + + # Added + + * Added ``shadow.verify_password`` to ``salt.modules.win_shadow``, which + validates a Windows user's password via ``LogonUser`` with + ``LOGON32_LOGON_NETWORK`` (Microsoft's recommended approach per + `KB180548 `_) without + creating an interactive session. If the check causes an account lockout, + the account is automatically unlocked. Updated ``user.present`` on Windows + to use ``shadow.verify_password`` so the password is only changed when it + differs from the current value, matching the idempotent behaviour on other + platforms. [#41347](https://github.com/saltstack/salt/issues/41347) + * Added ability to configure the pillar destination for the `netbox` ext_pillar via `destination_pillar_key` [#65531](https://github.com/saltstack/salt/issues/65531) + * Migrate Salt documentation to the PyData Sphinx theme. This update modernizes the documentation UI, improves navigation with a persistent sidebar tree, and fixes issues with embedded video playback. [#69185](https://github.com/saltstack/salt/issues/69185) + * fix etcdv3 module authentication when using etcd3-py lib [#69202](https://github.com/saltstack/salt/issues/69202) + * Added ``lgpo_reg.get_rsop_value`` to query the Resultant Set of Policy (RSoP) for a registry key/value and detect whether it is managed by a Domain Group Policy Object. The ``lgpo_reg`` module functions ``set_value``, ``disable_value``, and ``delete_value`` now log a warning when a Domain GPO is detected for the target value. The ``lgpo_reg`` state functions ``value_present``, ``value_disabled``, and ``value_absent`` append the same warning to the state comment so it is visible in state output.
[#69205](https://github.com/saltstack/salt/issues/69205) + + + -- Salt Project Packaging Wed, 24 Jun 2026 08:38:51 +0000 + salt (3006.25) stable; urgency=medium diff --git a/pkg/rpm/salt.spec b/pkg/rpm/salt.spec index 5e4f7a1e98ae..7a8c79111ed9 100644 --- a/pkg/rpm/salt.spec +++ b/pkg/rpm/salt.spec @@ -944,6 +944,322 @@ if [ $1 -ge 1 ] ; then fi %changelog +* Wed Jun 24 2026 Salt Project Packaging - 3006.26 + +# Removed + +- Removed the unmaintained `linode-python` package dependency to stop SyntaxWarnings during install for retired Linode API v3. [#68992](https://github.com/saltstack/salt/issues/68992) + +# Changed + +- Changed `salt.returners.redis_return` to enumerate the Redis keyspace + with `SCAN` instead of the blocking `KEYS pattern` command in both + `get_jids` and `clean_old_jobs`. `KEYS` walks the entire keyspace + synchronously and stalls the Redis server for the duration; on a + master with hundreds of thousands of jobs this can block all clients + of that Redis instance for seconds. `SCAN` is incremental and + non-blocking. Order of returned keys is no longer guaranteed (the + returner does not rely on order); operators with custom scripts that + read `ret:*` or `load:*` directly may see them in a different order. [#69037](https://github.com/saltstack/salt/issues/69037) + +# Fixed + +- Fixed multi-line scalar variables loaded via `import_yaml` (or `load_yaml`) being rendered as literal `\n` instead of actual newlines when the loaded data is interpolated into a YAML state file (e.g. `- context: {{ data }}`). `PrintableDict.__str__`/`__repr__` now emit string values containing newlines as YAML-safe double-quoted scalars rather than Python `repr()` so they round-trip correctly through the subsequent YAML render pass. 
[#30690](https://github.com/saltstack/salt/issues/30690) +- Handle requisites correctly for empty SLS files [#30971](https://github.com/saltstack/salt/issues/30971) +- Fixed ``win_pkg`` functions ignoring the ``saltenv`` setting in minion configuration. All public functions (``refresh_db``, ``genrepo``, ``install``, ``remove``, ``list_pkgs``, ``latest_version``, ``upgrade_available``, ``list_upgrades``, ``list_available``, ``version``, ``get_repo_data``, ``get_package_info``) now fall back to ``__opts__["saltenv"]`` when ``saltenv`` is not passed explicitly, instead of always defaulting to ``base``. [#38551](https://github.com/saltstack/salt/issues/38551) +- ``dpkg_lowpkg`` no longer reads ``/var/lib/dpkg/available`` or ``/var/lib/dpkg/info/.list`` directly. It now uses ``dpkg-query`` exclusively, addressing the lintian ``uses-dpkg-database-directly`` warning reported in #52605. ``lowpkg.info`` derives the package install time from dpkg's ``${db-fsys:Last-Modified}`` field instead of the ``.list`` file mtime. [#52605](https://github.com/saltstack/salt/issues/52605) +- Added ``encoding`` parameter to ``file.replace`` execution module and state to support UTF-16, UTF-32, and other multi-byte encoded files that would otherwise be incorrectly treated as binary. [#52793](https://github.com/saltstack/salt/issues/52793) +- Fixed `postgres._find_pg_binary` ignoring `postgres.bins_dir` when a `psql` binary is also present on the system PATH, ensuring the configured `bins_dir` is always preferred over the system PATH. [#53190](https://github.com/saltstack/salt/issues/53190) +- Percent-encode the user and password when adding HTTP basic auth to a URL so reserved characters no longer corrupt the result [#55561](https://github.com/saltstack/salt/issues/55561) +- Fixed a ``SaltCacheError`` ("maximum recursion depth exceeded") raised by the + etcd data cache when listing an empty folder, which etcd reports as a child of + itself. 
The directory walk now stops at the self-referential entry instead of + recursing indefinitely. [#57377](https://github.com/saltstack/salt/issues/57377) +- Fixed `timezone.system` state always returning `result=False` with "Failed to set UTC to True" on Windows. The hardware clock on Windows is always localtime and cannot be changed, so the UTC/hwclock block is now skipped entirely on Windows. [#57754](https://github.com/saltstack/salt/issues/57754) +- Fixed `archive.tar` placing the `-C ` option after the source/member operands, where tar ignores it. The directory-change option is now emitted before the operands so it takes effect in both create and extract modes. [#57847](https://github.com/saltstack/salt/issues/57847) +- Fixed `OSError: The operation completed successfully` raised by `CreateProcessWithTokenW` on Windows when the underlying advapi32 call fails. The error code is now read from `ctypes.get_last_error()` (the ctypes-saved slot) instead of `win32api.GetLastError()` (the live Windows slot, which may be reset to 0 before it is read). [#57848](https://github.com/saltstack/salt/issues/57848) +- Improved documentation for the `runas` and `password` parameters in `cmd.run`, `cmd.script`, and all `salt.modules.cmdmod` execution functions on Windows. The docs now accurately describe when a password is required: only when the salt-minion is **not** running as SYSTEM or as an elevated Administrator. Removed the inaccurate claim that the target user account must be in the Administrators group. Also changed `cmd.script` to log a warning instead of hard-failing when `runas` is used without a password on Windows, since a password is not always required. [#57951](https://github.com/saltstack/salt/issues/57951) +- Fixed ``pkg.group_installed``/``pkg.group_info`` failing to expand a dnf environment group whose member groups have multi-word names (e.g. 
``Group '@Common NetworkManager submodules' not found`` when installing ``Workstation`` on RHEL/AlmaLinux 8, 9 and 10). The member group is now resolved by its bare name when the ``@``-prefixed lookup fails. This affects dnf4 only; dnf5 group handling is unchanged. [#60276](https://github.com/saltstack/salt/issues/60276) +- Fix `tls.create_csr` log message path to use `os.path.join` instead of f-string interpolation so paths render correctly when csr_path has a trailing slash. [#60877](https://github.com/saltstack/salt/issues/60877) +- Fixed the LDAP eauth group-membership lookup re-binding the user on every job + payload when ``auth.ldap.freeipa`` is enabled. The user is now only re-bound on + the first payload of a job, matching the standard LDAP code path, so single-use + 2FA credentials (such as a FreeIPA OTP) are no longer consumed more than once. [#61974](https://github.com/saltstack/salt/issues/61974) +- Fixed `SSL: DECRYPTION_FAILED_OR_BAD_RECORD_MAC` errors in the VMware cloud driver by reconnecting when a cached vCenter service instance is found to be stale or corrupted (for example when inherited across a fork by salt-cloud's parallel provider queries). [#61983](https://github.com/saltstack/salt/issues/61983) +- Fix metadata grain so EC2 ``user-data`` is returned verbatim instead of being mangled by the ``=`` line-splitter, which previously corrupted any user-data payload containing ``=`` (e.g. cloud-init ``#cloud-config`` blocks). [#62061](https://github.com/saltstack/salt/issues/62061) +- Fixed LGPO ``get_policy_info`` incorrectly returning a "multiple policies" error when duplicate ADMX policy definitions (e.g. ``TerminalServer.admx`` and ``TerminalServer-Server.admx``) resolve to the same full path. [#62732](https://github.com/saltstack/salt/issues/62732) +- Re-enable test_interrupt_on_long_running_job by removing the initial-onedir-rollout skip marker. 
[#63627](https://github.com/saltstack/salt/issues/63627) +- Fix missing `dns_plugin_propagate_seconds` arg in acme state/module so DNS propagation timeout is actually forwarded to certbot. [#63700](https://github.com/saltstack/salt/issues/63700) +- Improve PAM eauth diagnostics when ``salt-master`` runs as a non-root user. Previously, ``salt-master``/``salt-api`` running as the ``salt`` user (the 3006.x packaging default) silently failed every PAM authentication with only ``Pam auth failed for :`` in the log; the cause is that the helper subprocess inherits the master's uid and PAM's ``unix_chkpwd`` refuses to validate other users without ``/etc/shadow`` access. The master now emits a one-shot CRITICAL log entry that names the cause and the two standard remediations (run as ``root``, or add the master user to the ``shadow`` group on Debian-derived distributions), and the module documentation describes the constraint. [#64275](https://github.com/saltstack/salt/issues/64275) +- Fixed incorrect minion presence events being sent out on hourly ``Maintenance`` process restarts [#64505](https://github.com/saltstack/salt/issues/64505) +- Catch StrictUndefined in salt jinja custom filters. [#64915](https://github.com/saltstack/salt/issues/64915) +- Stopped logging the misleading "An extra return was detected from minion ... this could be a replay attack" ERROR for benign duplicate returns (also fixes #65516). The local_cache returner now compares a duplicate return to the cached one and logs at DEBUG when the payloads match (the common retry-after-timeout or syndic re-forward case) and at WARNING -- without the "replay attack" wording -- when the payloads differ. [#65301](https://github.com/saltstack/salt/issues/65301) +- Fixed non-root salt CLI access when ``publisher_acl`` or ``external_auth`` is configured. 
Since 3006.3 the master defaults to running as the ``salt`` user, which left ``sock_dir`` and ``cachedir`` mode ``0o750`` and blocked authorised non-root users from traversing into them to reach ``master_event_pub.ipc`` / ``publish_pull.ipc`` and their per-user ``._key``. The master now adds the world-execute bit to those two directories when ACLs are configured, without exposing directory listings. [#65317](https://github.com/saltstack/salt/issues/65317) +- Fixed ``salt.ext.tornado.netutil`` import on Python 3.12+ where ``ssl.match_hostname`` was removed and the unmaintained ``backports.ssl_match_hostname`` package is unavailable, which previously broke any Salt master-initiated job (e.g. ``test.ping``, ``state.apply``) on Fedora 39+/Ubuntu 24.04 masters. [#65360](https://github.com/saltstack/salt/issues/65360) +- See #65301 -- the same fix to ``salt/returners/local_cache.py`` quiets the spurious "extra return ... replay attack" ERROR that appeared in multimaster and master-of-masters/syndic setups when the same return arrived more than once. [#65516](https://github.com/saltstack/salt/issues/65516) +- Fix deadlock in parallel `cmd.script` states when the script is served by the master. + + Same fork-inherited ZeroMQ socket race as the `file.managed` fix: a + `cmd.script` state with `parallel: True` downloads the script via + `cp.cache_file` in a forked child that inherited the parent's ZeroMQ + REQ socket, deadlocking the asyncio loop at ~100% CPU. Resolved by the + same `os.register_at_fork` handlers that drop inherited channel/socket + references in forked children. [#65709](https://github.com/saltstack/salt/issues/65709) +- Fixed pip.uninstall rejecting the extra_args keyword argument, matching the behavior of pip.install. [#65870](https://github.com/saltstack/salt/issues/65870) +- Fixed salt-ssh failing to fetch ``gitfs_remotes``. 
``salt.config.master_config`` + sets ``__fs_update = True`` to suppress fileserver refreshes done by ``FSChan`` + (the master daemon's maintenance thread handles them). salt-ssh inherits the + master config but has no maintenance thread, so its ``FSClient`` never refreshed + the fileserver backends and wrappers such as ``cp.list_states`` saw no gitfs + content until the user ran ``salt-run fileserver.update`` or manually + ``git fetch``ed the cached repos. ``salt.client.ssh.SSH.__init__`` now removes + the suppression flag before instantiating ``FSClient`` so gitfs is refreshed + once at startup. [#66148](https://github.com/saltstack/salt/issues/66148) +- Fixed ``salt/version.py`` reporting the wrong major version on the 3006.x branch when built from a checkout that has no ``salt/_version.txt`` and no usable ``.git`` directory. ``SaltVersionsInfo.current_release()`` now returns the branch's own codename (``Sulfur``) instead of the next un-released codename in the table, so source builds and other tooling no longer leak ``3007.0`` into the reported version. [#67061](https://github.com/saltstack/salt/issues/67061) +- Fixed ``saltutil.runner`` and ``saltutil.wheel`` running master-side functions + as the minion's user (typically ``root``) instead of the master's configured + user (the packaged default since 3006 is ``salt``). Running as the wrong user + left root-owned files in, and tripped git's ``safe.directory`` check on, the + salt-owned master cache -- breaking, for example, ``git_pillar.update`` invoked + via ``saltutil.runner``. These functions now drop to the master's configured + user before executing when invoked from a more-privileged process. [#67716](https://github.com/saltstack/salt/issues/67716) +- Fixed `LocalClient.cmd_subset` raising `TypeError: argument of type 'bool' is not iterable` when one or more targeted minions failed to respond to the `sys.list_functions` probe. Failed minions are now skipped during subset selection. 
[#68103](https://github.com/saltstack/salt/issues/68103) +- Fixed ``slack_bolt`` engine crashing with ``UnboundLocalError`` when a Slack workflow or other bot posts a message to a monitored channel. Bot messages (``subtype: bot_message``) carry ``bot_id`` and ``username`` instead of a ``user`` field, and these are now used as fallbacks so the engine continues running. [#68105](https://github.com/saltstack/salt/issues/68105) +- Fixed `user.present` to not fail with `result: False` in test mode when a referenced group does not yet exist; the state now reports the pending changes so users can preview states that depend on groups created by a `group.present` requisite in the same run. [#68110](https://github.com/saltstack/salt/issues/68110) +- Fixed ``salt-minion`` and ``salt-proxy`` leaving a privileged (root) keepalive supervisor process at the head of an otherwise unprivileged minion process tree when ``user`` is set to a non-root account. The supervisor now drops privileges to the configured user once the keepalive child has been spawned. [#68115](https://github.com/saltstack/salt/issues/68115) +- Fixed ``ValueError: Formatting field not found in record: 'colorlevel'`` errors when ``log_fmt_console`` uses custom color attributes such as ``%(colorlevel)s`` or ``%(colormsg)s``. ``SaltLogRecord`` now always provides the ``color*`` attributes (uncolored by default) so that log records buffered by the temporary deferred stream handler can be formatted by a colorized console formatter once it is installed. [#68129](https://github.com/saltstack/salt/issues/68129) +- Fixed ``salt-call`` silently ignoring ``--file-root``, ``--pillar-root``, and ``--states-dir`` when ``--local`` was not passed. These overrides only affect the local minion config and are clobbered by the master's values via the remote file client, so ``salt-call`` now emits a warning explaining that ``--local`` is required for the override to take effect. 
[#68137](https://github.com/saltstack/salt/issues/68137) +- Fixed event signature verification failing under ``minion_sign_messages``. The minion was signing the return load before ``salt.channel.client.AsyncReqChannel._package_load`` attached transport metadata (``nonce``, ``ts``, ``tok``, ``id``), so the bytes the master re-serialized to verify did not match what was signed and every signed return was dropped. Signing is now performed inside ``_package_load`` after the metadata is attached, against the same bytes the master verifies. [#68181](https://github.com/saltstack/salt/issues/68181) +- Fixed ``pkgrepo.managed`` not honouring ``clean_file: True`` when the desired + repo line is already present in the managed file alongside unrelated stale + lines. Previously the state returned "already configured" and silently + skipped both the file truncation and the re-write, leaving the stale + entries (for example an obsolete ``bullseye-backports`` line in a file + managed for ``bookworm-backports``) in place. The clean + reconfigure + path now runs whenever the managed file contains any non-comment, + non-blank content other than the desired repo line; when the file already + contains only the desired line the state remains idempotent. [#68208](https://github.com/saltstack/salt/issues/68208) +- Fixed ``pkg.group_installed`` reporting failure on RPM-based systems when a package group's default or optional members are not available in any enabled repository. The state now only considers mandatory group members and explicitly requested ``include`` packages when checking for install failures, matching the behavior of ``yum/dnf group install`` (which reports "No match for group package" but still exits 0). 
[#68210](https://github.com/saltstack/salt/issues/68210) +- Pass ``--disable-pip-version-check`` when ``pip.list``, ``pip.freeze``, ``pip.list_upgrades``, ``pip.upgrade``, and ``pip.list_all_versions`` invoke pip, so these calls no longer hang for ~20s per invocation on airgapped minions while pip tries to reach PyPI for its self-version check. [#68214](https://github.com/saltstack/salt/issues/68214) +- Fixed ``archive.extracted`` failing to enforce ``user``/``group`` ownership on archives whose tar/zip members include no explicit directory entries (e.g. Oracle's GraalVM JDK tarballs). ``archive.list`` now derives the top-level directory from the common prefix of file and link members in addition to dir members, so ownership is applied to the extracted top-level directory in all cases. [#68227](https://github.com/saltstack/salt/issues/68227) +- Fixed deltaproxy sub-proxies returning identical grain data for every controlled minion. ``subproxy_post_master_init`` now re-packs each sub-proxy's freshly loaded per-minion grains into its execution-module, returner, executor and proxy LazyLoaders so ``__grains__`` inside loaded modules reflects that sub-proxy's device instead of the placeholder values captured during the first-pass grains load through the control proxy. [#68248](https://github.com/saltstack/salt/issues/68248) +- Fixed the salt-minion (and salt-api, salt-cloud, salt-master, salt-syndic) Debian postinst scripts hanging or erroring with "Bad file descriptor" when run from a non-interactive Debian preseed late_command chroot, by tearing down the debconf protocol with ``db_stop`` and explicitly closing file descriptor 3 before the auto-generated ``#DEBHELPER#`` section runs. [#68269](https://github.com/saltstack/salt/issues/68269) +- Fixed ``file.managed`` failing with ``WinError 123`` on Windows when caching a remote URL whose path embeds another URL (e.g. an archive.org snapshot of an ``https://...`` resource). 
The URL-path portion of the ``extrn_files`` cache path is now sanitised the same way the network location already is. [#68273](https://github.com/saltstack/salt/issues/68273) +- Fixed ``logrotate.set`` dropping the second ``endscript`` (and turning + embedded shell commands into bogus setting keys) when a stanza contained + multiple script blocks such as both ``prerotate`` and ``postrotate``. Script + directives are now parsed as opaque multi-line bodies and round-trip with + their own ``endscript`` terminator each. [#68293](https://github.com/saltstack/salt/issues/68293) +- Fixed the `salt.state` orchestrate state silently reporting only `Run failed on minions: ` when a targeted minion returned `False`, no return at all, or a list of error strings. The orchestrate comment now includes the per-minion failure detail (the minion's actual return value or "did not return a state result") so operators can diagnose `salt-run state.orchestrate` failures without re-running with extra logging. [#68326](https://github.com/saltstack/salt/issues/68326) +- Fixed worker process crash when salt is used outside CLI tools. [#68332](https://github.com/saltstack/salt/issues/68332) +- Fixed ``clean_old_jobs`` in the default local job cache returner to use the jid file's modification time (``st_mtime``) instead of the inode change time (``st_ctime``). A package upgrade's ``chown -R /var/cache/salt/master`` resets ``st_ctime`` on every existing jid file, which previously made the maintenance process treat every pre-upgrade job as freshly created and prevented cleanup until ``keep_jobs_seconds`` had elapsed. On busy masters this exhausted the partition's inodes within a day. [#68351](https://github.com/saltstack/salt/issues/68351) +- Fixed the ``proxmox`` salt-cloud driver raising ``Could not determine an IP address to use`` before the VM was created and started. 
The IP address is now determined after the VM is running, and the running VM's address reported by Proxmox is used as a fallback when neither a static ``ip_address`` nor ``agent_get_ip`` is configured. [#68353](https://github.com/saltstack/salt/issues/68353) +- Changed ``KillMode`` in the shipped ``salt-minion.service`` systemd unit from ``process`` to ``mixed`` so that ``systemctl stop`` / ``systemctl restart salt-minion`` no longer leaves orphaned ``Minion._thread_return`` worker processes outside the cgroup. SIGTERM is still sent only to the main PID (so the job return scheduled by ``service.restart salt-minion`` from #68183 has time to finish), but any remaining children are reaped with SIGKILL after the main process exits or ``TimeoutStopSec`` elapses. [#68406](https://github.com/saltstack/salt/issues/68406) +- Fixed `task.edit_task` on Windows rejecting `restart_count=999` even though the documented and error-message-stated maximum is 999. The validation now accepts the full 1..999 range. [#68419](https://github.com/saltstack/salt/issues/68419) +- Fixed ``win_task.add_trigger`` so that ``repeat_duration="Indefinitely"`` actually produces an indefinite repetition pattern. Previously the empty string from the internal duration lookup was assigned to ``Repetition.Duration``, which the Windows Task Scheduler treats as "0 seconds" and silently disables repetition. The Duration property is now left at its default for the "Indefinitely" case, which is the documented way to repeat forever. [#68420](https://github.com/saltstack/salt/issues/68420) +- Fixed ``user.setpassword`` on Windows reporting success (``retcode: 0``) when the target user does not exist. The execution module now returns ``False`` and logs an error in that case, so callers and the ``user.present`` state correctly detect the failure instead of swallowing the Win32 "user name could not be found" message as a successful return. 
[#68428](https://github.com/saltstack/salt/issues/68428) +- Fixed ``user.present`` on Windows so it actually updates a user's password + when the existing password differs from the one specified in the state. + Previously the state reported "User is already present and up to date" and + left the password unchanged. [#68429](https://github.com/saltstack/salt/issues/68429) +- Stop salt-ssh state runs from clobbering the master-side fileclient ``cachedir`` with the on-target ``thin_dir`` cachedir. The state fileserver cache for salt-ssh state runs is now written under the configured master ``cachedir`` (e.g. ``/var/cache/salt/master/``) instead of under the minion's thin_dir path on the master filesystem. [#68458](https://github.com/saltstack/salt/issues/68458) +- Fixed ``pkg.add_repo_key`` and ``pkgrepo.managed`` so APT keyring files that target an ``.asc`` destination keep their ASCII armor instead of being dearmored, matching the apt-secure(8) convention and allowing armored keyfiles that bundle multiple keys to be installed even when the ``gpg`` binary is not available on the minion. [#68464](https://github.com/saltstack/salt/issues/68464) +- Fixed ``jobs.list_jobs search_metadata`` so it matches jobs whose metadata + was passed as a CLI keyword argument (e.g. ``state.apply metadata={...}``) + and is therefore carried inside the job's ``Arguments`` rather than at the + top of the job payload. [#68481](https://github.com/saltstack/salt/issues/68481) +- Fixed `lgpo.set` state reporting "Failed to set the following policies" on subsequent runs of policies with sub-elements (e.g. Storage Sense thresholds). The state compared a user-supplied dict keyed by element id with a current dict keyed by the ADML display name; both forms now normalize to the canonical element id before comparison so the state is idempotent. 
[#68489](https://github.com/saltstack/salt/issues/68489) +- Fixed minion rejecting the master with "Invalid master key" after restart when the cached `minion_master.pub` differs from the master's payload pub_key only in trailing whitespace. `AsyncAuth.verify_master` now normalizes both sides through `clean_key` before comparing and caches the normalized form on first contact. [#68493](https://github.com/saltstack/salt/issues/68493) +- Fixed ``TypeError: 'NoneType' object is not iterable`` raised from ``AsyncReqMessageClient._send_recv`` when a per-message timeout completes the future before the send/receive coroutine catches a transient transport exception, which aborted the minion's connect loop and prevented it from connecting to the master. [#68506](https://github.com/saltstack/salt/issues/68506) +- Fixed ``docker_network.present`` recreating networks on every run against Docker 29+. Docker 29 added an empty ``IPRange`` field to every IPAM Config entry; ``docker.compare_networks`` now drops empty/None placeholder values before comparing pools, and the state's default-pool short-circuit treats the empty field as absent. [#68518](https://github.com/saltstack/salt/issues/68518) +- Fixed `pkg.installed` verification on x86_64 hosts that mix `x86_64` and `x86_64_v2` packages (e.g. AlmaLinux 10.1). `salt.utils.pkg.rpm.resolve_name` and `salt.modules.yumpkg.normalize_name` now treat `x86_64_v2` as compatible with `x86_64` instead of appending the arch suffix, so installed packages match the names Salt records. [#68540](https://github.com/saltstack/salt/issues/68540) +- Fixed ``mysql_grants.present`` reporting "Failed to execute" when granting ``ALL PRIVILEGES`` on ``*.*`` against MySQL 8.4+, where the server's privilege set drifted from Salt's hard-coded list (``SET_USER_ID`` removed, many dynamic privileges added). ``grant_exists`` now derives the expected privilege set from the connected server's ``SHOW PRIVILEGES`` output instead of a static list. 
[#68567](https://github.com/saltstack/salt/issues/68567) +- Fixed ``cp.get_template`` raising ``AttributeError: 'NoneType' object has no attribute 'get'`` when the Jinja template uses ``{% from '...' import ... with context %}``. The cp module's loader-backed ``__opts__`` is now unwrapped to a plain dict before the SaltCacheLoader instantiates the file client and channel that fetch the imported template. [#68572](https://github.com/saltstack/salt/issues/68572) +- Fixed `ImportError: cannot import name 'wait' from partially initialized module 'multiprocessing.connection'` raised during salt-master/minion shutdown when a reentrant SIGTERM hit `ProcessManager.kill_children()` mid `Process.join(0)`. `salt.utils.process` now eagerly imports `multiprocessing.connection` so the module is fully initialised before any signal handler can trigger its lazy import. [#68573](https://github.com/saltstack/salt/issues/68573) +- Fixed `cmd.script` on Windows raising `Invalid user: ` when `runas` is a domain account (`DOMAIN\user`, `user@DOMAIN`, or a SID). The pre-execution `user.info` check is backed by `NetUserGetInfo` which only resolves local-machine accounts and returns empty for many valid domain users; the missing lookup is now logged as a warning and execution continues so the underlying `win_runas` machinery can authenticate the account. [#68578](https://github.com/saltstack/salt/issues/68578) +- Fixed `pkg.install` on Windows silently downgrading the salt-minion when a numeric `version=` argument was passed (e.g. `version=3007.10` was YAML-parsed to the float `3007.1` and then matched the wrong winrepo entry). When the numeric version uniquely matches a string-keyed winrepo entry it is now resolved to that entry; when it is ambiguous (e.g. both `3007.1` and `3007.10` are in the winrepo) the install is refused with a clear error pointing the user at the quoted-version syntax. 
[#68620](https://github.com/saltstack/salt/issues/68620) +- Fixed the loader masking failure reasons when multiple modules declare the same `__virtualname__` and each `__virtual__()` returns False, so users now see every reason (e.g. both x509 v1's "Superseded, using x509_v2" and x509_v2's "Could not load cryptography") instead of only the first one recorded. [#68625](https://github.com/saltstack/salt/issues/68625) +- Fix `NetapiClient.runner` raising `TypeError` when `timeout` arrives as a string from the salt-api HTTP form. [#68653](https://github.com/saltstack/salt/issues/68653) +- Fixed `master_job_cache: redis_return` raising `KeyError: 'redis_return.prep_jid'` by registering the `redis` returner under both `redis` and `redis_return` virtual names, matching the documented `--return redis_return` usage and the module's file name. [#68663](https://github.com/saltstack/salt/issues/68663) +- Fixed ``ini.options_present`` with ``strict: True`` to remove sections that are present in the ini file but absent from the supplied ``sections`` mapping. [#68673](https://github.com/saltstack/salt/issues/68673) +- Handle `SaltDeserializationError` in grains cache loading so a corrupted cache file no longer propagates as CRITICAL during minion startup. [#68678](https://github.com/saltstack/salt/issues/68678) +- Fixed ``network.interfaces`` on Windows systems falling back to WMI (i.e. .NET older than 4.7.2): the default gateway is now reported under ``gateway`` instead of being mistakenly emitted as ``broadcast``. [#68692](https://github.com/saltstack/salt/issues/68692) +- Fixed ``file.managed`` (and other template-rendering callers) silently overwriting user-supplied ``slspath``, ``sls_path``, ``slsdotpath`` and ``slscolonpath`` values in ``defaults``/``context`` with values regenerated from the caller's ``sls`` key. [#68754](https://github.com/saltstack/salt/issues/68754) +- Fixed ``env_order`` not being honored when merging pillar data across environments. 
``Pillar.render_pillar`` now iterates matched environments in the configured ``env_order`` so that, with ``top_file_merging_strategy: merge_all``, the last environment in ``env_order`` wins on conflicting pillar keys instead of the result depending on dict insertion order. [#68785](https://github.com/saltstack/salt/issues/68785) +- Improved the "Malformed topfile" error from ``HighState.verify_tops`` to name the saltenv and the matcher whose state declarations were not formed as a list, so users can locate the offending entry in their ``top.sls``. [#68792](https://github.com/saltstack/salt/issues/68792) +- Removed orphaned GnuPG dotlock files (``.#lk..``) from ``gpg_keydir`` before each decrypt in the ``gpg`` renderer so they no longer accumulate when a gpg subprocess is killed mid-operation. [#68869](https://github.com/saltstack/salt/issues/68869) +- Fix `pkg.installed` idempotency on FreeBSD when `with_origin=True` causes + `pkg.list_pkgs` to return per-package dicts instead of version lists; extract + the version list before version-string comparison so a second state run no + longer falsely reports packages as changed. [#68886](https://github.com/saltstack/salt/issues/68886) +- Fix gen_signature() signing raw pub key content instead of clean_key'd content, causing master_use_pubkey_signature verification to always fail. [#68930](https://github.com/saltstack/salt/issues/68930) +- Fixed spurious ``FileLockError: lock_fn ... exists and is not a file`` raised by ``salt.utils.files.wait_lock`` and ``salt.utils.files.await_lock`` (and therefore by ``state.apply`` queue locking) when another process removed the lock file between the two separate ``os.path.exists`` / ``os.path.isfile`` stats. The pre-check now uses a single ``os.stat`` call so a transient regular-file lock no longer trips the "not a file" branch. 
[#68931](https://github.com/saltstack/salt/issues/68931) +- Fixed pkg.installed(update_holds=True) for APT multiarch packages by preserving arch-qualified package names through install target parsing and verification. [#68932](https://github.com/saltstack/salt/issues/68932) +- Fix deadlock in parallel `file.managed` states when source is served by the master. + + Forked parallel-state children previously inherited the parent's ZeroMQ + REQ socket and asyncio loop from `salt.fileclient.RemoteClient`, + `salt.crypt.AsyncAuth/SAuth`, and `salt.utils.event.SaltEvent`. Multiple + sibling children racing those handles deadlocked the asyncio loop with + ~98% CPU and never completed. Salt now registers `os.register_at_fork` + handlers on those classes that drop inherited channel/socket references + in any forked child; the next use rebuilds them fresh. [#68940](https://github.com/saltstack/salt/issues/68940) +- Fixed grain and pillar targeting matching minions whose data cache entry was missing. ``CkMinions._check_cache_minions`` now excludes accepted minions that have no cached grains/pillar data from greedy target results, instead of silently including them as candidates. [#68976](https://github.com/saltstack/salt/issues/68976) +- Avoid AttributeError on a closed IPCClient when the connect coroutine resolves after close(). [#68993](https://github.com/saltstack/salt/issues/68993) +- Fixed `salt.utils.network.sanitize_host` stripping colons from IPv6 addresses, which broke `network.ping` and any other caller that passed an IPv6 host. [#68995](https://github.com/saltstack/salt/issues/68995) +- Added support for MAINTAIN (m) privilege introduced in PostgreSQL 17 to salt.modules.postgres and salt.states.postgres_privileges [#69003](https://github.com/saltstack/salt/issues/69003) +- Fixed `redis.get_master_ip` silently dropping the `password` argument. 
The + function was forwarding its arguments positionally to `_connect`, but + `_connect`'s third positional slot is `db`, not `password`, so the + caller's password landed in the database-index argument and the actual + password fell through to `config.option("redis.password")`. Arguments + are now passed by keyword. [#69029](https://github.com/saltstack/salt/issues/69029) +- Fixed `salt.modules.redismod._connect` rejecting valid `db=0`. The helper + used a truthy check (`if not db`) to decide whether to fall back to + `config.option("redis.db")`, but `not 0` is `True`, so an explicitly + supplied `db=0` was silently replaced by the configured value. The check + is now `if db is None`, matching the pattern already used by the sibling + `_sconnect` helper in the same module. Other arguments keep their + truthy-check semantics on purpose. [#69030](https://github.com/saltstack/salt/issues/69030) +- Fixed two distinct bugs in the `salt.engines.redis_sentinel` engine that + together prevented it from being usable. `start()` no longer raises + `AttributeError: 'dict_values' object has no attribute 'pop'` on Python 3 + (the dict.values() result is now wrapped in `list(...)`). `Listener` and + `start()` now accept an optional `password` argument and forward it to + the redis client, allowing the engine to authenticate against a Sentinel + that requires AUTH; the default of `None` keeps existing configurations + working unchanged. [#69031](https://github.com/saltstack/salt/issues/69031) +- Fixed `salt.returners.redis_return` silently ignoring the documented + `redis.password` configuration option. The returner now reads + `redis.password` from config (in both regular and proxy modes) and + forwards it to both the single-server `redis.StrictRedis` and the + `StrictRedisCluster` constructors. Operators with auth-protected Redis + no longer lose every job return to a hidden `NOAUTH Authentication + required` failure; deployments without a password are unaffected. 
[#69032](https://github.com/saltstack/salt/issues/69032) +- Fixed three closely-related bugs in `salt.cache.redis_cache` that + together broke hierarchical-bank semantics: + `_build_bank_hier` now registers each child bank name in both the + parent's `$BANK_` set (consumed by `flush()` tree traversal) and the + parent's `$BANKEYS_` set (consumed by `list_()`); `_get_banks_to_remove` + now decodes the bytes returned by `smembers` and skips the `"."` + placeholder, so recursive `flush()` of a parent bank actually descends + into sub-banks instead of corrupting the path; and `flush(bank)` of a + sub-bank now removes the flushed bank's own reference from its + parent's index sets so `list_(parent)` no longer reports it as + present. Together these fixes restore `cache.list("minions")`, + `salt-run manage.present` and `salt-run manage.up` for masters + configured with `cache: redis`. [#69033](https://github.com/saltstack/salt/issues/69033) +- Fixed `salt.tokens.rediscluster` being unable to retrieve any eauth + token. The cluster client was created with `decode_responses=True`, + which caused `redis_client.get()` to return `str` and broke + `salt.payload.loads` (msgpack rejects `str`); it also caused + `redis_client.keys()` to return `str` and broke + `[k.decode("utf8") for k in ...]` (`str` has no `.decode`). Both + errors were swallowed by broad `except Exception` handlers, so eauth + appeared to silently reject every token. `decode_responses=True` is + removed; values now round-trip as bytes through msgpack as the rest + of the module already expected. [#69035](https://github.com/saltstack/salt/issues/69035) +- Fixed `salt.returners.redis_return` leaking `{minion}:{fun}` last-jid + pointer keys indefinitely. The pointer was written with `pipeline.set` + and no `ex=` TTL, so any (minion, fun) pair that stopped running stuck + in Redis forever -- O(minions × distinct funcs) keys accumulating over + the lifetime of the master.
The pointer now expires on the same TTL + as the rest of the returner data (`keep_jobs_seconds`). Operators with + external scripts reading these keys directly may observe them + expiring; the documentation never promised they would not. [#69038](https://github.com/saltstack/salt/issues/69038) +- Fixed `salt.returners.redis_return.get_fun` always returning an + empty dict. The function read return data from a `{minion}:{jid}` + key that no other code in the module ever wrote -- a leftover from + an older storage schema. It now reads from the canonical + `ret:{jid}` hash via `HGET ret:{jid} {minion}`, matching the + storage layout that `returner` actually produces and the read + pattern that `get_jid` already uses. [#69039](https://github.com/saltstack/salt/issues/69039) +- Fixed `salt.returners.pgjsonb` writing database errors to `sys.stderr` + instead of Salt's logger. Errors from `_get_serv`, `_purge_jobs` and + `_archive_jobs` are now reported via `log.exception`, so they reach + the configured `log_file` / syslog destination on a daemonized master, + including a full traceback. The unused `import sys` is also dropped. [#69048](https://github.com/saltstack/salt/issues/69048) +- Fixed `salt.returners.pgjsonb.returner` letting any non-connection + `psycopg2.DatabaseError` propagate to the caller — including the + syndic-aggregate publish path in `salt/master.py` which had no outer + catch — so a single bad row could escape into a master subprocess. + `event_return` had no error handling at all and a database failure + during a flush propagated similarly. Both functions now catch + `SaltMasterError` and `psycopg2.DatabaseError` locally, log a + contextual message (jid/id for returns, batch size for events), and + drop the affected payload. While here, fix `event_return` passing + the events list as the positional `ret` argument to `_get_serv`, + which was a copy-paste leftover from `returner(ret)`.
[#69058](https://github.com/saltstack/salt/issues/69058) +- Fixed `salt-api`'s `/events` endpoint accepting eauth tokens via query + string (``?token=...`` or ``?salt_token=...``). Tokens supplied that + way end up in HTTP access logs, the browser ``Referer`` header, log- + aggregation systems and shell history; the token retains validity for + ``token_expire`` (12h by default), so any party reading those logs can + replay the token. The endpoint now rejects query-string tokens with a + 400 error pointing at the ``X-Auth-Token`` header (for non-browser + clients) or the session cookie established by ``/login`` (for browser + ``EventSource`` clients) as the supported channels. ``X-Auth-Token`` + header support is added; cookie-based auth continues to work + unchanged. [#69071](https://github.com/saltstack/salt/issues/69071) +- ``LoadAuth.get_tok`` now distinguishes between corrupt token blobs (removed from the store) and transient backend errors such as Redis connection drops or NFS hangs (token kept, request treated as not-authenticated). Previously a single backend hiccup could log every authenticated user out by deleting valid tokens. [#69073](https://github.com/saltstack/salt/issues/69073) +- ``cmd.run`` and friends no longer include the ``env`` and ``stdin`` arguments in the ``CommandExecutionError`` raised when the underlying subprocess fails to start (typically ``ENOENT`` / binary not found). Both fields routinely carry credentials passed in by the caller (``env={"DB_PASSWORD": "..."}``, password piped via ``stdin``), and the error message ends up in master/minion logs and in event-bus return data visible to the API caller. 
[#69075](https://github.com/saltstack/salt/issues/69075) +- Lowered the "Cache version mismatch clearing" log message in ``salt.utils.cache.verify_cache_version`` from ``WARNING`` to ``DEBUG``; the cache is rebuilt as part of normal operation after upgrades or when an ephemeral cache directory has been removed, and does not warrant user attention. [#69106](https://github.com/saltstack/salt/issues/69106) +- * Relenv 0.22.14 + - Update sqlite to 3.53.2.0 + - Update openssl to 3.5.7 [#69129](https://github.com/saltstack/salt/issues/69129) +- Surface the real cause of a proxymodule load failure in salt-proxy's abort message. The misleading "Proxymodule X is missing an init() or a shutdown() or both" wording is now only used when init/shutdown really are missing from a loaded module; if the module failed to load (for example because its ``__virtual__`` returned False), the underlying reason is included in the error. [#69139](https://github.com/saltstack/salt/issues/69139) +- Fixed ``pkg.hold`` and ``pkg.list_holds`` on dnf5 systems (e.g. Fedora 42+): + ``pkg.hold`` now calls ``dnf5 versionlock add PACKAGE`` (the bare + ``versionlock PACKAGE`` form was rejected by dnf5), and ``pkg.list_holds`` + reads ``/etc/dnf/versionlock.toml`` directly so ``pkg.installed`` with + ``hold: true`` is again idempotent. [#69181](https://github.com/saltstack/salt/issues/69181) +- Fixed Salt-SSH syncing internal modules as extmods [#69199](https://github.com/saltstack/salt/issues/69199) +- Fixed ``lgpo_reg.value_absent`` failing when the Registry.pol entry was already absent but the registry value still existed. ``lgpo_reg.delete_value`` was returning early before reaching the registry cleanup code, causing the state to see no changes and report failure. The registry value is now removed regardless of whether the pol entry was present.
[#69203](https://github.com/saltstack/salt/issues/69203) +- Fixed `postgres_local_cache.save_load` raising `psycopg2.errors.UniqueViolation` when more than one master in an active-active multi-master cluster persists the same JID; the INSERT is now idempotent via `ON CONFLICT (jid) DO NOTHING` on PostgreSQL >= 9.5, and the duplicate is tolerated on older servers. [#69214](https://github.com/saltstack/salt/issues/69214) +- Fixed Windows MSI self-upgrade via ``pkg.install`` failing with error 1603. The old product's ``DeleteConfig_DECAC`` custom action was unconditionally deleting ``ROOTDIR\var`` during ``RemoveExistingProducts``, destroying the MSI that ``pkg.install`` had cached to ``ROOTDIR\var\cache`` before launching the upgrade. Users who had ``REMOVE_CONFIG=1`` persisted in the registry (from checking "On uninstall" at install time) hit a worse variant where the entire ``ROOTDIR`` was deleted. The fix checks ``UPGRADINGPRODUCTCODE`` — set by Windows Installer whenever an uninstall is triggered by a major upgrade — and skips all ``ROOTDIR`` deletion during upgrades, matching the behaviour of the NSIS installer which has always preserved ``ROOTDIR`` during upgrades. [#69219](https://github.com/saltstack/salt/issues/69219) +- Fixed `TypeError: string indices must be integers` in the minion when the master returns a bare string error response (e.g. `"bad load"`, `"Some exception handling minion payload"`) for a pillar request. The minion now raises a clean `AuthenticationError` instead of crashing, allowing the caller to retry or fail gracefully. [#69228](https://github.com/saltstack/salt/issues/69228) +- pkg.list_patches in yumpkg.py parses tdnf output on Photon OS [#69229](https://github.com/saltstack/salt/issues/69229) +- Fix `git.tag` so that the documented `message` argument is actually forwarded to `git tag`, creating an annotated tag with the supplied message instead of silently producing a lightweight tag. 
[#69298](https://github.com/saltstack/salt/issues/69298) +- Fixed `salt.auth.pam` conversation callback so it answers `PAM_PROMPT_ECHO_ON` prompts with the supplied username; previously only `PAM_PROMPT_ECHO_OFF` prompts were answered, which caused `pam_authenticate` to silently fail (and salt-api to return 401) against PAM stacks that re-prompt for the user. [#69304](https://github.com/saltstack/salt/issues/69304) +- Ensure multiple masters have their own job/state queues [#69308](https://github.com/saltstack/salt/issues/69308) +- Fixed loading private keys from PKCS#12 containers with x509_v2 [#69312](https://github.com/saltstack/salt/issues/69312) +- Fixed creating self-signed PKCS#12-encoded certificates [#69319](https://github.com/saltstack/salt/issues/69319) +- Fixed minion state queue replacing the master-assigned JID on queued state runs, so returns now come back tagged with the JID the master actually published. [#69386](https://github.com/saltstack/salt/issues/69386) +- Made the salt user's home directory and the relenv ``extras-`` directory configurable in the Linux packaging. The DEB preinst scripts now source ``/etc/default/salt-setup`` (and ``/etc/sysconfig/salt-minion-setup`` for cross-distro parity with RPM) before applying the ``SALT_HOME``/``SALT_USER``/``SALT_GROUP``/``SALT_NAME`` defaults, mirroring the long-standing RPM behavior. A new ``SALT_EXTRAS_DIR`` override is honored by both stacks so the extras tree can be relocated outside ``/opt/saltstack/salt`` and its ownership is correctly restored on upgrade. [#69402](https://github.com/saltstack/salt/issues/69402) +- Fixed minion worker threads hanging or crashing when returning job results + to the master. The main process now fires an error event back to the worker + when ``req_channel.send()`` times out, so workers wake up immediately rather + than waiting out their full timeout. 
Replaced the bare ``TimeoutError`` raised + in ``_send_req_sync`` with ``SaltReqTimeoutError`` so ``_return_pub``'s existing + handler catches it correctly. The worker's wait timeout is now derived from + ``return_retry_timer_max * return_retry_tries`` to ensure it always outlasts + the main process's retry budget. [#69416](https://github.com/saltstack/salt/issues/69416) +- Fixed zsh completion by using the proper python3 instead of python2. [#69419](https://github.com/saltstack/salt/issues/69419) +- Fixed Photon OS Arm64 FIPS CI by re-enabling the OpenSSL default provider after installing openssl-fips-provider, working around the disabled-default-provider bug in `openssl-fips-provider <= 3.1.2-3.ph5` on the lagging Photon aarch64 mirror. [#69449](https://github.com/saltstack/salt/issues/69449) +- Add regression test for changelog template multi-line rendering and harden template with indent filter so continuation lines are correctly indented under the bullet (defensive backport of #69458 to 3006.x). [#69454](https://github.com/saltstack/salt/issues/69454) +- Fixed minion not honoring SIGTERM while stuck in the master DNS retry loop, which caused systemd to escalate to SIGKILL after 90 seconds. [#69466](https://github.com/saltstack/salt/issues/69466) +- Fixed ``lgpo_reg`` module and state functions failing on Windows Domain Controllers with ``Access is denied`` when writing to ``HKLM\SOFTWARE\Policies\`` subkeys. The ``set_value``, ``disable_value``, and ``delete_value`` execution module functions now accept a ``write_registry`` parameter (default ``None``) that auto-detects Domain Controllers via the ``ProductType`` registry key and skips the direct registry write when one is detected, instead relying on the Group Policy engine to apply the policy on the next refresh. An explicit ``True`` or ``False`` overrides auto-detection. 
A ``refresh_policy`` parameter (default ``False``) has been added to all three functions to trigger an in-process ``userenv.RefreshPolicy`` call immediately after the ``Registry.pol`` file is updated. The corresponding state functions ``value_present``, ``value_disabled``, and ``value_absent`` expose the same parameters. A standalone ``lgpo_reg.refresh_policy`` execution function and ``lgpo_reg.refresh_policy`` state have been added to allow a single Group Policy refresh to be issued after a batch of policy writes. ``is_domain_controller`` has been added to ``salt.utils.win_functions`` and ``refresh_policy`` has been added to ``salt.utils.win_lgpo_reg``. [#69468](https://github.com/saltstack/salt/issues/69468) +- Fixed 3006.x Windows nightly CI by pinning the runner-host Python to 3.14.6 (OpenSSL 3.5.7); the setup-python default `3.14` was resolving to a cached 3.14.5 build whose OpenSSL 3.0.20 rejected the cert pypi.org currently serves. [#69486](https://github.com/saltstack/salt/issues/69486) +- Fixed 3006.x Windows nightly CI Deps by dropping a sitecustomize hook into the salt onedir's `Lib/site-packages` that applies the cpython#104135 iter-and-skip patch before pip touches TLS; the prior runner-host Python pin in #69486 targeted the wrong interpreter (the failing pip runs in a venv created from the relenv-bundled Python 3.10) and is reverted. [#69490](https://github.com/saltstack/salt/issues/69490) +- Fixed ``lgpo_reg`` failures on Windows when ``Registry.pol`` is temporarily locked by the Group Policy service or other processes. Salt now uses ``EnterCriticalPolicySection`` / ``LeaveCriticalPolicySection`` from ``userenv.dll`` — the same synchronization primitive used by the GP engine — to serialize read-modify-write access to ``Registry.pol``. A retry loop with configurable attempts and delay is also applied for non-GP lockers such as antivirus scanners or VSS snapshots that do not participate in the GP critical section handshake. 
[#69492](https://github.com/saltstack/salt/issues/69492) + +# Added + +- Added ``shadow.verify_password`` to ``salt.modules.win_shadow``, which + validates a Windows user's password via ``LogonUser`` with + ``LOGON32_LOGON_NETWORK`` (Microsoft's recommended approach per + KB180548) without + creating an interactive session. If the check causes an account lockout, + the account is automatically unlocked. Updated ``user.present`` on Windows + to use ``shadow.verify_password`` so the password is only changed when it + differs from the current value, matching the idempotent behaviour on other + platforms. [#41347](https://github.com/saltstack/salt/issues/41347) +- Added ability to configure the pillar destination for the `netbox` ext_pillar via `destination_pillar_key` [#65531](https://github.com/saltstack/salt/issues/65531) +- Migrate Salt documentation to the PyData Sphinx theme. This update modernizes the documentation UI, improves navigation with a persistent sidebar tree, and fixes issues with embedded video playback. [#69185](https://github.com/saltstack/salt/issues/69185) +- Fixed etcdv3 module authentication when using etcd3-py lib [#69202](https://github.com/saltstack/salt/issues/69202) +- Added ``lgpo_reg.get_rsop_value`` to query the Resultant Set of Policy (RSoP) for a registry key/value and detect whether it is managed by a Domain Group Policy Object. The ``lgpo_reg`` module functions ``set_value``, ``disable_value``, and ``delete_value`` now log a warning when a Domain GPO is detected for the target value. The ``lgpo_reg`` state functions ``value_present``, ``value_disabled``, and ``value_absent`` append the same warning to the state comment so it is visible in state output.
[#69205](https://github.com/saltstack/salt/issues/69205) + + * Wed Apr 29 2026 Salt Project Packaging - 3007.14 # Fixed diff --git a/requirements/static/ci/py3.10/darwin-crypto.in b/requirements/static/ci/py3.10/darwin-crypto.in new file mode 100644 index 000000000000..62f61a5e2fb3 --- /dev/null +++ b/requirements/static/ci/py3.10/darwin-crypto.in @@ -0,0 +1,8 @@ +# This file was autogenerated by uv via the following command: +# uv pip compile requirements/static/ci/crypto.in --python-platform=macos --python-version=3.10 --constraint requirements/constraints.txt --no-emit-index-url -o=requirements/static/ci/py3.10/darwin-crypto.in +m2crypto==0.48.0 + # via -r requirements/static/ci/crypto.in +packaging==26.2 + # via m2crypto +pycryptodome==3.23.0 + # via -r requirements/static/ci/crypto.in diff --git a/requirements/static/ci/py3.10/freebsd-crypto.in b/requirements/static/ci/py3.10/freebsd-crypto.in new file mode 100644 index 000000000000..4837d5b1afe3 --- /dev/null +++ b/requirements/static/ci/py3.10/freebsd-crypto.in @@ -0,0 +1,8 @@ +# This file was autogenerated by uv via the following command: +# uv pip compile requirements/static/ci/crypto.in --universal --python-version=3.10 --constraint requirements/constraints.txt --no-emit-index-url -o=requirements/static/ci/py3.10/freebsd-crypto.in +m2crypto==0.48.0 + # via -r requirements/static/ci/crypto.in +packaging==26.2 + # via m2crypto +pycryptodome==3.23.0 + # via -r requirements/static/ci/crypto.in diff --git a/requirements/static/ci/py3.10/linux-crypto.in b/requirements/static/ci/py3.10/linux-crypto.in new file mode 100644 index 000000000000..2a53f92829e5 --- /dev/null +++ b/requirements/static/ci/py3.10/linux-crypto.in @@ -0,0 +1,8 @@ +# This file was autogenerated by uv via the following command: +# uv pip compile requirements/static/ci/crypto.in --python-platform=linux --python-version=3.10 --constraint requirements/constraints.txt --no-emit-index-url -o=requirements/static/ci/py3.10/linux-crypto.in 
+m2crypto==0.48.0 + # via -r requirements/static/ci/crypto.in +packaging==26.2 + # via m2crypto +pycryptodome==3.23.0 + # via -r requirements/static/ci/crypto.in diff --git a/requirements/static/ci/py3.10/windows-crypto.in b/requirements/static/ci/py3.10/windows-crypto.in new file mode 100644 index 000000000000..2f2e7c78e5ac --- /dev/null +++ b/requirements/static/ci/py3.10/windows-crypto.in @@ -0,0 +1,8 @@ +# This file was autogenerated by uv via the following command: +# uv pip compile requirements/static/ci/crypto.in --python-platform=windows --python-version=3.10 --constraint requirements/constraints.txt --no-emit-index-url -o=requirements/static/ci/py3.10/windows-crypto.in +m2crypto==0.48.0 + # via -r requirements/static/ci/crypto.in +packaging==26.2 + # via m2crypto +pycryptodome==3.23.0 + # via -r requirements/static/ci/crypto.in diff --git a/requirements/static/ci/py3.11/darwin-crypto.in b/requirements/static/ci/py3.11/darwin-crypto.in new file mode 100644 index 000000000000..2d46746767e1 --- /dev/null +++ b/requirements/static/ci/py3.11/darwin-crypto.in @@ -0,0 +1,8 @@ +# This file was autogenerated by uv via the following command: +# uv pip compile requirements/static/ci/crypto.in --python-platform=macos --python-version=3.11 --constraint requirements/constraints.txt --no-emit-index-url -o=requirements/static/ci/py3.11/darwin-crypto.in +m2crypto==0.48.0 + # via -r requirements/static/ci/crypto.in +packaging==26.2 + # via m2crypto +pycryptodome==3.23.0 + # via -r requirements/static/ci/crypto.in diff --git a/requirements/static/ci/py3.11/freebsd-crypto.in b/requirements/static/ci/py3.11/freebsd-crypto.in new file mode 100644 index 000000000000..9312a2878712 --- /dev/null +++ b/requirements/static/ci/py3.11/freebsd-crypto.in @@ -0,0 +1,8 @@ +# This file was autogenerated by uv via the following command: +# uv pip compile requirements/static/ci/crypto.in --universal --python-version=3.11 --constraint requirements/constraints.txt --no-emit-index-url 
-o=requirements/static/ci/py3.11/freebsd-crypto.in +m2crypto==0.48.0 + # via -r requirements/static/ci/crypto.in +packaging==26.2 + # via m2crypto +pycryptodome==3.23.0 + # via -r requirements/static/ci/crypto.in diff --git a/requirements/static/ci/py3.11/linux-crypto.in b/requirements/static/ci/py3.11/linux-crypto.in new file mode 100644 index 000000000000..8f13b4f7e1d3 --- /dev/null +++ b/requirements/static/ci/py3.11/linux-crypto.in @@ -0,0 +1,8 @@ +# This file was autogenerated by uv via the following command: +# uv pip compile requirements/static/ci/crypto.in --python-platform=linux --python-version=3.11 --constraint requirements/constraints.txt --no-emit-index-url -o=requirements/static/ci/py3.11/linux-crypto.in +m2crypto==0.48.0 + # via -r requirements/static/ci/crypto.in +packaging==26.2 + # via m2crypto +pycryptodome==3.23.0 + # via -r requirements/static/ci/crypto.in diff --git a/requirements/static/ci/py3.11/windows-crypto.in b/requirements/static/ci/py3.11/windows-crypto.in new file mode 100644 index 000000000000..fb0c8d21093f --- /dev/null +++ b/requirements/static/ci/py3.11/windows-crypto.in @@ -0,0 +1,8 @@ +# This file was autogenerated by uv via the following command: +# uv pip compile requirements/static/ci/crypto.in --python-platform=windows --python-version=3.11 --constraint requirements/constraints.txt --no-emit-index-url -o=requirements/static/ci/py3.11/windows-crypto.in +m2crypto==0.48.0 + # via -r requirements/static/ci/crypto.in +packaging==26.2 + # via m2crypto +pycryptodome==3.23.0 + # via -r requirements/static/ci/crypto.in diff --git a/requirements/static/ci/py3.12/darwin-crypto.in b/requirements/static/ci/py3.12/darwin-crypto.in new file mode 100644 index 000000000000..36052747205f --- /dev/null +++ b/requirements/static/ci/py3.12/darwin-crypto.in @@ -0,0 +1,8 @@ +# This file was autogenerated by uv via the following command: +# uv pip compile requirements/static/ci/crypto.in --python-platform=macos --python-version=3.12 --constraint 
requirements/constraints.txt --no-emit-index-url -o=requirements/static/ci/py3.12/darwin-crypto.in +m2crypto==0.48.0 + # via -r requirements/static/ci/crypto.in +packaging==26.2 + # via m2crypto +pycryptodome==3.23.0 + # via -r requirements/static/ci/crypto.in diff --git a/requirements/static/ci/py3.12/freebsd-crypto.in b/requirements/static/ci/py3.12/freebsd-crypto.in new file mode 100644 index 000000000000..5041924f4ab5 --- /dev/null +++ b/requirements/static/ci/py3.12/freebsd-crypto.in @@ -0,0 +1,8 @@ +# This file was autogenerated by uv via the following command: +# uv pip compile requirements/static/ci/crypto.in --universal --python-version=3.12 --constraint requirements/constraints.txt --no-emit-index-url -o=requirements/static/ci/py3.12/freebsd-crypto.in +m2crypto==0.48.0 + # via -r requirements/static/ci/crypto.in +packaging==26.2 + # via m2crypto +pycryptodome==3.23.0 + # via -r requirements/static/ci/crypto.in diff --git a/requirements/static/ci/py3.12/linux-crypto.in b/requirements/static/ci/py3.12/linux-crypto.in new file mode 100644 index 000000000000..fda4b4f39a2e --- /dev/null +++ b/requirements/static/ci/py3.12/linux-crypto.in @@ -0,0 +1,8 @@ +# This file was autogenerated by uv via the following command: +# uv pip compile requirements/static/ci/crypto.in --python-platform=linux --python-version=3.12 --constraint requirements/constraints.txt --no-emit-index-url -o=requirements/static/ci/py3.12/linux-crypto.in +m2crypto==0.48.0 + # via -r requirements/static/ci/crypto.in +packaging==26.2 + # via m2crypto +pycryptodome==3.23.0 + # via -r requirements/static/ci/crypto.in diff --git a/requirements/static/ci/py3.12/windows-crypto.in b/requirements/static/ci/py3.12/windows-crypto.in new file mode 100644 index 000000000000..4f80e914c088 --- /dev/null +++ b/requirements/static/ci/py3.12/windows-crypto.in @@ -0,0 +1,8 @@ +# This file was autogenerated by uv via the following command: +# uv pip compile requirements/static/ci/crypto.in 
--python-platform=windows --python-version=3.12 --constraint requirements/constraints.txt --no-emit-index-url -o=requirements/static/ci/py3.12/windows-crypto.in +m2crypto==0.48.0 + # via -r requirements/static/ci/crypto.in +packaging==26.2 + # via m2crypto +pycryptodome==3.23.0 + # via -r requirements/static/ci/crypto.in diff --git a/requirements/static/ci/py3.13/darwin-crypto.in b/requirements/static/ci/py3.13/darwin-crypto.in new file mode 100644 index 000000000000..6fb97c487657 --- /dev/null +++ b/requirements/static/ci/py3.13/darwin-crypto.in @@ -0,0 +1,8 @@ +# This file was autogenerated by uv via the following command: +# uv pip compile requirements/static/ci/crypto.in --python-platform=macos --python-version=3.13 --constraint requirements/constraints.txt --no-emit-index-url -o=requirements/static/ci/py3.13/darwin-crypto.in +m2crypto==0.48.0 + # via -r requirements/static/ci/crypto.in +packaging==26.2 + # via m2crypto +pycryptodome==3.23.0 + # via -r requirements/static/ci/crypto.in diff --git a/requirements/static/ci/py3.13/freebsd-crypto.in b/requirements/static/ci/py3.13/freebsd-crypto.in new file mode 100644 index 000000000000..e231abfda076 --- /dev/null +++ b/requirements/static/ci/py3.13/freebsd-crypto.in @@ -0,0 +1,8 @@ +# This file was autogenerated by uv via the following command: +# uv pip compile requirements/static/ci/crypto.in --universal --python-version=3.13 --constraint requirements/constraints.txt --no-emit-index-url -o=requirements/static/ci/py3.13/freebsd-crypto.in +m2crypto==0.48.0 + # via -r requirements/static/ci/crypto.in +packaging==26.2 + # via m2crypto +pycryptodome==3.23.0 + # via -r requirements/static/ci/crypto.in diff --git a/requirements/static/ci/py3.13/linux-crypto.in b/requirements/static/ci/py3.13/linux-crypto.in new file mode 100644 index 000000000000..564b53d254f7 --- /dev/null +++ b/requirements/static/ci/py3.13/linux-crypto.in @@ -0,0 +1,8 @@ +# This file was autogenerated by uv via the following command: +# uv pip 
compile requirements/static/ci/crypto.in --python-platform=linux --python-version=3.13 --constraint requirements/constraints.txt --no-emit-index-url -o=requirements/static/ci/py3.13/linux-crypto.in +m2crypto==0.48.0 + # via -r requirements/static/ci/crypto.in +packaging==26.2 + # via m2crypto +pycryptodome==3.23.0 + # via -r requirements/static/ci/crypto.in diff --git a/requirements/static/ci/py3.13/windows-crypto.in b/requirements/static/ci/py3.13/windows-crypto.in new file mode 100644 index 000000000000..97b39b95d980 --- /dev/null +++ b/requirements/static/ci/py3.13/windows-crypto.in @@ -0,0 +1,8 @@ +# This file was autogenerated by uv via the following command: +# uv pip compile requirements/static/ci/crypto.in --python-platform=windows --python-version=3.13 --constraint requirements/constraints.txt --no-emit-index-url -o=requirements/static/ci/py3.13/windows-crypto.in +m2crypto==0.48.0 + # via -r requirements/static/ci/crypto.in +packaging==26.2 + # via m2crypto +pycryptodome==3.23.0 + # via -r requirements/static/ci/crypto.in diff --git a/requirements/static/ci/py3.9/darwin-crypto.in b/requirements/static/ci/py3.9/darwin-crypto.in new file mode 100644 index 000000000000..0b3dd41437ce --- /dev/null +++ b/requirements/static/ci/py3.9/darwin-crypto.in @@ -0,0 +1,8 @@ +# This file was autogenerated by uv via the following command: +# uv pip compile requirements/static/ci/crypto.in --python-platform=macos --python-version=3.9 --constraint requirements/constraints.txt --no-emit-index-url -o=requirements/static/ci/py3.9/darwin-crypto.in +m2crypto==0.48.0 + # via -r requirements/static/ci/crypto.in +packaging==26.2 + # via m2crypto +pycryptodome==3.23.0 + # via -r requirements/static/ci/crypto.in diff --git a/requirements/static/ci/py3.9/freebsd-crypto.in b/requirements/static/ci/py3.9/freebsd-crypto.in new file mode 100644 index 000000000000..0df5190541ba --- /dev/null +++ b/requirements/static/ci/py3.9/freebsd-crypto.in @@ -0,0 +1,8 @@ +# This file was autogenerated 
by uv via the following command: +# uv pip compile requirements/static/ci/crypto.in --universal --python-version=3.9 --constraint requirements/constraints.txt --no-emit-index-url -o=requirements/static/ci/py3.9/freebsd-crypto.in +m2crypto==0.48.0 + # via -r requirements/static/ci/crypto.in +packaging==26.2 + # via m2crypto +pycryptodome==3.23.0 + # via -r requirements/static/ci/crypto.in diff --git a/requirements/static/ci/py3.9/linux-crypto.in b/requirements/static/ci/py3.9/linux-crypto.in new file mode 100644 index 000000000000..26da8966844d --- /dev/null +++ b/requirements/static/ci/py3.9/linux-crypto.in @@ -0,0 +1,8 @@ +# This file was autogenerated by uv via the following command: +# uv pip compile requirements/static/ci/crypto.in --python-platform=linux --python-version=3.9 --constraint requirements/constraints.txt --no-emit-index-url -o=requirements/static/ci/py3.9/linux-crypto.in +m2crypto==0.48.0 + # via -r requirements/static/ci/crypto.in +packaging==26.2 + # via m2crypto +pycryptodome==3.23.0 + # via -r requirements/static/ci/crypto.in diff --git a/requirements/static/ci/py3.9/windows-crypto.in b/requirements/static/ci/py3.9/windows-crypto.in new file mode 100644 index 000000000000..8c55225f2f69 --- /dev/null +++ b/requirements/static/ci/py3.9/windows-crypto.in @@ -0,0 +1,8 @@ +# This file was autogenerated by uv via the following command: +# uv pip compile requirements/static/ci/crypto.in --python-platform=windows --python-version=3.9 --constraint requirements/constraints.txt --no-emit-index-url -o=requirements/static/ci/py3.9/windows-crypto.in +m2crypto==0.48.0 + # via -r requirements/static/ci/crypto.in +packaging==26.2 + # via m2crypto +pycryptodome==3.23.0 + # via -r requirements/static/ci/crypto.in diff --git a/salt/__init__.py b/salt/__init__.py index 7abf1e2e6aba..cf9e821216eb 100644 --- a/salt/__init__.py +++ b/salt/__init__.py @@ -159,6 +159,35 @@ def exec_module(self, module): category=DeprecationWarning, ) +# Third-party libraries that salt's 
loader pulls in eagerly (boto modules +# via salt.utils.boto*, paramiko via salt-ssh, etc.) emit SyntaxWarning / +# CryptographyDeprecationWarning at *compile* time on Python 3.10. They +# bypass the per-test ``recwarn`` plumbing and leak straight to ``stderr``, +# tripping CLI tests that gate on ``assert not cmd.stderr`` (e.g. +# ``tests/pytests/integration/cli/test_batch.py``). Filter them here - +# before ``salt.loader`` triggers any of these imports - so the warnings +# never reach the subprocess stderr. +# +# Python's compile-time ``SyntaxWarning`` emission feeds +# ``PyErr_WarnExplicitObject`` with ``module=NULL`` -- ``warnings.warn`` +# then derives ``__module__`` from the source filename's basename (e.g. +# ``connection`` for ``boto/iam/connection.py``). A ``module=r"boto\..*"`` +# regex therefore never matches; a category-only filter is the only +# reliable knob for compile-time warnings. Salt itself does not produce +# ``SyntaxWarning`` (all in-tree files are linted by black/flake8), so +# silencing the category globally is safe. +warnings.filterwarnings( + "ignore", + category=SyntaxWarning, +) +# ``CryptographyDeprecationWarning`` subclasses ``UserWarning`` (not +# ``DeprecationWarning``) in cryptography>=37, so we cannot just gate +# on the DeprecationWarning category here. Match by message text. 
+warnings.filterwarnings( + "ignore", + message=".*TripleDES has been moved.*", +) + def __define_global_system_encoding_variable__(): import builtins diff --git a/salt/_logging/impl.py b/salt/_logging/impl.py index 5a0b8465ad0e..10bb7e244945 100644 --- a/salt/_logging/impl.py +++ b/salt/_logging/impl.py @@ -441,6 +441,8 @@ def set_logging_options_dict(opts): """ Create a logging related options dictionary based off of the loaded salt config """ + if opts is None: + return try: if isinstance(set_logging_options_dict.__options_dict__, ImmutableDict): raise RuntimeError( @@ -991,7 +993,7 @@ def setup_log_granular_levels(log_granular_levels): def setup_logging(): opts = get_logging_options_dict() if not opts: - raise RuntimeError("The logging options have not been set yet.") + return if ( opts.get("configure_console_logger", True) and not is_console_handler_configured() diff --git a/salt/auth/__init__.py b/salt/auth/__init__.py index 56f48a1f13df..add8c9e85c70 100644 --- a/salt/auth/__init__.py +++ b/salt/auth/__init__.py @@ -62,6 +62,31 @@ def __init__(self, opts, ckminions=None): self.tokens = salt.loader.eauth_tokens(opts) self.ckminions = ckminions or salt.utils.minions.CkMinions(opts) + def destroy(self): + """ + Clean up resources + """ + if hasattr(self, "auth") and self.auth is not None: + if hasattr(self.auth, "destroy"): + self.auth.destroy() + self.auth = {} + if hasattr(self, "tokens") and self.tokens is not None: + if hasattr(self.tokens, "destroy"): + self.tokens.destroy() + self.tokens = {} + if hasattr(self, "ckminions") and self.ckminions is not None: + if hasattr(self.ckminions, "cache") and self.ckminions.cache is not None: + if hasattr(self.ckminions.cache, "destroy"): + self.ckminions.cache.destroy() + self.ckminions.cache = None + self.ckminions = None + + def __enter__(self): + return self + + def __exit__(self, *args): + self.destroy() + def load_name(self, load): """ Return the primary name associate with the load, if an empty string diff 
--git a/salt/auth/pam.py b/salt/auth/pam.py index 352c223780bb..5decdba8c2dd 100644 --- a/salt/auth/pam.py +++ b/salt/auth/pam.py @@ -27,6 +27,27 @@ .. note:: This module executes itself in a subprocess in order to user the system python and pam libraries. We do this to avoid openssl version conflicts when running under a salt onedir build. + +.. note:: Running ``salt-master`` as a non-root user (the 3006.x packaging + default is the ``salt`` user) and using PAM eauth requires extra + privileges so that PAM's ``unix_chkpwd`` helper can validate other + users' passwords. ``unix_chkpwd`` refuses to authenticate users other + than the caller unless the caller can read ``/etc/shadow``. The two + standard remediations are: + + 1. **Debian-derived distributions:** add the master's user to the + ``shadow`` group (e.g. ``usermod -a -G shadow salt``) so the master + process can read ``/etc/shadow`` indirectly via the setgid-shadow + ``unix_chkpwd`` helper. + 2. **RPM-based distributions:** revert the master to run as ``root`` + (``user: root`` in ``/etc/salt/master``); ``/etc/shadow`` cannot be + made readable to a non-root group safely there. + + When PAM auth fails and the master is running as a non-root user + without ``/etc/shadow`` access, a CRITICAL log entry naming the cause + and the two remediations is emitted (once per process). See + https://github.com/saltstack/salt/issues/64275 for the full + discussion. """ import logging @@ -228,6 +249,87 @@ def my_conv(n_messages, messages, p_response, app_data): return retval == 0 +# Memo so the one-shot /etc/shadow-inaccessibility diagnostic only fires +# once per master process. Module-level so it survives across calls to +# ``authenticate()`` for the lifetime of the interpreter. +_SHADOW_DIAGNOSTIC_LOGGED = False + +# Standard path to the shadow password database on Linux. Centralised so +# tests (and any non-standard distro layouts) can override. 
+_SHADOW_PATH = "/etc/shadow" + + +def _can_validate_other_users(): + """ + Return ``(True, "")`` if the current process has the privileges PAM + needs to validate a *different* user's password via ``unix_chkpwd``; + return ``(False, )`` otherwise. + + On Linux PAM's ``pam_unix`` module shells out to the setgid-shadow + helper ``unix_chkpwd`` for password verification. ``unix_chkpwd`` + refuses to authenticate users other than the caller unless the + caller can read ``/etc/shadow`` — either because the caller's + effective uid is 0, or because the caller is in the ``shadow`` + group (Debian-style). See linux-pam upstream discussion at + https://github.com/linux-pam/linux-pam/issues/112 for the full + rationale. + + This helper is used to produce an actionable diagnostic when + ``authenticate()`` fails on a master running as a non-root user + without ``shadow``-group access — the failure mode behind issue + #64275, which previously logged only a bare "Pam auth failed" with + empty stdout/stderr. + """ + try: + if os.geteuid() == 0: + return True, "" + except AttributeError: + # No ``geteuid`` on this platform (e.g. Windows). PAM auth + # itself won't load there, but be defensive. + return True, "" + if os.access(_SHADOW_PATH, os.R_OK): + return True, "" + return ( + False, + ( + "process running as uid {uid} cannot read {shadow}, so PAM's " + "unix_chkpwd helper will refuse to authenticate users other " + "than the caller" + ).format(uid=os.geteuid(), shadow=_SHADOW_PATH), + ) + + +def _log_shadow_diagnostic_once(username): + """ + Emit, at most once per process, a CRITICAL log entry that explains + why PAM auth is failing on a non-root master and how to fix it. + + Issue #64275: when the master runs as the ``salt`` user (the 3006.x + packaging default) PAM auth fails silently because the helper + subprocess inherits that uid and ``unix_chkpwd`` can't read + ``/etc/shadow``. 
Three years of users hit this without a + diagnostic; this function makes the failure self-explanatory. + """ + global _SHADOW_DIAGNOSTIC_LOGGED + if _SHADOW_DIAGNOSTIC_LOGGED: + return + ok, reason = _can_validate_other_users() + if ok: + return + _SHADOW_DIAGNOSTIC_LOGGED = True + log.critical( + "PAM authentication for %r failed and %s. Either run the " + "salt-master as the 'root' user, or add the master's user to " + "the 'shadow' group so it can read %s (the latter works on " + "Debian-derived distributions; on RPM-based distributions " + "the master must run as root for PAM eauth to work). See " + "https://github.com/saltstack/salt/issues/64275 for context.", + username, + reason, + _SHADOW_PATH, + ) + + def authenticate(username, password): """ Returns True if the given username and password authenticate for the @@ -256,6 +358,11 @@ def authenticate(username, password): if ret.returncode == 0: return True log.error("Pam auth failed for %s: %s %s", username, ret.stdout, ret.stderr) + # Issue #64275: when the master runs as a non-root user without + # /etc/shadow read access, every PAM auth for users other than the + # master's own uid fails with no useful diagnostic. Emit a one-shot + # CRITICAL log naming the cause and remediation. + _log_shadow_diagnostic_once(username) return False diff --git a/salt/cache/__init__.py b/salt/cache/__init__.py index a094f8727b47..e80b42b3268d 100644 --- a/salt/cache/__init__.py +++ b/salt/cache/__init__.py @@ -81,6 +81,12 @@ def modules(self): self.__lazy_init() return self._modules + def destroy(self): + if hasattr(self, "_modules") and self._modules is not None: + if hasattr(self._modules, "destroy"): + self._modules.destroy() + self._modules = None + def cache(self, bank, key, fun, loop_fun=None, **kwargs): """ Check cache for the data. 
If it is there, check to see if it needs to diff --git a/salt/channel/server.py b/salt/channel/server.py index 3b3aeaa45190..e4e8d5610ad1 100644 --- a/salt/channel/server.py +++ b/salt/channel/server.py @@ -889,6 +889,12 @@ def close(self): self.transport.close() if self.event is not None: self.event.destroy() + if hasattr(self, "ckminions") and self.ckminions is not None: + if hasattr(self.ckminions, "cache") and self.ckminions.cache is not None: + if hasattr(self.ckminions.cache, "destroy"): + self.ckminions.cache.destroy() + self.ckminions.cache = None + self.ckminions = None class PubServerChannel: @@ -953,6 +959,12 @@ def close(self): if self.aes_funcs is not None: self.aes_funcs.destroy() self.aes_funcs = None + if hasattr(self, "ckminions") and self.ckminions is not None: + if hasattr(self.ckminions, "cache") and self.ckminions.cache is not None: + if hasattr(self.ckminions.cache, "destroy"): + self.ckminions.cache.destroy() + self.ckminions.cache = None + self.ckminions = None def pre_fork(self, process_manager, kwargs=None): """ diff --git a/salt/client/__init__.py b/salt/client/__init__.py index 0c62ef2189a5..e0ecf876e5e8 100644 --- a/salt/client/__init__.py +++ b/salt/client/__init__.py @@ -2078,6 +2078,18 @@ def destroy(self): if self.event is not None: self.event.destroy() self.event = None + if hasattr(self, "returners") and self.returners is not None: + if hasattr(self.returners, "destroy"): + self.returners.destroy() + self.returners = {} + if hasattr(self, "functions") and self.functions is not None: + if hasattr(self.functions, "destroy"): + self.functions.destroy() + self.functions = {} + if hasattr(self, "utils") and self.utils is not None: + if hasattr(self.utils, "destroy"): + self.utils.destroy() + self.utils = {} def __enter__(self): return self diff --git a/salt/config/__init__.py b/salt/config/__init__.py index 7a56669d5d16..a678ec634921 100644 --- a/salt/config/__init__.py +++ b/salt/config/__init__.py @@ -2399,6 +2399,8 @@ def 
mminion_config(path, overrides, ignore_config_errors=True): apply_sdb(opts) _validate_opts(opts) + if "grains" in opts and hasattr(opts["grains"], "destroy"): + opts["grains"].destroy() opts["grains"] = salt.loader.grains(opts) opts["pillar"] = {} salt.features.setup_features(opts) diff --git a/salt/daemons/masterapi.py b/salt/daemons/masterapi.py index d008c6d2ac9e..71db68a4eb25 100644 --- a/salt/daemons/masterapi.py +++ b/salt/daemons/masterapi.py @@ -136,15 +136,33 @@ def clean_fsbackend(opts): ) -def clean_expired_tokens(opts): +def clean_expired_tokens(opts, loadauth=None): """ - Clean expired tokens from the master + Clean expired tokens from the master. + + If ``loadauth`` is provided, reuse the caller's LoadAuth instance + rather than constructing a fresh one. Useful in long-running loops + (e.g. Maintenance) to avoid recreating the auth/eauth_tokens + LazyLoaders on every iteration. """ - loadauth = salt.auth.LoadAuth(opts) - for tok in loadauth.list_tokens(): - token_data = loadauth.get_tok(tok) - if "expire" not in token_data or token_data.get("expire", 0) < time.time(): - loadauth.rm_token(tok) + if loadauth is not None: + _loadauth = loadauth + _owned = False + else: + _loadauth = salt.auth.LoadAuth(opts) + _owned = True + try: + for tok in _loadauth.list_tokens(): + token_data = _loadauth.get_tok(tok) + if ( + not token_data + or "expire" not in token_data + or token_data.get("expire", 0) < time.time() + ): + _loadauth.rm_token(tok) + finally: + if _owned: + _loadauth.destroy() def clean_pub_auth(opts): @@ -166,25 +184,34 @@ def clean_pub_auth(opts): log.error("Unable to delete pub auth file") -def clean_old_jobs(opts): +def clean_old_jobs(opts, mminion=None): """ - Clean out the old jobs from the job cache + Clean out the old jobs from the job cache. + + If ``mminion`` is provided, reuse the caller's MasterMinion rather + than constructing a fresh one. See ``clean_expired_tokens`` for the + same rationale. 
""" - # TODO: better way to not require creating the masterminion every time? - mminion = salt.minion.MasterMinion( - opts, - states=False, - rend=False, - ) # If the master job cache has a clean_old_jobs, call it fstr = "{}.clean_old_jobs".format(opts["master_job_cache"]) - if fstr in mminion.returners: - mminion.returners[fstr]() + if mminion is not None: + _mminion = mminion + _owned = False + else: + _mminion = salt.minion.MasterMinion(opts, states=False, rend=False) + _owned = True + try: + if fstr in _mminion.returners: + _mminion.returners[fstr]() + finally: + if _owned: + if hasattr(_mminion, "destroy"): + _mminion.destroy() def mk_key(opts, user): + uid = None if HAS_PWD: - uid = None try: uid = pwd.getpwnam(user).pw_uid except KeyError: @@ -458,6 +485,13 @@ class RemoteFuncs: def __init__(self, opts): self.opts = opts + self.event = None + self.ckminions = None + self.tops = None + self.local = None + self.mminion = None + self.cache = None + self.wheel_ = None self.event = salt.utils.event.get_event( "master", self.opts["sock_dir"], @@ -478,15 +512,15 @@ def __setup_fileserver(self): """ Set the local file objects from the file server interface """ - fs_ = salt.fileserver.Fileserver(self.opts) - self._serve_file = fs_.serve_file - self._file_find = fs_._find_file - self._file_hash = fs_.file_hash - self._file_list = fs_.file_list - self._file_list_emptydirs = fs_.file_list_emptydirs - self._dir_list = fs_.dir_list - self._symlink_list = fs_.symlink_list - self._file_envs = fs_.envs + self.fs_ = salt.fileserver.Fileserver(self.opts) + self._serve_file = self.fs_.serve_file + self._file_find = self.fs_._find_file + self._file_hash = self.fs_.file_hash + self._file_list = self.fs_.file_list + self._file_list_emptydirs = self.fs_.file_list_emptydirs + self._dir_list = self.fs_.dir_list + self._symlink_list = self.fs_.symlink_list + self._file_envs = self.fs_.envs def __verify_minion_publish(self, load): """ @@ -1115,6 +1149,37 @@ def destroy(self): if 
self.local is not None: self.local.destroy() self.local = None + if self.mminion is not None: + self.mminion.destroy() + self.mminion = None + if self.tops is not None: + if hasattr(self.tops, "destroy"): + self.tops.destroy() + self.tops = None + if self.cache is not None: + if hasattr(self.cache, "destroy"): + self.cache.destroy() + self.cache = None + if self.ckminions is not None: + if hasattr(self.ckminions, "cache") and self.ckminions.cache is not None: + if hasattr(self.ckminions.cache, "destroy"): + self.ckminions.cache.destroy() + self.ckminions.cache = None + self.ckminions = None + self.wheel_ = None + # Clear bound methods from fileserver to allow GC + if hasattr(self, "fs_") and self.fs_ is not None: + if hasattr(self.fs_, "destroy"): + self.fs_.destroy() + self.fs_ = None + self._serve_file = None + self._file_find = None + self._file_hash = None + self._file_list = None + self._file_list_emptydirs = None + self._dir_list = None + self._symlink_list = None + self._file_envs = None class LocalFuncs: @@ -1129,6 +1194,11 @@ class LocalFuncs: def __init__(self, opts, key): self.opts = opts self.key = key + self.event = None + self.local = None + self.ckminions = None + self.loadauth = None + self.mminion = None # Create the event manager self.event = salt.utils.event.get_event( "master", @@ -1144,8 +1214,6 @@ def __init__(self, opts, key): self.loadauth = salt.auth.LoadAuth(opts) # Stand up the master Minion to access returner data self.mminion = salt.minion.MasterMinion(self.opts, states=False, rend=False) - # Make a wheel object - self.wheel_ = salt.wheel.Wheel(opts) def runner(self, load): """ @@ -1184,10 +1252,10 @@ def runner(self, load): # Authorized. Do the job! 
try: fun = load.pop("fun") - runner_client = salt.runner.RunnerClient(self.opts) - return runner_client.asynchronous(fun, load.get("kwarg", {}), username) + with salt.runner.RunnerClient(self.opts) as runner_client: + return runner_client.asynchronous(fun, load.get("kwarg", {}), username) except Exception as exc: # pylint: disable=broad-except - log.exception("Exception occurred while introspecting %s") + log.exception("Exception occurred while introspecting %s", fun) return { "error": { "name": exc.__class__.__name__, @@ -1245,7 +1313,8 @@ def wheel(self, load): } try: self.event.fire_event(data, salt.utils.event.tagify([jid, "new"], "wheel")) - ret = self.wheel_.call_func(fun, **load) + with salt.wheel.WheelClient(self.opts) as wheel_client: + ret = wheel_client.call_func(fun, **load) data["return"] = ret data["success"] = True self.event.fire_event(data, salt.utils.event.tagify([jid, "ret"], "wheel")) @@ -1498,3 +1567,15 @@ def destroy(self): if self.local is not None: self.local.destroy() self.local = None + if self.mminion is not None: + self.mminion.destroy() + self.mminion = None + if self.loadauth is not None: + self.loadauth.destroy() + self.loadauth = None + if self.ckminions is not None: + if hasattr(self.ckminions, "cache") and self.ckminions.cache is not None: + if hasattr(self.ckminions.cache, "destroy"): + self.ckminions.cache.destroy() + self.ckminions.cache = None + self.ckminions = None diff --git a/salt/fileserver/__init__.py b/salt/fileserver/__init__.py index ee7b7b23a79c..fd51d1dec3aa 100644 --- a/salt/fileserver/__init__.py +++ b/salt/fileserver/__init__.py @@ -383,6 +383,12 @@ def master_opts(self, load): """ return self.opts + def destroy(self): + if hasattr(self, "servers") and self.servers is not None: + if hasattr(self.servers, "destroy"): + self.servers.destroy() + self.servers = {} + def update_opts(self): # This fix func monkey patching by pillar for name, func in self.servers.items(): @@ -879,4 +885,7 @@ def send( return 
getattr(self.fs, cmd)(load) def close(self): - pass + if hasattr(self, "fs") and self.fs is not None: + if hasattr(self.fs, "destroy"): + self.fs.destroy() + self.fs = None diff --git a/salt/grains/metadata.py b/salt/grains/metadata.py index 82362d19eb2f..29c2d37e0c02 100644 --- a/salt/grains/metadata.py +++ b/salt/grains/metadata.py @@ -88,10 +88,12 @@ def _search(prefix="latest/"): if "body" not in linedata: return ret body = salt.utils.stringutils.to_unicode(linedata["body"]) - if ( - linedata["headers"].get("Content-Type", "text/plain") - == "application/octet-stream" - ): + # Since 3006.3, salt.utils.http.query (tornado backend) returns ``body`` + # on HTTPError but does not populate ``headers``. Treat a missing + # ``headers`` key as "no Content-Type information" rather than letting + # KeyError propagate and break the whole grain load (#65184). + response_headers = linedata.get("headers") or {} + if response_headers.get("Content-Type", "text/plain") == "application/octet-stream": return body for line in body.split("\n"): if line.endswith("/"): diff --git a/salt/loader/lazy.py b/salt/loader/lazy.py index 995741568207..49d7def2ff49 100644 --- a/salt/loader/lazy.py +++ b/salt/loader/lazy.py @@ -386,6 +386,54 @@ def __init__( def _get_lock(self): return threading.RLock() + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + self.destroy() + + def destroy(self): + """ + Destroy the loader. + + This is intentionally a no-op. The earlier 3006.x leak-fix + (``33ad623aa4a``) added internal-state clearing here -- calling + ``clean_modules()`` (sys.modules eviction), ``self.pack.clear()``, + ``self._dict.clear()``, ``self.loaded_modules.clear()`` and + ``self.missing_modules.clear()`` -- so callers like + ``LocalClient.destroy()`` / ``MasterMinion.destroy()`` / + ``RunnerClient.__exit__`` would proactively free LazyLoader memory. 
+ + After dropping ``clean_modules()`` (commit ``2d119bba048``) the + rest_tornado functional suite still timed out (CI 28333916927 had + the same 86 failures as the prior round). Round-6 investigation + narrowed the remaining cost to the LazyLoader state-clearing + itself: ``LocalClient.__del__`` runs synchronously during Python + GC inside the tornado io_loop, and the cascading destroy() chain + (LocalClient -> functions/utils/returners LazyLoaders -> + ``pack.clear`` / ``_dict.clear`` / ``loaded_modules.clear``) stalls + the loop for several seconds per request -- exactly the symptom + reported in CI (~6-7 s per ``http_client.fetch`` vs ~30 ms on the + 3007.x baseline). + + Reverting ``destroy()`` to a no-op restores the 3007.x behavior + (LazyLoader objects are reclaimed by Python's normal GC when their + owner dies) without re-introducing the original ``clean_modules`` + sys.modules thrash. The ``__enter__``/``__exit__`` shape is kept so + the per-request ``with RunnerClient(...) as runner:`` / ``with + WheelClient(...) as wheel:`` blocks added in + ``salt/netapi/__init__.py`` and ``salt/daemons/masterapi.py`` + continue to compile and run without raising AttributeError -- + their ``__exit__`` paths just stop interfering with the loader. + + ``clean_modules()`` remains a public method for the one caller + that still needs sys.modules eviction (``loader.grains`` after a + grains refresh). Long-term cleanup of LazyLoader memory in + long-running daemons should be handled at the daemon scope + (Maintenance / MWorker recycle), not on the per-request + ``LocalClient.__del__`` hot path. + """ + def clean_modules(self): """ Clean modules and free memory for this loader's tag only. 
diff --git a/salt/master.py b/salt/master.py index 22c04c0822ae..ec3219a7c0ba 100644 --- a/salt/master.py +++ b/salt/master.py @@ -257,6 +257,12 @@ def __init__(self, opts, **kwargs): self.pki_dir = self.opts["cluster_pki_dir"] else: self.pki_dir = self.opts.get("pki_dir", "") + # Long-lived helpers used by ``run()`` — populated in + # ``_post_fork_init`` (the leak-fix path that caches them across + # iterations). Pre-init to ``None`` so attribute access in + # ``run()`` survives test paths that mock ``_post_fork_init``. + self._cached_mminion = None + self._cached_loadauth = None def _post_fork_init(self): """ @@ -273,6 +279,15 @@ def _post_fork_init(self): runner_client = salt.runner.RunnerClient(ropts) # Load Returners self.returners = salt.loader.returners(self.opts, {}) + # Cache long-lived helpers so the maintenance loop reuses them across + # iterations rather than constructing fresh ones. Each construction + # triggers a fresh LazyLoader + __virtual__ cascade + module-load chain + # that allocates bytecode/dicts/strings retained in sys.modules — the + # primary driver of the Maintenance-process slow drift. 
+ self._cached_loadauth = salt.auth.LoadAuth(self.opts) + self._cached_mminion = salt.minion.MasterMinion( + self.opts, states=False, rend=False + ) # Init Scheduler self.schedule = salt.utils.schedule.Schedule( @@ -343,8 +358,12 @@ def run(self): while time.time() - start < self.restart_interval: log.trace("Running maintenance routines") if not last or (now - last) >= self.loop_interval: - salt.daemons.masterapi.clean_old_jobs(self.opts) - salt.daemons.masterapi.clean_expired_tokens(self.opts) + salt.daemons.masterapi.clean_old_jobs( + self.opts, mminion=self._cached_mminion + ) + salt.daemons.masterapi.clean_expired_tokens( + self.opts, loadauth=self._cached_loadauth + ) salt.daemons.masterapi.clean_pub_auth(self.opts) if not last or (now - last_git_pillar_update) >= git_pillar_update_interval: last_git_pillar_update = now @@ -358,6 +377,31 @@ def run(self): now = int(time.time()) time.sleep(self.loop_interval) + def destroy(self): + """ + Clean up resources + """ + if hasattr(self, "event") and self.event is not None: + self.event.destroy() + self.event = None + if hasattr(self, "ckminions") and self.ckminions is not None: + if hasattr(self.ckminions, "cache") and self.ckminions.cache is not None: + self.ckminions.cache = None + self.ckminions = None + if hasattr(self, "schedule") and self.schedule is not None: + self.schedule = None + if getattr(self, "_cached_loadauth", None) is not None: + self._cached_loadauth.destroy() + self._cached_loadauth = None + if getattr(self, "_cached_mminion", None) is not None: + if hasattr(self._cached_mminion, "destroy"): + self._cached_mminion.destroy() + self._cached_mminion = None + + def _handle_signals(self, signum, sigframe): + self.destroy() + super()._handle_signals(signum, sigframe) + def handle_key_cache(self): """ Evaluate accepted keys and create a msgpack file @@ -1188,6 +1232,12 @@ def _handle_signals(self, signum, sigframe): except Exception: # pylint: disable=broad-except # Don't stop signal handling because an 
exception occurred. pass + aes_funcs = getattr(self, "aes_funcs", None) + if aes_funcs is not None: + try: + aes_funcs.destroy() + except Exception: # pylint: disable=broad-except + pass super()._handle_signals(signum, sigframe) def __bind(self): @@ -1209,22 +1259,6 @@ async def _handle_payload(self, payload): """ The _handle_payload method is the key method used to figure out what needs to be done with communication to the server - - Example cleartext payload generated for 'salt myminion test.ping': - - {'enc': 'clear', - 'load': {'arg': [], - 'cmd': 'publish', - 'fun': 'test.ping', - 'jid': '', - 'key': 'alsdkjfa.,maljf-==adflkjadflkjalkjadfadflkajdflkj', - 'kwargs': {'show_jid': False, 'show_timeout': False}, - 'ret': '', - 'tgt': 'myminion', - 'tgt_type': 'glob', - 'user': 'root'}} - - :param dict payload: The payload route to the appropriate handler """ key = payload["enc"] load = payload["load"] @@ -1422,6 +1456,13 @@ def __init__(self, opts): :returns: Instance for handling AES operations """ self.opts = opts + self.event = None + self.ckminions = None + self.local = None + self.mminion = None + self.fs_ = None + self.masterapi = None + self.cache = None self.event = salt.utils.event.get_master_event( self.opts, self.opts["sock_dir"], listen=False ) @@ -2112,10 +2153,42 @@ def run_func(self, func, load): return ret, {"fun": "send"} def destroy(self): - self.masterapi.destroy() + if self.masterapi is not None: + self.masterapi.destroy() + self.masterapi = None if self.local is not None: self.local.destroy() self.local = None + if self.mminion is not None: + self.mminion.destroy() + self.mminion = None + if self.event is not None: + self.event.destroy() + self.event = None + if self.ckminions is not None: + if self.ckminions.cache is not None: + if hasattr(self.ckminions.cache, "destroy"): + self.ckminions.cache.destroy() + self.ckminions.cache = None + self.ckminions = None + if self.cache is not None: + if hasattr(self.cache, "destroy"): + 
self.cache.destroy() + self.cache = None + # Clear bound methods from fileserver + if self.fs_ is not None: + if hasattr(self.fs_, "destroy"): + self.fs_.destroy() + self.fs_ = None + self._serve_file = None + self._file_find = None + self._file_hash = None + self._file_hash_and_stat = None + self._file_list = None + self._file_list_emptydirs = None + self._dir_list = None + self._symlink_list = None + self._file_envs = None class ClearFuncs(TransportMethods): @@ -2143,6 +2216,12 @@ class ClearFuncs(TransportMethods): def __init__(self, opts, key): self.opts = opts self.key = key + self.event = None + self.local = None + self.ckminions = None + self.loadauth = None + self.mminion = None + self.masterapi = None # Create the event manager self.event = salt.utils.event.get_master_event( self.opts, self.opts["sock_dir"], listen=False @@ -2713,6 +2792,25 @@ def destroy(self): if self.local is not None: self.local.destroy() self.local = None + if self.mminion is not None: + self.mminion.destroy() + self.mminion = None + if self.event is not None: + self.event.destroy() + self.event = None + if self.ckminions is not None: + if self.ckminions.cache is not None: + if hasattr(self.ckminions.cache, "destroy"): + self.ckminions.cache.destroy() + self.ckminions.cache = None + self.ckminions = None + if self.loadauth is not None: + self.loadauth.destroy() + self.loadauth = None + if self.wheel_ is not None: + if hasattr(self.wheel_, "destroy"): + self.wheel_.destroy() + self.wheel_ = None while self.channels: chan = self.channels.pop() chan.close() diff --git a/salt/minion.py b/salt/minion.py index 385e9e940be4..a7a9c9f93df6 100644 --- a/salt/minion.py +++ b/salt/minion.py @@ -8,6 +8,7 @@ import contextlib import copy import errno +import gc import logging import multiprocessing import os @@ -109,6 +110,45 @@ log = logging.getLogger(__name__) + +# Event used to abort an in-progress resolve_dns() retry loop. 
The minion +# signal handler sets this so that a SIGTERM arriving while the minion is +# stuck retrying master DNS resolution can shut the io_loop down promptly +# instead of waiting for ``retry_dns`` seconds * forever. See #69466. +_RESOLVE_DNS_ABORT = threading.Event() + + +def request_resolve_dns_abort(): + """ + Signal any in-progress resolve_dns() retry loop to abort on its next + wakeup. Used by the minion shutdown path so SIGTERM is not blocked by + a synchronous ``time.sleep`` inside the DNS retry loop. + """ + _RESOLVE_DNS_ABORT.set() + + +def _interruptible_sleep(duration, abort_event, chunk=1.0): + """ + Sleep up to ``duration`` seconds in ``chunk``-second slices, returning + early if ``abort_event`` becomes set. Returns True if the event was + observed set (i.e. the sleep was aborted), False otherwise. + + Using small chunks rather than ``abort_event.wait(duration)`` keeps + behavior consistent across platforms where ``Event.wait`` may starve + other threads sharing the GIL during very long timeouts. + """ + if duration <= 0: + return abort_event.is_set() + deadline = time.monotonic() + duration + while True: + if abort_event.is_set(): + return True + remaining = deadline - time.monotonic() + if remaining <= 0: + return abort_event.is_set() + time.sleep(min(chunk, remaining)) + + # To set up a minion: # 1. Read in the configuration # 2. Generate the function mapping dict @@ -140,6 +180,10 @@ def resolve_dns(opts, fallback=True): except SaltClientError: retry_dns_count = opts.get("retry_dns_count", None) if opts["retry_dns"]: + # Clear any leftover abort from a previous resolve. The flag + # is only meaningful for the duration of an active retry + # loop; if it is already set when we enter we honor it on + # the first iteration below. 
while True: if retry_dns_count is not None: if retry_dns_count == 0: @@ -151,7 +195,16 @@ def resolve_dns(opts, fallback=True): opts["master"], opts["retry_dns"], ) - time.sleep(opts["retry_dns"]) + aborted = _interruptible_sleep( + opts["retry_dns"], _RESOLVE_DNS_ABORT + ) + if aborted: + log.warning( + "Master DNS retry loop aborted by shutdown " + "request before '%s' could be resolved.", + opts["master"], + ) + raise SaltMasterUnresolvableError try: ret["master_ip"] = salt.utils.network.dns_check( opts["master"], int(opts["master_port"]), True, opts["ipv6"] @@ -813,6 +866,8 @@ def eval_master(self, opts, timeout=60, safe=True, failed=False, failback=False) ) opts.update(prep_ip_port(opts)) opts.update(resolve_dns(opts)) + pub_channel = None + ret_pub_channel = None try: if self.opts["transport"] == "detect": self.opts["detect_mode"] = True @@ -825,6 +880,11 @@ def eval_master(self, opts, timeout=60, safe=True, failed=False, failback=False) ) yield pub_channel.connect() if not pub_channel.auth.authenticated: + # Close the unauthenticated channel before + # the next iteration overwrites the + # reference. See #68901. + pub_channel.close() + pub_channel = None continue del self.opts["detect_mode"] break @@ -835,14 +895,21 @@ def eval_master(self, opts, timeout=60, safe=True, failed=False, failback=False) yield pub_channel.connect() self.tok = pub_channel.auth.gen_token(b"salt") self.connected = True - raise tornado.gen.Return((opts["master"], pub_channel)) + # Hand the channel off to the caller; clear the local so + # the finally block does not close it. + ret_pub_channel = pub_channel + pub_channel = None + raise tornado.gen.Return((opts["master"], ret_pub_channel)) except SaltClientError: - if pub_channel: - pub_channel.close() if attempts == tries: # Exhausted all attempts. Return exception. self.connected = False raise + finally: + # Ensure the pub channel is closed on every failure path, + # not only SaltClientError. See #68901. 
+ if pub_channel is not None: + pub_channel.close() def _discover_masters(self): """ @@ -1001,6 +1068,8 @@ def __init__( whitelist=None, ignore_config_errors=True, ): + self.executors = None + self.matchers = None self.opts = salt.config.mminion_config( opts["conf_file"], opts, ignore_config_errors=ignore_config_errors ) @@ -1010,8 +1079,69 @@ def __init__( self.mk_rend = rend self.mk_matcher = matcher + self.returners = None + self.functions = None + self.utils = None + self.proxy = None self.gen_modules(initial_load=True) + def destroy(self): + """ + Destroy the MasterMinion object + """ + if self.returners is not None: + # Some returners have a destroy method + for returner in self.returners: + try: + func = self.returners[returner] + if hasattr(func, "destroy"): + func.destroy() + except Exception: # pylint: disable=broad-except + pass + if hasattr(self.returners, "destroy"): + self.returners.destroy() + self.returners = {} + if self.functions is not None and hasattr(self.functions, "destroy"): + self.functions.destroy() + self.functions = {} + if self.utils is not None and hasattr(self.utils, "destroy"): + self.utils.destroy() + self.utils = {} + if hasattr(self, "states") and self.states is not None: + if hasattr(self.states, "destroy"): + self.states.destroy() + self.states = {} + if hasattr(self, "rend") and self.rend is not None: + if hasattr(self.rend, "destroy"): + self.rend.destroy() + self.rend = {} + if hasattr(self, "matchers") and self.matchers is not None: + if hasattr(self.matchers, "destroy"): + self.matchers.destroy() + self.matchers = {} + if hasattr(self, "executors") and self.executors is not None: + if hasattr(self.executors, "destroy"): + self.executors.destroy() + self.executors = {} + if hasattr(self, "proxy") and self.proxy is not None: + if hasattr(self.proxy, "destroy"): + self.proxy.destroy() + self.proxy = {} + if hasattr(self, "serializers") and self.serializers is not None: + if hasattr(self.serializers, "destroy"): + 
self.serializers.destroy() + self.serializers = {} + if self.opts and "grains" in self.opts: + if hasattr(self.opts["grains"], "destroy"): + self.opts["grains"].destroy() + self.opts["grains"] = {} + + def __enter__(self): + return self + + def __exit__(self, *args): + self.destroy() + def gen_modules(self, initial_load=False): """ Tell the minion to reload the execution modules @@ -1090,6 +1220,19 @@ async def handle_event(self, package): except Exception as exc: # pylint: disable=broad-except log.error("Error dispatching event. %s", exc) + def destroy(self): + """ + Tear down the MinionManager + """ + if hasattr(self, "process_manager") and self.process_manager is not None: + self.process_manager.stop_restarting() + self.process_manager.kill_children() + if hasattr(self, "minions"): + for minion in self.minions: + if hasattr(minion, "destroy"): + minion.destroy() + self.minions = [] + def _create_minion_object( self, opts, @@ -1230,6 +1373,13 @@ def stop(self, signum, parent_sig_handler): Called from cli.daemons.Minion._handle_signals(). Adds stop_async as callback to the io_loop to prevent blocking. """ + # Trip the resolve_dns() abort flag first so a minion currently + # stuck in the synchronous DNS retry loop wakes up and releases + # the io_loop, allowing stop_async (scheduled below) to actually + # run. Without this, a SIGTERM that arrives while a master + # hostname is unresolvable is silently swallowed until systemd + # escalates to SIGKILL. See #69466. 
+ request_resolve_dns_abort() self.io_loop.add_callback( # pylint: disable=not-callable self.stop_async, signum, parent_sig_handler ) @@ -1265,16 +1415,6 @@ def stop_async(self, signum, parent_sig_handler): # Call the parent signal handler parent_sig_handler(signum, None) - def destroy(self): - for minion in self.minions: - minion.destroy() - if self.event_publisher is not None: - self.event_publisher.close() - self.event_publisher = None - if self.event is not None: - self.event.destroy() - self.event = None - class Minion(MinionBase): """ @@ -1650,6 +1790,7 @@ def _load_modules( # a memory limit on module imports # this feature ONLY works on *nix like OSs (resource module doesn't work on windows) modules_max_memory = False + old_mem_limit = None if opts.get("modules_max_memory", -1) > 0 and HAS_PSUTIL and HAS_RESOURCE: log.debug( "modules_max_memory set, enforcing a maximum of %s", @@ -4165,6 +4306,15 @@ def ping_timeout_handler(*_): elif self.opts.get("master_type") != "disable": log.error("No connection to master found. Scheduled jobs will not run.") + # Periodic full-generation gc.collect() to reap reference cycles + # created by Tornado coroutine timeouts (FutureWithTimeout, + # Runner.handle_yield closures, traceback objects, etc.). Python's + # default GC thresholds (700, 10, 10) run generation-2 too rarely + # for the rate these cycles accumulate in a busy minion (~50 MB/hr + # of cyclic garbage measured under stress). Reaping every 60 s + # keeps the working set steady. 
+ self.add_periodic_callback("gc_collect", gc.collect, interval=60) + if start: try: self.io_loop.start() @@ -4244,6 +4394,36 @@ def destroy(self): for cb in self.periodic_callbacks.values(): cb.stop() + # Clean up loaders + if hasattr(self, "functions") and self.functions is not None: + if hasattr(self.functions, "destroy"): + self.functions.destroy() + self.functions = {} + if hasattr(self, "returners") and self.returners is not None: + if hasattr(self.returners, "destroy"): + self.returners.destroy() + self.returners = {} + if hasattr(self, "states") and self.states is not None: + if hasattr(self.states, "destroy"): + self.states.destroy() + self.states = {} + if hasattr(self, "rend") and self.rend is not None: + if hasattr(self.rend, "destroy"): + self.rend.destroy() + self.rend = {} + if hasattr(self, "matchers") and self.matchers is not None: + if hasattr(self.matchers, "destroy"): + self.matchers.destroy() + self.matchers = {} + if hasattr(self, "executors") and self.executors is not None: + if hasattr(self.executors, "destroy"): + self.executors.destroy() + self.executors = {} + if hasattr(self, "utils") and self.utils is not None: + if hasattr(self.utils, "destroy"): + self.utils.destroy() + self.utils = {} + # pylint: disable=W1701 def __del__(self): self.destroy() @@ -4403,6 +4583,9 @@ def destroy(self): if self.local is not None: self.local.destroy() self.local = None + if hasattr(self, "mminion") and self.mminion is not None: + self.mminion.destroy() + self.mminion = None if self.forward_events is not None: self.forward_events.stop() @@ -4778,6 +4961,10 @@ def destroy(self): self._closing = True if self.local is not None: self.local.destroy() + self.local = None + if hasattr(self, "mminion") and self.mminion is not None: + self.mminion.destroy() + self.mminion = None class ProxyMinionManager(MinionManager): diff --git a/salt/netapi/__init__.py b/salt/netapi/__init__.py index a6c4ef064280..523d359d7161 100644 --- a/salt/netapi/__init__.py +++ 
b/salt/netapi/__init__.py @@ -69,6 +69,9 @@ class NetapiClient: def __init__(self, opts): self.opts = opts + self.resolver = None + self.loadauth = None + self.ckminions = None apiopts = copy.deepcopy(self.opts) apiopts["enable_ssh_minions"] = True apiopts["cachedir"] = os.path.join(opts["cachedir"], "saltapi") @@ -79,6 +82,33 @@ def __init__(self, opts): self.key = salt.daemons.masterapi.access_keys(apiopts) self.ckminions = salt.utils.minions.CkMinions(apiopts) + def destroy(self): + """ + Clean up resources + """ + if self.resolver is not None: + if hasattr(self.resolver, "auth"): + if hasattr(self.resolver.auth, "destroy"): + self.resolver.auth.destroy() + self.resolver.auth = {} + self.resolver = None + if self.loadauth is not None: + if hasattr(self.loadauth, "destroy"): + self.loadauth.destroy() + self.loadauth = None + if self.ckminions is not None: + if hasattr(self.ckminions, "cache") and self.ckminions.cache is not None: + if hasattr(self.ckminions.cache, "destroy"): + self.ckminions.cache.destroy() + self.ckminions.cache = None + self.ckminions = None + + def __enter__(self): + return self + + def __exit__(self, *args): + self.destroy() + def _is_master_running(self): """ Perform a lightweight check to see if the master daemon is running @@ -262,8 +292,8 @@ def runner(self, fun, timeout=None, full_return=False, **kwargs): if timeout is not None: timeout = float(timeout) - runner = salt.runner.RunnerClient(self.opts) - return runner.cmd_sync(kwargs, timeout=timeout, full_return=full_return) + with salt.runner.RunnerClient(self.opts) as runner: + return runner.cmd_sync(kwargs, timeout=timeout, full_return=full_return) def runner_async(self, fun, **kwargs): """ @@ -277,8 +307,8 @@ def runner_async(self, fun, **kwargs): :return: event data and a job ID for the executed function. 
""" kwargs["fun"] = fun - runner = salt.runner.RunnerClient(self.opts) - return runner.cmd_async(kwargs) + with salt.runner.RunnerClient(self.opts) as runner: + return runner.cmd_async(kwargs) def wheel(self, fun, **kwargs): """ @@ -292,8 +322,8 @@ def wheel(self, fun, **kwargs): :return: Returns the result from the wheel module """ kwargs["fun"] = fun - wheel = salt.wheel.WheelClient(self.opts) - return wheel.cmd_sync(kwargs) + with salt.wheel.WheelClient(self.opts) as wheel: + return wheel.cmd_sync(kwargs) def wheel_async(self, fun, **kwargs): """ @@ -307,8 +337,8 @@ def wheel_async(self, fun, **kwargs): :return: Returns the result from the wheel module """ kwargs["fun"] = fun - wheel = salt.wheel.WheelClient(self.opts) - return wheel.cmd_async(kwargs) + with salt.wheel.WheelClient(self.opts) as wheel: + return wheel.cmd_async(kwargs) CLIENTS = [ diff --git a/salt/netapi/rest_cherrypy/app.py b/salt/netapi/rest_cherrypy/app.py index 6cdcf777b8ba..ec8ad253acfe 100644 --- a/salt/netapi/rest_cherrypy/app.py +++ b/salt/netapi/rest_cherrypy/app.py @@ -1176,7 +1176,6 @@ class LowDataAdapter: def __init__(self): self.opts = cherrypy.config["saltopts"] self.apiopts = cherrypy.config["apiopts"] - self.api = salt.netapi.NetapiClient(self.opts) def exec_lowstate(self, client=None, token=None): """ @@ -1198,32 +1197,33 @@ def exec_lowstate(self, client=None, token=None): # Make any requested additions or modifications to each lowstate, then # execute each one and yield the result. - for chunk in lowstate: - if token: - chunk["token"] = token - - if "token" in chunk: - # Make sure that auth token is hex - try: - int(chunk["token"], 16) - except (TypeError, ValueError): - raise cherrypy.HTTPError(401, "Invalid token") - - if client: - chunk["client"] = client - - # Make any 'arg' params a list if not already. - # This is largely to fix a deficiency in the urlencoded format. 
- if "arg" in chunk and not isinstance(chunk["arg"], list): - chunk["arg"] = [chunk["arg"]] - - ret = self.api.run(chunk) - - # Sometimes Salt gives us a return and sometimes an iterator - if isinstance(ret, Iterator): - yield from ret - else: - yield ret + with salt.netapi.NetapiClient(self.opts) as api: + for chunk in lowstate: + if token: + chunk["token"] = token + + if "token" in chunk: + # Make sure that auth token is hex + try: + int(chunk["token"], 16) + except (TypeError, ValueError): + raise cherrypy.HTTPError(401, "Invalid token") + + if client: + chunk["client"] = client + + # Make any 'arg' params a list if not already. + # This is largely to fix a deficiency in the urlencoded format. + if "arg" in chunk and not isinstance(chunk["arg"], list): + chunk["arg"] = [chunk["arg"]] + + ret = api.run(chunk) + + # Sometimes Salt gives us a return and sometimes an iterator + if isinstance(ret, Iterator): + yield from ret + else: + yield ret @cherrypy.config(**{"tools.sessions.on": False}) def GET(self): @@ -1870,8 +1870,11 @@ def POST(self, **kwargs): ] }} """ - if not self.api._is_master_running(): - raise salt.exceptions.SaltDaemonNotRunning("Salt Master is not available.") + with salt.netapi.NetapiClient(self.opts) as api: + if not api._is_master_running(): + raise salt.exceptions.SaltDaemonNotRunning( + "Salt Master is not available." 
+ ) # the urlencoded_processor will wrap this in a list if isinstance(cherrypy.serving.request.lowstate, list): diff --git a/salt/roster/__init__.py b/salt/roster/__init__.py index a6b8bb2475de..3b695ddcaadb 100644 --- a/salt/roster/__init__.py +++ b/salt/roster/__init__.py @@ -69,9 +69,25 @@ def __init__(self, opts, backends="flat"): self.backends = backends if not backends: self.backends = ["flat"] - utils = salt.loader.utils(self.opts) - runner = salt.loader.runner(self.opts, utils=utils) - self.rosters = salt.loader.roster(self.opts, runner=runner, utils=utils) + self.utils = salt.loader.utils(self.opts) + self.runner = salt.loader.runner(self.opts, utils=self.utils) + self.rosters = salt.loader.roster( + self.opts, runner=self.runner, utils=self.utils + ) + + def destroy(self): + if hasattr(self, "rosters") and self.rosters is not None: + if hasattr(self.rosters, "destroy"): + self.rosters.destroy() + self.rosters = {} + if hasattr(self, "runner") and self.runner is not None: + if hasattr(self.runner, "destroy"): + self.runner.destroy() + self.runner = {} + if hasattr(self, "utils") and self.utils is not None: + if hasattr(self.utils, "destroy"): + self.utils.destroy() + self.utils = {} def _gen_back(self): """ diff --git a/salt/runner.py b/salt/runner.py index d606cb485104..927c13b6ccd1 100644 --- a/salt/runner.py +++ b/salt/runner.py @@ -38,6 +38,36 @@ class RunnerClient(mixins.SyncClientMixin, mixins.AsyncClientMixin): client = "runner" tag_prefix = "run" + def __init__(self, opts, context=None): + mixins.SyncClientMixin.__init__(self, opts, context=context) + mixins.AsyncClientMixin.__init__(self, opts, context=context) + self.opts = opts + self.context = context or {} + self.event = None + self.salt_user = salt.utils.user.get_specific_user() + self.event = salt.utils.event.get_event( + "master", self.opts["sock_dir"], opts=self.opts, listen=False + ) + + def destroy(self): + if self.event is not None: + self.event.destroy() + self.event = None + if 
hasattr(self, "_functions") and self._functions is not None: + if hasattr(self._functions, "destroy"): + self._functions.destroy() + self._functions = {} + if hasattr(self, "utils") and self.utils is not None: + if hasattr(self.utils, "destroy"): + self.utils.destroy() + self.utils = {} + + def __enter__(self): + return self + + def __exit__(self, *args): + self.destroy() + @property def functions(self): if not hasattr(self, "_functions"): @@ -196,6 +226,17 @@ def __init__(self, opts, context=None): self.returners = salt.loader.returners(opts, self.functions, context=context) self.outputters = salt.loader.outputters(opts) + def destroy(self): + if hasattr(self, "returners") and self.returners is not None: + if hasattr(self.returners, "destroy"): + self.returners.destroy() + self.returners = {} + if hasattr(self, "outputters") and self.outputters is not None: + if hasattr(self.outputters, "destroy"): + self.outputters.destroy() + self.outputters = {} + super().destroy() + def print_docs(self): """ Print out the documentation! diff --git a/salt/tokens/localfs.py b/salt/tokens/localfs.py index 93cfffa934f4..4f0dc55cb07e 100644 --- a/salt/tokens/localfs.py +++ b/salt/tokens/localfs.py @@ -89,10 +89,10 @@ def list_tokens(opts): List all tokens in the store. 
:param opts: Salt master config options - :returns: List of dicts (tokens) + :returns: Generator of tokens """ - ret = [] - for dirpath, dirnames, filenames in salt.utils.path.os_walk(opts["token_dir"]): - for token in filenames: - ret.append(token) - return ret + if not os.path.exists(opts["token_dir"]): + return + for entry in os.scandir(opts["token_dir"]): + if entry.is_file(): + yield entry.name diff --git a/salt/transport/frame.py b/salt/transport/frame.py index aa6961f5ad91..f3d3cd53494b 100644 --- a/salt/transport/frame.py +++ b/salt/transport/frame.py @@ -2,6 +2,8 @@ Helper functions for transport components to handle message framing """ +import struct + import salt.utils.msgpack @@ -20,10 +22,14 @@ def frame_msg(body, header=None, raw_body=False): # pylint: disable=unused-argu def frame_msg_ipc(body, header=None, raw_body=False): # pylint: disable=unused-argument """ - Frame the given message with our wire protocol for IPC + Frame the given message with our wire protocol for IPC. - For IPC, we don't need to be backwards compatible, so - use the more efficient "use_bin_type=True" on Python 3. + Prefixes the msgpack payload with a 4-byte big-endian length so the + receiver can read exactly the right number of bytes per message. This + prevents msgpack stream corruption when concurrent large writes exceed + the Unix socket PIPE_BUF atomic-write boundary (~65 536 bytes on Linux), + which caused interleaved bytes and UnicodeDecodeError / ExtraData crashes + in subscribers such as EventReturn under high event-bus load. 
""" framed_msg = {} if header is None: @@ -31,7 +37,8 @@ def frame_msg_ipc(body, header=None, raw_body=False): # pylint: disable=unused- framed_msg["head"] = header framed_msg["body"] = body - return salt.utils.msgpack.dumps(framed_msg, use_bin_type=True) + payload = salt.utils.msgpack.dumps(framed_msg, use_bin_type=True) + return struct.pack(">I", len(payload)) + payload def _decode_embedded_list(src): diff --git a/salt/transport/ipc.py b/salt/transport/ipc.py index ec2504d7aec8..a4f4bb57f1d0 100644 --- a/salt/transport/ipc.py +++ b/salt/transport/ipc.py @@ -5,6 +5,7 @@ import errno import logging import socket +import struct import time import warnings @@ -179,18 +180,18 @@ def return_message(msg): else: return _null - unpacker = salt.utils.msgpack.Unpacker(raw=False) while not self._closing and not stream.closed(): try: - wire_bytes = yield stream.read_bytes(4096, partial=True) - unpacker.feed(wire_bytes) - for framed_msg in unpacker: - body = framed_msg["body"] - self.io_loop.spawn_callback( - self.payload_handler, - body, - write_callback(stream, framed_msg["head"]), - ) + length_bytes = yield stream.read_bytes(4) + length = struct.unpack(">I", length_bytes)[0] + payload = yield stream.read_bytes(length) + framed_msg = salt.utils.msgpack.unpackb(payload, raw=False) + body = framed_msg["body"] + self.io_loop.spawn_callback( + self.payload_handler, + body, + write_callback(stream, framed_msg.get("head", {})), + ) except _StreamClosedError: log.trace("Client disconnected from IPC %s", self.socket_path) break @@ -278,7 +279,6 @@ def __init__(self, socket_path, io_loop=None): self.socket_path = socket_path self._closing = False self.stream = None - self.unpacker = salt.utils.msgpack.Unpacker(raw=False) self._connecting_future = None def connected(self): @@ -533,18 +533,43 @@ def start(self): ) self._started = True - @tornado.gen.coroutine def _write(self, stream, pack): + """ + Queue a write to ``stream`` and attach a completion callback to + handle exceptions. 
+ + Note: this is intentionally NOT a Tornado @gen.coroutine. When it + was a coroutine, every published message produced a long-lived + gen.Runner per subscriber stream that waited inside ``yield + stream.write(...)`` until the OS drained the bytes. Under high + event rates (beacons, command returns, flood_events), Runners + piled up faster than the OS could flush, and the + Runner/generator/frame/Future quadruple was the dominant minion + leak. Returning a non-Awaitable lets stream.write enqueue the + bytes in Tornado's own write buffer (which Tornado already + manages efficiently) and the done-callback handles the disconnect + path without spawning a coroutine. + """ + + def _on_done(future, _stream=stream): + try: + future.result() + except StreamClosedError: + log.trace("Client disconnected from IPC %s", self.socket_path) + self.streams.discard(_stream) + except Exception as exc: # pylint: disable=broad-except + log.error("Exception occurred while handling stream: %s", exc) + if not _stream.closed(): + _stream.close() + self.streams.discard(_stream) + try: - yield stream.write(pack) + future = stream.write(pack) except StreamClosedError: - log.trace("Client disconnected from IPC %s", self.socket_path) - self.streams.discard(stream) - except Exception as exc: # pylint: disable=broad-except - log.error("Exception occurred while handling stream: %s", exc) - if not stream.closed(): - stream.close() self.streams.discard(stream) + return + if future is not None: + future.add_done_callback(_on_done) def publish(self, msg): """ @@ -554,8 +579,14 @@ def publish(self, msg): return pack = salt.transport.frame.frame_msg_ipc(msg, raw_body=True) - for stream in self.streams: - self.io_loop.spawn_callback(self._write, stream, pack) + # Iterate a snapshot: ``_write`` may call ``self.streams.discard`` + # synchronously when a stream is already closed at write time, + # which would otherwise raise "Set changed size during iteration". 
+ for stream in tuple(self.streams): + # _write is now a regular function that returns immediately + # after queuing the write into Tornado's IOStream buffer. + # No spawn_callback (and therefore no gen.Runner) is needed. + self._write(stream, pack) def handle_connection(self, connection, address): log.trace("IPCServer: Handling connection to address: %s", address) @@ -645,70 +676,86 @@ class IPCMessageSubscriber(IPCClient): def __init__(self, socket_path, io_loop=None): super().__init__(socket_path, io_loop=io_loop) self._read_stream_future = None - self._saved_data = [] + self._saved_data = [] # retained for API compatibility; no longer populated self._read_in_progress = Lock() self._closing = False @tornado.gen.coroutine def _read(self, timeout, callback=None): + """ + Read framed IPC messages. + + Each message on the wire is: [4-byte big-endian length][msgpack payload]. + We read the length prefix first (applying the caller's timeout there), + then read exactly that many bytes for the payload — eliminating the + streaming-Unpacker approach that was vulnerable to byte interleaving + when large messages exceeded PIPE_BUF on the Unix domain socket. + + When a ``callback`` is provided, this coroutine loops indefinitely, + invoking the callback for every received message until the stream + is closed. Without a callback, it returns the body of the first + message (or None on timeout / closed stream). + """ try: try: yield self._read_in_progress.acquire(timeout=0.00000001) except tornado.gen.TimeoutError: raise tornado.gen.Return(None) - exc_to_raise = None ret = None try: while True: + # Step 1: read the 4-byte length prefix, honouring the timeout. 
if self._read_stream_future is None: - self._read_stream_future = self.stream.read_bytes( - 4096, partial=True - ) + self._read_stream_future = self.stream.read_bytes(4) + if timeout is None: - wire_bytes = yield self._read_stream_future + length_bytes = yield self._read_stream_future else: - wire_bytes = yield FutureWithTimeout( + length_bytes = yield FutureWithTimeout( self.io_loop, self._read_stream_future, timeout ) self._read_stream_future = None - # Remove the timeout once we get some data or an exception - # occurs. We will assume that the rest of the data is already - # there or is coming soon if an exception doesn't occur. + # Remove the timeout once we've received the length prefix + # so the payload read isn't artificially constrained. timeout = None - self.unpacker.feed(wire_bytes) - first_sync_msg = True - for framed_msg in self.unpacker: + # Step 2: read exactly `length` bytes for the msgpack payload. + length = struct.unpack(">I", length_bytes)[0] + payload = yield self.stream.read_bytes(length) + framed_msg = salt.utils.msgpack.unpackb(payload, raw=False) + + if isinstance(framed_msg, dict) and "body" in framed_msg: + body = framed_msg["body"] + else: + log.debug( + "IPC subscriber: malformed frame (type=%s), skipping", + type(framed_msg).__name__, + ) if callback: - self.io_loop.spawn_callback(callback, framed_msg["body"]) - elif first_sync_msg: - ret = framed_msg["body"] - first_sync_msg = False - else: - self._saved_data.append(framed_msg["body"]) - if not first_sync_msg: - # We read at least one piece of data and we're on sync run + continue break + + if callback: + self.io_loop.spawn_callback(callback, body) + continue + ret = body + break except TornadoTimeoutError: - # In the timeout case, just return None. - # Keep 'self._read_stream_future' alive. + # Timed out waiting for the length prefix; keep the pending + # future so the next call can reuse it. 
ret = None - except StreamClosedError as exc: + except StreamClosedError: log.trace("Subscriber disconnected from IPC %s", self.socket_path) self._read_stream_future = None except Exception as exc: # pylint: disable=broad-except - log.error( + log.debug( "Exception occurred in Subscriber while handling stream: %s", exc ) self._read_stream_future = None - exc_to_raise = exc self._read_in_progress.release() - - if exc_to_raise is not None: - raise exc_to_raise # pylint: disable=E0702 raise tornado.gen.Return(ret) # Handle ctrl+c gracefully except TypeError: diff --git a/salt/transport/tcp.py b/salt/transport/tcp.py index 28f9d71c6d29..5e9b70859f50 100644 --- a/salt/transport/tcp.py +++ b/salt/transport/tcp.py @@ -13,6 +13,7 @@ import queue import selectors import socket +import struct import threading import time import urllib @@ -1248,17 +1249,26 @@ async def handle_stream(self, stream): See https://tornado.readthedocs.io/en/latest/iostream.html#tornado.iostream.IOStream for additional details. """ - unpacker = salt.utils.msgpack.Unpacker(raw=False) + # Senders frame payloads as ``frame_msg_ipc(...)`` which prefixes + # the msgpack body with a 4-byte big-endian length (3006.x + # ``d4e2e075aa3``). The streaming ``msgpack.Unpacker`` was a + # 3006.x-era TCPPuller artifact that has no awareness of the + # length prefix and reads it as a msgpack int, surfacing as + # ``'int' object is not subscriptable`` at ``framed_msg["body"]``. + # Mirror ``salt.transport.ipc.IPCServer.handle_stream``: read the + # 4-byte length, then exactly that many payload bytes, unpack + # once. 
while not stream.closed(): try: - wire_bytes = await stream.read_bytes(4096, partial=True) - unpacker.feed(wire_bytes) - for framed_msg in unpacker: - body = framed_msg["body"] - self.io_loop.spawn_callback( - self.payload_handler, - body, - ) + length_bytes = await stream.read_bytes(4) + length = struct.unpack(">I", length_bytes)[0] + payload = await stream.read_bytes(length) + framed_msg = salt.utils.msgpack.unpackb(payload, raw=False) + body = framed_msg["body"] + self.io_loop.spawn_callback( + self.payload_handler, + body, + ) except tornado.iostream.StreamClosedError: if self.path: log.trace("Client disconnected from IPC %s", self.path) diff --git a/salt/transport/zeromq.py b/salt/transport/zeromq.py index dfb0aaf9f908..63633a2e1159 100644 --- a/salt/transport/zeromq.py +++ b/salt/transport/zeromq.py @@ -11,6 +11,7 @@ import multiprocessing import os import signal +import socket import sys import threading from random import randint @@ -423,18 +424,44 @@ def zmq_device(self): Multiprocessing target for the zmq queue device """ self.__setup_signals() - context = zmq.Context(self.opts["worker_threads"]) + # The first argument to zmq.Context is ``io_threads`` -- the + # number of background I/O threads libzmq spawns -- not the + # number of MWorker processes. Each libzmq I/O thread keeps + # its own message-buffer pool that grows under sustained + # traffic and is never released, so passing in + # ``opts["worker_threads"]`` (typically 5-10) caused the + # MWorkerQueue process RSS to climb ~7-8 MB/min indefinitely. + # The QUEUE device only proxies two sockets; one I/O thread is + # plenty. 
+ context = zmq.Context(1) # Prepare the zeromq sockets self.uri = "tcp://{interface}:{ret_port}".format(**self.opts) self.clients = context.socket(zmq.ROUTER) - self.clients.setsockopt(zmq.LINGER, -1) + # LINGER=-1 ("never discard") combined with the salt CLI's pattern + # of one-shot connections (connect, send, recv, disconnect) caused + # libzmq to retain undelivered queue slots for every disconnected + # peer indefinitely under sustained CLI churn. A small finite + # LINGER lets libzmq reap those slots. ROUTER_HANDOVER=1 makes + # the router swap a stale peer (same routing-id, new connection) + # instead of blocking on the old one -- relevant for minions that + # reconnect after a brief network blip. TCP_KEEPALIVE forces + # libzmq to notice peers that disappear without sending FIN, so + # their queues are reaped instead of leaking until the OS default + # 2-hour idle timer fires. + self.clients.setsockopt(zmq.LINGER, 1000) + if hasattr(zmq, "ROUTER_HANDOVER"): + self.clients.setsockopt(zmq.ROUTER_HANDOVER, 1) + self.clients.setsockopt(zmq.TCP_KEEPALIVE, 1) + self.clients.setsockopt(zmq.TCP_KEEPALIVE_IDLE, 60) + self.clients.setsockopt(zmq.TCP_KEEPALIVE_INTVL, 15) + self.clients.setsockopt(zmq.TCP_KEEPALIVE_CNT, 3) if self.opts["ipv6"] is True and hasattr(zmq, "IPV4ONLY"): # IPv6 sockets work for both IPv6 and IPv4 addresses self.clients.setsockopt(zmq.IPV4ONLY, 0) self.clients.setsockopt(zmq.BACKLOG, self.opts.get("zmq_backlog", 1000)) self._start_zmq_monitor() self.workers = context.socket(zmq.DEALER) - self.workers.setsockopt(zmq.LINGER, -1) + self.workers.setsockopt(zmq.LINGER, 1000) if self.opts["mworker_queue_niceness"] and not salt.utils.platform.is_windows(): log.info( @@ -688,6 +715,40 @@ def _init_socket(self): if hasattr(zmq, "RECONNECT_IVL_MAX"): self.socket.setsockopt(zmq.RECONNECT_IVL_MAX, 5000) + # Set a stable ZMQ routing identity so the master's ROUTER socket + # reuses an existing slot for this caller (combined with + # ROUTER_HANDOVER=1 on 
the master) rather than allocating a new + # entry in its per-peer table for every CLI invocation. Without + # this, the master's libzmq peer-id hashtable grows unbounded + # under sustained CLI churn (about 6 MB/min in stress). + # + # Only do this for salt CLI tools (which do NOT set ``__role`` in + # opts). All long-lived daemons -- minion, syndic, master -- + # open multiple AsyncReqMessageClient instances concurrently from + # a single process: the minion at startup for auth + pillar + + # file requests, the syndic when relaying multiple downstream + # minions' returns upstream, and a master when forwarding to + # peer masters. Giving them all the same stable identity would + # cause ROUTER_HANDOVER on the upstream ROUTER to silently drop + # any reply still in flight to the previous REQ as each new one + # arrived, hanging startup and breaking syndic relays. Their + # own REQ churn is bounded anyway (one peer per daemon), so they + # can keep using libzmq's default per-connection random + # routing-ids. 
+ if not self.opts.get("__role"): + role = self.opts.get("id") or "clir" + try: + uid = os.getuid() + except AttributeError: # Windows + uid = 0 + identity = "salt-req/{role}/{host}/{uid}/{slot}".format( + role=role, + host=socket.gethostname(), + uid=uid, + slot=os.getpid() % 256, + ) + self.socket.setsockopt(zmq.IDENTITY, identity.encode("utf-8")) + _set_tcp_keepalive(self.socket, self.opts) if self.addr.startswith("tcp://["): # Hint PF type if bracket enclosed IPv6 address @@ -1160,6 +1221,11 @@ def stop(self): self._socket.disable_monitor() except zmq.Error: pass + if self._monitor_socket is not None: + try: + self._monitor_socket.close(0) + except zmq.Error: + pass self._socket = None self._running.clear() self._monitor_socket = None diff --git a/salt/utils/args.py b/salt/utils/args.py index cde9b84c9113..6caf99bc9d39 100644 --- a/salt/utils/args.py +++ b/salt/utils/args.py @@ -223,6 +223,9 @@ def yamlify_arg(arg): return original_arg +_ArgSpec = namedtuple("ArgSpec", "args varargs keywords defaults") + + def get_function_argspec(func, is_class_method=None): """ A small wrapper around inspect.signature that also supports callable objects and wrapped functions @@ -249,7 +252,6 @@ def get_function_argspec(func, is_class_method=None): raise TypeError(f"Cannot inspect argument list for '{func}'") # Build a namedtuple which looks like the result of a Python 2 argspec - _ArgSpec = namedtuple("ArgSpec", "args varargs keywords defaults") args = [] defaults = [] varargs = keywords = None diff --git a/salt/utils/context.py b/salt/utils/context.py index 45776ab4c717..c46e03e7272e 100644 --- a/salt/utils/context.py +++ b/salt/utils/context.py @@ -83,6 +83,19 @@ def active(self): except AttributeError: return False + def destroy(self): + """ + Destroy the ContextDict and clear internal state + """ + if hasattr(self, "_state"): + self._state.data = None + try: + del self._state.data + except AttributeError: + pass + if hasattr(self, "global_data"): + 
self.global_data.clear() + # TODO: rename? def clone(self, **kwargs): """ diff --git a/salt/utils/event.py b/salt/utils/event.py index be009f440d52..0732377408c1 100644 --- a/salt/utils/event.py +++ b/salt/utils/event.py @@ -239,6 +239,12 @@ def _after_fork_in_child(cls): except Exception: # pylint: disable=broad-except pass + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + self.destroy() + def __init__( self, node, @@ -602,6 +608,15 @@ def _get_event(self, wait, tag, match_func=None, no_block=False): return None except RuntimeError: return None + except salt.exceptions.SaltDeserializationError: + # Malformed msgpack frame — can occur under extreme event bus + # load when multiple events are concatenated in the IPC buffer + # and msgpack reports ExtraData or a UTF-8 decode failure. + # Skip this frame rather than crashing the subscriber. + log.debug( + "Event subscriber: skipping malformed event (deserialization error)" + ) + continue if not match_func(ret["tag"], tag) or not self._subproxy_match(ret["data"]): # tag not match @@ -929,23 +944,6 @@ def set_event_handler(self, event_handler): # This will handle reconnects self.io_loop.spawn_callback(self.subscriber.on_recv, event_handler) - # pylint: disable=W1701 - def __del__(self): - # skip exceptions in destroy-- since destroy() doesn't cover interpreter - # shutdown-- where globals start going missing - try: - self.destroy() - except Exception: # pylint: disable=broad-except - pass - - # pylint: enable=W1701 - - def __enter__(self): - return self - - def __exit__(self, *args): - self.destroy() - class MasterEvent(SaltEvent): """ @@ -1290,6 +1288,28 @@ def __init__(self, opts, **kwargs): local_minion_opts = self.opts.copy() local_minion_opts["file_client"] = "local" self.minion = salt.minion.MasterMinion(local_minion_opts) + # Validate all configured returners exist at startup so operators get + # a clear error immediately rather than thousands of per-event errors. 
+ configured = self.opts["event_return"] + if not isinstance(configured, list): + configured = [configured] + missing = [ + r for r in configured if f"{r}.event_return" not in self.minion.returners + ] + if missing: + log.error( + "EventReturn: the following configured event_return returner(s) " + "were not found and events will NOT be stored: %s. " + "Check that the returner modules are installed and the " + "returner_dirs configuration is correct.", + missing, + ) + self._missing_returners = set(missing) + # Track last warning time per returner to rate-limit log spam. + # With event_return_queue=0 every event flushes independently, so + # a per-flush-cycle set would still log once per event. Use wall + # time instead: only warn once every 60 seconds per returner. + self._warned_returners = {} # returner_name -> last_warn_time self.event_queue = [] self.stop = False @@ -1334,10 +1354,16 @@ def _flush_event_single(self, event_return): "Event data that caused an exception: %s", self.event_queue ) else: - log.error( - "Could not store return for event(s) - returner '%s' not found.", - event_return, - ) + # Rate-limit to one error per returner per 60 s to prevent log + # spam at high event rates (e.g. event_return_queue=0 flushes + # on every single event). 
+ now = time.time() + if now - self._warned_returners.get(event_return, 0) >= 60: + log.error( + "Could not store return for event(s) - returner '%s' not found.", + event_return, + ) + self._warned_returners[event_return] = now def run(self): """ diff --git a/salt/utils/job.py b/salt/utils/job.py index 5b52449666f2..4cb7a49608e5 100644 --- a/salt/utils/job.py +++ b/salt/utils/job.py @@ -25,8 +25,13 @@ def store_job(opts, load, event=None, mminion=None): if not salt.utils.verify.valid_id(opts, load["id"]): return False if mminion is None: - mminion = salt.minion.MasterMinion(opts, states=False, rend=False) + with salt.minion.MasterMinion(opts, states=False, rend=False) as mminion: + return _store_job(opts, load, event, mminion, endtime=endtime) + else: + return _store_job(opts, load, event, mminion, endtime=endtime) + +def _store_job(opts, load, event, mminion, endtime=None): job_cache = opts["master_job_cache"] if load["jid"] == "req": # The minion is returning a standalone job, request a jobid @@ -158,7 +163,13 @@ def store_minions(opts, jid, minions, mminion=None, syndic_id=None): master_job_cache """ if mminion is None: - mminion = salt.minion.MasterMinion(opts, states=False, rend=False) + with salt.minion.MasterMinion(opts, states=False, rend=False) as mminion: + return _store_minions(opts, jid, minions, mminion, syndic_id) + else: + return _store_minions(opts, jid, minions, mminion, syndic_id) + + +def _store_minions(opts, jid, minions, mminion, syndic_id=None): job_cache = opts["master_job_cache"] minions_fstr = f"{job_cache}.save_minions" diff --git a/salt/utils/minions.py b/salt/utils/minions.py index e61f617927bd..76136916a0a1 100644 --- a/salt/utils/minions.py +++ b/salt/utils/minions.py @@ -737,6 +737,7 @@ def check_minions( if ssh_minions: _res["minions"].extend(ssh_minions) _res["ssh_minions"] = True + roster.destroy() except Exception: # pylint: disable=broad-except log.exception( "Failed matching available minions with %s pattern: %s", tgt_type, expr 
diff --git a/salt/utils/process.py b/salt/utils/process.py index fb009c6d01db..b9897257563c 100644 --- a/salt/utils/process.py +++ b/salt/utils/process.py @@ -531,12 +531,14 @@ def add_process(self, tgt, args=None, kwargs=None, name=None): kwargs = {} if inspect.isclass(tgt) and issubclass(tgt, multiprocessing.Process): - kwargs["name"] = name or tgt.__qualname__ + if name is None: + name = getattr(tgt, "__qualname__", str(tgt)) + kwargs["name"] = name process = tgt(*args, **kwargs) else: - process = Process( - target=tgt, args=args, kwargs=kwargs, name=name or tgt.__qualname__ - ) + if name is None: + name = getattr(tgt, "__qualname__", str(tgt)) + process = Process(target=tgt, args=args, kwargs=kwargs, name=name) process.register_finalize_method(cleanup_finalize_process, args, kwargs) diff --git a/salt/utils/timed_subprocess.py b/salt/utils/timed_subprocess.py index 6166806a80a4..998c62471feb 100644 --- a/salt/utils/timed_subprocess.py +++ b/salt/utils/timed_subprocess.py @@ -95,12 +95,7 @@ def receive(): if rt.is_alive(): # Subprocess cleanup (best effort) self.process.kill() - - def terminate(): - if rt.is_alive(): - self.process.terminate() - - threading.Timer(10, terminate).start() + self.process.wait() raise salt.exceptions.TimedProcTimeoutError( "{} : Timed out after {} seconds".format( self.command, diff --git a/salt/wheel/__init__.py b/salt/wheel/__init__.py index 15a679439aa3..b861ec871df8 100644 --- a/salt/wheel/__init__.py +++ b/salt/wheel/__init__.py @@ -40,9 +40,32 @@ class WheelClient( tag_prefix = "wheel" def __init__(self, opts, context=None): - super().__init__(opts, context=context) + salt.client.mixins.SyncClientMixin.__init__(self, opts, context=context) + salt.client.mixins.AsyncClientMixin.__init__(self, opts, context=context) + self.opts = opts + self.context = context or {} + self.event = None + self.salt_user = salt.utils.user.get_specific_user() + self.event = salt.utils.event.get_event( + "master", self.opts["sock_dir"], opts=self.opts, 
listen=False + ) self.functions = salt.loader.wheels(opts, context=self.context) + def destroy(self): + if self.event is not None: + self.event.destroy() + self.event = None + if hasattr(self, "functions") and self.functions is not None: + if hasattr(self.functions, "destroy"): + self.functions.destroy() + self.functions = {} + + def __enter__(self): + return self + + def __exit__(self, *args): + self.destroy() + # TODO: remove/deprecate def call_func(self, fun, **kwargs): """ diff --git a/tests/monitoring/.gitignore b/tests/monitoring/.gitignore new file mode 100644 index 000000000000..fe865cdbfdbc --- /dev/null +++ b/tests/monitoring/.gitignore @@ -0,0 +1,3 @@ +pki/ +ids/ +event_log.txt diff --git a/tests/monitoring/Dockerfile.salt b/tests/monitoring/Dockerfile.salt new file mode 100644 index 000000000000..f2e08b3389a8 --- /dev/null +++ b/tests/monitoring/Dockerfile.salt @@ -0,0 +1,65 @@ +FROM python:3.10-slim + +RUN apt-get update && apt-get install -y \ + build-essential \ + libssl-dev \ + libffi-dev \ + python3-dev \ + procps \ + curl \ + wget \ + libzmq3-dev \ + tini \ + gdb \ + zlib1g-dev \ + libbz2-dev \ + liblzma-dev \ + libsqlite3-dev \ + libreadline-dev \ + libncurses-dev \ + && rm -rf /var/lib/apt/lists/* + +ARG PYTHON_VERSION=3.10.20 +RUN cd /tmp && \ + wget -q https://www.python.org/ftp/python/${PYTHON_VERSION}/Python-${PYTHON_VERSION}.tar.xz && \ + tar xf Python-${PYTHON_VERSION}.tar.xz && \ + cd Python-${PYTHON_VERSION} && \ + ./configure \ + --enable-shared \ + --prefix=/usr/local \ + --with-ensurepip=install \ + CFLAGS="-g -O2 -fno-omit-frame-pointer" && \ + make -j"$(nproc)" && \ + make install && \ + ldconfig && \ + cd / && rm -rf /tmp/Python-${PYTHON_VERSION}* + +RUN pip install --no-cache-dir memray + +WORKDIR /app + +# Install Salt dependencies +# We copy everything needed for pip install -e . 
+COPY requirements/ /app/requirements/ +COPY setup.py /app/ +COPY pyproject.toml /app/ +COPY MANIFEST.in /app/ +COPY README.rst /app/ +COPY AUTHORS /app/ +COPY LICENSE /app/ +COPY NOTICE /app/ +COPY salt/ /app/salt/ +COPY tools/ /app/tools/ +COPY scripts/ /app/scripts/ + +RUN pip install --no-cache-dir -r requirements/base.txt -r requirements/zeromq.txt +RUN pip install --no-cache-dir -e . + +# Extra tools for monitoring and salt-api +RUN pip install --no-cache-dir psutil CherryPy + +# Create salt user for API testing +RUN useradd -m -s /bin/bash salt && echo "salt:salt" | chpasswd +RUN usermod -aG shadow salt + +ENTRYPOINT ["/usr/bin/tini", "--", "/usr/local/bin/salt-master"] diff --git a/tests/monitoring/README.md b/tests/monitoring/README.md new file mode 100644 index 000000000000..9a66438554da --- /dev/null +++ b/tests/monitoring/README.md @@ -0,0 +1,56 @@ +# Salt Monitoring Environment + +This environment sets up a Salt Master, three Minions, Prometheus, cAdvisor, and Grafana for monitoring. + +## Prerequisites + +- Docker and Docker Compose (or Podman and podman-compose) + +> **Note for Podman users:** If running in rootless mode, cAdvisor might require additional configuration to access host metrics. You may need to run Podman as root for full cAdvisor functionality, or use `podman stats` as an alternative. + +## Usage + +1. Start the environment: + ```bash + docker-compose up -d + ``` + +2. Access the Salt Master: + ```bash + docker exec -it salt-master bash + ``` + +3. Run a test command: + ```bash + salt '*' test.ping + ``` + +4. Access Prometheus: + Go to `http://localhost:19090` + +5. Access cAdvisor: + Go to `http://localhost:18081` + +6. Access Grafana: + Go to `http://localhost:13000` + The "Salt Monitoring" dashboard is pre-provisioned. 
+ +## Monitoring for Memory Leaks + +In Prometheus, you can use the following query to monitor memory usage of the salt-master container: + +```promql +container_memory_usage_bytes{container_label_com_docker_compose_service="salt-master"} +``` + +Or more specifically for RSS: +```promql +container_memory_rss{container_label_com_docker_compose_service="salt-master"} +``` + +## Configuration + +- `master.conf`: Salt Master configuration +- `minion.conf`: Salt Minion configuration (shared by all minions) +- `prometheus.yml`: Prometheus configuration +- `Dockerfile.salt`: Dockerfile for Salt components diff --git a/tests/monitoring/analyze_stats.py b/tests/monitoring/analyze_stats.py new file mode 100644 index 000000000000..09a6e6608b96 --- /dev/null +++ b/tests/monitoring/analyze_stats.py @@ -0,0 +1,71 @@ +#!/usr/bin/env python3 +import json +import sys +import urllib.parse +import urllib.request + +PROM_URL = "http://localhost:19090" + + +def query_prom(query): + params = urllib.parse.urlencode({"query": query}) + url = f"{PROM_URL}/api/v1/query?{params}" + with urllib.request.urlopen(url) as response: + return json.loads(response.read().decode()) + + +def get_linear_slope(metric_name, duration="30m"): + # Returns the slope (rate of change per second) over the duration + query = f"deriv({metric_name}[{duration}])" + data = query_prom(query) + try: + return float(data["data"]["result"][0]["value"][1]) + except (IndexError, KeyError, ValueError): + return 0.0 + + +def main(): + print("--- Salt Stress Test Analysis ---") + + # 1. Check for zombie processes (process count growth) + master_procs_slope = get_linear_slope("salt_master_process_count") + api_procs_slope = get_linear_slope("salt_api_process_count") + + # 2. Check for memory leaks + master_rss_slope = get_linear_slope("salt_master_rss_bytes") + api_rss_slope = get_linear_slope("salt_api_rss_bytes") + + # 3. 
Check for FD leaks + master_fds_slope = get_linear_slope("salt_master_open_fds") + api_fds_slope = get_linear_slope("salt_api_open_fds") + + failed = False + + print(f"Master RSS Slope: {master_rss_slope:.2f} bytes/sec") + print(f"API RSS Slope: {api_rss_slope:.2f} bytes/sec") + print(f"Master FD Slope: {master_fds_slope:.6f} fds/sec") + print(f"Master Proc Slope: {master_procs_slope:.6f} procs/sec") + + # Thresholds + # Memory: > 10KB/sec sustained over 30m might indicate a real leak + if master_rss_slope > 10240: + print("FAIL: Master memory leak detected!") + failed = True + + if master_procs_slope > 0.001: # Sustained growth in process count + print("FAIL: Master process/zombie leak detected!") + failed = True + + if master_fds_slope > 0.01: # Sustained growth in FDs + print("FAIL: Master file descriptor leak detected!") + failed = True + + if failed: + sys.exit(1) + + print("SUCCESS: No significant resource leaks detected.") + sys.exit(0) + + +if __name__ == "__main__": + main() diff --git a/tests/monitoring/docker-compose.yml b/tests/monitoring/docker-compose.yml new file mode 100644 index 000000000000..d610cf2317fb --- /dev/null +++ b/tests/monitoring/docker-compose.yml @@ -0,0 +1,120 @@ +services: + salt-master: + build: + context: ../.. + dockerfile: tests/monitoring/Dockerfile.salt + container_name: salt-master + entrypoint: ["/usr/bin/tini", "--"] + command: ["sh", "-c", "salt-master -d && salt-api"] + volumes: + - ../../salt:/app/salt + - ./master.conf:/etc/salt/master + - ./minion.conf:/etc/salt/minion + - ./srv/salt:/srv/salt + - /home/dan/src/mops/salt/saltstack-raas-master:/app/saltstack-raas-master + - ./raas.conf:/etc/salt/master.d/raas.conf + - /etc/localtime:/etc/localtime:ro + - /etc/timezone:/etc/timezone:ro + - ./pki/master:/etc/salt/pki + - ./ids/master_id:/etc/salt/minion_id + ports: + - "44505:44505" + - "44506:44506" + - "18000:8000" + networks: + salt-net: + aliases: + - salt + + salt-minion-1: + build: + context: ../.. 
+ dockerfile: tests/monitoring/Dockerfile.salt + container_name: salt-minion-1 + hostname: salt-minion-1 + entrypoint: ["/usr/local/bin/salt-minion"] + volumes: + - ../../salt:/app/salt + - ./minion.conf:/etc/salt/minion + - ./pki/minion-1:/etc/salt/pki + - ./ids/minion-1_id:/etc/salt/minion_id + depends_on: + - salt-master + networks: + - salt-net + + salt-minion-2: + build: + context: ../.. + dockerfile: tests/monitoring/Dockerfile.salt + container_name: salt-minion-2 + hostname: salt-minion-2 + entrypoint: ["/usr/local/bin/salt-minion"] + volumes: + - ../../salt:/app/salt + - ./minion.conf:/etc/salt/minion + - ./pki/minion-2:/etc/salt/pki + - ./ids/minion-2_id:/etc/salt/minion_id + depends_on: + - salt-master + networks: + - salt-net + + salt-minion-3: + build: + context: ../.. + dockerfile: tests/monitoring/Dockerfile.salt + container_name: salt-minion-3 + hostname: salt-minion-3 + entrypoint: ["/usr/local/bin/salt-minion"] + volumes: + - ../../salt:/app/salt + - ./minion.conf:/etc/salt/minion + - ./pki/minion-3:/etc/salt/pki + - ./ids/minion-3_id:/etc/salt/minion_id + depends_on: + - salt-master + networks: + - salt-net + + prometheus: + image: prom/prometheus:latest + container_name: prometheus + volumes: + - ./prometheus.yml:/etc/prometheus/prometheus.yml + - ./prometheus_data:/prometheus + ports: + - "19090:9090" + networks: + - salt-net + + cadvisor: + image: gcr.io/cadvisor/cadvisor:latest + container_name: cadvisor + privileged: true + ports: + - "18081:8080" + volumes: + - /:/rootfs:ro + - /var/run:/var/run:rw + - /sys:/sys:ro + - /var/lib/docker/:/var/lib/docker:ro + networks: + - salt-net + + grafana: + image: grafana/grafana:latest + container_name: grafana + ports: + - "13000:3000" + volumes: + - ./grafana/provisioning:/etc/grafana/provisioning + environment: + - GF_AUTH_ANONYMOUS_ENABLED=true + - GF_AUTH_ANONYMOUS_ORG_ROLE=Admin + networks: + - salt-net + +networks: + salt-net: + driver: bridge diff --git 
a/tests/monitoring/grafana/provisioning/dashboards/dashboard_provider.yml b/tests/monitoring/grafana/provisioning/dashboards/dashboard_provider.yml new file mode 100644 index 000000000000..cbc3acf7d644 --- /dev/null +++ b/tests/monitoring/grafana/provisioning/dashboards/dashboard_provider.yml @@ -0,0 +1,10 @@ +apiVersion: 1 +providers: + - name: 'Default' + orgId: 1 + folder: '' + type: file + disableDeletion: false + editable: true + options: + path: /etc/grafana/provisioning/dashboards diff --git a/tests/monitoring/grafana/provisioning/dashboards/salt_monitoring.json b/tests/monitoring/grafana/provisioning/dashboards/salt_monitoring.json new file mode 100644 index 000000000000..929d844b8aec --- /dev/null +++ b/tests/monitoring/grafana/provisioning/dashboards/salt_monitoring.json @@ -0,0 +1,628 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": 1, + "links": [], + "liveNow": false, + "panels": [ + { + "id": 1, + "gridPos": { + "h": 3, + "w": 4, + "x": 0, + "y": 0 + }, + "title": "Current Time", + "type": "stat", + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "value": null + } + ] + }, + "unit": "dateTimeAsLocal", + "decimals": 0 + }, + "overrides": [] + }, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "value", + "wideLayout": true + }, + "targets": [ + 
{ + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "time() * 1000", + "instant": true, + "legendFormat": "", + "refId": "A" + } + ] + }, + { + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 3 + }, + "id": 100, + "title": "Salt Master", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "unit": "bytes" + } + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 4 + }, + "id": 10, + "targets": [ + { + "expr": "salt_master_rss_bytes", + "legendFormat": "Master Process RSS", + "refId": "A" + }, + { + "expr": "container_memory_rss{container_label_com_docker_compose_project=\"monitoring\",container_label_com_docker_compose_service=\"salt-master\"}", + "legendFormat": "Total Container RSS", + "refId": "B" + } + ], + "title": "Master Memory RSS (Process vs Container)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "unit": "percentunit" + } + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 4 + }, + "id": 11, + "targets": [ + { + "expr": "rate(container_cpu_usage_seconds_total{cpu=\"total\",container_label_com_docker_compose_project=\"monitoring\",container_label_com_docker_compose_service=\"salt-master\"}[1m])", + "legendFormat": "Master CPU", + "refId": "A" + } + ], + "title": "Master CPU Usage", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "unit": "short" + } + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 4 + }, + "id": 12, + "targets": [ + { + "expr": "salt_master_open_fds", + "legendFormat": "Total Open FDs", + "refId": "A" + }, + { + "expr": "salt_master_process_count", + "legendFormat": "Process Count", + "refId": "B" + 
} + ], + "title": "Master Resource Usage (FDs & Processes)", + "type": "timeseries" + }, + { + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 11 + }, + "id": 101, + "title": "Minion 1", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "unit": "bytes" + } + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 12 + }, + "id": 20, + "targets": [ + { + "expr": "container_memory_rss{container_label_com_docker_compose_project=\"monitoring\",container_label_com_docker_compose_service=\"salt-minion-1\"}", + "legendFormat": "Minion 1 RSS", + "refId": "A" + } + ], + "title": "Minion 1 Memory RSS", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "unit": "percentunit" + } + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 12 + }, + "id": 21, + "targets": [ + { + "expr": "rate(container_cpu_usage_seconds_total{cpu=\"total\",container_label_com_docker_compose_project=\"monitoring\",container_label_com_docker_compose_service=\"salt-minion-1\"}[1m])", + "legendFormat": "Minion 1 CPU", + "refId": "A" + } + ], + "title": "Minion 1 CPU Usage", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "unit": "short" + } + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 12 + }, + "id": 22, + "targets": [ + { + "expr": "sum(container_fs_inodes_total{container_label_com_docker_compose_project=\"monitoring\",container_label_com_docker_compose_service=\"salt-minion-1\"}) by (name) - sum(container_fs_inodes_free{container_label_com_docker_compose_project=\"monitoring\",container_label_com_docker_compose_service=\"salt-minion-1\"}) by (name)", + "legendFormat": "Minion 1 Inodes", + 
"refId": "A" + } + ], + "title": "Minion Inodes (Disk Files)", + "type": "timeseries" + }, + { + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 19 + }, + "id": 102, + "title": "Minion 2", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "unit": "bytes" + } + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 20 + }, + "id": 30, + "targets": [ + { + "expr": "container_memory_rss{container_label_com_docker_compose_project=\"monitoring\",container_label_com_docker_compose_service=\"salt-minion-2\"}", + "legendFormat": "Minion 2 RSS", + "refId": "A" + } + ], + "title": "Minion 2 Memory RSS", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "unit": "percentunit" + } + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 20 + }, + "id": 31, + "targets": [ + { + "expr": "rate(container_cpu_usage_seconds_total{cpu=\"total\",container_label_com_docker_compose_project=\"monitoring\",container_label_com_docker_compose_service=\"salt-minion-2\"}[1m])", + "legendFormat": "Minion 2 CPU", + "refId": "A" + } + ], + "title": "Minion 2 CPU Usage", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "unit": "short" + } + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 20 + }, + "id": 32, + "targets": [ + { + "expr": "sum(container_fs_inodes_total{container_label_com_docker_compose_project=\"monitoring\",container_label_com_docker_compose_service=\"salt-minion-2\"}) by (name) - sum(container_fs_inodes_free{container_label_com_docker_compose_project=\"monitoring\",container_label_com_docker_compose_service=\"salt-minion-2\"}) by (name)", + "legendFormat": "Minion 2 Inodes", + 
"refId": "A" + } + ], + "title": "Minion Inodes (Disk Files)", + "type": "timeseries" + }, + { + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 27 + }, + "id": 103, + "title": "Minion 3", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "unit": "bytes" + } + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 28 + }, + "id": 40, + "targets": [ + { + "expr": "container_memory_rss{container_label_com_docker_compose_project=\"monitoring\",container_label_com_docker_compose_service=\"salt-minion-3\"}", + "legendFormat": "Minion 3 RSS", + "refId": "A" + } + ], + "title": "Minion 3 Memory RSS", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "unit": "percentunit" + } + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 28 + }, + "id": 41, + "targets": [ + { + "expr": "rate(container_cpu_usage_seconds_total{cpu=\"total\",container_label_com_docker_compose_project=\"monitoring\",container_label_com_docker_compose_service=\"salt-minion-3\"}[1m])", + "legendFormat": "Minion 3 CPU", + "refId": "A" + } + ], + "title": "Minion 3 CPU Usage", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "unit": "short" + } + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 28 + }, + "id": 42, + "targets": [ + { + "expr": "sum(container_fs_inodes_total{container_label_com_docker_compose_project=\"monitoring\",container_label_com_docker_compose_service=\"salt-minion-3\"}) by (name) - sum(container_fs_inodes_free{container_label_com_docker_compose_project=\"monitoring\",container_label_com_docker_compose_service=\"salt-minion-3\"}) by (name)", + "legendFormat": "Minion 3 Inodes", + 
"refId": "A" + } + ], + "title": "Minion 3 Inodes (Disk Files)", + "type": "timeseries" + }, + { + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 35 + }, + "id": 104, + "title": "Salt API", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "unit": "bytes" + } + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 36 + }, + "id": 50, + "targets": [ + { + "expr": "salt_api_rss_bytes", + "legendFormat": "API Process RSS", + "refId": "A" + } + ], + "title": "API Process Memory RSS", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "unit": "percentunit" + } + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 36 + }, + "id": 51, + "targets": [ + { + "expr": "rate(container_cpu_usage_seconds_total{cpu=\"total\",container_label_com_docker_compose_project=\"monitoring\",container_label_com_docker_compose_service=\"salt-master\"}[1m])", + "legendFormat": "API CPU", + "refId": "A" + } + ], + "title": "API CPU Usage", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "unit": "short" + } + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 36 + }, + "id": 52, + "targets": [ + { + "expr": "salt_api_open_fds", + "legendFormat": "Total Open FDs", + "refId": "A" + }, + { + "expr": "salt_api_process_count", + "legendFormat": "Process Count", + "refId": "B" + } + ], + "title": "API Resource Usage (FDs & Processes)", + "type": "timeseries" + } + ], + "refresh": "10s", + "schemaVersion": 36, + "style": "dark", + "tags": [], + "templating": { + "list": [] + }, + "time": { + "from": "now-30m", + "to": "now" + }, + "timepicker": {}, + "timezone": "browser", + "title": "Salt 
Monitoring", + "uid": "salt-mon", + "version": 3, + "weekStart": "" +} diff --git a/tests/monitoring/grafana/provisioning/datasources/prometheus.yml b/tests/monitoring/grafana/provisioning/datasources/prometheus.yml new file mode 100644 index 000000000000..0eddf26296da --- /dev/null +++ b/tests/monitoring/grafana/provisioning/datasources/prometheus.yml @@ -0,0 +1,7 @@ +apiVersion: 1 +datasources: + - name: Prometheus + type: prometheus + access: proxy + url: http://prometheus:9090 + isDefault: true diff --git a/tests/monitoring/master.conf b/tests/monitoring/master.conf new file mode 100644 index 000000000000..3a53bd7ceeb5 --- /dev/null +++ b/tests/monitoring/master.conf @@ -0,0 +1,31 @@ +interface: 0.0.0.0 +publish_port: 44505 +ret_port: 44506 +open_mode: True +auto_accept: True +log_level: debug +master: salt +master_port: 44506 +file_roots: + base: + - /srv/salt +worker_threads: 10 +worker_resource_backcount: 50 +ipc_write_buffer: 104857600 + +rest_cherrypy: + port: 8000 + disable_ssl: True + +netapi_enable_clients: + - local + - runner + - wheel + +external_auth: + pam: + salt: + - .* + - '@runner' + - '@wheel' +id: salt-master diff --git a/tests/monitoring/minion.conf b/tests/monitoring/minion.conf new file mode 100644 index 000000000000..ea32c796c1e4 --- /dev/null +++ b/tests/monitoring/minion.conf @@ -0,0 +1,5 @@ +master: salt-master +master_port: 44506 +log_level: warning +ipc_write_buffer: 104857600 +# id will be set via /etc/salt/minion_id or command line diff --git a/tests/monitoring/prometheus.yml b/tests/monitoring/prometheus.yml new file mode 100644 index 000000000000..c3861b42022f --- /dev/null +++ b/tests/monitoring/prometheus.yml @@ -0,0 +1,15 @@ +global: + scrape_interval: 15s + +scrape_configs: + - job_name: 'prometheus' + static_configs: + - targets: ['localhost:9090'] + + - job_name: 'cadvisor' + static_configs: + - targets: ['cadvisor:8080'] + + - job_name: 'salt-fds' + static_configs: + - targets: ['salt-master:8002'] diff --git 
class FDHandler(http.server.BaseHTTPRequestHandler):
    """
    Minimal Prometheus exporter for salt-master / salt-api process statistics.

    ``GET /metrics`` walks ``/proc`` once and reports, separately for
    salt-master and salt-api processes, the summed open file descriptor
    count, the number of matching processes and the summed resident set
    size in bytes. Every other path answers ``404``.
    """

    # Used only when the platform cannot report its page size.
    _FALLBACK_PAGE_SIZE = 4096

    def log_message(self, format, *args):
        """Discard the per-request access log line."""
        return

    @classmethod
    def _page_size(cls):
        """Return the kernel page size in bytes (4096 if it cannot be queried)."""
        try:
            return os.sysconf("SC_PAGE_SIZE")
        except (AttributeError, ValueError, OSError):
            return cls._FALLBACK_PAGE_SIZE

    @staticmethod
    def _rss_pages(stat_text):
        """
        Return the RSS page count from the contents of ``/proc/<pid>/stat``.

        Raises ``ValueError`` or ``IndexError`` when the text is malformed.
        """
        # Field 2 (comm) is wrapped in parentheses and may itself contain
        # spaces or ')', so a naive whitespace split shifts every later
        # field. Everything after the LAST ')' is reliably
        # whitespace-separated and starts at field 3 (state); RSS is
        # field 24, i.e. index 24 - 3 = 21 of that tail.
        _, closing_paren, tail = stat_text.rpartition(")")
        if not closing_paren:
            raise ValueError("unexpected /proc/<pid>/stat format")
        return int(tail.split()[21])

    @classmethod
    def _collect_stats(cls):
        """
        Scan ``/proc`` once and aggregate per-role totals.

        Returns a dict with the keys ``"master"`` and ``"api"``, each mapping
        to a dict with ``"fds"``, ``"procs"`` and ``"rss"`` integer totals.
        Processes that vanish or cannot be read mid-scan are skipped.
        """
        totals = {
            "master": {"fds": 0, "procs": 0, "rss": 0},
            "api": {"fds": 0, "procs": 0, "rss": 0},
        }
        page_size = cls._page_size()

        try:
            entries = os.listdir("/proc")
        except OSError:
            # No procfs available: report zeros rather than failing the scrape.
            return totals

        for pid in entries:
            if not pid.isdigit():
                continue

            try:
                with open(f"/proc/{pid}/cmdline", "rb") as f:
                    cmdline = f.read().replace(b"\0", b" ").decode(errors="ignore")
            except OSError:
                # Process died (or is not readable) while we were scanning.
                continue

            # Skip the exporter itself.
            if "fd_exporter.py" in cmdline:
                continue

            # salt-api wins when both names appear in the command line.
            if "salt-api" in cmdline:
                role = "api"
            elif "salt-master" in cmdline:
                role = "master"
            else:
                continue

            try:
                fd_count = len(os.listdir(f"/proc/{pid}/fd"))
            except OSError:
                fd_count = 0

            try:
                with open(f"/proc/{pid}/stat", encoding="utf-8") as f:
                    rss_bytes = cls._rss_pages(f.read()) * page_size
            except (OSError, ValueError, IndexError):
                rss_bytes = 0

            bucket = totals[role]
            bucket["fds"] += fd_count
            bucket["procs"] += 1
            bucket["rss"] += rss_bytes

        return totals

    def do_GET(self):
        """Serve the metrics page on ``/metrics``; reply 404 to anything else."""
        if self.path != "/metrics":
            self.send_response(404)
            self.end_headers()
            return

        stats = self._collect_stats()
        master = stats["master"]
        api = stats["api"]

        self.send_response(200)
        self.send_header("Content-Type", "text/plain")
        self.end_headers()

        lines = [
            "# HELP salt_master_open_fds Number of open file descriptors for master",
            "# TYPE salt_master_open_fds gauge",
            f"salt_master_open_fds {master['fds']}",
            "# HELP salt_master_process_count Number of master processes",
            "# TYPE salt_master_process_count gauge",
            f"salt_master_process_count {master['procs']}",
            "# HELP salt_master_rss_bytes RSS memory usage for master in bytes",
            "# TYPE salt_master_rss_bytes gauge",
            f"salt_master_rss_bytes {master['rss']}",
            "# HELP salt_api_open_fds Number of open file descriptors for salt-api",
            "# TYPE salt_api_open_fds gauge",
            f"salt_api_open_fds {api['fds']}",
            "# HELP salt_api_process_count Number of salt-api processes",
            "# TYPE salt_api_process_count gauge",
            f"salt_api_process_count {api['procs']}",
            "# HELP salt_api_rss_bytes RSS memory usage for salt-api in bytes",
            "# TYPE salt_api_rss_bytes gauge",
            f"salt_api_rss_bytes {api['rss']}",
        ]
        self.wfile.write(("\n".join(lines) + "\n").encode())
client 50000 + timeout server 50000 + +frontend localnodes + bind *:80 + default_backend web-backend + +backend web-backend + balance roundrobin + server web1 salt-minion-2:80 check + server web2 salt-minion-3:80 check diff --git a/tests/monitoring/srv/salt/heavy/cmd.sls b/tests/monitoring/srv/salt/heavy/cmd.sls new file mode 100644 index 000000000000..c31101841c56 --- /dev/null +++ b/tests/monitoring/srv/salt/heavy/cmd.sls @@ -0,0 +1,10 @@ +run_noisy_command: + cmd.run: + - shell: /bin/bash + - name: | + for i in {1..50}; do + echo "Batch $i output start" + ps aux + ls -R /etc + echo "Batch $i output end" + done diff --git a/tests/monitoring/srv/salt/heavy/heavy_template.jinja b/tests/monitoring/srv/salt/heavy/heavy_template.jinja new file mode 100644 index 000000000000..81d1ac10a95b --- /dev/null +++ b/tests/monitoring/srv/salt/heavy/heavy_template.jinja @@ -0,0 +1,7 @@ +# Heavy Jinja Template +{% for i in range(iterations) %} +## Block {{ i }} +{% for j in range(sub_iterations) %} +Item {{ i }}.{{ j }}: {{ "abcdefghijklmnopqrstuvwxyz" | reverse }} - {{ (i * j) | string | md5 }} +{% endfor %} +{% endfor %} diff --git a/tests/monitoring/srv/salt/heavy/jinja.sls b/tests/monitoring/srv/salt/heavy/jinja.sls new file mode 100644 index 000000000000..ce27d66f30f2 --- /dev/null +++ b/tests/monitoring/srv/salt/heavy/jinja.sls @@ -0,0 +1,8 @@ +generate_heavy_file: + file.managed: + - name: /tmp/heavy_jinja_output + - source: salt://heavy/heavy_template.jinja + - template: jinja + - context: + iterations: 500 + sub_iterations: 100 diff --git a/tests/monitoring/srv/salt/heavy/many_files.sls b/tests/monitoring/srv/salt/heavy/many_files.sls new file mode 100644 index 000000000000..6484c21a8fcf --- /dev/null +++ b/tests/monitoring/srv/salt/heavy/many_files.sls @@ -0,0 +1,6 @@ +{% for i in range(100) %} +/tmp/stress_file_{{ i }}: + file.managed: + - contents: "Stress test file content for index {{ i }}. This is repeated many times to increase state size. 
import time

import salt.config
import salt.utils.event

# Attach a listening event client to the master event bus, using the
# configuration read from /etc/salt/master.
opts = salt.config.client_config("/etc/salt/master")
event = salt.utils.event.get_event("master", opts=opts, listen=True)

print("Listening for events (30 seconds)...")
began = time.time()
while time.time() - began < 30:
    # Wait up to one second per poll so the 30 second window is re-checked.
    received = event.get_event(wait=1, full=True)
    if not received:
        continue

    tag = received.get("tag")
    print(f"Tag: {tag}")

    # Only grains- and minion-related events get their payload printed.
    lowered_tag = str(tag).lower()
    if "grains" in lowered_tag or "minion" in lowered_tag:
        print(f"DATA: {received.get('data')}")
# Log in to the Salt API and print the session token on stdout.
#
# Reads the globals API_URL, USER and PASS and stores the session cookie in
# /tmp/cookies.txt. The token is taken from the JSON login response
# (return[0].token) with a real JSON parser, so it does not depend on how
# the server spaces its output. Prints nothing and returns non-zero when
# the login fails or the response is not the expected JSON.
get_token() {
    curl -s -c /tmp/cookies.txt -H "Accept: application/json" \
        -d "username=${USER}" -d "password=${PASS}" -d "eauth=pam" \
        "${API_URL}/login" \
        | python3 -c 'import json, sys; print(json.load(sys.stdin)["return"][0]["token"])' 2>/dev/null
}
def test_process_unseeded_logging_options():
    """
    Regression test for issue #68332.

    Start a ``salt.utils.process.Process`` while the logging options dict is
    unseeded (``None``) and check that the child exits with code 0.
    """

    def target():
        pass

    # The options dict is stored as an attribute on the setter function and
    # is process-wide state. Remember what was there so that later tests in
    # the same session do not inherit the ``None`` planted here.
    holder = salt._logging.set_logging_options_dict
    not_set = object()
    previous = getattr(holder, "__options_dict__", not_set)

    holder.__options_dict__ = None
    try:
        proc = salt.utils.process.Process(target=target)
        proc.start()
        proc.join()
        assert proc.exitcode == 0
    finally:
        if previous is not_set:
            del holder.__options_dict__
        else:
            holder.__options_dict__ = previous
+ salt_master.salt_key_cli().run("-d", factory.id, "-y") def test_context_retcode_salt(salt_cli, salt_minion): @@ -154,7 +166,6 @@ def test_exit_status_correct_usage(salt_cli, salt_minion): @pytest.mark.skip_on_windows(reason="Windows does not support SIGINT") -@pytest.mark.skip_initial_onedir_failure def test_interrupt_on_long_running_job(salt_cli, salt_master, salt_minion): """ Ensure that a call to ``salt`` that is taking too long, when a user diff --git a/tests/pytests/integration/cli/test_salt_call_ownership.py b/tests/pytests/integration/cli/test_salt_call_ownership.py index 175931da6079..28ff605b2e85 100644 --- a/tests/pytests/integration/cli/test_salt_call_ownership.py +++ b/tests/pytests/integration/cli/test_salt_call_ownership.py @@ -62,8 +62,19 @@ def non_root_minion(salt_master, salt_factories): random_string("non-root-minion-"), overrides=config_overrides, ) - with factory.started(): - yield factory + try: + with factory.started(): + yield factory + finally: + # ``factory.started()`` stops the minion daemon but leaves the + # minion's accepted key under ``{master_pki_dir}/minions/``. + # The subsequent ``test_salt_key.py::test_list_*`` tests in the same + # session enumerate PKI keys and fail their expected-list assertions + # when this stale key is present. Delete it explicitly. + # ``salt_master.salt_key_cli`` is a *factory* method on the saltfactories + # ``SaltMaster``, not an attribute -- it must be called to obtain a + # runnable ``SaltKey`` CLI factory. 
def _container_python_executable(container):
    """
    Find a python interpreter in the container that has a matching lockfile.

    Each candidate interpreter is asked for its ``major.minor`` version, and
    the first one whose version has a ``py<major.minor>`` directory under
    ``requirements/static/pkg/`` is selected.

    Older ``salt`` reference images (e.g. ``salt:3005``) ship the salt onedir
    on Python 3.7 as ``/usr/local/bin/python3`` next to the distro's own
    ``/usr/bin/python3`` (3.11 on Debian 12). The 3.7 interpreter cannot
    install the modern lockfile (``aiohappyeyeballs==2.6.1`` requires
    ``>=3.9``), which is why several candidates are probed.

    Returns a ``(executable, version)`` tuple. The test is skipped when no
    candidate matches an available lockfile directory.
    """
    version_probe = "import sys; print('{}.{}'.format(*sys.version_info))"

    # Names of the lockfile directories shipped in the source tree.
    lockdir_names = set()
    for entry in (CODE_DIR / "requirements" / "static" / "pkg").iterdir():
        if entry.is_dir() and entry.name.startswith("py"):
            lockdir_names.add(entry.name)

    tried_versions = set()
    for interpreter in ("python3", "/usr/bin/python3", "/usr/local/bin/python3"):
        probe = container.run(interpreter, "-c", version_probe)
        if probe.returncode == 0 and probe.stdout:
            py_version = probe.stdout.strip()
            # Several paths may resolve to the same interpreter version;
            # each version only needs to be checked once.
            if py_version not in tried_versions:
                tried_versions.add(py_version)
                if f"py{py_version}" in lockdir_names:
                    return interpreter, py_version

    pytest.skip(
        "No python interpreter inside the container matches an available "
        f"requirements lockfile (tried {sorted(tried_versions)})."
    )
+ ret = container.run(python_executable, "-m", "pip", "--version") + if ret.returncode != 0: + ret = container.run(python_executable, "-m", "ensurepip", "--upgrade") + log.debug("ensurepip in the container: %s", ret) + if ret.returncode != 0: + apt_ret = container.run( + "sh", + "-c", + "apt-get update >/dev/null && apt-get install -y python3-pip", + ) + log.debug("apt-get install python3-pip in the container: %s", apt_ret) + assert apt_ret.returncode == 0, apt_ret.stderr + ret = container.run(python_executable, "-m", "pip", "--version") + assert ret.returncode == 0, ret.stderr ret = container.run( "env", "SETUPTOOLS_USE_DISTUTILS=stdlib", - "python3", + python_executable, "-m", "pip", "install", + "--break-system-packages", "-r", f"/salt/requirements/static/pkg/py{requirements_py_version}/linux.lock", ) @@ -225,10 +275,11 @@ def _install_salt_in_container(container): ret = container.run( "env", "SETUPTOOLS_USE_DISTUTILS=stdlib", - "python3", + python_executable, "-m", "pip", "install", + "--break-system-packages", f"--constraint=/salt/requirements/static/ci/py{requirements_py_version}/linux.lock", "/salt", ) diff --git a/tests/pytests/unit/_logging/test_impl.py b/tests/pytests/unit/_logging/test_impl.py index b4e5b6a28d17..5d90b47a835e 100644 --- a/tests/pytests/unit/_logging/test_impl.py +++ b/tests/pytests/unit/_logging/test_impl.py @@ -13,7 +13,10 @@ SaltLogRecord, get_log_record_factory, set_log_record_factory, + set_logging_options_dict, + setup_logging, ) +from tests.support.mock import patch @pytest.fixture @@ -119,3 +122,18 @@ def test_deferred_records_flushed_through_color_formatter( output = console_stream.getvalue() assert "buffered message" in output assert "DEBUG" in output + + +def test_set_logging_options_dict_with_none(): + """ + Regression test for issue #68332. + """ + set_logging_options_dict(None) + + +def test_setup_logging_with_unseeded_options(): + """ + Regression test for issue #68332. 
def test_diagnoses_non_root_shadow_inaccess_64275(caplog, tmp_path):
    """
    Regression test for issue #64275.

    When ``salt-master`` runs as the non-root ``salt`` user (the 3006.x
    packaging default) the PAM helper subprocess inherits that uid and
    ``unix_chkpwd`` refuses to validate any user other than the caller,
    because the process cannot read ``/etc/shadow``. Prior to the fix the
    only diagnostic was ``Pam auth failed for :`` with empty stdout /
    stderr.

    Assert that when ``authenticate()`` sees the helper subprocess fail in
    that situation, it logs an actionable CRITICAL message that names
    *both* the cause (process cannot read ``/etc/shadow``) and the two
    standard remediations (run the master as ``root``, or add the master
    user to the ``shadow`` group).
    """
    import logging

    import salt.auth.pam

    # Pretend the helper subprocess failed (this is what unix_chkpwd
    # produces when the calling uid can't read /etc/shadow on Linux).
    class FailedRet:
        returncode = 1
        stdout = b""
        stderr = b""

    # Make sure a pyexe path exists so the function gets past its
    # 'auth.pam.python does not exist' early return.
    fake_pyexe = tmp_path / "python3"
    fake_pyexe.write_text("")
    fake_pyexe.chmod(0o755)

    # Reset the one-shot diagnostic memo so the test is independent of
    # test-ordering, and put the previous value back afterwards so this
    # test does not change module state for whatever runs next.
    not_set = object()
    previous_memo = getattr(salt.auth.pam, "_SHADOW_DIAGNOSTIC_LOGGED", not_set)
    salt.auth.pam._SHADOW_DIAGNOSTIC_LOGGED = False

    opts = {"auth.pam.python": str(fake_pyexe)}
    try:
        # Pretend we are running as a non-root user (uid 1234) and
        # /etc/shadow is not readable.
        with patch.dict(salt.auth.pam.__opts__, opts, clear=False), patch(
            "salt.auth.pam.subprocess.run", return_value=FailedRet
        ), patch("salt.auth.pam.os.geteuid", return_value=1234), patch(
            "salt.auth.pam.os.access", return_value=False
        ), patch(
            "salt.auth.pam.__salt_system_encoding__", "utf-8", create=True
        ), caplog.at_level(
            logging.CRITICAL, logger="salt.auth.pam"
        ):
            result = salt.auth.pam.authenticate("fnord", "sekret")
    finally:
        if previous_memo is not_set:
            del salt.auth.pam._SHADOW_DIAGNOSTIC_LOGGED
        else:
            salt.auth.pam._SHADOW_DIAGNOSTIC_LOGGED = previous_memo

    assert result is False, "auth should still fail when subprocess fails"

    # The diagnostic must name the cause and the two standard remedies so
    # operators have a concrete next step instead of a bare 'auth failed'.
    text = caplog.text
    lowered = text.lower()
    assert (
        "/etc/shadow" in text
    ), f"expected /etc/shadow in error diagnostic, got:\n{text}"
    # Mentions the 'shadow' group remedy (Debian-style fix).
    assert "shadow" in lowered and "group" in lowered, text
    # Mentions running as root as the alternative.
    assert "root" in lowered, text
    # Mentions the issue number so an operator can find context.
    assert "64275" in text, text
+ """ + import logging + + import salt.auth.pam + + class FailedRet: + returncode = 1 + stdout = b"" + stderr = b"" + + fake_pyexe = tmp_path / "python3" + fake_pyexe.write_text("") + fake_pyexe.chmod(0o755) + + salt.auth.pam._SHADOW_DIAGNOSTIC_LOGGED = False + + opts = {"auth.pam.python": str(fake_pyexe)} + with patch.dict(salt.auth.pam.__opts__, opts, clear=False), patch( + "salt.auth.pam.subprocess.run", return_value=FailedRet + ), patch("salt.auth.pam.os.geteuid", return_value=0), patch( + "salt.auth.pam.os.access", return_value=True + ), patch( + "salt.auth.pam.__salt_system_encoding__", "utf-8", create=True + ), caplog.at_level( + logging.DEBUG, logger="salt.auth.pam" + ): + salt.auth.pam.authenticate("fnord", "sekret") + + assert "64275" not in caplog.text, ( + "shadow-inaccessibility diagnostic must not fire when the master " + "runs as root and can read /etc/shadow" + ) diff --git a/tests/pytests/unit/grains/test_metadata.py b/tests/pytests/unit/grains/test_metadata.py index f1643ea99430..17f8b3669b13 100644 --- a/tests/pytests/unit/grains/test_metadata.py +++ b/tests/pytests/unit/grains/test_metadata.py @@ -8,6 +8,18 @@ instead of falling through to the ``=`` line-splitter, which previously corrupted any user-data payload containing ``=`` characters (e.g. cloud-init ``#cloud-config`` blocks with ``key=value`` lines). + +Regression coverage for #65184: when ``salt.utils.http.query`` returns an +error response (4xx/5xx with a body, e.g. AWS IMDS returning HTTP 400 for a +bogus path produced by the legacy ``=``-splitter), the tornado backend +populates ``body`` and ``status`` but does NOT set ``headers``. +``salt.grains.metadata._search()`` previously indexed ``linedata["headers"]`` +unconditionally and crashed with ``KeyError: 'headers'``, causing the entire +metadata grain to fail to load with:: + + [CRITICAL] Failed to load grains defined in grain file metadata.metadata + ... 
+ KeyError: 'headers' """ import logging @@ -335,3 +347,100 @@ def test_equals_lines_other_than_user_data_still_parse_via_splitter(): sc = result["meta-data"]["iam"]["security-credentials"] assert "role-arn-suffix" in sc, sc assert "myrole-user-data=role-arn-suffix" not in sc, sc + + +def test_search_handles_error_response_without_headers_65184(): + """ + Regression for #65184: a recursive ``http.query`` call that returns an + error-shaped response (``body`` present, ``headers`` absent — the shape + produced by the tornado backend on HTTPError since 3006.3) must not + crash ``_search()`` with ``KeyError: 'headers'``. + + The reporter's traceback shows the crash happens on the recursive call + triggered by a top-level metadata listing entry (the ``prefix == "latest/"`` + branch), where the recursive ``_search`` then calls ``http.query`` for + ``latest/dynamic/`` (or similar) and gets back an error response without a + ``headers`` key. Before the fix the indexing ``linedata["headers"]`` raised. + After the fix the missing-headers case is treated like "no Content-Type + information" and parsing proceeds. + """ + responses = { + "http://169.254.169.254/latest/": { + "body": "dynamic", + "headers": {"Content-Type": "text/plain"}, + }, + # Recursive call: error-shape response. Body + status + error, NO + # headers key. This is exactly what salt.utils.http.query returns on + # tornado HTTPError since commit 43b7fb52842 (3006.3). + "http://169.254.169.254/latest/dynamic/": { + "body": "

400 Bad request

def test_search_handles_missing_headers_on_initial_query_65184():
    """
    Companion to ``test_search_handles_error_response_without_headers_65184``:
    the very first ``http.query`` call made while building the metadata grain
    can also produce a response without a ``headers`` key (e.g. the metadata
    service returns 4xx for the top-level listing).

    The contract checked here is only that ``metadata.metadata()`` returns
    normally instead of raising ``KeyError``; the returned value may be a
    ``dict`` or a ``str``, as the final assertion allows.
    """
    # Error-shaped response for the top-level listing: body, status and
    # error are present, but there is deliberately no "headers" key.
    responses = {
        "http://169.254.169.254/latest/": {
            "body": "some-error-body",
            "status": 400,
            "error": "HTTP 400: Bad request",
        },
    }

    with patch(
        "salt.utils.http.query",
        create_autospec(
            http.query, autospec=True, side_effect=_make_mock_http(responses)
        ),
    ):
        result = metadata.metadata()

    # Either an empty dict or a parsed body is acceptable; the contract is
    # "no KeyError".
    assert isinstance(result, (dict, str))
+ assert result == "raw-octet-stream-payload" diff --git a/tests/pytests/unit/netapi/cherrypy/test_login.py b/tests/pytests/unit/netapi/cherrypy/test_login.py index 8066c59dab16..6c70c301d824 100644 --- a/tests/pytests/unit/netapi/cherrypy/test_login.py +++ b/tests/pytests/unit/netapi/cherrypy/test_login.py @@ -30,6 +30,12 @@ def __init__(self, *args, **kwargs): def _is_master_running(self): return True + def __enter__(self): + return self + + def __exit__(self, *args): + pass + class MockResolver: def __init__(self, *args, **kwargs): diff --git a/tests/pytests/unit/netapi/test_netapi_client_runner.py b/tests/pytests/unit/netapi/test_netapi_client_runner.py index a3ff13a39b44..039552c98733 100644 --- a/tests/pytests/unit/netapi/test_netapi_client_runner.py +++ b/tests/pytests/unit/netapi/test_netapi_client_runner.py @@ -32,6 +32,12 @@ class FakeRunner: def __init__(self, opts): self.opts = opts + def __enter__(self): + return self + + def __exit__(self, *exc): + return False + def cmd_sync(self, low, timeout=None, full_return=False): captured["timeout"] = timeout captured["low"] = low @@ -59,6 +65,12 @@ class FakeRunner: def __init__(self, opts): pass + def __enter__(self): + return self + + def __exit__(self, *exc): + return False + def cmd_sync(self, low, timeout=None, full_return=False): captured["timeout"] = timeout return {"return": "ok"} @@ -80,6 +92,12 @@ class FakeRunner: def __init__(self, opts): pass + def __enter__(self): + return self + + def __exit__(self, *exc): + return False + def cmd_sync(self, low, timeout=None, full_return=False): captured["timeout"] = timeout return {"return": "ok"} @@ -101,6 +119,12 @@ class FakeRunner: def __init__(self, opts): pass + def __enter__(self): + return self + + def __exit__(self, *exc): + return False + def cmd_sync(self, low, timeout=None, full_return=False): captured["timeout"] = timeout return {"return": "ok"} diff --git a/tests/pytests/unit/test_minion.py b/tests/pytests/unit/test_minion.py index 
def test_resolve_dns_retry_aborts_on_shutdown_request_69466(minion_opts):
    """
    Regression test for #69466.

    A shutdown request (e.g. SIGTERM via ``MinionManager.stop()``) must wake
    the ``resolve_dns()`` retry loop promptly rather than leaving it blocked
    for the whole ``retry_dns`` interval. Before the fix, the blocking
    ``time.sleep(opts["retry_dns"])`` inside ``resolve_dns`` starved the
    io_loop, so the shutdown callback never ran until systemd sent SIGKILL.
    """
    # MinionManager.stop() relies on this public module-level abort hook,
    # so its absence is a regression in its own right.
    assert hasattr(salt.minion, "request_resolve_dns_abort"), (
        "salt.minion is missing request_resolve_dns_abort(); the SIGTERM "
        "path cannot interrupt the DNS retry loop. See #69466."
    )
    assert hasattr(salt.minion, "_RESOLVE_DNS_ABORT"), (
        "salt.minion is missing the _RESOLVE_DNS_ABORT event used to "
        "wake an in-progress resolve_dns() retry. See #69466."
    )

    # retry_dns is far larger than the deadline asserted below: if the
    # abort is ignored the call blocks for the full 90 seconds.
    minion_opts.update(
        ipv6=False,
        master="dummy",
        master_port="4555",
        retry_dns=90,
        retry_dns_count=None,
    )

    # The abort flag is process-wide; start from a clean state.
    salt.minion._RESOLVE_DNS_ABORT.clear()

    def request_abort_after_delay():
        # Let resolve_dns reach its wait first, then request the abort the
        # same way MinionManager.stop() does on SIGTERM.
        time.sleep(0.25)
        salt.minion.request_resolve_dns_abort()

    abort_thread = threading.Thread(target=request_abort_after_delay, daemon=True)
    t0 = time.monotonic()
    try:
        abort_thread.start()
        with pytest.raises(SaltMasterUnresolvableError):
            salt.minion.resolve_dns(minion_opts)
    finally:
        abort_thread.join(timeout=5)
        # Leave the flag clean for other tests.
        salt.minion._RESOLVE_DNS_ABORT.clear()

    elapsed = time.monotonic() - t0
    # The fix should wake well under 5s; the broken code would sleep for
    # the full retry_dns (90s) per iteration.
    assert elapsed < 5, (
        f"resolve_dns did not honor the shutdown abort flag "
        f"(elapsed={elapsed:.2f}s); regression of #69466."
    )
+ ) + manager.io_loop.add_callback.assert_called_once() + finally: + salt.minion._RESOLVE_DNS_ABORT.clear() + + @pytest.mark.slow_test def test_gen_modules_executors(minion_opts): """ @@ -1242,6 +1339,68 @@ def mock_resolve_dns(opts, fallback=False): await minion.connect_master() +def test_eval_master_single_master_closes_pub_channel_on_failure_68901(minion_opts): + """ + Regression test for #68901: every AsyncPubChannel constructed by + Minion.eval_master in the single-master sign-in path must be close()-d + when the connection attempt fails, regardless of which exception type + pub_channel.connect() raised. Failing to do so leaks the channel's + underlying socket file descriptor on each retry, which over time + exhausts the minion's fd limit. + """ + minion_opts.update( + { + "master": "127.0.0.1", + "master_type": "str", + "transport": "zeromq", + "__role": "", + "retry_dns": 0, + "acceptance_wait_time": 0, + "acceptance_wait_time_max": 0, + "master_tries": 1, + } + ) + + created = [] + + class MockPubChannel: + def __init__(self): + self.closed = 0 + created.append(self) + + @tornado.gen.coroutine + def connect(self): + # Non-SaltClientError on purpose: prior to the fix, this leaks + # the channel because the single-master path only closes + # pub_channel inside an `except SaltClientError` clause. 
+ raise OSError("simulated transport failure") + + def close(self): + self.closed += 1 + + def mock_channel_factory(opts, **kwargs): + return MockPubChannel() + + def mock_resolve_dns(opts, fallback=True): + return {"master_ip": "127.0.0.1", "master_uri": "tcp://127.0.0.1:4506"} + + io_loop = tornado.ioloop.IOLoop() + try: + with patch("salt.minion.resolve_dns", mock_resolve_dns), patch( + "salt.channel.client.AsyncPubChannel.factory", mock_channel_factory + ), patch("salt.loader.grains", MagicMock(return_value={})): + minion = salt.minion.Minion(minion_opts, io_loop=io_loop, load_grains=False) + with pytest.raises(OSError): + io_loop.run_sync(lambda: minion.eval_master(minion_opts, timeout=1)) + finally: + io_loop.close(all_fds=True) + + assert len(created) == 1, "exactly one pub channel should have been created" + assert ( + created[0].closed == 1 + ), "pub channel was not closed on connection failure (#68901 leak)" + + def test_config_cache_path_overrides(): cachedir = os.path.abspath("/path/to/master/cache") opts = {"cachedir": cachedir, "conf_file": None} diff --git a/tests/pytests/unit/test_pkg_systemd_units.py b/tests/pytests/unit/test_pkg_systemd_units.py new file mode 100644 index 000000000000..5d07d7551c5a --- /dev/null +++ b/tests/pytests/unit/test_pkg_systemd_units.py @@ -0,0 +1,53 @@ +""" +Tests for the systemd unit files shipped under ``pkg/common/``. + +These are static-file audits: they parse the unit files committed to the +source tree and assert invariants we don't want to silently regress. 
def _read_unit(name):
    """
    Parse the systemd unit file ``name`` from ``pkg/common/``.

    :param name: File name of the unit, relative to ``COMMON_UNIT_DIR``.
    :return: A ``configparser.ConfigParser`` loaded with the unit's contents.
    :raises OSError: If the unit file cannot be opened (for example
        ``FileNotFoundError`` when it was moved or renamed).
    """
    # strict=False: tolerate a section or key that appears more than once.
    # interpolation=None: systemd uses "%" for its own specifiers (%i, %n,
    # ...); the default BasicInterpolation would raise when such a value
    # is read.
    parser = configparser.ConfigParser(strict=False, interpolation=None)
    # systemd unit files are case sensitive
    parser.optionxform = str
    # ConfigParser.read() silently skips files it cannot open, which would
    # let the audits below run against an empty parser. Open the file
    # explicitly so a missing unit is an error instead of a vacuous pass.
    with open(COMMON_UNIT_DIR / name, encoding="utf-8") as unit_file:
        parser.read_file(unit_file)
    return parser


def test_salt_minion_service_killmode_is_not_process():
    """
    Regression test for https://github.com/saltstack/salt/issues/68406.

    The salt-minion unit historically used ``KillMode=process`` so that an
    in-progress ``pkg.upgrade`` of salt-minion itself could survive systemd
    tearing down the parent. That setting also lets ordinary worker
    processes (``Minion._thread_return``, ``ProcessPayload`` jobs) escape
    the cgroup, so ``systemctl stop`` / ``restart salt-minion`` leaves
    orphaned children running and over time the service stays in a failed
    state. Both ``aptpkg`` and ``yumpkg`` now run package operations in a
    separate systemd scope, so the historical reason no longer holds and
    ``KillMode=process`` must not return.
    """
    parser = _read_unit("salt-minion.service")
    kill_mode = parser.get("Service", "KillMode", fallback=None)
    assert kill_mode != "process", (
        "salt-minion.service must not use KillMode=process; that lets "
        "child processes escape systemd's cgroup. See issue #68406."
    )


def test_salt_minion_service_killmode_is_mixed():
    """
    Pin the salt-minion unit to ``KillMode=mixed``: SIGTERM to the main
    PID only (so the return job from ``service.restart salt-minion`` in
    #68183 / #68209 can finish), then SIGKILL to the rest of the cgroup
    after the main process exits or ``TimeoutStopSec`` elapses.
    """
    parser = _read_unit("salt-minion.service")
    kill_mode = parser.get("Service", "KillMode", fallback=None)
    assert (
        kill_mode == "mixed"
    ), f"salt-minion.service must set KillMode=mixed, found {kill_mode!r}"