Skip to content

Commit de441ba

Browse files
authored
Merge pull request #824 from atlanhq/SEC-147
SEC-147 | Block sensitive file paths in `FileClient.upload_file()`
2 parents d7b30bd + 58c5f7e commit de441ba

487 files changed

Lines changed: 3942 additions & 3973 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.pre-commit-config.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,11 @@ repos:
33
rev: v6.0.0
44
hooks:
55
- id: check-yaml
6+
exclude: ^\.github/workflows/
67
- id: end-of-file-fixer
78
- id: trailing-whitespace
89
- id: debug-statements
10+
exclude: ^pyatlan_v9/model/assets/_overlays/
911

1012
# Use uv to run formatting and QA tools
1113
- repo: https://github.com/astral-sh/ruff-pre-commit

pyatlan/client/common/file.py

Lines changed: 67 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
# SPDX-License-Identifier: Apache-2.0
22
# Copyright 2025 Atlan Pte. Ltd.
3+
import os
4+
from pathlib import Path
35
from typing import Any
46

57
from pyatlan.client.constants import (
@@ -13,6 +15,28 @@
1315
from pyatlan.errors import ErrorCode
1416
from pyatlan.model.file import CloudStorageIdentifier, PresignedURLRequest
1517

18+
# System directories that must never be read from.
19+
_SENSITIVE_SYSTEM_PREFIXES = (
20+
"/etc/",
21+
"/proc/",
22+
"/sys/",
23+
"/dev/",
24+
"/root/",
25+
"/private/etc/", # macOS: /etc is a symlink to /private/etc
26+
"/private/var/", # macOS
27+
)
28+
29+
# Hidden credential/config directories that must never be read from.
30+
_SENSITIVE_DIR_NAMES = frozenset({".aws", ".ssh", ".gnupg"})
31+
32+
# File name prefixes for environment/secret files.
33+
_SENSITIVE_FILE_PREFIXES = (".env",)
34+
35+
36+
def _parse_env_list(env_var: str) -> list:
    """
    Read a comma-separated environment variable and return its entries.

    :param env_var: name of the environment variable to read
    :returns: the non-empty entries, each with surrounding whitespace
        stripped, in their original order; an empty list when the variable
        is unset, empty, or contains only separators/whitespace
    """
    raw = os.environ.get(env_var, "")
    # Splitting an empty string yields [""], which the emptiness filter below
    # discards, so no separate "is the variable set?" check is required.
    stripped = (piece.strip() for piece in raw.split(","))
    return [entry for entry in stripped if entry]
39+
1640

1741
class FilePresignedUrl:
1842
"""
@@ -54,10 +78,52 @@ def validate_file_path(file_path: str) -> Any:
5478
5579
:param file_path: path to the file to upload
5680
:returns: opened file object
81+
:raises INVALID_UPLOAD_FILE_PATH_TRAVERSAL: if path traversal is detected
82+
:raises INVALID_UPLOAD_FILE_PATH_SENSITIVE: if path points to a sensitive location
5783
:raises INVALID_UPLOAD_FILE_PATH: if file not found
5884
"""
85+
path = Path(file_path)
86+
87+
# Block directory traversal via '..'
88+
if ".." in path.parts:
89+
raise ErrorCode.INVALID_UPLOAD_FILE_PATH_TRAVERSAL.exception_with_parameters(
90+
file_path
91+
)
92+
93+
resolved = path.resolve()
94+
resolved_str = str(resolved)
95+
96+
# Block sensitive system directories (e.g. /etc/, /proc/, /dev/)
97+
if resolved_str.startswith(_SENSITIVE_SYSTEM_PREFIXES):
98+
raise ErrorCode.INVALID_UPLOAD_FILE_PATH_SENSITIVE.exception_with_parameters(
99+
file_path
100+
)
101+
102+
# Block credential/config hidden directories (e.g. .aws, .ssh, .gnupg)
103+
if any(part in _SENSITIVE_DIR_NAMES for part in resolved.parts):
104+
raise ErrorCode.INVALID_UPLOAD_FILE_PATH_SENSITIVE.exception_with_parameters(
105+
file_path
106+
)
107+
108+
# Block environment/secret files (e.g. .env, .env.local, .env.production)
109+
if resolved.name.startswith(_SENSITIVE_FILE_PREFIXES):
110+
raise ErrorCode.INVALID_UPLOAD_FILE_PATH_SENSITIVE.exception_with_parameters(
111+
file_path
112+
)
113+
114+
# Block user-defined paths via PYATLAN_UPLOAD_FILE_BLOCKED_PATHS (comma-separated).
115+
# Each entry is matched as a substring against the full resolved path, so it
116+
# can express system prefixes ("/vault/"), dir names (".vault"), or
117+
# file prefixes (".credentials").
118+
# e.g. PYATLAN_UPLOAD_FILE_BLOCKED_PATHS="/custom/secrets/,.vault,.credentials"
119+
user_blocked = _parse_env_list("PYATLAN_UPLOAD_FILE_BLOCKED_PATHS")
120+
if any(pattern in resolved_str for pattern in user_blocked):
121+
raise ErrorCode.INVALID_UPLOAD_FILE_PATH_SENSITIVE.exception_with_parameters(
122+
file_path
123+
)
124+
59125
try:
60-
return open(file_path, "rb")
126+
return open(resolved, "rb")
61127
except FileNotFoundError as err:
62128
raise ErrorCode.INVALID_UPLOAD_FILE_PATH.exception_with_parameters(
63129
str(err.strerror), file_path

pyatlan/errors.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -675,6 +675,20 @@ class ErrorCode(Enum):
675675
"Set multi_valued=False when creating rich text attributes.",
676676
InvalidRequestError,
677677
)
678+
INVALID_UPLOAD_FILE_PATH_TRAVERSAL = (
679+
400,
680+
"ATLAN-PYTHON-400-077",
681+
"Path traversal detected in file path: {0}.",
682+
"Ensure the file path does not contain '..' components.",
683+
InvalidRequestError,
684+
)
685+
INVALID_UPLOAD_FILE_PATH_SENSITIVE = (
686+
400,
687+
"ATLAN-PYTHON-400-078",
688+
"Access to blocked file path is not allowed: {0}.",
689+
"Ensure the file path does not point to a blocked location (system files, credential directories, or paths defined in PYATLAN_UPLOAD_FILE_BLOCKED_PATHS).",
690+
InvalidRequestError,
691+
)
678692
AUTHENTICATION_PASSTHROUGH = (
679693
401,
680694
"ATLAN-PYTHON-401-000",

pyatlan_v9/model/assets/_init_adf.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,12 @@
88
This module provides convenient imports for all ADF types and their Related variants.
99
"""
1010

11+
from .adf import ADF
12+
from .adf_activity import AdfActivity
13+
from .adf_dataflow import AdfDataflow
14+
from .adf_dataset import AdfDataset
15+
from .adf_linkedservice import AdfLinkedservice
16+
from .adf_pipeline import AdfPipeline
1117
from .adf_related import (
1218
RelatedADF,
1319
RelatedAdfActivity,
@@ -16,12 +22,6 @@
1622
RelatedAdfLinkedservice,
1723
RelatedAdfPipeline,
1824
)
19-
from .adf import ADF
20-
from .adf_activity import AdfActivity
21-
from .adf_dataflow import AdfDataflow
22-
from .adf_dataset import AdfDataset
23-
from .adf_linkedservice import AdfLinkedservice
24-
from .adf_pipeline import AdfPipeline
2525

2626
__all__ = [
2727
"ADF",

pyatlan_v9/model/assets/_init_adls.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,16 +8,16 @@
88
This module provides convenient imports for all ADLS types and their Related variants.
99
"""
1010

11+
from .adls import ADLS
12+
from .adls_account import ADLSAccount
13+
from .adls_container import ADLSContainer
14+
from .adls_object import ADLSObject
1115
from .adls_related import (
1216
RelatedADLS,
1317
RelatedADLSAccount,
1418
RelatedADLSContainer,
1519
RelatedADLSObject,
1620
)
17-
from .adls import ADLS
18-
from .adls_account import ADLSAccount
19-
from .adls_container import ADLSContainer
20-
from .adls_object import ADLSObject
2121

2222
__all__ = [
2323
"ADLS",

pyatlan_v9/model/assets/_init_ai.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,16 +8,16 @@
88
This module provides convenient imports for all AI types and their Related variants.
99
"""
1010

11+
from .ai import AI
12+
from .ai_application import AIApplication
13+
from .ai_model import AIModel
14+
from .ai_model_version import AIModelVersion
1115
from .ai_related import (
1216
RelatedAI,
1317
RelatedAIApplication,
1418
RelatedAIModel,
1519
RelatedAIModelVersion,
1620
)
17-
from .ai import AI
18-
from .ai_application import AIApplication
19-
from .ai_model import AIModel
20-
from .ai_model_version import AIModelVersion
2121

2222
__all__ = [
2323
"AI",

pyatlan_v9/model/assets/_init_airflow.py

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,13 +8,9 @@
88
This module provides convenient imports for all Airflow types and their Related variants.
99
"""
1010

11-
from .airflow_related import (
12-
RelatedAirflow,
13-
RelatedAirflowDag,
14-
RelatedAirflowTask,
15-
)
1611
from .airflow import Airflow
1712
from .airflow_dag import AirflowDag
13+
from .airflow_related import RelatedAirflow, RelatedAirflowDag, RelatedAirflowTask
1814
from .airflow_task import AirflowTask
1915

2016
__all__ = [

pyatlan_v9/model/assets/_init_anaplan.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,14 @@
88
This module provides convenient imports for all Anaplan types and their Related variants.
99
"""
1010

11+
from .anaplan import Anaplan
12+
from .anaplan_app import AnaplanApp
13+
from .anaplan_dimension import AnaplanDimension
14+
from .anaplan_line_item import AnaplanLineItem
15+
from .anaplan_list import AnaplanList
16+
from .anaplan_model import AnaplanModel
17+
from .anaplan_module import AnaplanModule
18+
from .anaplan_page import AnaplanPage
1119
from .anaplan_related import (
1220
RelatedAnaplan,
1321
RelatedAnaplanApp,
@@ -21,14 +29,6 @@
2129
RelatedAnaplanView,
2230
RelatedAnaplanWorkspace,
2331
)
24-
from .anaplan import Anaplan
25-
from .anaplan_app import AnaplanApp
26-
from .anaplan_dimension import AnaplanDimension
27-
from .anaplan_line_item import AnaplanLineItem
28-
from .anaplan_list import AnaplanList
29-
from .anaplan_model import AnaplanModel
30-
from .anaplan_module import AnaplanModule
31-
from .anaplan_page import AnaplanPage
3232
from .anaplan_system_dimension import AnaplanSystemDimension
3333
from .anaplan_view import AnaplanView
3434
from .anaplan_workspace import AnaplanWorkspace

pyatlan_v9/model/assets/_init_anomalo.py

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,12 +8,9 @@
88
This module provides convenient imports for all Anomalo types and their Related variants.
99
"""
1010

11-
from .anomalo_related import (
12-
RelatedAnomalo,
13-
RelatedAnomaloCheck,
14-
)
1511
from .anomalo import Anomalo
1612
from .anomalo_check import AnomaloCheck
13+
from .anomalo_related import RelatedAnomalo, RelatedAnomaloCheck
1714

1815
__all__ = [
1916
"Anomalo",

pyatlan_v9/model/assets/_init_api.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,11 @@
88
This module provides convenient imports for all API types and their Related variants.
99
"""
1010

11+
from .api import API
12+
from .api_field import APIField
13+
from .api_object import APIObject
14+
from .api_path import APIPath
15+
from .api_query import APIQuery
1116
from .api_related import (
1217
RelatedAPI,
1318
RelatedAPIField,
@@ -16,11 +21,6 @@
1621
RelatedAPIQuery,
1722
RelatedAPISpec,
1823
)
19-
from .api import API
20-
from .api_field import APIField
21-
from .api_object import APIObject
22-
from .api_path import APIPath
23-
from .api_query import APIQuery
2424
from .api_spec import APISpec
2525

2626
__all__ = [

0 commit comments

Comments
 (0)