Skip to content

Commit 34fa1c6

Browse files
authored
Merge pull request #788 from atlanhq/DEVX-329
DEVX-329 | Separate existence checks between keyword and text fields
2 parents c56d46d + 3d9d2d1 commit 34fa1c6

2 files changed

Lines changed: 61 additions & 1 deletion

File tree

pyatlan/model/fields/atlan_fields.py

Lines changed: 35 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,25 @@
2929
from pyatlan.utils import ComparisonCategory, is_comparable_type
3030

3131

32+
class AtlanSearchableFieldType(str, Enum):
    """
    Selects which index of a field an existence check should run against.

    Intended for fields that are indexed both as keyword and as text
    (KeywordTextField), in particular when calling has_any_value().
    """

    KEYWORD = "KEYWORD"
    """
    Use the keyword field index (e.g., userDescription.keyword).
    Note: Elasticsearch does not index .keyword for text exceeding 5K characters,
    so an existence check on this index can miss very long values.
    """

    TEXT = "TEXT"
    """
    Use the text field index (e.g., userDescription.text or userDescription).
    This should be used when field values may exceed 5K characters.
    """
49+
50+
3251
class AtlanField(ABC):
3352
"""
3453
Base enumeration of all attributes that exist in Atlan, so you do not have to remember their
@@ -75,13 +94,28 @@ def __init__(self, atlan_field_name: StrictStr, elastic_field_name: StrictStr):
7594
def internal_field_name(self):
7695
return self.atlan_field_name
7796

78-
def has_any_value(self) -> Query:
97+
def has_any_value(
    self,
    field_type: Optional[AtlanSearchableFieldType] = None,
) -> Query:
    """
    Returns a query that will only match assets that have some non-null, non-empty value
    (no matter what actual value) for the field.

    Note: When text exceeds a particular length (5K characters), the keyword field on an
    attribute can be empty while the text field on the same attribute is populated.
    For KeywordTextField types, use field_type=AtlanSearchableFieldType.TEXT to check
    the text field instead. For other field types, this parameter is ignored.

    :param field_type: optional field type to check for existence (KEYWORD or TEXT).
                       Only applicable for KeywordTextField types.
                       When omitted (None) the check behaves exactly as KEYWORD does,
                       which keeps existing callers unchanged.
    :returns: a query that will only match assets that have some non-null, non-empty value for the field
    """
    if field_type == AtlanSearchableFieldType.TEXT:
        # A single lookup replaces the hasattr-then-read pair. Fields without a
        # usable text field name (attribute absent or empty) fall through to the
        # default check below instead of building a query on an empty field name.
        # NOTE(review): _text_field_name is presumably set by KeywordTextField,
        # which is not visible in this view -- confirm.
        text_field_name = getattr(self, "_text_field_name", None)
        if text_field_name:
            return Exists(field=text_field_name)
    return Exists(field=self.elastic_field_name)
86120

87121
def order(self, order: SortOrder = SortOrder.ASCENDING) -> SortItem:

tests/unit/model/fields/atlan_fields_test.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
from pyatlan.model.enums import SortOrder
66
from pyatlan.model.fields.atlan_fields import (
7+
AtlanSearchableFieldType,
78
InternalKeywordTextField,
89
KeywordField,
910
KeywordTextField,
@@ -90,6 +91,31 @@ def test_atlan_field_name(self, sut: KeywordTextField):
9091
def test_keyword_field_name(self, sut: KeywordTextField):
    """The field reports KEYWORD_FIELD_NAME as its keyword field name."""
    actual = sut.keyword_field_name
    assert actual == KEYWORD_FIELD_NAME
9293

94+
def test_has_any_value_default_uses_keyword_field(self, sut: KeywordTextField):
    """Calling has_any_value() without arguments yields an Exists query on the keyword field."""
    query = sut.has_any_value()

    assert isinstance(query, Exists)
    assert query.field == KEYWORD_FIELD_NAME
100+
101+
def test_has_any_value_with_keyword_field_type(self, sut: KeywordTextField):
    """Passing field_type=KEYWORD explicitly yields an Exists query on the keyword field."""
    requested_type = AtlanSearchableFieldType.KEYWORD
    query = sut.has_any_value(field_type=requested_type)

    assert isinstance(query, Exists)
    assert query.field == KEYWORD_FIELD_NAME
107+
108+
def test_has_any_value_with_text_field_type(self, sut: KeywordTextField):
    """Passing field_type=TEXT yields an Exists query on the text field.

    This matters for values longer than 5K characters, for which Elasticsearch
    does not index the .keyword sub-field.
    """
    requested_type = AtlanSearchableFieldType.TEXT
    query = sut.has_any_value(field_type=requested_type)

    assert isinstance(query, Exists)
    assert query.field == TEXT_FIELD_NAME
118+
93119

94120
class TestInternalKeywordTextField:
95121
@pytest.fixture()

0 commit comments

Comments
 (0)