Skip to content

Commit 1f6f8cc

Browse files
feat: [dlp] added support for detecting key-value pairs in document metadata (#7822)
* feat: added support for detecting key-value pairs in document metadata feat: added support for image exclusion and adjustment rules feat: add InfoType launch status in InfoType Description docs: added formatting to various comments PiperOrigin-RevId: 885724590 Source-Link: googleapis/googleapis@dfcbe68 Source-Link: googleapis/googleapis-gen@31a2776 Copy-Tag: eyJwIjoicGFja2FnZXMvZ29vZ2xlLXByaXZhY3ktZGxwLy5Pd2xCb3QueWFtbCIsImgiOiIzMWEyNzc2MGNiYzJjNWYzMzAzYWE2NjYwYTk4YmY5OWFiNmU0N2E5In0= * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --------- Co-authored-by: Owl Bot <gcf-owl-bot[bot]@users.noreply.github.com>
1 parent 6d949cf commit 1f6f8cc

5 files changed

Lines changed: 29893 additions & 25755 deletions

File tree

packages/google-privacy-dlp/protos/google/privacy/dlp/v2/dlp.proto

Lines changed: 196 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// Copyright 2025 Google LLC
1+
// Copyright 2026 Google LLC
22
//
33
// Licensed under the Apache License, Version 2.0 (the "License");
44
// you may not use this file except in compliance with the License.
@@ -1001,6 +1001,30 @@ message ExcludeByHotword {
10011001
CustomInfoType.DetectionRule.Proximity proximity = 2;
10021002
}
10031003

1004+
// The rule to exclude image findings based on spatial relationships with
1005+
// other image findings. For example, exclude an image finding if it overlaps
1006+
// with another image finding.
1007+
// This rule is silently ignored if the content being inspected is not an image.
1008+
message ExcludeByImageFindings {
1009+
// A list of image-supported infoTypes—excluding [document
1010+
// infoTypes](https://cloud.google.com/sensitive-data-protection/docs/infotypes-reference#documents)—to
1011+
// be used as context for the exclusion rule. A finding is excluded if
1012+
// its bounding box has the specified spatial relationship (defined by
1013+
// `image_containment_type`) with a finding of an infoType in this list.
1014+
//
1015+
// For example, if `InspectionRuleSet.info_types` includes
1016+
// `OBJECT_TYPE/PERSON` and this `exclusion_rule` specifies `info_types` as
1017+
// `OBJECT_TYPE/PERSON/PASSPORT` with `image_containment_type` set to
1018+
// `encloses`, then `OBJECT_TYPE/PERSON` findings will be excluded if they
1019+
// are fully contained within the bounding box of an
1020+
// `OBJECT_TYPE/PERSON/PASSPORT` finding.
1021+
repeated InfoType info_types = 1;
1022+
1023+
// Required spatial relationship between the bounding box of the target
1024+
// finding and the bounding box of a finding of an `info_types` infoType.
1025+
ImageContainmentType image_containment_type = 2;
1026+
}
1027+
10041028
// The rule that specifies conditions when findings of infoTypes specified in
10051029
// `InspectionRuleSet` are removed from results.
10061030
message ExclusionRule {
@@ -1018,12 +1042,95 @@ message ExclusionRule {
10181042
// Drop if the hotword rule is contained in the proximate context. For
10191043
// tabular data, the context includes the column name.
10201044
ExcludeByHotword exclude_by_hotword = 5;
1045+
1046+
// Exclude findings based on image containment rules. For example, exclude
1047+
// an image finding if it overlaps with another image finding.
1048+
ExcludeByImageFindings exclude_by_image_findings = 6;
10211049
}
10221050

10231051
// How the rule is applied, see MatchingType documentation for details.
10241052
MatchingType matching_type = 4;
10251053
}
10261054

1055+
// AdjustmentRule condition for matching infoTypes.
1056+
message AdjustByMatchingInfoTypes {
1057+
// Sensitive Data Protection adjusts the likelihood of a finding if that
1058+
// finding also matches one of these infoTypes.
1059+
//
1060+
// For example, you can create a rule to adjust the likelihood of a
1061+
// `PHONE_NUMBER` finding if the string is found within a document that is
1062+
// classified as `DOCUMENT_TYPE/HR/RESUME`. To configure this, set
1063+
// `PHONE_NUMBER` in `InspectionRuleSet.info_types`. Add an `adjustment_rule`
1064+
// with an `adjust_by_matching_info_types.info_types` that contains
1065+
// `DOCUMENT_TYPE/HR/RESUME`. In this case, the likelihood of the
1066+
// `PHONE_NUMBER` finding is adjusted, but the likelihood of the
1067+
// `DOCUMENT_TYPE/HR/RESUME` finding is not.
1068+
repeated InfoType info_types = 1;
1069+
1070+
// Required. Minimum likelihood of the
1071+
// `adjust_by_matching_info_types.info_types` finding. If the likelihood is
1072+
// lower than this value, Sensitive Data Protection doesn't adjust the
1073+
// likelihood of the `InspectionRuleSet.info_types` finding.
1074+
Likelihood min_likelihood = 2;
1075+
1076+
// How the adjustment rule is applied.
1077+
//
1078+
// Only `MATCHING_TYPE_PARTIAL_MATCH` is supported:
1079+
//
1080+
// - Partial match: adjusts the findings of infoTypes specified in the
1081+
//   inspection rule when they have a nonempty intersection with a finding
1082+
//   of an infoType specified in this adjustment rule.
1083+
MatchingType matching_type = 3;
1084+
}
1085+
1086+
// AdjustmentRule condition for image findings.
1087+
// This rule is silently ignored if the content being inspected is not an image.
1088+
message AdjustByImageFindings {
1089+
// A list of image-supported infoTypes—excluding [document
1090+
// infoTypes](https://cloud.google.com/sensitive-data-protection/docs/infotypes-reference#documents)—to
1091+
// be used as context for the adjustment rule. Sensitive Data Protection
1092+
// adjusts the likelihood of an image finding if its bounding box has the
1093+
// specified spatial relationship (defined by `image_containment_type`) with a
1094+
// finding of an infoType in this list.
1095+
//
1096+
// For example, you can create a rule to adjust the likelihood of a
1097+
// `US_PASSPORT` finding if it is enclosed by a finding of
1098+
// `OBJECT_TYPE/PERSON/PASSPORT`. To configure this, set `US_PASSPORT` in
1099+
// `InspectionRuleSet.info_types`. Add an `adjustment_rule` with an
1100+
// `adjust_by_image_findings.info_types` that contains
1101+
// `OBJECT_TYPE/PERSON/PASSPORT` and `image_containment_type` set
1102+
// to `encloses`. In this case, the likelihood of the `US_PASSPORT` finding is
1103+
// adjusted, but the likelihood of the `OBJECT_TYPE/PERSON/PASSPORT`
1104+
// finding is not.
1105+
repeated InfoType info_types = 1;
1106+
1107+
// Required. Minimum likelihood of the
1108+
// `adjust_by_image_findings.info_types` finding. If the likelihood is
1109+
// lower than this value, Sensitive Data Protection doesn't adjust the
1110+
// likelihood of the `InspectionRuleSet.info_types` finding.
1111+
Likelihood min_likelihood = 2;
1112+
1113+
// Required spatial relationship between the bounding box of the target
1114+
// finding and the bounding box of a finding of an `info_types` infoType.
1115+
ImageContainmentType image_containment_type = 3;
1116+
}
1117+
1118+
// Rule that specifies conditions when a certain infoType's finding details
1119+
// should be adjusted.
1120+
message AdjustmentRule {
1121+
// Condition under which the adjustment rule is applied.
1122+
oneof conditions {
1123+
// Adjusts findings that also match one of a given set of infoTypes.
1124+
AdjustByMatchingInfoTypes adjust_by_matching_info_types = 1;
1125+
1126+
// Adjusts image findings based on bounding-box relationships.
1127+
AdjustByImageFindings adjust_by_image_findings = 3;
1128+
}
1129+
1130+
// Likelihood adjustment to apply to the infoType.
1131+
CustomInfoType.DetectionRule.LikelihoodAdjustment likelihood_adjustment = 2;
1132+
}
1133+
10271134
// A single inspection rule to be applied to infoTypes, specified in
10281135
// `InspectionRuleSet`.
10291136
message InspectionRule {
@@ -1034,6 +1141,9 @@ message InspectionRule {
10341141

10351142
// Exclusion rule.
10361143
ExclusionRule exclusion_rule = 2;
1144+
1145+
// Adjustment rule.
1146+
AdjustmentRule adjustment_rule = 3;
10371147
}
10381148
}
10391149

@@ -1183,7 +1293,8 @@ message InspectConfig {
11831293

11841294
// Set of rules to apply to the findings for this InspectConfig.
11851295
// Exclusion rules contained in the set are executed at the end; other
1186-
// rules are executed in the order they are specified for each info type.
1296+
// rules are executed in the order they are specified for each info type. Not
1297+
// supported for the `metadata_key_value_expression` CustomInfoType.
11871298
repeated InspectionRuleSet rule_set = 10;
11881299
}
11891300

@@ -1452,6 +1563,9 @@ message MetadataLocation {
14521563
oneof label {
14531564
// Storage metadata.
14541565
StorageMetadataLabel storage_label = 3;
1566+
1567+
// Metadata key that contains the finding.
1568+
KeyValueMetadataLabel key_value_metadata_label = 4;
14551569
}
14561570
}
14571571

@@ -1461,6 +1575,17 @@ message StorageMetadataLabel {
14611575
string key = 1;
14621576
}
14631577

1578+
// The metadata key that contains a finding.
1579+
message KeyValueMetadataLabel {
1580+
// The metadata key. The format depends on the source of the metadata.
1581+
//
1582+
// Example:
1583+
//
1584+
// - `MSIP_Label_122709e3-8f6b-4860-985f-7f722a94f61e_Enabled` (a Microsoft
1585+
//   Purview Information Protection key example)
1586+
string key = 1;
1587+
}
1588+
14641589
// Location of a finding within a document.
14651590
message DocumentLocation {
14661591
// Offset of the line, from the beginning of the file, where the finding
@@ -1884,6 +2009,7 @@ message OutputStorageConfig {
18842009
}
18852010

18862011
// Output storage types.
2012+
// At most one of the following output types can be set.
18872013
oneof type {
18882014
// Store findings in an existing table or a new table in an existing
18892015
// dataset. If table_id is not set a new one will be generated
@@ -2081,6 +2207,21 @@ message LocationSupport {
20812207

20822208
// InfoType description.
20832209
message InfoTypeDescription {
2210+
// The launch status of an infoType.
2211+
enum InfoTypeLaunchStatus {
2212+
// Unspecified launch status. This is the default value.
2213+
INFO_TYPE_LAUNCH_STATUS_UNSPECIFIED = 0;
2214+
2215+
// InfoType is generally available.
2216+
GENERAL_AVAILABILITY = 1;
2217+
2218+
// InfoType is in public preview.
2219+
PUBLIC_PREVIEW = 2;
2220+
2221+
// InfoType is in private preview.
2222+
PRIVATE_PREVIEW = 3;
2223+
}
2224+
20842225
// Internal name of the infoType.
20852226
string name = 1;
20862227

@@ -2115,6 +2256,9 @@ message InfoTypeDescription {
21152256
// For example, the "GEOGRAPHIC_DATA" general infoType would have set for this
21162257
// field "LOCATION", "LOCATION_COORDINATES", and "STREET_ADDRESS".
21172258
repeated string specific_info_types = 12;
2259+
2260+
// The launch status of the infoType.
2261+
InfoTypeLaunchStatus launch_status = 13;
21182262
}
21192263

21202264
// Classification of infoTypes to organize them according to geographic
@@ -7270,8 +7414,48 @@ enum MatchingType {
72707414
// - Regex: finding doesn't match the regex
72717415
// - Exclude infoType: no intersection with affecting infoTypes findings
72727416
MATCHING_TYPE_INVERSE_MATCH = 3;
7417+
7418+
// Rule-specific match.
7419+
//
7420+
// The matching logic is based on the specific rule being used. This is
7421+
// required for rules where the matching behavior is not a simple string
7422+
// comparison (e.g., image containment). This matching type can only be
7423+
// used with the `ExcludeByImageFindings` rule.
7424+
//
7425+
// - Exclude by image findings: The matching logic is defined within
7426+
// `ExcludeByImageFindings` based on spatial relationships between bounding
7427+
// boxes.
7428+
MATCHING_TYPE_RULE_SPECIFIC = 4;
7429+
}
7430+
7431+
// Specifies the spatial relationship between bounding boxes of image findings.
7432+
message ImageContainmentType {
7433+
// The relationship to check between the target finding and the context
7434+
// finding. At most one of the following options can be set.
7435+
oneof type {
7436+
// The context finding's bounding box must fully contain the target
7437+
// finding's bounding box.
7438+
Encloses encloses = 1;
7439+
7440+
// The context finding's bounding box must be fully inside the target
7441+
// finding's bounding box.
7442+
FullyInside fully_inside = 2;
7443+
7444+
// The context finding's bounding box and the target finding's bounding box
7445+
// must have a non-zero intersection.
7446+
Overlap overlaps = 3;
7447+
}
72737448
}
72747449

7450+
// Defines a condition for overlapping bounding boxes. Selected through
// `ImageContainmentType.overlaps`; the message carries no fields.
7451+
message Overlap {}
7452+
7453+
// Defines a condition where one bounding box encloses another. Selected
// through `ImageContainmentType.encloses`; the message carries no fields.
7454+
message Encloses {}
7455+
7456+
// Defines a condition where one bounding box is fully inside another. Selected
// through `ImageContainmentType.fully_inside`; the message carries no fields.
7457+
message FullyInside {}
7458+
72757459
// Deprecated and unused.
72767460
enum ContentOption {
72777461
// Includes entire content of a file or a data stream.
@@ -7291,6 +7475,9 @@ enum MetadataType {
72917475

72927476
// General file metadata provided by Cloud Storage.
72937477
STORAGE_METADATA = 2;
7478+
7479+
// Metadata extracted from the files.
7480+
CONTENT_METADATA = 3;
72947481
}
72957482

72967483
// Parts of the APIs which use certain infoTypes.
@@ -8910,17 +9097,22 @@ message Domain {
89109097
}
89119098

89129099
// The signal used to determine the category.
8913-
// This list may increase over time.
9100+
// New values may be added in the future.
89149101
enum Signal {
89159102
// Unused.
89169103
SIGNAL_UNSPECIFIED = 0;
89179104

89189105
// One or more machine learning models are present.
89199106
MODEL = 1;
89209107

8921-
// A table appears to be a text embedding.
9108+
// A table appears to contain text embeddings.
89229109
TEXT_EMBEDDING = 2;
89239110

9111+
// A table appears to contain embeddings of any type (for example, text,
9112+
// image, multimodal). The `TEXT_EMBEDDING` signal might also be present if
9113+
// the table contains text embeddings.
9114+
EMBEDDING = 7;
9115+
89249116
// The [Cloud SQL Vertex
89259117
// AI](https://cloud.google.com/sql/docs/postgres/integrate-cloud-sql-with-vertex-ai)
89269118
// plugin is installed on the database.

packages/google-privacy-dlp/protos/google/privacy/dlp/v2/storage.proto

Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// Copyright 2025 Google LLC
1+
// Copyright 2026 Google LLC
22
//
33
// Licensed under the Apache License, Version 2.0 (the "License");
44
// you may not use this file except in compliance with the License.
@@ -193,6 +193,17 @@ message CustomInfoType {
193193
// not support the use of `detection_rules`.
194194
message SurrogateType {}
195195

196+
// Configuration for a custom infoType that detects key-value pairs in the
197+
// metadata matching the specified regular expressions.
198+
message MetadataKeyValueExpression {
199+
// The regular expression for the metadata key. The key should be
200+
// non-empty.
201+
string key_regex = 1;
202+
203+
// The regular expression for the metadata value. The value should be non-empty.
204+
string value_regex = 2;
205+
}
206+
196207
// Deprecated; use `InspectionRuleSet` instead. Rule for modifying a
197208
// `CustomInfoType` to alter behavior under certain circumstances, depending
198209
// on the specific details of the rule. Not supported for the `surrogate_type`
@@ -299,18 +310,21 @@ message CustomInfoType {
299310
// support reversing.
300311
SurrogateType surrogate_type = 4;
301312

302-
// Load an existing `StoredInfoType` resource for use in
303-
// `InspectDataSource`. Not currently supported in `InspectContent`.
313+
// Loads an existing `StoredInfoType` resource.
304314
StoredType stored_type = 5;
315+
316+
// Key-value pair to detect in the metadata.
317+
MetadataKeyValueExpression metadata_key_value_expression = 10;
305318
}
306319

307320
// Set of detection rules to apply to all findings of this CustomInfoType.
308-
// Rules are applied in order that they are specified. Not supported for the
309-
// `surrogate_type` CustomInfoType.
321+
// Rules are applied in the order that they are specified. Only supported
322+
// for the `dictionary`, `regex`, and `stored_type` CustomInfoTypes.
310323
repeated DetectionRule detection_rules = 7;
311324

312325
// If set to EXCLUSION_TYPE_EXCLUDE this infoType will not cause a finding
313-
// to be returned. It still can be used for rules matching.
326+
// to be returned. It still can be used for rules matching. Only supported
327+
// for the `dictionary`, `regex`, and `stored_type` CustomInfoTypes.
314328
ExclusionType exclusion_type = 8;
315329

316330
// Sensitivity for this CustomInfoType. If this CustomInfoType extends an

0 commit comments

Comments
 (0)