Skip to content

Commit 1aa6c7c

Browse files
Added ML support to XSD and proto. Corrected few minor issues with JSON schema. Added test cases.
Signed-off-by: Steve Springett <steve@springett.us>
1 parent 819976a commit 1aa6c7c

6 files changed

Lines changed: 1130 additions & 11 deletions

File tree

schema/bom-1.5.proto

Lines changed: 189 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,12 @@ enum Classification {
5757
CLASSIFICATION_CONTAINER = 7;
5858
// A special type of software that provides low-level control over a device's hardware. Refer to https://en.wikipedia.org/wiki/Firmware
5959
CLASSIFICATION_FIRMWARE = 8;
60+
// A runtime environment which interprets or executes software. This may include runtimes such as those that execute bytecode or low-code/no-code application platforms.
61+
CLASSIFICATION_PLATFORM = 9;
62+
// A model based on training data that can make predictions or decisions without being explicitly programmed to do so.
63+
CLASSIFICATION_MACHINE_LEARNING_MODEL = 10;
64+
// A collection of discrete values that convey information.
65+
CLASSIFICATION_DATA = 11;
6066
}
6167

6268
message Commit {
@@ -119,6 +125,10 @@ message Component {
119125
repeated Evidence evidence = 23;
120126
// Specifies optional release notes.
121127
optional ReleaseNotes releaseNotes = 24;
128+
// A model card describes the intended uses of a machine learning model, potential limitations, biases, ethical considerations, training parameters, datasets used to train the model, performance metrics, and other relevant data useful for ML transparency.
129+
optional ModelCard modelCard = 25;
130+
// This object SHOULD be specified for any component of type `data` and MUST NOT be specified for other component types.
131+
optional ComponentData data = 26;
122132
}
123133

124134
// Specifies the data classification.
@@ -195,6 +205,8 @@ enum ExternalReferenceType {
195205
EXTERNAL_REFERENCE_TYPE_BUILD_SYSTEM = 14;
196206
// Specifies a way to contact the maintainer, supplier, or provider in the event of a security incident. Common URIs include links to a disclosure procedure, a mailto (RFC-2368) that specifies an email address, a tel (RFC-3966) that specifies a phone number, or dns (RFC-4501) that specifies the records containing DNS Security TXT.
197207
EXTERNAL_REFERENCE_TYPE_SECURITY_CONTACT = 15;
208+
// A model card describes the intended uses of a machine learning model, potential limitations, biases, ethical considerations, training parameters, datasets used to train the model, performance metrics, and other relevant data useful for ML transparency.
209+
EXTERNAL_REFERENCE_TYPE_MODEL_CARD = 16;
198210
}
199211

200212
enum HashAlg {
@@ -780,4 +792,181 @@ message Annotation {
780792
google.protobuf.Timestamp timestamp = 4;
781793
// The textual content of the annotation.
782794
string text = 5;
795+
}
796+
797+
// A model card describes the intended uses of a machine learning model, potential limitations, biases, ethical considerations, training parameters, datasets used to train the model, performance metrics, and other relevant data useful for ML transparency.
// Attached to a component through Component.modelCard.
message ModelCard {
798+
// An optional identifier which can be used to reference the model card elsewhere in the BOM. Every bom-ref MUST be unique within the BOM.
799+
optional string bom_ref = 1;
800+
// Hyper-parameters for construction of the model.
801+
optional ModelParameters modelParameters = 2;
802+
// A quantitative analysis of the model.
803+
optional QuantitativeAnalysis quantitativeAnalysis = 3;
804+
// What considerations should be taken into account regarding the model's construction, training, and application?
805+
optional ModelCardConsiderations considerations = 4;
806+
807+
// Hyper-parameters for construction of the model: learning approach, task, architecture, datasets, and input/output formats.
message ModelParameters {
808+
// The overall approach to learning used by the model for problem solving.
809+
optional Approach approach = 1;
810+
// Directly influences the input and/or output. Examples include classification, regression, clustering, etc.
811+
optional string task = 2;
812+
// The model architecture family such as transformer network, convolutional neural network, residual neural network, LSTM neural network, etc.
813+
optional string architectureFamily = 3;
814+
// The specific architecture of the model such as GPT-1, ResNet-50, YOLOv3, etc.
815+
optional string modelArchitecture = 4;
816+
// The datasets used to train and evaluate the model.
817+
repeated Datasets datasets = 5;
818+
// The input format(s) of the model.
819+
repeated MachineLearningInputOutputParameters inputs = 6;
820+
// The output format(s) from the model.
821+
repeated MachineLearningInputOutputParameters outputs = 7;
822+
823+
// Wrapper for the overall approach to learning used by the model.
message Approach {
824+
// The learning approach; one of the ModelParameterApproachType values.
optional ModelParameterApproachType type = 1;
825+
}
// A single dataset entry. Exactly one member of the oneof is set: either the data is described inline or an existing data component is referenced.
message Datasets {
827+
oneof choice {
828+
// The data described inline.
ComponentData dataset = 1;
829+
// References a data component by the component's bom-ref attribute.
830+
string ref = 2;
831+
}
832+
}
// Describes one input or output of the model.
message MachineLearningInputOutputParameters {
834+
// The data format for input/output to the model. Example formats include string, image, time-series.
835+
optional string format = 1;
836+
}
837+
}
// A quantitative analysis of the model: reported performance metrics plus optional supporting graphics.
message QuantitativeAnalysis {
839+
// The model performance metrics being reported. Examples may include accuracy, F1 score, precision, top-3 error rates, MSC, etc.
840+
repeated PerformanceMetrics performanceMetrics = 1;
841+
// An optional collection of graphics accompanying the analysis (see GraphicsCollection).
optional GraphicsCollection graphics = 2;
842+
843+
// A single reported performance metric.
message PerformanceMetrics {
844+
// The type of performance metric.
845+
optional string type = 1;
846+
// The value of the performance metric.
847+
optional string value = 2;
848+
// The name of the slice this metric was computed on. By default, assume this metric is not sliced.
849+
optional string slice = 3;
850+
// The confidence interval of the metric.
851+
optional ConfidenceInterval confidenceInterval = 4;
852+
853+
// The confidence interval of a metric. Both bounds are carried as strings.
message ConfidenceInterval {
854+
// The lower bound of the confidence interval.
855+
optional string lowerBound = 1;
856+
// The upper bound of the confidence interval.
857+
optional string upperBound = 2;
858+
}
859+
}
860+
}
// Considerations to take into account regarding the model's construction, training, and application.
message ModelCardConsiderations {
862+
// Who are the intended users of the model?
863+
repeated string users = 1;
864+
// What are the intended use cases of the model?
865+
repeated string useCases = 2;
866+
// What are the known technical limitations of the model? E.g. What kind(s) of data should the model be expected not to perform well on? What are the factors that might degrade model performance?
867+
repeated string technicalLimitations = 3;
868+
// What are the known tradeoffs in accuracy/performance of the model?
869+
repeated string performanceTradeoffs = 4;
870+
// What are the ethical (or environmental) risks involved in the application of this model?
871+
repeated EthicalConsiderations ethicalConsiderations = 5;
872+
// How does the model affect groups at risk of being systematically disadvantaged? What are the harms and benefits to the various affected groups?
873+
repeated FairnessAssessments fairnessAssessments = 6;
874+
875+
// A single ethical (or environmental) risk and the strategy used to address it.
message EthicalConsiderations {
876+
// The name of the risk.
877+
optional string name = 1;
878+
// Strategy used to address this risk.
879+
optional string mitigationStrategy = 2;
880+
}
// A single fairness assessment: one group at risk together with the expected benefits, harms, and any mitigation.
message FairnessAssessments {
882+
// The groups or individuals at risk of being systematically disadvantaged by the model.
883+
optional string groupAtRisk = 1;
884+
// Expected benefits to the identified groups.
885+
optional string benefits = 2;
886+
// Expected harms to the identified groups.
887+
optional string harms = 3;
888+
// With respect to the benefits and harms outlined, please describe any mitigation strategy implemented.
889+
optional string mitigationStrategy = 4;
890+
}
891+
}
892+
}
893+
894+
// The learning approaches a model can declare; used by ModelCard.ModelParameters.Approach.type.
// NOTE(review): there is no *_UNSPECIFIED entry, so the zero value is SUPERVISED. Approach.type is declared `optional`, which (assuming proto3 semantics) lets readers tell "unset" apart from SUPERVISED; any non-optional use of this enum would not — confirm before reusing it elsewhere.
enum ModelParameterApproachType {
895+
MODEL_PARAMETER_APPROACH_TYPE_SUPERVISED = 0;
896+
MODEL_PARAMETER_APPROACH_TYPE_UNSUPERVISED = 1;
897+
MODEL_PARAMETER_APPROACH_TYPE_REINFORCED_LEARNING = 2;
898+
MODEL_PARAMETER_APPROACH_TYPE_SEMI_SUPERVISED = 3;
899+
MODEL_PARAMETER_APPROACH_TYPE_SELF_SUPERVISED = 4;
900+
}
901+
902+
// Describes a piece of data: its type, name, contents, classification, sensitivity, supporting graphics, and governance.
// Used as Component.data and as the inline member of ModelCard.ModelParameters.Datasets.
message ComponentData {
903+
// An optional identifier which can be used to reference the dataset elsewhere in the BOM. Every bom-ref MUST be unique within the BOM.
904+
optional string bom_ref = 1;
905+
// The general theme or subject matter of the data being specified.
// NOTE(review): this is the only singular field in the message not declared `optional`; assuming proto3 semantics, an unset value is indistinguishable from COMPONENT_DATA_TYPE_SOURCE_CODE (the zero value) — confirm this is intended.
906+
ComponentDataType type = 2;
907+
// The name of the dataset.
908+
optional string name = 3;
909+
// The contents or references to the contents of the data being described.
910+
optional ComponentDataContents contents = 4;
911+
// Data classification tags data according to its type, sensitivity, and value if altered, stolen, or destroyed.
912+
optional string classification = 5;
913+
// A description of any sensitive data in a dataset.
914+
repeated string sensitiveData = 6;
915+
// A collection of graphics that represent various measurements.
916+
optional GraphicsCollection graphics = 7;
917+
// A description of the dataset. Can describe size of dataset, whether it's used for source code, training, testing, or validation, etc.
918+
optional string description = 8;
919+
// Data governance: the custodians, stewards, and owners of the data.
920+
optional DataGovernance governance = 9;
921+
922+
// The contents or references to the contents of the data being described.
message ComponentDataContents {
923+
// An optional way to include textual or encoded data.
924+
optional AttachedText attachment = 1;
925+
// The URL to where the data can be retrieved.
926+
optional string url = 2;
927+
// Provides the ability to document name-value parameters used for configuration.
928+
repeated Property properties = 3;
929+
}
930+
931+
// Lists the parties responsible for the data, grouped by role.
message DataGovernance {
932+
// Data custodians are responsible for the safe custody, transport, and storage of data.
933+
repeated DataGovernanceResponsibleParty custodians = 1;
934+
// Data stewards are responsible for data content, context, and associated business rules.
935+
repeated DataGovernanceResponsibleParty stewards = 2;
936+
// Data owners are concerned with risk and appropriate access to data.
937+
repeated DataGovernanceResponsibleParty owners = 3;
938+
939+
// A single responsible party. Exactly one member of the oneof is set: an organization or an individual contact.
message DataGovernanceResponsibleParty {
940+
oneof choice {
941+
// The responsible party expressed as an organization.
OrganizationalEntity organization = 1;
942+
// The responsible party expressed as an individual contact.
OrganizationalContact contact = 2;
943+
}
944+
}
945+
}
946+
}
947+
948+
// The general theme or subject matter of a ComponentData entry; used by ComponentData.type.
// NOTE(review): there is no *_UNSPECIFIED entry, so the zero value is SOURCE_CODE; a reader of a non-optional field cannot distinguish "unset" from SOURCE_CODE (assuming proto3 semantics).
enum ComponentDataType {
949+
// Any type of code, code snippet, or data-as-code.
950+
COMPONENT_DATA_TYPE_SOURCE_CODE = 0;
951+
// Parameters or settings that may be used by other components.
952+
COMPONENT_DATA_TYPE_CONFIGURATION = 1;
953+
// A collection of data.
954+
COMPONENT_DATA_TYPE_DATASET = 2;
955+
// Any other type of data that does not fit into existing definitions.
956+
COMPONENT_DATA_TYPE_OTHER = 3;
957+
}
958+
959+
// A described collection of graphics. Referenced by ComponentData.graphics and ModelCard.QuantitativeAnalysis.graphics.
message GraphicsCollection {
960+
// A description of this collection of graphics.
961+
optional string description = 1;
962+
// A collection of graphics.
963+
repeated Graphic graphic = 2;
964+
965+
// A single named graphic, carried as attached text.
message Graphic {
966+
// The name of the graphic.
967+
optional string name = 1;
968+
// The graphic (vector or raster). Base64 encoding MUST be specified for binary images.
969+
optional AttachedText image = 2;
970+
}
971+
783972
}

schema/bom-1.5.schema.json

Lines changed: 17 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2037,16 +2037,18 @@
20372037
"items" : {
20382038
"oneOf" : [
20392039
{
2040-
"title": "Inline Component Dataset",
2040+
"title": "Inline Component Data",
20412041
"$ref": "#/definitions/componentData"
20422042
},
20432043
{
20442044
"title": "Data Component Reference",
2045-
"description": "References a data component by the components bom-ref attribute",
2046-
"ref": {
2047-
"type": "string",
2048-
"title": "Reference",
2049-
"description": "References a data component by the components bom-ref attribute"
2045+
"additionalProperties": false,
2046+
"properties": {
2047+
"ref": {
2048+
"type": "string",
2049+
"title": "Reference",
2050+
"description": "References a data component by the component's bom-ref attribute"
2051+
}
20502052
}
20512053
}
20522054
]
@@ -2133,9 +2135,9 @@
21332135
"additionalItems": false,
21342136
"items": { "$ref": "#/definitions/risk" }
21352137
},
2136-
"fairnessAssessment": {
2138+
"fairnessAssessments": {
21372139
"type": "array",
2138-
"title": "Fairness Assessment",
2140+
"title": "Fairness Assessments",
21392141
"description": "How does the model affect groups at risk of being systematically disadvantaged? What are the harms and benefits to the various affected groups?",
21402142
"additionalItems": false,
21412143
"items": {
@@ -2242,19 +2244,23 @@
22422244
"additionalProperties": false,
22432245
"properties": {
22442246
"custodians": {
2247+
"type": "array",
22452248
"title": "Data Custodians",
22462249
"description": "Data custodians are responsible for the safe custody, transport, and storage of data.",
2247-
"$ref": "#/definitions/dataGovernanceResponsibleParty"
2250+
"additionalItems": false,
2251+
"items": { "$ref": "#/definitions/dataGovernanceResponsibleParty" }
22482252
},
22492253
"stewards": {
2254+
"type": "array",
22502255
"title": "Data Stewards",
22512256
"description": "Data stewards are responsible for data content, context, and associated business rules.",
2252-
"$ref": "#/definitions/dataGovernanceResponsibleParty"
2257+
"items": { "$ref": "#/definitions/dataGovernanceResponsibleParty" }
22532258
},
22542259
"owners": {
2260+
"type": "array",
22552261
"title": "Data Owners",
22562262
"description": "Data owners are concerned with risk and appropriate access to data.",
2257-
"$ref": "#/definitions/dataGovernanceResponsibleParty"
2263+
"items": { "$ref": "#/definitions/dataGovernanceResponsibleParty" }
22582264
}
22592265
}
22602266
}

0 commit comments

Comments
 (0)