Skip to content

Commit 52104c3

Browse files
Merge pull request #209 from CycloneDX/v1.5-dev-machine-learning
Added support for ML
2 parents 8820d5e + 70d81eb commit 52104c3

6 files changed

Lines changed: 1555 additions & 9 deletions

File tree

schema/bom-1.5.proto

Lines changed: 189 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,12 @@ enum Classification {
5959
CLASSIFICATION_FIRMWARE = 8;
6060
// A special type of software that operates or controls a particular type of device. Refer to https://en.wikipedia.org/wiki/Device_driver
6161
CLASSIFICATION_DEVICE_DRIVER = 9;
62+
// A runtime environment which interprets or executes software. This may include runtimes such as those that execute bytecode or low-code/no-code application platforms.
63+
CLASSIFICATION_PLATFORM = 10;
64+
// A model based on training data that can make predictions or decisions without being explicitly programmed to do so.
65+
CLASSIFICATION_MACHINE_LEARNING_MODEL = 11;
66+
// A collection of discrete values that convey information.
67+
CLASSIFICATION_DATA = 12;
6268
}
6369

6470
message Commit {
@@ -121,6 +127,10 @@ message Component {
121127
repeated Evidence evidence = 23;
122128
// Specifies optional release notes.
123129
optional ReleaseNotes releaseNotes = 24;
130+
// A model card describes the intended uses of a machine learning model, potential limitations, biases, ethical considerations, training parameters, datasets used to train the model, performance metrics, and other relevant data useful for ML transparency.
131+
optional ModelCard modelCard = 25;
132+
// This object SHOULD be specified for any component of type `data` and MUST NOT be specified for other component types.
133+
optional ComponentData data = 26;
124134
}
125135

126136
// Specifies the data flow.
@@ -233,6 +243,8 @@ enum ExternalReferenceType {
233243
EXTERNAL_REFERENCE_TYPE_QUALITY_METRICS = 28;
234244
// Code or configuration that defines and provisions virtualized infrastructure, commonly referred to as Infrastructure as Code (IaC).
235245
EXTERNAL_REFERENCE_TYPE_CODIFIED_INFRASTRUCTURE = 29;
246+
// A model card describes the intended uses of a machine learning model, potential limitations, biases, ethical considerations, training parameters, datasets used to train the model, performance metrics, and other relevant data useful for ML transparency.
247+
EXTERNAL_REFERENCE_TYPE_MODEL_CARD = 30;
236248
}
237249

238250
enum HashAlg {
@@ -916,4 +928,181 @@ message Annotation {
916928
google.protobuf.Timestamp timestamp = 4;
917929
// The textual content of the annotation.
918930
string text = 5;
931+
}
932+
933+
// A model card describes the intended uses of a machine learning model,
// potential limitations, biases, ethical considerations, training parameters,
// datasets used to train the model, performance metrics, and other relevant
// data useful for ML transparency.
message ModelCard {
  // An optional identifier which can be used to reference the model card
  // elsewhere in the BOM. Every bom-ref MUST be unique within the BOM.
  optional string bom_ref = 1;
  // Hyper-parameters for construction of the model.
  optional ModelParameters modelParameters = 2;
  // A quantitative analysis of the model.
  optional QuantitativeAnalysis quantitativeAnalysis = 3;
  // What considerations should be taken into account regarding the model's
  // construction, training, and application?
  optional ModelCardConsiderations considerations = 4;

  // Hyper-parameters for construction of the model.
  message ModelParameters {
    // The overall approach to learning used by the model for problem solving.
    optional Approach approach = 1;
    // Directly influences the input and/or output. Examples include
    // classification, regression, clustering, etc.
    optional string task = 2;
    // The model architecture family such as transformer network,
    // convolutional neural network, residual neural network, LSTM neural
    // network, etc.
    optional string architectureFamily = 3;
    // The specific architecture of the model such as GPT-1, ResNet-50,
    // YOLOv3, etc.
    optional string modelArchitecture = 4;
    // The datasets used to train and evaluate the model.
    repeated Datasets datasets = 5;
    // The input format(s) of the model.
    repeated MachineLearningInputOutputParameters inputs = 6;
    // The output format(s) from the model.
    repeated MachineLearningInputOutputParameters outputs = 7;

    // Wrapper carrying the learning approach of the model.
    message Approach {
      // The learning approach, as one of the ModelParameterApproachType
      // values. Declared `optional`, so an unset value can be told apart from
      // the enum's zero value.
      optional ModelParameterApproachType type = 1;
    }

    // A single dataset entry: exactly one of an inline description or a
    // reference may be set.
    message Datasets {
      oneof choice {
        // The dataset described inline.
        ComponentData dataset = 1;
        // References a data component by the component's bom-ref attribute.
        string ref = 2;
      }
    }

    // Describes one input to, or output from, the model.
    message MachineLearningInputOutputParameters {
      // The data format for input/output to the model. Example formats
      // include string, image, time-series.
      optional string format = 1;
    }
  }

  // A quantitative analysis of the model.
  message QuantitativeAnalysis {
    // The model performance metrics being reported. Examples may include
    // accuracy, F1 score, precision, top-3 error rates, MSC, etc.
    repeated PerformanceMetrics performanceMetrics = 1;
    // A collection of graphics accompanying the analysis.
    optional GraphicsCollection graphics = 2;

    // A single reported performance metric.
    message PerformanceMetrics {
      // The type of performance metric.
      optional string type = 1;
      // The value of the performance metric.
      optional string value = 2;
      // The name of the slice this metric was computed on. By default, assume
      // this metric is not sliced.
      optional string slice = 3;
      // The confidence interval of the metric.
      optional ConfidenceInterval confidenceInterval = 4;

      // The bounds of a metric's confidence interval. Both bounds are carried
      // as strings.
      message ConfidenceInterval {
        // The lower bound of the confidence interval.
        optional string lowerBound = 1;
        // The upper bound of the confidence interval.
        optional string upperBound = 2;
      }
    }
  }

  // Considerations regarding the model's construction, training, and
  // application.
  message ModelCardConsiderations {
    // Who are the intended users of the model?
    repeated string users = 1;
    // What are the intended use cases of the model?
    repeated string useCases = 2;
    // What are the known technical limitations of the model? E.g. What
    // kind(s) of data should the model be expected not to perform well on?
    // What are the factors that might degrade model performance?
    repeated string technicalLimitations = 3;
    // What are the known tradeoffs in accuracy/performance of the model?
    repeated string performanceTradeoffs = 4;
    // What are the ethical (or environmental) risks involved in the
    // application of this model?
    repeated EthicalConsiderations ethicalConsiderations = 5;
    // How does the model affect groups at risk of being systematically
    // disadvantaged? What are the harms and benefits to the various affected
    // groups?
    repeated FairnessAssessments fairnessAssessments = 6;

    // A single ethical (or environmental) risk and how it is addressed.
    message EthicalConsiderations {
      // The name of the risk.
      optional string name = 1;
      // Strategy used to address this risk.
      optional string mitigationStrategy = 2;
    }

    // A fairness assessment for one group at risk.
    message FairnessAssessments {
      // The groups or individuals at risk of being systematically
      // disadvantaged by the model.
      optional string groupAtRisk = 1;
      // Expected benefits to the identified groups.
      optional string benefits = 2;
      // Expected harms to the identified groups.
      optional string harms = 3;
      // With respect to the benefits and harms outlined, please describe any
      // mitigation strategy implemented.
      optional string mitigationStrategy = 4;
    }
  }
}
1029+
1030+
// The overall approach to learning used by a machine learning model.
//
// NOTE(review): value 0 names a real approach rather than an UNSPECIFIED
// placeholder. The only use visible alongside this enum
// (ModelCard.ModelParameters.Approach.type) is declared `optional`, so an
// unset field can still be distinguished from SUPERVISED there. The numbers
// are part of the wire contract and must not be renumbered.
enum ModelParameterApproachType {
  // Supervised learning.
  MODEL_PARAMETER_APPROACH_TYPE_SUPERVISED = 0;
  // Unsupervised learning.
  MODEL_PARAMETER_APPROACH_TYPE_UNSUPERVISED = 1;
  // Reinforcement ("reinforced") learning.
  MODEL_PARAMETER_APPROACH_TYPE_REINFORCED_LEARNING = 2;
  // Semi-supervised learning.
  MODEL_PARAMETER_APPROACH_TYPE_SEMI_SUPERVISED = 3;
  // Self-supervised learning.
  MODEL_PARAMETER_APPROACH_TYPE_SELF_SUPERVISED = 4;
}
1037+
1038+
// Describes a piece of data (for example a dataset) carried by, or referenced
// from, a component.
message ComponentData {
  // An optional identifier which can be used to reference the dataset
  // elsewhere in the BOM. Every bom-ref MUST be unique within the BOM.
  optional string bom_ref = 1;
  // The general theme or subject matter of the data being specified.
  // NOTE(review): unlike its siblings this field has no `optional` label, so
  // (assuming proto3 syntax; the declaration is not visible here) an unset
  // value is indistinguishable from the enum's zero value.
  ComponentDataType type = 2;
  // The name of the dataset.
  optional string name = 3;
  // The contents or references to the contents of the data being described.
  optional ComponentDataContents contents = 4;
  // Data classification tags data according to its type, sensitivity, and
  // value if altered, stolen, or destroyed.
  optional string classification = 5;
  // A description of any sensitive data in a dataset.
  repeated string sensitiveData = 6;
  // A collection of graphics that represent various measurements.
  optional GraphicsCollection graphics = 7;
  // A description of the dataset. Can describe size of dataset, whether it's
  // used for source code, training, testing, or validation, etc.
  optional string description = 8;
  // Data Governance.
  optional DataGovernance governance = 9;

  // The contents of the data, supplied inline and/or by URL.
  message ComponentDataContents {
    // An optional way to include textual or encoded data.
    optional AttachedText attachment = 1;
    // The URL to where the data can be retrieved.
    optional string url = 2;
    // Provides the ability to document name-value parameters used for
    // configuration.
    repeated Property properties = 3;
  }

  // The parties responsible for the data, grouped by governance role.
  message DataGovernance {
    // Data custodians are responsible for the safe custody, transport, and
    // storage of data.
    repeated DataGovernanceResponsibleParty custodians = 1;
    // Data stewards are responsible for data content, context, and associated
    // business rules.
    repeated DataGovernanceResponsibleParty stewards = 2;
    // Data owners are concerned with risk and appropriate access to data.
    repeated DataGovernanceResponsibleParty owners = 3;

    // One responsible party: exactly one of an organization or a contact may
    // be set.
    message DataGovernanceResponsibleParty {
      oneof choice {
        // The responsible party, expressed as an OrganizationalEntity.
        OrganizationalEntity organization = 1;
        // The responsible party, expressed as an OrganizationalContact.
        OrganizationalContact contact = 2;
      }
    }
  }
}
1083+
1084+
// The general theme or subject matter of a piece of component data.
//
// NOTE(review): value 0 carries a real meaning (SOURCE_CODE) rather than an
// UNSPECIFIED placeholder, and ComponentData.type is declared without
// `optional`. Assuming proto3 syntax (the declaration is not visible here),
// an unset type therefore reads back as SOURCE_CODE. The numbers are part of
// the wire contract and must not be renumbered.
enum ComponentDataType {
  // Any type of code, code snippet, or data-as-code.
  COMPONENT_DATA_TYPE_SOURCE_CODE = 0;
  // Parameters or settings that may be used by other components.
  COMPONENT_DATA_TYPE_CONFIGURATION = 1;
  // A collection of data.
  COMPONENT_DATA_TYPE_DATASET = 2;
  // Any other type of data that does not fit into existing definitions.
  COMPONENT_DATA_TYPE_OTHER = 3;
}
1094+
1095+
// A described collection of graphics.
message GraphicsCollection {
  // A description of this collection of graphics.
  optional string description = 1;
  // A collection of graphics.
  repeated Graphic graphic = 2;

  // A single named graphic.
  message Graphic {
    // The name of the graphic.
    optional string name = 1;
    // The graphic (vector or raster). Base64 encoding MUST be specified for
    // binary images.
    optional AttachedText image = 2;
  }
}

0 commit comments

Comments
 (0)