Skip to content

Commit c34ba7f

Browse files
jwcullen authored and tdaede committed
Sync proto schema and update test vectors based on AOMediaCodec/iamf-tools@5531b3f0
1 parent eddba69 commit c34ba7f

175 files changed

Lines changed: 1195 additions & 1284 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

proto/arbitrary_obu.proto

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -54,11 +54,23 @@ enum InsertionHook {
5454
INSERTION_HOOK_INVALID = 0;
5555
INSERTION_HOOK_BEFORE_DESCRIPTORS = 1;
5656
INSERTION_HOOK_AFTER_DESCRIPTORS = 2;
57+
INSERTION_HOOK_AFTER_IA_SEQUENCE_HEADER = 3;
58+
INSERTION_HOOK_AFTER_CODEC_CONFIGS = 4;
59+
INSERTION_HOOK_AFTER_AUDIO_ELEMENTS = 5;
60+
INSERTION_HOOK_AFTER_MIX_PRESENTATIONS = 6;
61+
INSERTION_HOOK_BEFORE_PARAMETER_BLOCKS_AT_TICK = 7;
62+
INSERTION_HOOK_AFTER_PARAMETER_BLOCKS_AT_TICK = 8;
63+
INSERTION_HOOK_AFTER_AUDIO_FRAMES_AT_TICK = 9;
5764
}
5865

5966
message ArbitraryObuMetadata {
6067
optional InsertionHook insertion_hook = 1;
6168
optional ArbitraryObuType obu_type = 4;
6269
optional ObuHeaderMetadata obu_header = 2;
6370
optional bytes payload = 3;
71+
// Used to force the bitstream to be invalid when this OBU is inserted.
72+
optional bool invalidates_bitstream = 5;
73+
// Used to control the temporal unit when using the *_AT_TICK insertion hooks.
74+
// Ignored otherwise.
75+
optional int64 insertion_tick = 6;
6476
}

proto/audio_element.proto

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,16 +27,36 @@ message ParamDefinitionExtension {
2727
}
2828

2929
message AudioElementParam {
30-
optional uint32 param_definition_type = 1;
30+
optional uint32 deprecated_param_definition_type = 1 [deprecated = true];
31+
optional ParamDefinitionType param_definition_type = 5;
3132
oneof param_definition {
3233
DemixingParamDefinition demixing_param = 2;
3334
ReconGainParamDefinition recon_gain_param = 3;
3435
ParamDefinitionExtension param_definition_extension = 4;
3536
}
3637
}
3738

39+
// Valid proto enums start at index 1, which are different from the
40+
// corresponding enums in C++, e.g. kLayoutMono = 0.
41+
enum LoudspeakerLayout {
42+
LOUDSPEAKER_LAYOUT_INVALID = 0;
43+
LOUDSPEAKER_LAYOUT_MONO = 1;
44+
LOUDSPEAKER_LAYOUT_STEREO = 2;
45+
LOUDSPEAKER_LAYOUT_5_1_CH = 3;
46+
LOUDSPEAKER_LAYOUT_5_1_2_CH = 4;
47+
LOUDSPEAKER_LAYOUT_5_1_4_CH = 5;
48+
LOUDSPEAKER_LAYOUT_7_1_CH = 6;
49+
LOUDSPEAKER_LAYOUT_7_1_2_CH = 7;
50+
LOUDSPEAKER_LAYOUT_7_1_4_CH = 8;
51+
LOUDSPEAKER_LAYOUT_3_1_2_CH = 9;
52+
LOUDSPEAKER_LAYOUT_BINAURAL = 10;
53+
LOUDSPEAKER_LAYOUT_RESERVED_BEGIN = 11;
54+
LOUDSPEAKER_LAYOUT_RESERVED_END = 16;
55+
}
56+
3857
message ChannelAudioLayerConfig {
39-
optional uint32 loudspeaker_layout = 1;
58+
optional uint32 deprecated_loudspeaker_layout = 1 [deprecated = true];
59+
optional LoudspeakerLayout loudspeaker_layout = 10;
4060
optional uint32 output_gain_is_present_flag = 2;
4161
optional uint32 recon_gain_is_present_flag = 3;
4262
optional uint32 reserved_a = 4;

proto/codec_config.proto

Lines changed: 32 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ package libiamf_proto;
1515
import "obu_header.proto";
1616

1717
// Valid proto enums start at index 1, which are different from the
18-
// corresponding enums in C, e.g. kLpcmBigEndian = 0.
18+
// corresponding enums in C++, e.g. kLpcmBigEndian = 0.
1919
enum LpcmFormatFlags {
2020
LPCM_INVALID = 0;
2121
LPCM_BIG_ENDIAN = 1;
@@ -39,19 +39,20 @@ enum OpusApplicationFlag {
3939
message OpusEncoderMetadata {
4040
optional int32 target_bitrate_per_channel = 1;
4141
optional OpusApplicationFlag application = 2;
42+
optional bool use_float_api = 3 [default = true];
4243
}
4344

4445
message OpusDecoderConfig {
4546
optional uint32 version = 1;
46-
optional uint32 output_channel_count = 2;
47+
optional uint32 output_channel_count = 2 [default = 2];
4748
optional uint32 pre_skip = 3;
4849
optional uint32 input_sample_rate = 4;
49-
optional int32 output_gain = 5;
50-
optional uint32 mapping_family = 6;
50+
optional int32 output_gain = 5 [default = 0];
51+
optional uint32 mapping_family = 6 [default = 0];
5152
optional OpusEncoderMetadata opus_encoder_metadata = 7;
5253
}
5354

54-
enum AacSampleFrequencyIndex {
55+
enum SampleFrequencyIndex {
5556
AAC_SAMPLE_FREQUENCY_INDEX_INVALID = 0;
5657
AAC_SAMPLE_FREQUENCY_INDEX_96000 = 1;
5758
AAC_SAMPLE_FREQUENCY_INDEX_88200 = 2;
@@ -80,23 +81,23 @@ message AacEncoderMetadata {
8081

8182
message AacDecoderSpecificInfo {
8283
optional uint32 decoder_specific_info_descriptor_tag = 5 [default = 0x05];
83-
optional uint32 audio_object_type = 1;
84-
optional AacSampleFrequencyIndex sample_frequency_index = 2;
84+
optional uint32 audio_object_type = 1 [default = 2];
85+
optional SampleFrequencyIndex sample_frequency_index = 2;
8586
optional uint32 sampling_frequency = 3;
86-
optional uint32 channel_configuration = 4;
87+
optional uint32 channel_configuration = 4 [default = 2];
8788
}
8889

8990
message AacGaSpecificConfig {
90-
optional bool frame_length_flag = 1;
91-
optional bool depends_on_core_coder = 2;
92-
optional bool extension_flag = 3;
91+
optional bool frame_length_flag = 1 [default = false];
92+
optional bool depends_on_core_coder = 2 [default = false];
93+
optional bool extension_flag = 3 [default = false];
9394
}
9495

9596
message AacDecoderConfig {
9697
optional uint32 decoder_config_descriptor_tag = 11 [default = 0x04];
97-
optional uint32 object_type_indication = 1;
98-
optional uint32 stream_type = 2;
99-
optional bool upstream = 3;
98+
optional uint32 object_type_indication = 1 [default = 0x40];
99+
optional uint32 stream_type = 2 [default = 5];
100+
optional bool upstream = 3 [default = false];
100101
optional bool reserved = 4;
101102
optional uint32 buffer_size_db = 5;
102103
optional uint32 max_bitrate = 6;
@@ -126,13 +127,15 @@ message FlacMetaBlockHeader {
126127
message FlacMetaBlockStreamInfo {
127128
optional uint32 minimum_block_size = 1;
128129
optional uint32 maximum_block_size = 2;
129-
optional uint32 minimum_frame_size = 3;
130-
optional uint32 maximum_frame_size = 4;
130+
optional uint32 minimum_frame_size = 3 [default = 0];
131+
optional uint32 maximum_frame_size = 4 [default = 0];
131132
optional uint32 sample_rate = 5;
132-
optional uint32 number_of_channels = 6;
133+
optional uint32 number_of_channels = 6 [default = 1];
133134
optional uint32 bits_per_sample = 7;
134135
optional uint64 total_samples_in_stream = 8;
135-
optional bytes md5_signature = 9;
136+
optional bytes md5_signature = 9
137+
[default =
138+
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"];
136139
}
137140

138141
// Settings to configure `libflac`.
@@ -153,10 +156,19 @@ message FlacDecoderConfig {
153156
optional FlacEncoderMetadata flac_encoder_metadata = 4;
154157
}
155158

159+
enum CodecId {
160+
CODEC_ID_INVALID = 0;
161+
CODEC_ID_OPUS = 0x4f707573; // "Opus"
162+
CODEC_ID_LPCM = 0x6970636d; // "ipcm"
163+
CODEC_ID_AAC_LC = 0x6d703461; // "mp4a"
164+
CODEC_ID_FLAC = 0x664c6143; // "fLaC"
165+
}
166+
156167
message CodecConfig {
157-
optional uint32 codec_id = 1;
168+
optional uint32 deprecated_codec_id = 1 [deprecated = true];
169+
optional CodecId codec_id = 10;
158170
optional uint32 num_samples_per_frame = 2;
159-
reserved 3;
171+
reserved 3, 4;
160172
optional int32 audio_roll_distance = 9;
161173
oneof decoder_config {
162174
LpcmDecoderConfig decoder_config_lpcm = 5;

proto/ia_sequence_header.proto

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,15 +14,16 @@ package libiamf_proto;
1414

1515
import "obu_header.proto";
1616

17-
enum ProfileVerson {
17+
enum ProfileVersion {
1818
PROFILE_VERSION_INVALID = 0;
1919
PROFILE_VERSION_SIMPLE = 1;
2020
PROFILE_VERSION_BASE = 2;
21+
PROFILE_VERSION_BASE_ENHANCED = 3;
2122
}
2223

2324
message IASequenceHeaderObuMetadata {
24-
optional uint32 ia_code = 1;
25-
optional ProfileVerson primary_profile = 2;
26-
optional ProfileVerson additional_profile = 3;
25+
optional uint32 ia_code = 1 [default = 0x69616d66 /* "iamf" */];
26+
optional ProfileVersion primary_profile = 2;
27+
optional ProfileVersion additional_profile = 3;
2728
optional ObuHeaderMetadata obu_header = 4;
2829
}

proto/mix_presentation.proto

Lines changed: 23 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,8 @@ enum HeadPhonesRenderingMode {
3131
HEADPHONES_RENDERING_MODE_INVALID = 0;
3232
HEADPHONES_RENDERING_MODE_STEREO = 1;
3333
HEADPHONES_RENDERING_MODE_BINAURAL = 2;
34+
HEADPHONES_RENDERING_MODE_RESERVED_2 = 3;
35+
HEADPHONES_RENDERING_MODE_RESERVED_3 = 4;
3436
}
3537

3638
message RenderingConfig {
@@ -68,6 +70,7 @@ enum SoundSystem {
6870
SOUND_SYSTEM_10_2_7_0 = 11;
6971
SOUND_SYSTEM_11_2_3_0 = 12;
7072
SOUND_SYSTEM_12_0_1_0 = 13;
73+
SOUND_SYSTEM_13_6_9_0 = 14;
7174
}
7275

7376
message LoudspeakersSsConventionLayout {
@@ -80,10 +83,11 @@ message LoudspeakersReservedOrBinauralLayout {
8083
}
8184

8285
enum LayoutType {
83-
LAYOUT_TYPE_RESERVED_0 = 0;
84-
LAYOUT_TYPE_RESERVED_1 = 1;
85-
LAYOUT_TYPE_LOUDSPEAKERS_SS_CONVENTION = 2;
86-
LAYOUT_TYPE_BINAURAL = 3;
86+
LAYOUT_TYPE_INVALID = 0;
87+
LAYOUT_TYPE_RESERVED_0 = 1;
88+
LAYOUT_TYPE_RESERVED_1 = 2;
89+
LAYOUT_TYPE_LOUDSPEAKERS_SS_CONVENTION = 3;
90+
LAYOUT_TYPE_BINAURAL = 4;
8791
}
8892

8993
message Layout {
@@ -96,7 +100,7 @@ message Layout {
96100
}
97101

98102
enum AnchorType {
99-
ANCHOR_TYPE_NOT_DEFINED = 0;
103+
ANCHOR_TYPE_INVALID = 0;
100104
ANCHOR_TYPE_UNKNOWN = 1;
101105
ANCHOR_TYPE_DIALOGUE = 2;
102106
ANCHOR_TYPE_ALBUM = 3;
@@ -112,8 +116,21 @@ message AnchoredLoudness {
112116
repeated AnchorElement anchor_elements = 2;
113117
}
114118

119+
enum LoudnessInfoTypeBitMask {
120+
LOUDNESS_INFO_TYPE_INVALID = 0;
121+
LOUDNESS_INFO_TYPE_TRUE_PEAK = 1;
122+
LOUDNESS_INFO_TYPE_ANCHORED_LOUDNESS = 2;
123+
LOUDNESS_INFO_TYPE_RESERVED_4 = 4;
124+
LOUDNESS_INFO_TYPE_RESERVED_8 = 8;
125+
LOUDNESS_INFO_TYPE_RESERVED_16 = 16;
126+
LOUDNESS_INFO_TYPE_RESERVED_32 = 32;
127+
LOUDNESS_INFO_TYPE_RESERVED_64 = 64;
128+
LOUDNESS_INFO_TYPE_RESERVED_128 = 128;
129+
}
130+
115131
message LoudnessInfo {
116-
optional uint32 info_type = 1;
132+
optional uint32 deprecated_info_type = 1 [deprecated = true];
133+
repeated LoudnessInfoTypeBitMask info_type_bit_masks = 8 [packed = true];
117134
optional int32 integrated_loudness = 2;
118135
optional int32 digital_peak = 3;
119136
optional int32 true_peak = 4;

proto/param_definitions.proto

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,16 @@ package libiamf_proto;
1414

1515
import "parameter_data.proto";
1616

17+
// Valid proto enums start at index 1, which are different from the
18+
// corresponding enums in C++, e.g. kParameterDefinitionMixGain = 0.
19+
enum ParamDefinitionType {
20+
PARAM_DEFINITION_TYPE_INVALID = 0;
21+
PARAM_DEFINITION_TYPE_MIX_GAIN = 1;
22+
PARAM_DEFINITION_TYPE_DEMIXING = 2;
23+
PARAM_DEFINITION_TYPE_RECON_GAIN = 3;
24+
PARAM_DEFINITION_TYPE_RESERVED_3 = 4;
25+
}
26+
1727
message ParamDefinition {
1828
optional uint32 parameter_id = 1;
1929
optional uint32 parameter_rate = 2;

proto/test_vector_metadata.proto

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,14 @@ message TestVectorMetadata {
4040
optional int32 ms_per_fragment = 8 [default = 10000];
4141
optional bool override_computed_recon_gains = 9 [default = false];
4242

43+
// Controls whether to validate the user-provided loudness against the
44+
// computed loudness.
45+
optional bool validate_user_loudness = 13 [default = false];
46+
47+
// An override to control the bit depth of the output `rendered` wav
48+
// file.
49+
optional uint32 output_wav_file_bit_depth_override = 12;
50+
4351
// `true` partitions the input mix gain parameter blocks to be aligned with
4452
// single frames. The `param_definition` in the descriptor OBUs must be
4553
// accurate.

tests/README.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ Title | Summary
4141
`audiolab-acoustic-guitar_2OA_470_ALLRAD_5s.wav` | Short clip of a guitar playing using 7.1.4. | 12 | 48kHz | pcm_s16le | 5s
4242
`audiolab-acoustic-guitar_2OA_470_ALLRAD_concat.wav` | Clip of a guitar playing which is repeated once using 7.1.4. | 12 | 48kHz | pcm_s16le | 22.77s
4343
`dialog_clip_stereo.wav` | English dialog. | 2 | 48kHz | pcm_s16le | 5s
44-
`Mechanism_5s_32bit.wav` | Mechanical noises using 7.1.4. | 12 | 48kHz | pcm_f32le | 5s
44+
`Mechanism_5s_32bit.wav` | Mechanical noises using 7.1.4. | 12 | 48kHz | pcm_s32le | 5s
4545
`Mechanism_5s.wav` | Mechanical noises using 7.1.4. | 12 | 48kHz | pcm_s16le | 5s
4646
`sample1_48kHz_stereo.wav` | Sawtooth wave. | 2 | 48kHz | pcm_s16le | 5s
4747
`sawtooth_10000_foa_48kHz.wav` | Sawtooth wave using first-order ambisonics. | 4 | 48kHz | pcm_s16le | 500ms
@@ -51,6 +51,7 @@ Title | Summary
5151
`sawtooth_100_stereo.wav` | Sawtooth wave. | 2 | 16kHz | pcm_s16le | 500ms
5252
`sawtooth_8000_toa_48kHz.wav` | Sawtooth wave using third-order ambisonics. | 16 | 48kHz | pcm_s16le | 500ms
5353
`sine_1000_16kHz_512ms.wav` | Sine wave. | 2 | 16kHz | pcm_s16le | 512ms
54+
`sine_1000_16khz_512ms_s32le.wav` | Sine wave. | 1 | 16kHz | pcm_s32le | 512ms
5455
`sine_1000_48kHz_512ms.wav` | Sine wave. | 2 | 48kHz | pcm_s16le | 512ms
5556
`sine_1000_48kHz.wav` | Sine wave. | 2 | 48kHz | pcm_s16le | 500ms
5657
`stereo_8_samples_48khz_s16le.wav` | Tiny test file. The first channel encodes 1, 2, ... 8. The second channel encodes 65535, 65534, ... 65528. | 2 | 48kHz | pcm_s16le | 8 samples
32 KB
Binary file not shown.

tests/test_000000_3.textproto

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8,16 +8,17 @@
88
# source code in the PATENTS file, you can obtain it at
99
# www.aomedia.org/license/patent.
1010

11-
# proto-file: third_party/libiamf/cli/proto/user_metadata.proto
11+
# proto-file: third_party/iamf_tools/iamf/cli/proto/user_metadata.proto
1212
# proto-message: UserMetadata
1313

1414
test_vector_metadata {
1515
human_readable_description:
1616
"A simple profile stereo IAMF stream with 1 substream. The final audio "
1717
"frame incorrectly has fewer samples than all other frames because it does "
18-
"not have sufficient `num_samples_to_trim_at_end`"
18+
"not have sufficient `num_samples_to_trim_at_end`."
1919
file_name_prefix: "test_000000_3"
2020
is_valid: false
21+
validate_user_loudness: true
2122
mp4_fixed_timestamp: "2023-04-06 00:00:00"
2223
primary_tested_spec_sections: [
2324
"3.2/num_samples_to_trim_at_end",
@@ -29,15 +30,14 @@ test_vector_metadata {
2930
}
3031

3132
ia_sequence_header_metadata {
32-
ia_code: 0x69616d66 # "iamf"
3333
primary_profile: PROFILE_VERSION_SIMPLE
3434
additional_profile: PROFILE_VERSION_SIMPLE
3535
}
3636

3737
codec_config_metadata {
3838
codec_config_id: 200
3939
codec_config {
40-
codec_id: 0x6970636d # "ipcm"
40+
codec_id: CODEC_ID_LPCM
4141
num_samples_per_frame: 128
4242
audio_roll_distance: 0
4343
decoder_config_lpcm {
@@ -61,7 +61,7 @@ audio_element_metadata {
6161
reserved: 0
6262
channel_audio_layer_configs: [
6363
{
64-
loudspeaker_layout: 1 # Stereo
64+
loudspeaker_layout: LOUDSPEAKER_LAYOUT_STEREO
6565
output_gain_is_present_flag: 0
6666
recon_gain_is_present_flag: 0
6767
reserved_a: 0
@@ -127,7 +127,7 @@ mix_presentation_metadata {
127127
}
128128
}
129129
loudness {
130-
info_type: 0
130+
info_type_bit_masks: []
131131
integrated_loudness: -13733
132132
digital_peak: -12879
133133
}

0 commit comments

Comments
 (0)