Skip to content

Commit 60c8a23

Browse files
Improve the Javadoc for encode/decode methods in Coder for better clarity on context behaviour
1 parent c1755e3 commit 60c8a23

1 file changed

Lines changed: 54 additions & 18 deletions

File tree

  • sdks/java/core/src/main/java/org/apache/beam/sdk/coders

sdks/java/core/src/main/java/org/apache/beam/sdk/coders/Coder.java

Lines changed: 54 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,24 @@
5353
*
5454
* @param <T> the type of values being encoded and decoded
5555
*/
56+
/**
57+
* <p><b>Example usage:</b>
58+
* <pre>{@code
59+
* Coder<String> coder = StringUtf8Coder.of();
60+
*
61+
* // Encoding a single standalone value (typically uses OUTER context)
62+
* coder.encode("hello", outStream);
63+
*
64+
* // Encoding multiple values (NESTED context scenario)
65+
* for (String value : values) {
66+
* coder.encode(value, outStream);
67+
* }
68+
* }</pre>
69+
*
70+
* <p>When multiple values are encoded into the same stream, coders must ensure
71+
* that each value can be correctly decoded. This is typically done by encoding
72+
* length or delimiter information.
73+
*/
5674
public abstract class Coder<T> implements Serializable {
5775
/**
5876
* The context in which encoding or decoding is being done.
@@ -64,22 +82,32 @@ public abstract class Coder<T> implements Serializable {
6482
@Deprecated
6583
public static class Context {
6684
/**
67-
* The outer context: the value being encoded or decoded takes up the remainder of the
68-
* record/stream contents.
69-
*/
85+
* The outer context indicates that the value being encoded or decoded
86+
* occupies the entire remaining stream.
87+
*
88+
* <p>In this context, the coder does not need to include length or boundary
89+
* information, since the value extends to the end of the stream.
90+
*
91+
* <p><b>Example:</b> Encoding a single standalone value.
92+
*/
7093
public static final Context OUTER = new Context(true);
71-
7294
/**
73-
* The nested context: the value being encoded or decoded is (potentially) a part of a larger
74-
* record/stream contents, and may have other parts encoded or decoded after it.
75-
*/
95+
* The nested context indicates that the value being encoded or decoded
96+
* is part of a larger structure or stream containing multiple values.
97+
*
98+
* <p>In this context, the coder must include enough information (such as
99+
* length or delimiters) to allow correct decoding of individual elements.
100+
*
101+
* <p><b>Example:</b> Encoding elements inside a collection or record.
102+
*/
76103
public static final Context NESTED = new Context(false);
77104

78105
/**
79-
* Whether the encoded or decoded value fills the remainder of the output or input (resp.)
80-
* record/stream contents. If so, then the size of the decoded value can be determined from the
81-
* remaining size of the record/stream contents, and so explicit lengths aren't required.
82-
*/
106+
* Indicates whether the encoded/decoded value consumes the entire remaining stream.
107+
*
108+
* <p>If {@code true}, no additional length information is required.
109+
* If {@code false}, the coder must encode boundaries to allow correct decoding.
110+
*/
83111
public final boolean isWholeStream;
84112

85113
public Context(boolean isWholeStream) {
@@ -116,9 +144,11 @@ public String toString() {
116144
* be encoded next to each other on the output stream, each coder should encode information to
117145
* know how many bytes to read when decoding. A common approach is to prefix the encoding with the
118146
* element's encoded length.
119-
*
120-
* @throws IOException if writing to the {@code OutputStream} fails for some reason
121-
* @throws CoderException if the value could not be encoded for some reason
147+
* <p>The behavior of encoding depends on the {@link Context} in which it is used.
148+
* When using {@link Context#OUTER}, the encoded value may consume the entire remaining stream,
149+
* so no additional length information is required. In contrast, when using {@link Context#NESTED},
150+
* the encoded value is part of a larger structure, and the coder must include sufficient
151+
* boundary information (such as length prefixes) to allow correct decoding of individual elements.
122152
*/
123153
public abstract void encode(T value, OutputStream outStream) throws CoderException, IOException;
124154

@@ -136,10 +166,16 @@ public void encode(T value, OutputStream outStream, Context context)
136166
}
137167

138168
/**
139-
* Decodes a value of type {@code T} from the given input stream in the given context. Returns the
140-
* decoded value. Multiple elements can be encoded next to each other on the input stream, each
141-
* coder should encode information to know how many bytes to read when decoding. A common approach
142-
* is to prefix the encoding with the element's encoded length.
169+
* Decodes a value of type {@code T} from the given input stream and returns the decoded value.
170+
*
171+
* <p>When multiple elements are encoded in the same stream, the coder must be able to determine
172+
* how many bytes to read for each element. This is typically achieved by encoding length or
173+
* delimiter information during encoding.
174+
*
175+
* <p>The behavior of decoding depends on the {@link Context} in which it is used.
176+
* When decoding in {@link Context#OUTER}, the value is expected to consume the entire remaining
177+
* stream. In {@link Context#NESTED}, the value is part of a larger structure, so the coder must
178+
* rely on encoded boundaries (such as length prefixes) to correctly extract individual elements.
143179
*
144180
* @throws IOException if reading from the {@code InputStream} fails for some reason
145181
* @throws CoderException if the value could not be decoded for some reason

0 commit comments

Comments
 (0)