Skip to content

Commit 379a7f0

Browse files
add PicoWordCount Example (Minimal Beam wordcount pipeline)
1 parent 88ebf83 commit 379a7f0

1 file changed

Lines changed: 55 additions & 0 deletions

File tree

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
package org.apache.beam.examples;
19+
20+
import java.util.Arrays;
21+
import org.apache.beam.sdk.Pipeline;
22+
import org.apache.beam.sdk.io.TextIO;
23+
import org.apache.beam.sdk.transforms.Count;
24+
import org.apache.beam.sdk.transforms.FlatMapElements;
25+
import org.apache.beam.sdk.transforms.MapElements;
26+
import org.apache.beam.sdk.values.KV;
27+
import org.apache.beam.sdk.values.TypeDescriptors;
28+
import org.apache.beam.sdk.transforms.Filter;
29+
30+
/**
31+
* A minimal "pico" example of WordCount.
32+
*
33+
* <p>This is a simplified version of MinimalWordCount with fewer concepts.
34+
*/
35+
public class PicoWordCount {
36+
37+
public static void main(String[] args) {
38+
39+
Pipeline p = Pipeline.create();
40+
41+
p.apply("ReadLines", TextIO.read().from("input.txt"))
42+
.apply("ExtractWords",
43+
FlatMapElements.into(TypeDescriptors.strings())
44+
.via((String line) -> Arrays.asList(line.split("\\W+"))))
45+
.apply("FilterEmptyWords",
46+
Filter.by((String word) -> !word.isEmpty()))
47+
.apply("CountWords", Count.perElement())
48+
.apply("FormatResults",
49+
MapElements.into(TypeDescriptors.strings())
50+
.via((KV<String, Long> kv) -> kv.getKey() + ": " + kv.getValue()))
51+
.apply("WriteResults", TextIO.write().to("output"));
52+
53+
p.run().waitUntilFinish();
54+
}
55+
}

0 commit comments

Comments
 (0)