Commit 101be172 authored by Ioannis Tsafaras
Browse files

LAM-20 Changed some comments

parent 6d722f45
...@@ -31,8 +31,8 @@ import java.util.regex.Matcher; ...@@ -31,8 +31,8 @@ import java.util.regex.Matcher;
import java.util.regex.Pattern; import java.util.regex.Pattern;
/** /**
* Implements the "Hashtag_WordCount" program that computes a simple word occurrence histogram * Implements the "Hashtag_WordCount" program that computes a simple hashtag occurrence histogram
* over some sample data * over some randomly generated tweets
* *
* <p> * <p>
* This example shows how to: * This example shows how to:
...@@ -55,13 +55,6 @@ public class Hashtag_WordCount { ...@@ -55,13 +55,6 @@ public class Hashtag_WordCount {
final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
// get input data // get input data
/*DataSet<String> text = env.fromElements(
"To be, or not to be,--that is the question:--",
"Whether 'tis nobler in the mind to suffer",
"The slings and arrows of outrageous fortune",
"Or to take arms against a sea of troubles,"
);*/
DataSet<String> text = env.readTextFile("hdfs:///user/root/input"); DataSet<String> text = env.readTextFile("hdfs:///user/root/input");
DataSet<Tuple2<String, Integer>> counts = DataSet<Tuple2<String, Integer>> counts =
...@@ -71,10 +64,7 @@ public class Hashtag_WordCount { ...@@ -71,10 +64,7 @@ public class Hashtag_WordCount {
.groupBy(0) .groupBy(0)
.sum(1); .sum(1);
// emit result // emit result to hdfs
//counts.print();
//counts.writeAsText("file:///root/output", FileSystem.WriteMode.OVERWRITE);
//counts.writeAsText("hdfs:///user/root/output", FileSystem.WriteMode.OVERWRITE);
counts.writeAsText("hdfs:///user/root/output", FileSystem.WriteMode.OVERWRITE); counts.writeAsText("hdfs:///user/root/output", FileSystem.WriteMode.OVERWRITE);
// execute program // execute program
...@@ -88,15 +78,14 @@ public class Hashtag_WordCount { ...@@ -88,15 +78,14 @@ public class Hashtag_WordCount {
/** /**
* Implements the string tokenizer that splits sentences into words as a user-defined * Implements the string tokenizer that splits sentences into words as a user-defined
* FlatMapFunction. The function takes a line (String) and splits it into * FlatMapFunction. The function takes a line (String) and splits it into
* multiple pairs in the form of "(word,1)" (Tuple2<String, Integer>). * multiple pairs in the form of "(hashtag,1)" (Tuple2<String, Integer>).
*/ */
public static final class LineSplitter implements FlatMapFunction<String, Tuple2<String, Integer>> { public static final class LineSplitter implements FlatMapFunction<String, Tuple2<String, Integer>> {
@Override @Override
public void flatMap(String value, Collector<Tuple2<String, Integer>> out) { public void flatMap(String value, Collector<Tuple2<String, Integer>> out) {
// normalize and split the line
//String[] tokens = value..split("\\W+");
// Acquire hashtags
List<String> hashtags = new ArrayList<String>(); List<String> hashtags = new ArrayList<String>();
Matcher m = Pattern.compile("#(\\w+)") Matcher m = Pattern.compile("#(\\w+)")
.matcher(value.toLowerCase()); .matcher(value.toLowerCase());
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment