Counting the occurrences of English words in a file
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
// Mapper: emit (word, 1) for every whitespace-separated token in a line.
class WordMapper extends Mapper<LongWritable, Text, Text, LongWritable> {
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        String line = value.toString();
        // Split on runs of whitespace; a bare " " would miss tabs and
        // produce empty tokens around consecutive spaces.
        String[] words = line.split("\\s+");
        for (String word : words) {
            if (!word.isEmpty()) {
                context.write(new Text(word), new LongWritable(1L));
            }
        }
    }
}
// Reducer: sum the 1s emitted for each word to get its total count.
class WordReducer extends Reducer<Text, LongWritable, Text, LongWritable> {
    @Override
    protected void reduce(Text key, Iterable<LongWritable> values, Context context)
            throws IOException, InterruptedException {
        long sum = 0L;
        for (LongWritable value : values) {
            sum += value.get();
        }
        context.write(key, new LongWritable(sum));
    }
}
public class WordCount {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Point the job at the HDFS NameNode.
        conf.set("fs.defaultFS", "hdfs://master:9000");

        Job job = Job.getInstance(conf);
        job.setJarByClass(WordCount.class);
        job.setJobName("word count example");
        job.setMapperClass(WordMapper.class);
        job.setReducerClass(WordReducer.class);
        // Declare the map output and final output key/value types.
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(LongWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);
        // Input and output paths come from the command line.
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        boolean success = job.waitForCompletion(true);
        if (success) {
            System.out.println("Word count MapReduce job succeeded!");
        } else {
            System.out.println("Word count MapReduce job failed.");
        }
    }
}
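Because the reduce step here is a plain sum, which is associative and commutative, the same WordReducer can also serve as a combiner, pre-aggregating (word, 1) pairs on the map side so less data crosses the shuffle. A minimal sketch of the one extra driver line (an optional optimization not in the original code, added to the WordCount driver above):

// Optional: pre-aggregate map output locally before the shuffle.
// Reusing the reducer is safe here because summation is associative and commutative.
job.setCombinerClass(WordReducer.class);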
Counting the occurrences of [玄德, 张飞, 张角] in Chapter 1 of Romance of the Three Kingdoms
import com.hankcs.hanlp.HanLP;
import com.hankcs.hanlp.dictionary.CustomDictionary;
import com.hankcs.hanlp.seg.Segment;
import com.hankcs.hanlp.seg.common.Term;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
import java.util.List;
// Mapper: segment each line with HanLP and emit (name, 1) for the three target names.
class sgyyMapper extends Mapper<LongWritable, Text, Text, LongWritable> {
    private Segment segment;

    @Override
    protected void setup(Context context) {
        // Register the target names with the custom dictionary once per task,
        // and build one segmenter, instead of redoing both for every input line.
        CustomDictionary.add("玄德");
        CustomDictionary.add("张飞");
        CustomDictionary.add("张角");
        segment = HanLP.newSegment();
    }

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        String line = value.toString();
        for (Term term : segment.seg(line)) {
            String word = term.word;
            if ("张飞".equals(word) || "玄德".equals(word) || "张角".equals(word)) {
                context.write(new Text(word), new LongWritable(1L));
            }
        }
    }
}
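The segmentation logic can be sanity-checked outside the cluster as a plain Java program. A minimal standalone sketch (the class name and sample sentence are made up for illustration; it assumes the HanLP jar and its data files are on the local classpath):

import com.hankcs.hanlp.HanLP;
import com.hankcs.hanlp.dictionary.CustomDictionary;
import com.hankcs.hanlp.seg.common.Term;

public class SegmentCheck {
    public static void main(String[] args) {
        // Register the three names once, as sgyyMapper.setup() does.
        CustomDictionary.add("玄德");
        CustomDictionary.add("张飞");
        CustomDictionary.add("张角");
        // Segment a sample sentence and print each token on its own line.
        for (Term term : HanLP.newSegment().seg("玄德与张飞引兵大破张角。")) {
            System.out.println(term.word);
        }
    }
}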
// Reducer: sum the 1s emitted for each name to get its total count.
class sgyyReducer extends Reducer<Text, LongWritable, Text, LongWritable> {
    @Override
    protected void reduce(Text key, Iterable<LongWritable> values, Context context)
            throws IOException, InterruptedException {
        long sum = 0L;
        for (LongWritable value : values) {
            sum += value.get();
        }
        context.write(key, new LongWritable(sum));
    }
}
public class sgyyDemo {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://master:9000");

        Job job = Job.getInstance(conf);
        job.setJarByClass(sgyyDemo.class);
        job.setJobName("Romance of the Three Kingdoms character count");
        job.setMapperClass(sgyyMapper.class);
        job.setReducerClass(sgyyReducer.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(LongWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        boolean success = job.waitForCompletion(true);
        if (success) {
            System.out.println("Character count MapReduce job succeeded!");
        } else {
            System.out.println("Character count MapReduce job failed.");
        }
    }
}
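A practical note for both drivers: FileOutputFormat refuses to start a job whose output directory already exists, so a rerun fails until the old output is removed. A hedged sketch of a guard that could go in main() before waitForCompletion (it assumes the conf and args of the drivers above and additionally needs import org.apache.hadoop.fs.FileSystem):

// Delete a previous run's output directory so the job can be rerun.
FileSystem fs = FileSystem.get(conf);
Path outputPath = new Path(args[1]);
if (fs.exists(outputPath)) {
    fs.delete(outputPath, true); // true = delete recursively
}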