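// Tags: LongWritable, map, Text, MapReduce, word, job, example, new, class
// From: https://www.cnblogs.com/xiaoxiao-/p/16633385.html
public class WordCount {
// With the default TextInputFormat, the map input key is a LongWritable holding the byte offset of the line within the file (starting at 0), and the value is the text of that line as a Text.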
/**
 * Mapper of the word-count job: splits every input line into words and emits
 * {@code (word, 1)} for each word.
 *
 * <p>Words are separated by runs of whitespace (spaces, tabs, ...). Empty tokens,
 * which arise from blank lines or leading whitespace, are skipped so that the
 * empty string is never counted as a word.
 */
static class MyMapper extends Mapper<LongWritable, Text, Text, LongWritable> {
    // Output objects are reused for every record instead of allocating two new
    // objects per word; this follows the pattern of Hadoop's own WordCount example.
    private final Text outKey = new Text();
    private final LongWritable one = new LongWritable(1L);

    /**
     * Emits {@code (word, 1)} for every whitespace-separated word of one line.
     *
     * @param key     input key supplied by the input format; not used here
     * @param value   one line of input text
     * @param context Hadoop context used to write the map output
     * @throws IOException          if writing the output fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, LongWritable>.Context context) throws IOException, InterruptedException {
        // Convert the Hadoop Text into a Java String and split on whitespace runs.
        for (String word : value.toString().split("\\s+")) {
            // A blank line or leading whitespace produces an empty token; skip it.
            if (word.isEmpty()) {
                continue;
            }
            outKey.set(word);
            context.write(outKey, one);
        }
    }
}
/**
 * Reducer of the word-count job: adds up all counts received for one word and
 * writes {@code (word, total)}.
 */
static class MyReducer extends Reducer<Text, LongWritable, Text, LongWritable> {
    /**
     * Sums the values grouped under {@code key} and writes the total.
     *
     * @param key     the word
     * @param values  the counts emitted for this word
     * @param context Hadoop context used to write the reduce output
     * @throws IOException          if writing the output fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void reduce(Text key, Iterable<LongWritable> values, Reducer<Text, LongWritable, Text, LongWritable>.Context context) throws IOException, InterruptedException {
        long total = 0L;
        for (LongWritable count : values) {
            total += count.get();
        }
        LongWritable result = new LongWritable(total);
        context.write(key, result);
    }
}
/**
 * Configures the word-count job, submits it, waits for it to finish, and exits
 * with status 0 on success or 1 on failure (2 if the arguments are missing).
 *
 * @param args {@code args[0]} is the input path, {@code args[1]} is the output directory
 * @throws IOException            if the job cannot be set up or submitted
 * @throws InterruptedException   if waiting for the job is interrupted
 * @throws ClassNotFoundException if a class configured on the job cannot be found
 */
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    // Fail with a readable message instead of an ArrayIndexOutOfBoundsException.
    if (args.length < 2) {
        System.err.println("Usage: WordCount <input path> <output directory>");
        System.exit(2);
    }

    // Load the Hadoop configuration and create the job from it.
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf);

    // Name under which the job is listed (e.g. in YARN).
    job.setJobName("word count");

    // The number of reduce tasks is left at its default (1);
    // call job.setNumReduceTasks(n) here to change it.

    // Main class of the job; Hadoop uses it to find the jar to run.
    job.setJarByClass(WordCount.class);

    // Map and reduce implementations.
    job.setMapperClass(MyMapper.class);
    // Summing is associative and commutative and the reducer's input and output
    // types are identical, so the reducer can also pre-aggregate map output.
    job.setCombinerClass(MyReducer.class);
    job.setReducerClass(MyReducer.class);

    // Key/value types written by the map phase (Text is Hadoop's string type).
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LongWritable.class);

    // Key/value types written by the reduce phase.
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);

    // Input path and output location; note that the output is a directory, not a file.
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    // Run the job, printing progress, and report the outcome through the exit status.
    boolean succeeded = job.waitForCompletion(true);
    System.exit(succeeded ? 0 : 1);
}
}