12-3实验

标签：12 Text hadoop job 实验 key new class

/**
 * Map stage of the word count experiment.
 *
 * <p>Splits every input line on single spaces and emits {@code <word, 1>} for each
 * non-empty token. Each record and each token is also traced to standard output so
 * the stages of the job can be followed in the task logs.
 */
public static class WorldCount_Mapper extends Mapper<LongWritable, Text, Text, IntWritable> {

    /** Constant count emitted with every word; reused to avoid one allocation per token. */
    private static final IntWritable ONE = new IntWritable(1);

    /** Reusable output key; its content is replaced before each write. */
    private final Text word = new Text();

    /**
     * Emits {@code <token, 1>} for every non-empty space-separated token of {@code value}.
     *
     * @param key     key of the input record (printed in the trace output)
     * @param value   one input record, treated as a line of space-separated words
     * @param context sink for the emitted {@code <word, 1>} pairs
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, IntWritable>.Context context)
            throws IOException, InterruptedException {
        System.out.println("split:<" + key + ","+ value + ">" );
        for (String token : value.toString().split(" ")) {
            // Blank lines and leading/repeated spaces make split(" ") yield empty
            // strings; counting those as a "word" would pollute the result.
            if (token.isEmpty()) {
                continue;
            }
            // NOTE(review): this trace prints the input record, not the emitted
            // <token, 1> pair — confirm whether that is intended.
            System.out.println("map:<" + key + ","+ value + ">" );
            word.set(token);
            context.write(word, ONE);
        }
    }
}

/**
 * Reduce stage of the word count experiment: adds up all counts received for one
 * word and emits {@code <word, total>}. Every incoming count is traced to standard
 * output.
 */
public static class WorldCount_Reducer extends Reducer<Text, IntWritable, Text, IntWritable> {

    /**
     * Sums the counts grouped under {@code key} and writes the total.
     *
     * @param key     the word whose counts are being combined
     * @param values  the individual counts emitted for that word
     * @param context sink for the resulting {@code <word, total>} pair
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        int total = 0;
        for (IntWritable count : values) {
            String trace = "reduce:<" + key + "," + count + ">";
            System.out.println(trace);
            total = total + count.get();
        }
        IntWritable summed = new IntWritable(total);
        context.write(key, summed);
    }
}

/**
 * Configures and submits the word count job, then blocks until it finishes.
 *
 * <p>The process exit status reflects the job result (0 on success, 1 on failure)
 * so that shell scripts and schedulers can detect a failed run.
 *
 * @param args command-line arguments (unused)
 * @throws IOException            if the job cannot be created or submitted
 * @throws ClassNotFoundException if a configured job class cannot be loaded
 * @throws InterruptedException   if the wait for completion is interrupted
 */
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
    Job job = Job.getInstance();
    job.setJarByClass(WorldCount.class);
    job.setMapperClass(WorldCount_Mapper.class);
    job.setReducerClass(WorldCount_Reducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    // Input and output locations are hard-coded; adjust them to your own cluster.
    FileInputFormat.addInputPath(job, new Path("hdfs://192.168.100.123:8020/input"));
    FileOutputFormat.setOutputPath(job, new Path("hdfs://192.168.100.123:8020/output"));
    // waitForCompletion returns false when the job fails; surface that as a
    // non-zero exit code instead of silently discarding it.
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

package hadoop.mapreduce;
 
import java.io.IOException;
 
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
 
/**
 * Word count MapReduce job: counts how often each space-separated word occurs in
 * the input and writes {@code <word, count>} pairs to the output directory.
 */
public class MyWordCount {

    /**
     * Map stage.
     *
     * <ul>
     *   <li>KEYIN: key of the map input (the offset of the line)</li>
     *   <li>VALUEIN: value of the map input (one line of the text file)</li>
     *   <li>KEYOUT: key of the map output (a word)</li>
     *   <li>VALUEOUT: value of the map output (the count for that word, always 1)</li>
     * </ul>
     *
     * <p>Java primitive types: int, short, long, double, float, char, boolean, byte.
     * Hadoop data types: IntWritable, ShortWritable, LongWritable, DoubleWritable,
     * FloatWritable, ByteWritable, BooleanWritable, NullWritable, Text
     * (Text is a UTF-8 encoded text type).
     */
    public static class WordCount_Mapper extends Mapper<LongWritable, Text, Text, IntWritable> {

        /** Constant count emitted with every word; reused to avoid one allocation per token. */
        private static final IntWritable ONE = new IntWritable(1);

        /** Reusable output key; its content is replaced before each write. */
        private final Text word = new Text();

        /**
         * Splits the line on single spaces and emits {@code <word, 1>} for each
         * non-empty token.
         *
         * @param key     key of the input record (not used by the computation)
         * @param value   one line of input text
         * @param context sink for the intermediate {@code <word, 1>} pairs
         * @throws IOException          if writing to the context fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            for (String token : value.toString().split(" ")) {
                // Blank lines and leading/repeated spaces make split(" ") yield empty
                // strings; counting those as a "word" would pollute the result.
                if (token.isEmpty()) {
                    continue;
                }
                word.set(token);
                context.write(word, ONE);
            }
        }
    }

    /**
     * Reduce stage.
     *
     * <ul>
     *   <li>KEYIN: key of the reduce input (a word)</li>
     *   <li>VALUEIN: value of the reduce input (a count for that word)</li>
     *   <li>KEYOUT: key of the reduce output (the word)</li>
     *   <li>VALUEOUT: value of the reduce output (the sum of the word's counts)</li>
     * </ul>
     *
     * <p>Tutorial note: when starting from an auto-generated {@code reduce} stub,
     * rename {@code Text arg0} to {@code key}, {@code Iterable<IntWritable> arg1} to
     * {@code values} and {@code Context arg2} to {@code context}.
     */
    public static class WordCount_Reducer extends Reducer<Text, IntWritable, Text, IntWritable> {

        /** Reusable output value; set to the total before each write. */
        private final IntWritable result = new IntWritable();

        /**
         * Adds up all counts received for {@code key} and writes the total.
         *
         * @param key     the word whose counts are being combined
         * @param values  the individual counts emitted for that word
         * @param context sink for the final {@code <word, total>} pair
         * @throws IOException          if writing to the context fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable count : values) {
                sum += count.get();
            }
            result.set(sum);
            context.write(key, result);
        }
    }

    /**
     * Configures and submits the job, waits for it to finish and exits with status
     * 0 on success or 1 on failure.
     *
     * <p>An existing output directory is deleted recursively before submission.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if filesystem access or job submission fails
     */
    public static void main(String[] args) throws Exception {
        // Input and output locations; adjust the IP address and HDFS paths to your cluster.
        String inPath = "hdfs://192.168.182.10:8020/input.txt";
        String outPath = "hdfs://192.168.182.10:8020/output/";

        Configuration conf = new Configuration();
        // Build the job from the same configuration that is used for the filesystem
        // access below, so both see identical settings.
        Job job = Job.getInstance(conf);

        // Resolve the filesystem from the output path itself: the default filesystem
        // of the configuration is not necessarily the hdfs:// one named in the path.
        Path output = new Path(outPath);
        FileSystem fs = output.getFileSystem(conf);
        if (fs.exists(output)) {
            fs.delete(output, true);
        }

        job.setJarByClass(MyWordCount.class);           // class used to locate the job jar
        job.setMapperClass(WordCount_Mapper.class);     // mapper implementation
        job.setReducerClass(WordCount_Reducer.class);   // reducer implementation
        job.setOutputKeyClass(Text.class);              // type of the output key
        job.setOutputValueClass(IntWritable.class);     // type of the output value
        FileInputFormat.addInputPath(job, new Path(inPath));
        FileOutputFormat.setOutputPath(job, output);

        // Submit the job and wait for it to complete.
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

标签：12,Text,hadoop,job,实验,key,new,class
From： https://www.cnblogs.com/kuandong24/p/18583286

相关文章

赞助商

阅读排行