首页 > 其他分享 >mapreduce案例_词频统计

mapreduce案例_词频统计

时间:2024-11-05 21:43:18浏览次数:2  
标签:mapreduce hadoop 案例 job 词频 org apache import class

统计文件中英文单词出现的次数

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

/**
 * Mapper of the word-count job: emits {@code (word, 1)} for every
 * whitespace-separated token of an input line.
 *
 * <p>Blank lines and runs of consecutive whitespace produce no output, so the
 * empty string is never counted as a word.
 */
class WordMapper extends Mapper<LongWritable, Text,Text,LongWritable>{
    /** The constant count written for every token; never mutated. */
    private static final LongWritable ONE = new LongWritable(1);

    /** Output key reused across calls; its content is replaced before every write. */
    private final Text word = new Text();

    /**
     * Splits one input line into tokens and writes {@code (token, 1)} for each.
     *
     * @param key     input key supplied by the framework (not used here)
     * @param value   one line of input text
     * @param context sink for the {@code (word, 1)} pairs
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, LongWritable>.Context context) throws IOException, InterruptedException {
        // Trim first: split() keeps a leading empty token when the line starts with a separator.
        String line = value.toString().trim();
        if (line.isEmpty()) {
            return;
        }
        // "\\s+" collapses runs of whitespace, so no empty tokens appear between words.
        for (String token : line.split("\\s+")) {
            word.set(token);
            context.write(word, ONE);
        }
    }
}

/**
 * Reducer of the word-count job: adds up all counts received for a key and
 * writes a single {@code (key, total)} pair.
 */
class WordReducer extends Reducer<Text,LongWritable,Text,LongWritable>{
    /**
     * Sums the values grouped under {@code key}.
     *
     * @param key     the word being aggregated
     * @param values  the counts received for that word
     * @param context sink for the {@code (word, total)} pair
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void reduce(Text key, Iterable<LongWritable> values, Reducer<Text, LongWritable, Text, LongWritable>.Context context) throws IOException, InterruptedException {
        long total = 0L;
        for (LongWritable count : values) {
            total += count.get();
        }
        LongWritable result = new LongWritable(total);
        context.write(key, result);
    }
}



/**
 * Driver of the word-count job: wires {@link WordMapper} and
 * {@link WordReducer} into a MapReduce job and runs it.
 *
 * <p>Usage: {@code WordCount <input path> <output path>}
 */
public class WordCount {
    /**
     * Configures the job, submits it and waits for it to finish.
     *
     * <p>Exits with status 2 when the two path arguments are missing and with
     * status 1 when the job fails; returns normally on success.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} the output path
     * @throws Exception if the job cannot be created, submitted or monitored
     */
    public static void main(String[] args) throws Exception{
        // Fail with a usage message instead of an ArrayIndexOutOfBoundsException.
        if (args == null || args.length < 2) {
            System.err.println("Usage: WordCount <input path> <output path>");
            System.exit(2);
        }

        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://master:9000");

        Job job = Job.getInstance(conf);
        job.setJarByClass(WordCount.class);
        job.setJobName("单词统计案例");

        job.setMapperClass(WordMapper.class);
        // Summation is associative and the reducer's input and output types are
        // identical, so it can also pre-aggregate map output as a combiner.
        job.setCombinerClass(WordReducer.class);
        job.setReducerClass(WordReducer.class);

        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(LongWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);

        FileInputFormat.setInputPaths(job,new Path(args[0]));
        FileOutputFormat.setOutputPath(job,new Path(args[1]));

        boolean succeeded = job.waitForCompletion(true);
        if (succeeded){
            System.out.println("单词统计案例mapreduce实现执行成功!>_-");
        }else {
            System.out.println("单词统计案例mapreduce实现执行失败");
            // Report the failure to the calling shell or scheduler.
            System.exit(1);
        }
    }
}

统计三国演义第一章【玄德,张飞,张角】出现的次数

import com.hankcs.hanlp.HanLP;
import com.hankcs.hanlp.dictionary.CustomDictionary;
import com.hankcs.hanlp.seg.Segment;
import com.hankcs.hanlp.seg.common.Term;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;
import java.util.List;

/**
 * Mapper of the character-count job: segments each input line with HanLP and
 * emits {@code (name, 1)} for every term that equals one of the tracked names.
 */
class sgyyMapper extends Mapper<LongWritable, Text, Text, LongWritable> {
    /** Names that are registered with the custom dictionary and counted. */
    private static final String[] TARGET_NAMES = {"张飞", "张角", "玄德"};

    /** The constant count written for every match; never mutated. */
    private static final LongWritable ONE = new LongWritable(1L);

    /** Output key reused across calls; its content is replaced before every write. */
    private final Text name = new Text();

    /** Segmenter created once per mapper instance in {@link #setup}. */
    private Segment segment;

    /**
     * Registers the tracked names with the custom dictionary and creates the
     * segmenter once, instead of repeating that work for every input record.
     *
     * @param context the task context (not used here)
     * @throws IOException          declared by the overridden method
     * @throws InterruptedException declared by the overridden method
     */
    @Override
    protected void setup(Mapper<LongWritable, Text, Text, LongWritable>.Context context) throws IOException, InterruptedException {
        for (String target : TARGET_NAMES) {
            CustomDictionary.add(target);
        }
        segment = HanLP.newSegment();
    }

    /**
     * Segments one line and writes {@code (name, 1)} for each tracked name found.
     *
     * @param key     input key supplied by the framework (not used here)
     * @param value   one line of input text
     * @param context sink for the {@code (name, 1)} pairs
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, LongWritable>.Context context) throws IOException, InterruptedException {
        List<Term> terms = segment.seg(value.toString());
        for (Term term : terms) {
            if (isTarget(term.word)) {
                name.set(term.word);
                context.write(name, ONE);
            }
        }
    }

    /** Returns whether {@code word} equals one of the tracked names. */
    private static boolean isTarget(String word) {
        for (String target : TARGET_NAMES) {
            if (target.equals(word)) {
                return true;
            }
        }
        return false;
    }
}

/**
 * Reducer of the character-count job: totals the counts received for a name
 * and writes one {@code (name, total)} pair.
 */
class sgyyReducer extends Reducer<Text,LongWritable,Text,LongWritable> {
    /**
     * Accumulates every value grouped under {@code key} and emits the total.
     *
     * @param key     the name being aggregated
     * @param values  the counts received for that name
     * @param context sink for the {@code (name, total)} pair
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void reduce(Text key, Iterable<LongWritable> values, Reducer<Text, LongWritable, Text, LongWritable>.Context context) throws IOException, InterruptedException {
        long occurrences = 0L;
        for (LongWritable partial : values) {
            occurrences = occurrences + partial.get();
        }
        context.write(key, new LongWritable(occurrences));
    }
}

/**
 * Driver of the character-count job: wires {@link sgyyMapper} and
 * {@link sgyyReducer} into a MapReduce job and runs it.
 *
 * <p>Usage: {@code sgyyDemo <input path> <output path>}
 */
public class sgyyDemo {
    /**
     * Configures the job, submits it and waits for it to finish.
     *
     * <p>Exits with status 2 when the two path arguments are missing and with
     * status 1 when the job fails; returns normally on success.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} the output path
     * @throws Exception if the job cannot be created, submitted or monitored
     */
    public static void main(String[] args) throws Exception{
        // Fail with a usage message instead of an ArrayIndexOutOfBoundsException.
        if (args == null || args.length < 2) {
            System.err.println("Usage: sgyyDemo <input path> <output path>");
            System.exit(2);
        }

        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://master:9000");

        Job job = Job.getInstance(conf);
        job.setJarByClass(sgyyDemo.class);
        job.setJobName("三国演义人物统计案例");

        job.setMapperClass(sgyyMapper.class);
        // Summation is associative and the reducer's input and output types are
        // identical, so it can also pre-aggregate map output as a combiner.
        job.setCombinerClass(sgyyReducer.class);
        job.setReducerClass(sgyyReducer.class);

        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(LongWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);

        FileInputFormat.setInputPaths(job,new Path(args[0]));
        FileOutputFormat.setOutputPath(job,new Path(args[1]));

        boolean succeeded = job.waitForCompletion(true);
        // The messages name this job; they previously reported the word-count job.
        if (succeeded){
            System.out.println("三国演义人物统计案例mapreduce实现执行成功!>_-");
        }else {
            System.out.println("三国演义人物统计案例mapreduce实现执行失败");
            // Report the failure to the calling shell or scheduler.
            System.exit(1);
        }
    }
}

标签:mapreduce,hadoop,案例,job,词频,org,apache,import,class
From: https://www.cnblogs.com/w-ll/p/18528934

相关文章

  • mapreduce案例_好友关系
    importorg.apache.hadoop.conf.Configuration;importorg.apache.hadoop.fs.Path;importorg.apache.hadoop.io.LongWritable;importorg.apache.hadoop.io.Text;importorg.apache.hadoop.mapreduce.Job;importorg.apache.hadoop.mapreduce.Mapper;importorg.apache......
  • 用户注册案例--mvc架构的实现
    用户注册案例--浅谈servlet本案例为用户注册案例,同时介绍一部分之前经常用得到servlet的知识.servlet是javaEE的技术规范之一.基于MVC架构的分析1.dao层在mapper代理文件中写入insert的操作,对于成功注册的用户直接写入数据库,以及查询操作判断用户名是否已经存在UsergetUs......
  • 关于LIME(局部可解释模型无关解释)和SHAP(SHapley Additive exPlanations)的具体应用案例
    关注我,持续分享逻辑思维&管理思维&面试题;可提供大厂面试辅导、及定制化求职/在职/管理/架构辅导;推荐专栏《10天学会使用asp.net编程AI大模型》,目前已完成所有内容。一顿烧烤不到的费用,让人能紧跟时代的浪潮。从普通网站,到公众号、小程序,再到AI大模型网站。干货满满。学成后可......
  • 干货分享2:日引流500+创业粉实操案例分享
    分享个抖音引流创业粉的技巧,每天至少能引流500多人,贼简单,基本0成本,希望对大家有所帮助!引流教程:第一步,将抖音昵称和简介修改成如下图所示。抖音名字一定不能说“看主页”,这样过不了审,要用一些谐音字。修改后,用另一个抖音号搜索自己的抖音号,查看是否修改成功,成功后进行第......
  • todos案例
    把数据抽出来单独存放,要用的时候请求数据。一般在app组件中的created生命周期函数执行请求......
  • 数据安全秘籍:500强企业的经典传输案例大揭秘
    很多企业都会有数据安全建设的烦恼,不知道从何开始,哪里又是建设重点?那不妨借鉴一下500强企业的做法,它们在数据安全建设方面通常采取多层次的策略,具体包括以下几个方面:风险评估与管理:定期进行全面的风险评估,识别数据安全风险,制定相应的管理策略。安全政策与标准:制定并实施严格的数......
  • 【鸿蒙南向】移植案例与原理 - build lite源码分析 之 hb命令__main__.py
    ......
  • 【鸿蒙南向】移植案例与原理 - HPM包描述文件bundle.json
    ......
  • Java经典案例代码(持续更新中...)
        2024/11/4目录一、找素数①方法一②方法二二、打印九九乘法表三、打印三角形四、双色球系统一、找素数①方法一publicclassTest07{publicstaticvoidmain(String[]args){System.out.println(Find(101,200));}publicstat......
  • 响应式网页设计案例
    文章目录概念核心理念响应式设计的优点实现方法代码案例解释概念响应式设计核心理念是一个网站能够根据访问者的设备特性自动调整布局、内容和功能,以提供最佳的用户体验。它依赖于CSS媒体查询、灵活的网格布局和可伸缩的图像,确保网页内容在不同设备上都能自动调整......