首页 > 其他分享 > 全球各大洲GDP_数据_分析与可视化

全球各大洲GDP_数据_分析与可视化

时间:2022-08-31 10:59:15 浏览次数:50
标签:GDP 大洲 hadoop job 可视化 io org apache import

import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

/**
 * Mapper that emits one (column 1, scaled amount) pair per well-formed input row.
 *
 * <p>Each input line is split on tab characters. Column 1 becomes the output key (the
 * continent name, judging by the variable names used here). Column 2 is split on single
 * spaces; its second token has commas and parentheses removed, is parsed as a double and is
 * multiplied by {@link #GDP_SCALE} to form the output value.
 *
 * <p>Rows with fewer than two columns or an empty column 1 are ignored. Rows that have a key
 * but whose amount is missing or unparsable are skipped and counted under the
 * {@code GDP4Mapper / MALFORMED_ROWS} job counter rather than failing the task.
 */
public class GDP4Mapper extends Mapper<LongWritable, Text,Text, DoubleWritable> {

    /**
     * Factor applied to every parsed amount.
     * NOTE(review): presumably converts raw currency units to trillions; confirm against the data set.
     */
    private static final double GDP_SCALE = 0.000000000001;

    /** Counter group used for rows that could not be converted into an output pair. */
    private static final String COUNTER_GROUP = "GDP4Mapper";

    /** Counter name used for rows that could not be converted into an output pair. */
    private static final String MALFORMED_ROWS = "MALFORMED_ROWS";

    /**
     * Parses one tab-separated line and writes (column 1, scaled amount) when the row is usable.
     *
     * @param key     input key supplied by the framework; not used
     * @param value   one line of input text
     * @param context task context used to emit output and update counters
     * @throws IOException          if writing the output pair fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String[] columns = value.toString().split("\t");

        // Rows without a key column, or with an empty key, produce no output.
        if (columns.length < 2 || columns[1].isEmpty()) {
            return;
        }
        String continent = columns[1];

        // String.split drops trailing empty fields, so a row ending in a tab has no column 2.
        if (columns.length < 3) {
            countMalformed(context);
            return;
        }

        // The numeric amount is the second space-separated token of column 2.
        String[] gdpTokens = columns[2].split(" ");
        if (gdpTokens.length < 2) {
            countMalformed(context);
            return;
        }

        String digits = gdpTokens[1].replace(",", "").replace("(", "").replace(")", "");
        double scaledGdp;
        try {
            scaledGdp = Double.parseDouble(digits) * GDP_SCALE;
        } catch (NumberFormatException e) {
            // A single unparsable amount should not fail the whole task; record it and move on.
            countMalformed(context);
            return;
        }

        context.write(new Text(continent), new DoubleWritable(scaledGdp));
    }

    /** Increments the job counter that tracks skipped, malformed rows. */
    private void countMalformed(Context context) {
        context.getCounter(COUNTER_GROUP, MALFORMED_ROWS).increment(1);
    }
}
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

/**
 * Reducer that adds up all {@link DoubleWritable} values received for a key and emits
 * a single (key, total) pair.
 */
public class GDP4Reducer extends Reducer<Text,DoubleWritable,Text, DoubleWritable> {

    /**
     * Sums the values grouped under {@code key} and writes the total.
     *
     * @param key     the grouping key, written back unchanged
     * @param values  all values emitted for {@code key}
     * @param context task context used to emit the result
     * @throws IOException          if writing the output pair fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void reduce(Text key, Iterable<DoubleWritable> values, Context context) throws IOException, InterruptedException {
        // Running total of every value seen for this key.
        double total = 0;
        for (DoubleWritable amount : values) {
            // get() unwraps the primitive double held by the writable.
            total = total + amount.get();
        }

        DoubleWritable result = new DoubleWritable();
        result.set(total);
        context.write(key, result);
    }
}
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;

import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.FileOutputStream;
import java.io.IOException;

/**
 * Command-line driver that configures and submits the "gdp4" MapReduce job, wiring
 * {@code GDP4Mapper} and {@code GDP4Reducer} together.
 */
public class GDP4Runner {

    /**
     * Builds the job, submits it, waits for completion and exits the JVM.
     *
     * <p>Exit status is 0 when the job succeeds, 1 when it fails, and 2 when the required
     * command-line arguments are missing.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} is the output path;
     *             any further arguments are ignored
     * @throws IOException            if the job cannot be created or submitted
     * @throws ClassNotFoundException if a configured job class cannot be loaded
     * @throws InterruptedException   if the wait for job completion is interrupted
     */
    public static  void  main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        // Fail with a readable message instead of an ArrayIndexOutOfBoundsException.
        if (args.length < 2) {
            System.err.println("Usage: GDP4Runner <input path> <output path>");
            System.exit(2);
        }

        Configuration conf=new Configuration();
        // Create the job.
        Job job= Job.getInstance(conf,"gdp4");
        // Set the input and output paths.
        FileInputFormat.addInputPath(job,new Path(args[0]));
        FileOutputFormat.setOutputPath(job,new Path(args[1]));
        // Register the jar, mapper and reducer classes.
        job.setJarByClass(GDP4Runner.class);
        job.setMapperClass(GDP4Mapper.class);
        job.setReducerClass(GDP4Reducer.class);
        // No separate map-output types are set, so these apply to the map output as well.
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(DoubleWritable.class);
        System.exit(job.waitForCompletion(true)?0:1);

    }
}
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt

# Select the SimHei font for all text, presumably so the Chinese labels below
# render correctly (assumes SimHei is installed on the machine).
matplotlib.rcParams['font.family'] = 'SimHei'
matplotlib.rcParams['font.sans-serif'] = ['SimHei']

# Load the tab-separated, header-less result file and name its two columns.
data = pd.read_csv(
    r"D:\oc\shiyun1\技能抽查模块3数据源\inputgdp\output4\part-r-00000",
    sep='\t',
    header=None,
)
data.columns = ['大洲名称', '总GDP数']
# Bare expression: has no effect when run as a script; presumably kept to
# display the table in an interactive notebook cell.
data

# Bar chart with one bar per row: first column on the x axis, second as height.
plt.figure(figsize=(10, 6))
continent_names = data['大洲名称']
gdp_totals = data['总GDP数']
plt.bar(
    continent_names,
    gdp_totals,
    color='g',
    width=0.5,
    label='总GDP数',
)
plt.xlabel('大洲名称')
plt.ylabel('总GDP数')
plt.title('各大洲总GDP图')
plt.legend(fontsize=12)
plt.show()

 

标签:GDP,大洲,hadoop,job,可视化,io,org,apache,import
From: https://www.cnblogs.com/modikasi/p/16642211.html

相关文章