```java
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

public class GDP4Mapper extends Mapper<LongWritable, Text, Text, DoubleWritable> {
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String line = value.toString();
        String[] arr = line.split("\t");
        // arr[2] is read below, so require at least three tab-separated fields
        if (arr.length > 2) {
            String continent = arr[1];
            if (!continent.isEmpty()) {
                // The GDP field looks like "rank amount (year)": keep the amount,
                // stripping commas and parentheses
                String[] gdp = arr[2].split(" ");
                String gdpValue = gdp[1].replace(",", "").replace("(", "").replace(")", "");
                // Convert from US dollars to trillions of US dollars
                double gdpInTrillions = Double.parseDouble(gdpValue) * 1e-12;
                context.write(new Text(continent), new DoubleWritable(gdpInTrillions));
            }
        }
    }
}
```
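The mapper assumes tab-separated records with the continent in the second field and a GDP column shaped like "rank amount (year)" in the third. The source file itself is not shown in the post, so the record below is a made-up illustration of that assumed layout; this minimal sketch just exercises the same parsing steps outside Hadoop:

```java
public class ParseCheck {
    public static void main(String[] args) {
        // Hypothetical record in the assumed layout: country \t continent \t "rank amount (year)"
        String line = "SampleLand\t亚洲\t1 1,234,567,000,000 (2020)";
        String[] arr = line.split("\t");
        String continent = arr[1];
        // Same cleanup the mapper applies: take the second token, drop commas and parentheses
        String[] gdp = arr[2].split(" ");
        String gdpValue = gdp[1].replace(",", "").replace("(", "").replace(")", "");
        double gdpInTrillions = Double.parseDouble(gdpValue) * 1e-12;
        System.out.println(continent + "\t" + gdpInTrillions); // prints: 亚洲  1.234567
    }
}
```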
```java
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

public class GDP4Reducer extends Reducer<Text, DoubleWritable, Text, DoubleWritable> {
    @Override
    protected void reduce(Text key, Iterable<DoubleWritable> values, Context context) throws IOException, InterruptedException {
        double sum = 0; // running total of GDP for this continent
        for (DoubleWritable v : values) {
            sum += v.get(); // get() unwraps the DoubleWritable into a double
        }
        context.write(key, new DoubleWritable(sum)); // emit the reducer's output
    }
}
```
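Because the reduce step is a plain sum (associative and commutative), the same class can also run as a combiner to pre-aggregate map output before the shuffle. This is an optional tweak to the driver, not part of the original post:

```java
// Optional: reuse the reducer as a combiner so each mapper's output is
// partially summed locally, shrinking the data moved in the shuffle.
job.setCombinerClass(GDP4Reducer.class);
```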
```java
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

public class GDP4Runner {
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        Configuration conf = new Configuration();
        // Create the job
        Job job = Job.getInstance(conf, "gdp4");
        // Set the input and output paths
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        // Set the classes to run
        job.setJarByClass(GDP4Runner.class);
        job.setMapperClass(GDP4Mapper.class);
        job.setReducerClass(GDP4Reducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(DoubleWritable.class);
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
```
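MapReduce refuses to run if the output directory already exists, so a common guard (an addition here, not in the original post) deletes a stale output path before submitting the job:

```java
// Hypothetical guard inserted before job submission.
// Requires: import org.apache.hadoop.fs.FileSystem;
FileSystem fs = FileSystem.get(conf);
Path outputPath = new Path(args[1]);
if (fs.exists(outputPath)) {
    fs.delete(outputPath, true); // true = delete recursively
}
```

After packaging the three classes into a jar (the name `gdp4.jar` is hypothetical), the job is submitted with `hadoop jar gdp4.jar GDP4Runner <input-dir> <output-dir>`, matching `args[0]` and `args[1]` above.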
```python
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt

# Use the SimHei font so the Chinese labels render correctly
matplotlib.rcParams['font.family'] = 'SimHei'
matplotlib.rcParams['font.sans-serif'] = ['SimHei']

# Load the reducer output: tab-separated, no header row
data = pd.read_csv(r"D:\oc\shiyun1\技能抽查模块3数据源\inputgdp\output4\part-r-00000", sep='\t', header=None)
data.columns = ['大洲名称', '总GDP数']
data  # notebook-style inspection of the loaded frame

plt.figure(figsize=(10, 6))
x = data['大洲名称']
y = data['总GDP数']
plt.bar(x, y, color='g', width=0.5, label='总GDP数')
plt.xlabel('大洲名称')
plt.ylabel('总GDP数')
plt.title('各大洲总GDP图')
plt.legend(fontsize=12)
plt.show()
```
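Each line of part-r-00000 is one continent and its summed GDP in trillions of US dollars, exactly as emitted by GDP4Reducer; that is why the file is read with `sep='\t'` and `header=None` before plotting one bar per continent.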