import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

// Input key: byte offset of the line; input value: the line text.
// Output key: release year; output value: the count 1.
public class maoyanMapper extends Mapper<LongWritable, Text, Text, IntWritable> {

    // Reverses a string; used to read the trailing 4-digit year off the end of a record.
    public static String spiltRtoL(String s) {
        return new StringBuilder(s).reverse().toString();
    }

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // Read the input one line at a time and convert each line to a String.
        String line = value.toString();
        // Skip records too short to contain a 4-digit year.
        if (line.length() < 4) {
            return;
        }
        // Reverse the line, take the first 4 characters (i.e. the last 4 of the
        // original line), then reverse back to recover the year.
        String reversed = spiltRtoL(line);
        String year = spiltRtoL(reversed.substring(0, 4));
        context.write(new Text(year), new IntWritable(1));
    }
}
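To make the extraction concrete, here is a minimal, self-contained sketch of the same reverse-substring-reverse trick outside of Hadoop. The sample record is hypothetical; it only assumes the input format the mapper relies on, namely that each line ends with a 4-digit release year:

public class YearExtractDemo {
    public static void main(String[] args) {
        // Hypothetical Maoyan record: comma-separated fields ending with the year.
        String line = "我不是药神,徐峥,2018";
        // Reverse the line, keep the first 4 characters, reverse back.
        String reversed = new StringBuilder(line).reverse().toString();
        String year = new StringBuilder(reversed.substring(0, 4)).reverse().toString();
        System.out.println(year); // prints: 2018
    }
}

Note that this is equivalent to line.substring(line.length() - 4); the double reverse simply avoids computing the start offset by hand.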
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

public class maoyanReducer extends Reducer<Text, IntWritable, Text, IntWritable> {

    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        int sum = 0; // total number of movies released in this year
        for (IntWritable i : values) {
            sum += i.get(); // IntWritable -> int
        }
        context.write(key, new IntWritable(sum)); // emit (year, count)
    }
}
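Because the reduce step is a plain sum, it is associative and commutative, so the same class can double as a combiner and pre-aggregate (year, 1) pairs on the map side before the shuffle. This is an optional tweak, not part of the original code; it would go in maoyanRunner alongside the other job settings:

// Optional: reuse the reducer as a combiner to shrink shuffle traffic.
// Safe here because summing counts is associative and commutative.
job.setCombinerClass(maoyanReducer.class);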
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

public class maoyanRunner {
    public static void main(String[] args)
            throws IOException, ClassNotFoundException, InterruptedException {
        Configuration conf = new Configuration();
        // Create the job.
        Job job = Job.getInstance(conf, "movie6");
        // Input and output paths come from the command line.
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        // Wire up the driver, mapper and reducer classes.
        job.setJarByClass(maoyanRunner.class);
        job.setMapperClass(maoyanMapper.class);
        job.setReducerClass(maoyanReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
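With the three classes packaged into a jar, the job is submitted in the usual way (the jar name and paths below are placeholders):

hadoop jar maoyan.jar maoyanRunner /input/maoyan /output6

A successful run leaves a tab-separated part-r-00000 file in the output directory, one year-and-count pair per line, which is exactly the format the visualization script below reads.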
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt

# Use SimHei so matplotlib can render the Chinese labels and title.
matplotlib.rcParams['font.family'] = 'SimHei'
matplotlib.rcParams['font.sans-serif'] = ['SimHei']

# The reducer output is tab-separated (year, count) with no header row.
data = pd.read_csv(r"E:\output6\part-r-00000", sep='\t', header=None)
data.columns = ['年份', '电影数目']
data.head()

plt.figure(figsize=(9, 6))
# Plot the movie count against the year, so the x axis shows actual years
# rather than row indices.
plt.plot(data['年份'], data['电影数目'], label='电影数目')
plt.xlabel('时间(年)')
plt.ylabel('上映数量')
plt.legend()
plt.title('电影年份对应的上映电影数目趋势图')
plt.show()