
51JOB Website: Data Analysis and Visualization of Average Salary by Region


The Mapper reads each crawled 51JOB record, keeps only postings whose title contains 大数据 (big data), extracts the city, and normalizes every salary format to 千/月 (thousand RMB per month).

import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

public class Job52Mapper extends Mapper<LongWritable, Text, Text, DoubleWritable> {
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // Read line by line, converting each line of text into a String
        String line = value.toString();
        // Split each tab-separated line, e.g. [大数据开发工程师, 上海吉祥航空股份有限公司, 上海, <salary>]
        String[] arr = line.split("\t");
        String city = null;
        String salary = null;
        String post = null;

        if (arr.length > 1) {
            post = arr[0];
            // Keep only postings whose title mentions 大数据 (big data)
            if (post.indexOf("大数据") != -1) {
                if (arr.length > 2) {
                    city = arr[2];
                    // Strip any district suffix, e.g. 上海-浦东新区 -> 上海
                    int index = city.indexOf("-");
                    if (index > 0) {
                        city = city.substring(0, index);
                    }
                    if (arr.length > 3) {
                        // Normalize every salary format to 千/月 (thousand RMB per month)
                        double avg = 0;
                        salary = arr[3];
                        if (salary.indexOf("万/年") != -1 || salary.indexOf("万以上/年") != -1) {
                            // e.g. 15-20万/年: midpoint of the range, x10 (万 -> 千), /12 (year -> month)
                            String str1 = salary.split("万")[0];
                            String[] arr1 = str1.split("-");
                            avg = (Double.parseDouble(arr1[0]) + Double.parseDouble(arr1[arr1.length - 1])) * 10 / 12 / 2;
                        }
                        if (salary.indexOf("万/月") != -1) {
                            // e.g. 1.5-2万/月: midpoint of the range, x10 (万 -> 千)
                            String str1 = salary.replace("万/月", "");
                            String[] arr1 = str1.split("-");
                            avg = (Double.parseDouble(arr1[0]) + Double.parseDouble(arr1[arr1.length - 1])) * 10 / 2;
                        }
                        if (salary.indexOf("千/月") != -1 || salary.indexOf("千以下/月") != -1) {
                            // e.g. 8-9千/月: already in 千/月, just take the midpoint
                            String str1 = salary.split("千")[0];
                            String[] arr1 = str1.split("-");
                            avg = (Double.parseDouble(arr1[0]) + Double.parseDouble(arr1[arr1.length - 1])) / 2;
                        }
                        context.write(new Text(city), new DoubleWritable(avg));
                    }
                }
            }
        }
    }
}
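The unit conversion is easy to get wrong, so it can be sanity-checked outside Hadoop. Below is a minimal standalone sketch; the class name and sample salary strings are made up for illustration, but each branch mirrors the Mapper above and should print a value in 千/月:

public class SalaryNormalizeCheck {
    // Same midpoint-and-convert logic as Job52Mapper, applied to sample strings
    static double normalize(String salary) {
        double avg = 0;
        if (salary.indexOf("万/年") != -1 || salary.indexOf("万以上/年") != -1) {
            String[] arr1 = salary.split("万")[0].split("-");
            avg = (Double.parseDouble(arr1[0]) + Double.parseDouble(arr1[arr1.length - 1])) * 10 / 12 / 2;
        }
        if (salary.indexOf("万/月") != -1) {
            String[] arr1 = salary.replace("万/月", "").split("-");
            avg = (Double.parseDouble(arr1[0]) + Double.parseDouble(arr1[arr1.length - 1])) * 10 / 2;
        }
        if (salary.indexOf("千/月") != -1 || salary.indexOf("千以下/月") != -1) {
            String[] arr1 = salary.split("千")[0].split("-");
            avg = (Double.parseDouble(arr1[0]) + Double.parseDouble(arr1[arr1.length - 1])) / 2;
        }
        return avg;
    }

    public static void main(String[] args) {
        // 15-20万/年 -> (15+20)/2 * 10 / 12 ≈ 14.58 千/月
        System.out.println(normalize("15-20万/年"));
        // 1.5-2万/月 -> (1.5+2)/2 * 10 = 17.5 千/月
        System.out.println(normalize("1.5-2万/月"));
        // 8-9千/月  -> (8+9)/2 = 8.5 千/月
        System.out.println(normalize("8-9千/月"));
    }
}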
The Reducer receives all normalized salaries for one city and emits their average.

import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

// Input: the Mapper's output grouped by key, e.g. <上海, <12.5, 15.0>>  <苏州, <8.0>>
// Output: one average per city, e.g. <上海, 13.75>
public class Job52Reducer extends Reducer<Text, DoubleWritable, Text, DoubleWritable> {

    @Override
    protected void reduce(Text key, Iterable<DoubleWritable> values, Context context) throws IOException, InterruptedException {
        // Salaries are doubles, so accumulate in a double: an int accumulator
        // would not compile against DoubleWritable.get() and would also
        // truncate the average.
        double sum = 0;
        int count = 0;
        for (DoubleWritable salary : values) {
            sum += salary.get();
            count++;
        }
        // Emit the per-city average salary (in 千/月)
        context.write(key, new DoubleWritable(sum / count));
    }
}
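The reduce step itself is plain arithmetic, so it can be verified without a cluster. A small sketch with hypothetical normalized salaries (the values are made up for illustration):

public class ReduceAverageCheck {
    public static void main(String[] args) {
        // Hypothetical normalized salaries for one city, in 千/月
        double[] values = {14.58, 17.5, 8.5};
        double sum = 0;   // double, not int: salaries are fractional
        int count = 0;
        for (double v : values) {
            sum += v;
            count++;
        }
        // (14.58 + 17.5 + 8.5) / 3 ≈ 13.53
        System.out.println(sum / count);
    }
}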
A driver class wires the Mapper and Reducer together and submits the job; args[0] and args[1] are the HDFS input and output paths.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

public class Job52Runner {
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        Configuration conf = new Configuration();
        // Create the job
        Job job = Job.getInstance(conf, "job52");
        // Input and output paths come from the command line
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        // Wire up the classes for each phase
        job.setJarByClass(Job52Runner.class);
        job.setMapperClass(Job52Mapper.class);
        job.setReducerClass(Job52Reducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(DoubleWritable.class);
        // Submit and exit with the job's success status
        System.exit(job.waitForCompletion(true) ? 0 : 1);

    }
}
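Note that the driver only calls setOutputKeyClass/setOutputValueClass. That is enough here because the Mapper emits the same types as the Reducer (Text, DoubleWritable); if the map output types ever differ from the job's final output types, Hadoop requires them to be set explicitly:

// Only needed when map output types differ from the job's final output types
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(DoubleWritable.class);

After packaging the three classes into a jar, the job is submitted in the usual way with hadoop jar, passing an existing input directory and a not-yet-existing output directory as the two arguments.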
Finally, the aggregated output is loaded with pandas and drawn as a bar chart with matplotlib.

import pandas as pd
import matplotlib
import matplotlib.pyplot as plt

# Use a font with CJK glyphs so the Chinese labels render correctly
matplotlib.rcParams['font.family'] = 'SimHei'
matplotlib.rcParams['font.sans-serif'] = ['SimHei']

# The part file is tab-separated with no header row, so name the columns
# explicitly; a raw string keeps the Windows path's backslashes literal
data = pd.read_csv(r"D:\oc\shiyun1\yuan\output2\part-r-00000",
                   encoding="utf-8", delimiter='\t',
                   header=None, names=['地区名称', '平均薪资'])
data.head()  # preview the first rows (displayed when run in a notebook)

# MapReduce output is ordered by city name, so sort by salary first
# to get the top five regions that the chart title promises
data = data.sort_values('平均薪资', ascending=False).head()

# Draw the bar chart
plt.figure(figsize=(8, 5))
x = data["地区名称"]   # region name
y = data["平均薪资"]   # average salary
plt.bar(x, y, width=0.5, color="g", label="千/月")
plt.title("大数据相关职位地区前五的平均薪资")  # avg salary of top-5 regions for big-data jobs
plt.xlabel("城市名称")  # city
plt.ylabel("平均薪资")  # average salary (千/月)
plt.legend()
plt.show()

 

From: https://www.cnblogs.com/modikasi/p/16641867.html
