Experiment environment:
Operating system: Linux (CentOS 7)
Terminal: Xshell 7
Hadoop version: 3.4.0 (adjust to your own installation; a few Hadoop versions may not work with this experiment)
Below is a child-parent table. The task is to mine the parent-child relationships in it and produce a table of grandchild-grandparent relationships.
The input file is as follows (make sure the two columns are separated by exactly one space, otherwise the output may be wrong):
child parent
Steven Lucy
Steven Jack
Jone Lucy
Jone Jack
Lucy Mary
Lucy Frank
Jack Alice
Jack Jesse
David Alice
David Jesse
Philip David
Philip Alma
Mark David
Mark Alma
The output file should contain the following (the header grand_child grand_parent is written by the program):
grand_child grand_parent
Steven Alice
Steven Jesse
Jone Alice
Jone Jesse
Steven Mary
Steven Frank
Jone Mary
Jone Frank
Philip Alice
Philip Jesse
Mark Alice
Mark Jesse
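How it works: this is a single-table self-join, where the child-parent table is joined with itself. In the Map stage each record is emitted twice: once keyed by the parent with tag 1 (the "right" side of the join) and once keyed by the child with tag 2 (the "left" side). In the Reduce stage all values sharing the same key (the "middle" person) are collected: tag-1 values contribute grandchild candidates, tag-2 values contribute grandparent candidates, and their cross product produces the grandchild-grandparent pairs. For example, the key Lucy receives 1+Steven+Lucy, 1+Jone+Lucy, 2+Lucy+Mary, and 2+Lucy+Frank, which yields the pairs Steven-Mary, Steven-Frank, Jone-Mary, and Jone-Frank.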
Steps:
1. Start Hadoop first:
start-all.sh
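You can check that the daemons are up with jps (the exact process list depends on your cluster layout; on a typical single-node setup you would see NameNode, DataNode, SecondaryNameNode, ResourceManager, and NodeManager):
jps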
2. Create a working directory and the corresponding file child-parent
(skip this step if you created them before)
mkdir MapReduce
Enter the directory:
cd MapReduce
Create the file and save the data:
vi child-parent
Enter the data:
child parent
Steven Lucy
Steven Jack
Jone Lucy
Jone Jack
Lucy Mary
Lucy Frank
Jack Alice
Jack Jesse
David Alice
David Jesse
Philip David
Philip Alma
Mark David
Mark Alma
(Be sure to save the file before exiting!)
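(In vi, press Esc and then type :wq to write the file and quit.)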
3. Write the Java source file:
vi MapReduce3.java
The code is as follows (remember to change the HDFS connection address to your own):
import java.io.IOException;
import java.util.*;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class MapReduce3 {
    public static int time = 0;

    public static class Map extends Mapper<Object, Text, Text, Text> {
        public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
            String child_name = new String();
            String parent_name = new String();
            String relation_type = new String();
            String line = value.toString();
            int i = 0;
            // locate the single space separating the two columns
            while (line.charAt(i) != ' ') {
                i++;
            }
            String[] values = {line.substring(0, i), line.substring(i + 1)};
            // skip the header line "child parent"
            if (values[0].compareTo("child") != 0) {
                child_name = values[0];
                parent_name = values[1];
                // emit the record twice: keyed by parent with tag 1,
                // and keyed by child with tag 2
                relation_type = "1";
                context.write(new Text(values[1]), new Text(relation_type + "+" + child_name + "+" + parent_name));
                relation_type = "2";
                context.write(new Text(values[0]), new Text(relation_type + "+" + child_name + "+" + parent_name));
            }
        }
    }

    public static class Reduce extends Reducer<Text, Text, Text, Text> {
        public void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
            // write the output header once
            if (time == 0) {
                context.write(new Text("grand_child"), new Text("grand_parent"));
                time++;
            }
            int grand_child_num = 0;
            String[] grand_child = new String[10];
            int grand_parent_num = 0;
            String[] grand_parent = new String[10];
            Iterator<Text> ite = values.iterator();
            while (ite.hasNext()) {
                String record = ite.next().toString();
                int len = record.length();
                int i = 2;
                if (len == 0) continue;
                char relation_type = record.charAt(0);
                String child_name = new String();
                String parent_name = new String();
                // parse a value of the form "<tag>+<child>+<parent>"
                while (record.charAt(i) != '+') {
                    child_name = child_name + record.charAt(i);
                    i++;
                }
                i = i + 1;
                while (i < len) {
                    parent_name = parent_name + record.charAt(i);
                    i++;
                }
                if (relation_type == '1') {
                    // the key is this record's parent, so child_name is a grandchild candidate
                    grand_child[grand_child_num] = child_name;
                    grand_child_num++;
                } else {
                    // the key is this record's child, so parent_name is a grandparent candidate
                    grand_parent[grand_parent_num] = parent_name;
                    grand_parent_num++;
                }
            }
            // cross join: pair every grandchild with every grandparent
            if (grand_parent_num != 0 && grand_child_num != 0) {
                for (int m = 0; m < grand_child_num; m++) {
                    for (int n = 0; n < grand_parent_num; n++) {
                        context.write(new Text(grand_child[m]), new Text(grand_parent[n]));
                    }
                }
            }
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.set("fs.default.name", "hdfs://hadoop01:9000"); // change hadoop01:9000 to your own NameNode address
        String[] otherArgs = new String[]{"input", "output"};
        if (otherArgs.length != 2) {
            System.err.println("Usage: MapReduce3 <in> <out>");
            System.exit(2);
        }
        Job job = Job.getInstance(conf, "Single table join");
        job.setJarByClass(MapReduce3.class);
        job.setMapperClass(Map.class);
        job.setReducerClass(Reduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
        FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
4. Grant permissions
sudo chown -R root MapReduce
Here root is the user name; change it to the user you actually work as. MapReduce is the directory that holds the files.
(Skip if you already did this earlier; it prepares for creating the HDFS user directory later.)
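You can confirm the ownership change with ls (the exact output depends on your system):
ls -ld MapReduce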
5. Configure the environment
(skip if you have already made these changes)
Open the file to edit:
vim ~/.bashrc
Add the following lines to the end of the file:
export HADOOP_HOME=/export/servers/hadoop   # change this path to your own Hadoop installation directory
export CLASSPATH=$($HADOOP_HOME/bin/hadoop classpath):$CLASSPATH
Apply the changes:
source ~/.bashrc
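To confirm the variables took effect, you can print the Hadoop classpath (the path assumes the HADOOP_HOME set above):
$HADOOP_HOME/bin/hadoop classpath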
6. Compile
(still inside the MapReduce directory created earlier)
javac MapReduce3.java
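If compilation succeeds, the directory should contain MapReduce3.class, MapReduce3$Map.class, and MapReduce3$Reduce.class (names follow the code above); you can check with:
ls *.class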
7. Package into a jar
jar -cvf MapReduce3.jar *.class
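Optionally, list the jar's contents to make sure all three class files were packaged:
jar -tf MapReduce3.jar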
8. Input and output
Create the HDFS user directory (the user name must match the one you granted permissions to earlier; I used root, so it is root here as well).
I am still inside the MapReduce directory, so I use the full path to hdfs; you can also cd into Hadoop's bin directory and run the hdfs commands from there.
/export/servers/hadoop/bin/hdfs dfs -mkdir -p /user/root
First delete any existing input and output directories (skip if they do not exist).
Note: the output directory must not exist before the job runs, otherwise the job will fail or the output directory will hold wrong or stale results.
/export/servers/hadoop/bin/hdfs dfs -rm input/*
/export/servers/hadoop/bin/hdfs dfs -rm -r output
Create the input directory:
/export/servers/hadoop/bin/hdfs dfs -mkdir input
Upload the input file:
/export/servers/hadoop/bin/hdfs dfs -put ./child-parent input
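Optionally, verify the upload by printing the file back from HDFS:
/export/servers/hadoop/bin/hdfs dfs -cat input/child-parent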
Run the generated jar:
/export/servers/hadoop/bin/hadoop jar MapReduce3.jar MapReduce3
View the output:
/export/servers/hadoop/bin/hdfs dfs -cat output/*
Successful output: the result should match the grandchild-grandparent table listed at the beginning of this post.