#!/bin/bash
# Scans a fixed list of HDFS directories and records files that are older than
# the retention window so that they can be removed afterwards.

# Load the invoking user's shell environment when it is available.
# NOTE(review): presumably needed so Hadoop-related settings exist when the
# script runs outside an interactive shell — confirm.
if [[ -r ~/.bashrc ]]; then
  # shellcheck disable=SC1090
  source ~/.bashrc
fi

# Directory that contains the Hadoop command line tools (hdfs).
HADOOP_BIN_PATH=/opt/cloudera/parcels/CDH/bin

# HDFS directories to check.
d1=/tmp1
d2=/tmp/sac-sac1
d3=/tmp/cep-bu4
d4=/tmp/test_data_standard
d5=/tmp/test_data_standard_sac

# Collect the directories to check (one or more) into an array.
array_check=("$d1" "$d2" "$d3" "$d4" "$d5")

# Current time as epoch seconds, truncated to the whole minute: the inner date
# call drops the seconds, the outer one converts the result back to an epoch.
today_timestamp=$(date -d "$(date +"%Y-%m-%d %H:%M")" +%s)

# Listing filter for the current month (and, optionally, the previous month).
# Lines of the recursive listing that match this extended regex are skipped:
#   ^d      directory entries
#   " $m"   entries whose date column falls in the current month
#ml=$(date -d 'last month' +"%Y-%m")
m=$(date +"%Y-%m")
#g="^d| $m| $ml"
g="^d| $m"
#######################################
# Func: record the files below an HDFS directory whose modification time is
# older than the retention window (30 days unless overridden).
# Nothing is deleted here; expired files are only appended to ./delfilelog.
# Globals:
#   HADOOP_BIN_PATH (read) - directory containing the hdfs executable
#   g               (read) - extended regex; matching listing lines are skipped
#   today_timestamp (read) - reference "now" in epoch seconds
# Arguments:
#   $1 - HDFS directory to scan recursively
#   $2 - optional retention in days (default: 30)
# Outputs:
#   Overwrites ./lstemp with the filtered listing, appends one line per expired
#   file to ./delfilelog and prints "ls completed!" to stdout.
# Returns:
#   0 on success, 2 when $1 is missing, the hdfs exit status if listing fails.
#######################################
removeOutDate(){
  local -r target_dir=${1:-}
  local -r retention_days=${2:-30}
  local -r max_age_seconds=$(( retention_days * 24 * 60 * 60 ))
  local perms replication owner group size day hour filepath
  local file_timestamp ls_status

  if [[ -z "$target_dir" ]]; then
    printf 'removeOutDate: missing HDFS directory argument\n' >&2
    return 2
  fi

  "$HADOOP_BIN_PATH/hdfs" dfs -ls -R "$target_dir" | grep -E -v -- "$g" > lstemp
  # Only the hdfs stage matters: grep -v exits 1 when every line was filtered,
  # which is a normal outcome.
  ls_status=${PIPESTATUS[0]}
  if (( ls_status != 0 )); then
    printf 'removeOutDate: hdfs dfs -ls -R %s failed (exit %d)\n' \
      "$target_dir" "$ls_status" >&2
    return "$ls_status"
  fi
  echo "ls completed!"

  # Each listing line is split into its columns; the last variable (filepath)
  # receives the remainder of the line, so paths containing spaces survive.
  while read -r perms replication owner group size day hour filepath; do
    # Only regular files (permission string starting with "-") are candidates.
    [[ "${perms:0:1}" == "-" ]] || continue

    # Skip lines whose date/time columns cannot be parsed.
    file_timestamp=$(date -d "$day $hour" +%s 2>/dev/null) || continue

    if (( today_timestamp - file_timestamp >= max_age_seconds )); then
      echo "deltime: $(date +'%Y-%m-%d %H:%M:%S') fileinfo: $day:$hour $filepath" >> delfilelog
      # "$HADOOP_BIN_PATH/hdfs" dfs -rm -r "$filepath" > /dev/null 2>&1
      # cat delfilelog | awk '{print $6}' | xargs hdfs dfs -rm -r
      # nohup cat delfilelog | grep '/tmp1' | awk '{print $6}' | xargs hdfs dfs -rm -r &
    fi
  done < lstemp
}
#######################################
# Func: run the expiry scan for every directory listed in array_check and
# print timestamped progress messages to stdout.
# Globals:
#   array_check (read) - HDFS directories to process
# Arguments:
#   None
#######################################
execute(){
  local dir

  echo -e "\n\n"
  echo "$(date +'%Y-%m-%d %H:%M:%S') start to remove outdate files in hdfs"
  echo "$(date +'%Y-%m-%d %H:%M:%S') today is: $(date +"%Y-%m-%d %H:%M:%S")"

  # Quoted expansion keeps each configured path as a single argument.
  for dir in "${array_check[@]}"; do
    echo "$(date +'%Y-%m-%d %H:%M:%S') processing filepath: $dir"
    removeOutDate "$dir"
    echo -e "\n"
  done

  echo "$(date +'%Y-%m-%d %H:%M:%S') remove outdate files in hdfs finished"
  echo -e "\n\n"
}
# Script entry point: start the run by invoking execute.
execute