Contents
1.29. 一些有用的shell脚本¶
1.29.1. 删除重复的文件¶
#!/usr/bin/env bash
#
# Remove duplicate files from the current directory, keeping one copy of each.
#
# Usage: cd into the directory to clean up and run this script (no arguments).
#
# Two regular files are duplicates when they have the same size and the same
# MD5 checksum. For every group of duplicates the name that sorts first
# (LC_ALL=C) is kept and the others are deleted. Hidden files, directories,
# symlinks and names containing a newline are never touched.
#
# Outputs (written to the current directory):
#   duplicate_files - every file that has at least one duplicate, sorted
#   unique_files    - the one file kept from each duplicate group, sorted
# The names of the files being removed are echoed to stderr.
#
# Requires bash 4+ (associative arrays) and GNU coreutils (md5sum, stat -c).

DUPLICATE_LIST=duplicate_files
UNIQUE_LIST=unique_files

#######################################
# Print "<md5>  <name>" for each candidate file in the current directory
# that shares its size with at least one other candidate.
# Globals:   DUPLICATE_LIST, UNIQUE_LIST (read)
# Arguments: none
# Outputs:   one line per file on stdout, in no particular order
#######################################
checksum_same_size_files() {
  local -A files_per_size=()
  local -a names=() sizes=()
  local name size sum count i

  for name in *; do
    # Symlinks are skipped: deleting a link's target would leave it dangling.
    [[ -f "$name" && ! -L "$name" ]] || continue
    # The two report files are this script's own output, not user data.
    [[ "$name" == "$DUPLICATE_LIST" || "$name" == "$UNIQUE_LIST" ]] && continue
    # The report files are line-oriented, so such a name cannot be listed.
    [[ "$name" == *$'\n'* ]] && continue
    size=$(stat -c '%s' -- "$name") || continue
    names+=("$name")
    sizes+=("$size")
    count=${files_per_size[$size]:-0}
    files_per_size[$size]=$(( count + 1 ))
  done

  for i in "${!names[@]}"; do
    size=${sizes[i]}
    # A file with a unique size cannot have a duplicate; skip hashing it.
    [[ ${files_per_size[$size]} -gt 1 ]] || continue
    # Hash via stdin so md5sum never escapes or misparses the file name.
    sum=$(md5sum <"${names[i]}") || continue
    printf '%s  %s\n' "${sum:0:32}" "${names[i]}"
  done
}

#######################################
# Find duplicates in the current directory, write the two report files and
# delete every duplicate except the first name of each group.
# Globals:   DUPLICATE_LIST, UNIQUE_LIST (read)
# Arguments: none
# Outputs:   progress messages on stdout, removed file names on stderr
# Returns:   0 on success, 1 if a report file cannot be written or rm fails
#######################################
main() {
  local -A group_size=() keeper_seen=()
  local -a sums=() names=() duplicates=() keepers=() doomed=()
  local line sum count i

  # Sorting by checksum, then name, puts each group together with the
  # first-sorting name (the copy to keep) at its head.
  while IFS= read -r line; do
    sum=${line:0:32}
    sums+=("$sum")
    names+=("${line:34}")
    count=${group_size[$sum]:-0}
    group_size[$sum]=$(( count + 1 ))
  done < <(checksum_same_size_files | LC_ALL=C sort)

  for i in "${!names[@]}"; do
    sum=${sums[i]}
    # Same size but a checksum nobody else has: not a duplicate.
    [[ ${group_size[$sum]} -gt 1 ]] || continue
    duplicates+=("${names[i]}")
    if [[ -z "${keeper_seen[$sum]:-}" ]]; then
      keeper_seen[$sum]=1
      keepers+=("${names[i]}")
    else
      doomed+=("${names[i]}")
    fi
  done

  # Always (re)create both reports so a stale list from an earlier run
  # never survives.
  : >"$DUPLICATE_LIST" || return 1
  : >"$UNIQUE_LIST" || return 1
  if (( ${#duplicates[@]} > 0 )); then
    printf '%s\n' "${duplicates[@]}" | LC_ALL=C sort >"$DUPLICATE_LIST"
    printf '%s\n' "${keepers[@]}" | LC_ALL=C sort >"$UNIQUE_LIST"
  fi

  echo Removing...
  if (( ${#doomed[@]} > 0 )); then
    printf '%s\n' "${doomed[@]}" >&2
    if ! rm -- "${doomed[@]}"; then
      echo "Failed to remove some duplicate files." >&2
      return 1
    fi
  fi
  echo Removed duplicates files successfully.
}

main "$@"
1.29.2. 生成文件统计信息的脚本¶
#!/usr/bin/env bash
#
# filestat.sh - count the files under a directory tree, grouped by file type.
#
# Usage: filestat.sh basepath
#
# Every regular file below basepath is classified with `file -b`; only the
# text before the first comma of that description is used as the type name.
# Requires bash 4+ (associative arrays) and the `file` utility.

#######################################
# Tally file types below a directory and print the totals.
# Arguments: $1 - directory to scan
# Outputs:   a header line followed by "<type> : <count>" lines on stdout
#            (order of the type lines is unspecified)
# Returns:   1 (with a usage message on stderr) unless exactly one argument
#            is given, 0 otherwise
#######################################
main() {
  if [[ $# -ne 1 ]]; then
    echo "Usage is $0 basepath" >&2
    return 1
  fi

  local path=$1
  local -A statarray=()
  local entry ftype count

  # find's output is read straight from a process substitution, NUL-delimited,
  # so no scratch file is created and any file name is handled safely.
  while IFS= read -r -d '' entry; do
    ftype=$(file -b -- "$entry" | cut -d, -f1)
    # An empty description cannot be used as an associative-array key.
    [[ -n "$ftype" ]] || continue
    count=${statarray["$ftype"]:-0}
    statarray["$ftype"]=$(( count + 1 ))
  done < <(find "$path" -type f -print0)

  echo ============ File types and counts =============
  for ftype in "${!statarray[@]}"; do
    printf '%s : %s\n' "$ftype" "${statarray["$ftype"]}"
  done
}

main "$@"
# declare -A statarray 定义关联数组
while 循环的形式如下:
while read line;
do something
done < filename
#while 循环,负责处理 find 命令的输出
#for循环用于迭代 statarray 并生成输出
也可以不借助临时文件,而是使用进程替换:done < <(find "$path" -type f -print)。此时 <(find "$path" -type f -print) 就相当于上面的 filename(文件名),只不过读取的是子进程的输出。