Contents

一些有用的shell脚本
- 删除重复的文件
- 生成文件统计信息的脚本

1.29. 一些有用的shell脚本 ¶

1.29.1. 删除重复的文件 ¶

#!/usr/bin/env bash
#usage:xxx
#scripts_name:xxx.sh
#
# Delete duplicate regular files in the current directory. Of every group of
# byte-identical files, the first name in glob (sorted) order is kept and the
# others are removed. Hidden files, directories and symlinks are ignored.
#
# Side effects in the current directory:
#   duplicate_files - every file that has at least one identical twin
#   unique_files    - the one file kept from each group of twins
# Both lists are newline-separated, so names containing newlines are not
# representable in them (such files are still handled correctly by rm).

#######################################
# Find and delete duplicate files in the current directory.
# Globals:   none
# Arguments: none
# Outputs:   progress messages on stdout, names being removed on stderr,
#            duplicate_files / unique_files written to the current directory
# Returns:   0 on success; non-zero if a list file cannot be written or
#            rm fails
#######################################
remove_duplicate_files() {
  local -A size_count=() sum_count=() keeper=()
  local -a names=() sizes=() sums=() dups=() uniques=() doomed=()
  local f size sum i

  # Pass 1: collect candidates and count how many files share each size.
  # Globbing (not `ls` parsing) keeps names with spaces intact.
  for f in *; do
    [[ -f "$f" && ! -L "$f" ]] || continue
    # Never treat our own report files as candidates.
    [[ "$f" == duplicate_files || "$f" == unique_files ]] && continue
    size=$(wc -c <"$f") || continue
    size=${size//[[:space:]]/} # some wc implementations pad the number
    names+=("$f")
    sizes+=("$size")
    size_count[$size]=$((${size_count[$size]:-0} + 1))
  done

  # Pass 2: checksum only files whose size is shared; a file with a unique
  # size cannot have a duplicate, so hashing it would be wasted work.
  for i in "${!names[@]}"; do
    sums[i]=
    size=${sizes[i]}
    ((${size_count[$size]} > 1)) || continue
    # Read from stdin so the output never contains (or escapes) the name.
    sum=$(md5sum <"${names[i]}") || continue
    sum=${sum%% *}
    sums[i]=$sum
    sum_count[$sum]=$((${sum_count[$sum]:-0} + 1))
    # The first file seen with a given checksum is the one that is kept.
    [[ -n "${keeper[$sum]:-}" ]] || keeper[$sum]=$i
  done

  # Pass 3: split every group of identical files into keeper and removals.
  for i in "${!names[@]}"; do
    sum=${sums[i]}
    [[ -n "$sum" ]] || continue
    ((${sum_count[$sum]} > 1)) || continue
    dups+=("${names[i]}")
    if [[ "${keeper[$sum]}" == "$i" ]]; then
      uniques+=("${names[i]}")
    else
      doomed+=("${names[i]}")
    fi
  done

  # Always (re)create both report files, even when they end up empty.
  { ((${#dups[@]} == 0)) || printf '%s\n' "${dups[@]}"; } \
    >duplicate_files || return 1
  { ((${#uniques[@]} == 0)) || printf '%s\n' "${uniques[@]}"; } \
    >unique_files || return 1

  echo "Removing..."
  if ((${#doomed[@]} > 0)); then
    printf '%s\n' "${doomed[@]}" >&2
    # `--` protects against names that start with a dash.
    rm -- "${doomed[@]}" || return 1
  fi
  echo "Removed duplicates files successfully."
}

remove_duplicate_files

1.29.2. 生成文件统计信息的脚本 ¶

#!/usr/bin/env bash
#usage:filestat.sh basepath
#scripts_name:filestat.sh
#
# Print how many regular files of each type exist below basepath. The type
# of a file is the first comma-separated field of `file -b` output.

#######################################
# Count the regular files under a directory, grouped by file type.
# Globals:   none
# Arguments: $1 - base path handed to find
# Outputs:   a header line followed by one "type : count" line per type on
#            stdout (order of the types is unspecified); usage text on
#            stderr when the argument count is wrong
# Returns:   0 on success, 1 when not called with exactly one argument
#######################################
filestat() {
  if [[ $# -ne 1 ]]; then
    echo "Usage is $0 basepath" >&2
    return 1
  fi

  local path=$1
  local -A statarray=()
  local line ftype count

  # find feeds the loop through process substitution: no scratch file is
  # left behind (or counted by mistake), and the loop still runs in the
  # current shell so statarray survives it. NUL-delimited names keep paths
  # with spaces, backslashes or newlines intact.
  while IFS= read -r -d '' line; do
    ftype=$(file -b -- "$line" | cut -d, -f1)
    # An empty key is not a valid associative-array subscript.
    [[ -n "$ftype" ]] || continue
    count=${statarray["$ftype"]:-0}
    statarray["$ftype"]=$((count + 1))
  done < <(find "$path" -type f -print0)

  echo "============ File types and counts ============="

  for ftype in "${!statarray[@]}"; do
    printf '%s : %s\n' "$ftype" "${statarray["$ftype"]}"
  done
}

filestat "$@"



# declare -A statarray 定义关联数组
while 循环的形式如下：
while read line;
    do something
done < filename

#while 循环，负责处理 find 命令的输出
#for循环用于迭代 statarray 并生成输出

while 循环也可以通过进程替换直接读取命令的输出，即写成 done < <(find $path -type f -print)：此时 <(find $path -type f -print) 就相当于上面的 filename（文件名），只不过读取的是子进程的输出。