diff --git a/README.md b/README.md
index e66918f..739338c 100644
--- a/README.md
+++ b/README.md
@@ -3,3 +3,5 @@
 * 从数据库提取数据文件,生成包含数据集uuid、path、volume、count信息的文本
 * 逐条检查数据集的文件大小和数量,记录异常数据集
 * 将结果通过邮件进行发送
+
+只检查对外公开的数据集,并且有实体的数据集。
\ No newline at end of file
diff --git a/file-in-db.php b/file-in-db.php
new file mode 100755
index 0000000..45437c2
--- /dev/null
+++ b/file-in-db.php
@@ -0,0 +1,32 @@
+4 and d.host='".$host."' group by d.id,d.uuid,d.path";
+ $result = pg_query($link, $sql);
+ $numrows = pg_num_rows($result);
+ $content='';
+ for($ri = 0; $ri < $numrows; $ri++)
+ {
+ $row = pg_fetch_array($result, $ri);
+ $content.=$row['uuid']."\t";
+ $content.=$row['volume']."\t";
+ $content.=$row['fcount']."\t";
+ $content.=$row['path']."\n";
+ }
+
+ $filename='./dataset-0.txt';
+ if (!empty($content))
+ if (!file_put_contents($filename, $content, LOCK_EX))
+ {
+ sleep(1);
+ if (!file_put_contents($filename, $content, LOCK_EX))
+ {
+ sleep(1);
+ if (!file_put_contents($filename, $content, LOCK_EX))
+ die("error in write dataset-0.txt");
+ }
+ }
+ pg_free_result($result);
+ pg_close($link);
diff --git a/file-in-disk.sh b/file-in-disk.sh
new file mode 100644
index 0000000..898387d
--- /dev/null
+++ b/file-in-disk.sh
@@ -0,0 +1,44 @@
+#!/bin/bash
+# Recompute the size and entry count of every dataset listed in dataset-0.txt
+# from what is actually on disk.
+#
+# Input:  dataset-0.txt - one dataset per line: uuid<TAB>volume<TAB>fcount<TAB>path
+# Output: dataset-1.txt - same layout, volume and fcount measured on disk
+#         stdout        - path of every dataset that has nothing on disk
+
+# -f: the output file does not exist yet on a first run.
+rm -f dataset-1.txt
+
+# An empty dataset directory must expand to zero entries, not to a literal "*".
+shopt -s nullglob
+
+# IFS= and -r keep each line intact (leading blanks, backslashes); the extra
+# test still processes a last line that lacks a trailing newline.
+while IFS= read -r oneline || [ -n "$oneline" ]
+do
+    uuid=$(printf '%s\n' "$oneline" | awk '{print $1}')
+    # Split on TAB only: the path may contain spaces.
+    datapath=$(printf '%s\n' "$oneline" | awk -F'\t' '{print $4}')
+
+    # Never expand ""/*: with an empty path the glob would walk the whole
+    # root file system.
+    if [ -z "$datapath" ]; then
+        printf 'skipping line without a path: %s\n' "$oneline" >&2
+        continue
+    fi
+
+    entries=("$datapath"/*)
+    fcount=0
+    if [ ${#entries[@]} -gt 0 ]; then
+        fcount=$(find -L "${entries[@]}" | wc -l)
+    fi
+
+    if [ "$fcount" -gt 0 ]; then
+        volume=$(du -sbL "${entries[@]}" | awk '{a+=$1}END{print a+0}')
+        # printf keeps spaces, globs and backslashes in the path literal.
+        printf '%s\t%s\t%s\t%s\n' "$uuid" "$volume" "$fcount" "$datapath" >> dataset-1.txt
+    else
+        printf '%s\n' "$datapath"
+    fi
+done < dataset-0.txt
+exit 0
\ No newline at end of file