2

删除目录重复文件python脚本

 2 years ago
source link: https://qidizi.github.io/%E5%88%A0%E9%99%A4-%E7%9B%AE%E5%BD%95-%E9%87%8D%E5%A4%8D-%E6%96%87%E4%BB%B6-%E8%84%9A%E6%9C%AC/
Go to the source link to view the article. You can view the picture content, updated content and better typesetting reading experience. If the link is broken, please click the button below to view the snapshot at that time.
neoserver,ios ssh client

删除目录重复文件python脚本


#!/usr/bin/python
# -*-coding:utf-8 -*-

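# Batch script: among the files of the given directory that have identical content, only the
# first one encountered keeps its name; every other copy is renamed with the "重复文件." prefix
# (nothing is actually deleted). File names are ignored when comparing, e.g. if a.jpg and b.jpg
# have the same content, only one of them stays unmarked.
# Note: sub-directories are not processed.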

import os
import sys

# Prefix put in front of the name of every file detected as a duplicate.
DUP_PREFIX = '重复文件.'
# Files bigger than this many MiB are never compared or renamed.
MAX_SIZE_MB = 100


def _same_content(path_a, path_b, chunk_size=1024 * 1024):
    """Return True when both files hold exactly the same bytes.

    The files are read chunk by chunk so that large files are never loaded
    into memory as a whole.
    """
    with open(path_a, 'rb') as file_a, open(path_b, 'rb') as file_b:
        while True:
            chunk_a = file_a.read(chunk_size)
            chunk_b = file_b.read(chunk_size)
            if chunk_a != chunk_b:
                return False
            if not chunk_a:
                # Both files reached EOF at the same time without a mismatch.
                return True


def _free_dup_name(dir_path, name, prefix):
    """Return a prefixed name for *name* that does not exist in *dir_path*.

    os.rename() silently replaces an existing target on POSIX, so an unused
    name is picked ("<prefix><name>", then "<prefix>1.<name>", ...) instead of
    overwriting whatever already lives under the default name.
    """
    candidate = '%s%s' % (prefix, name)
    counter = 0
    while os.path.lexists(os.path.join(dir_path, candidate)):
        counter += 1
        candidate = '%s%d.%s' % (prefix, counter, name)
    return candidate


def mark_duplicates(dir_path, prefix=DUP_PREFIX, max_mb=MAX_SIZE_MB):
    """Rename every duplicate file directly inside *dir_path*.

    Entries are visited in sorted name order; the first file with a given
    content keeps its name and each later file with the same content gets
    *prefix* prepended to its name. Sub-directories, files already carrying
    *prefix* and files larger than *max_mb* MiB are left untouched.

    Returns the number of files that were renamed.
    Raises IOError/OSError when a file cannot be read or renamed.
    """
    # size in bytes -> paths of the files kept so far with that size; only
    # files of equal size can be identical, so only those are compared.
    kept_by_size = {}
    dup_num = 0

    # os.listdir() order is arbitrary; sorting makes "the first one is kept"
    # deterministic (a.jpg wins over b.jpg).
    for name in sorted(os.listdir(dir_path)):
        path = os.path.join(dir_path, name)

        # The entry may have vanished since the listing was taken.
        if not os.path.exists(path):
            continue

        if not os.path.isfile(path):
            print('%s 不是文件,跳过' % name)
            continue

        # Already marked by an earlier run: skipping keeps re-runs idempotent
        # instead of stacking the prefix again.
        if name.startswith(prefix):
            continue

        size = os.path.getsize(path)

        if size > 1024 * 1024 * max_mb:
            print('%s 文件大小 %s 已经超过%sM,不处理' % (name, size, max_mb))
            continue

        same_size_kept = kept_by_size.setdefault(size, [])

        try:
            is_dup = any(_same_content(path, kept) for kept in same_size_kept)
        except (IOError, OSError):
            print('比较内容时出错:')
            raise

        if not is_dup:
            same_size_kept.append(path)
            continue

        new_name = _free_dup_name(dir_path, name, prefix)
        os.rename(path, os.path.join(dir_path, new_name))
        print('重复的文件 %s 被重命名成 %s' % (name, new_name))
        dup_num += 1

    return dup_num


def main(argv=None):
    """Command-line entry point; returns the process exit status.

    *argv* defaults to sys.argv and must hold exactly one argument after the
    program name: the directory to process. Returns 0 on success and 1 when
    the argument is missing or is not a directory.
    """
    if argv is None:
        argv = sys.argv

    if 2 != len(argv):
        print('请输入要处理目录')
        return 1

    dir_path = os.path.abspath(argv[1])

    if not os.path.isdir(dir_path):
        print('路径必须选择目录类型')
        return 1

    dup_num = mark_duplicates(dir_path)
    print('发现 %s 个重复文件,重复的文件名前将会被追加"%s"' % (dup_num, DUP_PREFIX))
    print('done')
    return 0


if __name__ == '__main__':
    sys.exit(main())


About Joyk


Aggregate valuable and interesting links.
Joyk means Joy of geeK