【Python代码】清理文件夹名和建立本地数据库的代码

©来源: 我自己写的

最近在整理手头的资源，发现它们非常杂乱，很多名字都不太规范

这里写了一些清理文件夹名和建立本地数据库的相关代码。你需要自己安装 Python 环境，并装好对应的库（database.py 依赖 py7zr、Pillow 和 moviepy）

然后直接运行代码就可以

clear.py

import os
import re

# Image file extensions (lower-case, with the leading dot) that are treated as pictures.
IMAGE_EXTENSIONS = {".jpg", ".jpeg", ".png", ".gif", ".bmp"}


def clean_folder_name(folder_name, parent_folder_name=None):
    """
    Clean a folder name so that only its meaningful words remain.

    Steps, in order:
    1. Collapse repeated words ("abc_abc" -> "abc") and duplicate tokens.
    2. Remove blacklisted phrases.
    3. Remove bracketed content (e.g. "[90P-1V 698.56 MB]").
    4. Remove leading numbering (e.g. "4940-", "NO.8152"), then every remaining digit.
    5. Remove special characters, normalise whitespace and join words with "_".

    :param folder_name: original folder name
    :param parent_folder_name: parent folder name, used as a prefix when the
        cleaned name is empty or meaningless ("default" is used when it is missing)
    :return: cleaned folder name, or None if nothing usable is left
    """
    # Collapse a word repeated with underscores, e.g. "abc_abc" -> "abc".
    folder_name = re.sub(r"([\u4e00-\u9fa5\w]+)(_+\1)+", r"\1", folder_name)
    forbidden_phrases = ["内购无水印"]  # extend with more phrases to strip
    for phrase in forbidden_phrases:
        folder_name = folder_name.replace(phrase, "")
    # Collapse runs of underscores.
    folder_name = re.sub(r"_+", "_", folder_name)

    # Drop duplicate whitespace-separated tokens, keeping first-seen order.
    folder_name = " ".join(dict.fromkeys(folder_name.split()))
    # NOTE(review): "_" is a word character, so `\b` cannot match right before it
    # and this pattern appears to be a no-op; the first substitution already
    # covers underscore-separated repeats.
    folder_name = re.sub(r"(\b\w+\b)(_\1)+", r"\1", folder_name)
    # Collapse a word repeated with single spaces.
    folder_name = re.sub(r"(\b\w+\b)( \1)+", r"\1", folder_name)

    # Remove bracketed content, then strip so a numbering prefix sits at index 0.
    folder_name = re.sub(r"\[.*?\]", "", folder_name).strip()

    # Remove leading numbering such as "4940-" or "NO.8152". This has to run
    # before the blanket digit removal below, otherwise it can never match.
    folder_name = re.sub(r"^\d+[-.\s]*", "", folder_name)
    folder_name = re.sub(r"^NO\.\d+\s*", "", folder_name, flags=re.IGNORECASE)

    # Remove every remaining digit.
    folder_name = re.sub(r"\d+", "", folder_name)

    # Remove special characters; word characters and whitespace survive.
    folder_name = re.sub(r"[^\w\s]", "", folder_name)

    # Replace non-breaking spaces, then collapse whitespace runs.
    folder_name = folder_name.replace("\xa0", " ").strip()
    folder_name = re.sub(r"\s+", " ", folder_name)

    folder_name = folder_name.strip()
    # NOTE(review): this removes "NO" anywhere in the name, not only as a prefix.
    folder_name = folder_name.replace("NO", "")
    folder_name = folder_name.replace("  ", " ")
    folder_name = folder_name.replace(" ", "_")
    folder_name = re.sub(r"_+", "_", folder_name)

    # A meaningless leftover gets the parent folder name as a prefix.
    if not folder_name or folder_name in ["_", "P", "V", "NO", "图片"]:
        folder_name = f"{parent_folder_name or 'default'}-{folder_name}".strip("-")

    return folder_name if folder_name else None


def clean_video_name(video_name):
    """
    Clean a video file name while keeping its extension.

    Steps, in order:
    1. Remove bracketed content (e.g. "[90P-1V 698.56 MB]").
    2. Remove leading numbering (e.g. "4940-", "NO.8152"), then every remaining digit.
    3. Remove blacklisted phrases and collapse repeated words.
    4. Remove special characters, normalise whitespace and join words with "_".

    :param video_name: original file name, including the extension
    :return: cleaned file name with the original extension, or None when nothing
        meaningful is left (the caller is expected to assign a numbered name)
    """
    # Split the stem from the extension; only the stem is cleaned.
    base_name, ext = os.path.splitext(video_name)

    # Remove bracketed content, then strip so a numbering prefix sits at index 0.
    base_name = re.sub(r"\[.*?\]", "", base_name).strip()

    # Remove leading numbering such as "4940-" or "NO.8152". This has to run
    # before the blanket digit removal below, otherwise it can never match.
    base_name = re.sub(r"^\d+[-.\s]*", "", base_name)
    base_name = re.sub(r"^NO\.\d+\s*", "", base_name, flags=re.IGNORECASE)

    # Remove every remaining digit.
    base_name = re.sub(r"\d+", "", base_name)

    # Remove blacklisted phrases.
    forbidden_phrases = ["内购无水印", "图片"]  # extend with more phrases to strip
    for phrase in forbidden_phrases:
        base_name = base_name.replace(phrase, "")

    # Collapse repeated words, e.g. "abc_abc" -> "abc" and "abc abc" -> "abc".
    base_name = re.sub(r"([\u4e00-\u9fa5\w]+)(_+\1)+", r"\1", base_name)
    # NOTE(review): "_" is a word character, so `\b` cannot match right before it
    # and this pattern appears to be a no-op; the substitution above covers it.
    base_name = re.sub(r"(\b\w+\b)(_\1)+", r"\1", base_name)
    base_name = re.sub(r"(\b\w+\b)( \1)+", r"\1", base_name)

    # Remove special characters; word characters and whitespace survive.
    base_name = re.sub(r"[^\u4e00-\u9fa5\w\s]", "", base_name)

    # Replace non-breaking spaces, then collapse whitespace runs.
    base_name = base_name.replace("\xa0", " ").strip()
    base_name = re.sub(r"\s+", " ", base_name)

    # Join the words with underscores and collapse underscore runs.
    base_name = base_name.strip().replace(" ", "_")
    base_name = re.sub(r"_+", "_", base_name)

    # A stem made only of underscores/digits carries no information.
    if re.fullmatch(r"[_\d]+", base_name):
        return None

    return base_name + ext if base_name else None


def unique_folder_path(folder_path):
    """
    Return a path that does not exist yet, derived from *folder_path*.

    When *folder_path* is free it is returned unchanged; otherwise "_1", "_2", ...
    is inserted before the extension part (as split by ``os.path.splitext``)
    until an unused path is found.

    :param folder_path: desired target path
    :return: the first candidate path that does not exist
    """
    stem, suffix = os.path.splitext(folder_path)
    candidate = folder_path
    attempt = 0
    while os.path.exists(candidate):
        attempt += 1
        candidate = f"{stem}_{attempt}{suffix}"
    return candidate


def _renumber_images(folder_path, files):
    """Rename the images among *files* in *folder_path* to 001.ext, 002.ext, ... in listing order."""
    staged = []
    image_count = 1
    for file in files:
        file_path = os.path.join(folder_path, file)
        file_ext = os.path.splitext(file)[1].lower()

        if file_ext not in IMAGE_EXTENSIONS:
            print(f"跳过非图片文件：{file_path}")
            continue

        new_file_name = f"{image_count:03d}{file_ext}"
        image_count += 1
        if file == new_file_name:
            print(f"保持原始文件名：{file_path}")
            continue

        # Park the file under a temporary name first: its final name may still be
        # held by another image that has not been moved yet, and renaming onto an
        # existing file either fails or silently overwrites it.
        temp_path = unique_folder_path(
            os.path.join(folder_path, f"__renumber_{new_file_name}")
        )
        os.rename(file_path, temp_path)
        staged.append((file_path, temp_path, new_file_name))

    # Every image that needed a new name is parked now, so the targets are free.
    for file_path, temp_path, new_file_name in staged:
        new_file_path = unique_folder_path(os.path.join(folder_path, new_file_name))
        os.rename(temp_path, new_file_path)
        print(f"图片文件重命名：{file_path} -> {new_file_path}")


def process_images_and_clean_folders(base_path):
    """
    Walk *base_path*, renumber the image files of every folder and clean folder names.

    The tree is walked bottom-up, so a folder is renamed only after everything
    beneath it has been visited; renaming during a top-down walk makes the
    renamed folder's subdirectories unreachable. *base_path* itself is never
    renamed, so the path held by the caller stays valid.

    :param base_path: root directory to process
    """
    base_abs = os.path.abspath(base_path)
    for root, _dirs, files in os.walk(base_path, topdown=False):
        folder_name = os.path.basename(root)
        parent_path = os.path.dirname(root)
        parent_folder_name = os.path.basename(parent_path)
        cleaned_name = clean_folder_name(folder_name, parent_folder_name)

        if not cleaned_name:
            print(f"跳过无效文件夹：{root}")
            continue

        # Renumber the images while the folder still has its current name.
        _renumber_images(root, files)

        if os.path.abspath(root) == base_abs or cleaned_name == folder_name:
            print(f"保持原始文件夹名：{root}")
            continue

        # Pick a free target so an existing folder with the cleaned name survives.
        new_folder_path = unique_folder_path(os.path.join(parent_path, cleaned_name))
        os.rename(root, new_folder_path)
        print(f"文件夹重命名：{root} -> {new_folder_path}")


def process_video_files(base_path):
    """
    Walk *base_path* and rename every video file to its cleaned name.

    A video whose cleaned name is empty gets a per-folder running number
    instead (e.g. "001.mp4"). When the target name is already taken, a
    two-digit counter suffix is appended until a free name is found.

    :param base_path: root directory to process
    """
    video_suffixes = [".mp4", ".avi", ".mkv", ".mov", ".flv"]

    for folder, _, names in os.walk(base_path):
        next_number = 1  # running number for videos without a usable name

        for name in names:
            source = os.path.join(folder, name)
            suffix = os.path.splitext(name)[1]

            # Only video files are touched.
            if suffix.lower() not in video_suffixes:
                print(f"跳过非视频文件：{name}")
                continue

            target_name = clean_video_name(name)
            if not target_name:
                # Nothing meaningful left: fall back to a numbered name.
                target_name = f"{next_number:03d}{suffix}"
                next_number += 1

            target = os.path.join(folder, target_name)
            if source == target:
                continue

            try:
                # Append a counter while the target name is already taken.
                stem, target_suffix = os.path.splitext(target_name)
                attempt = 1
                while os.path.exists(target):
                    target = os.path.join(folder, f"{stem}_{attempt:02d}{target_suffix}")
                    attempt += 1

                os.rename(source, target)
                print(f"文件重命名：{source} -> {target}")
            except Exception as e:
                print(f"文件重命名异常：{e}")


if __name__ == "__main__":
    # Directory to clean up; replace with your own path.
    target_dir = r"E:\COLORFUL"

    # Renumber images and clean folder names, then clean video file names.
    process_images_and_clean_folders(target_dir)
    process_video_files(target_dir)

database.py

import os
import re
import sqlite3
import py7zr
import zipfile
import json
from datetime import datetime
from PIL import Image
from moviepy.video.io.VideoFileClip import VideoFileClip


def analyze_image(file_path):
    """
    Read the pixel dimensions of an image file.

    :param file_path: path of the image file
    :return: resolution string such as '1920x1080'; '-1x-1' when the file
        cannot be analysed (an error line is printed in that case)
    """
    failure = f"{-1}x{-1}"
    try:
        with Image.open(file_path) as picture:
            w, h = picture.size
    except Exception:
        print(f"Error analyzing image file: {file_path}")
        return failure
    return f"{w}x{h}"


def analyze_video(file_path):
    """
    Read the resolution and duration of a video file.

    :param file_path: path of the video file
    :return: tuple of the resolution string (such as '1920x1080') and the
        duration in whole seconds; ('-1x-1', -1) when the file cannot be
        analysed (an error line is printed in that case)
    """
    try:
        video = VideoFileClip(file_path)
        try:
            width, height = video.size
            duration = int(video.duration)
        finally:
            # Release the clip's reader resources even if reading metadata fails.
            video.close()
        return f"{width}x{height}", duration
    except Exception:
        print(f"Error analyzing video file: {file_path}")
        return f"{-1}x{-1}", -1



# Initialise the database
def initialize_database(db_path):
    """
    Create the SQLite schema at *db_path* if it does not exist yet.

    Creates the main ``resources`` table, the ``themes`` statistics table and
    the per-type child tables ``images``, ``videos``, ``novels`` and
    ``archives``. Every statement uses ``IF NOT EXISTS``, so the function can
    be called repeatedly. The connection is closed even when a statement fails.

    :param db_path: path of the SQLite database file
    """
    table_definitions = (
        # Main table
        """
        CREATE TABLE IF NOT EXISTS resources (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            name TEXT NOT NULL,
            type TEXT NOT NULL,
            theme TEXT,
            path TEXT NOT NULL,
            tags TEXT,
            size INTEGER,
            source TEXT,
            source_link TEXT,
            compressed INTEGER DEFAULT 0,
            archive_path TEXT,
            created_at TEXT,
            updated_at TEXT,
            attribute1 TEXT,
            attribute2 TEXT,
            attribute3 TEXT
        )
    """,
        # Theme statistics table
        """
        CREATE TABLE IF NOT EXISTS themes (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            name TEXT NOT NULL UNIQUE,
            file_count INTEGER DEFAULT 0,
            file_types TEXT,
            total_size INTEGER DEFAULT 0,
            average_file_size REAL DEFAULT 0
        )
    """,
        # Image child table
        """
        CREATE TABLE IF NOT EXISTS images (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            resource_id INTEGER NOT NULL,
            resolution TEXT,
            format TEXT,
            color_mode TEXT,
            FOREIGN KEY (resource_id) REFERENCES resources(id)
        )
    """,
        # Video child table
        """
        CREATE TABLE IF NOT EXISTS videos (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            resource_id INTEGER NOT NULL,
            duration INTEGER,
            resolution TEXT,
            format TEXT,
            frame_rate REAL,
            FOREIGN KEY (resource_id) REFERENCES resources(id)
        )
    """,
        # Novel child table
        """
        CREATE TABLE IF NOT EXISTS novels (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            resource_id INTEGER NOT NULL,
            title TEXT,
            author TEXT,
            word_count INTEGER,
            genre TEXT,
            FOREIGN KEY (resource_id) REFERENCES resources(id)
        )
    """,
        # Archive child table
        """
        CREATE TABLE IF NOT EXISTS archives (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            resource_id INTEGER,
            contents TEXT,
            extracted_path TEXT,
            format TEXT,
            password TEXT,
            FOREIGN KEY (resource_id) REFERENCES resources(id)
        )
    """,
    )

    conn = sqlite3.connect(db_path)
    try:
        cursor = conn.cursor()
        for ddl in table_definitions:
            cursor.execute(ddl)
        conn.commit()
    finally:
        # Do not leak the connection when a statement raises.
        conn.close()
    print("Database initialized successfully.")



# Add a resource
def add_resource(conn, name, resource_type, theme, path, tags, size, source, source_link):
    """
    Insert one uncompressed resource row into ``resources`` and commit.

    ``created_at`` and ``updated_at`` are both set to the current local time.

    :param conn: open sqlite3 connection
    :param name: resource name
    :param resource_type: value stored in the ``type`` column
    :param theme: theme the resource belongs to
    :param path: file path of the resource
    :param tags: tag text
    :param size: size stored in the ``size`` column
    :param source: origin of the resource
    :param source_link: link to the origin
    """
    timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    row = (name, resource_type, theme, path, tags, size, source, source_link, timestamp, timestamp)
    insert_sql = """
        INSERT INTO resources (name, type, theme, path, tags, size, source, source_link, compressed, created_at, updated_at)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?, 0, ?, ?)
    """
    conn.cursor().execute(insert_sql, row)
    conn.commit()
    print(f"Resource '{name}' added successfully.")


# Delete a resource
def delete_resource(conn, resource_id):
    """
    Delete a resource's file on disk and its row in ``resources``.

    For a compressed resource the archive file is removed, otherwise the
    original file. A missing file is ignored; the row is deleted either way.

    :param conn: open sqlite3 connection
    :param resource_id: id of the resource row
    """
    cursor = conn.cursor()
    row = cursor.execute(
        "SELECT path, archive_path, compressed FROM resources WHERE id = ?", (resource_id,)
    ).fetchone()
    if not row:
        print(f"Resource ID {resource_id} not found.")
        return

    path, archive_path, compressed = row

    # Remove whichever file currently represents the resource on disk.
    target = archive_path if compressed else path
    if target and os.path.exists(target):
        os.remove(target)
        if compressed:
            print(f"Compressed file {archive_path} deleted.")
        else:
            print(f"File {path} deleted.")

    # Remove the database record.
    cursor.execute("DELETE FROM resources WHERE id = ?", (resource_id,))
    conn.commit()
    print(f"Resource ID {resource_id} deleted successfully.")


# Compress a resource and delete the original file
def compress_and_delete(conn, resource_id, archive_format='7z', password=None):
    """
    Pack a resource's file into an archive next to it, delete the original and
    mark the row as compressed.

    Nothing is written when the format is unsupported or when a file already
    exists at the archive path (this includes a source file that already
    carries the archive extension), so no existing file is overwritten.

    :param conn: open sqlite3 connection
    :param resource_id: id of the resource row
    :param archive_format: '7z' or 'zip'
    :param password: password for the 7z archive; ignored for 'zip' because
        the standard ``zipfile`` module cannot create encrypted archives
    """
    cursor = conn.cursor()
    cursor.execute("SELECT path, name FROM resources WHERE id = ?", (resource_id,))
    resource = cursor.fetchone()
    if not resource:
        print(f"Resource ID {resource_id} not found.")
        return

    file_path, file_name = resource

    # Validate the format before touching the file system.
    if archive_format not in ('7z', 'zip'):
        print(f"Unsupported archive format: {archive_format}")
        return

    archive_path = f"{os.path.splitext(file_path)[0]}.{archive_format}"

    # Opening the archive in 'w' mode truncates whatever is at that path, which
    # would destroy an unrelated archive or the source file itself.
    if os.path.exists(archive_path):
        print(f"Archive target already exists, skipping: {archive_path}")
        return

    if archive_format == '7z':
        with py7zr.SevenZipFile(archive_path, 'w', password=password) as archive:
            archive.write(file_path, arcname=file_name)
    else:
        if password:
            print("Warning: zip archives are written without encryption; password ignored.")
        with zipfile.ZipFile(archive_path, 'w', zipfile.ZIP_DEFLATED) as archive:
            archive.write(file_path, arcname=file_name)

    # Remove the original only after the archive has been written and closed.
    os.remove(file_path)
    print(f"Compressed {file_path} to {archive_path} and deleted original file.")

    # Record the new state in the database.
    cursor.execute("""
        UPDATE resources
        SET compressed = 1, archive_path = ?
        WHERE id = ?
    """, (archive_path, resource_id))
    conn.commit()


# Decompress a resource
def auto_decompress(conn, resource_id, password=None):
    """
    Extract a compressed resource's archive into the directory of its original
    path, delete the archive and mark the row as uncompressed.

    :param conn: open sqlite3 connection
    :param resource_id: id of the resource row
    :param password: password for encrypted archives (used for both 7z and zip)
    """
    cursor = conn.cursor()
    cursor.execute("SELECT archive_path, path, compressed FROM resources WHERE id = ?", (resource_id,))
    resource = cursor.fetchone()
    if not resource:
        print(f"Resource ID {resource_id} not found.")
        return

    archive_path, original_path, compressed = resource
    if not compressed:
        print(f"Resource ID {resource_id} is not compressed.")
        return
    if not archive_path:
        # A row flagged as compressed without an archive path cannot be restored.
        print(f"Resource ID {resource_id} has no archive path recorded.")
        return

    # Extract next to the original file; a bare file name means the current directory.
    extract_dir = os.path.dirname(original_path) or "."
    # Compare the extension case-insensitively so ".ZIP" / ".7Z" are accepted too.
    lowered = archive_path.lower()
    if lowered.endswith('.7z'):
        with py7zr.SevenZipFile(archive_path, 'r', password=password) as archive:
            archive.extractall(path=extract_dir)
    elif lowered.endswith('.zip'):
        with zipfile.ZipFile(archive_path, 'r') as archive:
            # zipfile expects the password as bytes.
            archive.extractall(path=extract_dir, pwd=password.encode() if password else None)
    else:
        print(f"Unsupported archive format: {archive_path}")
        return

    print(f"Decompressed {archive_path} to {original_path}")

    # Delete the archive now that its content has been extracted.
    if os.path.exists(archive_path):
        os.remove(archive_path)
        print(f"Deleted archive file: {archive_path}")

    # Record the new state in the database.
    cursor.execute("""
        UPDATE resources
        SET compressed = 0, archive_path = NULL
        WHERE id = ?
    """, (resource_id,))
    conn.commit()


def clean_folder_name(folder_name, parent_folder_name=None):
    """
    Clean a folder or file name for use as a theme label.

    - Collapses underscore-separated repeats, removes bracketed content, a
      leading number prefix and special characters.
    - When nothing meaningful is left, the parent folder name (or "default")
      is used as a prefix instead.
    - Words in the result are separated by single spaces.

    :param folder_name: original folder name
    :param parent_folder_name: parent folder name, used when the cleaned name is meaningless
    :return: cleaned name, or None when it is empty
    """
    # Collapse a word repeated with underscores, e.g. "abc_abc" -> "abc".
    folder_name = re.sub(r"([\u4e00-\u9fa5\w]+)(_+\1)+", r"\1", folder_name)
    # Remove bracketed content, then strip so a number prefix sits at index 0.
    folder_name = re.sub(r"\[.*?\]", "", folder_name).strip()
    # Remove a leading number prefix such as "4940-".
    folder_name = re.sub(r"^\d+[-.\s]*", "", folder_name)
    # Remove special characters; word characters and whitespace survive.
    folder_name = re.sub(r"[^\u4e00-\u9fa5\w\s]", "", folder_name)
    # Join the words with underscores for the checks below.
    folder_name = re.sub(r"\s+", "_", folder_name.strip())

    # A name made only of underscores/digits (or nothing) is meaningless.
    if not folder_name or re.fullmatch(r"[_\d]+", folder_name):
        folder_name = f"{parent_folder_name or 'default'}-{folder_name}".strip("-")
    folder_name = re.sub(r"_+", "_", folder_name)  # collapse underscore runs
    folder_name = re.sub("_", " ", folder_name)  # underscores become spaces

    return folder_name if folder_name else None


def scan_image_video_and_add_resources(conn, base_dir, source="local", source_link="N/A", max_file_size=15 * 1024 * 1024):
    """
    Walk *base_dir*, register every image and video file in the database and
    store per-theme statistics.

    The theme of a file is the cleaned name of the folder that contains it.
    Images larger than *max_file_size* are written to "large_file.txt" and
    skipped entirely: no row is inserted for them and they are not counted.
    ``color_mode`` ("RGB") and ``frame_rate`` (30.0) are stored as fixed
    placeholder values; they are not measured from the files.

    :param conn: open sqlite3 connection
    :param base_dir: root directory to scan
    :param source: value stored in the ``source`` column
    :param source_link: value stored in the ``source_link`` column
    :param max_file_size: size limit in bytes for image files
    """
    image_types = ("jpg", "jpeg", "png", "gif", "bmp")
    video_types = ("mp4", "avi", "mkv", "mov", "flv")

    cursor = conn.cursor()
    theme_stats = {}  # per theme: file count, total size, count per file type
    large_files_log = "large_file.txt"
    for root, _, files in os.walk(base_dir):
        theme = clean_folder_name(os.path.basename(root))
        file_count = 0
        total_size = 0
        file_types = {}

        for file in files:
            file_type = os.path.splitext(file)[1][1:].lower() or "unknown"

            # Only image and video files are registered.
            is_image = file_type in image_types
            if not is_image and file_type not in video_types:
                continue

            file_path = os.path.join(root, file)
            file_size = os.path.getsize(file_path)

            # Check the size limit before inserting anything, so a skipped file
            # does not leave a resources row without a matching images row.
            if is_image and file_size > max_file_size:
                with open(large_files_log, "a", encoding="utf-8") as log_file:
                    log_file.write(f"File too large: {file_path} ({file_size} bytes)\n")
                print(f"Skipped large file: {file_path}")
                continue

            created_at = datetime.now().strftime('%Y-%m-%d %H:%M:%S')

            # Insert into the main table.
            cursor.execute("""
                INSERT INTO resources (name, type, theme, path, tags, size, source, source_link, compressed, created_at, updated_at)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?, 0, ?, ?)
            """, (file, file_type, theme, file_path, "", file_size, source, source_link, created_at, created_at))
            resource_id = cursor.lastrowid

            if is_image:
                resolution = analyze_image(file_path)
                cursor.execute("""
                    INSERT INTO images (resource_id, resolution, format, color_mode)
                    VALUES (?, ?, ?, ?)
                """, (resource_id, resolution, file_type.upper(), "RGB"))
            else:
                resolution, duration = analyze_video(file_path)
                cursor.execute("""
                    INSERT INTO videos (resource_id, duration, resolution, format, frame_rate)
                    VALUES (?, ?, ?, ?, ?)
                """, (resource_id, duration, resolution, file_type.upper(), 30.0))  # frame rate defaults to 30

            # Update this folder's statistics.
            file_count += 1
            total_size += file_size
            file_types[file_type] = file_types.get(file_type, 0) + 1

        # Merge this folder's numbers into its theme; several folders can share a theme.
        entry = theme_stats.setdefault(theme, {'file_count': 0, 'total_size': 0, 'file_types': {}})
        entry['file_count'] += file_count
        entry['total_size'] += total_size
        for file_type, count in file_types.items():
            entry['file_types'][file_type] = entry['file_types'].get(file_type, 0) + count

    # Write the theme statistics; an existing theme row is overwritten.
    for theme, stats in theme_stats.items():
        file_types_json = json.dumps(stats['file_types'])
        average_size = stats['total_size'] / stats['file_count'] if stats['file_count'] > 0 else 0
        cursor.execute("""
            INSERT INTO themes (name, file_count, file_types, total_size, average_file_size)
            VALUES (?, ?, ?, ?, ?)
            ON CONFLICT(name) DO UPDATE SET
                file_count=excluded.file_count,
                file_types=excluded.file_types,
                total_size=excluded.total_size,
                average_file_size=excluded.average_file_size
        """, (theme, stats['file_count'], file_types_json, stats['total_size'], average_size))

    conn.commit()
    print(f"Scanned directory '{base_dir}' and added resources to the database.")


if __name__ == "__main__":
    db_path = 'file_manager.db'
    initialize_database(db_path)
    conn = sqlite3.connect(db_path)

    try:
        # Directory to scan; replace with your own path.
        base_dir = r"E:\COLORFUL"

        # Scan the directory and add what is found to the database.
        scan_image_video_and_add_resources(conn, base_dir, source="local", source_link="N/A")
    finally:
        # Always close the database connection.
        conn.close()

好评如潮 100%

文件解压教程

首先准备好解压工具：电脑端安装 WinRAR，手机端安装 ZArchiver 或者 ES 文件管理器

然后有2种类型的压缩包:

1. 单一压缩文件的（可以单独下载和解压)

- 如果后缀名正常: 直接打开文件 > 输入密码 >解压文件 > 解压成功, 有的情况会有双层压缩, 再继续解压即可

- 如果需要修改后缀名: 不需要管文件原本后缀是什么，只要是压缩文件，后缀直接改成 .rar，然后用解压工具打开，工具会自动识别正确的类型，然后解压即可, (有的系统默认不能更改后缀名，这种情况, 要先百度下如何显示文件后缀名).

2. 多个压缩分卷文件的 (需要全部下载完毕后才能正确解压)

- 如果后缀名正常: 只需要解压第一个分卷即可, 工具在解压过程中会自动调用其他分卷, 不需要每个分卷都解压一遍 (所以需要提前全部下载好), 不同压缩格式的第一个分卷命名是有区别的 (RAR格式的第一个分卷是叫 xxx.part1.rar , 7z格式的第一个压缩分卷是叫 xxx.001 , ZIP格式的第一个压缩分卷就是默认的 XXX.zip ) .

- 如果是需要改后缀的情况 (比较少见): 需要把文件按顺序重新命名好才能正常解压, RAR的分卷命名格式是 xxx.part1.rar, xxx.part2.rar, xxx.part3.rar, 7z的命名格式是 xxx.001, xxx.002, xxx.003, ZIP的排序格式 xxx.zip, xxx.zip.001, xxx.zip.002