【Python代码】清理文件夹名和建立本地数据库的代码
最后修改 25-01-12 18:14:19
状态 已公开
下载检测 未检测
好评如潮 100%
2 好评
0 差评
52 点击
1 评论
5 收藏
0 分享

©来源: 我自己写的

最近在整理手头的资源,发现它们非常杂乱,很多名字都不太规范

这里写了一些清理文件夹名和建立本地数据库的相关代码,需要你自己安装 Python 环境,并装好对应的库(Pillow、moviepy、py7zr)

然后直接运行代码就可以

clear.py

import os
import re

# Supported image file extensions (lower-case, including the leading dot).
IMAGE_EXTENSIONS = {".jpg", ".jpeg", ".png", ".gif", ".bmp"}


def clean_folder_name(folder_name, parent_folder_name=None):
    """Return a sanitised folder name, or ``None`` if nothing usable remains.

    The cleaning pipeline, in order:

    1. Collapse underscore-separated repeats (``"abc_abc"`` -> ``"abc"``).
    2. Remove forbidden phrases and duplicate whitespace-separated tokens.
    3. Delete every digit and every ``[...]`` group
       (e.g. ``"[90P-1V 698.56 MB]"``).
    4. Drop everything except word characters and whitespace.
    5. Remove the marker ``"NO"`` and turn spaces into single underscores.

    If the result is empty or one of a few meaningless leftovers, the parent
    folder name (or ``"default"``) is used as a prefix instead.

    :param folder_name: original folder name
    :param parent_folder_name: name of the containing folder, used as a
        fallback prefix when the cleaned name is meaningless
    :return: cleaned folder name, or ``None`` when it ends up empty
    """
    # Collapse repeats such as "写真_写真" into a single occurrence.
    folder_name = re.sub(r"([\u4e00-\u9fa5\w]+)(_+\1)+", r"\1", folder_name)

    forbidden_phrases = ["内购无水印"]  # extend with more banned phrases as needed
    for phrase in forbidden_phrases:
        folder_name = folder_name.replace(phrase, "")

    # Squeeze runs of underscores.
    folder_name = re.sub(r"_+", "_", folder_name)

    # Drop duplicate whitespace-separated tokens, keeping first-seen order.
    folder_name = " ".join(dict.fromkeys(folder_name.split()))
    folder_name = re.sub(r"(\b\w+\b)(_\1)+", r"\1", folder_name)  # underscore-separated repeats
    folder_name = re.sub(r"(\b\w+\b)( \1)+", r"\1", folder_name)  # space-separated repeats

    # Delete all digits, then bracketed groups.
    folder_name = re.sub(r"\d+", "", folder_name)
    folder_name = re.sub(r"\[.*?\]", "", folder_name)

    # Strip numeric prefixes such as "4940-" or "NO.8152".
    # NOTE: every digit was already removed above, so these two rules can no
    # longer match; they are kept to preserve the original pipeline.
    folder_name = re.sub(r"^\d+[-.\s]*", "", folder_name, flags=re.IGNORECASE)
    folder_name = re.sub(r"^NO.\d+\s*", "", folder_name, flags=re.IGNORECASE)

    # Keep only word characters (letters, CJK, underscore) and whitespace.
    folder_name = re.sub(r"[^\w\s]", "", folder_name)

    # Normalise non-breaking spaces and collapse whitespace runs.
    folder_name = folder_name.replace("\xa0", " ").strip()
    folder_name = re.sub(r"\s+", " ", folder_name)

    folder_name = folder_name.strip()
    folder_name = folder_name.replace("NO", "")
    folder_name = folder_name.replace("  ", " ")  # merge doubled spaces
    folder_name = folder_name.replace(" ", "_")  # spaces become underscores
    folder_name = re.sub(r"_+", "_", folder_name)

    # A meaningless leftover is replaced by "<parent>-<leftover>".
    if not folder_name or folder_name in ["_", "P", "V", "NO", "图片"]:
        folder_name = f"{parent_folder_name or 'default'}-{folder_name}".strip("-")

    return folder_name if folder_name else None


def clean_video_name(video_name):
    """Return a sanitised video file name, or ``None`` if nothing usable remains.

    The extension is split off and re-attached unchanged. The base name has
    ``[...]`` groups, all digits, forbidden phrases, repeated words and
    special characters removed; spaces become single underscores.

    :param video_name: original file name including its extension
    :return: cleaned ``base + extension``, or ``None`` when the cleaned base
        is empty (the caller is expected to assign a numbered name then)
    """
    base_name, ext = os.path.splitext(video_name)

    # Remove bracketed groups such as "[90P-1V 698.56 MB]".
    base_name = re.sub(r"\[.*?\]", "", base_name)

    # Delete all digits.
    base_name = re.sub(r"\d+", "", base_name)

    # Strip numeric prefixes such as "4940-" or "NO.8152".
    # NOTE: every digit was already removed above, so these two rules can no
    # longer match; they are kept to preserve the original pipeline.
    base_name = re.sub(r"^\d+[-.\s]*", "", base_name, flags=re.IGNORECASE)
    base_name = re.sub(r"^NO.\d+\s*", "", base_name, flags=re.IGNORECASE)

    forbidden_phrases = ["内购无水印", "图片"]  # extend with more banned phrases as needed
    for phrase in forbidden_phrases:
        base_name = base_name.replace(phrase, "")

    # Collapse repeated words.
    base_name = re.sub(r"([\u4e00-\u9fa5\w]+)(_+\1)+", r"\1", base_name)
    base_name = re.sub(r"(\b\w+\b)(_\1)+", r"\1", base_name)  # underscore-separated repeats
    base_name = re.sub(r"(\b\w+\b)( \1)+", r"\1", base_name)  # space-separated repeats

    # Keep only CJK/word characters and whitespace.
    base_name = re.sub(r"[^\u4e00-\u9fa5\w\s]", "", base_name)

    # Normalise non-breaking spaces and collapse whitespace runs.
    base_name = base_name.replace("\xa0", " ").strip()
    base_name = re.sub(r"\s+", " ", base_name)

    # Spaces become underscores; runs of underscores are squeezed.
    base_name = base_name.strip().replace(" ", "_")
    base_name = re.sub(r"_+", "_", base_name)

    # A name made only of digits/underscores carries no information.
    if re.fullmatch(r"[_\d]+", base_name):
        return None

    return base_name + ext if base_name else None


def unique_folder_path(folder_path):
    """Return ``folder_path``, or a numbered variant if it already exists.

    While the candidate exists on disk, ``_1``, ``_2``, ... is appended to
    the part of the path before its extension (as split by
    ``os.path.splitext``).

    :param folder_path: desired target path
    :return: a path that did not exist at the time of the check
    """
    base, ext = os.path.splitext(folder_path)
    counter = 1
    while os.path.exists(folder_path):
        folder_path = f"{base}_{counter}{ext}"
        counter += 1
    return folder_path


def process_images_and_clean_folders(base_path):
    """Clean every folder name under ``base_path`` and number its images.

    Each visited folder (including ``base_path`` itself) is renamed to the
    result of ``clean_folder_name``; if that name is taken, a unique variant
    is chosen via ``unique_folder_path``. Image files (see
    ``IMAGE_EXTENSIONS``) inside the folder are then renamed to ``001.ext``,
    ``002.ext``, ... in the order reported by ``os.walk``.

    :param base_path: root directory to process
    """
    # Walk bottom-up: a folder is renamed only after all of its sub-folders
    # have been handled, so a rename never invalidates a path that os.walk
    # still has to visit.
    for root, _dirs, files in os.walk(base_path, topdown=False):
        folder_name = os.path.basename(root)
        parent_path = os.path.dirname(root)
        parent_folder_name = os.path.basename(parent_path)
        cleaned_name = clean_folder_name(folder_name, parent_folder_name)

        if not cleaned_name:
            print(f"跳过无效文件夹:{root}")
            continue

        # Rename only when cleaning actually changed the name.
        if cleaned_name != folder_name:
            new_folder_path = unique_folder_path(os.path.join(parent_path, cleaned_name))
            os.rename(root, new_folder_path)
            print(f"文件夹重命名:{root} -> {new_folder_path}")
        else:
            print(f"保持原始文件夹名:{root}")
            new_folder_path = root

        image_count = 1
        for file in files:
            file_path = os.path.join(new_folder_path, file)
            file_ext = os.path.splitext(file)[1].lower()

            if file_ext not in IMAGE_EXTENSIONS:
                print(f"跳过非图片文件:{file_path}")
                continue

            # Skip numbers already occupied by a *different* file so that a
            # rename can never overwrite an existing image.
            while True:
                new_file_name = f"{image_count:03d}{file_ext}"
                new_file_path = os.path.join(new_folder_path, new_file_name)
                if not os.path.exists(new_file_path) or os.path.samefile(new_file_path, file_path):
                    break
                image_count += 1

            if file != new_file_name:
                os.rename(file_path, new_file_path)
                print(f"图片文件重命名:{file_path} -> {new_file_path}")
            else:
                print(f"保持原始文件名:{file_path}")
            image_count += 1


def process_video_files(base_path):
    """Clean the names of all video files under ``base_path``.

    Files whose extension is not a known video type are skipped. When
    ``clean_video_name`` yields nothing, the file gets a per-folder running
    number instead (``001.mp4``, ``002.mp4``, ...). If the target name is
    already taken, ``_01``, ``_02``, ... is appended to the base name.

    :param base_path: root directory to process
    """
    video_extensions = (".mp4", ".avi", ".mkv", ".mov", ".flv")

    for root, _, files in os.walk(base_path):
        video_count = 1  # numbering restarts in every folder

        for file in files:
            file_path = os.path.join(root, file)
            ext = os.path.splitext(file)[1]

            if ext.lower() not in video_extensions:
                print(f"跳过非视频文件:{file}")
                continue

            cleaned_name = clean_video_name(file)
            if not cleaned_name:
                # Nothing meaningful left: fall back to a running number.
                cleaned_name = f"{video_count:03d}{ext}"
                video_count += 1

            new_file_path = os.path.join(root, cleaned_name)
            if file_path == new_file_path:
                continue

            # Append a numeric suffix until the target name is free.
            stem, suffix = os.path.splitext(cleaned_name)
            counter = 1
            while os.path.exists(new_file_path):
                new_file_path = os.path.join(root, f"{stem}_{counter:02d}{suffix}")
                counter += 1

            try:
                os.rename(file_path, new_file_path)
            except OSError as e:
                # Best effort: report the failure and carry on with the next file.
                print(f"文件重命名异常:{e}")
            else:
                print(f"文件重命名:{file_path} -> {new_file_path}")


if __name__ == "__main__":
    # Root directory to tidy up; replace with your own path.
    target_dir = r"E:\COLORFUL"

    # First clean folder names and number the images, then clean video names.
    process_images_and_clean_folders(target_dir)
    process_video_files(target_dir)

database.py
import os
import re
import sqlite3
import py7zr
import zipfile
import json
from datetime import datetime
from PIL import Image
from moviepy.video.io.VideoFileClip import VideoFileClip


def analyze_image(file_path):
    """Return the resolution of an image file as ``"<width>x<height>"``.

    Any failure while opening or reading the image is reported on stdout and
    the sentinel ``"-1x-1"`` is returned instead of raising.

    :param file_path: path of the image file
    :return: resolution string such as ``"1920x1080"``, or ``"-1x-1"`` on error
    """
    try:
        with Image.open(file_path) as img:
            width, height = img.size
        return f"{width}x{height}"
    except Exception as exc:
        # Deliberately best-effort: one unreadable file must not abort a scan.
        print(f"Error analyzing image file: {file_path} ({exc})")
        return "-1x-1"


def analyze_video(file_path):
    """Return the resolution and duration of a video file.

    Any failure while opening or reading the video is reported on stdout and
    the sentinel ``("-1x-1", -1)`` is returned instead of raising.

    :param file_path: path of the video file
    :return: tuple ``(resolution, duration)`` where resolution is a string
        such as ``"1920x1080"`` and duration is in whole seconds
    """
    try:
        video = VideoFileClip(file_path)
        try:
            width, height = video.size
            duration = int(video.duration)
        finally:
            # Always release the clip's underlying reader.
            video.close()
        return f"{width}x{height}", duration
    except Exception as exc:
        # Deliberately best-effort: one unreadable file must not abort a scan.
        print(f"Error analyzing video file: {file_path} ({exc})")
        return "-1x-1", -1



# Initialise the database schema.
def initialize_database(db_path):
    """Create all tables used by the file manager if they do not exist yet.

    Creates ``resources`` (main table), ``themes`` (per-theme statistics) and
    the detail tables ``images``, ``videos``, ``novels`` and ``archives``,
    each of which references ``resources(id)``. Safe to call repeatedly.

    :param db_path: path of the SQLite database file
    """
    schema_statements = (
        # Main table
        """
        CREATE TABLE IF NOT EXISTS resources (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            name TEXT NOT NULL,
            type TEXT NOT NULL,
            theme TEXT,
            path TEXT NOT NULL,
            tags TEXT,
            size INTEGER,
            source TEXT,
            source_link TEXT,
            compressed INTEGER DEFAULT 0,
            archive_path TEXT,
            created_at TEXT,
            updated_at TEXT,
            attribute1 TEXT,
            attribute2 TEXT,
            attribute3 TEXT
        )
        """,
        # Theme statistics
        """
        CREATE TABLE IF NOT EXISTS themes (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            name TEXT NOT NULL UNIQUE,
            file_count INTEGER DEFAULT 0,
            file_types TEXT,
            total_size INTEGER DEFAULT 0,
            average_file_size REAL DEFAULT 0
        )
        """,
        # Image details
        """
        CREATE TABLE IF NOT EXISTS images (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            resource_id INTEGER NOT NULL,
            resolution TEXT,
            format TEXT,
            color_mode TEXT,
            FOREIGN KEY (resource_id) REFERENCES resources(id)
        )
        """,
        # Video details
        """
        CREATE TABLE IF NOT EXISTS videos (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            resource_id INTEGER NOT NULL,
            duration INTEGER,
            resolution TEXT,
            format TEXT,
            frame_rate REAL,
            FOREIGN KEY (resource_id) REFERENCES resources(id)
        )
        """,
        # Novel details
        """
        CREATE TABLE IF NOT EXISTS novels (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            resource_id INTEGER NOT NULL,
            title TEXT,
            author TEXT,
            word_count INTEGER,
            genre TEXT,
            FOREIGN KEY (resource_id) REFERENCES resources(id)
        )
        """,
        # Archive details
        """
        CREATE TABLE IF NOT EXISTS archives (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            resource_id INTEGER,
            contents TEXT,
            extracted_path TEXT,
            format TEXT,
            password TEXT,
            FOREIGN KEY (resource_id) REFERENCES resources(id)
        )
        """,
    )

    conn = sqlite3.connect(db_path)
    try:
        cursor = conn.cursor()
        for statement in schema_statements:
            cursor.execute(statement)
        conn.commit()
    finally:
        # Close the connection even if a statement fails.
        conn.close()
    print("Database initialized successfully.")



# Add a resource.
def add_resource(conn, name, resource_type, theme, path, tags, size, source, source_link):
    """Insert one row into ``resources`` and commit.

    ``compressed`` is stored as 0 and both ``created_at`` and ``updated_at``
    are set to the current local time.

    :param conn: open SQLite connection
    :param name: resource name
    :param resource_type: value for the ``type`` column
    :param theme: theme the resource belongs to
    :param path: location of the file on disk
    :param tags: tag string
    :param size: file size
    :param source: origin of the resource
    :param source_link: link to the origin
    :return: id of the newly inserted row
    """
    cursor = conn.cursor()
    created_at = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    cursor.execute(
        """
        INSERT INTO resources (name, type, theme, path, tags, size, source, source_link, compressed, created_at, updated_at)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?, 0, ?, ?)
        """,
        (name, resource_type, theme, path, tags, size, source, source_link, created_at, created_at),
    )
    conn.commit()
    print(f"Resource '{name}' added successfully.")
    return cursor.lastrowid


# Delete a resource.
def delete_resource(conn, resource_id):
    """Delete a resource's file on disk and its database records.

    For a compressed resource the archive at ``archive_path`` is removed,
    otherwise the file at ``path``. Rows in the detail tables that reference
    the resource are removed together with the ``resources`` row so no
    orphaned records remain.

    :param conn: open SQLite connection
    :param resource_id: id of the resource to delete
    """
    cursor = conn.cursor()
    cursor.execute("SELECT path, archive_path, compressed FROM resources WHERE id = ?", (resource_id,))
    resource = cursor.fetchone()
    if not resource:
        print(f"Resource ID {resource_id} not found.")
        return

    path, archive_path, compressed = resource

    # Remove the archive or the plain file, whichever currently exists.
    if compressed:
        if archive_path and os.path.exists(archive_path):
            os.remove(archive_path)
            print(f"Compressed file {archive_path} deleted.")
    else:
        if path and os.path.exists(path):
            os.remove(path)
            print(f"File {path} deleted.")

    # Remove dependent detail rows first; the table names are fixed
    # constants, so building the statement with an f-string is safe.
    for child_table in ("images", "videos", "novels", "archives"):
        cursor.execute(f"DELETE FROM {child_table} WHERE resource_id = ?", (resource_id,))
    cursor.execute("DELETE FROM resources WHERE id = ?", (resource_id,))
    conn.commit()
    print(f"Resource ID {resource_id} deleted successfully.")


# Compress a resource and delete the original file.
def compress_and_delete(conn, resource_id, archive_format='7z', password=None):
    """Pack a resource's file into an archive and delete the original.

    The archive is written next to the file, with the file's extension
    replaced by ``archive_format``. On success the ``resources`` row is
    marked ``compressed = 1`` and ``archive_path`` is stored.

    :param conn: open SQLite connection
    :param resource_id: id of the resource to compress
    :param archive_format: ``'7z'`` or ``'zip'``
    :param password: password for the 7z archive; ignored for zip because
        the standard ``zipfile`` module cannot write encrypted archives
    """
    cursor = conn.cursor()
    cursor.execute("SELECT path, name FROM resources WHERE id = ?", (resource_id,))
    resource = cursor.fetchone()
    if not resource:
        print(f"Resource ID {resource_id} not found.")
        return

    if archive_format not in ('7z', 'zip'):
        print(f"Unsupported archive format: {archive_format}")
        return

    file_path, file_name = resource
    archive_path = f"{os.path.splitext(file_path)[0]}.{archive_format}"

    # Refuse to proceed when there is nothing to pack, or when the archive
    # would be written over the very file it is supposed to contain.
    if not os.path.isfile(file_path):
        print(f"Source file {file_path} not found; nothing to compress.")
        return
    if os.path.abspath(archive_path) == os.path.abspath(file_path):
        print(f"Archive path equals source path {file_path}; skipping.")
        return

    try:
        if archive_format == '7z':
            with py7zr.SevenZipFile(archive_path, 'w', password=password) as archive:
                archive.write(file_path, arcname=file_name)
        else:
            if password:
                print("Warning: zip archives are written without encryption; password ignored.")
            with zipfile.ZipFile(archive_path, 'w', zipfile.ZIP_DEFLATED) as archive:
                archive.write(file_path, arcname=file_name)
    except Exception:
        # Do not leave a partial archive behind; the original is untouched.
        if os.path.exists(archive_path):
            os.remove(archive_path)
        raise

    # Only delete the original once the archive has been written completely.
    os.remove(file_path)
    print(f"Compressed {file_path} to {archive_path} and deleted original file.")

    cursor.execute(
        """
        UPDATE resources
        SET compressed = 1, archive_path = ?
        WHERE id = ?
        """,
        (archive_path, resource_id),
    )
    conn.commit()


# Decompress a resource.
def auto_decompress(conn, resource_id, password=None):
    """Extract a compressed resource next to its original path.

    The archive is extracted into the directory of the resource's ``path``,
    then deleted, and the ``resources`` row is reset to ``compressed = 0``
    with ``archive_path`` cleared.

    :param conn: open SQLite connection
    :param resource_id: id of the resource to extract
    :param password: archive password, applied to both 7z and zip archives
    """
    cursor = conn.cursor()
    cursor.execute("SELECT archive_path, path, compressed FROM resources WHERE id = ?", (resource_id,))
    resource = cursor.fetchone()
    if not resource:
        print(f"Resource ID {resource_id} not found.")
        return

    archive_path, original_path, compressed = resource
    if not compressed:
        print(f"Resource ID {resource_id} is not compressed.")
        return
    if not archive_path:
        # Flagged as compressed but no archive recorded: nothing to extract.
        print(f"Unsupported archive format: {archive_path}")
        return

    # An empty dirname means the current directory.
    extract_dir = os.path.dirname(original_path) or "."
    lowered = archive_path.lower()
    if lowered.endswith('.7z'):
        with py7zr.SevenZipFile(archive_path, 'r', password=password) as archive:
            archive.extractall(path=extract_dir)
    elif lowered.endswith('.zip'):
        # zipfile expects the password as bytes.
        pwd = password.encode("utf-8") if password else None
        with zipfile.ZipFile(archive_path, 'r') as archive:
            archive.extractall(path=extract_dir, pwd=pwd)
    else:
        print(f"Unsupported archive format: {archive_path}")
        return

    print(f"Decompressed {archive_path} to {original_path}")

    # The archive is no longer needed once its content is extracted.
    if os.path.exists(archive_path):
        os.remove(archive_path)
        print(f"Deleted archive file: {archive_path}")

    cursor.execute(
        """
        UPDATE resources
        SET compressed = 0, archive_path = NULL
        WHERE id = ?
        """,
        (resource_id,),
    )
    conn.commit()


def clean_folder_name(folder_name, parent_folder_name=None):
    """Return a sanitised folder (or file) name used as a theme label.

    Underscore-separated repeats are collapsed, ``[...]`` groups and a
    leading number (``"4940-"``) are removed, special characters are dropped
    and whitespace/underscores are normalised to single spaces. If nothing
    meaningful is left, the parent folder name (or ``"default"``) is used.

    :param folder_name: original folder name
    :param parent_folder_name: name of the containing folder, used when the
        cleaned name is meaningless
    :return: cleaned name, or ``None`` when it ends up empty
    """
    folder_name = re.sub(r"([\u4e00-\u9fa5\w]+)(_+\1)+", r"\1", folder_name)  # collapse repeats
    folder_name = re.sub(r"\[.*?\]", "", folder_name)  # drop bracketed groups
    folder_name = re.sub(r"^\d+[-.\s]*", "", folder_name)  # drop a leading number
    folder_name = re.sub(r"[^\u4e00-\u9fa5\w\s]", "", folder_name)  # drop special characters
    folder_name = re.sub(r"\s+", "_", folder_name.strip())  # whitespace -> underscore

    # Empty, or only digits/underscores: fall back to the parent's name.
    if not folder_name or re.fullmatch(r"[_\d]+", folder_name):
        folder_name = f"{parent_folder_name or 'default'}-{folder_name}".strip("-")

    folder_name = re.sub(r"_+", "_", folder_name)  # squeeze underscore runs
    folder_name = re.sub("_", " ", folder_name)  # underscores become spaces

    return folder_name if folder_name else None


def scan_image_video_and_add_resources(conn, base_dir, source="local", source_link="N/A", max_file_size=15 * 1024 * 1024):
    """Scan ``base_dir`` for images and videos and record them in the database.

    Every image/video file gets a row in ``resources`` plus a detail row in
    ``images`` or ``videos``. The cleaned name of the containing folder is
    used as the file's theme, and per-theme totals are upserted into
    ``themes`` at the end. Everything is committed once, after the scan.

    Images larger than ``max_file_size`` are not recorded at all; their path
    is appended to ``large_file.txt`` in the current working directory.

    The stored image colour mode (``"RGB"``) and video frame rate (``30.0``)
    are fixed placeholder values, not measured from the files.

    :param conn: open SQLite connection
    :param base_dir: root directory to scan
    :param source: value stored in ``resources.source``
    :param source_link: value stored in ``resources.source_link``
    :param max_file_size: size limit in bytes for images
    """
    image_types = ("jpg", "jpeg", "png", "gif", "bmp")
    video_types = ("mp4", "avi", "mkv", "mov", "flv")
    large_files_log = "large_file.txt"

    cursor = conn.cursor()
    theme_stats = {}  # theme -> file count, total size and per-type counts

    for root, _, files in os.walk(base_dir):
        theme = clean_folder_name(os.path.basename(root))
        # Folders that clean to the same theme share one statistics entry.
        stats = theme_stats.setdefault(
            theme, {'file_count': 0, 'total_size': 0, 'file_types': {}}
        )

        for file in files:
            file_type = os.path.splitext(file)[1][1:].lower() or "unknown"
            is_image = file_type in image_types

            # Skip anything that is neither an image nor a video.
            if not is_image and file_type not in video_types:
                continue

            file_path = os.path.join(root, file)
            file_size = os.path.getsize(file_path)

            # Check the size limit before touching the database so a skipped
            # file leaves no half-recorded resource row behind.
            if is_image and file_size > max_file_size:
                with open(large_files_log, "a", encoding="utf-8") as log_file:
                    log_file.write(f"File too large: {file_path} ({file_size} bytes)\n")
                print(f"Skipped large file: {file_path}")
                continue

            created_at = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
            cursor.execute(
                """
                INSERT INTO resources (name, type, theme, path, tags, size, source, source_link, compressed, created_at, updated_at)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?, 0, ?, ?)
                """,
                (file, file_type, theme, file_path, "", file_size, source, source_link, created_at, created_at),
            )
            resource_id = cursor.lastrowid

            if is_image:
                resolution = analyze_image(file_path)
                cursor.execute(
                    """
                    INSERT INTO images (resource_id, resolution, format, color_mode)
                    VALUES (?, ?, ?, ?)
                    """,
                    (resource_id, resolution, file_type.upper(), "RGB"),
                )
            else:
                resolution, duration = analyze_video(file_path)
                cursor.execute(
                    """
                    INSERT INTO videos (resource_id, duration, resolution, format, frame_rate)
                    VALUES (?, ?, ?, ?, ?)
                    """,
                    (resource_id, duration, resolution, file_type.upper(), 30.0),  # frame rate defaults to 30
                )

            stats['file_count'] += 1
            stats['total_size'] += file_size
            stats['file_types'][file_type] = stats['file_types'].get(file_type, 0) + 1

    # Upsert the per-theme statistics.
    for theme, stats in theme_stats.items():
        file_types_json = json.dumps(stats['file_types'])
        average_size = stats['total_size'] / stats['file_count'] if stats['file_count'] > 0 else 0
        cursor.execute(
            """
            INSERT INTO themes (name, file_count, file_types, total_size, average_file_size)
            VALUES (?, ?, ?, ?, ?)
            ON CONFLICT(name) DO UPDATE SET
                file_count=excluded.file_count,
                file_types=excluded.file_types,
                total_size=excluded.total_size,
                average_file_size=excluded.average_file_size
            """,
            (theme, stats['file_count'], file_types_json, stats['total_size'], average_size),
        )

    conn.commit()
    print(f"Scanned directory '{base_dir}' and added resources to the database.")


if __name__ == "__main__":
    db_path = 'file_manager.db'
    initialize_database(db_path)
    conn = sqlite3.connect(db_path)

    try:
        # Directory to scan; replace with your own path.
        base_dir = r"E:\COLORFUL"

        # Scan the directory and add everything found to the database.
        scan_image_video_and_add_resources(conn, base_dir, source="local", source_link="N/A")
    finally:
        # Always close the database connection.
        conn.close()

好评如潮 100%
文件解压教程

首先准备好解压工具, 电脑端安装 WINRAR, 手机端安装 Zarchiver 或者 ES文件管理器

然后有2种类型的压缩包:

1. 单一压缩文件的(可以单独下载和解压)

- 如果后缀名正常: 直接打开文件 > 输入密码 >解压文件 > 解压成功, 有的情况会有双层压缩, 再继续解压即可

- 如果需要修改后缀名: 不需要管文件原本后缀是什么,只要是压缩文件,后缀直接改成 .rar, 然后用解压工具打开,工具会自动识别正确的类型, 然后解压即可, (有的系统默认不能更改后缀名,这种情况, 要先百度下如何显示文件后缀名).

2. 多个压缩分卷文件的 (需要全部下载完毕后 才能正确解压)

- 如果后缀名正常: 只需要解压第一个分卷即可, 工具在解压过程中会自动调用其他分卷, 不需要每个分卷都解压一遍 (所以需要提前全部下载好), 不同压缩格式的第一个分卷命名是有区别的 (RAR格式的第一个分卷是叫 xxx.part1.rar , 7z格式的第一个压缩分卷是叫 xxx.001 , ZIP格式的第一个压缩分卷 就是默认的 XXX.zip ) .

- 如果是需要改后缀的情况 (比较少见): 需要把文件按顺序重新命名好才能正常解压, RAR的分卷命名格式是 xxx.part1.rar, xxx.part2.rar, xxx.part3.rar, 7z的命名格式是 xxx.001, xxx.002, xxx.003, ZIP的排序格式 xxx.zip, xxx.zip.001, xxx.zip.002

1 条评论
用户头像

天天在家手冲会不会阳痿? 如何锻炼自己的牛子持久不射? :?:

我也想像哥布林一样一天一个女骑士。:?:

那就快去“A酱的绅士玩具屋”吧, 初音社为大家申请到了限时粉丝专属价, 只有和客服A酱说是初音社来的就可以享受到优惠哦!~ :|

戳这里即可拥有>> 一个榨汁飞(lao)机(po)杯,快来我和签订契约成为绅(hen)士(tai)吧!

2024-03-09 12:12:12
淘宝店
初音社