235 lines
7.7 KiB
Python
235 lines
7.7 KiB
Python
# -*- coding: utf-8 -*-
|
||
"""筛选个人主页模板:删除非静态项,并按网页 title 重命名顶层文件夹。"""
|
||
from __future__ import annotations
|
||
|
||
import re
|
||
import shutil
|
||
import sys
|
||
import uuid
|
||
from pathlib import Path
|
||
|
||
# Directory whose immediate sub-folders are the individual homepage templates.
BASE = Path(
    r"d:\SmyProjects\Frontend-Backend\InfoGenie\infogenie-frontend\public\toolbox\个人主页模板"
)

# A folder with no HTML cannot be served as a static page; a folder with this many
# PHP files (or more) is treated as needing a server, i.e. not purely static.
PHP_THRESHOLD = 5
|
||
|
||
|
||
def count_html(root: Path) -> int:
    """Return how many regular files below *root* end in ``.html`` or ``.htm``.

    The search is recursive and the extension comparison ignores case.
    """
    html_suffixes = (".html", ".htm")
    return sum(
        1
        for entry in root.rglob("*")
        if entry.is_file() and entry.suffix.lower() in html_suffixes
    )
|
||
|
||
|
||
def count_php(root: Path) -> int:
    """Return how many regular files below *root* have a ``.php`` extension.

    The search is recursive. The extension is compared case-insensitively, the
    same way ``count_html`` treats HTML files, so ``INDEX.PHP`` is counted on
    every platform (a plain ``rglob("*.php")`` is case-sensitive on POSIX).
    """
    return sum(
        1
        for entry in root.rglob("*")
        if entry.is_file() and entry.suffix.lower() == ".php"
    )
|
||
|
||
|
||
def extract_title_from_text(text: str) -> str | None:
    """Extract a folder-name candidate from an HTML document.

    The first ``<title>`` element is tried, then the first ``<h1>``. For each
    candidate, nested tags are removed, HTML entities are decoded, whitespace is
    collapsed, characters that are illegal in Windows file names are dropped and
    leading/trailing separators (space, ``-``, ``_``, ``|``) are trimmed.

    Args:
        text: The HTML source.

    Returns:
        The cleaned title if it is non-empty and shorter than 100 characters,
        otherwise ``None``.
    """
    import html  # local import so the block does not depend on file-level imports

    for pattern in (
        r"<title\b[^>]*>(.*?)</title\s*>",
        r"<h1\b[^>]*>(.*?)</h1\s*>",
    ):
        # DOTALL lets the element content span several lines.
        m = re.search(pattern, text, re.I | re.DOTALL)
        if not m:
            continue
        # Strip nested markup first, then decode entities, so that an escaped
        # "&lt;b&gt;" is never mistaken for a real tag.
        raw = re.sub(r"<[^>]+>", "", m.group(1))
        raw = html.unescape(raw)
        title = re.sub(r"\s+", " ", raw).strip()
        title = re.sub(r'[<>:"/\\|?*]', "", title)
        title = title.strip(" -_|")
        if title and len(title) < 100:
            return title
    return None
|
||
|
||
|
||
def read_html(path: Path) -> str:
    """Read an HTML file, guessing its text encoding.

    Encodings are tried in order: ``utf-8-sig`` (plain UTF-8, with any BOM
    removed), ``gbk``, then ``gb18030``. If none decodes cleanly the file is
    read as UTF-8 with undecodable bytes dropped.

    Args:
        path: File to read.

    Returns:
        The decoded text.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # "utf-8-sig" must come first: plain "utf-8" also accepts a BOM-prefixed
    # file but leaves U+FEFF at the start of the text.
    for enc in ("utf-8-sig", "gbk", "gb18030"):
        try:
            return path.read_text(encoding=enc)
        except UnicodeDecodeError:
            # Wrong guess; try the next encoding. I/O errors are not retried.
            continue
    return path.read_text(encoding="utf-8", errors="ignore")
|
||
|
||
|
||
def find_entry_html(folder: Path) -> Path | None:
    """Locate the HTML file used to represent *folder*.

    Any ``index.html`` found by a recursive search wins, then any
    ``index.htm``; failing that, the first ``.html``/``.htm`` file (extension
    compared case-insensitively) encountered in a recursive walk is used.

    Returns:
        The chosen file, or ``None`` when the folder holds no HTML file.
    """

    def first_file(candidates):
        # First candidate that is a regular file, or None.
        return next((c for c in candidates if c.is_file()), None)

    for index_name in ("index.html", "index.htm"):
        hit = first_file(folder.rglob(index_name))
        if hit is not None:
            return hit

    return first_file(
        c for c in folder.rglob("*") if c.suffix.lower() in (".html", ".htm")
    )
|
||
|
||
|
||
def sanitize_folder_name(name: str) -> str:
    """Turn *name* into a string usable as a Windows folder name.

    The characters ``<>:"/\\|?*`` are removed, surrounding spaces and dots are
    stripped, and the result is limited to 120 characters. Windows does not
    allow a name to end in a space or a dot, so trailing whitespace and dots
    are also removed after truncation.

    Returns:
        The sanitized name, or the placeholder ``"未命名模板"`` if nothing is left.
    """
    cleaned = name.translate(str.maketrans("", "", '<>:"/\\|?*')).strip(" .")
    if len(cleaned) > 120:
        # Truncation can expose a dot or space at the cut point; trim it too.
        cleaned = re.sub(r"[\s.]+$", "", cleaned[:120])
    return cleaned or "未命名模板"
|
||
|
||
|
||
def ps_single_quote(s: str) -> str:
    """Return *s* as a PowerShell single-quoted string literal.

    PowerShell accepts the typographic quotes U+2018, U+2019, U+201A and U+201B
    as single-quote delimiters in addition to the ASCII apostrophe, so every
    one of them is doubled; otherwise a value such as ``John’s page`` would
    terminate the literal early.
    """
    quote_chars = "'\u2018\u2019\u201a\u201b"
    escaped = "".join(ch * 2 if ch in quote_chars else ch for ch in s)
    return "'" + escaped + "'"
|
||
|
||
|
||
def write_rename_ps1(renames: list[tuple[str, str]], out_path: Path) -> None:
    """Write a PowerShell script that performs *renames* in two phases.

    Each folder is first renamed to a unique temporary name and only then to
    its final name, which avoids clashes such as A→B combined with B→A.
    Pairs whose old and new names are equal are left out.

    Args:
        renames: ``(old_name, new_name)`` pairs, relative to ``BASE``.
        out_path: Destination of the script (written as UTF-8 with BOM).
    """
    effective = [(o, n) for o, n in renames if o != n]
    run_id = uuid.uuid4().hex[:8]
    temp_names = [f"__tmp_rename_{run_id}_{idx}__" for idx in range(len(effective))]

    def rename_cmd(source: str, target: str) -> str:
        # One Rename-Item statement operating on a child of $base.
        return (
            f"Rename-Item -LiteralPath (Join-Path $base {ps_single_quote(source)}) "
            f"-NewName {ps_single_quote(target)}"
        )

    script = [
        "# 由 tidy_profile_templates.py 生成:关闭占用该目录的 IDE/资源管理器窗口后执行。",
        "$ErrorActionPreference = 'Stop'",
        f"$base = {ps_single_quote(str(BASE))}",
        "",
        "# 阶段 1:改为临时名",
    ]
    script += [rename_cmd(old, tmp) for (old, _new), tmp in zip(effective, temp_names)]
    script += ["", "# 阶段 2:改为最终名"]
    script += [rename_cmd(tmp, new) for (_old, new), tmp in zip(effective, temp_names)]

    out_path.write_text("\n".join(script) + "\n", encoding="utf-8-sig")
|
||
|
||
|
||
def compute_renames(remaining: list[Path]) -> list[tuple[str, str]]:
    """Plan title-based renames for the given template folders.

    For each folder the entry HTML file is located and read, its title is
    extracted and sanitized, and that becomes the new folder name. Folders
    without an entry file, an unreadable entry file or no usable title are
    skipped, as are folders already carrying the right name. If the wanted
    name is already taken, ``-2``, ``-3`` ... is appended until it is free.

    Name collisions are detected case-insensitively because the target file
    system (Windows) treats ``Home`` and ``home`` as the same folder.

    Args:
        remaining: Folders to consider; their names also seed the set of
            names regarded as taken.

    Returns:
        ``(old_name, new_name)`` pairs in the order of *remaining*.
    """
    # casefold() approximates the file system's case-insensitive comparison;
    # where it is stricter it only causes an unnecessary numeric suffix.
    taken: set[str] = {c.name.casefold() for c in remaining}
    renames: list[tuple[str, str]] = []
    for folder in remaining:
        entry = find_entry_html(folder)
        if not entry:
            continue
        try:
            text = read_html(entry)
        except OSError:
            continue
        title = extract_title_from_text(text)
        if not title:
            continue
        new_name = sanitize_folder_name(title)
        if new_name == folder.name:
            continue

        own_key = folder.name.casefold()
        final = new_name
        # A case-only change of the folder's own name is not a collision.
        if final.casefold() in taken and final.casefold() != own_key:
            i = 2
            while f"{new_name}-{i}".casefold() in taken:
                i += 1
            final = f"{new_name}-{i}"

        # The old name becomes available to later folders (renames are two-phase).
        taken.discard(own_key)
        taken.add(final.casefold())
        renames.append((folder.name, final))
    return renames
|
||
|
||
|
||
def _classify_for_deletion(children: list[Path]) -> list[tuple[Path, str, int, int]]:
    """Return ``(folder, reason, html_count, php_count)`` for each folder to delete."""
    doomed: list[tuple[Path, str, int, int]] = []
    for child in children:
        html_count, php_count = count_html(child), count_php(child)
        if html_count == 0:
            doomed.append((child, "no_html", html_count, php_count))
        elif php_count >= PHP_THRESHOLD:
            doomed.append((child, "php_heavy", html_count, php_count))
    return doomed


def _apply_renames(renames: list[tuple[str, str]]) -> list[tuple[str, str]]:
    """Rename folders under ``BASE`` in two phases and return the unfinished pairs.

    A folder whose second phase cannot complete is moved back to its original
    name so that it is never left under a temporary name. The returned pairs
    are ``(current_name, wanted_name)`` for every rename that is still pending.
    """
    pairs = [(o, n) for o, n in renames if o != n]
    tag = uuid.uuid4().hex[:8]
    staged: list[tuple[str, str, str]] = []
    pending: list[tuple[str, str]] = []

    # Phase 1: move every folder to a unique temporary name.
    for i, (old_name, new_name) in enumerate(pairs):
        mid = f"__tmp_rename_{tag}_{i}__"
        src = BASE / old_name
        if not src.is_dir():
            continue
        try:
            src.rename(BASE / mid)
        except OSError as e:
            print(f"阶段1 重命名失败(可稍后运行脚本): {old_name} -> {mid} {e}")
            pending.append((old_name, new_name))
        else:
            staged.append((old_name, mid, new_name))

    # Phase 2: move from the temporary name to the final name.
    for old_name, mid, new_name in staged:
        src = BASE / mid
        dst = BASE / new_name
        if not src.is_dir():
            continue
        if dst.exists():
            print(f"跳过(目标已存在): {mid} -> {new_name}")
        else:
            try:
                src.rename(dst)
            except OSError as e:
                print(f"阶段2 重命名失败(可稍后运行脚本): {mid} -> {new_name} {e}")
            else:
                print(f"重命名: {old_name} -> {new_name}")
                continue

        # Roll back so the folder keeps a meaningful name and the fallback
        # script can refer to it.
        original = BASE / old_name
        if original.exists():
            # Another folder has meanwhile been renamed to this folder's old name.
            print(f"无法回滚(原名已被占用),目录暂留临时名: {mid}")
            pending.append((mid, new_name))
            continue
        try:
            src.rename(original)
        except OSError as e:
            print(f"回滚失败,目录暂留临时名: {mid} {e}")
            pending.append((mid, new_name))
        else:
            pending.append((old_name, new_name))
    return pending


def main() -> None:
    """Command-line entry point.

    Flags read from ``sys.argv``:

    * ``--apply``: actually delete and rename; without it the run is a dry run
      that only prints the plan.
    * ``--write-rename-script``: in a dry run, also write the PowerShell
      rename script.
    * ``--rename-only``: skip deletion, write the rename script and exit.

    Exits with status 1 if ``BASE`` is not a directory.
    """
    dry = "--apply" not in sys.argv
    write_script = "--write-rename-script" in sys.argv
    rename_only = "--rename-only" in sys.argv
    script_path = Path(__file__).resolve().parent / "profile_template_renames.ps1"

    if not BASE.is_dir():
        print(f"Missing base: {BASE}")
        sys.exit(1)

    if rename_only:
        remaining = sorted((c for c in BASE.iterdir() if c.is_dir()), key=lambda x: x.name)
        renames = compute_renames(remaining)
        write_rename_ps1(renames, script_path)
        print(f"已写入重命名脚本: {script_path}")
        print("请在关闭占用该文件夹的程序后,在 PowerShell 中执行:")
        print(f" powershell -ExecutionPolicy Bypass -File \"{script_path}\"")
        return

    children = [c for c in BASE.iterdir() if c.is_dir()]
    to_delete = _classify_for_deletion(children)

    print("=== 将删除(非静态或无可展示 HTML)===")
    for path, reason, h, p in sorted(to_delete, key=lambda x: x[0].name):
        print(f" [{reason}] html={h} php={p} {path.name}")

    if dry:
        print("\n[DRY RUN] 加参数 --apply 执行删除与重命名\n")
    else:
        for path, _, _, _ in to_delete:
            shutil.rmtree(path, ignore_errors=False)
            print(f"已删除: {path.name}")

    # In a dry run the doomed folders still exist on disk, so leave them out of
    # the rename plan explicitly; after a real run they are already gone.
    delete_set = {p for p, _, _, _ in to_delete}
    remaining = sorted(
        (c for c in BASE.iterdir() if c.is_dir() and (not dry or c not in delete_set)),
        key=lambda x: x.name,
    )
    renames = compute_renames(remaining)

    print("=== 计划重命名(按页面 title)===")
    for old, new in renames:
        if old != new:
            print(f" {old}\n -> {new}")

    if dry:
        if write_script:
            write_rename_ps1(renames, script_path)
            print(f"\n已写入重命名脚本: {script_path}")
        return

    # Written before renaming so a plan exists even if the process is interrupted.
    write_rename_ps1(renames, script_path)
    print(f"\n已写入重命名脚本(若本机重命名失败可手动执行): {script_path}")

    pending = _apply_renames(renames)
    if pending:
        # The original script refers to folders that have since been renamed;
        # replace it with one covering only what is still outstanding.
        write_rename_ps1(pending, script_path)
        print(f"已更新重命名脚本(仅含未完成项): {script_path}")
|
||
|
||
# Run the tool only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
|