# -*- coding: utf-8 -*-
"""筛选个人主页模板:删除非静态项,并按网页 title 重命名顶层文件夹。"""
from __future__ import annotations
import re
import shutil
import sys
import uuid
from pathlib import Path
# Directory whose immediate sub-folders are the templates this script triages.
BASE: Path = Path(r"d:\SmyProjects\Frontend-Backend\InfoGenie\infogenie-frontend\public\toolbox\个人主页模板")

# A folder with no HTML cannot be shown as a static page; a folder containing
# at least this many PHP files is treated as needing a server (not purely static).
PHP_THRESHOLD: int = 5
def count_html(root: Path) -> int:
    """Count the HTML files anywhere under *root*.

    Args:
        root: Directory to search recursively.

    Returns:
        Number of regular files whose suffix is ``.html`` or ``.htm``
        (compared case-insensitively). Directories are never counted.
    """
    return sum(
        1
        for p in root.rglob("*")
        if p.is_file() and p.suffix.lower() in (".html", ".htm")
    )
def count_php(root: Path) -> int:
    """Count the PHP files anywhere under *root*.

    The suffix is compared case-insensitively (``.php``, ``.PHP``, ...) so the
    result does not depend on whether the filesystem's glob matching is
    case-sensitive, mirroring how HTML files are counted.

    Args:
        root: Directory to search recursively.

    Returns:
        Number of regular files whose suffix is ``.php``.
    """
    return sum(
        1 for p in root.rglob("*") if p.is_file() and p.suffix.lower() == ".php"
    )
def extract_title_from_text(text: str) -> str | None:
for pattern in (
r"
]*>([^<]*)",
r"]*>([^<]*)
",
):
m = re.search(pattern, text, re.I | re.DOTALL)
if m:
raw = m.group(1)
raw = re.sub(r"<[^>]+>", "", raw)
title = re.sub(r"\s+", " ", raw).strip()
title = re.sub(r'[<>:"/\\|?*]', "", title)
title = title.strip(" -_|")
if title and len(title) < 100:
return title
return None
def read_html(path: Path) -> str:
    """Read *path* as text, guessing among a few likely encodings.

    ``utf-8-sig`` is tried first: it decodes plain UTF-8 as well as UTF-8 with
    a byte-order mark, and strips the mark. (Trying plain ``utf-8`` first would
    accept BOM files too, leaving ``\\ufeff`` at the start of the text and
    making a later ``utf-8-sig`` attempt unreachable.) GBK/GB2312 follow, and
    as a last resort the file is decoded as UTF-8 with undecodable bytes
    dropped.

    Args:
        path: File to read.

    Returns:
        The decoded file contents.

    Raises:
        OSError: If the file cannot be read.
    """
    for enc in ("utf-8-sig", "gbk", "gb2312"):
        try:
            return path.read_text(encoding=enc)
        except UnicodeDecodeError:
            # Wrong guess; try the next encoding. I/O errors are not retried
            # because a different encoding cannot fix them.
            continue
    return path.read_text(encoding="utf-8", errors="ignore")
def find_entry_html(folder: Path) -> Path | None:
for name in ("index.html", "index.htm"):
for p in folder.rglob(name):
if p.is_file():
return p
for p in folder.rglob("*"):
if p.is_file() and p.suffix.lower() in (".html", ".htm"):
return p
return None
def sanitize_folder_name(name: str) -> str:
    """Turn *name* into a string usable as a folder name.

    The characters ``< > : " / \\ | ? *`` are removed, surrounding spaces and
    dots are trimmed, and the result is capped at 120 characters. After
    truncation the tail is trimmed again, because the cut can expose a
    trailing space or dot.

    Args:
        name: Candidate folder name (for example a page title).

    Returns:
        The sanitized name, or ``"未命名模板"`` if nothing is left.
    """
    name = name.translate({ord(c): None for c in '<>:"/\\|?*'})
    name = name.strip(" .")
    if len(name) > 120:
        # Trim whitespace as before, and also dots/spaces uncovered by the cut.
        name = name[:120].rstrip().rstrip(" .")
    return name or "未命名模板"
def ps_single_quote(s: str) -> str:
    """Return *s* as a PowerShell single-quoted string literal.

    Inside a single-quoted PowerShell string a quote character is escaped by
    doubling it. PowerShell accepts the typographic quotes U+2018, U+2019,
    U+201A and U+201B as single-quote delimiters as well, so those are doubled
    too; otherwise a value containing e.g. a curly apostrophe could end the
    literal early.

    Args:
        s: Raw text to embed in a script.

    Returns:
        The quoted literal, including the surrounding ``'`` characters.
    """
    quote_chars = "'\u2018\u2019\u201a\u201b"
    escaped = "".join(ch * 2 if ch in quote_chars else ch for ch in s)
    return "'" + escaped + "'"
def write_rename_ps1(renames: list[tuple[str, str]], out_path: Path) -> None:
    """Write a PowerShell script that performs *renames* in two phases.

    Every folder is first renamed to a unique temporary name and only then to
    its final name, which avoids conflicts such as A→B together with B→A.

    Args:
        renames: ``(old_name, new_name)`` pairs, relative to ``BASE``. Pairs
            whose two names are equal produce no commands.
        out_path: Destination of the script; written as UTF-8 with a BOM.
    """
    run_tag = uuid.uuid4().hex[:8]
    staged = [
        (src, f"__tmp_rename_{run_tag}_{idx}__", dst)
        for idx, (src, dst) in enumerate(
            (src, dst) for src, dst in renames if src != dst
        )
    ]

    def rename_cmd(current: str, target: str) -> str:
        # One Rename-Item call for a direct child of $base.
        return (
            f"Rename-Item -LiteralPath (Join-Path $base {ps_single_quote(current)}) "
            f"-NewName {ps_single_quote(target)}"
        )

    script = [
        "# 由 tidy_profile_templates.py 生成:关闭占用该目录的 IDE/资源管理器窗口后执行。",
        "$ErrorActionPreference = 'Stop'",
        f"$base = {ps_single_quote(str(BASE))}",
        "",
        "# 阶段 1:改为临时名",
    ]
    script += [rename_cmd(src, tmp) for src, tmp, _dst in staged]
    script += ["", "# 阶段 2:改为最终名"]
    script += [rename_cmd(tmp, dst) for _src, tmp, dst in staged]
    out_path.write_text("\n".join(script) + "\n", encoding="utf-8-sig")
def compute_renames(remaining: list[Path]) -> list[tuple[str, str]]:
    """Plan title-based renames for the folders in *remaining*.

    For each folder the entry HTML page is located and its title extracted and
    sanitized. Folders without an entry page, whose page cannot be read, that
    have no usable title, or that already carry the title as their name are
    left alone. When the wanted name is taken, ``-2``, ``-3``, ... is appended
    until a free name is found.

    Names are compared case-insensitively (via ``str.casefold``) so that two
    planned names never differ only by letter case, which would collide on a
    case-insensitive filesystem. A folder's own current name counts as free
    for itself, so a folder already named ``Title-2`` keeps that name instead
    of being bumped to ``Title-3`` on every run.

    Args:
        remaining: Folders to consider, in the order they should be processed.

    Returns:
        ``(old_name, new_name)`` pairs for the folders that need renaming.
    """
    # Reference-count names by case-folded key. A count (rather than a set)
    # stays correct if a case-sensitive filesystem holds siblings that differ
    # only by case.
    taken: dict[str, int] = {}
    for c in remaining:
        key = c.name.casefold()
        taken[key] = taken.get(key, 0) + 1

    renames: list[tuple[str, str]] = []
    for folder in remaining:
        entry = find_entry_html(folder)
        if not entry:
            continue
        try:
            text = read_html(entry)
        except OSError:
            continue
        title = extract_title_from_text(text)
        if not title:
            continue
        new_name = sanitize_folder_name(title)
        if new_name == folder.name:
            continue

        # Release this folder's own name before searching, so the name it
        # already has is a valid candidate and later folders may reuse it.
        taken[folder.name.casefold()] -= 1

        final = new_name
        i = 2
        while taken.get(final.casefold(), 0) > 0:
            final = f"{new_name}-{i}"
            i += 1
        final_key = final.casefold()
        taken[final_key] = taken.get(final_key, 0) + 1

        # The search may land back on the current name; nothing to do then.
        if final != folder.name:
            renames.append((folder.name, final))
    return renames
def _restore_name(tmp_path: Path, old_name: str) -> None:
    """Best-effort: move a folder stuck under a temporary name back to *old_name*."""
    original = BASE / old_name
    if original.exists():
        print(f"回滚失败(原名已被占用,目录暂留临时名): {tmp_path.name} -> {old_name}")
        return
    try:
        tmp_path.rename(original)
    except OSError as e:
        print(f"回滚失败(目录暂留临时名): {tmp_path.name} -> {old_name} {e}")


def _apply_renames(renames: list[tuple[str, str]]) -> None:
    """Rename folders under ``BASE`` in two phases (old -> temp -> new).

    Failures are reported and skipped rather than raised. A folder whose second
    phase cannot complete is moved back to its old name, so it is not left
    under a temporary name that the generated script does not know about.
    """
    tag = uuid.uuid4().hex[:8]
    staged = [
        (old_name, f"__tmp_rename_{tag}_{i}__", new_name)
        for i, (old_name, new_name) in enumerate(
            (o, n) for o, n in renames if o != n
        )
    ]

    # Phase 1: move everything out of the way to unique temporary names.
    for old_name, mid, _new_name in staged:
        src = BASE / old_name
        if not src.is_dir():
            continue
        try:
            src.rename(BASE / mid)
        except OSError as e:
            print(f"阶段1 重命名失败(可稍后运行脚本): {old_name} -> {mid} {e}")

    # Phase 2: temporary name -> final name.
    for old_name, mid, new_name in staged:
        src = BASE / mid
        if not src.is_dir():
            # Phase 1 did not move this folder; it still has its old name.
            continue
        dst = BASE / new_name
        if dst.exists():
            print(f"跳过(目标已存在): {mid} -> {new_name}")
            _restore_name(src, old_name)
            continue
        try:
            src.rename(dst)
        except OSError as e:
            print(f"阶段2 重命名失败(可稍后运行脚本): {mid} -> {new_name} {e}")
            _restore_name(src, old_name)
        else:
            print(f"重命名: {old_name} -> {new_name}")


def main() -> None:
    """Command-line entry point.

    Flags read from ``sys.argv``:

    * ``--apply``: actually delete and rename; without it this is a dry run
      that only prints the plan.
    * ``--write-rename-script``: in a dry run, also write the PowerShell
      rename script.
    * ``--rename-only``: skip the deletion analysis, write the rename script
      for all current folders and exit (nothing is renamed in-process).

    A folder under ``BASE`` is scheduled for deletion when it contains no HTML
    file, or at least ``PHP_THRESHOLD`` PHP files. The remaining folders are
    renamed after their page title.

    Exits with status 1 if ``BASE`` is not a directory.
    """
    dry = "--apply" not in sys.argv
    write_script = "--write-rename-script" in sys.argv
    rename_only = "--rename-only" in sys.argv
    script_path = Path(__file__).resolve().parent / "profile_template_renames.ps1"

    if not BASE.is_dir():
        print(f"Missing base: {BASE}")
        sys.exit(1)

    if rename_only:
        remaining = sorted(
            (c for c in BASE.iterdir() if c.is_dir()), key=lambda x: x.name
        )
        renames = compute_renames(remaining)
        write_rename_ps1(renames, script_path)
        print(f"已写入重命名脚本: {script_path}")
        print("请在关闭占用该文件夹的程序后,在 PowerShell 中执行:")
        print(f" powershell -ExecutionPolicy Bypass -File \"{script_path}\"")
        return

    children = [c for c in BASE.iterdir() if c.is_dir()]
    to_delete: list[tuple[Path, str, int, int]] = []
    for child in children:
        h, p = count_html(child), count_php(child)
        if h == 0:
            to_delete.append((child, "no_html", h, p))
        elif p >= PHP_THRESHOLD:
            to_delete.append((child, "php_heavy", h, p))

    print("=== 将删除(非静态或无可展示 HTML)===")
    for path, reason, h, p in sorted(to_delete, key=lambda x: x[0].name):
        print(f" [{reason}] html={h} php={p} {path.name}")
    if dry:
        print("\n[DRY RUN] 加参数 --apply 执行删除与重命名\n")

    delete_set = {p for p, _, _, _ in to_delete}
    if not dry:
        for path, _, _, _ in to_delete:
            shutil.rmtree(path, ignore_errors=False)
            print(f"已删除: {path.name}")

    # In a dry run the doomed folders still exist on disk, so leave them out
    # explicitly; after a real deletion they are simply gone.
    remaining = sorted(
        (
            c
            for c in BASE.iterdir()
            if c.is_dir() and not (dry and c in delete_set)
        ),
        key=lambda x: x.name,
    )
    renames = compute_renames(remaining)

    print("=== 计划重命名(按页面 title)===")
    for old, new in renames:
        if old != new:
            print(f" {old}\n -> {new}")

    if dry:
        if write_script:
            write_rename_ps1(renames, script_path)
            print(f"\n已写入重命名脚本: {script_path}")
        return

    # Write the script first so there is a manual fallback if in-process
    # renaming fails (e.g. because a folder is held open by another program).
    write_rename_ps1(renames, script_path)
    print(f"\n已写入重命名脚本(若本机重命名失败可手动执行): {script_path}")
    _apply_renames(renames)


if __name__ == "__main__":
    main()