Files
InfoGenie/InfoGenie-frontend/scripts/tidy_profile_templates.py
2026-03-28 20:59:52 +08:00

235 lines
7.7 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# -*- coding: utf-8 -*-
"""筛选个人主页模板:删除非静态项,并按网页 title 重命名顶层文件夹。"""
from __future__ import annotations
import re
import shutil
import sys
import uuid
from pathlib import Path
# Directory whose immediate sub-folders are the profile-page templates processed
# by this script.
# NOTE(review): hard-coded absolute Windows path — presumably specific to the
# author's machine; confirm before running anywhere else.
BASE = Path(
r"d:\SmyProjects\Frontend-Backend\InfoGenie\infogenie-frontend\public\toolbox\个人主页模板"
)
# A folder without any HTML cannot be shown as a static page; a folder holding
# at least this many PHP files is treated as needing a server, i.e. not purely
# static.
PHP_THRESHOLD = 5
def count_html(root: Path) -> int:
    """Return how many ``.html`` / ``.htm`` files exist anywhere below *root*.

    The suffix comparison is case-insensitive; directories are never counted.
    """
    html_suffixes = (".html", ".htm")
    return sum(
        1
        for entry in root.rglob("*")
        if entry.is_file() and entry.suffix.lower() in html_suffixes
    )
def count_php(root: Path) -> int:
    """Return how many ``.php`` files exist anywhere below *root*.

    The extension is compared case-insensitively on every platform, matching
    how ``count_html`` treats suffixes. A glob pattern such as ``*.php`` is
    only case-insensitive on Windows, so ``INDEX.PHP`` would be missed
    elsewhere.
    """
    return sum(
        1
        for entry in root.rglob("*")
        if entry.is_file() and entry.name.lower().endswith(".php")
    )
def extract_title_from_text(text: str) -> str | None:
    """Extract a display title from HTML source.

    The first ``<title>`` element is tried first, then the first ``<h1>``.
    For each candidate, nested tags are stripped, HTML entities are decoded,
    characters that are not allowed in Windows file names are removed, and
    whitespace is collapsed.

    Args:
        text: Raw HTML document text.

    Returns:
        The cleaned title, or ``None`` when neither element yields a
        non-empty title shorter than 100 characters.
    """
    import html  # local import so this block does not depend on file-level imports

    for pattern in (
        # Non-greedy capture so that nested markup (e.g. <h1><span>..</span></h1>)
        # is matched; the tags are stripped below. ``\b`` keeps <titlebar> etc. out.
        r"<title\b[^>]*>(.*?)</title\s*>",
        r"<h1\b[^>]*>(.*?)</h1\s*>",
    ):
        m = re.search(pattern, text, re.I | re.DOTALL)
        if not m:
            continue
        raw = re.sub(r"<[^>]+>", "", m.group(1))
        raw = html.unescape(raw)
        # Drop forbidden characters first so that removing them cannot leave
        # doubled spaces behind.
        title = re.sub(r'[<>:"/\\|?*]', "", raw)
        title = re.sub(r"\s+", " ", title).strip(" -_|")
        if title and len(title) < 100:
            return title
    return None
def read_html(path: Path) -> str:
    """Read *path* as text, guessing between UTF-8 and GB-family encodings.

    The file is read from disk once and the bytes are decoded in memory.
    ``utf-8-sig`` is tried first because it accepts UTF-8 both with and
    without a byte-order mark and strips the mark; plain ``utf-8`` would
    leave a leading U+FEFF in the text. ``gbk`` and then its superset
    ``gb18030`` cover Chinese legacy encodings. As a last resort the bytes
    are decoded as UTF-8 with undecodable bytes dropped.

    Args:
        path: File to read.

    Returns:
        The decoded file contents.

    Raises:
        OSError: If the file cannot be read.
    """
    data = path.read_bytes()
    for enc in ("utf-8-sig", "gbk", "gb18030"):
        try:
            return data.decode(enc)
        except UnicodeDecodeError:
            continue
    return data.decode("utf-8", errors="ignore")
def find_entry_html(folder: Path) -> Path | None:
    """Pick the HTML file that best represents the template in *folder*.

    Preference order: a file named ``index.html``, then ``index.htm``, then
    any other ``.html`` / ``.htm`` file. Names are compared
    case-insensitively. Within the same preference level the file closest to
    *folder* wins, and remaining ties are broken by relative path, so the
    result does not depend on the order in which the filesystem lists
    entries.

    Args:
        folder: Root directory of one template.

    Returns:
        The chosen file, or ``None`` when the tree contains no HTML file.
    """
    name_rank = {"index.html": 0, "index.htm": 1}
    candidates = [
        p
        for p in folder.rglob("*")
        if p.is_file() and p.suffix.lower() in (".html", ".htm")
    ]
    if not candidates:
        return None

    def sort_key(p: Path) -> tuple[int, int, str]:
        rel = p.relative_to(folder)
        return (name_rank.get(p.name.lower(), 2), len(rel.parts), rel.as_posix())

    return min(candidates, key=sort_key)
def sanitize_folder_name(name: str) -> str:
    """Turn *name* into a string that is usable as a Windows folder name.

    Removes the characters ``< > : " / \\ | ? *`` and ASCII control
    characters, trims leading/trailing spaces and dots, and limits the result
    to 120 characters. After truncation, trailing whitespace and dots are
    trimmed again, because cutting the string can expose a dot that Windows
    does not allow at the end of a name.

    Args:
        name: Proposed folder name, e.g. a page title.

    Returns:
        The cleaned name, or the placeholder ``"未命名模板"`` when nothing
        is left.
    """
    removed = {ord(c): None for c in '<>:"/\\|?*'}
    removed.update({code: None for code in range(32)})
    name = name.translate(removed).strip(" .")
    if len(name) > 120:
        name = re.sub(r"[\s.]+\Z", "", name[:120])
    return name or "未命名模板"
def ps_single_quote(s: str) -> str:
    """Return *s* as a PowerShell single-quoted string literal.

    PowerShell accepts not only the ASCII apostrophe but also the
    typographic quotes U+2018, U+2019, U+201A and U+201B as single-quote
    delimiters. Each of them is therefore doubled, which is how a quote
    character is written literally inside a single-quoted string. Without
    this, a name such as ``John’s page`` would end the literal early.

    Args:
        s: Arbitrary text (e.g. a folder name).

    Returns:
        The text wrapped in ASCII single quotes with embedded quote
        characters doubled.
    """
    quote_chars = "'\u2018\u2019\u201a\u201b"
    escaped = "".join(ch * 2 if ch in quote_chars else ch for ch in s)
    return "'" + escaped + "'"
def write_rename_ps1(renames: list[tuple[str, str]], out_path: Path) -> None:
    """Write a PowerShell script that performs *renames* in two phases.

    Every folder is first renamed to a unique temporary name and only then
    to its final name, so that chains such as A→B together with B→A do not
    collide. Pairs whose old and new name are equal are left out. The script
    is written to *out_path* as UTF-8 with a byte-order mark.

    Args:
        renames: ``(old_name, new_name)`` pairs, relative to ``BASE``.
        out_path: Destination of the generated ``.ps1`` file.
    """
    effective = [(src, dst) for src, dst in renames if src != dst]
    run_id = uuid.uuid4().hex[:8]
    # (old name, temporary name, final name) for every effective rename.
    staged = [
        (src, f"__tmp_rename_{run_id}_{idx}__", dst)
        for idx, (src, dst) in enumerate(effective)
    ]

    def rename_cmd(current: str, target: str) -> str:
        # One Rename-Item invocation; both names are emitted as quoted literals.
        return (
            f"Rename-Item -LiteralPath (Join-Path $base {ps_single_quote(current)}) "
            f"-NewName {ps_single_quote(target)}"
        )

    script: list[str] = [
        "# 由 tidy_profile_templates.py 生成:关闭占用该目录的 IDE/资源管理器窗口后执行。",
        "$ErrorActionPreference = 'Stop'",
        f"$base = {ps_single_quote(str(BASE))}",
        "",
        "# 阶段 1改为临时名",
    ]
    script += [rename_cmd(src, tmp) for src, tmp, _dst in staged]
    script += ["", "# 阶段 2改为最终名"]
    script += [rename_cmd(tmp, dst) for _src, tmp, dst in staged]

    out_path.write_text("\n".join(script) + "\n", encoding="utf-8-sig")
def compute_renames(remaining: list[Path]) -> list[tuple[str, str]]:
    """Plan a title-based rename for each folder in *remaining*.

    For every folder the entry HTML file is located, its title extracted and
    sanitised into a folder name. Folders without an entry file, without a
    usable title, whose entry file cannot be read, or whose name already
    equals the sanitised title are skipped. When the wanted name is already
    taken, ``-2``, ``-3``, ... is appended until a free name is found.

    Name collisions are detected case-insensitively (via ``str.casefold``)
    because the folders are renamed on a Windows path, where names that
    differ only in case refer to the same directory entry.
    NOTE(review): casefold only approximates the filesystem's own case
    mapping — confirm if exotic characters matter.

    Args:
        remaining: Template folders to consider, in processing order.

    Returns:
        ``(old_name, new_name)`` pairs in processing order.
    """
    from collections import Counter  # local import keeps this block self-contained

    # Count of folders currently (or planned to be) using each case-folded name.
    taken = Counter(c.name.casefold() for c in remaining)
    renames: list[tuple[str, str]] = []
    for folder in remaining:
        entry = find_entry_html(folder)
        if not entry:
            continue
        try:
            text = read_html(entry)
        except OSError:
            continue
        title = extract_title_from_text(text)
        if not title:
            continue
        new_name = sanitize_folder_name(title)
        if new_name == folder.name:
            continue
        # The folder gives up its current name, so a case-only rename of itself
        # is not treated as a collision and later folders may reuse the name.
        taken[folder.name.casefold()] -= 1
        final = new_name
        suffix = 2
        while taken[final.casefold()] > 0:
            final = f"{new_name}-{suffix}"
            suffix += 1
        taken[final.casefold()] += 1
        renames.append((folder.name, final))
    return renames
def _restore_original_name(tmp_path: Path, old_name: str) -> None:
    """Move a folder stranded under its temporary name back to *old_name*."""
    original = BASE / old_name
    if original.exists():
        print(f"恢复原名失败(原名已被占用,目录仍为临时名): {tmp_path.name}")
        return
    try:
        tmp_path.rename(original)
    except OSError as e:
        print(f"恢复原名失败(目录仍为临时名): {tmp_path.name} {e}")
    else:
        print(f"已恢复原名: {tmp_path.name} -> {old_name}")


def _apply_renames(renames: list[tuple[str, str]]) -> None:
    """Rename folders under ``BASE`` in two phases, rolling back on failure.

    Phase 1 moves each source folder to a unique temporary name; phase 2
    moves it to the final name. When phase 2 cannot complete for a folder
    (target exists or the OS refuses), the folder is moved back to its old
    name so that it is not left under a temporary name and the generated
    PowerShell script, which refers to the old names, stays usable.
    """
    pairs = [(old, new) for old, new in renames if old != new]
    tag = uuid.uuid4().hex[:8]
    staged: list[tuple[str, str, str]] = []

    for i, (old_name, new_name) in enumerate(pairs):
        mid = f"__tmp_rename_{tag}_{i}__"
        src = BASE / old_name
        if not src.is_dir():
            continue
        try:
            src.rename(BASE / mid)
        except OSError as e:
            print(f"阶段1 重命名失败(可稍后运行脚本): {old_name} -> {mid} {e}")
            continue
        # Only folders that really reached their temporary name enter phase 2.
        staged.append((old_name, mid, new_name))

    for old_name, mid, new_name in staged:
        src = BASE / mid
        dst = BASE / new_name
        if dst.exists():
            print(f"跳过(目标已存在): {mid} -> {new_name}")
            _restore_original_name(src, old_name)
            continue
        try:
            src.rename(dst)
        except OSError as e:
            print(f"阶段2 重命名失败(可稍后运行脚本): {mid} -> {new_name} {e}")
            _restore_original_name(src, old_name)
        else:
            print(f"重命名: {old_name} -> {new_name}")


def main() -> None:
    """Command-line entry point: prune non-static templates and rename the rest.

    Flags read from ``sys.argv``:

    * ``--apply``: actually delete and rename; without it the run is a dry
      run that only prints the plan.
    * ``--write-rename-script``: in a dry run, also write the PowerShell
      rename script.
    * ``--rename-only``: skip the deletion step, write the rename script for
      all current folders and exit.

    A folder is scheduled for deletion when it contains no HTML file or at
    least ``PHP_THRESHOLD`` PHP files. Exits with status 1 when ``BASE`` is
    not a directory.
    """
    dry = "--apply" not in sys.argv
    write_script = "--write-rename-script" in sys.argv
    rename_only = "--rename-only" in sys.argv
    script_path = Path(__file__).resolve().parent / "profile_template_renames.ps1"

    if not BASE.is_dir():
        print(f"Missing base: {BASE}")
        sys.exit(1)

    if rename_only:
        remaining = sorted((c for c in BASE.iterdir() if c.is_dir()), key=lambda x: x.name)
        renames = compute_renames(remaining)
        write_rename_ps1(renames, script_path)
        print(f"已写入重命名脚本: {script_path}")
        print("请在关闭占用该文件夹的程序后,在 PowerShell 中执行:")
        print(f" powershell -ExecutionPolicy Bypass -File \"{script_path}\"")
        return

    # Classify every template folder: (path, reason, html count, php count).
    to_delete: list[tuple[Path, str, int, int]] = []
    for child in (c for c in BASE.iterdir() if c.is_dir()):
        h, p = count_html(child), count_php(child)
        if h == 0:
            to_delete.append((child, "no_html", h, p))
        elif p >= PHP_THRESHOLD:
            to_delete.append((child, "php_heavy", h, p))

    print("=== 将删除(非静态或无可展示 HTML===")
    for path, reason, h, p in sorted(to_delete, key=lambda x: x[0].name):
        print(f" [{reason}] html={h} php={p} {path.name}")

    if dry:
        print("\n[DRY RUN] 加参数 --apply 执行删除与重命名\n")
    else:
        for path, _, _, _ in to_delete:
            shutil.rmtree(path, ignore_errors=False)
            print(f"已删除: {path.name}")

    # In a dry run nothing was deleted, so folders that would be deleted are
    # filtered out explicitly; after a real run they no longer exist.
    delete_set = {p for p, _, _, _ in to_delete}
    remaining = sorted(
        (c for c in BASE.iterdir() if c.is_dir() and (not dry or c not in delete_set)),
        key=lambda x: x.name,
    )
    renames = compute_renames(remaining)

    print("=== 计划重命名(按页面 title===")
    for old, new in renames:
        if old != new:
            print(f" {old}\n -> {new}")

    if dry:
        if write_script:
            write_rename_ps1(renames, script_path)
            print(f"\n已写入重命名脚本: {script_path}")
        return

    # The script is always written before renaming so it can serve as a manual
    # fallback if some renames fail here.
    write_rename_ps1(renames, script_path)
    print(f"\n已写入重命名脚本(若本机重命名失败可手动执行): {script_path}")
    _apply_renames(renames)
# Run the command-line entry point only when executed as a script, not on import.
if __name__ == "__main__":
    main()