[ai] forgot cleanup script
This commit is contained in:
parent
abfe564891
commit
cfa3feb7d2
|
|
@ -0,0 +1,257 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
Simple build tool to minify web assets in the `web/` folder.
|
||||
|
||||
Usage:
|
||||
python3 scripts/minify_web.py
|
||||
|
||||
This script will read:
|
||||
- web/index.html
|
||||
- web/style.css
|
||||
- web/script.js
|
||||
|
||||
and write minified outputs to:
|
||||
- web/cleaned/index.html
|
||||
- web/cleaned/style.css
|
||||
- web/cleaned/script.js
|
||||
|
||||
The minifiers are intentionally conservative (no external deps) and aim to be
|
||||
safe for typical static files used in this project. They remove comments,
|
||||
collapse unnecessary whitespace and do small syntax-preserving transformations.
|
||||
They are NOT as powerful as terser/clean-css/html-minifier but avoid external
|
||||
package installs which may not be available on the build host.
|
||||
|
||||
If you want stronger, syntax-aware minification later, replace this script with an
|
||||
npm-based toolchain (npx terser, html-minifier-terser, clean-css) or call those
|
||||
tools from a Makefile.
|
||||
"""
|
||||
from pathlib import Path
|
||||
import re
|
||||
import sys
|
||||
import os
|
||||
|
||||
# Repository root: this script is expected to live one directory below it.
BASE = Path(__file__).resolve().parent.parent

# Source directory holding the un-minified web assets.
WEB = BASE.joinpath("web")

# Destination directory for the minified copies.
CLEAN = WEB.joinpath("cleaned")
|
||||
|
||||
def ensure_clean_dir() -> None:
    """Create the output directory (and any missing parents) if it is absent."""
    CLEAN.mkdir(exist_ok=True, parents=True)
|
||||
|
||||
# ----------------------
|
||||
# HTML minifier
|
||||
# ----------------------
|
||||
def minify_html(src: str) -> str:
|
||||
"""
|
||||
- Preserve content inside <script>, <style>, <pre>, <code> tags by masking them.
|
||||
- Remove HTML comments.
|
||||
- Collapse whitespace between tags.
|
||||
- Trim leading/trailing whitespace.
|
||||
"""
|
||||
# Mask blocks we don't want to touch
|
||||
pattern = re.compile(r'(?is)<(script|style|pre|code)(\b[^>]*)?>(.*?)</\1>')
|
||||
placeholders = []
|
||||
def _mask(m):
|
||||
placeholders.append(m.group(0))
|
||||
return f"__HTML_PLACEHOLDER_{len(placeholders)-1}__"
|
||||
masked = pattern.sub(_mask, src)
|
||||
|
||||
# Remove comments <!-- ... -->
|
||||
masked = re.sub(r'(?is)<!--.*?-->', '', masked)
|
||||
|
||||
# Collapse whitespace between tags: > < => ><
|
||||
masked = re.sub(r'>\s+<', '><', masked)
|
||||
|
||||
# Collapse multiple spaces to one
|
||||
masked = re.sub(r'[ \t]{2,}', ' ', masked)
|
||||
|
||||
# Remove leading/trailing whitespace/newlines
|
||||
masked = masked.strip()
|
||||
|
||||
# Re-insert placeholders (unchanged)
|
||||
def _restore(m):
|
||||
idx = int(m.group(1))
|
||||
return placeholders[idx]
|
||||
result = re.sub(r'__HTML_PLACEHOLDER_(\d+)__', _restore, masked)
|
||||
return result
|
||||
|
||||
# ----------------------
|
||||
# CSS minifier
|
||||
# ----------------------
|
||||
def minify_css(src: str) -> str:
    """
    Minify a CSS stylesheet without touching quoted strings.

    - Remove comments (/* ... */) that are not inside a quoted string.
    - Remove whitespace around `{`, `}`, `;` and `,` and after `:`.
    - Collapse repeated semicolons and drop the one before a closing brace.
    - Collapse remaining whitespace runs to a single space.

    Whitespace *before* a colon is only collapsed, never removed: in a
    selector it is the descendant combinator (`.nav :hover` is not
    `.nav:hover`). Quoted strings are copied through verbatim.

    Returns the minified stylesheet as a new string.
    """
    # One scanner for comments and strings so that whichever starts first
    # wins: "/*" inside a string is text, a quote inside a comment is ignored.
    token_re = re.compile(
        r'/\*.*?\*/'
        r'|"(?:\\.|[^"\\\n])*"'
        r"|'(?:\\.|[^'\\\n])*'",
        re.DOTALL,
    )

    def _squeeze(code: str) -> str:
        """Apply the whitespace/semicolon passes to string-free CSS text."""
        code = re.sub(r'\s*([{};,])\s*', r'\1', code)
        code = re.sub(r':\s+', ':', code)
        code = re.sub(r';;+', ';', code)
        code = code.replace(';}', '}')
        return re.sub(r'\s+', ' ', code)

    pieces = []   # minified code and verbatim strings, in document order
    pending = []  # code fragments since the last string (comments dropped)
    pos = 0
    for m in token_re.finditer(src):
        pending.append(src[pos:m.start()])
        pos = m.end()
        if m.group(0).startswith('/*'):
            # Comment: drop it and keep accumulating the surrounding code.
            continue
        pieces.append(_squeeze(''.join(pending)))
        pieces.append(m.group(0))
        pending = []
    pending.append(src[pos:])
    pieces.append(_squeeze(''.join(pending)))

    return ''.join(pieces).strip()
|
||||
|
||||
# ----------------------
|
||||
# JS minifier (simple, conservative)
|
||||
# ----------------------
|
||||
def minify_js(src: str) -> str:
    """
    Conservative JS minifier: strip block comments and tidy line endings.

    - Removes /* ... */ block comments found in code, i.e. not inside string
      literals, template literals or // line comments.
    - Keeps // line comments verbatim, but recognises them so that a quote or
      a "/*" inside such a comment cannot corrupt the scanner state.
    - Replaces a removed comment with a newline when it spanned several lines
      (such a comment counts as a line terminator for automatic semicolon
      insertion) or with one space when it was the only thing separating two
      tokens, so tokens are never fused together.
    - Trims trailing whitespace on each line and collapses runs of blank
      lines, leaving the contents of template literals untouched.
    - Normalises CR / CRLF line endings to LF.

    Limitations: regex literals and ${...} expressions inside template
    literals are not parsed. A quote inside a regex literal can still confuse
    the scanner, but only until the end of that line, because an unterminated
    '...' or "..." string is closed at the line break.

    Returns the minified source terminated by a single newline, or an empty
    string when nothing but whitespace and block comments was present.
    """
    # Stand-in for line breaks inside template literals so the line-wise pass
    # below cannot trim or collapse them. A private-use code point that does
    # not occur in the input is chosen to rule out collisions.
    sentinel = next(
        chr(cp) for cp in range(0xE000, 0xF900) if chr(cp) not in src
    )

    out = []
    i = 0
    n = len(src)
    quote = ''   # active literal delimiter (', " or `); empty while in code
    esc = False  # previous character inside a literal was a backslash

    # First pass: drop block comments that appear in code context.
    while i < n:
        c = src[i]
        nxt = src[i + 1] if i + 1 < n else ''
        crlf = c == '\r' and nxt == '\n'

        if quote:
            if quote == '`' and c in '\r\n':
                # Line break inside a template literal: protect it.
                out.append(sentinel)
                i += 2 if crlf else 1
                esc = False
            elif esc:
                # Escaped character; keep an escaped CRLF pair together so a
                # line continuation does not terminate the string.
                step = 2 if crlf else 1
                out.append(src[i:i + step])
                i += step
                esc = False
            else:
                out.append(c)
                i += 1
                if c == '\\':
                    esc = True
                elif c == quote or c in '\r\n':
                    # Closing delimiter, or an unterminated '/" string:
                    # those cannot span lines, so resynchronise here.
                    quote = ''
            continue

        if c == '/' and nxt == '*':
            end = src.find('*/', i + 2)
            stop = n if end == -1 else end + 2  # unterminated: drop to EOF
            comment = src[i:stop]
            i = stop
            if '\n' in comment or '\r' in comment:
                out.append('\n')
            else:
                before = out[-1][-1] if out else ' '
                after = src[i] if i < n else ' '
                if not (before.isspace() or after.isspace()):
                    out.append(' ')
            continue

        if c == '/' and nxt == '/':
            # Line comment: copy verbatim up to (not including) the line end.
            j = i
            while j < n and src[j] not in '\r\n':
                j += 1
            out.append(src[i:j])
            i = j
            continue

        if c in '\'"`':
            quote = c
        out.append(c)
        i += 1

    code_no_block_comments = ''.join(out)

    # Second pass: line-wise trimming and blank-line collapse. Split only on
    # real line breaks so other separators (e.g. U+2028 inside a string) are
    # left exactly as written.
    trimmed_lines = []
    prev_blank = False
    for line in re.split(r'\r\n|\r|\n', code_no_block_comments):
        t = line.rstrip()
        if t:
            trimmed_lines.append(t)
            prev_blank = False
        elif not prev_blank:
            trimmed_lines.append('')
            prev_blank = True

    body = '\n'.join(trimmed_lines).strip().replace(sentinel, '\n')
    return body + '\n' if body else ''
|
||||
|
||||
# ----------------------
|
||||
# File utilities
|
||||
# ----------------------
|
||||
def read_file(path: Path) -> str:
    """
    Read *path* as UTF-8 text.

    Best-effort: if the file cannot be opened or is not valid UTF-8, the
    error is reported on stderr and an empty string is returned instead of
    raising.
    """
    try:
        return path.read_text(encoding='utf-8')
    except (OSError, UnicodeError) as e:
        # Only I/O and decoding problems are expected here; anything else is
        # a programming error and should surface.
        print(f"ERROR reading {path}: {e}", file=sys.stderr)
        return ''
|
||||
|
||||
def write_file(path: Path, data: str) -> None:
    """
    Write *data* to *path* as UTF-8 and report the resulting file size.

    Best-effort: I/O and encoding failures are reported on stderr and
    swallowed rather than raised.
    """
    try:
        path.write_text(data, encoding='utf-8')
        # Report the real on-disk size; len(data) counts characters, which
        # differs from the byte count as soon as the text is not pure ASCII.
        size = path.stat().st_size
    except (OSError, UnicodeError) as e:
        print(f"ERROR writing {path}: {e}", file=sys.stderr)
    else:
        print(f"Wrote {path} ({size} bytes)")
|
||||
|
||||
def minify_all():
    """
    Minify index.html, style.css and script.js from web/ into web/cleaned/.

    Each asset is handled independently: a missing source file is reported
    and skipped, the others are still processed.
    """
    ensure_clean_dir()

    # (label used in the progress message, file name, minifier to apply)
    jobs = (
        ("HTML", "index.html", minify_html),
        ("CSS", "style.css", minify_css),
        ("JS", "script.js", minify_js),
    )

    for label, filename, minifier in jobs:
        source = WEB / filename
        if not source.exists():
            print(f"No {filename} found in web/")
            continue
        print(f"Minifying {label}:", source)
        minified = minifier(read_file(source))
        write_file(CLEAN / filename, minified)

    print("Minification complete. Output placed in", CLEAN)
|
||||
|
||||
# Run the full minification only when executed as a script, not on import.
if __name__ == "__main__":
    minify_all()
|
||||
Loading…
Reference in New Issue