hannabox/scripts/minify_web.py

258 lines
7.3 KiB
Python

#!/usr/bin/env python3
"""
Simple build tool to minify web assets in the `web/` folder.
Usage:
python3 scripts/minify_web.py
This script will read:
- web/index.html
- web/style.css
- web/script.js
and write minified outputs to:
- web/cleaned/index.html
- web/cleaned/style.css
- web/cleaned/script.js
The minifiers are intentionally conservative (no external deps) and aim to be
safe for typical static files used in this project. They remove comments,
collapse unnecessary whitespace and do small syntax-preserving transformations.
They are NOT as powerful as terser/clean-css/html-minifier but avoid external
package installs which may not be available on the build host.
If you need more aggressive, production-grade minification later, replace this
script with an npm-based toolchain (npx terser, html-minifier-terser, clean-css)
or call those tools from a Makefile.
"""
from pathlib import Path
import re
import sys
import os
# Project root: two directory levels above this file's resolved location.
BASE = Path(__file__).resolve().parent.parent
# Directory holding the source web assets that get minified.
WEB = BASE / "web"
# Output directory that receives the minified copies.
CLEAN = WEB / "cleaned"
def ensure_clean_dir():
    """Create the output directory (and any missing parents) if it is absent."""
    CLEAN.mkdir(exist_ok=True, parents=True)
# ----------------------
# HTML minifier
# ----------------------
def minify_html(src: str) -> str:
    """
    Conservatively minify an HTML document.

    - Preserve <script>, <style>, <pre>, <code> and <textarea> elements
      verbatim by masking them before any other transformation.
    - Remove HTML comments.
    - Remove whitespace between adjacent tags (``> <`` becomes ``><``).
    - Collapse runs of spaces/tabs to a single space.
    - Trim leading/trailing whitespace.

    Args:
        src: The HTML source text.

    Returns:
        The minified HTML.

    Raises:
        ValueError: If ``src`` already contains every candidate sentinel
            character, so protected blocks cannot be masked safely.
    """
    # Placeholders are delimited by a character that does not occur anywhere
    # in the input. A fixed textual marker could collide with literal page
    # content and either crash the restore step or inject the wrong block.
    guard = next(
        (chr(cp) for cp in range(0xE000, 0xF900) if chr(cp) not in src),
        None,
    )
    if guard is None:
        raise ValueError("no unused sentinel character available for masking")

    protected = re.compile(
        r'(?is)<(script|style|pre|code|textarea)(\b[^>]*)?>(.*?)</\1>'
    )
    saved = []

    def _mask(m):
        saved.append(m.group(0))
        return f"{guard}{len(saved) - 1}{guard}"

    masked = protected.sub(_mask, src)
    # Remove comments <!-- ... -->
    masked = re.sub(r'(?is)<!--.*?-->', '', masked)
    # Collapse whitespace between tags: > < => ><
    masked = re.sub(r'>\s+<', '><', masked)
    # Collapse multiple spaces/tabs to one space
    masked = re.sub(r'[ \t]{2,}', ' ', masked)
    # Remove leading/trailing whitespace/newlines
    masked = masked.strip()

    # Re-insert the protected blocks unchanged. Only placeholders created
    # above can match, because the guard character is absent from the input.
    marker = re.compile(re.escape(guard) + r'(\d+)' + re.escape(guard))
    return marker.sub(lambda m: saved[int(m.group(1))], masked)
# ----------------------
# CSS minifier
# ----------------------
def minify_css(src: str) -> str:
    """
    Conservatively minify a CSS stylesheet.

    - Remove comments (/* ... */) that are not inside a quoted string.
    - Remove whitespace around ``{ } : ; ,``.
    - Collapse repeated semicolons and drop the one before a closing brace.
    - Collapse remaining whitespace runs to a single space.
    - Leave the contents of quoted strings untouched.

    Args:
        src: The CSS source text.

    Returns:
        The minified CSS.

    Raises:
        ValueError: If ``src`` already contains every candidate sentinel
            character, so string literals cannot be masked safely.
    """
    # String literals are swapped for placeholders delimited by a character
    # that does not occur in the input, so the whitespace/punctuation rewrites
    # below cannot change values such as ``content: " : "``.
    guard = next(
        (chr(cp) for cp in range(0xE000, 0xF900) if chr(cp) not in src),
        None,
    )
    if guard is None:
        raise ValueError("no unused sentinel character available for masking")

    # One left-to-right scan decides whether a span is a comment or a string,
    # so a quote inside a comment and a "/*" inside a string are both handled.
    # A string stops at an unescaped line break.
    token = re.compile(
        r'/\*.*?\*/'
        r'|"(?:\\.|[^"\\\r\n\f])*"'
        r"|'(?:\\.|[^'\\\r\n\f])*'",
        re.DOTALL,
    )
    strings = []

    def _strip_or_mask(m):
        text = m.group(0)
        if text.startswith('/*'):
            return ''
        strings.append(text)
        return f"{guard}{len(strings) - 1}{guard}"

    s = token.sub(_strip_or_mask, src)
    # Remove whitespace around symbols
    s = re.sub(r'\s*([{}:;,])\s*', r'\1', s)
    # Collapse multiple semicolons
    s = re.sub(r';;+', ';', s)
    # Remove trailing semicolon before closing brace
    s = re.sub(r';}', '}', s)
    # Collapse multiple whitespace/newlines
    s = re.sub(r'\s+', ' ', s)
    s = s.strip()

    # Put the original string literals back, byte for byte.
    marker = re.compile(re.escape(guard) + r'(\d+)' + re.escape(guard))
    return marker.sub(lambda m: strings[int(m.group(1))], s)
# ----------------------
# JS minifier (simple, conservative)
# ----------------------
def minify_js(src: str) -> str:
    """
    Conservatively shrink JavaScript source without re-tokenizing it.

    - Removes /* ... */ block comments that are not inside a string, a
      template literal or a // line comment. A removed comment is replaced by
      a newline if it contained a line break, or by a single space when
      dropping it would join the characters on either side.
    - Does NOT remove // line comments; they are copied verbatim, and quotes
      or "/*" inside them are ignored.
    - Trims trailing whitespace on each line and collapses runs of blank
      lines to a single blank line.
    - Preserves all other whitespace and token boundaries.

    Known limitations: regular-expression literals are not recognized (only
    backslash escapes inside them are respected), ``${...}`` nesting inside
    template literals is not tracked, and trailing whitespace is trimmed on
    every line, including lines inside a multi-line template literal.

    Args:
        src: The JavaScript source text.

    Returns:
        The processed source ending with one newline, or an empty string if
        nothing but whitespace and block comments was present.
    """
    out = []
    n = len(src)
    i = 0
    quote = ''  # active delimiter: "'", '"' or '`'; empty when in plain code

    while i < n:
        c = src[i]
        nxt = src[i + 1] if i + 1 < n else ''

        # A backslash escapes the next character both in strings and in regex
        # literals; copy the pair verbatim so an escaped quote or slash is
        # never read as a delimiter. Backslash + CRLF is one continuation.
        if c == '\\':
            width = 3 if src.startswith('\r\n', i + 1) else 2
            out.append(src[i:i + width])
            i += width
            continue

        if quote:
            if c == quote:
                quote = ''
            elif quote != '`' and c in '\r\n':
                # '...' and "..." cannot span an unescaped line break, so a
                # stray quote (e.g. inside a regex literal) stops here.
                quote = ''
            out.append(c)
            i += 1
            continue

        if c in '\'"`':
            quote = c
            out.append(c)
            i += 1
            continue

        # Line comment: keep it untouched up to (not including) the line end.
        if c == '/' and nxt == '/':
            end = i
            while end < n and src[end] not in '\r\n':
                end += 1
            out.append(src[i:end])
            i = end
            continue

        # Block comment: drop it; an unterminated one runs to end of input.
        if c == '/' and nxt == '*':
            close = src.find('*/', i + 2)
            stop = n if close == -1 else close + 2
            comment = src[i:stop]
            i = stop
            if '\n' in comment or '\r' in comment:
                # Keep a line break so automatic semicolon insertion sees
                # the same line structure as before.
                out.append('\n')
            elif out and not out[-1].isspace() and i < n and not src[i].isspace():
                # A comment separates tokens like whitespace does.
                out.append(' ')
            continue

        out.append(c)
        i += 1

    # Second pass: trim line ends and collapse blank-line runs. Split only on
    # CR/LF; str.splitlines would also split on form feed, U+2028 and other
    # characters that may legally appear inside string literals.
    kept = []
    previous_blank = False
    for line in re.split(r'\r\n|\r|\n', ''.join(out)):
        line = line.rstrip()
        if line:
            kept.append(line)
            previous_blank = False
        elif not previous_blank:
            kept.append('')
            previous_blank = True

    body = '\n'.join(kept).strip()
    return body + '\n' if body else ''
# ----------------------
# File utilities
# ----------------------
def read_file(path: Path) -> str:
    """
    Read a UTF-8 text file, reporting failures instead of raising.

    Args:
        path: File to read.

    Returns:
        The file's text, or an empty string if the file could not be read or
        decoded (the error is printed to stderr). An unreadable file is thus
        indistinguishable from an empty one for the caller.
    """
    try:
        return path.read_text(encoding='utf-8')
    except (OSError, UnicodeError) as e:
        # Only I/O and decoding failures are expected here; anything else is
        # a programming error and should surface instead of being swallowed.
        print(f"ERROR reading {path}: {e}", file=sys.stderr)
        return ''
def write_file(path: Path, data: str) -> None:
    """
    Write text to a file as UTF-8, reporting the outcome on the console.

    On success a "Wrote ..." line with the UTF-8 encoded size of ``data`` is
    printed to stdout; on an I/O or encoding failure an error line is printed
    to stderr and the function returns normally.

    Args:
        path: Destination file.
        data: Text to write.
    """
    try:
        path.write_text(data, encoding='utf-8')
    except (OSError, UnicodeError) as e:
        print(f"ERROR writing {path}: {e}", file=sys.stderr)
        return
    # len(data) counts characters; encode to report the real byte size for
    # non-ASCII content.
    print(f"Wrote {path} ({len(data.encode('utf-8'))} bytes)")
def minify_all():
    """
    Minify index.html, style.css and script.js from the web directory.

    Each source file that exists is read, passed through its minifier and
    written under the same name into the output directory; a missing source
    file is reported and skipped. Progress is printed to stdout.
    """
    ensure_clean_dir()

    # (progress label, message when the source is absent, file name, minifier)
    jobs = (
        ("Minifying HTML:", "No index.html found in web/", "index.html", minify_html),
        ("Minifying CSS:", "No style.css found in web/", "style.css", minify_css),
        ("Minifying JS:", "No script.js found in web/", "script.js", minify_js),
    )
    for banner, missing_note, filename, minifier in jobs:
        source_path = WEB / filename
        if not source_path.exists():
            print(missing_note)
            continue
        print(banner, source_path)
        text = read_file(source_path)
        minified = minifier(text)
        write_file(CLEAN / filename, minified)

    print("Minification complete. Output placed in", CLEAN)
# Run the full minification only when executed as a script, not on import.
if __name__ == "__main__":
    minify_all()