#!/usr/bin/env python3
"""
Simple build tool to minify web assets in the `web/` folder.

Usage:
    python3 scripts/minify_web.py

This script reads:
- web/index.html
- web/style.css
- web/script.js

and writes minified outputs to:
- web/cleaned/index.html
- web/cleaned/style.css
- web/cleaned/script.js

The minifiers are intentionally conservative (no external deps) and aim to be
safe for typical static files used in this project. They remove comments,
collapse unnecessary whitespace and do small syntax-preserving transformations.
They are NOT as powerful as terser/clean-css/html-minifier but avoid external
package installs which may not be available on the build host.

If you want stronger minification later, replace this script with an
npm-based toolchain (npx terser, html-minifier-terser, clean-css) or call those
tools from a Makefile.
"""
|
|
from pathlib import Path
|
|
import re
|
|
import sys
|
|
import os
|
|
|
|
# Project root: the directory above the one that contains this script.
BASE = Path(__file__).resolve().parent.parent
# Source assets are read from <root>/web ...
WEB = BASE.joinpath("web")
# ... and minified copies are written to <root>/web/cleaned.
CLEAN = WEB.joinpath("cleaned")
|
|
|
|
def ensure_clean_dir():
    """Create the output directory, and any missing parents, if it is absent.

    An already existing directory is left as it is.
    """
    CLEAN.mkdir(exist_ok=True, parents=True)
|
|
|
|
# ----------------------
|
|
# HTML minifier
|
|
# ----------------------
|
|
def minify_html(src: str) -> str:
    """Minify an HTML document conservatively.

    Steps, in order:

    - Mask ``<script>``, ``<style>``, ``<pre>``, ``<code>`` and
      ``<textarea>`` elements so their contents are emitted untouched
      (whitespace is significant inside all of them).
    - Remove HTML comments.
    - Drop whitespace that sits between two tags (``> <`` becomes ``><``).
    - Collapse runs of spaces/tabs into a single space.
    - Trim leading/trailing whitespace.
    - Put the masked elements back unchanged.

    Args:
        src: The HTML source text.

    Returns:
        The minified HTML text.

    Note:
        Whitespace between two adjacent tags is removed entirely, including
        between inline elements (``<b>a</b> <i>b</i>``), which can change
        how such text renders.
    """
    # Placeholders are delimited by a marker that is guaranteed not to occur
    # in the input, so ordinary page text can never be mistaken for one.
    # "\x00" followed only by "\x01"s has no prefix that is also a suffix,
    # so a marker cannot be matched half inside and half outside a token.
    marker = "\x00"
    while marker in src:
        marker += "\x01"

    protected = re.compile(
        r'(?is)<(script|style|pre|code|textarea)(\b[^>]*)?>(.*?)</\1\s*>'
    )
    saved = []

    def _mask(match):
        saved.append(match.group(0))
        return f"{marker}{len(saved) - 1}{marker}"

    text = protected.sub(_mask, src)

    # Remove comments <!-- ... -->
    text = re.sub(r'(?is)<!--.*?-->', '', text)

    # Collapse whitespace between tags: > < => ><
    text = re.sub(r'>\s+<', '><', text)

    # Collapse multiple spaces/tabs to one space
    text = re.sub(r'[ \t]{2,}', ' ', text)

    text = text.strip()

    # After splitting on the marker, odd-numbered pieces are the indices of
    # the saved elements; even-numbered pieces are ordinary minified text.
    pieces = text.split(marker)
    return ''.join(
        saved[int(piece)] if k % 2 else piece
        for k, piece in enumerate(pieces)
    )
|
|
|
|
# ----------------------
|
|
# CSS minifier
|
|
# ----------------------
|
|
def minify_css(src: str) -> str:
    """Minify a CSS stylesheet conservatively.

    - Removes ``/* ... */`` comments.
    - Removes whitespace around ``{``, ``}``, ``;`` and ``,`` and after ``:``.
    - Collapses repeated semicolons and drops the one before ``}``.
    - Collapses any remaining whitespace run to a single space.

    Quoted strings are protected for the whole process, so neither comment
    markers nor punctuation inside them (``content: "a ; b"``) are touched.
    Whitespace *before* a colon is kept (collapsed to one space) because in a
    selector it is a descendant combinator: ``.a :hover`` and ``.a:hover``
    select different elements.

    Args:
        src: The CSS source text.

    Returns:
        The minified CSS text.
    """
    # Marker that cannot occur in the input; see the restore step below.
    marker = "\x00"
    while marker in src:
        marker += "\x01"

    # One left-to-right scan decides whether a quote belongs to a comment or
    # a "/*" belongs to a string: whichever construct starts first wins.
    # Strings may not span an unescaped line break, so an unbalanced quote
    # never swallows the rest of the file.
    token_re = re.compile(
        r'/\*.*?\*/'
        r'|"(?:\\.|[^"\\\r\n])*"'
        r"|'(?:\\.|[^'\\\r\n])*'",
        re.S,
    )
    strings = []

    def _strip_or_mask(match):
        token = match.group(0)
        if token.startswith('/*'):
            return ''
        strings.append(token)
        return f"{marker}{len(strings) - 1}{marker}"

    s = token_re.sub(_strip_or_mask, src)

    # Remove whitespace around structural symbols
    s = re.sub(r'\s*([{};,])\s*', r'\1', s)

    # Remove whitespace after a colon only (see docstring)
    s = re.sub(r':\s+', ':', s)

    # Collapse multiple semicolons
    s = re.sub(r';;+', ';', s)

    # Remove trailing semicolon before closing brace
    s = s.replace(';}', '}')

    # Collapse multiple whitespace/newlines
    s = re.sub(r'\s+', ' ', s).strip()

    # Odd-numbered pieces are indices into the protected strings.
    pieces = s.split(marker)
    return ''.join(
        strings[int(piece)] if k % 2 else piece
        for k, piece in enumerate(pieces)
    )
|
|
|
|
# ----------------------
|
|
# JS minifier (simple, conservative)
|
|
# ----------------------
|
|
# Characters that end a line in JavaScript source.
_JS_LINE_END = re.compile(r'[\r\n\u2028\u2029]')

# A "/" that follows one of these characters (or one of the keywords below)
# starts a regular-expression literal rather than a division.
_JS_REGEX_PREFIX_CHARS = frozenset("(,=:[!&|?{};+-*%<>~^")
_JS_REGEX_PREFIX_WORDS = frozenset({
    "return", "typeof", "instanceof", "in", "of", "new", "delete", "void",
    "throw", "case", "do", "else", "yield", "await",
})


def _js_quoted_end(src, start):
    """Return the index just past the '...' or "..." literal opening at start.

    An unterminated literal ends at the end of its line.
    """
    quote = src[start]
    n = len(src)
    i = start + 1
    while i < n:
        c = src[i]
        if c == '\\':
            # Skip the escaped character; backslash + CRLF is one unit.
            i += 3 if src.startswith('\r\n', i + 1) else 2
        elif c == quote:
            return i + 1
        elif c in '\r\n':
            return i
        else:
            i += 1
    return n


def _js_substitution_end(src, start):
    """Return the index just past the "}" closing a template "${" body."""
    n = len(src)
    depth = 1
    i = start
    while i < n:
        c = src[i]
        if c in '\'"':
            i = _js_quoted_end(src, i)
        elif c == '`':
            i = _js_template_end(src, i)
        elif c == '{':
            depth += 1
            i += 1
        elif c == '}':
            depth -= 1
            i += 1
            if depth == 0:
                return i
        else:
            i += 1
    return n


def _js_template_end(src, start):
    """Return the index just past the template literal opening at start."""
    n = len(src)
    i = start + 1
    while i < n:
        c = src[i]
        if c == '\\':
            i += 2
        elif c == '`':
            return i + 1
        elif c == '$' and src.startswith('{', i + 1):
            # Nested strings/templates inside ${...} may contain backticks.
            i = _js_substitution_end(src, i + 2)
        else:
            i += 1
    return n


def _js_regex_end(src, start):
    """Return the index just past the regex literal at start, or -1.

    -1 means no unescaped "/" outside a [...] class closes it on this line.
    """
    n = len(src)
    i = start + 1
    in_class = False
    while i < n:
        c = src[i]
        if c in '\r\n':
            return -1
        if c == '\\':
            if src[i + 1:i + 2] in ('\r', '\n'):
                return -1
            i += 2
            continue
        if c == '[':
            in_class = True
        elif c == ']':
            in_class = False
        elif c == '/' and not in_class:
            return i + 1
        i += 1
    return -1


def minify_js(src: str) -> str:
    """Strip block comments and surplus blank space from JavaScript source.

    - Removes ``/* ... */`` block comments that appear in code position, i.e.
      not inside string, template or regular-expression literals and not
      inside ``//`` line comments.
    - Does NOT remove ``//`` line comments.
    - Trims trailing whitespace on each line and collapses runs of blank
      lines to a single blank line.
    - Leaves the contents of string, template and regex literals untouched,
      including line breaks and trailing spaces inside template literals.

    A removed comment never glues two tokens together: a comment spanning
    several lines is replaced by a newline (it counts as a line break in
    JavaScript), and a one-line comment squeezed between two tokens is
    replaced by a single space. A ``/*`` with no closing ``*/`` is kept as it
    is rather than discarding the rest of the file.

    Regular-expression literals are recognised from the preceding token,
    which is a heuristic: a regex directly after ``)`` or ``]`` is read as a
    division. Such mistakes only cause text to be kept, never removed.

    Args:
        src: The JavaScript source text.

    Returns:
        The cleaned source, ending in a single newline, or ``''`` when the
        input is empty.
    """
    # Literals are swapped for "<marker><index><marker>" tokens so the
    # line-based second pass cannot alter them. The marker is chosen so that
    # it does not occur in the input and cannot overlap itself.
    marker = "\x00"
    while marker in src:
        marker += "\x01"

    literals = []
    out = []
    n = len(src)
    i = 0
    prev_sig = ''       # last non-whitespace code character seen
    word = ''           # identifier/keyword that ends at prev_sig, if any
    word_open = False   # True while the scanner is still inside `word`

    # First pass: drop block comments, protect literals.
    while i < n:
        c = src[i]
        nxt = src[i + 1] if i + 1 < n else ''

        if c in '\'"`':
            if c == '`':
                end = _js_template_end(src, i)
            else:
                end = _js_quoted_end(src, i)
            literals.append(src[i:end])
            out.append(f"{marker}{len(literals) - 1}{marker}")
            prev_sig, word, word_open = c, '', False
            i = end
            continue

        if c == '/' and nxt == '/':
            # Line comment: copy verbatim so quotes or "/*" inside it are
            # never interpreted.
            found = _JS_LINE_END.search(src, i)
            end = found.start() if found else n
            out.append(src[i:end])
            i = end
            continue

        if c == '/' and nxt == '*':
            close = src.find('*/', i + 2)
            if close == -1:
                out.append(src[i:])
                break
            comment = src[i:close + 2]
            i = close + 2
            if _JS_LINE_END.search(comment):
                out.append('\n')
            elif (out and not out[-1][-1:].isspace()
                  and i < n and not src[i].isspace()):
                out.append(' ')
            continue

        if c == '/' and (prev_sig == ''
                         or prev_sig in _JS_REGEX_PREFIX_CHARS
                         or word in _JS_REGEX_PREFIX_WORDS):
            end = _js_regex_end(src, i)
            if end != -1:
                literals.append(src[i:end])
                out.append(f"{marker}{len(literals) - 1}{marker}")
                prev_sig, word, word_open = '/', '', False
                i = end
                continue

        # Ordinary code character: keep it and remember the token context.
        out.append(c)
        if c.isspace():
            word_open = False
        elif c.isalnum() or c in '_$':
            word = word + c if word_open else c
            word_open = True
            prev_sig = c
        else:
            word = ''
            word_open = False
            prev_sig = c
        i += 1

    # Second pass: line-wise trimming and blank-line collapse.
    kept = []
    prev_blank = False
    for line in ''.join(out).splitlines():
        line = line.rstrip()
        if line:
            kept.append(line)
            prev_blank = False
        elif not prev_blank:
            kept.append('')
            prev_blank = True

    result = '\n'.join(kept).strip() + '\n' if kept else ''

    # Odd-numbered pieces are indices into the protected literals.
    pieces = result.split(marker)
    return ''.join(
        literals[int(piece)] if k % 2 else piece
        for k, piece in enumerate(pieces)
    )
|
|
|
|
# ----------------------
|
|
# File utilities
|
|
# ----------------------
|
|
def read_file(path: Path) -> str:
    """Read a UTF-8 text file, returning ``''`` if it cannot be read.

    Args:
        path: File to read.

    Returns:
        The decoded file contents. If the file cannot be opened or is not
        valid UTF-8, an error line is printed to stderr and an empty string
        is returned.
    """
    try:
        return path.read_text(encoding='utf-8')
    # Only I/O and decoding failures are expected here; anything else is a
    # programming error and should surface instead of being reported as a
    # read failure.
    except (OSError, UnicodeError) as e:
        print(f"ERROR reading {path}: {e}", file=sys.stderr)
        return ''
|
|
|
|
def write_file(path: Path, data: str):
    """Write text to a file as UTF-8 and report the result.

    On success a ``Wrote <path> (<n> bytes)`` line is printed, where ``n`` is
    the size of the file on disk (not the number of characters in ``data``).
    On failure an error line is printed to stderr; no exception is raised for
    I/O or encoding errors.

    Args:
        path: Destination file; its parent directory must already exist.
        data: Text to write.
    """
    try:
        path.write_text(data, encoding='utf-8')
        size = path.stat().st_size
    except (OSError, UnicodeError) as e:
        print(f"ERROR writing {path}: {e}", file=sys.stderr)
    else:
        print(f"Wrote {path} ({size} bytes)")
|
|
|
|
def minify_all():
    """Minify the HTML, CSS and JS assets from ``web/`` into ``web/cleaned/``.

    The output directory is created first. Each asset is then handled in
    turn (index.html, style.css, script.js): if the source file exists it is
    read, minified and written under the same name to the output directory;
    otherwise a note is printed and the asset is skipped.
    """
    ensure_clean_dir()

    # (file name, minifier, progress message, message when the file is absent)
    jobs = (
        ("index.html", minify_html, "Minifying HTML:", "No index.html found in web/"),
        ("style.css", minify_css, "Minifying CSS:", "No style.css found in web/"),
        ("script.js", minify_js, "Minifying JS:", "No script.js found in web/"),
    )

    for filename, minifier, banner, missing_note in jobs:
        source = WEB / filename
        if not source.exists():
            print(missing_note)
            continue
        print(banner, source)
        text = read_file(source)
        write_file(CLEAN / filename, minifier(text))

    print("Minification complete. Output placed in", CLEAN)
|
|
|
|
# Run the full minification only when executed as a script, not on import.
if __name__ == "__main__":
    minify_all()
|