#!/usr/bin/env python3
"""
Convert a Gemini site (.gmi) into an HTML site, preserving the directory tree.

Pipeline: Gemini (.gmi) -> Markdown (.md) -> HTML (.html)

OUTPUT POLICY (lightweight version):
- Files ALLOWED in the output: .html (and temporary .md)
- Files EXCLUDED: images, archives (zip, tar, gz...), binaries, etc.

Dependencies:
- pandoc (installed on the system)
- Python 3.9+

Usage:
    python convert_gemini_to_html.py /path/to/gemini_site /path/to/html_output
"""
import html
import shutil
import subprocess
import sys
from pathlib import Path
from urllib.parse import quote
# Extensions that may be copied verbatim to the output tree. Empty here.
# NOTE(review): not referenced by the code visible in this file; main() skips
# every non-.gmi file regardless of this set.
ALLOWED_COPY_EXTENSIONS = set()

# Extensions that must never reach the output (images, archives, PDF, audio,
# video). Entries are lowercase and include the leading dot.
# NOTE(review): not referenced by the code visible in this file either.
BLOCKED_EXTENSIONS = {
    ".png", ".jpg", ".jpeg", ".gif", ".webp", ".svg",
    ".zip", ".tar", ".gz", ".bz2", ".xz", ".7z",
    ".pdf",
    ".mp3", ".ogg", ".wav", ".mp4",
}
def gemini_to_markdown(text: str) -> str: ““” Convertit du Gemtext en Markdown. - Titres (#, ##, ###) - Liens Gemini => url [label] - Conversion des liens internes .gmi -> .html - Listes simples ““” md_lines = []
for line in text.splitlines():
line = line.rstrip()
# Titres
if line.startswith("###"):
md_lines.append("### " + line[3:].strip())
elif line.startswith("##"):
md_lines.append("## " + line[2:].strip())
elif line.startswith("#"):
md_lines.append("# " + line[1:].strip())
# Liens Gemini
elif line.startswith("=>"):
parts = line[2:].strip().split(maxsplit=1)
if not parts:
md_lines.append("")
continue
url = parts[0]
label = parts[1] if len(parts) == 2 else url
if not url.startswith(("http://", "https://", "gemini://")):
if url.endswith(".gmi"):
url = url[:-4] + ".html"
md_lines.append("")
md_lines.append(f"[{label}]({url})")
md_lines.append("")
elif line.startswith("* "):
md_lines.append("- " + line[2:])
else:
md_lines.append(line)
return "\n".join(md_lines)
def run_pandoc(md_file: Path, html_out: Path) -> None:
    """Render a Markdown file to a standalone HTML file with pandoc.

    The parent directory of ``html_out`` is created if needed.

    Args:
        md_file: Path of the Markdown input file.
        html_out: Path of the HTML file to write.

    Raises:
        subprocess.CalledProcessError: pandoc exited with a non-zero status
            (``check=True``).
        FileNotFoundError: the ``pandoc`` executable could not be found.
    """
    html_out.parent.mkdir(parents=True, exist_ok=True)
    # Passed as an argument list (no shell), so paths need no quoting.
    cmd = [
        "pandoc",
        str(md_file),
        "-f", "markdown",
        "-t", "html",
        "-s",  # standalone: full document rather than a fragment
        "-M", "charset=utf-8",
        "-o", str(html_out),
    ]
    subprocess.run(cmd, check=True)
def build_index(folder: Path) -> None:
    """Write an ``index.html`` in ``folder`` listing its other HTML files.

    Only direct children whose suffix is exactly ".html" are listed, sorted
    by path; "index.html" itself is left out of the list. Nothing is written
    when there is no such file. An existing ``index.html`` is overwritten.

    Args:
        folder: Directory to index.
    """
    html_files = sorted(
        p for p in folder.iterdir()
        if p.suffix == ".html" and p.name != "index.html"
    )
    if not html_files:
        return

    lines = [
        "<html><head><meta charset='utf-8'><title>Index</title></head><body>",
        # Names come from the file system and may contain &, < or quotes.
        f"<h1>{html.escape(folder.name)}</h1>",
        "<ul>",
    ]
    for f in html_files:
        # Percent-encode the link target (spaces, quotes, & ...) and
        # HTML-escape the visible label; both are safe inside '...' attributes.
        href = quote(f.name)
        label = html.escape(f.stem)
        lines.append(f"<li><a href='{href}'>{label}</a></li>")
    lines.append("</ul></body></html>")

    index_path = folder / "index.html"
    index_path.write_text("\n".join(lines), encoding="utf-8")
def main(src_root: Path, dst_root: Path) -> None:
    """Convert every .gmi file under ``src_root`` into HTML under ``dst_root``.

    The source directory tree is mirrored in ``dst_root``. Each ``.gmi`` file
    (extension matched case-insensitively) is converted to Markdown, stored
    under ``dst_root/._md``, and then rendered to ``.html`` by pandoc. Every
    other file is skipped. Finally a listing ``index.html`` is generated for
    each output directory, ``dst_root`` included, unless that directory
    already received an ``index.html`` converted from a source page.

    Args:
        src_root: Root directory of the Gemini site.
        dst_root: Root directory of the HTML output (created if missing).

    Raises:
        subprocess.CalledProcessError: propagated from run_pandoc when pandoc
            fails; the conversion stops at that file.
    """
    tmp_md_root = dst_root / "._md"
    tmp_md_root.mkdir(parents=True, exist_ok=True)

    # HTML files produced from source pages, so that a converted home page
    # (index.gmi -> index.html) is not overwritten by a generated listing.
    converted: set[Path] = set()

    for path in src_root.rglob("*"):
        rel = path.relative_to(src_root)
        out = dst_root / rel

        if path.is_dir():
            out.mkdir(parents=True, exist_ok=True)
            continue

        # Output policy: only Gemtext pages are processed, nothing is copied.
        if path.suffix.lower() != ".gmi":
            continue

        md_tmp = tmp_md_root / rel.with_suffix(".md")
        md_tmp.parent.mkdir(parents=True, exist_ok=True)

        text = path.read_text(encoding="utf-8")
        md_text = gemini_to_markdown(text)
        md_tmp.write_text(md_text, encoding="utf-8")

        html_out = out.with_suffix(".html")
        run_pandoc(md_tmp, html_out)
        converted.add(html_out)

    # rglob("*") yields only descendants, so dst_root is added explicitly.
    # The listing is materialised first because build_index creates files in
    # the very tree being walked.
    for folder in [dst_root, *dst_root.rglob("*")]:
        if not folder.is_dir() or folder.name == "._md":
            continue
        if folder / "index.html" in converted:
            continue
        build_index(folder)
# Script entry point: expects exactly two arguments, the source directory of
# the Gemini site and the destination directory for the HTML output.
if __name__ == "__main__":
    if len(sys.argv) != 3:
        # Diagnostics go to stderr so they are not mixed with normal output.
        print("Usage: convert_gemini_to_html.py SRC_DIR DST_DIR", file=sys.stderr)
        sys.exit(1)

    # Expand "~" and make both paths absolute before any work is done.
    src = Path(sys.argv[1]).expanduser().resolve()
    dst = Path(sys.argv[2]).expanduser().resolve()

    if not src.exists():
        print(f"Source inexistante : {src}", file=sys.stderr)
        sys.exit(1)

    main(src, dst)
    print("Conversion terminée ✔ (HTML only, archive allégée)")
#!/bin/bash
# Upload the generated HTML site to Neocities with the neocitizen CLI.

# Replace YOUR_API_KEY with the real key before running.
# NOTE(review): presumably neocitizen reads this variable from the
# environment; confirm the expected variable name against its documentation.
export NEOCITIES_API_KEY=YOUR_API_KEY

# Long options take two ASCII hyphens (the original had an en dash). The path
# is quoted so that a $HOME containing spaces stays a single argument.
neocitizen upload --dir="$HOME/gemini_html"