Build a Static Site Generator
Introduction
Static site generators transform content (usually Markdown) into static HTML files. They're used for blogs, documentation, portfolios, and landing pages. Static sites are fast, secure, and easy to host.
In this tutorial, we'll build a complete static site generator with Markdown support, templating, front matter parsing, and asset management.
What you'll build:
- A Markdown to HTML converter
- Front matter parser
- Content organization system
- Template integration
- Asset pipeline
- A build script
What you'll learn:
- How static site generators work
- Markdown parsing
- Front matter handling
- Template rendering
- File system operations
Core Concepts
Front Matter
Front matter is metadata at the top of a file, separated by triple dashes. It contains fields like title, date, tags, and layout:
---
title: My First Post
date: 2024-01-15
tags: [python, tutorial]
layout: post.html
---
# Hello World
This is my first blog post!
Markdown
Markdown is a lightweight markup language that converts to HTML. We'll support common elements like headings, lists, code blocks, links, and images.
Build Process
The build process reads all content files, parses front matter and Markdown, renders templates, and outputs static HTML files.
Project Overview
Our static site generator will include:
| Feature | Description |
|---|---|
| Markdown | Parse and convert Markdown to HTML |
| Front Matter | Extract metadata from files |
| Templates | Reusable page layouts |
| Collections | Group content by type |
| Assets | Copy CSS, JS, images |
Prerequisites
- Python 3.8+ - Installed on your system
- Basic Python knowledge - File I/O, classes
- PyYAML - Install with pip install pyyaml (used to parse front matter and config.yaml)
Project Structure
Create this directory structure:
my-site/
├── content/
│ ├── pages/
│ │ ├── about.md
│ │ └── contact.md
│ └── posts/
│ ├── hello-world.md
│ └── python-tutorial.md
├── templates/
│ ├── base.html
│ ├── page.html
│ └── post.html
├── static/
│ ├── css/
│ │ └── style.css
│ └── js/
│ └── main.js
├── config.yaml
├── ssg/
│ ├── __init__.py
│ ├── markdown.py
│ ├── content.py
│ └── generator.py
└── build.py
Markdown Parser
Create an empty ssg/__init__.py so that ssg is an importable package, then create ssg/markdown.py:
import re
from html import escape
class MarkdownParser:
    """Convert a small subset of Markdown to HTML, one line at a time.

    Block-level constructs (headings, block quotes, horizontal rules, list
    items, fenced code blocks and paragraphs) are recognised per line.
    Inline markup (emphasis, strikethrough, code spans, links and images) is
    then expanded inside the text of each block, wherever it occurs in the
    line.

    Attributes:
        rules: ``(pattern, handler)`` pairs for block-level lines. They are
            tried in order with ``re.match`` and the first match wins.
        inline_rules: ``(pattern, handler)`` pairs applied in order with
            ``re.sub`` to the text of every block, outside code spans.
    """

    # A bullet is "*" or "-" followed by a space; "---" therefore stays a rule.
    _LIST_ITEM = r'^[*-] (.+)$'
    # The capturing group makes re.split keep the code spans in its result.
    _CODE_SPAN = r'(`.+?`)'

    def __init__(self):
        self.rules = [
            (r'^### (.+)$', self._heading_3),
            (r'^## (.+)$', self._heading_2),
            (r'^# (.+)$', self._heading_1),
            (r'^> (.+)$', self._blockquote),
            (r'^---$', self._horizontal_rule),
        ]
        # Order matters: images before links (an image is a link preceded by
        # "!"), and longer asterisk runs before shorter ones.
        self.inline_rules = [
            (r'!\[(.*?)\]\((.+?)\)', self._image),
            (r'\[(.+?)\]\((.+?)\)', self._link),
            (r'\*\*\*(.+?)\*\*\*', self._bold_italic),
            (r'\*\*(.+?)\*\*', self._bold),
            # Require non-space text next to the asterisks so that
            # "a * b * c" is not turned into emphasis.
            (r'\*(?=\S)(.+?)(?<=\S)\*', self._italic),
            (r'~~(.+?)~~', self._strikethrough),
        ]

    def parse(self, text: str) -> str:
        """Return the HTML rendering of the Markdown in ``text``.

        Args:
            text: Markdown source. Both ``\\n`` and ``\\r\\n`` line endings
                are accepted.

        Returns:
            The generated HTML, one output line per element, joined with
            newlines. Blank input lines are kept as blank output lines.
        """
        result = []
        in_code_block = False
        in_list = False
        code_lang = ''
        code_content = []

        for line in text.replace('\r\n', '\n').split('\n'):
            if line.startswith('```'):
                if in_code_block:
                    result.append(self._code_block((code_lang, '\n'.join(code_content))))
                    in_code_block = False
                else:
                    if in_list:
                        result.append('</ul>')
                        in_list = False
                    in_code_block = True
                    code_lang = line[3:].strip()
                    code_content = []
                continue
            if in_code_block:
                # Lines inside a fence are collected verbatim.
                code_content.append(line)
                continue

            item = re.match(self._LIST_ITEM, line)
            if item:
                if not in_list:
                    result.append('<ul>')
                    in_list = True
                result.append(self._list_item(item))
                continue
            if in_list:
                # Any non-item line (including a blank one) ends the list.
                result.append('</ul>')
                in_list = False

            if not line.strip():
                result.append('')
                continue

            for pattern, handler in self.rules:
                match = re.match(pattern, line)
                if match:
                    result.append(handler(match))
                    break
            else:
                result.append(f'<p>{self._inline(line)}</p>')

        # Flush whatever is still open so trailing content is never dropped.
        if in_code_block:
            result.append(self._code_block((code_lang, '\n'.join(code_content))))
        if in_list:
            result.append('</ul>')
        return '\n'.join(result)

    def _inline(self, text):
        """Expand inline markup in ``text``, leaving code spans untouched."""
        pieces = re.split(self._CODE_SPAN, text)
        rendered = []
        for index, piece in enumerate(pieces):
            if index % 2:
                # Odd positions are the captured `code` spans.
                rendered.append(f'<code>{escape(piece[1:-1])}</code>')
                continue
            for pattern, handler in self.inline_rules:
                piece = re.sub(pattern, handler, piece)
            rendered.append(piece)
        return ''.join(rendered)

    def _heading_1(self, match):
        return f'<h1>{self._inline(match.group(1))}</h1>'

    def _heading_2(self, match):
        return f'<h2>{self._inline(match.group(1))}</h2>'

    def _heading_3(self, match):
        return f'<h3>{self._inline(match.group(1))}</h3>'

    def _bold(self, match):
        return f'<strong>{match.group(1)}</strong>'

    def _italic(self, match):
        return f'<em>{match.group(1)}</em>'

    def _bold_italic(self, match):
        return f'<strong><em>{match.group(1)}</em></strong>'

    def _strikethrough(self, match):
        return f'<del>{match.group(1)}</del>'

    def _inline_code(self, match):
        return f'<code>{escape(match.group(1))}</code>'

    def _list_item(self, match):
        return f'<li>{self._inline(match.group(1))}</li>'

    def _blockquote(self, match):
        return f'<blockquote>{self._inline(match.group(1))}</blockquote>'

    def _link(self, match):
        return f'<a href="{match.group(2)}">{match.group(1)}</a>'

    def _image(self, match):
        return f'<img src="{match.group(2)}" alt="{match.group(1)}">'

    def _horizontal_rule(self, match):
        return '<hr>'

    def _code_block(self, match):
        """Render a fenced code block from a ``(lang, code)`` tuple or a match."""
        lang, code = match if isinstance(match, tuple) else (match.group(1), match.group(2))
        # Omit the class entirely when the fence names no language.
        class_attr = f' class="language-{escape(lang)}"' if lang else ''
        return f'<pre><code{class_attr}>{escape(code)}</code></pre>'
Content Loader
Create ssg/content.py:
import os
import yaml
import re
from datetime import datetime
from .markdown import MarkdownParser
class ContentLoader:
    """Load Markdown files from a content directory into plain dicts.

    Each loaded item holds the file's front matter fields plus ``slug``
    (file name without extension), ``content`` (rendered HTML) and
    ``collection`` (the name passed by the caller).
    """

    # Front matter starts with '---' on the first line and ends at the next
    # line that consists of '---'. Anchoring the delimiters to whole lines
    # keeps a '---' inside a YAML value from ending the block early.
    _FRONT_MATTER = re.compile(
        r'\A---[ \t]*\r?\n(.*?)^---[ \t]*\r?(?:\n|\Z)(.*)\Z',
        re.DOTALL | re.MULTILINE,
    )

    def __init__(self, content_dir: str):
        self.content_dir = content_dir
        self.markdown = MarkdownParser()

    def load_all(self):
        """Load the ``pages`` and ``posts`` sub-directories of the content dir.

        Returns:
            A dict with the keys ``'pages'`` and ``'posts'``. A missing
            sub-directory yields an empty list.
        """
        content = {'pages': [], 'posts': []}
        for collection in ('pages', 'posts'):
            directory = os.path.join(self.content_dir, collection)
            if os.path.exists(directory):
                content[collection] = self._load_directory(directory, collection)
        return content

    def _load_directory(self, directory: str, collection: str):
        """Load every ``.md`` file in ``directory``, newest first."""
        items = []
        # Sorting the names makes the order of same-date items reproducible,
        # because os.listdir returns entries in arbitrary order.
        for filename in sorted(os.listdir(directory)):
            if not filename.endswith('.md'):
                continue
            item = self.load_file(os.path.join(directory, filename), collection)
            if item:
                items.append(item)
        items.sort(key=self._sort_key, reverse=True)
        return items

    @staticmethod
    def _sort_key(item):
        """Return a naive ``datetime`` that orders ``item`` by its date.

        YAML yields ``date`` objects for values like ``2024-01-15`` and
        ``datetime`` objects when a time is given. Those types cannot be
        compared with each other, so everything is coerced to ``datetime``.
        Items with a missing or unparseable date use ``datetime.min``.
        """
        from datetime import date  # the module only imports ``datetime``

        value = item.get('date')
        if isinstance(value, datetime):
            offset = value.utcoffset()
            if offset is not None:
                # Aware and naive datetimes cannot be compared either;
                # reduce aware values to naive UTC.
                value = value.replace(tzinfo=None) - offset
            return value
        if isinstance(value, date):
            return datetime.combine(value, datetime.min.time())
        return datetime.min

    def load_file(self, filepath: str, collection: str = None):
        """Read one Markdown file and return it as an item dict.

        Args:
            filepath: Path of the file to read (UTF-8, optional BOM).
            collection: Name stored under the ``'collection'`` key.

        Returns:
            The front matter fields merged with ``slug``, ``content`` and
            ``collection``.
        """
        # 'utf-8-sig' also reads plain UTF-8 but drops a leading BOM, which
        # would otherwise hide the opening '---' of the front matter.
        with open(filepath, 'r', encoding='utf-8-sig') as f:
            raw = f.read()

        front_matter, markdown_content = self._parse_front_matter(raw)

        raw_date = front_matter.get('date')
        if isinstance(raw_date, str):
            try:
                front_matter['date'] = datetime.fromisoformat(raw_date)
            except ValueError:
                # Not an ISO date: keep the author's string unchanged.
                pass

        slug = os.path.splitext(os.path.basename(filepath))[0]
        return {
            **front_matter,
            'slug': slug,
            'content': self.markdown.parse(markdown_content),
            'collection': collection,
        }

    def _parse_front_matter(self, content: str):
        """Split ``content`` into ``(front_matter, markdown)``.

        Returns:
            A ``(dict, str)`` tuple. Without a front matter block the dict
            is empty and the text is returned unchanged. With one, the
            Markdown part is stripped of surrounding whitespace; front
            matter that is invalid YAML or not a mapping yields an empty
            dict.
        """
        match = self._FRONT_MATTER.match(content)
        if match is None:
            return {}, content

        markdown_content = match.group(2).strip()
        try:
            data = yaml.safe_load(match.group(1))
        except yaml.YAMLError:
            # Best effort: a broken header must not stop the whole build.
            return {}, markdown_content
        # A scalar or list header cannot be merged into the item dict.
        return (data if isinstance(data, dict) else {}), markdown_content
Site Generator
Create ssg/generator.py:
import os
import re
import shutil
from datetime import datetime

import yaml

from .content import ContentLoader
class StaticSiteGenerator:
    """Build a static HTML site from Markdown content, templates and assets.

    Templates are plain text with ``{{ key }}`` placeholders. A template may
    start with ``{% extends "parent.html" %}``: it is then rendered first and
    handed to the parent as ``{{ content }}``. ``{% block name %}`` and
    ``{% endblock %}`` tags are accepted and removed from the output.
    """

    # A placeholder is "{{ key }}" with any amount of whitespace around key.
    _PLACEHOLDER = re.compile(r'\{\{\s*([^{}\s]+)\s*\}\}')
    _EXTENDS = re.compile(r'\{%\s*extends\s+["\']([^"\']+)["\']\s*%\}')
    _LAYOUT_TAG = re.compile(
        r'\{%\s*(?:extends\s+["\'][^"\']+["\']|block\s+\w+|endblock(?:\s+\w+)?)\s*%\}'
    )

    def __init__(self, config_path: str = 'config.yaml'):
        self.config = self._load_config(config_path)
        self.content_dir = self.config.get('content_dir', 'content')
        self.template_dir = self.config.get('template_dir', 'templates')
        self.static_dir = self.config.get('static_dir', 'static')
        self.output_dir = self.config.get('output_dir', '_site')
        self.loader = ContentLoader(self.content_dir)
        self.template_cache = {}

    def _load_config(self, path: str) -> dict:
        """Return the YAML mapping stored at ``path``.

        A missing file, an empty file or a document that is not a mapping
        all yield an empty dict.
        """
        if not os.path.exists(path):
            return {}
        with open(path, 'r', encoding='utf-8') as f:
            data = yaml.safe_load(f)
        return data if isinstance(data, dict) else {}

    def build(self):
        """Regenerate the whole site inside ``output_dir``.

        The output directory is deleted and recreated, static files are
        copied, then every page, every post and the index are rendered.

        Raises:
            ValueError: If ``output_dir`` is, or contains, the working
                directory or one of the source directories.
        """
        print('Building site...')
        self._check_output_dir()
        if os.path.exists(self.output_dir):
            shutil.rmtree(self.output_dir)
        os.makedirs(self.output_dir, exist_ok=True)

        self._copy_static()
        content = self.loader.load_all()
        for page in content['pages']:
            self._render_page(page, content)
        for post in content['posts']:
            self._render_post(post, content)
        self._render_index(content)
        print(f'Site built successfully! Output: {self.output_dir}')

    def _check_output_dir(self):
        """Refuse an ``output_dir`` whose deletion would remove the sources."""
        output = os.path.realpath(self.output_dir)
        # join(output, '') appends exactly one trailing separator.
        prefix = os.path.join(output, '')
        protected = (os.getcwd(), self.content_dir, self.template_dir, self.static_dir)
        for path in protected:
            real = os.path.realpath(path)
            if real == output or real.startswith(prefix):
                raise ValueError(
                    f'Refusing to delete output_dir {self.output_dir!r}: '
                    f'it contains {path!r}'
                )

    def _copy_static(self):
        """Copy the static directory tree into the output directory."""
        if os.path.isdir(self.static_dir):
            shutil.copytree(self.static_dir, self.output_dir, dirs_exist_ok=True)

    def _load_template(self, template_name: str) -> str:
        """Return the text of a template, caching it after the first read.

        A template that does not exist is replaced by ``'{{ content }}'``.
        """
        if template_name in self.template_cache:
            return self.template_cache[template_name]
        template_path = os.path.join(self.template_dir, template_name)
        if not os.path.exists(template_path):
            return '{{ content }}'
        with open(template_path, 'r', encoding='utf-8') as f:
            template = f.read()
        self.template_cache[template_name] = template
        return template

    def _write_output(self, relative_path: str, html: str):
        """Write ``html`` as UTF-8 to ``relative_path`` below the output dir."""
        output_path = os.path.join(self.output_dir, relative_path)
        directory = os.path.dirname(output_path)
        if directory:
            os.makedirs(directory, exist_ok=True)
        # Content is read as UTF-8, so it is written as UTF-8 as well
        # instead of relying on the platform's default encoding.
        with open(output_path, 'w', encoding='utf-8') as f:
            f.write(html)

    def _render_item(self, item: dict, content: dict, default_layout: str, subdir: str):
        """Render one page or post to ``<output_dir>/<subdir>/<slug>.html``."""
        template = self._load_template(item.get('layout', default_layout))
        # Item fields come last so that the item's own 'content' wins.
        html = self._render_template(template, {**content, **item})
        self._write_output(os.path.join(subdir, f"{item['slug']}.html"), html)

    def _render_page(self, page: dict, content: dict):
        """Render ``page`` with its layout (default ``page.html``)."""
        self._render_item(page, content, 'page.html', 'pages')

    def _render_post(self, post: dict, content: dict):
        """Render ``post`` with its layout (default ``post.html``)."""
        self._render_item(post, content, 'post.html', 'posts')

    def _render_index(self, content: dict):
        """Render ``index.html`` at the root of the output directory.

        The context provides ``title`` (the ``title`` config value, default
        ``'Home'``) and ``content`` (the post list), so that the fallback
        template and shared layouts produce a usable page. Keys present in
        ``content`` take precedence over both.
        """
        context = {
            'title': self.config.get('title', 'Home'),
            'content': self._render_post_list(content.get('posts', [])),
            **content,
        }
        template = self._load_template('index.html')
        self._write_output('index.html', self._render_template(template, context))

    def _render_template(self, template: str, context: dict) -> str:
        """Fill ``template`` with the values from ``context``.

        Args:
            template: Template text.
            context: Values for the ``{{ key }}`` placeholders. The dict is
                not modified.

        Returns:
            The rendered text. Placeholders without a matching key are left
            unchanged.
        """
        context = dict(context)
        seen = set()
        parent = self._EXTENDS.search(template)
        # Walk up the layout chain; ``seen`` stops a template that extends
        # itself (directly or through a cycle) from looping forever.
        while parent is not None and parent.group(1) not in seen:
            seen.add(parent.group(1))
            body = self._LAYOUT_TAG.sub('', template).strip()
            context['content'] = self._substitute(body, context)
            template = self._load_template(parent.group(1))
            parent = self._EXTENDS.search(template)
        return self._substitute(self._LAYOUT_TAG.sub('', template), context)

    def _substitute(self, template: str, context: dict) -> str:
        """Replace every known ``{{ key }}`` in one pass over ``template``.

        A single pass means inserted values are never scanned again, so a
        post that shows ``{{ title }}`` in its text is output as written.
        """
        values = {str(key): value for key, value in context.items()}

        def replace(match):
            key = match.group(1)
            if key not in values:
                return match.group(0)
            return self._format_value(key, values[key])

        return self._PLACEHOLDER.sub(replace, template)

    def _format_value(self, key: str, value) -> str:
        """Return the text inserted for ``value`` under placeholder ``key``."""
        if isinstance(value, list):
            if key == 'posts':
                return self._render_post_list(value)
            if key == 'pages':
                return self._render_page_list(value)
            # Other lists (for example tags) are shown comma-separated.
            return ', '.join(str(entry) for entry in value)
        return '' if value is None else str(value)

    def _render_post_list(self, posts: list) -> str:
        """Return an HTML ``<ul>`` linking to every post in ``posts``."""
        if not posts:
            return '<p>No posts yet.</p>'
        html = ['<ul class="post-list">']
        for post in posts:
            date = post.get('date')
            if hasattr(date, 'strftime'):
                date_str = date.strftime('%Y-%m-%d')
            else:
                date_str = '' if date is None else str(date)
            slug = post['slug']
            title = post.get('title', 'Untitled')
            html.append('<li>')
            html.append(f'<span class="date">{date_str}</span>')
            html.append(f'<a href="/posts/{slug}.html">{title}</a>')
            html.append('</li>')
        html.append('</ul>')
        return '\n'.join(html)

    def _render_page_list(self, pages: list) -> str:
        """Return an HTML ``<nav>`` linking to every page in ``pages``."""
        if not pages:
            return ''
        html = ['<nav class="page-nav">']
        for page in pages:
            slug = page['slug']
            title = page.get('title', slug)
            html.append(f'<a href="/pages/{slug}.html">{title}</a>')
        html.append('</nav>')
        return '\n'.join(html)
Testing the Generator
Create the configuration, template, and content files:
# config.yaml
content_dir: content
template_dir: templates
static_dir: static
output_dir: _site
# templates/base.html
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>{{ title }}</title>
<link rel="stylesheet" href="/css/style.css">
</head>
<body>
<header>
<nav>
<a href="/">Home</a>
<a href="/pages/about.html">About</a>
<a href="/pages/contact.html">Contact</a>
</nav>
</header>
<main>
{{ content }}
</main>
<footer>
<p>© 2024 My Site</p>
</footer>
</body>
</html>
# templates/post.html
{% extends "base.html" %}
{% block content %}
<article class="post">
<h1>{{ title }}</h1>
<time>{{ date }}</time>
<div class="content">
{{ content }}
</div>
</article>
{% endblock %}
# content/posts/hello-world.md
---
title: Hello World
date: 2024-01-15
tags: [intro, welcome]
---
# Welcome to My Site
This is my first post! Here's what I learned:
* How to build a static site generator
* Markdown parsing
* Template rendering
## Code Example
```python
def hello():
print("Hello, World!")
```
Build the site by running this script from the project root (for example, saved as build.py):
from ssg.generator import StaticSiteGenerator

# Read the settings from config.yaml, then render the whole site.
generator = StaticSiteGenerator(config_path='config.yaml')
generator.build()
Summary
Congratulations! You've built a complete static site generator. Here's what you learned:
- Markdown Parsing - How to convert Markdown to HTML
- Front Matter - How to extract metadata from files
- Content Loading - How to organize content by type
- Template Rendering - How to render pages with data
- Asset Pipeline - How to copy static files
Possible Extensions
- Add syntax highlighting
- Implement RSS feeds
- Add sitemap generation
- Implement image optimization
- Add live reload