Build a Template Engine
Introduction
Template engines are used to generate dynamic content by combining templates with data. They're used in web frameworks, email systems, code generators, and documentation tools.
In this tutorial, we'll build a complete template engine with variables, conditionals, loops, filters, and template inheritance. Along the way we will create:
- A template tokenizer
- An AST parser for templates
- A template compiler
- Built-in filters
- Template inheritance
- Custom functions
You'll also learn:
- How template engines work
- Lexical analysis for templates
- AST construction
- Template compilation
- Filter patterns
Core Concepts
Template Syntax
Templates combine static text with dynamic expressions. Our engine will use double curly braces for expressions and special tags for control structures:
- {{ variable }} — outputs a variable
- {% if condition %}...{% endif %} — conditionals
- {% for item in items %}...{% endfor %} — loops
- {% include "template.html" %} — includes another template
- {% extends "base.html" %} — template inheritance
Compilation Process
The template engine works in three phases: tokenization (breaking template into tokens), parsing (building AST), and rendering (executing AST with data).
Project Overview
Our template engine will support:
| Feature | Syntax |
|---|---|
| Variables | {{ name }} |
| Conditionals | {% if x > 1 %}...{% endif %} |
| For loops | {% for i in items %}...{% endfor %} |
| Filters | {{ name|upper }} |
| Inheritance | {% extends "base.html" %} |
Prerequisites
- Python 3.8+ - Installed on your system
- Basic Python knowledge - OOP, lists, dictionaries
Tokenizer
Create template_engine/lexer.py to tokenize templates:
import re
from enum import Enum, auto
from dataclasses import dataclass
class TokenType(Enum):
    """Every kind of token the Lexer can emit."""
    TEXT = auto()          # literal template text
    VAR_OPEN = auto()      # {{
    VAR_CLOSE = auto()     # }}
    TAG_OPEN = auto()      # {%
    TAG_CLOSE = auto()     # %}
    IDENTIFIER = auto()
    STRING = auto()
    NUMBER = auto()
    OPERATOR = auto()      # comparison/arithmetic operators and parentheses
    COMMA = auto()
    PIPE = auto()          # | filter separator
    DOT = auto()           # . attribute access
    IF = auto()
    ELIF = auto()
    ELSE = auto()
    ENDIF = auto()
    FOR = auto()
    ENDFOR = auto()
    IN = auto()
    INCLUDE = auto()
    EXTENDS = auto()
    BLOCK = auto()
    ENDBLOCK = auto()
    EOF = auto()


@dataclass
class Token:
    """A single lexeme: its category, raw value and source offset."""
    type: TokenType
    value: object
    position: int


class Lexer:
    """Turns a template string into a flat list of Tokens.

    Static text becomes TEXT tokens; the contents of {{ ... }} and
    {% ... %} spans are broken into keyword/identifier/literal/operator
    tokens. The list always ends with a single EOF token.
    """

    VAR_TAG = '{{'
    VAR_END = '}}'
    TAG_START = '{%'
    TAG_END = '%}'

    # Tag keywords and the token type each produces. _match_keyword
    # enforces a word boundary, so prefixes ('in' vs 'include') are safe.
    KEYWORDS = (
        ('endblock', TokenType.ENDBLOCK),
        ('endfor', TokenType.ENDFOR),
        ('endif', TokenType.ENDIF),
        ('elif', TokenType.ELIF),
        ('else', TokenType.ELSE),
        ('include', TokenType.INCLUDE),
        ('extends', TokenType.EXTENDS),
        ('block', TokenType.BLOCK),
        ('if', TokenType.IF),
        ('for', TokenType.FOR),
        ('in', TokenType.IN),
    )

    def __init__(self, template: str):
        self.template = template
        self.pos = 0
        self.tokens = []

    def tokenize(self) -> list:
        """Scan the whole template and return the EOF-terminated token list."""
        while self.pos < len(self.template):
            if self._match(self.VAR_TAG):
                self.tokens.append(Token(TokenType.VAR_OPEN, '{{', self.pos))
                self._tokenize_expression(self.VAR_END)
                self._expect(TokenType.VAR_CLOSE, '}}')
            elif self._match(self.TAG_START):
                self.tokens.append(Token(TokenType.TAG_OPEN, '{%', self.pos))
                self._tokenize_tag()
                self._expect(TokenType.TAG_CLOSE, '%}')
            else:
                self._read_text()
        self.tokens.append(Token(TokenType.EOF, '', self.pos))
        return self.tokens

    def _match(self, s: str) -> bool:
        """Consume *s* at the cursor if present; return whether it matched."""
        if self.template[self.pos:self.pos + len(s)] == s:
            self.pos += len(s)
            return True
        return False

    def _read_text(self):
        """Consume literal text up to the next '{{' or '{%' marker."""
        start = self.pos
        while self.pos < len(self.template):
            if self.template[self.pos:self.pos + 2] in (self.VAR_TAG, self.TAG_START):
                break
            self.pos += 1
        text = self.template[start:self.pos]
        if text:
            self.tokens.append(Token(TokenType.TEXT, text, start))

    def _tokenize_expression(self, end: str = '}}'):
        """Emit expression tokens until the *end* marker is reached.

        The marker itself is left unconsumed so the caller can _expect()
        it. Note the termination test compares the peeked slice against
        *end*; testing the slice's truthiness would exit immediately on
        any non-empty input.
        """
        self._skip_whitespace()
        while self.pos < len(self.template) and self._peek(end) != end:
            ch = self._peek()
            two = self._peek('==')  # any 2-char window, for ==, !=, <=, >=
            if ch.isalpha() or ch == '_':
                self._read_identifier_or_filter()
            elif ch.isdigit():
                self._read_number()
            elif ch in ('"', "'"):
                self._read_string()
            elif ch == '|':
                self.tokens.append(Token(TokenType.PIPE, '|', self.pos))
                self.pos += 1
            elif ch == '.':
                # '.' may begin a float literal (".5") or dotted access (a.b).
                if self.template[self.pos + 1:self.pos + 2].isdigit():
                    self._read_number()
                else:
                    self.tokens.append(Token(TokenType.DOT, '.', self.pos))
                    self.pos += 1
            elif two in ('==', '!=', '<=', '>='):
                self.tokens.append(Token(TokenType.OPERATOR, two, self.pos))
                self.pos += 2
            elif ch in '()<>+-*/%':
                self.tokens.append(Token(TokenType.OPERATOR, ch, self.pos))
                self.pos += 1
            else:
                # Separator such as ':' before a filter argument — skip it.
                self.pos += 1
            self._skip_whitespace()

    def _tokenize_tag(self):
        """Emit the keyword token (if any) and the rest of a {% ... %} tag."""
        self._skip_whitespace()
        for keyword, token_type in self.KEYWORDS:
            if self._match_keyword(keyword):
                self.tokens.append(Token(token_type, keyword, self.pos))
                break
        # Whatever follows the keyword (condition, loop spec, template
        # name, block name) is tokenized as an ordinary expression; for
        # bare tags like {% endif %} this consumes only whitespace.
        self._tokenize_expression(self.TAG_END)

    def _match_keyword(self, keyword: str) -> bool:
        """Consume *keyword* only when followed by a non-identifier char."""
        if self.template[self.pos:self.pos + len(keyword)] == keyword:
            rest = self.template[self.pos + len(keyword):self.pos + len(keyword) + 1]
            # Underscore also continues an identifier (e.g. 'if_x').
            if not rest or not (rest.isalnum() or rest == '_'):
                self.pos += len(keyword)
                return True
        return False

    def _read_identifier_or_filter(self):
        """Read a name token; 'in' is promoted to its keyword token."""
        start = self.pos
        while self.pos < len(self.template) and (self.template[self.pos].isalnum()
                                                 or self.template[self.pos] == '_'):
            self.pos += 1
        value = self.template[start:self.pos]
        if value == 'in':
            # 'in' appears mid-tag in {% for x in xs %}; the parser
            # expects a dedicated IN token there.
            self.tokens.append(Token(TokenType.IN, value, start))
        else:
            self.tokens.append(Token(TokenType.IDENTIFIER, value, start))

    def _read_number(self):
        """Read an integer or float literal as its raw text."""
        start = self.pos
        while self.pos < len(self.template) and (self.template[self.pos].isdigit()
                                                 or self.template[self.pos] == '.'):
            self.pos += 1
        self.tokens.append(Token(TokenType.NUMBER, self.template[start:self.pos], start))

    def _read_string(self):
        """Read a quoted string; backslash escapes the next character."""
        quote = self.template[self.pos]
        self.pos += 1
        start = self.pos
        while self.pos < len(self.template) and self.template[self.pos] != quote:
            if self.template[self.pos] == '\\':
                self.pos += 1
            self.pos += 1
        value = self.template[start:self.pos]
        self.pos += 1  # consume the closing quote
        self.tokens.append(Token(TokenType.STRING, value, start))

    def _expect(self, token_type: TokenType, value: str):
        """Require *value* at the cursor and emit it as *token_type*."""
        if self.template[self.pos:self.pos + len(value)] != value:
            raise SyntaxError(f"Expected {value}")
        self.pos += len(value)
        self.tokens.append(Token(token_type, value, self.pos))

    def _skip_whitespace(self):
        """Advance past spaces, tabs and newlines."""
        while self.pos < len(self.template) and self.template[self.pos] in ' \t\n':
            self.pos += 1

    def _peek(self, s: str = None) -> str:
        """Look ahead without consuming: one char, or len(s) chars if s given."""
        if s:
            return self.template[self.pos:self.pos + len(s)] if self.pos + len(s) <= len(self.template) else ''
        return self.template[self.pos] if self.pos < len(self.template) else ''
Parser
Create template_engine/parser.py to build the AST:
from dataclasses import dataclass
from typing import List, Union
from .lexer import Token, TokenType
@dataclass
class Node:
    """Base class for every template AST node."""
    pass


@dataclass
class TextNode(Node):
    """Literal text emitted verbatim."""
    value: str


@dataclass
class VariableNode(Node):
    """A {{ ... }} output: an expression plus an ordered filter chain."""
    # The parser stores the parsed Expression here (see _parse_variable),
    # not a raw string.
    name: 'Expression'
    # Each entry is (filter_name, arg_expression_or_None).
    filters: List[tuple]


@dataclass
class IfNode(Node):
    """{% if %} with optional {% elif %} branches and an {% else %} body."""
    condition: 'Expression'
    body: List[Node]
    # Each entry is (condition_expression, body_nodes).
    elif_bodies: List[tuple]
    else_body: List[Node]


@dataclass
class ForNode(Node):
    """{% for variable in iterable %} loop."""
    variable: str
    iterable: 'Expression'
    body: List[Node]


@dataclass
class IncludeNode(Node):
    """{% include "name" %} — renders another template in place."""
    template_name: str


@dataclass
class ExtendsNode(Node):
    """{% extends "name" %} — marks this template as a child template."""
    template_name: str


@dataclass
class BlockNode(Node):
    """{% block name %} ... {% endblock %} — a named, overridable region."""
    name: str
    body: List[Node]


class Expression:
    """Base class for evaluatable expressions inside variables and tags."""
    pass


@dataclass
class Variable(Expression):
    """A (possibly dotted) name resolved against the render context."""
    name: str


@dataclass
class Literal(Expression):
    """A constant value (string, number, or None)."""
    value: object


@dataclass
class BinaryOp(Expression):
    """A binary operation; op is one of the comparison, arithmetic,
    'and' or 'or' operators evaluated by the engine."""
    op: str
    left: Expression
    right: Expression


@dataclass
class Filter(Expression):
    """A filter application expression.

    NOTE(review): appears unused in this module — the parser stores
    filters as (name, arg) tuples on VariableNode instead; confirm
    before removing.
    """
    name: str
    arg: Expression
class Parser:
    """Builds a template AST (list of Nodes) from the Lexer's token stream."""

    def __init__(self, tokens: list):
        self.tokens = tokens
        self.pos = 0

    def parse(self) -> List[Node]:
        """Parse the whole stream into a list of top-level nodes."""
        nodes = []
        while not self._is_end():
            before = self.pos
            node = self._parse_node()
            if node:
                nodes.append(node)
            elif self.pos == before:
                # Never stall on an unexpected token (e.g. a stray '%}').
                self._advance()
        return nodes

    def _parse_node(self) -> Node:
        """Dispatch on the current token; may return None for no-op tags."""
        token = self._current()
        if token.type == TokenType.TEXT:
            return self._parse_text()
        elif token.type == TokenType.VAR_OPEN:
            return self._parse_variable()
        elif token.type == TokenType.TAG_OPEN:
            return self._parse_tag()
        return None

    def _parse_text(self) -> TextNode:
        """Wrap a TEXT token."""
        return TextNode(self._advance().value)

    def _parse_variable(self) -> VariableNode:
        """Parse {{ expression|filter:arg|... }}."""
        self._expect(TokenType.VAR_OPEN)
        expr = self._parse_expression()
        filters = []
        while self._check(TokenType.PIPE):
            self._advance()
            filter_name = self._expect(TokenType.IDENTIFIER).value
            arg = None
            # A filter argument may be a name, a string or a number
            # (the ':' separator is discarded by the lexer).
            if self._check(TokenType.IDENTIFIER, TokenType.STRING, TokenType.NUMBER):
                arg = self._parse_expression()
            filters.append((filter_name, arg))
        self._expect(TokenType.VAR_CLOSE)
        return VariableNode(expr, filters)

    def _parse_tag(self) -> Node:
        """Parse a {% ... %} tag after its TAG_OPEN token."""
        self._expect(TokenType.TAG_OPEN)
        token = self._current()
        if token.type == TokenType.IF:
            return self._parse_if()
        if token.type == TokenType.FOR:
            return self._parse_for()
        if token.type == TokenType.INCLUDE:
            return self._parse_include()
        if token.type == TokenType.EXTENDS:
            return self._parse_extends()
        if token.type == TokenType.BLOCK:
            return self._parse_block()
        # Unknown tag: discard its tokens up to (and including) '%}'.
        while not self._is_end() and not self._check(TokenType.TAG_CLOSE):
            self._advance()
        self._expect(TokenType.TAG_CLOSE)
        return None

    def _parse_if(self) -> IfNode:
        """Parse if / elif* / else? / endif."""
        self._advance()  # 'if'
        condition = self._parse_expression()
        self._expect(TokenType.TAG_CLOSE)
        body = self._parse_body(TokenType.ENDIF, TokenType.ELIF, TokenType.ELSE)
        elif_bodies = []
        else_body = []
        while self._check(TokenType.ELIF):
            self._advance()
            elif_cond = self._parse_expression()
            self._expect(TokenType.TAG_CLOSE)
            elif_body = self._parse_body(TokenType.ENDIF, TokenType.ELIF, TokenType.ELSE)
            elif_bodies.append((elif_cond, elif_body))
        if self._check(TokenType.ELSE):
            self._advance()
            self._expect(TokenType.TAG_CLOSE)
            else_body = self._parse_body(TokenType.ENDIF)
        self._expect(TokenType.ENDIF)
        self._expect(TokenType.TAG_CLOSE)
        return IfNode(condition, body, elif_bodies, else_body)

    def _parse_for(self) -> ForNode:
        """Parse {% for var in iterable %} ... {% endfor %}."""
        self._advance()  # 'for'
        variable = self._expect(TokenType.IDENTIFIER).value
        self._expect(TokenType.IN)
        iterable = self._parse_expression()
        self._expect(TokenType.TAG_CLOSE)
        body = self._parse_body(TokenType.ENDFOR)
        self._expect(TokenType.ENDFOR)
        self._expect(TokenType.TAG_CLOSE)
        return ForNode(variable, iterable, body)

    def _parse_include(self) -> IncludeNode:
        """Parse {% include "name" %}; consumes the closing '%}'."""
        self._advance()
        template = self._expect(TokenType.STRING).value
        self._expect(TokenType.TAG_CLOSE)
        return IncludeNode(template)

    def _parse_extends(self) -> ExtendsNode:
        """Parse {% extends "name" %}; consumes the closing '%}'."""
        self._advance()
        template = self._expect(TokenType.STRING).value
        self._expect(TokenType.TAG_CLOSE)
        return ExtendsNode(template)

    def _parse_block(self) -> BlockNode:
        """Parse {% block name %} ... {% endblock %}."""
        self._advance()
        name = self._expect(TokenType.IDENTIFIER).value
        self._expect(TokenType.TAG_CLOSE)
        body = self._parse_body(TokenType.ENDBLOCK)
        self._expect(TokenType.ENDBLOCK)
        self._expect(TokenType.TAG_CLOSE)
        return BlockNode(name, body)

    def _parse_body(self, *end_types) -> List[Node]:
        """Parse nodes until a tag whose keyword is in *end_types*.

        End tags arrive as TAG_OPEN followed by the keyword token, so we
        look one token ahead. The TAG_OPEN of the terminating tag is
        consumed; the keyword itself is left for the caller to inspect.
        """
        nodes = []
        while not self._is_end():
            if self._check(TokenType.TAG_OPEN) and self._peek_next_type() in end_types:
                self._advance()  # consume '{%'
                break
            before = self.pos
            node = self._parse_node()
            if node:
                nodes.append(node)
            elif self.pos == before:
                self._advance()  # never loop forever on an odd token
        return nodes

    def _parse_expression(self) -> Expression:
        """Entry point of the precedence-climbing expression parser."""
        return self._parse_or()

    def _parse_or(self):
        """Lowest precedence: 'or'."""
        left = self._parse_and()
        while self._check(TokenType.IDENTIFIER) and self._current().value == 'or':
            self._advance()
            left = BinaryOp('or', left, self._parse_and())
        return left

    def _parse_and(self):
        """'and' binds tighter than 'or'."""
        left = self._parse_comparison()
        while self._check(TokenType.IDENTIFIER) and self._current().value == 'and':
            self._advance()
            left = BinaryOp('and', left, self._parse_comparison())
        return left

    def _parse_comparison(self):
        """==, !=, <, >, <=, >=."""
        left = self._parse_addition()
        while self._check(TokenType.OPERATOR) and self._current().value in ('==', '!=', '<', '>', '<=', '>='):
            op = self._advance().value
            left = BinaryOp(op, left, self._parse_addition())
        return left

    def _parse_addition(self):
        """Binary + and -."""
        left = self._parse_multiplication()
        while self._check(TokenType.OPERATOR) and self._current().value in ('+', '-'):
            op = self._advance().value
            left = BinaryOp(op, left, self._parse_multiplication())
        return left

    def _parse_multiplication(self):
        """*, / and %."""
        left = self._parse_unary()
        while self._check(TokenType.OPERATOR) and self._current().value in ('*', '/', '%'):
            op = self._advance().value
            left = BinaryOp(op, left, self._parse_unary())
        return left

    def _parse_unary(self):
        """Unary minus, desugared to (0 - x)."""
        if self._check(TokenType.OPERATOR) and self._current().value == '-':
            self._advance()
            return BinaryOp('-', Literal(0), self._parse_unary())
        return self._parse_primary()

    def _parse_primary(self):
        """Names, literals and parenthesized sub-expressions."""
        token = self._current()
        if token.type == TokenType.IDENTIFIER:
            name = self._advance().value
            # Fold dotted attribute access (a.b.c) into a single name;
            # the renderer resolves each part against the context.
            while self._check(TokenType.DOT):
                self._advance()
                name += '.' + self._expect(TokenType.IDENTIFIER).value
            return Variable(name)
        if token.type == TokenType.NUMBER:
            text = self._advance().value
            # Keep integers as int so they render without a trailing '.0'.
            return Literal(float(text) if '.' in text else int(text))
        if token.type == TokenType.STRING:
            return Literal(self._advance().value)
        # Parentheses are lexed as OPERATOR tokens, not a dedicated type.
        if token.type == TokenType.OPERATOR and token.value == '(':
            self._advance()
            expr = self._parse_expression()
            self._expect(TokenType.OPERATOR, ')')
            return expr
        return Literal(None)

    def _current(self):
        """The token at the cursor (not consumed)."""
        return self.tokens[self.pos]

    def _peek_next_type(self):
        """Type of the token after the current one, or None at the end."""
        if self.pos + 1 < len(self.tokens):
            return self.tokens[self.pos + 1].type
        return None

    def _advance(self):
        """Consume and return the current token."""
        if self.pos >= len(self.tokens):
            raise SyntaxError("Unexpected end of template")
        token = self.tokens[self.pos]
        self.pos += 1
        return token

    def _check(self, *types):
        """True when the current token's type is one of *types*."""
        if self.pos >= len(self.tokens):
            return False
        return self.tokens[self.pos].type in types

    def _expect(self, token_type, value=None):
        """Consume the current token, requiring its type (and value)."""
        token = self._advance()
        if token.type != token_type:
            raise SyntaxError(f"Expected {token_type}, got {token.type}")
        if value and token.value != value:
            raise SyntaxError(f"Expected {value}, got {token.value}")
        return token

    def _is_end(self):
        """True at EOF or past the last token."""
        return self.pos >= len(self.tokens) or self._current().type == TokenType.EOF
Compiler
Create template_engine/engine.py to render templates:
import os
from .lexer import Lexer
from .parser import Parser, TextNode, VariableNode, IfNode, ForNode, IncludeNode, ExtendsNode, BlockNode
from .parser import Expression, Variable, Literal, BinaryOp
class TemplateEngine:
def __init__(self, template_dir='templates'):
self.template_dir = template_dir
self.filters = {}
self._register_builtin_filters()
def _register_builtin_filters(self):
self.filters['upper'] = lambda x: str(x).upper() if x else ''
self.filters['lower'] = lambda x: str(x).lower() if x else ''
self.filters['capitalize'] = lambda x: str(x).capitalize() if x else ''
self.filters['strip'] = lambda x: str(x).strip() if x else ''
self.filters['length'] = lambda x: len(x) if x else 0
self.filters['first'] = lambda x: x[0] if x else None
self.filters['last'] = lambda x: x[-1] if x else None
self.filters['join'] = lambda x, sep=', ': sep.join(str(i) for i in x) if x else ''
self.filters['default'] = lambda x, val='': x if x else val
def register_filter(self, name: str, func):
self.filters[name] = func
def render(self, template_name: str, context: dict = None) -> str:
context = context or {}
template = self._load_template(template_name)
ast = self._parse(template)
if isinstance(ast, ExtendsNode):
return self._render_extends(ast, context)
return self._render_nodes(ast, context)
def render_string(self, template_string: str, context: dict = None) -> str:
context = context or {}
ast = self._parse(template_string)
return self._render_nodes(ast, context)
def _load_template(self, name: str) -> str:
path = os.path.join(self.template_dir, name)
with open(path, 'r') as f:
return f.read()
def _parse(self, template: str):
lexer = Lexer(template)
tokens = lexer.tokenize()
parser = Parser(tokens)
return parser.parse()
def _render_nodes(self, nodes, context):
result = []
for node in nodes:
result.append(self._render_node(node, context))
return ''.join(result)
def _render_node(self, node, context):
if isinstance(node, TextNode):
return node.value
elif isinstance(node, VariableNode):
return self._render_variable(node, context)
elif isinstance(node, IfNode):
return self._render_if(node, context)
elif isinstance(node, ForNode):
return self._render_for(node, context)
elif isinstance(node, IncludeNode):
return self.render(node.template_name, context)
elif isinstance(node, BlockNode):
return self._render_nodes(node.body, context)
return ''
def _render_variable(self, node: VariableNode, context):
value = self._eval_expression(node.name, context)
for filter_name, filter_arg in node.filters:
if filter_name in self.filters:
filter_func = self.filters[filter_name]
if filter_arg:
value = filter_func(value, self._eval_expression(filter_arg, context))
else:
value = filter_func(value)
return str(value) if value is not None else ''
def _render_if(self, node: IfNode, context):
if self._eval_expression(node.condition, context):
return self._render_nodes(node.body, context)
for elif_cond, elif_body in node.elif_bodies:
if self._eval_expression(elif_cond, context):
return self._render_nodes(elif_body, context)
return self._render_nodes(node.else_body, context)
def _render_for(self, node: ForNode, context):
iterable = self._eval_expression(node.iterable, context)
result = []
for item in iterable:
new_context = dict(context)
new_context[node.variable] = item
result.append(self._render_nodes(node.body, new_context))
return ''.join(result)
def _render_extends(self, node: ExtendsNode, context):
parent_template = self._load_template(node.template_name)
parent_ast = self._parse(parent_template)
return self._render_nodes(parent_ast, context)
def _eval_expression(self, expr, context):
if isinstance(expr, Literal):
return expr.value
elif isinstance(expr, Variable):
return self._resolve_variable(expr.name, context)
elif isinstance(expr, BinaryOp):
return self._eval_binary_op(expr, context)
return None
def _resolve_variable(self, name: str, context):
parts = name.split('.')
value = context
for part in parts:
if isinstance(value, dict):
value = value.get(part)
elif hasattr(value, part):
value = getattr(value, part)
else:
return None
return value
def _eval_binary_op(self, op: BinaryOp, context):
left = self._eval_expression(op.left, context)
right = self._eval_expression(op.right, context)
if op.op == '==':
return left == right
elif op.op == '!=':
return left != right
elif op.op == '<':
return left < right
elif op.op == '>':
return left > right
elif op.op == '<=':
return left <= right
elif op.op == '>=':
return left >= right
elif op.op == '+':
return left + right
elif op.op == '-':
return left - right
elif op.op == '*':
return left * right
elif op.op == '/':
return left / right
elif op.op == '%':
return left % right
elif op.op == 'and':
return bool(left and right)
elif op.op == 'or':
return bool(left or right)
return None
Filters and Functions
Register custom filters with the engine:
from template_engine import TemplateEngine

engine = TemplateEngine('templates')


def reverse_filter(value):
    """Reverse a string, or any other sequence into a list."""
    if isinstance(value, str):
        return value[::-1]
    return list(reversed(value))


def format_filter(value, format_string):
    """Apply str.format-style formatting, e.g. "${:.2f}"."""
    return format_string.format(value)


# register_filter(name, func) stores the callable in the engine's
# filter table so templates can apply it with the pipe syntax.
engine.register_filter('reverse', reverse_filter)
engine.register_filter('format', format_filter)

# Usage in template:
# {{ name|upper }}
# {{ items|join:", " }}
# {{ price|format:"${:.2f}" }}
# {{ text|reverse }}
Testing the Template Engine
Create test templates:
# templates/base.html
<html>
<head>
<title>{% block title %}My Site{% endblock %}</title>
</head>
<body>
<header>Welcome to My Site</header>
<main>
{% block content %}{% endblock %}
</main>
<footer>© 2024</footer>
</body>
</html>
# templates/index.html
{% extends "base.html" %}
{% block title %}Home - My Site{% endblock %}
{% block content %}
<h1>Welcome, {{ user_name|default:"Guest" }}!</h1>
{% if is_logged_in %}
<p>You have {{ messages|length }} messages.</p>
{% else %}
<p>Please log in.</p>
{% endif %}
<h2>Recent Posts</h2>
<ul>
{% for post in posts %}
<li>
<strong>{{ post.title|upper }}</strong>
<p>{{ post.content|truncate:50 }}</p>
</li>
{% endfor %}
</ul>
{% endblock %}
Render the template:
from template_engine import TemplateEngine

# Point the engine at the directory holding base.html and index.html.
engine = TemplateEngine('templates')

# Data exposed to the template; dotted access like {{ post.title }}
# resolves dict keys as well as object attributes.
context = {
    'user_name': 'John',
    'is_logged_in': True,
    'posts': [
        {'title': 'Hello World', 'content': 'This is my first post about programming.'},
        {'title': 'Template Engines', 'content': 'Learn how to build your own template engine.'},
    ]
}

# Load, parse and render index.html with the context above.
output = engine.render('index.html', context)
print(output)
Summary
Congratulations! You've built a complete template engine. Here's what you learned:
- Tokenizer - How to break templates into tokens
- Parser - How to build an AST from tokens
- Compiler - How to render templates with data
- Filters - How to add transformation functions
- Template Inheritance - How to create reusable layouts
Possible Extensions
- Add template caching
- Implement auto-escaping
- Add macros
- Implement whitespace control
- Add async rendering