Build a Configuration Parser
Introduction
Configuration parsers read configuration files in various formats and make them accessible to applications. Many applications use custom config formats that require specialized parsing.
What You'll Build
- INI file parser
- Environment variable support
- Variable interpolation
- Type coercion
Core Concepts
INI Format
INI files are organized into sections (introduced by `[name]` headers) that contain `key = value` pairs; comments start with `#` or `;`.
Lexer
Create configparser/lexer.py:
import re
from enum import Enum, auto
from dataclasses import dataclass
class TokenType(Enum):
    """Kinds of token the lexer can emit."""

    # Values are spelled out; they are the same 1..6 sequence auto() assigns.
    SECTION = 1  # text between '[' and ']'
    KEY = 2      # text left of '=' on a key/value line
    VALUE = 3    # text right of '=' on a key/value line
    COMMENT = 4  # a '#' or ';' comment, up to the end of the line
    NEWLINE = 5  # a single '\n'
    EOF = 6      # appended once after the last real token
@dataclass
class Token:
    """One lexical unit produced by the lexer."""

    # Which kind of token this is.
    type: TokenType
    # The token's text (section name, key, value, comment text, '\n', or '').
    value: str
    # 1-based line number recorded by the lexer when the token was emitted.
    line: int
class Lexer:
    """Split INI-style text into a flat list of ``Token`` objects.

    Recognised constructs:

    * ``[section]`` headers            -> ``SECTION``
    * ``key = value`` lines            -> ``KEY`` followed by ``VALUE``
    * bare ``key`` lines (no ``=``)    -> ``KEY`` only
    * ``#`` / ``;`` comments           -> ``COMMENT``
    * line breaks                      -> ``NEWLINE``

    A final ``EOF`` token is always appended.
    """

    def __init__(self, text: str):
        self.text = text
        self.pos = 0
        self.line = 1
        self.tokens = []

    def tokenize(self):
        """Scan ``self.text`` from the start and return the token list.

        The scanner state is reset on every call, so calling ``tokenize``
        twice yields two equal results instead of appending a second ``EOF``
        to the first one.
        """
        self.pos = 0
        self.line = 1
        self.tokens = []

        while self.pos < len(self.text):
            ch = self.text[self.pos]
            if ch == '\n':
                self.tokens.append(Token(TokenType.NEWLINE, '\n', self.line))
                self.pos += 1
                self.line += 1
            elif ch in ' \t':
                self.pos += 1
            elif ch in '#;':
                self._read_comment()
            elif ch == '[':
                self._read_section()
            elif ch.isalnum() or ch in '_-':
                self._read_key_value()
            else:
                # Any other character cannot start a construct; skip it.
                self.pos += 1

        self.tokens.append(Token(TokenType.EOF, '', self.line))
        return self.tokens

    def _read_comment(self):
        """Consume a comment up to (not including) the end of the line."""
        start = self.pos
        while self.pos < len(self.text) and self.text[self.pos] != '\n':
            self.pos += 1
        self.tokens.append(
            Token(TokenType.COMMENT, self.text[start:self.pos], self.line)
        )

    def _read_section(self):
        """Consume a ``[name]`` header and emit a ``SECTION`` token.

        The scan stops at the closing ``]`` or at the end of the line,
        whichever comes first. Stopping at the newline keeps an unterminated
        header from swallowing the following lines and keeps line numbers
        correct; in that case the rest of the line is used as the name.
        """
        self.pos += 1  # skip '['
        start = self.pos
        while self.pos < len(self.text) and self.text[self.pos] not in ']\n':
            self.pos += 1
        name = self.text[start:self.pos].strip()
        # Only consume the terminator when it is actually there; a newline
        # must be left for tokenize() so the line counter advances.
        if self.pos < len(self.text) and self.text[self.pos] == ']':
            self.pos += 1
        self.tokens.append(Token(TokenType.SECTION, name, self.line))

    def _read_key_value(self):
        """Consume a ``key`` or ``key = value`` line.

        Emits ``KEY`` and, when an ``=`` is present, ``VALUE``. Inside a
        value, ``#`` or ``;`` starts an inline comment only when preceded by
        a space or tab, so values such as ``http://host/#frag`` or ``a;b``
        are kept intact while ``5432 # port`` still yields ``5432``.
        """
        text = self.text
        end = len(text)

        start = self.pos
        while self.pos < end and text[self.pos] not in '=\n':
            self.pos += 1
        key = text[start:self.pos].strip()
        self.tokens.append(Token(TokenType.KEY, key, self.line))

        if self.pos >= end or text[self.pos] != '=':
            return  # bare key, no value on this line

        self.pos += 1  # skip '='
        while self.pos < end and text[self.pos] in ' \t':
            self.pos += 1

        start = self.pos
        while self.pos < end and text[self.pos] != '\n':
            # pos - 1 is always valid here: at least the key and '=' precede.
            if text[self.pos] in '#;' and text[self.pos - 1] in ' \t':
                break  # leave the comment for tokenize() to pick up
            self.pos += 1
        value = text[start:self.pos].strip()
        self.tokens.append(Token(TokenType.VALUE, value, self.line))
Parser
Create configparser/parser.py:
from typing import Dict, Any, Optional
from .lexer import Lexer, Token, TokenType
class ConfigParser:
    """Parse INI-style text into nested dictionaries of raw strings.

    ``self.config`` maps section name -> {key -> raw string value}. Keys that
    appear before any section header are stored under ``'DEFAULT'``. Type
    coercion happens on read (``get`` / ``get_section``), not on parse.
    """

    def __init__(self):
        self.config: Dict[str, Dict[str, str]] = {}
        self._current_section = 'DEFAULT'

    def parse(self, text: str) -> Dict[str, Dict[str, str]]:
        """Tokenize ``text`` and rebuild ``self.config`` from scratch.

        Returns the same dictionary that is stored on ``self.config``.
        """
        tokens = Lexer(text).tokenize()
        self.config = {'DEFAULT': {}}
        self._current_section = 'DEFAULT'

        total = len(tokens)
        index = 0
        while index < total:
            current = tokens[index]
            if current.type == TokenType.SECTION:
                self._current_section = current.value
                self.config.setdefault(self._current_section, {})
            elif current.type == TokenType.KEY:
                following = tokens[index + 1] if index + 1 < total else None
                if following is not None and following.type == TokenType.VALUE:
                    self.config[self._current_section][current.value] = following.value
                    index += 1  # the VALUE token has been consumed as well
            index += 1
        return self.config

    def get(self, key: str, section: str = 'DEFAULT', default: Any = None) -> Any:
        """Return the coerced value of ``key`` in ``section``.

        ``default`` is returned unchanged when the section or key is missing.
        """
        try:
            raw = self.config[section][key]
        except KeyError:
            return default
        return self._coerce(raw)

    def get_section(self, section: str) -> Dict[str, Any]:
        """Return every key of ``section`` with coerced values, or ``{}``."""
        if section not in self.config:
            return {}
        return {
            name: self._coerce(raw)
            for name, raw in self.config[section].items()
        }

    def _coerce(self, value: str) -> Any:
        """Convert a raw string to bool, float or int where it looks like one.

        ``true/yes/on`` and ``false/no/off`` (any case) become booleans; a
        value containing ``.`` is tried as a float, anything else as an int.
        Strings that fail the numeric conversion are returned unchanged.
        """
        lowered = value.lower()
        if lowered in ('true', 'yes', 'on'):
            return True
        if lowered in ('false', 'no', 'off'):
            return False

        converter = float if '.' in value else int
        try:
            return converter(value)
        except ValueError:
            return value
Interpolation
import os
import re
class Interpolator:
    """Expand ``${...}`` placeholders inside configuration values.

    Supported forms:

    * ``${env:NAME}``     -> the environment variable ``NAME``
    * ``${key}``          -> ``key`` from the ``'DEFAULT'`` section
    * ``${section:key}``  -> ``key`` from ``section``

    A placeholder that cannot be resolved is left in the text unchanged.
    """

    def __init__(self, parser):
        # ``parser`` is expected to expose a ``config`` dict of
        # {section: {key: raw string}}; a ``get(key, section, default)``
        # method is used as a fallback when it does not.
        self.parser = parser

    def interpolate(self, value: str) -> str:
        """Return ``value`` with environment, then config, references expanded."""
        value = self._interpolate_env(value)
        value = self._interpolate_vars(value)
        return value

    def _interpolate_env(self, value: str) -> str:
        """Replace ``${env:NAME}`` with ``os.environ['NAME']`` when it is set."""
        pattern = r'\$\{env:([^}]+)\}'

        def replacer(match):
            return os.environ.get(match.group(1), match.group(0))

        return re.sub(pattern, replacer, value)

    def _interpolate_vars(self, value: str) -> str:
        """Replace ``${key}`` and ``${section:key}`` with config values.

        The pattern has to allow ``:`` inside the braces, otherwise the
        section-qualified form can never match. ``${env:...}`` placeholders
        that survived the environment pass are skipped so they are not
        mistaken for a section named ``env``.
        """
        pattern = r'\$\{(?!env:)([^}]+)\}'

        def replacer(match):
            section, separator, key = match.group(1).partition(':')
            if not separator:
                # No colon: the whole reference is a key in DEFAULT.
                section, key = 'DEFAULT', section
            resolved = self._lookup_raw(section.strip(), key.strip())
            return match.group(0) if resolved is None else resolved

        return re.sub(pattern, replacer, value)

    def _lookup_raw(self, section: str, key: str):
        """Return the stored text for ``section``/``key``, or ``None``.

        The raw string is read from ``parser.config`` so that substituted
        text is not altered by type coercion (``yes`` must not turn into
        ``True``, nor ``01234`` into ``1234``).
        """
        sections = getattr(self.parser, 'config', None)
        if isinstance(sections, dict):
            raw = sections.get(section, {}).get(key)
            return None if raw is None else str(raw)
        # Fallback for parser objects that only offer get().
        found = self.parser.get(key, section, None)
        return None if found is None else str(found)
Testing
config_text = """
# Database configuration
[database]
host = localhost
port = 5432
name = myapp
[server]
host = ${database:host}
port = 8080
debug = true
"""

parser = ConfigParser()
config = parser.parse(config_text)

# One lookup per coerced type; each result is printed on its own line.
print(
    parser.get('host', 'database'),  # stays a string
    parser.get('port', 'server'),    # coerced to int
    parser.get('debug', 'server'),   # coerced to bool
    sep='\n',
)
Summary
You built a configuration parser with INI format support, type coercion, environment variable support, and variable interpolation.