← Back to Tutorials
Python

Build a Configuration Parser

Difficulty: Intermediate · Est. Time: ~3 hours

Introduction

Configuration parsers read configuration files in various formats and make them accessible to applications. Many applications use custom config formats that require specialized parsing.

What You'll Build
  • INI file parser
  • Environment variable support
  • Variable interpolation
  • Type coercion

Core Concepts

INI Format

INI files consist of sections containing key-value pairs, and they support comments that begin with # or ;.

Lexer

Create configparser/lexer.py:

import re
from enum import Enum, auto
from dataclasses import dataclass


class TokenType(Enum):
    """Kinds of tokens the lexer can emit."""

    SECTION = auto()  # text between '[' and ']' of a section header
    KEY = auto()      # left-hand side of a key/value line
    VALUE = auto()    # right-hand side of a key/value line
    COMMENT = auto()  # comment text, including its leading '#' or ';'
    NEWLINE = auto()  # a single '\n'
    EOF = auto()      # appended once after the whole input has been scanned


@dataclass
class Token:
    """A single lexical unit produced by the lexer."""

    type: TokenType  # which kind of token this is
    value: str       # token text (section name, key, value, raw comment, ...)
    line: int        # line number (counting from 1) on which the token was read


class Lexer:
    """Turn INI-style text into a flat list of ``Token`` objects.

    Recognised constructs are ``[section]`` headers, ``key = value`` pairs,
    comments introduced by ``#`` or ``;``, and newlines. Spaces, tabs and any
    other character that cannot start one of those constructs are skipped.
    """

    def __init__(self, text: str):
        self.text = text
        self.pos = 0   # index of the next unread character
        self.line = 1  # line number of ``pos``, counting from 1
        self.tokens = []

    def tokenize(self):
        """Scan the remaining text and return the accumulated token list.

        The returned list always ends with an ``EOF`` token.
        """
        while self.pos < len(self.text):
            ch = self.text[self.pos]
            if ch == '\n':
                self.tokens.append(Token(TokenType.NEWLINE, '\n', self.line))
                self.pos += 1
                self.line += 1
            elif ch in ' \t':
                self.pos += 1
            elif ch in '#;':
                self._read_comment()
            elif ch == '[':
                self._read_section()
            elif ch.isalnum() or ch in '_-':
                self._read_key_value()
            else:
                # Unknown character: skip it rather than fail.
                self.pos += 1

        self.tokens.append(Token(TokenType.EOF, '', self.line))
        return self.tokens

    def _read_comment(self):
        """Emit a COMMENT token running up to (not including) the newline."""
        start = self.pos
        while self.pos < len(self.text) and self.text[self.pos] != '\n':
            self.pos += 1
        self.tokens.append(
            Token(TokenType.COMMENT, self.text[start:self.pos], self.line)
        )

    def _read_section(self):
        """Emit a SECTION token holding the stripped text after ``[``.

        The header ends at the closing ``]`` or at the end of the line,
        whichever comes first. Bounding the scan at the newline means a
        header that is missing its ``]`` no longer swallows every following
        line of the file into the section name.
        """
        start = self.pos
        self.pos += 1  # skip '['
        while self.pos < len(self.text) and self.text[self.pos] not in ']\n':
            self.pos += 1

        value = self.text[start + 1:self.pos].strip()

        # Consume the closing bracket when present; a newline is left in
        # place so tokenize() emits NEWLINE and advances the line counter.
        if self.pos < len(self.text) and self.text[self.pos] == ']':
            self.pos += 1

        self.tokens.append(Token(TokenType.SECTION, value, self.line))

    def _read_key_value(self):
        """Emit a KEY token, followed by a VALUE token when ``=`` is present.

        The key runs up to the first ``=`` or newline. The value runs up to
        the newline or the first ``#``/``;``, so a trailing comment is left
        for the comment reader. Both are stripped of surrounding whitespace.
        """
        start = self.pos
        while self.pos < len(self.text) and self.text[self.pos] not in '=\n':
            self.pos += 1

        key = self.text[start:self.pos].strip()

        if self.pos < len(self.text) and self.text[self.pos] == '=':
            self.pos += 1  # skip '='
            while self.pos < len(self.text) and self.text[self.pos] in ' \t':
                self.pos += 1

            start = self.pos
            while self.pos < len(self.text) and self.text[self.pos] not in '\n#;':
                self.pos += 1

            value = self.text[start:self.pos].strip()
            self.tokens.append(Token(TokenType.KEY, key, self.line))
            self.tokens.append(Token(TokenType.VALUE, value, self.line))
        else:
            # A bare key with no '=' produces a KEY token only.
            self.tokens.append(Token(TokenType.KEY, key, self.line))

Parser

Create configparser/parser.py:

from typing import Dict, Any, Optional
from .lexer import Lexer, Token, TokenType


class ConfigParser:
    """Parse INI-style text into nested dictionaries keyed by section.

    Raw values are stored as strings; ``get`` and ``get_section`` convert
    them to ``bool``, ``int`` or ``float`` on the way out where possible.
    """

    def __init__(self):
        self.config: Dict[str, Dict[str, str]] = {}
        self._current_section = 'DEFAULT'

    def parse(self, text: str) -> Dict[str, Dict[str, str]]:
        """Tokenize *text* and rebuild ``self.config`` from the tokens.

        Keys that appear before any section header land in ``'DEFAULT'``.
        A KEY token is stored only when a VALUE token follows it directly.
        """
        tokens = Lexer(text).tokenize()

        self.config = {'DEFAULT': {}}
        self._current_section = 'DEFAULT'

        # Pair every token with its successor (None after the last one) so a
        # KEY can look at the token that follows it.
        successors = tokens[1:] + [None]
        for current, following in zip(tokens, successors):
            if current.type == TokenType.SECTION:
                self._current_section = current.value
                self.config.setdefault(self._current_section, {})
            elif current.type == TokenType.KEY:
                has_value = (
                    following is not None
                    and following.type == TokenType.VALUE
                )
                if has_value:
                    section_values = self.config[self._current_section]
                    section_values[current.value] = following.value

        return self.config

    def get(self, key: str, section: str = 'DEFAULT', default: Any = None) -> Any:
        """Return the coerced value of *key* in *section*, else *default*."""
        try:
            raw = self.config[section][key]
        except KeyError:
            return default
        return self._coerce(raw)

    def get_section(self, section: str) -> Dict[str, Any]:
        """Return a copy of *section* with coerced values, or ``{}`` if absent."""
        if section not in self.config:
            return {}

        coerced = {}
        for name, raw in self.config[section].items():
            coerced[name] = self._coerce(raw)
        return coerced

    def _coerce(self, value: str) -> Any:
        """Convert a raw string to ``bool``, ``float`` or ``int`` when it fits.

        Boolean words are matched case-insensitively. A value containing a
        dot is tried as ``float``, anything else as ``int``; if the
        conversion fails the original string is returned unchanged.
        """
        lowered = value.lower()
        if lowered in ('true', 'yes', 'on'):
            return True
        if lowered in ('false', 'no', 'off'):
            return False

        convert = float if '.' in value else int
        try:
            return convert(value)
        except ValueError:
            return value

Interpolation

import os
import re


class Interpolator:
    """Expand ``${...}`` placeholders inside configuration values.

    Supported placeholder forms:

    * ``${env:NAME}`` is replaced by the environment variable ``NAME``.
    * ``${key}`` is looked up in the ``'DEFAULT'`` section.
    * ``${section:key}`` is looked up in the named section.

    Placeholders that cannot be resolved are left in the text unchanged.

    Args:
        parser: Object providing ``get(key, section, default)``. Whatever
            it returns is converted with ``str()`` before substitution.
    """

    def __init__(self, parser):
        self.parser = parser

    def interpolate(self, value: str) -> str:
        """Return *value* with environment and config placeholders expanded.

        Environment placeholders are expanded first, then config ones.
        """
        value = self._interpolate_env(value)
        value = self._interpolate_vars(value)
        return value

    def _interpolate_env(self, value: str) -> str:
        """Replace ``${env:NAME}`` with ``os.environ['NAME']`` when it is set."""
        pattern = r'\$\{env:([^}]+)\}'

        def replacer(match):
            var_name = match.group(1)
            # Keep the placeholder text when the variable is not defined.
            return os.environ.get(var_name, match.group(0))

        return re.sub(pattern, replacer, value)

    def _interpolate_vars(self, value: str) -> str:
        """Replace ``${key}`` and ``${section:key}`` with values from the parser."""
        # The capture group must allow ':' or ``${section:key}`` can never
        # match. The lookahead skips ``${env:...}`` placeholders that the
        # environment pass deliberately left unresolved.
        pattern = r'\$\{(?!env:)([^}]+)\}'

        def replacer(match):
            # Split on the first ':' only, so a key may itself contain ':'.
            head, sep, tail = match.group(1).partition(':')
            if sep:
                section, key = head, tail
            else:
                section, key = 'DEFAULT', head

            # Passing the placeholder as the default leaves it untouched
            # when the lookup fails.
            return str(self.parser.get(key, section, match.group(0)))

        return re.sub(pattern, replacer, value)

Testing

# Sample INI document with two sections; it is parsed below.
config_text = """
# Database configuration
[database]
host = localhost
port = 5432
name = myapp

[server]
host = ${database:host}
port = 8080
debug = true
"""

parser = ConfigParser()
config = parser.parse(config_text)

# Show one coerced lookup per output line: a string, an int and a bool.
print(
    parser.get('host', 'database'),
    parser.get('port', 'server'),
    parser.get('debug', 'server'),
    sep='\n',
)

Summary

You built a configuration parser with INI format support, type coercion, and variable interpolation.