While dbt doesn't implement traditional lambda functions like Python or JavaScript, it provides powerful functional programming capabilities through Jinja templating that enable lambda-like behavior, higher-order functions, and advanced SQL generation patterns.
Lambda functions in traditional programming are anonymous functions that can be defined inline. In dbt, we achieve similar functionality through:
-- Traditional approach
select
order_id,
(amount / 100)::numeric(16, 2) as amount_dollars
from raw_orders
-- Lambda-like macro approach
{% macro to_dollars(column_name, scale=2) %}
({{ column_name }} / 100)::numeric(16, {{ scale }})
{% endmacro %}
select
order_id,
{{ to_dollars('amount') }} as amount_dollars
from raw_orders
dbt macros function as first-class citizens that can be passed around, composed, and used to create higher-order abstractions.
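For example, because one macro can call another, small building blocks compose into larger ones. A minimal sketch (the round_dollars helper is hypothetical):

-- Composition: a macro built from another macro
{% macro round_dollars(column_name) %}
round({{ to_dollars(column_name) }}, 0)
{% endmacro %}

select
order_id,
{{ round_dollars('amount') }} as amount_whole_dollars
from raw_orders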
-- Safe division macro (handles division by zero)
{% macro safe_divide(numerator, denominator, default_value=0) %}
case
when {{ denominator }} = 0 then {{ default_value }}
else {{ numerator }} / {{ denominator }}
end
{% endmacro %}
-- Usage
select
order_id,
{{ safe_divide('revenue', 'quantity', 'null') }} as unit_price
from orders
-- Generic transformation macro
{% macro transform_column(column_name, transformation_type, params={}) %}
{% if transformation_type == 'normalize' %}
({{ column_name }} - {{ params.get('min', 0) }}) /
({{ params.get('max', 1) }} - {{ params.get('min', 0) }})
{% elif transformation_type == 'log' %}
log({{ params.get('base', 10) }}, {{ column_name }})
{% elif transformation_type == 'scale' %}
{{ column_name }} * {{ params.get('factor', 1) }}
{% else %}
{{ column_name }}
{% endif %}
{% endmacro %}
-- Usage examples
select
customer_id,
{{ transform_column('revenue', 'scale', {'factor': 1.1}) }} as scaled_revenue,
{{ transform_column('score', 'normalize', {'min': 0, 'max': 100}) }} as normalized_score
from customer_metrics
Create macros that accept other macros as parameters, enabling true higher-order function behavior.
-- Higher-order macro that applies a function to multiple columns
{% macro apply_to_columns(columns, transform_macro) %}
{# transform_macro is a macro object passed in by the caller #}
{% for column in columns %}
{{ transform_macro(column) }} as {{ column }}_transformed
{%- if not loop.last -%},{%- endif %}
{% endfor %}
{% endmacro %}
-- Transformation function
{% macro standardize(column_name, mean=0, std=1) %}
({{ column_name }} - {{ mean }}) / {{ std }}
{% endmacro %}
-- Usage: Apply standardization to multiple columns
-- (pass the macro object itself, not a quoted name)
select
customer_id,
{{ apply_to_columns(['revenue', 'orders', 'lifetime_value'], standardize) }}
from customer_stats
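With the macro object passed in and standardize's default mean and std, the select above compiles to roughly:

select
customer_id,
(revenue - 0) / 1 as revenue_transformed,
(orders - 0) / 1 as orders_transformed,
(lifetime_value - 0) / 1 as lifetime_value_transformed
from customer_stats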
-- Pipeline macro for chaining transformations
{% macro pipeline(column_name, transformations) %}
{% set result = column_name %}
{% for transform in transformations %}
{% set result = transform.macro ~ '(' ~ result ~ ', ' ~ transform.params | join(', ') ~ ')' %}
{% endfor %}
{{ result }}
{% endmacro %}
-- Define transformation chain
{% set price_pipeline = [
{'macro': 'coalesce', 'params': ['0']},
{'macro': 'round', 'params': ['2']},
{'macro': 'greatest', 'params': ['0.01']}
] %}
select
product_id,
{{ pipeline('raw_price', price_pipeline) }} as clean_price
from products
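The chain is applied left to right, so each step wraps the previous one and the example compiles to roughly:

select
product_id,
greatest(round(coalesce(raw_price, 0), 2), 0.01) as clean_price
from products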
Use lambda-like patterns to generate SQL dynamically based on runtime conditions and data.
-- Lambda-like conditional column selector
{% macro conditional_select(table_name, conditions) %}
{% set active = conditions | selectattr('when') | list %}
select
{% for condition in active %}
{{ condition.select }} as {{ condition.alias }}
{%- if not loop.last -%},{%- endif %}
{% endfor %}
from {{ table_name }}
{% endmacro %}
-- Usage with runtime conditions
{% set column_conditions = [
{'when': var('include_revenue', true), 'select': 'revenue', 'alias': 'total_revenue'},
{'when': var('include_costs', false), 'select': 'costs', 'alias': 'total_costs'},
{'when': var('include_profit', true), 'select': 'revenue - costs', 'alias': 'profit'}
] %}
{{ conditional_select('financial_data', column_conditions) }}
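With the default var() values above (revenue and profit enabled, costs disabled), this compiles to roughly:

select
revenue as total_revenue,
revenue - costs as profit
from financial_data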
-- Get unique values and generate case statements
{% macro generate_pivot_columns(table_name, pivot_column, value_column) %}
{% set pivot_query %}
select distinct {{ pivot_column }}
from {{ table_name }}
where {{ pivot_column }} is not null
order by {{ pivot_column }}
{% endset %}
{% if execute %}
{% set results = run_query(pivot_query) %}
{% set pivot_values = results.columns[0].values() %}
{% else %}
{% set pivot_values = [] %}
{% endif %}
{% for value in pivot_values %}
sum(case when {{ pivot_column }} = '{{ value }}'
then {{ value_column }} else 0 end) as {{ value | replace(' ', '_') | lower }}
{%- if not loop.last -%},{%- endif %}
{% endfor %}
{% endmacro %}
-- Usage: Dynamic pivot based on actual data
select
customer_id,
{{ generate_pivot_columns('order_details', 'product_category', 'amount') }}
from order_details
group by customer_id
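Assuming product_category contains the (hypothetical) values 'Books' and 'Games', the compiled query would look like:

select
customer_id,
sum(case when product_category = 'Books' then amount else 0 end) as books,
sum(case when product_category = 'Games' then amount else 0 end) as games
from order_details
group by customer_id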
Implement functional programming control flow patterns using Jinja.
-- Map pattern: Apply function to each element
{% macro map_columns(columns, func_name, params=[]) %}
{# params are extra positional arguments appended after the column #}
{% for col in columns %}
{{ func_name }}({{ col }}{% for p in params %}, {{ p }}{% endfor %}) as {{ col }}_{{ func_name }}
{%- if not loop.last -%},{%- endif %}
{% endfor %}
{% endmacro %}
-- Filter pattern: Select columns based on condition
{% macro filter_columns(columns, condition_func) %}
{% set filtered = [] %}
{% for col in columns %}
{% if condition_func(col) %}
{% set _ = filtered.append(col) %}
{% endif %}
{% endfor %}
{{ filtered | join(', ') }}
{% endmacro %}
-- Reduce pattern: Aggregate columns
{% macro reduce_columns(columns, operation='sum', separator=' + ') %}
{% if operation == 'sum' %}
{{ columns | join(separator) }}
{% elif operation == 'concat' %}
concat({{ columns | join(', ') }})
{% elif operation == 'coalesce' %}
coalesce({{ columns | join(', ') }})
{% endif %}
{% endmacro %}
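A usage sketch for these patterns (column names are illustrative):

-- Map, reduce, and coalesce-reduce in one select
select
customer_id,
{{ map_columns(['revenue', 'orders'], 'abs') }},
{{ reduce_columns(['q1_sales', 'q2_sales', 'q3_sales', 'q4_sales'], 'sum') }} as total_sales,
{{ reduce_columns(['preferred_name', 'legal_name'], 'coalesce') }} as display_name
from customer_stats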
Build small helper macros that can be chained like custom filters, giving you composable, lambda-like building blocks.
-- In macros/filters.sql
{% macro snake_case(text) %}
{{ text | lower | replace(' ', '_') | replace('-', '_') }}
{% endmacro %}
{% macro clean_column_name(column_name) -%}
{{ snake_case(column_name) | trim | truncate(60, True, '') }}
{%- endmacro %}
-- Function composition with filters
{% macro process_column_list(columns) %}
{% set processed = [] %}
{% for col in columns %}
{% set clean_name = clean_column_name(col) %}
{% set _ = processed.append(clean_name) %}
{% endfor %}
{{ processed }}
{% endmacro %}
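A usage sketch (the raw header names are illustrative):

{% set raw_headers = ['Customer Name', 'Life-Time Value'] %}
{{ process_column_list(raw_headers) }}
-- renders roughly: ['customer_name', 'life_time_value']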
Leverage dbt's built-in functions for functional programming patterns.
-- Using var() for function parameterization
{% set transformation_rules = var('transformations', {
'revenue': {'type': 'currency', 'scale': 2},
'quantity': {'type': 'integer', 'min': 0},
'rating': {'type': 'range', 'min': 1, 'max': 5}
}) %}
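One way to consume such a rule set is a select list generated from it. This is a sketch; raw_metrics is a placeholder table name:

-- Sketch: select list driven by the transformation_rules variable
select
{% for column_name, rule in transformation_rules.items() %}
{% if rule.type == 'currency' %}
round({{ column_name }}, {{ rule.scale }}) as {{ column_name }}
{% elif rule.type == 'integer' %}
greatest({{ column_name }}, {{ rule.min }}) as {{ column_name }}
{% elif rule.type == 'range' %}
least(greatest({{ column_name }}, {{ rule.min }}), {{ rule.max }}) as {{ column_name }}
{% endif %}
{%- if not loop.last -%},{%- endif %}
{% endfor %}
from raw_metrics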
-- Using env_var() for environment-based functions
{% macro get_table_suffix() -%}
{%- if env_var('DBT_ENVIRONMENT', 'dev') == 'prod' -%}
_prod
{%- else -%}
_{{ env_var('USER', 'dev') }}
{%- endif -%}
{%- endmacro %}
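A usage sketch: an environment-aware alias in a model config (the fct_orders name is illustrative):

{{ config(alias='fct_orders' ~ get_table_suffix()) }}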
-- Using run_query() for data-driven functions
{% macro get_max_date(table_name, date_column) %}
{% set query %}
select max({{ date_column }}) as max_date from {{ table_name }}
{% endset %}
{% if execute %}
{% set result = run_query(query) %}
{% set max_date = result.columns[0].values()[0] %}
'{{ max_date }}'
{% else %}
current_date
{% endif %}
{% endmacro %}
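A usage sketch of a data-driven filter (the model names are illustrative):

select *
from {{ ref('stg_events') }}
where event_date > {{ get_max_date(ref('fct_events'), 'event_date') }}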
-- Curried-style factory for creating specialized validators
{% macro validate_email(column) %}
{{ column }} like '%@%' and position('@' in {{ column }}) > 1
{% endmacro %}

{% macro validate_phone(column) %}
length(regexp_replace({{ column }}, '[^0-9]', '', 'g')) >= 10
{% endmacro %}

{% macro validate_url(column) %}
{{ column }} like 'http%' and length({{ column }}) > 7
{% endmacro %}

{% macro create_validator(validation_type) %}
{% if validation_type == 'email' %}
{{ return(validate_email) }}
{% elif validation_type == 'phone' %}
{{ return(validate_phone) }}
{% elif validation_type == 'url' %}
{{ return(validate_url) }}
{% endif %}
{% endmacro %}
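A usage sketch (column and table names are illustrative):

{% set is_valid_email = create_validator('email') %}

select
customer_id,
case when {{ is_valid_email('customer_email') }} then 'valid' else 'invalid' end as email_status
from customers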
-- Partial application pattern: fix some arguments now, supply the rest later
{% macro create_date_filter(comparison_op, reference_date) %}
{{ return({'op': comparison_op, 'ref': reference_date}) }}
{% endmacro %}

{% macro apply_date_filter(partial_filter, date_column) %}
{{ date_column }} {{ partial_filter.op }} '{{ partial_filter.ref }}'
{% endmacro %}
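A usage sketch (the reference date is illustrative):

{% set since_2024 = create_date_filter('>=', '2024-01-01') %}

select *
from orders
where {{ apply_date_filter(since_2024, 'order_date') }}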
-- Memoization: cache run_query() results in a dict owned by the caller
{% macro memoized_query(cache, query_key, sql_query) %}
{% if query_key in cache %}
{% do log("Cache hit for: " ~ query_key, info=true) %}
{{ return(cache[query_key]) }}
{% elif execute %}
{% set result = run_query(sql_query) %}
{% do cache.update({query_key: result}) %}
{% do log("Cache miss for: " ~ query_key, info=true) %}
{{ return(result) }}
{% endif %}
{% endmacro %}
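A usage sketch (the query and cache key are illustrative); the cache dict lives in the calling model, so the second call reuses the first result:

{% set query_cache = {} %}
{% set segments = memoized_query(query_cache, 'segments', 'select distinct segment from analytics.customers') %}
{% set segments_cached = memoized_query(query_cache, 'segments', 'select distinct segment from analytics.customers') %}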
Always guard run_query() calls with the execute variable so that no queries are issued while dbt parses the project.
-- Test macro functionality
{% macro test_safe_divide() %}
select
-- Test normal division
{{ safe_divide('10', '2') }} as normal_div,
-- Test division by zero
{{ safe_divide('10', '0', 'null') }} as zero_div,
-- Test with variables
{{ safe_divide('revenue', 'quantity', '0') }} as dynamic_div
from (select 100 as revenue, 0 as quantity) as test_data
{% endmacro %}
-- Unit test for macro
select
case
when normal_div = 5 and zero_div is null and dynamic_div = 0
then 'PASS'
else 'FAIL'
end as test_result
from ({{ test_safe_divide() }}) as test
| Example | File | Description |
|---|---|---|
| Basic Lambda Macros | 01_basic_lambda_macros.sql | Simple transformation macros that behave like lambda functions |
| Parameterized Transformations | 02_parameterized_transformations.sql | Advanced parameterized functions with multiple transformation types |
| Higher-Order Functions | 03_higher_order_functions.sql | Functions that accept other functions as parameters |
| Dynamic SQL Generation | 04_dynamic_sql_generation.sql | Runtime SQL generation using lambda-like patterns |
| Functional Control Structures | 05_functional_control_structures.sql | Map, filter, reduce patterns using Jinja |
| Custom Filters & Composition | 06_custom_filters_composition.sql | Function composition and custom filter patterns |
| Advanced Functional Patterns | 07_advanced_functional_patterns.sql | Currying, memoization, closures, and monadic patterns |
| Real-World Use Cases | 08_real_world_use_cases.sql | Production-ready examples: data quality, ETL pipelines, customer segmentation |
-- Configuration-driven quality checks
{% set quality_rules = {
'completeness': {
'columns': ['customer_id', 'order_date', 'amount'],
'threshold': 0.95
},
'validity': {
'email': ['customer_email'],
'date_range': {'order_date': {'min': '2020-01-01', 'max': 'current_date'}},
'positive_numbers': ['amount', 'quantity']
},
'consistency': {
'referential': [
{'child': 'customer_id', 'parent': 'customers.id'},
{'child': 'product_id', 'parent': 'products.id'}
]
}
} %}
-- Lambda-like quality check generator
{% macro generate_quality_checks(table_name, rules) %}
{% for rule_type, rule_config in rules.items() %}
{% if rule_type == 'completeness' %}
{{ completeness_check(table_name, rule_config.columns, rule_config.threshold) }}
{% elif rule_type == 'validity' %}
{{ validity_checks(table_name, rule_config) }}
{% elif rule_type == 'consistency' %}
{{ consistency_checks(table_name, rule_config) }}
{% endif %}
{% if not loop.last %} union all {% endif %}
{% endfor %}
{% endmacro %}
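The completeness_check, validity_checks, and consistency_checks macros are assumed to live alongside this one (see 08_real_world_use_cases.sql); a minimal sketch of completeness_check might look like this:

{% macro completeness_check(table_name, columns, threshold) %}
{% for column in columns %}
select
'{{ column }}' as column_name,
'completeness' as check_type,
case when count({{ column }}) * 1.0 / nullif(count(*), 0) >= {{ threshold }} then 'PASS' else 'FAIL' end as result
from {{ table_name }}
{% if not loop.last %} union all {% endif %}
{% endfor %}
{% endmacro %}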
-- Pipeline stage definition
{% set etl_pipeline = [
{'stage': 'extract', 'func': 'extract_source_data', 'deps': []},
{'stage': 'validate', 'func': 'validate_raw_data', 'deps': ['extract']},
{'stage': 'transform', 'func': 'apply_transformations', 'deps': ['validate']},
{'stage': 'aggregate', 'func': 'create_aggregations', 'deps': ['transform']},
{'stage': 'publish', 'func': 'publish_to_marts', 'deps': ['aggregate']}
] %}
-- Functional pipeline executor
{% macro execute_pipeline(pipeline_config, current_stage) %}
{% set current_stage_config = pipeline_config | selectattr('stage', 'equalto', current_stage) | first %}
-- Check dependencies
{% for dep in current_stage_config.deps %}
{{ log("Checking dependency: " ~ dep, info=true) }}
{% endfor %}
-- Execute stage function
{{ current_stage_config.func }}({{ current_stage }})
{% endmacro %}
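Note that the executor above renders the stage function call as text rather than invoking it. A fully self-contained variant (the stage macros and the raw.orders source are hypothetical) stores macro objects instead of names in the pipeline config, so the executor can call them directly:

-- Stage macros defined up front
{% macro stage_extract() %}
select * from raw.orders
{% endmacro %}

{% macro stage_validate() %}
select * from ({{ stage_extract() }}) as extracted where order_id is not null
{% endmacro %}

-- Pipeline config holds callables, not strings
{% set mini_pipeline = [
{'stage': 'extract', 'func': stage_extract},
{'stage': 'validate', 'func': stage_validate}
] %}

-- Run the final stage (each stage already wraps its dependency)
{% set final_stage = mini_pipeline | last %}
{{ final_stage.func() }}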