import random import re from sqlite_static_helper import * _TABLE_HEADER_RE = re.compile( r'CREATE\s+(?:TEMP(?:ORARY)?\s+)?TABLE\s+(?:IF\s+NOT\s+EXISTS\s+)?' r'(?:[\w."`]+\.)?([\w"`]+)\s*\(', re.I, ) _CONSTRAINT_HEADS = {'PRIMARY', 'UNIQUE', 'CHECK', 'FOREIGN', 'CONSTRAINT'} def _split_top_level_commas(body: str) -> list[str]: depth = 0 cur: list[str] = [] out: list[str] = [] in_str = False quote = '' for ch in body: if in_str: cur.append(ch) if ch == quote: in_str = False continue if ch in ("'", '"', '`'): in_str = True quote = ch cur.append(ch) continue if ch == '(': depth += 1 cur.append(ch) elif ch == ')': depth -= 1 cur.append(ch) elif ch == ',' and depth == 0: out.append(''.join(cur)) cur = [] else: cur.append(ch) if cur: out.append(''.join(cur)) return out def get_schema(sql: str) -> dict[str, list[str]]: schema: dict[str, list[str]] = {} cursor = 0 while True: m = _TABLE_HEADER_RE.search(sql, cursor) if not m: break name = m.group(1).strip('"`') start = m.end() depth = 1 j = start while j < len(sql) and depth > 0: c = sql[j] if c == '(': depth += 1 elif c == ')': depth -= 1 j += 1 body = sql[start:j - 1] if depth == 0 else sql[start:] cols: list[str] = [] for piece in _split_top_level_commas(body): piece = piece.strip() if not piece: continue head = piece.split()[0].strip(' ,"`[]') if head.upper() in _CONSTRAINT_HEADS: continue if re.fullmatch(r'[A-Za-z_]\w*', head): cols.append(head) if cols: schema[name] = cols cursor = j return schema or {} def _pick_table(sql: str) -> str: return random.choice(list(get_schema(sql).keys())) def _pick_table_col(sql: str) -> tuple[str, str]: schema = get_schema(sql) table = random.choice(list(schema.keys())) cols = schema[table] or ['rowid'] return table, random.choice(cols) def mut_append_select_star(s: str) -> str: """Append a SELECT * statement for any existing table in the query""" return s + '\n' + f'SELECT * FROM {_pick_table(s)};' def mut_append_count(s: str) -> str: """Append a SELECT COUNT(*) statement for any existing table in the query""" return s + '\n' + f'\nSELECT COUNT(*) FROM {_pick_table(s)};' def mut_append_join(s: str) -> str: """Append a JOIN statement between two existing tables in the query""" schema = get_schema(s) if not schema: return s tables = list(schema.keys()) t1 = random.choice(tables) t2 = random.choice(tables) join_type = random.choice(JOINS) if 'NATURAL' in join_type or 'CROSS' in join_type: return s + '\n' + f'SELECT * FROM {t1} {join_type} {t2};' return s + '\n' + f'SELECT * FROM {t1} AS a {join_type} {t2} AS b ON a.rowid = b.rowid;' def mut_append_subquery(s: str) -> str: """Append a SELECT statement that queries a subquery""" table, col = _pick_table_col(s) style = random.choice([ f'SELECT * FROM {table} WHERE {col} IN (SELECT {col} FROM {table} t2 WHERE t2.{col} = {table}.{col});', f'SELECT * FROM {table} WHERE EXISTS (SELECT 1 FROM {table} t2 WHERE t2.{col} = {table}.{col});', f'SELECT * FROM {table} WHERE NOT EXISTS (SELECT 1 FROM {table} t2 WHERE t2.{col} = {table}.{col});', f'SELECT * FROM {table} WHERE {col} = (SELECT {col} FROM {table} t2 WHERE t2.{col} = {table}.{col} LIMIT 1);', f'SELECT {col}, (SELECT COUNT(*) FROM {table}) AS cnt FROM {table};', f'SELECT {col}, (SELECT MAX({col}) FROM {table}) AS max_val FROM {table};', f'SELECT {col}, (SELECT MIN({col}) FROM {table}) AS min_val FROM {table};', f'SELECT {col}, (SELECT AVG({col}) FROM {table}) AS avg_val FROM {table};', f'SELECT {col}, (SELECT SUM({col}) FROM {table}) AS sum_val FROM {table};', f'SELECT {col}, (SELECT {col} FROM {table} LIMIT 1) AS first_val FROM {table};', f'SELECT * FROM (SELECT * FROM {table}) AS sub;', f'SELECT * FROM (SELECT {col} FROM {table} WHERE {col} IS NOT NULL) AS sub;', f'SELECT * FROM (SELECT {col}, COUNT(*) FROM {table} GROUP BY {col}) AS sub;', f'SELECT * FROM (SELECT * FROM {table} ORDER BY {col}) AS sub LIMIT 10;', f'SELECT * FROM (SELECT * FROM {table}) AS t1 JOIN (SELECT * FROM {table}) AS t2 ON t1.{col} = t2.{col};', f'SELECT * FROM {table} WHERE EXISTS (SELECT 1 FROM {table} WHERE {col} > 0);', f'SELECT * FROM {table} WHERE EXISTS (SELECT 1 FROM {table} WHERE {col} < 0);', f'SELECT * FROM {table} WHERE EXISTS (SELECT 1 FROM {table} WHERE {col} = NULL);', f'SELECT * FROM {table} WHERE EXISTS (SELECT 1 FROM {table} WHERE 1 = 0);', f'SELECT * FROM {table} WHERE EXISTS (SELECT 1 FROM {table} WHERE 1 = 1);', f'SELECT * FROM {table} WHERE NOT EXISTS (SELECT 1 FROM {table} WHERE 1 = 0);', f'SELECT * FROM {table} WHERE NOT EXISTS (SELECT 1 FROM {table} WHERE 1 = 1);', f'SELECT * FROM {table} WHERE {col} IN (SELECT {col} FROM {table} WHERE {col} IS NOT NULL);', f'SELECT * FROM {table} WHERE {col} IN (SELECT {col} FROM {table} WHERE {col} IS NULL);', f'SELECT * FROM {table} WHERE {col} IN (SELECT {col} FROM {table} LIMIT 1);', f'SELECT * FROM {table} WHERE {col} IN (SELECT {col} FROM {table} LIMIT 0);', f'SELECT * FROM {table} WHERE {col} NOT IN (SELECT {col} FROM {table});', f'SELECT * FROM {table} WHERE {col} NOT IN (SELECT {col} FROM {table} WHERE {col} IS NOT NULL);', f'SELECT * FROM {table} WHERE {col} NOT IN (SELECT {col} FROM {table} WHERE {col} IS NULL);', f'SELECT * FROM {table} WHERE ({col}) IN (SELECT {col} FROM {table});', f'SELECT * FROM {table} WHERE ({col}, {col}) IN (SELECT {col}, {col} FROM {table});', f'SELECT * FROM {table} WHERE {col} > ANY (SELECT {col} FROM {table});', f'SELECT * FROM {table} WHERE {col} < ANY (SELECT {col} FROM {table});', f'SELECT * FROM {table} WHERE {col} = ANY (SELECT {col} FROM {table});', f'SELECT * FROM {table} WHERE {col} >= ANY (SELECT {col} FROM {table});', f'SELECT * FROM {table} WHERE {col} <= ANY (SELECT {col} FROM {table});', f'SELECT * FROM {table} WHERE {col} <> ANY (SELECT {col} FROM {table});', f'SELECT * FROM {table} WHERE {col} > ALL (SELECT {col} FROM {table});', f'SELECT * FROM {table} WHERE {col} < ALL (SELECT {col} FROM {table});', f'SELECT * FROM {table} WHERE {col} = ALL (SELECT {col} FROM {table});', f'SELECT * FROM {table} WHERE {col} >= ALL (SELECT {col} FROM {table});', f'SELECT * FROM {table} WHERE {col} <= ALL (SELECT {col} FROM {table});', f'SELECT * FROM {table} WHERE {col} <> ALL (SELECT {col} FROM {table});', f'SELECT * FROM {table} WHERE {col} > (SELECT AVG({col}) FROM {table});', f'SELECT * FROM {table} WHERE {col} < (SELECT AVG({col}) FROM {table});', f'SELECT * FROM {table} WHERE {col} > (SELECT MAX({col}) FROM {table});', f'SELECT * FROM {table} WHERE {col} < (SELECT MIN({col}) FROM {table});', f'SELECT * FROM {table} WHERE {col} = (SELECT COUNT(*) FROM {table});', f'SELECT * FROM {table} WHERE {col} > (SELECT COUNT(*) FROM {table});', f'SELECT * FROM {table} WHERE {col} = (SELECT SUM({col}) FROM {table});', f'SELECT * FROM {table} WHERE {col} = (SELECT GROUP_CONCAT({col}) FROM {table});', f'SELECT * FROM {table} WHERE {col} IN (SELECT {col} FROM {table} WHERE {col} IN (SELECT {col} FROM {table}));', f'SELECT * FROM {table} WHERE EXISTS (SELECT 1 FROM {table} WHERE EXISTS (SELECT 1 FROM {table}));', f'SELECT * FROM {table} WHERE {col} = (SELECT MAX({col}) FROM {table} WHERE {col} = (SELECT MAX({col}) FROM {table}));', f'SELECT * FROM (SELECT * FROM (SELECT * FROM {table}) AS inner1) AS inner2;', f'SELECT {col}, COUNT(*) FROM {table} GROUP BY {col} HAVING COUNT(*) > (SELECT COUNT(*) FROM {table} LIMIT 1);', f'SELECT {col}, COUNT(*) FROM {table} GROUP BY {col} HAVING COUNT(*) = (SELECT COUNT(*) FROM {table});', f'SELECT {col}, COUNT(*) FROM {table} GROUP BY {col} HAVING {col} IN (SELECT {col} FROM {table});', f'SELECT * FROM {table} t1 JOIN {table} t2 ON t1.{col} = (SELECT {col} FROM {table} LIMIT 1);', f'SELECT * FROM {table} t1 LEFT JOIN {table} t2 ON t1.{col} = (SELECT {col} FROM {table} WHERE {col} = t1.{col});', f'SELECT * FROM {table} t1 RIGHT JOIN {table} t2 ON t1.{col} = (SELECT {col} FROM {table} WHERE {col} = t1.{col});', f'SELECT * FROM {table} t1 JOIN (SELECT * FROM {table}) AS sub ON t1.{col} = sub.{col};', f'SELECT {col} FROM {table} UNION SELECT {col} FROM (SELECT * FROM {table}) AS sub;', f'SELECT {col} FROM {table} INTERSECT SELECT {col} FROM (SELECT * FROM {table}) AS sub;', f'SELECT {col} FROM {table} EXCEPT SELECT {col} FROM (SELECT * FROM {table}) AS sub;', f'SELECT {col} FROM {table} UNION ALL SELECT {col} FROM (SELECT * FROM {table}) AS sub;', f'WITH cte AS (SELECT * FROM {table}) SELECT * FROM cte;', f'WITH cte AS (SELECT {col} FROM {table}) SELECT * FROM cte WHERE {col} IS NOT NULL;', f'WITH cte AS (SELECT {col}, COUNT(*) FROM {table} GROUP BY {col}) SELECT * FROM cte;', f'WITH cte AS (SELECT * FROM {table}), cte2 AS (SELECT * FROM cte) SELECT * FROM cte2;', f'WITH RECURSIVE cte AS (SELECT {col} FROM {table} UNION ALL SELECT {col} FROM cte LIMIT 1) SELECT * FROM cte;', f'SELECT * FROM {table} WHERE {col} IN (SELECT {col} FROM {table} WHERE 1/0 = 0);' f'SELECT * FROM {table} WHERE {col} IN (SELECT {col} FROM {table} LIMIT -1);', f'SELECT * FROM {table} WHERE {col} IN (SELECT {col} FROM {table} LIMIT 0);', f'SELECT * FROM {table} WHERE {col} IN (SELECT {col} FROM {table} ORDER BY {col} LIMIT 1 OFFSET 1000);', f'SELECT * FROM {table} WHERE {col} IN (SELECT DISTINCT {col} FROM {table});', f'SELECT * FROM {table} WHERE {col} IN (SELECT ALL {col} FROM {table});', f'SELECT * FROM {table} WHERE {col} IN (SELECT {col} FROM {table} WHERE {col} LIKE \"%%\");', f'SELECT * FROM {table} WHERE {col} IN (SELECT {col} FROM {table} WHERE {col} GLOB \"*\");', f'SELECT * FROM {table} WHERE {col} IN (SELECT {col} FROM {table} WHERE ROW_NUMBER() OVER (ORDER BY {col}) = 1);', f'SELECT * FROM {table} WHERE {col} = (SELECT {col} FROM {table} ORDER BY {col} LIMIT 1 OFFSET (SELECT COUNT(*) FROM {table} / 2));', f'SELECT * FROM {table} WHERE {col} IN (SELECT {col} FROM {table});', f'SELECT * FROM {table} WHERE EXISTS (SELECT 1 FROM {table});', f'SELECT * FROM (SELECT * FROM {table}) AS sub;', f'SELECT {col} FROM {table} WHERE {col} = (SELECT MAX({col}) FROM {table});', f'SELECT * FROM {table} WHERE {col} NOT IN (SELECT {col} FROM {table});', f'SELECT * FROM {table} WHERE ({col}) IN (SELECT {col} FROM {table});', ]) return s + '\n' + style def mut_append_cte(s: str) -> str: """Append a SELECT statement that queries a CTE""" table, col = _pick_table_col(s) n = random.randint(2, 100) style = random.choice([ f'WITH cte AS (SELECT {col} FROM {table}) SELECT * FROM cte;', f'WITH cte AS (SELECT * FROM {table}) SELECT * FROM cte;', f'WITH cte AS (SELECT * FROM {table} WHERE {col} IS NOT NULL) SELECT * FROM cte;', f'WITH cte AS (SELECT DISTINCT {col} FROM {table}) SELECT * FROM cte;', f'WITH cte AS (SELECT {col}, COUNT(*) AS cnt FROM {table} GROUP BY {col}) SELECT * FROM cte;', f'WITH cte AS (SELECT * FROM {table} ORDER BY {col} LIMIT 5) SELECT * FROM cte;', f'WITH cte(x) AS (SELECT {col} FROM {table}) SELECT x FROM cte;', f'WITH cte(a, b) AS (SELECT {col}, {col} FROM {table}) SELECT a, b FROM cte;', f'WITH cte(a, b, c) AS (SELECT {col}, {col}, {col} FROM {table}) SELECT a, b, c FROM cte;', f'WITH cte(x) AS (VALUES(1),(2),(3)) SELECT * FROM cte;', f'WITH cte(x, y) AS (VALUES(1,2),(3,4),(5,6)) SELECT * FROM cte;', f'WITH a AS (SELECT * FROM {table}), b AS (SELECT * FROM a) SELECT * FROM b;', f'WITH a AS (SELECT {col} FROM {table}), b AS (SELECT {col} FROM a), c AS (SELECT {col} FROM b) SELECT * FROM c;', f'WITH a AS (SELECT {col} FROM {table}), b AS (SELECT {col} FROM {table}) SELECT * FROM a JOIN b ON a.{col} = b.{col};', f'WITH a AS (SELECT {col} FROM {table}), b AS (SELECT COUNT(*) AS cnt FROM a) SELECT * FROM b;', f'WITH a AS (SELECT * FROM {table} WHERE {col} IS NOT NULL), b AS (SELECT * FROM a WHERE {col} > 0), c AS (SELECT * FROM b LIMIT 10) SELECT * FROM c;', f'WITH a AS (SELECT * FROM {table}), b AS (SELECT * FROM {table}) SELECT * FROM a UNION ALL SELECT * FROM b;', f'WITH m AS MATERIALIZED (SELECT {col} FROM {table}) SELECT * FROM m;', f'WITH n AS NOT MATERIALIZED (SELECT {col} FROM {table}) SELECT * FROM n;', f'WITH m AS MATERIALIZED (SELECT * FROM {table}) SELECT m.{col} FROM m JOIN m AS m2 ON m.{col} = m2.{col};', f'WITH n AS NOT MATERIALIZED (SELECT * FROM {table}) SELECT * FROM n WHERE {col} > 0;', f'WITH a AS MATERIALIZED (SELECT * FROM {table}), b AS NOT MATERIALIZED (SELECT * FROM a) SELECT * FROM b;', f'WITH a AS NOT MATERIALIZED (SELECT * FROM {table}), b AS MATERIALIZED (SELECT * FROM a) SELECT * FROM b;', f'WITH RECURSIVE cnt(x) AS (VALUES(1) UNION ALL SELECT x+1 FROM cnt WHERE x<{n}) SELECT * FROM cnt;', f'WITH RECURSIVE cnt(x) AS (VALUES(1) UNION ALL SELECT x+1 FROM cnt WHERE x<{n}) SELECT SUM(x) FROM cnt;', f'WITH RECURSIVE cnt(x) AS (VALUES(1) UNION ALL SELECT x+1 FROM cnt WHERE x<{n}) SELECT MAX(x) FROM cnt;', f'WITH RECURSIVE cnt(x) AS (VALUES(1) UNION ALL SELECT x+1 FROM cnt WHERE x<{n}) SELECT COUNT(*) FROM cnt;', f'WITH RECURSIVE cnt(x) AS (VALUES(1) UNION ALL SELECT x+1 FROM cnt WHERE x<{n}) SELECT x FROM cnt WHERE x > {n//2};', f'WITH RECURSIVE pair(a,b) AS (VALUES(0,1) UNION ALL SELECT b, a+b FROM pair WHERE b<100) SELECT * FROM pair;', f'WITH RECURSIVE fib(a,b) AS (VALUES(1,1) UNION ALL SELECT b, a+b FROM fib WHERE b<1000) SELECT a FROM fib;', f'WITH RECURSIVE fib(a,b,n) AS (VALUES(1,1,1) UNION ALL SELECT b, a+b, n+1 FROM fib WHERE n<{n}) SELECT a, n FROM fib;', f'WITH RECURSIVE tree(id, parent, depth) AS (VALUES(1, NULL, 0) UNION ALL SELECT id+1, id, depth+1 FROM tree WHERE depth<{n}) SELECT * FROM tree;', f'WITH RECURSIVE tree(id, path) AS (VALUES(1, "/1") UNION ALL SELECT id+1, path || "/" || (id+1) FROM tree WHERE id<{n}) SELECT * FROM tree;', f'WITH RECURSIVE cnt(x) AS (VALUES(1) UNION SELECT x+1 FROM cnt WHERE x<{n}) SELECT * FROM cnt;', f'WITH RECURSIVE cnt(x) AS (VALUES(1) UNION ALL SELECT x+1 FROM cnt WHERE x<{n}) SELECT * FROM cnt;', f'WITH RECURSIVE vals(x) AS (SELECT {col} FROM {table} WHERE {col} IS NOT NULL UNION ALL SELECT {col} FROM {table} WHERE {col} IS NOT NULL LIMIT 10) SELECT * FROM vals;', f'WITH RECURSIVE r AS (SELECT {col} FROM {table} UNION ALL SELECT {col} FROM {table} LIMIT 5) SELECT * FROM r;', f'WITH RECURSIVE r AS (SELECT {col} FROM {table} WHERE {col} IS NOT NULL UNION ALL SELECT {col} FROM r WHERE {col} > 0 LIMIT 10) SELECT * FROM r;', f'WITH RECURSIVE r AS (SELECT * FROM {table} LIMIT 1 UNION ALL SELECT t.* FROM {table} t JOIN r ON t.{col} = r.{col} LIMIT 10) SELECT * FROM r;', f'WITH RECURSIVE cnt(x) AS (VALUES(1) UNION ALL SELECT x+1 FROM cnt WHERE x<1) SELECT * FROM cnt;', f'WITH RECURSIVE cnt(x) AS (VALUES(1) UNION ALL SELECT x+1 FROM cnt WHERE x<0) SELECT * FROM cnt;', f'WITH RECURSIVE empty(x) AS (SELECT 1 WHERE 1=0 UNION ALL SELECT x+1 FROM empty WHERE x<10) SELECT * FROM empty;', f'WITH RECURSIVE inf(x) AS (VALUES(1) UNION ALL SELECT x+1 FROM inf) SELECT * FROM inf LIMIT 10;', f'WITH RECURSIVE self(x) AS (SELECT 1 UNION ALL SELECT x FROM self) SELECT * FROM self LIMIT 5;', f'WITH RECURSIVE cnt(x) AS (VALUES(0) UNION ALL SELECT x-1 FROM cnt WHERE x>-{n}) SELECT * FROM cnt;', f'WITH RECURSIVE multi(a, b, c) AS (VALUES(1, 2, 3) UNION ALL SELECT a+1, b+1, c+1 FROM multi WHERE a<{n}) SELECT * FROM multi;', f'WITH RECURSIVE multi(a, b) AS (VALUES(1, "x") UNION ALL SELECT a+1, "x" FROM multi WHERE a<{n}) SELECT * FROM multi;', f'WITH RECURSIVE multi(a, b) AS (VALUES(1, 1.0) UNION ALL SELECT a+1, b*0.5 FROM multi WHERE a<{n}) SELECT * FROM multi;', f'WITH cte AS (SELECT * FROM {table}) SELECT * FROM cte JOIN {table} ON cte.{col} = {table}.{col};', f'WITH cte AS (SELECT * FROM {table}) SELECT * FROM cte LEFT JOIN {table} ON cte.{col} = {table}.{col};', f'WITH cte1 AS (SELECT * FROM {table}), cte2 AS (SELECT * FROM {table}) SELECT * FROM cte1 LEFT JOIN cte2 ON cte1.{col} = cte2.{col};', f'WITH cte AS (SELECT * FROM {table}) SELECT * FROM cte c1 JOIN cte c2 ON c1.{col} = c2.{col};', f'WITH cte AS (SELECT {col}, COUNT(*) AS cnt FROM {table} GROUP BY {col}) SELECT * FROM cte WHERE cnt > 1;', f'WITH cte AS (SELECT {col}, SUM({col}) AS total FROM {table} GROUP BY {col}) SELECT * FROM cte ORDER BY total DESC;', f'WITH cte AS (SELECT {col}, AVG({col}) AS avg_val FROM {table} GROUP BY {col} HAVING AVG({col}) > 0) SELECT * FROM cte;', f'WITH cte AS (SELECT COUNT(*) AS cnt FROM {table}) SELECT * FROM cte WHERE cnt > 0;', f'WITH cte AS (SELECT {col}, ROW_NUMBER() OVER (ORDER BY {col}) AS rn FROM {table}) SELECT * FROM cte WHERE rn <= 5;', f'WITH cte AS (SELECT {col}, RANK() OVER (ORDER BY {col}) AS rnk FROM {table}) SELECT * FROM cte;', f'WITH cte AS (SELECT {col}, SUM({col}) OVER (ORDER BY {col}) AS running FROM {table}) SELECT * FROM cte;', f'WITH cte AS (SELECT {col}, LAG({col}) OVER (ORDER BY {col}) AS prev FROM {table}) SELECT * FROM cte;', f'WITH cte AS (SELECT {col}, LEAD({col}) OVER (ORDER BY {col}) AS nxt FROM {table}) SELECT * FROM cte;', f'WITH cte AS (SELECT * FROM {table} WHERE {col} IN (SELECT {col} FROM {table} LIMIT 5)) SELECT * FROM cte;', f'WITH cte AS (SELECT * FROM {table} WHERE {col} = (SELECT MAX({col}) FROM {table})) SELECT * FROM cte;', f'WITH cte AS (SELECT * FROM {table} WHERE EXISTS (SELECT 1 FROM {table} WHERE {col} > 0)) SELECT * FROM cte;', f'WITH cte AS (SELECT {col} FROM {table} UNION SELECT {col} FROM {table}) SELECT * FROM cte;', f'WITH cte AS (SELECT {col} FROM {table} INTERSECT SELECT {col} FROM {table}) SELECT * FROM cte;', f'WITH cte AS (SELECT {col} FROM {table} EXCEPT SELECT {col} FROM {table}) SELECT * FROM cte;', f'WITH cte AS (SELECT {col} FROM {table} UNION ALL SELECT {col} FROM {table}) SELECT * FROM cte;', f'WITH cte AS (SELECT * FROM {table}) UPDATE {table} SET {col} = (SELECT {col} FROM cte LIMIT 1) WHERE 1=0;', f'WITH cte AS (SELECT * FROM {table} LIMIT 1) DELETE FROM {table} WHERE {col} IN (SELECT {col} FROM cte) AND 1=0;', f'WITH cte AS (SELECT 1 AS x) INSERT INTO {table}({col}) SELECT x FROM cte WHERE 1=0;', f'WITH a AS (SELECT * FROM {table}), RECURSIVE cnt(x) AS (VALUES(1) UNION ALL SELECT x+1 FROM cnt WHERE x<{n}) SELECT * FROM a CROSS JOIN cnt;', f'WITH RECURSIVE cnt(x) AS (VALUES(1) UNION ALL SELECT x+1 FROM cnt WHERE x<{n}), filtered AS (SELECT * FROM cnt WHERE x > {n//2}) SELECT * FROM filtered;', f'WITH a AS (SELECT * FROM {table}), b AS (SELECT * FROM a), c AS (SELECT * FROM b), d AS (SELECT * FROM c) SELECT * FROM d;', f'WITH a AS (SELECT {col} FROM {table}), b AS (SELECT {col} FROM a WHERE {col} IS NOT NULL), c AS (SELECT COUNT(*) AS cnt FROM b) SELECT cnt FROM c;', f'WITH cte(x) AS (VALUES(1),(2),(3)) SELECT * FROM cte WHERE x > 1;', f'WITH cte(a, b) AS (VALUES(1, "a"),(2, "b"),(3, "c")) SELECT * FROM cte;', f'WITH cte(x) AS (VALUES(1),(2),(3)) SELECT * FROM {table} WHERE {col} IN (SELECT x FROM cte);', f'WITH cte AS (SELECT NULL AS x) SELECT * FROM cte;', f'WITH cte AS (SELECT NULL AS x, NULL AS y) SELECT x, y, x IS NULL FROM cte;', f'WITH cte AS (SELECT {col} FROM {table} WHERE {col} IS NULL) SELECT * FROM cte;', f'WITH cte(x) AS (VALUES(NULL),(1),(NULL)) SELECT * FROM cte WHERE x IS NOT NULL;', f'WITH cte AS (SELECT 1 AS x) SELECT x + 0.5 FROM cte;', f'WITH cte AS (SELECT "123" AS x) SELECT x + 0 FROM cte;', f'WITH cte AS (SELECT 1.5 AS x) SELECT CAST(x AS TEXT) FROM cte;', f'WITH cte AS (SELECT NULL AS x) SELECT COALESCE(x, 0) FROM cte;', f'WITH cte AS (SELECT CASE WHEN {col} > 0 THEN "pos" ELSE "neg" END AS sign FROM {table}) SELECT * FROM cte;', f'WITH cte AS (SELECT CASE WHEN {col} IS NULL THEN 0 ELSE {col} END AS val FROM {table}) SELECT * FROM cte;', f'WITH cte AS (SELECT * FROM {table}) SELECT * FROM cte a, cte b WHERE a.{col} = b.{col};', f'WITH cte AS (SELECT * FROM {table}) SELECT * FROM cte WHERE {col} > 0 UNION ALL SELECT * FROM cte WHERE {col} <= 0;', f'WITH cte AS (SELECT * FROM {table}) SELECT (SELECT COUNT(*) FROM cte) AS total, * FROM cte LIMIT 1;', ]) return s + '\n' + style def mut_append_window(s: str) -> str: """Append a SELECT statement that applies a window function to an existing table in the query""" table, col = _pick_table_col(s) fn = random.choice(WINDOW_FUNCS).format(c=col) partition = f'PARTITION BY {col} ' if random.random() < 0.5 else '' order = f'ORDER BY {col}' frame = random.choice([ '', ' ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW', ' RANGE BETWEEN 1 PRECEDING AND 1 FOLLOWING', ' GROUPS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING', ' ROWS BETWEEN 2 PRECEDING AND 2 FOLLOWING EXCLUDE CURRENT ROW', ' ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING EXCLUDE GROUP', ' ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW EXCLUDE TIES', ' RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING EXCLUDE NO OTHERS', ]) return s + '\n' + f'SELECT {fn} OVER ({partition}{order}{frame}) FROM {table};' def mut_append_filter(s: str) -> str: """Append a SELECT statement that uses FILTER""" table, col = _pick_table_col(s) return s + '\n' + (f'SELECT ' f'COUNT(*) FILTER (WHERE {col} IS NOT NULL), ' f'SUM(rowid) FILTER (WHERE {col} > 0), ' f'COUNT(*) FILTER (WHERE 1=0), ' f'COUNT(*) FILTER (WHERE 1=1), ' f'COUNT(*) FILTER (WHERE NULL), ' f'AVG({col}) FILTER (WHERE {col} > 0 AND {col} < 100), ' f'COUNT(*) FILTER (WHERE typeof({col}) = "text") ' f'FROM {table};') def mut_append_aggregate(s: str) -> str: """Append a SELECT statement that uses an aggregate function""" table, col = _pick_table_col(s) fn = random.choice(AGG_FUNCS) if fn == 'COUNT' and random.random() < 0.5: agg = 'COUNT(*)' elif fn == 'GROUP_CONCAT' and random.random() < 0.5: agg = f"GROUP_CONCAT({col}, '|')" elif fn == 'STRING_AGG' and random.random() < 0.5: agg = f"STRING_AGG({col}, '|')" else: agg = f"{fn}({col})" return s + '\n' + f'SELECT {agg} FROM {table};' def mut_append_update(s: str) -> str: """Append an UPDATE statement""" table, col = _pick_table_col(s) val = random.choice([ str(random.randint(-100, 100)), 'NULL', "'x'", "''", f'{col} + 1', 'CURRENT_TIMESTAMP', f"json_object('k', {col})", ]) where = random.choice([ '', f' WHERE {col} IS NOT NULL', ' WHERE rowid = 1', ' WHERE 1', f' WHERE {col} BETWEEN 0 AND 100', ]) returning = ' RETURNING *' if random.random() < 0.4 else '' return s + '\n' + f'UPDATE {table} SET {col} = {val}{where}{returning};' def mut_append_delete(s: str) -> str: """Append a DELETE statement""" table, col = _pick_table_col(s) where = random.choice([ f'WHERE rowid = {random.randint(1, 100)}', f'WHERE {col} IS NULL', 'WHERE 0', 'WHERE 1', f'WHERE {col} > (SELECT AVG({col}) FROM {table})', ]) returning = ' RETURNING *' if random.random() < 0.4 else '' return s + '\n' + f'DELETE FROM {table} {where}{returning};' def mut_append_index(s: str) -> str: """Append a CREATE INDEX statement""" table, col = _pick_table_col(s) unique = random.choice(['', 'UNIQUE ']) where = random.choice(['', f' WHERE {col} IS NOT NULL', f' WHERE {col} > 0']) expr = random.choice( [col, f'lower({col})', f'({col} + 1)', f'{col} COLLATE NOCASE']) suffix = random.randint(0, 9999) return s + '\n' + f'CREATE {unique} INDEX IF NOT EXISTS idx_{table}_{suffix} ON {table}({expr}){where};' def mut_append_view(s: str) -> str: """Append a CREATE VIEW statement""" table, col = _pick_table_col(s) suffix = random.randint(0, 9999) temp = random.choice(['', 'TEMP ', 'TEMPORARY ']) return s + '\n' + f'CREATE {temp} VIEW IF NOT EXISTS v_{table}_{suffix} AS SELECT {col} FROM {table};' def mut_append_trigger(s: str) -> str: """Append a CREATE TRIGGER statement""" table, col = _pick_table_col(s) when = random.choice(['BEFORE', 'AFTER']) event = random.choice(['INSERT', 'UPDATE', 'DELETE', f'UPDATE OF {col}']) suffix = random.randint(0, 9999) body = random.choice([ "SELECT RAISE(IGNORE)", "SELECT RAISE(FAIL, 'no')", "SELECT RAISE(ABORT, 'abort')", "SELECT RAISE(ROLLBACK, 'rb')", f"INSERT INTO {table}({col}) VALUES (NULL)", ]) fer = ' FOR EACH ROW' if random.random() < 0.5 else '' return s + '\n' + f'CREATE TRIGGER IF NOT EXISTS trg_{table}_{suffix} {when} {event} ON {table}{fer} BEGIN {body}; END;' def mut_wrap_savepoint(s: str) -> str: """Wrap the query inside SAVEPOINT-RELEASE/ROLLBACK""" name = f'sp{random.randint(0, 9999)}' end = random.choice([ f'RELEASE {name};', f'ROLLBACK TO {name};\nRELEASE {name};', f'ROLLBACK TRANSACTION TO SAVEPOINT {name};\nRELEASE SAVEPOINT {name};', ]) return f'SAVEPOINT {name};\n{s}\n{end}' def mut_wrap_transaction(s: str) -> str: """Wrap the query inside a transaction""" begin = random.choice([ "BEGIN", "BEGIN DEFERRED", "BEGIN IMMEDIATE", "BEGIN EXCLUSIVE", "BEGIN TRANSACTION", "BEGIN DEFERRED TRANSACTION", "BEGIN IMMEDIATE TRANSACTION", "BEGIN EXCLUSIVE TRANSACTION" ]) end = random.choice( ['COMMIT;', 'ROLLBACK;', 'END;', 'END TRANSACTION;', 'COMMIT TRANSACTION;']) return f'{begin};\n{s}\n{end}' def mut_wrap_attach_detach(s: str) -> str: """Wrap the query inside ATTACH and DETACH statements""" alias = f'aux{random.randint(0, 99)}' return (f"ATTACH DATABASE ':memory:' AS {alias};\n" f"{s}\n" f"DETACH DATABASE {alias};") def mut_append_vacuum(s: str) -> str: """Append a VACUUM statement""" return s + '\n' + random.choice([ 'VACUUM;', 'VACUUM main;', ]) def mut_append_analyze(s: str) -> str: """Append an ANALYZE statement""" table = _pick_table(s) return s + '\n' + random.choice([ 'ANALYZE;', f'ANALYZE {table};', ]) def mut_append_reindex(s: str) -> str: """Append a REINDEX statement""" table = _pick_table(s) return s + '\n' + random.choice([ f'REINDEX {table};', 'REINDEX;', ]) def mut_append_null_insert(s: str) -> str: """Append an INSERT statement with NULL""" schema = get_schema(s) table = random.choice(list(schema.keys())) cols = schema[table] if not cols: return s + '\n' + f'INSERT INTO {table} DEFAULT VALUES;' vals = ', '.join('NULL' for _ in cols) return s + '\n' + f'INSERT INTO {table} VALUES ({vals});' def mut_append_insert_default(s: str) -> str: """Append an INSERT statement with DEFAULT VALUES""" return s + '\n' + f'INSERT INTO {_pick_table(s)} DEFAULT VALUES;' def mut_append_insert_select(s: str) -> str: """Append an INSERT statement that duplicates all rows in the table""" table = _pick_table(s) return s + '\n' + f'INSERT INTO {table} SELECT * FROM {table};' def mut_append_insert_or_x(s: str) -> str: """Append an INSERT OR ROLLBACK/ABORT/FAIL/IGNORE/REPLACE statement""" schema = get_schema(s) table = random.choice(list(schema.keys())) cols = schema[table] or ['rowid'] vals = ', '.join( random.choice(['NULL', '0', "''", "'x'", str(random.randint(-10, 10))]) for _ in cols) kind = random.choice( ['OR IGNORE', 'OR REPLACE', 'OR FAIL', 'OR ABORT', 'OR ROLLBACK']) return s + '\n' + f'INSERT {kind} INTO {table} VALUES ({vals});' def mut_append_upsert(s: str) -> str: """Append an INSERT statement that updates the existing set if there is a conflict""" schema = get_schema(s) table = random.choice(list(schema.keys())) cols = schema[table] if not cols: return s target = cols[0] upd = ', '.join(f'{c} = excluded.{c}' for c in cols[:3]) vals = ', '.join(random.choice(['NULL', '1', "'x'"]) for _ in cols) return s + '\n' + f'INSERT INTO {table} VALUES ({vals}) ON CONFLICT({target}) DO UPDATE SET {upd};' def mut_append_alter_rename_table(s: str) -> str: """Append an ALTER statement that renames an existing table""" table = _pick_table(s) suffix = random.randint(0, 9999) return s + '\n' + f'ALTER TABLE {table} RENAME TO {table}_r{suffix};' def mut_append_alter_rename_column(s: str) -> str: """Append an ALTER statement that renames a column of an existing table""" schema = get_schema(s) table = random.choice(list(schema.keys())) cols = schema[table] if not cols: return s col = random.choice(cols) suffix = random.randint(0, 9999) return s + '\n' + f'ALTER TABLE {table} RENAME COLUMN {col} TO {col}_r{suffix};' def mut_append_alter_add_column(s: str) -> str: """Append an ALTER statement that adds a column to an existing table""" table = _pick_table(s) suffix = random.randint(0, 9999) type = random.choice(TYPES) extras = random.choice([ '', ' DEFAULT NULL', " DEFAULT ''", ' DEFAULT 0', ' DEFAULT CURRENT_TIMESTAMP', ' COLLATE NOCASE', ' NOT NULL DEFAULT 0', ' UNIQUE', ' DEFAULT (random())', ' DEFAULT (abs(random()) % 1000)', ' COLLATE RTRIM', ]) return s + '\n' + f'ALTER TABLE {table} ADD COLUMN extra_{suffix} {type}{extras};' def mut_append_alter_drop_column(s: str) -> str: """Append an ALTER statement that drops a column from an existing table""" schema = get_schema(s) table = random.choice(list(schema.keys())) cols = schema[table] if not cols: return s col = random.choice(cols) return s + '\n' + f'ALTER TABLE {table} DROP COLUMN {col};' def mut_append_alter_drop_table_index_view_trigger(s: str) -> str: """Append an ALTER statement that drops a table, index, view, or trigger""" schema = get_schema(s) if not schema: return s table = random.choice(list(schema.keys())) obj = random.choice(['TABLE', 'INDEX', 'VIEW', 'TRIGGER']) return s + '\n' + f'DROP {obj} IF EXISTS {table};' def mut_repeat(s: str) -> str: """Repeat the query 1-10 times""" return f'{s}\n' * random.randint(1, 10) def mut_prepend_random_pragma(s: str) -> str: """Prepend a PRAGMA statement""" return random.choice(PRAGMAS) + '\n' + s def mut_append_random_pragma(s: str) -> str: """Append a PRAGMA statement""" return s + '\n' + random.choice(PRAGMAS) def mut_prepend_random_meta_command(s: str) -> str: """Prepend a meta-command""" return random.choice(META_COMMANDS) + '\n' + s