This commit is contained in:
2026-06-24 13:47:14 +02:00
commit fd930e15cb
2377 changed files with 1213931 additions and 0 deletions
+128
View File
@@ -0,0 +1,128 @@
import random
import re
import sqlite_static_helper
def _quote_str(text: str) -> str:
    """Render *text* as a single-quoted SQL string literal.

    Every embedded single quote is doubled, which is how SQL escapes a quote
    inside a string literal: O'Reilly -> 'O''Reilly'.
    """
    escaped = "''".join(text.split("'"))
    return f"'{escaped}'"
# Lexer for SQL text. The alternatives are tried in order, so comments and the
# quoted forms win over the generic operator/identifier rules, and the blob
# rule (x'..') sits before `ident` so its leading "x" is not consumed as an
# identifier. `other` matches any single character (re.DOTALL is set), so a
# finditer() scan never skips input.
#
# `number` also accepts SQLite hexadecimal integer literals such as 0x1F.
# Without that alternative the leading "0" fell through to `other` and "x1F"
# became an identifier, so code that rewrites tokens one at a time could
# replace the lone "0" and corrupt the literal (e.g. into NULLx1F).
_TOKEN_RE = re.compile(
    r"""
    (?P<line_comment>--[^\n]*) |
    (?P<block_comment>/\*.*?\*/) |
    (?P<string>'(?:[^']|'')*') |
    (?P<dquoted>"(?:[^"]|"")*") |
    (?P<bracket>\[[^\]]*\]) |
    (?P<backtick>`(?:[^`]|``)*`) |
    (?P<blob>[xX]'[0-9a-fA-F]*') |
    (?P<number>\b(?:0[xX][0-9a-fA-F]+|\d+(?:\.\d+)?(?:[eE][+-]?\d+)?)\b) |
    (?P<ident>[A-Za-z_][A-Za-z0-9_]*) |
    (?P<op>[<>!=]=|<>|\|\||::|->>?|[+\-*/%<>=&|^~,.;()@]) |
    (?P<ws>\s+) |
    (?P<other>.)
    """,
    re.VERBOSE | re.DOTALL,
)
def _tokenize(s: str) -> list[tuple[str | None, str]]:
    """Split *s* into ``(kind, text)`` pairs using ``_TOKEN_RE``.

    ``kind`` is the name of the ``_TOKEN_RE`` group that matched and ``text``
    is the matched substring (always a ``str``), for example::

        [("line_comment", "-- This is a comment!"), ("number", "0"), ...]

    The pattern's final ``other`` alternative accepts any single character,
    so joining the ``text`` parts in order reproduces *s*.
    """
    scanner = _TOKEN_RE.finditer(s)
    return [(match.lastgroup, match[0]) for match in scanner]
# Token kinds whose text is passed through untouched by `_sub_in_safe`:
# comments, quoted strings/identifiers and blob literals.
_UNSAFE_KINDS = {
    "line_comment",
    "block_comment",
    "string",
    "dquoted",
    "bracket",
    "backtick",
    "blob",
}
def _sub_in_safe(s: str, pattern: re.Pattern, repl, max_subs: int = 1) -> str:
    """Substitute *pattern* in *s*, skipping strings, comments and blobs.

    *s* is tokenized and ``pattern.subn(repl, ...)`` is run on each token's
    text separately, left to right, until *max_subs* replacements have been
    made. Tokens whose kind is in ``_UNSAFE_KINDS`` are copied verbatim.

    Because each token is matched in isolation, a match can never span two
    tokens, and anchors such as ``\\b`` only see the token's own text.

    Args:
        s: SQL text to rewrite.
        pattern: Compiled pattern to search for.
        repl: Replacement string or callable, as accepted by ``re.sub``.
        max_subs: Upper bound on replacements; values <= 0 return *s* as is.

    Returns:
        The rewritten text.
    """
    # In `re`, count=0 means "replace everything", so a non-positive budget
    # has to short-circuit here instead of reaching subn().
    if max_subs <= 0:
        return s

    remaining = max_subs
    pieces: list[str] = []
    for kind, text in _tokenize(s):
        if remaining > 0 and kind not in _UNSAFE_KINDS:
            text, made = pattern.subn(repl, text, count=remaining)
            remaining -= made
        pieces.append(text)
    return ''.join(pieces)
# Unsigned decimal literal: digits, an optional fractional part and an
# optional exponent, delimited by word boundaries on both sides.
_NUM_RE = re.compile(
    r'\b\d+(?:\.\d+)?(?:[eE][+-]?\d+)?\b'
)
def mut_replace_num_with_edge_int(s: str) -> str:
    """Swap the left-most number in *s* for an edge-case integer.

    Numbers inside strings, comments and blob literals are not considered.
    The value is drawn from ``sqlite_static_helper.EDGE_INTS`` only once a
    number has been found, so no random state is consumed otherwise.
    """
    def pick_edge_int(_match: re.Match) -> str:
        return str(random.choice(sqlite_static_helper.EDGE_INTS))

    return _sub_in_safe(s, _NUM_RE, pick_edge_int, max_subs=1)
def mut_replace_num_with_random_int(s: str) -> str:
    """Swap the left-most number in *s* for a random signed 64-bit integer.

    Numbers inside strings, comments and blob literals are not considered.
    The value is uniform over [-2**63, 2**63 - 1] and is drawn only once a
    number has been found.
    """
    # NOTE(review): negative values are emitted without parentheses, so a
    # number that directly follows "-" turns into "--", which SQL reads as the
    # start of a line comment. Confirm that is acceptable for the fuzzer.
    def random_int64(_match: re.Match) -> str:
        return str(random.randint(-(1 << 63), (1 << 63) - 1))

    return _sub_in_safe(s, _NUM_RE, random_int64, max_subs=1)
def mut_replace_num_with_edge_float(s: str) -> str:
    """Swap the left-most number in *s* for an edge-case float.

    Numbers inside strings, comments and blob literals are not considered.
    The value is drawn from ``sqlite_static_helper.EDGE_FLOATS`` only once a
    number has been found, and is written out with ``repr()``.
    """
    # NOTE(review): repr() of inf/nan yields "inf"/"nan", which are not SQL
    # numeric literals — confirm whether EDGE_FLOATS can contain them.
    def pick_edge_float(_match: re.Match) -> str:
        return repr(random.choice(sqlite_static_helper.EDGE_FLOATS))

    return _sub_in_safe(s, _NUM_RE, pick_edge_float, max_subs=1)
def mut_replace_num_with_edge_string(s: str) -> str:
    """Swap the left-most number in *s* for an edge-case string literal.

    Numbers inside strings, comments and blob literals are not considered.
    The string is drawn from ``sqlite_static_helper.EDGE_STRINGS`` only once
    a number has been found, and is single-quoted via ``_quote_str``.
    """
    def pick_edge_string(_match: re.Match) -> str:
        chosen = random.choice(sqlite_static_helper.EDGE_STRINGS)
        return _quote_str(chosen)

    return _sub_in_safe(s, _NUM_RE, pick_edge_string, max_subs=1)
def mut_replace_num_with_random_hex(s: str) -> str:
    """Swap the left-most number in *s* for a random blob literal ``x'..'``.

    Numbers inside strings, comments and blob literals are not considered.
    The blob holds 0 to 16 random bytes written as lowercase hex. It is
    generated up front, so random state is consumed even when *s* contains
    no number.
    """
    byte_count = random.randint(0, 16)
    hex_digits = ''.join(random.choices('0123456789abcdef', k=byte_count * 2))
    blob_literal = f"x'{hex_digits}'"

    def use_blob(_match: re.Match) -> str:
        return blob_literal

    return _sub_in_safe(s, _NUM_RE, use_blob, max_subs=1)
def mut_replace_num_with_null(s: str) -> str:
    """Swap the left-most number in *s* for the keyword ``NULL``.

    Numbers inside strings, comments and blob literals are not considered.
    """
    def to_null(_match: re.Match) -> str:
        return 'NULL'

    return _sub_in_safe(s, _NUM_RE, to_null, max_subs=1)
def mut_wrap_num_in_cast(s: str) -> str:
    """Wrap the left-most number in *s* as ``CAST(<number> AS <type>)``.

    Numbers inside strings, comments and blob literals are not considered.
    The target type is picked from ``sqlite_static_helper.TYPES`` up front,
    so random state is consumed even when *s* contains no number.
    """
    sql_type = random.choice(sqlite_static_helper.TYPES)

    def cast_number(match: re.Match) -> str:
        return f'CAST({match[0]} AS {sql_type})'

    return _sub_in_safe(s, _NUM_RE, cast_number, max_subs=1)
def _wrap_value(s: str, templates: list[str]) -> str:
    """Wrap the left-most number in *s* using a randomly chosen template.

    One entry of *templates* is picked up front; every ``{v}`` in it is
    replaced by the matched number text and the result takes the number's
    place. Numbers inside strings, comments and blob literals are not
    considered.
    """
    template = random.choice(templates)

    def fill_template(match: re.Match) -> str:
        return template.replace('{v}', match[0])

    return _sub_in_safe(s, _NUM_RE, fill_template, max_subs=1)
def mut_wrap_num_in_datetime_func(s: str) -> str:
    """Wrap the left-most number in *s* in a date/time function call.

    The call template is chosen at random from
    ``sqlite_static_helper.DATETIME_FUNCS`` by ``_wrap_value``, which fills
    its ``{v}`` placeholder with the number.
    """
    templates = sqlite_static_helper.DATETIME_FUNCS
    return _wrap_value(s, templates)
def mut_wrap_num_in_core_func(s: str) -> str:
    """Wrap the left-most number in *s* in a core function call.

    The call template is chosen at random from
    ``sqlite_static_helper.CORE_FUNCS`` by ``_wrap_value``, which fills its
    ``{v}`` placeholder with the number.
    """
    templates = sqlite_static_helper.CORE_FUNCS
    return _wrap_value(s, templates)
def mut_wrap_num_in_math_func(s: str) -> str:
    """Wrap the left-most number in *s* in a math function call.

    The call template is chosen at random from
    ``sqlite_static_helper.MATH_FUNCS`` by ``_wrap_value``, which fills its
    ``{v}`` placeholder with the number.
    """
    templates = sqlite_static_helper.MATH_FUNCS
    return _wrap_value(s, templates)
def mut_wrap_num_in_json_func(s: str) -> str:
    """Wrap the left-most number in *s* in a JSON function call.

    The call template is chosen at random from
    ``sqlite_static_helper.JSON_VALUE_FUNCS`` by ``_wrap_value``, which fills
    its ``{v}`` placeholder with the number.
    """
    templates = sqlite_static_helper.JSON_VALUE_FUNCS
    return _wrap_value(s, templates)