180 lines
5.8 KiB
Python
180 lines
5.8 KiB
Python
import json
|
|
import os
|
|
from uuid import uuid4
|
|
|
|
from runner import collect_coverage
|
|
|
|
# from https://sqlite.org/lang_keywords.html
|
|
SQLITE_KEYWORDS = [
|
|
"ABORT", "ACTION", "ADD", "AFTER", "ALL", "ALTER", "ALWAYS", "ANALYZE", "AND",
|
|
"AS", "ASC", "ATTACH", "AUTOINCREMENT", "BEFORE", "BEGIN", "BETWEEN", "BY",
|
|
"CASCADE", "CASE", "CAST", "CHECK", "COLLATE", "COLUMN", "COMMIT", "CONFLICT",
|
|
"CONSTRAINT", "CREATE", "CROSS", "CURRENT", "CURRENT_DATE", "CURRENT_TIME",
|
|
"CURRENT_TIMESTAMP", "DATABASE", "DEFAULT", "DEFERRABLE", "DEFERRED",
|
|
"DELETE", "DESC", "DETACH", "DISTINCT", "DO", "DROP", "EACH", "ELSE", "END",
|
|
"ESCAPE", "EXCEPT", "EXCLUDE", "EXCLUSIVE", "EXISTS", "EXPLAIN", "FAIL",
|
|
"FILTER", "FIRST", "FOLLOWING", "FOR", "FOREIGN", "FROM", "FULL", "GENERATED",
|
|
"GLOB", "GROUP", "GROUPS", "HAVING", "IF", "IGNORE", "IMMEDIATE", "IN",
|
|
"INDEX", "INDEXED", "INITIALLY", "INNER", "INSERT", "INSTEAD", "INTERSECT",
|
|
"INTO", "IS", "ISNULL", "JOIN", "KEY", "LAST", "LEFT", "LIKE", "LIMIT",
|
|
"MATCH", "MATERIALIZED", "NATURAL", "NO", "NOT", "NOTHING", "NOTNULL", "NULL",
|
|
"NULLS", "OF", "OFFSET", "ON", "OR", "ORDER", "OTHERS", "OUTER", "OVER",
|
|
"PARTITION", "PLAN", "PRAGMA", "PRECEDING", "PRIMARY", "QUERY", "RAISE",
|
|
"RANGE", "RECURSIVE", "REFERENCES", "REGEXP", "REINDEX", "RELEASE", "RENAME",
|
|
"REPLACE", "RESTRICT", "RETURNING", "RIGHT", "ROLLBACK", "ROW", "ROWS",
|
|
"SAVEPOINT", "SELECT", "SET", "TABLE", "TEMP", "TEMPORARY", "THEN", "TIES",
|
|
"TO", "TRANSACTION", "TRIGGER", "UNBOUNDED", "UNION", "UNIQUE", "UPDATE",
|
|
"USING", "VACUUM", "VALUES", "VIEW", "VIRTUAL", "WHEN", "WHERE", "WINDOW",
|
|
"WITH", "WITHOUT"
|
|
]
|
|
|
|
queries: list[str] = []
|
|
|
|
|
|
def save_bug(sql: str,
|
|
kind: str,
|
|
b_out: str,
|
|
b_err: str,
|
|
r_out: str = '',
|
|
flag: str = ''):
|
|
"""
|
|
Creates and stores the bug report, with all the relevant metadata.
|
|
"""
|
|
|
|
os.makedirs('bugs', exist_ok=True)
|
|
path = f'bugs/bug_{uuid4()}_{kind}.md'
|
|
with open(path, 'w') as f:
|
|
f.write(create_bug_report(sql, b_out or b_err, r_out, flag))
|
|
|
|
|
|
def create_stats_report(valid_queries,
|
|
invalid_queries,
|
|
generated_queries: list[str],
|
|
qpm_gen: float = 0.0,
|
|
qpm_full: float = 0.0,
|
|
wall_seconds: float = 0.0):
|
|
collect_coverage("/fuzzer/coverage.json")
|
|
|
|
with open("/fuzzer/coverage.json", "r") as f:
|
|
data = json.load(f)
|
|
|
|
total_lines = 0
|
|
covered_lines = 0
|
|
total_branches = 0
|
|
covered_branches = 0
|
|
func_cov_dict = {}
|
|
|
|
for file in data.get("files") or []:
|
|
for line in file.get("lines") or []:
|
|
total_lines += 1
|
|
if line.get("count", 0) > 0:
|
|
covered_lines += 1
|
|
|
|
for branch in line.get("branches") or []:
|
|
total_branches += 1
|
|
if branch.get("count") or 0 > 0:
|
|
covered_branches += 1
|
|
|
|
# https://gcovr.com/en/stable/output/json.html#json-output
|
|
for function_obj in file.get("functions") or []:
|
|
executed = function_obj["execution_count"] > 0
|
|
func_cov_dict[f"{file['file']}-{function_obj['name']}"] = executed
|
|
|
|
print()
|
|
print(
|
|
'----------------------------------------------------------------------------------------'
|
|
)
|
|
print()
|
|
|
|
print(f"- Valid queries: {valid_queries}")
|
|
print(f"- Invalid queries: {invalid_queries}")
|
|
print(f"- Line coverage: {round(covered_lines / total_lines * 100, 2)}%")
|
|
print(
|
|
f"- Branch coverage: {round(covered_branches / total_branches * 100, 2)}%")
|
|
print(
|
|
f"- Function coverage: {round(sum(1 for value in func_cov_dict.values() if value) / len(func_cov_dict) * 100, 2)}%"
|
|
)
|
|
|
|
# Part 1a.
|
|
count_dict = {k: 0 for k in SQLITE_KEYWORDS}
|
|
for query in generated_queries:
|
|
for keyword in SQLITE_KEYWORDS:
|
|
if keyword in query:
|
|
count_dict[keyword] += 1
|
|
sorted_count_dict = dict(
|
|
sorted(count_dict.items(), key=lambda item: item[1], reverse=True)[:30])
|
|
print()
|
|
print(
|
|
'----------------------------------------------------------------------------------------'
|
|
)
|
|
print()
|
|
print('Part 1a. Number of unique queries that each keyword appears in:')
|
|
print()
|
|
for k in sorted_count_dict:
|
|
print(f'{k}: {sorted_count_dict[k]}')
|
|
|
|
# Part 1b.
|
|
count_dict_b = {k: 0 for k in SQLITE_KEYWORDS}
|
|
for query in generated_queries:
|
|
keyword_set = set(''.join(
|
|
char for char in query
|
|
if char.isalnum() or char == '_' or char == ' ').split())
|
|
for keyword in keyword_set:
|
|
if keyword in SQLITE_KEYWORDS:
|
|
count_dict_b[keyword] += query.count(keyword)
|
|
count_dict_b = {
|
|
k: round(v / len(generated_queries), 3) for k, v in count_dict_b.items()
|
|
}
|
|
sorted_count_dict_b = dict(
|
|
sorted(count_dict_b.items(), key=lambda item: item[1], reverse=True))
|
|
|
|
print()
|
|
print(
|
|
'----------------------------------------------------------------------------------------'
|
|
)
|
|
print()
|
|
print('Part 1b. Mean number of occurrences of each keyword per query:')
|
|
print()
|
|
for k, v in sorted_count_dict_b.items():
|
|
print(f'{k}: {v}')
|
|
print()
|
|
|
|
|
|
def create_bug_report(sql, actual, expected, flag):
|
|
"""
|
|
## Summary
|
|
<!--
|
|
Explain briefly what goes wrong and explain why you believe this is
|
|
a bug and not the intended behavior of SQLite (if it is not a crash).
|
|
-->
|
|
|
|
## Minimized query
|
|
```sql
|
|
Code of the bug-triggering SQL query
|
|
```
|
|
|
|
## Actual output
|
|
|
|
```sql
|
|
// add output here
|
|
```
|
|
|
|
## Expectation
|
|
|
|
```sql
|
|
// add output here
|
|
```
|
|
"""
|
|
|
|
report = "## Summary\n<!--Explain briefly what goes wrong and explain why you believe this is a bug and not the intended behavior of SQLite (if it is not a crash).-->**No review yet**\n\n"
|
|
|
|
report += f"## Minimized query\n\n```sql\n{sql}\n```\n\n"
|
|
|
|
report += f"## Actual output\n\n```sql\n{actual}\n```\n\n"
|
|
|
|
report += f"## Expectation\n\n```sql\n{expected}\n```\n\n"
|
|
|
|
report += f"## Flag\n\n```\n{flag}\n```\n\n"
|
|
|
|
return report
|