Skip to content

Commit b795254

Browse files
committed
MDBF-143: Add Infer builder
This performs static analysis on the MariaDB codebase by maintaining a git source repository as a shared volume. Because static analysis takes time, a lot of time, there is a shared cache volume to store build results from main branches of the codebase so that as much incremental usage as possible can occur. Infer runs in two phases, a capture and an analyze. Infer output is in a result-dir, which contains: * report.json - what infer tools use * report.txt - the human readable version of this * capture.db - the sqlite3 representation of captured files and their relation to function definitions. * results.db - the analyze phase outputs. Of these, the report.json is desirable as the long term record of vulnerabilities.
1 parent 367432f commit b795254

File tree

5 files changed

+430
-0
lines changed

5 files changed

+430
-0
lines changed

configuration/builders/sequences/helpers.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -391,3 +391,26 @@ def mtr_junit_reporter(
391391
warn_on_fail=True,
392392
),
393393
)
394+
395+
396+
def save_infer_logs(
    logs_path: PurePath = PurePath("infer_results"),
    step_wrapping_fn=lambda step: step,
):
    """Build the step that archives the Infer results directory.

    Args:
        logs_path: Working directory handed to ``SaveCompressedTar``;
            defaults to ``infer_results``.
        step_wrapping_fn: Callable applied to the finished ``ShellStep``
            before it is returned (identity by default), e.g. to wrap the
            step so it runs inside a container.

    Returns:
        Whatever ``step_wrapping_fn`` returns for the constructed step.

    Raises:
        KeyError: If ``ARTIFACTS_URL`` is missing from the environment.
    """
    archive_command = SaveCompressedTar(
        name="Save Infer artifacts/logs",
        workdir=logs_path,
        archive_name="logs",
        destination="/packages/%(prop:tarbuildnum)s/logs/%(prop:buildername)s",
    )
    artifacts_link = URL(
        url=f"{os.environ['ARTIFACTS_URL']}/%(prop:tarbuildnum)s/logs/%(prop:buildername)s",
        url_text="Infer artifacts/logs",
    )
    # alwaysRun is set so the step is not skipped after earlier failures.
    save_step = ShellStep(
        command=archive_command,
        url=artifacts_link,
        options=StepOptions(alwaysRun=True),
    )
    return step_wrapping_fn(save_step)
Lines changed: 148 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,148 @@
1+
from pathlib import PurePath
2+
3+
from configuration.builders.infra.runtime import (
4+
BuildSequence,
5+
DockerConfig,
6+
InContainer,
7+
)
8+
from configuration.builders.sequences.helpers import save_infer_logs
9+
from configuration.steps.base import BashCommand, StepOptions
10+
from configuration.steps.commands.compile import CompileCMakeCommand
11+
from configuration.steps.commands.configure import ConfigureMariaDBCMake
12+
from configuration.steps.commands.download import GitFetch, GitInitFromCommit
13+
from configuration.steps.commands.util import InferScript, PrintEnvironmentDetails
14+
from configuration.steps.generators.cmake.compilers import ClangCompiler
15+
from configuration.steps.generators.cmake.generator import CMakeGenerator
16+
from configuration.steps.generators.cmake.options import (
17+
CMAKE,
18+
WITH,
19+
BuildType,
20+
CMakeOption,
21+
)
22+
from configuration.steps.remote import ShellStep
23+
24+
25+
def infer(
    config: DockerConfig,
    jobs: int,
):
    """Assemble the build sequence that runs Infer static analysis.

    The sequence prints the environment details and then, inside the
    container described by ``config``: runs ``git clean`` in ``/mnt/src``,
    runs ``GitInitFromCommit`` for the requested revision, records the file
    names that differ from the master branch, configures a Clang CMake
    build in ``bld``, builds the source-generating targets, runs
    ``InferScript`` and finally saves the Infer results.

    Args:
        config: Docker environment used for every containerised step.
        jobs: Parallelism handed to the checkout and compile commands and
            exported to the Infer script as ``JOBS``.

    Returns:
        The populated ``BuildSequence``.
    """
    sequence = BuildSequence()
    source_dir = PurePath("/mnt", "src")

    sequence.add_step(ShellStep(command=PrintEnvironmentDetails()))
    # infer --version

    # Drop untracked files left in the shared source checkout; a failure
    # here does not halt the build.
    clean_step = ShellStep(
        command=BashCommand(command="git clean -df", workdir=source_dir),
        options=StepOptions(
            haltOnFailure=False,
            descriptionDone="git cleaned",
        ),
    )
    sequence.add_step(InContainer(docker_environment=config, step=clean_step))

    checkout_step = ShellStep(
        command=GitInitFromCommit(
            repo_url="%(prop:repository)s",
            commit="%(prop:revision)s",
            jobs=jobs,
            depth=0,
            workdir=source_dir,
        )
    )
    sequence.add_step(InContainer(checkout_step, docker_environment=config))

    # List the files that differ between the fetched commit and the master
    # branch; the list is also written to index.txt.
    changed_files_step = ShellStep(
        command=BashCommand(
            command="git diff --name-only FETCH_HEAD..%(prop:master_branch)s | tee $OLD_PWD/index.txt",
            workdir=source_dir,
        ),
        options=StepOptions(
            haltOnFailure=False,
            descriptionDone="names of changed files",
        ),
    )
    sequence.add_step(
        InContainer(docker_environment=config, step=changed_files_step)
    )

    flags = [
        # UBSAN is the only prevention of UNINIT_VAR(X) x= x
        # that generated lots of uninit read/write errors.
        CMakeOption(WITH.UBSAN, True),
        CMakeOption(CMAKE.EXPORT_COMPILE_COMMANDS, True),
    ]
    configure_step = ShellStep(
        command=ConfigureMariaDBCMake(
            name="configure",
            cmake_generator=CMakeGenerator(
                use_ccache=True,
                flags=flags,
                source_path="/mnt/src",
                builddir="bld",
                compiler=ClangCompiler(),
            ),
        ),
        options=StepOptions(descriptionDone="Configure"),
    )
    sequence.add_step(
        InContainer(docker_environment=config, step=configure_step)
    )

    # Some server code is generated, so these need to be generated to test
    generated_targets = [
        "GenError",
        "GenServerSource",
        "GenUnicodeDataSource",
        "GenFixPrivs",
    ]
    generate_step = ShellStep(
        command=CompileCMakeCommand(
            builddir="bld",
            jobs=jobs,
            verbose=True,
            targets=generated_targets,
        ),
        options=StepOptions(descriptionDone="compile"),
    )
    sequence.add_step(
        InContainer(docker_environment=config, step=generate_step)
    )

    env_vars = [("JOBS", str(jobs))]
    analysis_step = ShellStep(
        command=InferScript("%(prop:branch)s", "%(prop:master_branch)s"),
        options=StepOptions(
            descriptionDone="infer analysis complete",
        ),
        env_vars=env_vars,
    )
    sequence.add_step(
        InContainer(docker_environment=config, step=analysis_step)
    )

    def run_in_container(step):
        """Wrap ``step`` so that it runs in the configured container."""
        return InContainer(docker_environment=config, step=step)

    sequence.add_step(save_infer_logs(step_wrapping_fn=run_in_container))
    return sequence
Lines changed: 213 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,213 @@
1+
#!/bin/bash
#
# Infer script: static analysis of the MariaDB codebase.
#
# usage: <script> branch master_branch
#   branch         the branch being tested
#   master_branch  the branch it is compared against
#
# Environment:
#   JOBS           parallelism for build and analysis (default 4)

set -x -e

if [[ $# -lt 2 ]]; then
  echo insufficient args >&2
  exit 1
fi

# Testing this version
branch=$1

shift
# Which is against the master_branch
master_branch=$1

if [[ -z $branch || -z $master_branch ]]; then
  echo "usage $0 branch master_branch" >&2
  exit 1
fi

JOBS=${JOBS:-4}

# Directory the script was started from; results are assembled beneath it.
base=$PWD
result_dir=$base/infer_results

## Fetch

pushd /mnt/src
git fetch origin "$branch"
git checkout -f FETCH_HEAD
git submodule update --init --recursive
commit=$(git rev-parse FETCH_HEAD)

# Results are stored per commit under /mnt/infer; nothing to do if this
# commit already has a directory there.
if [[ -d /mnt/infer/$commit ]]; then
  echo "Already scanned $commit"
  exit 0
fi
42+
43+
# What can we use as a reference
44+
45+
#if [ ! -L /mnt/infer/"$master_branch" ] && [ -L /mnt/infer/main ]; then
46+
# # Attempting to use main to find/create a base $master_branch
47+
# merge_base=$(git merge-base "$master_branch" main)
48+
# if [ -n "$merge_base" ]; then
49+
# if [ -d /mnt/infer/"$merge_base" ]; then
50+
# echo "Creating $master_branch based of $merge_base"
51+
# ln -s "$merge_base" /mnt/infer/"$master_branch"
52+
# else
53+
# echo "Creating $master_branch based of main"
54+
# # could be a bit inaccurate as main as moved on from $master_branch
55+
# ln -s "$(readlink /mnt/infer/main)" /mnt/infer/"$master_branch"
56+
# fi
57+
# fi
58+
#fi
59+
60+
# populate_differences
#
# Finds the closest already-analysed commit to use as the baseline, writes
# the list of files changed since that baseline to "$base"/index.txt and
# copies the baseline results to "$result_dir".
#
# Globals read:    merge_base, branch, master_branch, base, result_dir
# Globals written: merge_base (replaced by the baseline commit actually used)
# Exits the script: with 1 when no stored analysis is available,
#                   with 0 when there are no file changes to analyse.
populate_differences()
{
  local candidate
  local -a commits
  # `git rev-list A..B` does not include A, so start from the merge base
  # itself: its stored results are the fallback when no later commit of
  # this branch has been analysed (or when there are no later commits).
  local common_commit=$merge_base

  # Find something closer - e.g. we've appended to a branch
  # we've already tested. rev-list prints newest first, so the first hit
  # is the one nearest to FETCH_HEAD.
  mapfile -t commits < <(git rev-list "${merge_base}..FETCH_HEAD")
  for candidate in "${commits[@]}"; do
    if [ -d "/mnt/infer/$candidate" ]; then
      common_commit=$candidate
      break
    fi
  done

  if [ ! -d "/mnt/infer/$common_commit" ]; then
    echo "From $branch to master branch $master_branch last analysis $common_commit or later is missing" >&2
    exit 1
  fi
  merge_base=$common_commit

  # The files changed since the last results
  git diff --name-only FETCH_HEAD.."${merge_base}" | tee "$base"/index.txt

  if [ ! -s "$base"/index.txt ]; then
    echo "Empty changes - nothing necessary"
    rm "$base"/index.txt
    exit 0
  fi

  # use previous results as a base
  cp -a "/mnt/infer/$merge_base" "$result_dir"
}
88+
89+
# Choose the reference the tested branch is compared against.
if [ "$branch" = "$master_branch" ]; then
  # compare against the last record we have for the master_branch
  # as this is a push on the master branch
  #last_master_branch_ref=$(readlink /mnt/infer/"$master_branch")
  #merge_base=$(git merge-base "$branch" "$last_master_branch_ref")

  # Just assume we diverged from main at some point
  merge_base_ref=origin/main
else
  merge_base_ref=$master_branch
fi

# `git merge-base` exits with status 1 and prints nothing when there is no
# common ancestor. Under `set -e` a bare assignment would abort the script
# right here and the full-scan fallback below could never run, so capture
# the status and only give up on real errors (status > 1).
merge_base_status=0
merge_base=$(git merge-base "$branch" "$merge_base_ref") || merge_base_status=$?
if [ "$merge_base_status" -gt 1 ]; then
  echo "git merge-base $branch $merge_base_ref failed with status $merge_base_status" >&2
  exit "$merge_base_status"
fi

if [ -z "$merge_base" ]; then
  echo "No common commit ancestor between $branch and $master_branch" >&2
  # We don't have a master symlink yet
  # lack of index.txt is the key
  echo "This is going to take a while for a full scan"
else
  populate_differences
fi

# back from /mnt/src
popd
113+
# Build
114+
115+
# build
#
# Configures /mnt/src into ./bld with clang (exporting the compilation
# database) and builds only the targets that generate source files.
# Reads JOBS for the build parallelism.
build()
{
  local -a configure_args=(
    -DCMAKE_EXPORT_COMPILE_COMMANDS=ON
    -DCMAKE_C_COMPILER=clang
    -DCMAKE_CXX_COMPILER=clang++
    -S /mnt/src
    -B bld
  )
  local -a generated_targets=(
    GenError
    GenServerSource
    GenUnicodeDataSource
    GenFixPrivs
  )

  cmake "${configure_args[@]}"
  cmake --build bld --target "${generated_targets[@]}" --parallel "$JOBS"
}

# Only build here when no build directory exists yet.
if ! [ -d bld ]; then
  mkdir bld
  build
fi
130+
131+
#
132+
# capture [extra infer options...]
#
# Runs the Infer capture phase from compile_commands.json in the current
# directory, storing into "$result_dir". Extra arguments are passed through.
capture()
{
  infer capture \
    --compilation-database compile_commands.json \
    --project-root /mnt/src \
    --results-dir "${result_dir}" \
    "$@"
}
136+
137+
# analyze [extra infer options...]
#
# Runs the Infer analyze phase on "$result_dir" with up to $JOBS jobs.
# Extra arguments are passed through.
analyze()
{
  infer analyze \
    --project-root /mnt/src \
    --results-dir "${result_dir}" \
    --max-jobs "${JOBS}" \
    "$@"
}
141+
# Capture and analyze, limited to the changed files listed in index.txt
# when that file exists.

# full_scan
#
# Analyses the whole tree, stores the results as /mnt/infer/$commit (and
# points /mnt/infer/$master_branch at it when the master branch itself was
# tested), removes the build directory and exits the script.
full_scan()
{
  echo "full run, this could take a while"
  capture
  analyze
  mv "$result_dir" /mnt/infer/"$commit"
  if [ "$branch" = "$master_branch" ]; then
    ln -fs "$commit" /mnt/infer/"$master_branch"
  fi
  cd ..
  rm -rf bld
  exit
}

cd bld

# No index.txt means there is no baseline to diff against.
if ! [ -f ../index.txt ]; then
  full_scan
fi

# We've copied over a result dir, so we're continuing
# https://fbinfer.com/docs/infer-workflow/#differential-workflow
# using 'infer capture' instead of 'infer run'
capture --reactive

# some form of incremental
analyze --changed-files-index ../index.txt

# Preserve result: the report of the tested commit, plus a full copy of
# its results directory.
cp "${result_dir}"/report.json ../report.json

cp -a "${result_dir}" "${result_dir}_preserved"

# Switch the sources to the baseline commit.
pushd /mnt/src
git checkout "$merge_base"
popd

# TODO
# How can we use the previous captured /mnt/infer/$merge_base

# just in case these have changed, including generated files
cd ..
build
cd bld

capture --reactive --mark-unchanged-procs
analyze --incremental-analysis --changed-files-index ../index.txt

# TODO useful enough to save as /mnt/infer/$commit
# it may be merged next, or a commit pushed on top of it.
infer reportdiff \
  --report-current ../report.json \
  --report-previous "${result_dir}"/report.json \
  --project-root /mnt/src \
  --results-dir "${result_dir}"
cd ..
rm -rf bld index.txt
191+
# report.json
192+
193+
# check FILE MSG
#
# Prints MSG and returns 1 when FILE exists and holds more than an empty
# JSON array; returns 0 when FILE is missing or empty.
check()
{
  local file=$1
  local msg=$2
  local filesize

  if [ -f "${file}" ]; then
    filesize=$(stat -c%s "$file")
    # 2 is the size of an empty json array '[]'
    if [ "$filesize" -gt 2 ]; then
      echo "$msg"
      return 1
    fi
  fi
  return 0
}

# Newly introduced issues must fail the run: with `set -e` the non-zero
# return of check becomes the exit status of the script.
check "${result_dir}"/differential/introduced.json "bad human! Don't introduce bad things"

# Fixed issues are good news and only worth a message. Without `|| true`
# the non-zero return would make the script fail exactly when something
# was fixed.
check "${result_dir}"/differential/fixed.json "good human! Thanks for fixing the bad things" || true
210+
211+
212+
213+

0 commit comments

Comments
 (0)