Skip to content

Commit

Permalink
🔨 restructure
Browse files Browse the repository at this point in the history
  • Loading branch information
manuel-sommer committed Jan 14, 2025
1 parent 641565d commit 9df49a0
Showing 1 changed file with 108 additions and 57 deletions.
165 changes: 108 additions & 57 deletions dojo/tools/noseyparker/parser.py
Original file line number Diff line number Diff line change
def get_findings(self, file, test):
    """
    Return findings parsed from a Nosey Parker JSON-lines report.

    Each line of the report is a standalone JSON document. Lines are
    dispatched to the handler matching the report schema (v0.16.0 lines
    carry the matched secret inline; v0.22.0 lines reference it via a
    finding id). Findings are accumulated, de-duplicated, in
    ``self.dupes`` by the handlers and returned as a list.

    Raises ValueError when the file extension or line schema is not a
    recognized Nosey Parker format.
    """
    # De-duplication store shared with the per-version handlers,
    # keyed by an MD5 of filepath|secret|line number.
    self.dupes = {}
    if file is None:
        return None
    # Guard clause: only .jsonl reports are supported.
    if not file.name.lower().endswith(".jsonl"):
        msg = "JSON lines format not recognized (.jsonl file extension). Make sure to use Nosey Parker v0.16.0 or v0.22.0"
        raise ValueError(msg)
    # Process JSON lines into a list of dicts.
    data = [json.loads(line) for line in file]
    # Check for an empty report (no lines at all, or one empty object);
    # indexing data[0] unconditionally would raise IndexError on [].
    if not data or len(data[0]) == 0:
        return []
    # Parse each secret in each JSON line, dispatching on schema.
    for line in data:
        if line.get("rule_name") is not None and line.get("match_content") is not None:
            self.version_0_16_0(line, test)
        elif line.get("rule_name") is not None and line.get("finding_id") is not None:
            self.version_0_22_0(line, test)
        else:
            msg = "Invalid Nosey Parker data, make sure to use Nosey Parker v0.16.0 or v0.22.0"
            raise ValueError(msg)
    return list(self.dupes.values())


def version_0_16_0(self, line, test):
    """
    Parse a single Nosey Parker v0.16.0 report line into Findings.

    Every entry in ``line["matches"]`` produces one Finding; matches with
    the same file path, secret and line number are collapsed into a single
    Finding whose ``nb_occurences`` counter is incremented. Findings are
    accumulated in ``self.dupes``, keyed by an MD5 digest of those three
    values (MD5 is used only for de-duplication, not for security).
    """
    rule_name = line["rule_name"]
    secret = line["match_content"]
    for match in line["matches"]:
        # The provenance list length varies between reports; the commit
        # information is always in the last element.
        json_path = match["provenance"][-1]
        commit_metadata = json_path["commit_provenance"]["commit_metadata"]

        title = f"Secret(s) Found in Repository with Commit ID {commit_metadata['commit_id']}"
        filepath = json_path["commit_provenance"]["blob_path"]
        line_num = match["location"]["source_span"]["start"]["line"]
        # Only the first three characters of the secret are disclosed.
        description = (
            f"Secret found of type: {rule_name} \n"
            f"SECRET starts with: '{secret[:3]}' \n"
            f"Committer Name: {commit_metadata['committer_name']} \n"
            f"Committer Email: {commit_metadata['committer_email']} \n"
            f"Commit ID: {commit_metadata['commit_id']} \n"
            f"Location: {filepath} line #{line_num} \n"
            f"Line #{line_num} \n"
        )

        # Internal de-duplication key: same filepath/secret/line number.
        key = hashlib.md5((filepath + "|" + secret + "|" + str(line_num)).encode("utf-8")).hexdigest()

        if key in self.dupes:
            # Already recorded: just count the additional occurrence.
            self.dupes[key].nb_occurences += 1
        else:
            self.dupes[key] = Finding(
                test=test,
                cwe=798,
                title=title,
                description=description,
                severity="High",
                mitigation="Reset the account/token and remove from source code. Store secrets/tokens/passwords in secret managers or secure vaults.",
                date=datetime.today().strftime("%Y-%m-%d"),
                verified=False,
                active=True,
                is_mitigated=False,
                file_path=filepath,
                line=line_num,
                static_finding=True,
                nb_occurences=1,
                dynamic_finding=False,
            )

def version_0_22_0(self, line, test):
    """
    Parse a single Nosey Parker v0.22.0 report line.

    Not yet implemented: v0.22.0 lines are recognized by get_findings
    (``rule_name`` + ``finding_id``) but currently produce no findings.
    """
    # TODO: implement v0.22.0 parsing analogous to version_0_16_0; the
    # v0.22.0 provenance layout differs, so the field traversal must be
    # adapted rather than copied.

Check failure on line 152 in dojo/tools/noseyparker/parser.py

View workflow job for this annotation

GitHub Actions / ruff-linting

Ruff (W292)

dojo/tools/noseyparker/parser.py:152:44: W292 No newline at end of file

0 comments on commit 9df49a0

Please sign in to comment.