Skip to content

Commit

Permalink
retrieve: refactor Jeroengui source
Browse files (browse the repository at this point in the history)
  • Loading branch information
jarelllama authored Feb 2, 2025
1 parent 3efe13c commit 90b263d
Showing 1 changed file with 10 additions and 11 deletions.
21 changes: 10 additions & 11 deletions scripts/retrieve_domains.sh
Original file line number | Diff line number | Diff line change
Expand Up @@ -784,24 +784,23 @@ source_fakewebshoplisthun() {
}

source_jeroengui() {
# Last checked: 03/01/25
# Last checked: 02/02/25
source_name='Jeroengui'
exclude_from_light=true # Too many domains

[[ "$USE_EXISTING_RESULTS" == true ]] && return

source_url='https://file.jeroengui.be/phishing/last_week.txt'
# Get URLs with no subdirectories (too many link shorteners)
curl -sS "$source_url" | grep -Po "^https?://\K${DOMAIN_REGEX}(?=/?$)" \
> source_results.tmp
{
source_url='https://file.jeroengui.be/phishing/last_week.txt'
# Get URLs with no subdirectories (too many link shorteners)
curl -sS "$source_url" | grep -Po "^https?://\K${DOMAIN_REGEX}(?=/?$)"

source_url='https://file.jeroengui.be/malware/last_week.txt'
curl -sS "$source_url" | grep -Po "^https?://\K${DOMAIN_REGEX}" \
>> source_results.tmp
source_url='https://file.jeroengui.be/malware/last_week.txt'
curl -sS "$source_url" | grep -Po "^https?://\K${DOMAIN_REGEX}"

source_url='https://file.jeroengui.be/scam/last_week.txt'
curl -sS "$source_url" | grep -Po "^https?://\K${DOMAIN_REGEX}" \
>> source_results.tmp
source_url='https://file.jeroengui.be/scam/last_week.txt'
curl -sS "$source_url" | grep -Po "^https?://\K${DOMAIN_REGEX}"
} > source_results.tmp

# Get matching NRDs for the light version. Unicode is only processed by the
# full version.
Expand Down

0 comments on commit 90b263d

Please sign in to comment.