From ea9ad19cfcbc1fc6b809de832146f90e29947f0d Mon Sep 17 00:00:00 2001
From: 0x6775737461
Date: Fri, 15 Jul 2022 10:19:53 -0400
Subject: [PATCH] using just sed to filter the data

---
Review notes (free-form area; not part of the commit message or the diff):

* This patch was recovered from a copy in which all newlines had been
  collapsed. Line structure is restored below; the leading indentation of
  the script lines could not be recovered and is reconstructed, so apply
  with whitespace tolerance (e.g. "git apply --ignore-whitespace").
* The hunk ends with one blank context line, as required by the 7/14 line
  counts in the hunk header.
* Behaviour change to confirm: the removed pipeline dropped every line that
  starts with "/" (grep -v '^/'); the new sed script instead drops every
  line that contains a space. A comment line with no space in it (for
  example a bare "//") would now be kept in public_suffix_list.clean.txt.
* "sed -r" and the "\s" class are GNU sed features; the removed pipeline
  did not use them. Confirm the script is only run where GNU sed is the
  default sed.

 data/get_latest.sh | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/data/get_latest.sh b/data/get_latest.sh
index df9b4a6c8..f34866ca2 100755
--- a/data/get_latest.sh
+++ b/data/get_latest.sh
@@ -31,7 +31,14 @@ fi
 if [ ! -f public_suffix_list.dat ]; then
     echo "[+] Getting latest Public suffix list"
     wget -N -q https://publicsuffix.org/list/public_suffix_list.dat
-    cat public_suffix_list.dat | grep -i -e '^/' -v | awk 'NF' | sed 's/\*\.//g' > public_suffix_list.clean.txt
+    sed -r '{
+        # removing lines with sentences (which need spaces)
+        / /d
+        # removing blank lines
+        /^\s*$/d
+        # getting just valid suffix, "*.foo.com" should be "foo.com"
+        s/\*\.//g
+    }' public_suffix_list.dat > public_suffix_list.clean.txt
     rm public_suffix_list.dat
 fi
 