1010
1111# Custom Python libraries.
1212
13+
14+ __version__ = "1.0.0"
15+
1316"""
1417Dork dictionary example:
1518
@@ -85,7 +88,8 @@ def retrieve_google_dorks(
8588 # Extract dork from <a href> using BeautifulSoup.
8689 # "<a href=\"/ghdb/5052\">inurl:_cpanel/forgotpwd</a>"
8790 soup = BeautifulSoup (dork ["url_title" ], "html.parser" )
88- extracted_dork = soup .find ("a" ).contents [0 ]
91+ # Some of the URL titles have trailing tabs; strip them from the extracted dork.
92+ extracted_dork = soup .find ("a" ).contents [0 ].strip ()
8993 extracted_dorks .append (extracted_dork )
9094
9195 # For individual categories.
@@ -98,6 +102,10 @@ def retrieve_google_dorks(
98102
99103 category_dict [numeric_category_id ] = {"category_name" : category_name , "dorks" : []}
100104
105+ # Some of the URL titles contain stray tabs; use replace() to remove them and store the cleaned title back in the
106+ # dork. The strip() method cannot be used because the tab is not at the end of the string, but just before the closing </a> tag:
107+ # <a href="/ghdb/2696">"Powered by Rock Band CMS 0.10" </a>
108+ dork ["url_title" ] = dork ["url_title" ].replace ("\t " , "" )
101109 category_dict [numeric_category_id ]["dorks" ].append (dork )
102110
103111 # If requested, break up dorks into individual files based off category.
@@ -121,7 +129,8 @@ def retrieve_google_dorks(
121129 # Extract dork from <a href> using BeautifulSoup.
122130 # "<a href=\"/ghdb/5052\">inurl:_cpanel/forgotpwd</a>"
123131 soup = BeautifulSoup (dork ["url_title" ], "html.parser" )
124- extracted_dork = soup .find ("a" ).contents [0 ]
132+ # Some of the URL titles have trailing tabs; strip them from the extracted dork.
133+ extracted_dork = soup .find ("a" ).contents [0 ].strip ()
125134 fh .write (f"{ extracted_dork } \n " )
126135
127136 # Save GHDB json object to all_google_dorks.json.
@@ -177,7 +186,7 @@ def retrieve_google_dorks(
177186 parser = argparse .ArgumentParser (
178187 formatter_class = argparse .RawDescriptionHelpFormatter ,
179188 description = (
180- "GHDB Scraper - Retrieve the Google Hacking Database dorks from "
189+ f "GHDB Scraper v { __version__ } - Retrieve Google Hacking Database dorks from "
181190 "https://www.exploit-db.com/google-hacking-database."
182191 ),
183192 epilog = epilog ,
0 commit comments