CRAWLING SPIDER
Goal -> Recursively list all links starting from a base URL.
1. Read page HTML.
2. Extract all links.
3. Repeat for each new link that is not already on the list.
#!/usr/bin/env python
"""Crawling spider: recursively list every in-scope link from a base URL."""

import re
from urllib.parse import urljoin

# Base URL of the site to crawl; only links that start with it are followed.
target_url = "http://10.0.0.45/mutillidae/"
# Every in-scope link discovered so far, in discovery order.
target_links = []

# The closing quote sits OUTSIDE the capture group, so the captured value is
# the bare attribute content (the old pattern captured a trailing '"').
HREF_PATTERN = re.compile(r'href="(.*?)"')


def find_hrefs(html):
    """Return the value of every double-quoted ``href`` attribute in *html*."""
    return HREF_PATTERN.findall(html)


def normalize_link(base_url, href):
    """Resolve *href* against *base_url* and strip any ``#fragment`` part."""
    return urljoin(base_url, href).split("#")[0]


def extract_links_from(url):
    """Download *url* and return the raw ``href`` values found in its body.

    Raises whatever ``requests.get`` raises (connection errors, timeouts).
    """
    # Imported here so the parsing/traversal helpers above and below remain
    # importable on machines without the third-party HTTP dependency.
    import requests

    # A timeout keeps one unresponsive page from hanging the whole crawl.
    response = requests.get(url, timeout=10)
    # Binary resources (icons, PDFs) are not valid UTF-8; replace undecodable
    # bytes instead of crashing - they cannot contain useful links anyway.
    return find_hrefs(response.content.decode(errors="replace"))


def crawl(url, extract_links=None):
    """Depth-first crawl from *url*, recording new in-scope links.

    Each newly discovered link that starts with ``target_url`` is appended to
    ``target_links``, printed, and then crawled itself before the remaining
    links of the current page are examined (same order as a recursive walk).

    Args:
        url: Page to start crawling from.
        extract_links: Optional callable mapping a URL to the list of raw
            href values on that page. Defaults to ``extract_links_from``.
    """
    if extract_links is None:
        extract_links = extract_links_from

    # Set mirror of target_links for O(1) "already seen?" checks.
    seen = set(target_links)
    # Explicit stack of (page URL, iterator over its hrefs) instead of
    # recursion, so deep sites cannot hit Python's recursion limit.
    pending = [(url, iter(extract_links(url)))]
    while pending:
        page_url, hrefs = pending[-1]
        for href in hrefs:
            link = normalize_link(page_url, href)
            # Prefix match (not substring) so an external URL that merely
            # embeds the base URL in its query string stays out of scope.
            if not link.startswith(target_url) or link in seen:
                continue
            seen.add(link)
            target_links.append(link)
            print(link)
            # Descend into the new page now; this page's iterator resumes
            # where it stopped once the child is exhausted.
            pending.append((link, iter(extract_links(link))))
            break
        else:
            # No unexplored links left on this page.
            pending.pop()


if __name__ == "__main__":
    crawl(target_url)
Running the Python program prints every link it discovers:
http://10.0.0.45/mutillidae/favicon.ico" http://10.0.0.45/mutillidae/styles/global-styles.css" http://10.0.0.45/mutillidae/styles/ddsmoothmenu/ddsmoothmenu.css" http://10.0.0.45/mutillidae/styles/ddsmoothmenu/ddsmoothmenu-v.css" http://10.0.0.45/mutillidae/index.php?page=home.php" http://10.0.0.45/mutillidae/index.php?page=login.php" http://10.0.0.45/mutillidae/index.php?do=toggle-hints&page=login.php" http://10.0.0.45/mutillidae/index.php?do=toggle-security&page=login.php" http://10.0.0.45/mutillidae/set-up-database.php" http://10.0.0.45/mutillidae/index.php?page=show-log.php" http://10.0.0.45/mutillidae/index.php?do=toggle-hints&page=show-log.php" http://10.0.0.45/mutillidae/index.php?do=toggle-security&page=show-log.php" http://10.0.0.45/mutillidae/index.php?page=captured-data.php" http://10.0.0.45/mutillidae/index.php?do=toggle-hints&page=captured-data.php" http://10.0.0.45/mutillidae/index.php?do=toggle-security&page=captured-data.php" http://10.0.0.45/mutillidae/index.php?page=credits.php" http://10.0.0.45/mutillidae/index.php?do=toggle-hints&page=credits.php" http://10.0.0.45/mutillidae/index.php?do=toggle-security&page=credits.php" http://10.0.0.45/mutillidae/" http://10.0.0.45/mutillidae/index.php?page=user-info.php" http://10.0.0.45/mutillidae/index.php?do=toggle-hints&page=user-info.php" http://10.0.0.45/mutillidae/index.php?do=toggle-security&page=user-info.php" http://10.0.0.45/mutillidae/index.php?page=register.php" http://10.0.0.45/mutillidae/index.php?do=toggle-hints&page=register.php" http://10.0.0.45/mutillidae/index.php?do=toggle-security&page=register.php" http://10.0.0.45/mutillidae/index.php?page=view-someones-blog.php" http://10.0.0.45/mutillidae/index.php?do=toggle-hints&page=view-someones-blog.php" http://10.0.0.45/mutillidae/index.php?do=toggle-security&page=view-someones-blog.php" http://10.0.0.45/mutillidae/index.php?page=add-to-your-blog.php" http://10.0.0.45/mutillidae/index.php?do=toggle-hints&page=add-to-your-blog.php" 
http://10.0.0.45/mutillidae/index.php?do=toggle-security&page=add-to-your-blog.php" http://10.0.0.45/mutillidae/index.php?page=site-footer-xss-discussion.php" http://10.0.0.45/mutillidae/index.php?do=toggle-hints&page=site-footer-xss-discussion.php" http://10.0.0.45/mutillidae/index.php?do=toggle-security&page=site-footer-xss-discussion.php" http://10.0.0.45/mutillidae/index.php?page=html5-storage.php" http://10.0.0.45/mutillidae/index.php?do=toggle-hints&page=html5-storage.php" http://10.0.0.45/mutillidae/index.php?do=toggle-security&page=html5-storage.php" http://10.0.0.45/mutillidae/index.php?page=capture-data.php" http://10.0.0.45/mutillidae/index.php?do=toggle-hints&page=capture-data.php" http://10.0.0.45/mutillidae/index.php?do=toggle-security&page=capture-data.php" http://10.0.0.45/mutillidae/index.php?page=dns-lookup.php" http://10.0.0.45/mutillidae/index.php?do=toggle-hints&page=dns-lookup.php" http://10.0.0.45/mutillidae/index.php?do=toggle-security&page=dns-lookup.php" http://10.0.0.45/mutillidae/index.php" http://10.0.0.45/mutillidae/index.php?page=password-generator.php&username=anonymous" http://10.0.0.45/mutillidae/index.php?do=toggle-hints&page=password-generator.php" http://10.0.0.45/mutillidae/index.php?do=toggle-security&page=password-generator.php" http://10.0.0.45/mutillidae/index.php?page=user-poll.php" http://10.0.0.45/mutillidae/index.php?do=toggle-hints&page=user-poll.php" http://10.0.0.45/mutillidae/index.php?do=toggle-security&page=user-poll.php" http://10.0.0.45/mutillidae/index.php?page=set-background-color.php" http://10.0.0.45/mutillidae/index.php?do=toggle-hints&page=set-background-color.php" http://10.0.0.45/mutillidae/index.php?do=toggle-security&page=set-background-color.php" http://10.0.0.45/mutillidae/index.php?page=pen-test-tool-lookup.php" http://10.0.0.45/mutillidae/index.php?do=toggle-hints&page=pen-test-tool-lookup.php" http://10.0.0.45/mutillidae/index.php?do=toggle-security&page=pen-test-tool-lookup.php" 
http://10.0.0.45/mutillidae/index.php?page=text-file-viewer.php" http://10.0.0.45/mutillidae/index.php?do=toggle-hints&page=text-file-viewer.php" http://10.0.0.45/mutillidae/index.php?do=toggle-security&page=text-file-viewer.php" http://10.0.0.45/mutillidae/index.php?page=browser-info.php" http://10.0.0.45/mutillidae/index.php?do=toggle-hints&page=browser-info.php" http://10.0.0.45/mutillidae/index.php?do=toggle-security&page=browser-info.php" http://10.0.0.45/mutillidae/index.php?page=source-viewer.php" http://10.0.0.45/mutillidae/index.php?do=toggle-hints&page=source-viewer.php" http://10.0.0.45/mutillidae/index.php?do=toggle-security&page=source-viewer.php" http://10.0.0.45/mutillidae/index.php?page=arbitrary-file-inclusion.php" http://10.0.0.45/mutillidae/index.php?do=toggle-hints&page=arbitrary-file-inclusion.php" http://10.0.0.45/mutillidae/index.php?do=toggle-security&page=arbitrary-file-inclusion.php" http://10.0.0.45/mutillidae/index.php?page=secret-administrative-pages.php" http://10.0.0.45/mutillidae/index.php?do=toggle-hints&page=secret-administrative-pages.php" http://10.0.0.45/mutillidae/index.php?do=toggle-security&page=secret-administrative-pages.php" http://10.0.0.45/mutillidae/index.php?page=framing.php" http://10.0.0.45/mutillidae/index.php?do=toggle-hints&page=framing.php" http://10.0.0.45/mutillidae/index.php?do=toggle-security&page=framing.php" http://10.0.0.45/mutillidae/framer.html" http://10.0.0.45/mutillidae/index.php?page=change-log.htm" http://10.0.0.45/mutillidae/index.php?do=toggle-hints&page=change-log.htm" http://10.0.0.45/mutillidae/index.php?do=toggle-security&page=change-log.htm" http://10.0.0.45/mutillidae/index.php?page=installation.php" http://10.0.0.45/mutillidae/index.php?do=toggle-hints&page=installation.php" http://10.0.0.45/mutillidae/index.php?do=toggle-security&page=installation.php" http://10.0.0.45/mutillidae/documentation/mutillidae-installation-on-xampp-win7.pdf" 
http://10.0.0.45/mutillidae/index.php?page=documentation/vulnerabilities.php" http://10.0.0.45/mutillidae/index.php?do=toggle-hints&page=documentation/vulnerabilities.php" http://10.0.0.45/mutillidae/index.php?do=toggle-security&page=documentation/vulnerabilities.php" http://10.0.0.45/mutillidae/index.php?page=documentation/how-to-access-Mutillidae-over-Virtual-Box-network.php" http://10.0.0.45/mutillidae/index.php?do=toggle-hints&page=documentation/how-to-access-Mutillidae-over-Virtual-Box-network.php" http://10.0.0.45/mutillidae/index.php?do=toggle-security&page=documentation/how-to-access-Mutillidae-over-Virtual-Box-network.php" http://10.0.0.45/mutillidae/index.php?do=toggle-hints&page=home.php" http://10.0.0.45/mutillidae/index.php?do=toggle-security&page=home.php" http://10.0.0.45/mutillidae/ http://10.0.0.45/mutillidae/?page=add-to-your-blog.php" http://10.0.0.45/mutillidae/?page=view-someones-blog.php" http://10.0.0.45/mutillidae/?page=show-log.php" http://10.0.0.45/mutillidae/?page=text-file-viewer.php" http://10.0.0.45/mutillidae/?page=user-info.php" http://10.0.0.45/mutillidae/?page=login.php" http://10.0.0.45/mutillidae/?page=credits.php" http://10.0.0.45/mutillidae/?page=source-viewer.php" http://10.0.0.45/mutillidae/index.php?page=usage-instructions.php" http://10.0.0.45/mutillidae/index.php?do=toggle-hints&page=usage-instructions.php" http://10.0.0.45/mutillidae/index.php?do=toggle-security&page=usage-instructions.php" http://10.0.0.45/mutillidae/index.php?page=php-errors.php" http://10.0.0.45/mutillidae/index.php?do=toggle-hints&page=php-errors.php" http://10.0.0.45/mutillidae/index.php?do=toggle-security&page=php-errors.php" http://10.0.0.45/mutillidae/index.php?page=notes.php" http://10.0.0.45/mutillidae/index.php?do=toggle-hints&page=notes.php" http://10.0.0.45/mutillidae/index.php?do=toggle-security&page=notes.php"