CRAWLING SPIDER

Goal -> Recursively list all links starting from a base URL.

1. Read page HTML.

2. Extract all links.

3. Repeat for each new link that is not already on the list.

#!/usr/bin/env python
import re
import requests
from urllib.parse import urljoin


# Base URL where the crawl starts; crawl() also uses it to decide which links
# belong to the target site.
target_url = "http://10.0.0.45/mutillidae/"
# Links recorded so far, in the order crawl() found them; also consulted to
# avoid visiting the same link twice.
target_links = []

def extract_links_from(url):
    """Return the value of every double-quoted ``href`` attribute on a page.

    Args:
        url: Address of the page to download.

    Returns:
        A list of href strings exactly as written in the markup (possibly
        relative), in document order, without the surrounding quotes.

    Raises:
        Whatever ``requests.get`` raises when the page cannot be fetched.
    """
    response = requests.get(url)
    # Linked resources are not always text (icons, PDFs), so undecodable bytes
    # are dropped instead of raising UnicodeDecodeError in the middle of a crawl.
    html = response.content.decode(errors="ignore")
    # The closing quote sits outside the capture group so that it is not
    # returned as part of the link.
    return re.findall(r'href="(.*?)"', html)


def crawl(url):
    """Print and record every in-scope link reachable from ``url``.

    Links are visited depth-first: as soon as a new link is found it is
    appended to ``target_links``, printed, and its own page is scanned before
    the remaining links of the current page.

    Args:
        url: Address of the page to start from.

    Returns:
        None. Results accumulate in the module-level ``target_links`` list.
    """
    # An explicit stack of (page URL, iterator over that page's hrefs) replaces
    # recursion, so a long chain of pages cannot exhaust Python's call stack.
    pending = [(url, iter(extract_links_from(url)))]
    while pending:
        page_url, hrefs = pending[-1]
        for href in hrefs:
            # Resolve relative hrefs against the page they came from and drop
            # any fragment, which only names a position inside the same page.
            link = urljoin(page_url, href).split("#")[0]

            # A prefix test keeps the crawl on the target site; a substring
            # test would also accept foreign URLs that merely embed the base
            # URL, e.g. inside a query string.
            if link.startswith(target_url) and link not in target_links:
                target_links.append(link)
                print(link)
                # Descend into the new page now; the current page's iterator
                # stays on the stack and resumes where it left off.
                pending.append((link, iter(extract_links_from(link))))
                break
        else:
            # Every href on this page has been handled.
            pending.pop()

# Start crawling at the base URL; each newly recorded link is printed as it is found.
crawl(target_url)

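The Python program runs to completion. Output as captured (the trailing `"` on most lines is the closing quote of the href attribute picked up by the match, not part of the URL):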

http://10.0.0.45/mutillidae/favicon.ico"
http://10.0.0.45/mutillidae/styles/global-styles.css"
http://10.0.0.45/mutillidae/styles/ddsmoothmenu/ddsmoothmenu.css"
http://10.0.0.45/mutillidae/styles/ddsmoothmenu/ddsmoothmenu-v.css"
http://10.0.0.45/mutillidae/index.php?page=home.php"
http://10.0.0.45/mutillidae/index.php?page=login.php"
http://10.0.0.45/mutillidae/index.php?do=toggle-hints&page=login.php"
http://10.0.0.45/mutillidae/index.php?do=toggle-security&page=login.php"
http://10.0.0.45/mutillidae/set-up-database.php"
http://10.0.0.45/mutillidae/index.php?page=show-log.php"
http://10.0.0.45/mutillidae/index.php?do=toggle-hints&page=show-log.php"
http://10.0.0.45/mutillidae/index.php?do=toggle-security&page=show-log.php"
http://10.0.0.45/mutillidae/index.php?page=captured-data.php"
http://10.0.0.45/mutillidae/index.php?do=toggle-hints&page=captured-data.php"
http://10.0.0.45/mutillidae/index.php?do=toggle-security&page=captured-data.php"
http://10.0.0.45/mutillidae/index.php?page=credits.php"
http://10.0.0.45/mutillidae/index.php?do=toggle-hints&page=credits.php"
http://10.0.0.45/mutillidae/index.php?do=toggle-security&page=credits.php"
http://10.0.0.45/mutillidae/"
http://10.0.0.45/mutillidae/index.php?page=user-info.php"
http://10.0.0.45/mutillidae/index.php?do=toggle-hints&page=user-info.php"
http://10.0.0.45/mutillidae/index.php?do=toggle-security&page=user-info.php"
http://10.0.0.45/mutillidae/index.php?page=register.php"
http://10.0.0.45/mutillidae/index.php?do=toggle-hints&page=register.php"
http://10.0.0.45/mutillidae/index.php?do=toggle-security&page=register.php"
http://10.0.0.45/mutillidae/index.php?page=view-someones-blog.php"
http://10.0.0.45/mutillidae/index.php?do=toggle-hints&page=view-someones-blog.php"
http://10.0.0.45/mutillidae/index.php?do=toggle-security&page=view-someones-blog.php"
http://10.0.0.45/mutillidae/index.php?page=add-to-your-blog.php"
http://10.0.0.45/mutillidae/index.php?do=toggle-hints&page=add-to-your-blog.php"
http://10.0.0.45/mutillidae/index.php?do=toggle-security&page=add-to-your-blog.php"
http://10.0.0.45/mutillidae/index.php?page=site-footer-xss-discussion.php"
http://10.0.0.45/mutillidae/index.php?do=toggle-hints&page=site-footer-xss-discussion.php"
http://10.0.0.45/mutillidae/index.php?do=toggle-security&page=site-footer-xss-discussion.php"
http://10.0.0.45/mutillidae/index.php?page=html5-storage.php"
http://10.0.0.45/mutillidae/index.php?do=toggle-hints&page=html5-storage.php"
http://10.0.0.45/mutillidae/index.php?do=toggle-security&page=html5-storage.php"
http://10.0.0.45/mutillidae/index.php?page=capture-data.php"
http://10.0.0.45/mutillidae/index.php?do=toggle-hints&page=capture-data.php"
http://10.0.0.45/mutillidae/index.php?do=toggle-security&page=capture-data.php"
http://10.0.0.45/mutillidae/index.php?page=dns-lookup.php"
http://10.0.0.45/mutillidae/index.php?do=toggle-hints&page=dns-lookup.php"
http://10.0.0.45/mutillidae/index.php?do=toggle-security&page=dns-lookup.php"
http://10.0.0.45/mutillidae/index.php"
http://10.0.0.45/mutillidae/index.php?page=password-generator.php&username=anonymous"
http://10.0.0.45/mutillidae/index.php?do=toggle-hints&page=password-generator.php"
http://10.0.0.45/mutillidae/index.php?do=toggle-security&page=password-generator.php"
http://10.0.0.45/mutillidae/index.php?page=user-poll.php"
http://10.0.0.45/mutillidae/index.php?do=toggle-hints&page=user-poll.php"
http://10.0.0.45/mutillidae/index.php?do=toggle-security&page=user-poll.php"
http://10.0.0.45/mutillidae/index.php?page=set-background-color.php"
http://10.0.0.45/mutillidae/index.php?do=toggle-hints&page=set-background-color.php"
http://10.0.0.45/mutillidae/index.php?do=toggle-security&page=set-background-color.php"
http://10.0.0.45/mutillidae/index.php?page=pen-test-tool-lookup.php"
http://10.0.0.45/mutillidae/index.php?do=toggle-hints&page=pen-test-tool-lookup.php"
http://10.0.0.45/mutillidae/index.php?do=toggle-security&page=pen-test-tool-lookup.php"
http://10.0.0.45/mutillidae/index.php?page=text-file-viewer.php"
http://10.0.0.45/mutillidae/index.php?do=toggle-hints&page=text-file-viewer.php"
http://10.0.0.45/mutillidae/index.php?do=toggle-security&page=text-file-viewer.php"
http://10.0.0.45/mutillidae/index.php?page=browser-info.php"
http://10.0.0.45/mutillidae/index.php?do=toggle-hints&page=browser-info.php"
http://10.0.0.45/mutillidae/index.php?do=toggle-security&page=browser-info.php"
http://10.0.0.45/mutillidae/index.php?page=source-viewer.php"
http://10.0.0.45/mutillidae/index.php?do=toggle-hints&page=source-viewer.php"
http://10.0.0.45/mutillidae/index.php?do=toggle-security&page=source-viewer.php"
http://10.0.0.45/mutillidae/index.php?page=arbitrary-file-inclusion.php"
http://10.0.0.45/mutillidae/index.php?do=toggle-hints&page=arbitrary-file-inclusion.php"
http://10.0.0.45/mutillidae/index.php?do=toggle-security&page=arbitrary-file-inclusion.php"
http://10.0.0.45/mutillidae/index.php?page=secret-administrative-pages.php"
http://10.0.0.45/mutillidae/index.php?do=toggle-hints&page=secret-administrative-pages.php"
http://10.0.0.45/mutillidae/index.php?do=toggle-security&page=secret-administrative-pages.php"
http://10.0.0.45/mutillidae/index.php?page=framing.php"
http://10.0.0.45/mutillidae/index.php?do=toggle-hints&page=framing.php"
http://10.0.0.45/mutillidae/index.php?do=toggle-security&page=framing.php"
http://10.0.0.45/mutillidae/framer.html"
http://10.0.0.45/mutillidae/index.php?page=change-log.htm"
http://10.0.0.45/mutillidae/index.php?do=toggle-hints&page=change-log.htm"
http://10.0.0.45/mutillidae/index.php?do=toggle-security&page=change-log.htm"
http://10.0.0.45/mutillidae/index.php?page=installation.php"
http://10.0.0.45/mutillidae/index.php?do=toggle-hints&page=installation.php"
http://10.0.0.45/mutillidae/index.php?do=toggle-security&page=installation.php"
http://10.0.0.45/mutillidae/documentation/mutillidae-installation-on-xampp-win7.pdf"
http://10.0.0.45/mutillidae/index.php?page=documentation/vulnerabilities.php"
http://10.0.0.45/mutillidae/index.php?do=toggle-hints&page=documentation/vulnerabilities.php"
http://10.0.0.45/mutillidae/index.php?do=toggle-security&page=documentation/vulnerabilities.php"
http://10.0.0.45/mutillidae/index.php?page=documentation/how-to-access-Mutillidae-over-Virtual-Box-network.php"
http://10.0.0.45/mutillidae/index.php?do=toggle-hints&page=documentation/how-to-access-Mutillidae-over-Virtual-Box-network.php"
http://10.0.0.45/mutillidae/index.php?do=toggle-security&page=documentation/how-to-access-Mutillidae-over-Virtual-Box-network.php"
http://10.0.0.45/mutillidae/index.php?do=toggle-hints&page=home.php"
http://10.0.0.45/mutillidae/index.php?do=toggle-security&page=home.php"
http://10.0.0.45/mutillidae/
http://10.0.0.45/mutillidae/?page=add-to-your-blog.php"
http://10.0.0.45/mutillidae/?page=view-someones-blog.php"
http://10.0.0.45/mutillidae/?page=show-log.php"
http://10.0.0.45/mutillidae/?page=text-file-viewer.php"
http://10.0.0.45/mutillidae/?page=user-info.php"
http://10.0.0.45/mutillidae/?page=login.php"
http://10.0.0.45/mutillidae/?page=credits.php"
http://10.0.0.45/mutillidae/?page=source-viewer.php"
http://10.0.0.45/mutillidae/index.php?page=usage-instructions.php"
http://10.0.0.45/mutillidae/index.php?do=toggle-hints&page=usage-instructions.php"
http://10.0.0.45/mutillidae/index.php?do=toggle-security&page=usage-instructions.php"
http://10.0.0.45/mutillidae/index.php?page=php-errors.php"
http://10.0.0.45/mutillidae/index.php?do=toggle-hints&page=php-errors.php"
http://10.0.0.45/mutillidae/index.php?do=toggle-security&page=php-errors.php"
http://10.0.0.45/mutillidae/index.php?page=notes.php"
http://10.0.0.45/mutillidae/index.php?do=toggle-hints&page=notes.php"
http://10.0.0.45/mutillidae/index.php?do=toggle-security&page=notes.php"
01-22 01:56