def url_validator(url: str) -> bool:
    """
    Decide whether a URL should be followed.

    A URL is rejected when it contains any entry of the (currently empty)
    block list, and otherwise accepted only if it matches the URL pattern
    below.

    :param url: the URL to check
    :type url: str
    :return: True if the URL is well-formed and not blocked, otherwise False
    :rtype: bool
    """
    # Anything that is not a string (e.g. None from a missing href) cannot be
    # a valid URL; report it as invalid instead of raising TypeError.
    if not isinstance(url, str):
        return False

    # The following regex is adapted from the Django source code.
    regex = re.compile(
        r"^(?:http|ftp)s?://"  # http://, https://, ftp:// or ftps://
        r"(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+(?:[A-Z]{2,6}\.?|[A-Z0-9-]{2,}\.?)|"  # domain...
        r"localhost|"  # localhost...
        r"\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})"  # ...or ip
        r"(?::\d+)?"  # optional port
        # \Z anchors at the true end of the string. The previous `$` also
        # matched just before a trailing newline, so "http://host\n" passed.
        r"(?:/?|[/?]\S+)\Z",
        re.IGNORECASE,
    )

    # Substrings that disqualify a URL; empty for now, so nothing is blocked.
    blocked_sites: list[str] = []
    # `site in url` already covers the `site == url` case.
    if any(site in url for site in blocked_sites):
        return False

    # Not blocked: the URL is valid only if it matches the pattern.
    return regex.match(url) is not None