Commit 891490b2 authored by Sofia Papagiannaki
Browse files

snf-common: Comment out unused code in urltools

parent 8b71128b
...@@ -26,30 +26,30 @@ from collections import namedtuple ...@@ -26,30 +26,30 @@ from collections import namedtuple
from posixpath import normpath from posixpath import normpath
__all__ = ["ParseResult", "SplitResult", "parse", "extract", "split", __all__ = ["ParseResult", "SplitResult", "split",
"split_netloc", "split_host", "assemble", "encode", "normalize", "split_netloc", "assemble", "normalize",
"normalize_host", "normalize_path", "normalize_query", "normalize_host", "normalize_path", "normalize_query",
"normalize_fragment", "unquote"] "normalize_fragment", "unquote"]
PSL_URL = 'http://mxr.mozilla.org/mozilla-central/source/netwerk/dns/effective_tld_names.dat?raw=1' #PSL_URL = 'http://mxr.mozilla.org/mozilla-central/source/netwerk/dns/effective_tld_names.dat?raw=1'
#
def _get_public_suffix_list(): #def _get_public_suffix_list():
"""Get the public suffix list. # """Get the public suffix list.
""" # """
local_psl = os.environ.get('PUBLIC_SUFFIX_LIST') # local_psl = os.environ.get('PUBLIC_SUFFIX_LIST')
if local_psl: # if local_psl:
psl_raw = open(local_psl).readlines() # psl_raw = open(local_psl).readlines()
else: # else:
psl_raw = urllib.urlopen(PSL_URL).readlines() # psl_raw = urllib.urlopen(PSL_URL).readlines()
psl = set() # psl = set()
for line in psl_raw: # for line in psl_raw:
item = line.strip() # item = line.strip()
if item != '' and not item.startswith('//'): # if item != '' and not item.startswith('//'):
psl.add(item) # psl.add(item)
return psl # return psl
#
PSL = _get_public_suffix_list() #PSL = _get_public_suffix_list()
SCHEMES = ['http', 'https', 'ftp', 'sftp', 'file', 'gopher', 'imap', 'mms', SCHEMES = ['http', 'https', 'ftp', 'sftp', 'file', 'gopher', 'imap', 'mms',
...@@ -114,12 +114,12 @@ def normalize(url): ...@@ -114,12 +114,12 @@ def normalize(url):
return assemble(result) return assemble(result)
def encode(url): #def encode(url):
"""Encode URL # """Encode URL
""" # """
parts = extract(url) # parts = extract(url)
encoded = ParseResult(*(_idna_encode(p) for p in parts)) # encoded = ParseResult(*(_idna_encode(p) for p in parts))
return assemble(encoded) # return assemble(encoded)
def assemble(parts): def assemble(parts):
...@@ -222,37 +222,37 @@ def unquote(text, exceptions=[]): ...@@ -222,37 +222,37 @@ def unquote(text, exceptions=[]):
return ''.join(res) return ''.join(res)
def parse(url): #def parse(url):
"""Parse a URL # """Parse a URL
""" # """
parts = split(url) # parts = split(url)
if parts.scheme: # if parts.scheme:
(username, password, host, port) = split_netloc(parts.netloc) # (username, password, host, port) = split_netloc(parts.netloc)
(subdomain, domain, tld) = split_host(host) # (subdomain, domain, tld) = split_host(host)
else: # else:
username = password = subdomain = domain = tld = port = '' # username = password = subdomain = domain = tld = port = ''
return ParseResult(parts.scheme, username, password, subdomain, domain, tld, # return ParseResult(parts.scheme, username, password, subdomain, domain, tld,
port, parts.path, parts.query, parts.fragment) # port, parts.path, parts.query, parts.fragment)
def extract(url): #def extract(url):
"""Extract as much information from a (relative) URL as possible # """Extract as much information from a (relative) URL as possible
""" # """
parts = split(url) # parts = split(url)
if parts.scheme: # if parts.scheme:
netloc = parts.netloc # netloc = parts.netloc
path = parts.path # path = parts.path
else: # else:
netloc = parts.path # netloc = parts.path
path = '' # path = ''
if '/' in netloc: # if '/' in netloc:
tmp = netloc.split('/', 1) # tmp = netloc.split('/', 1)
netloc = tmp[0] # netloc = tmp[0]
path = '/' + tmp[1] # path = '/' + tmp[1]
(username, password, host, port) = split_netloc(netloc) # (username, password, host, port) = split_netloc(netloc)
(subdomain, domain, tld) = split_host(host) # (subdomain, domain, tld) = split_host(host)
return ParseResult(parts.scheme, username, password, subdomain, domain, tld, # return ParseResult(parts.scheme, username, password, subdomain, domain, tld,
port, path, parts.query, parts.fragment) # port, path, parts.query, parts.fragment)
def split(url): def split(url):
...@@ -341,33 +341,33 @@ def split_netloc(netloc): ...@@ -341,33 +341,33 @@ def split_netloc(netloc):
return username, password, host, port return username, password, host, port
def split_host(host): #def split_host(host):
"""Use the Public Suffix List to split host into subdomain, domain and tld # """Use the Public Suffix List to split host into subdomain, domain and tld
""" # """
if '[' in host: # if '[' in host:
return '', host, '' # return '', host, ''
domain = subdomain = tld = '' # domain = subdomain = tld = ''
for c in host: # for c in host:
if c not in IP_CHARS: # if c not in IP_CHARS:
break # break
else: # else:
return '', host, '' # return '', host, ''
parts = host.split('.') # parts = host.split('.')
for i in range(len(parts)): # for i in range(len(parts)):
tld = '.'.join(parts[i:]) # tld = '.'.join(parts[i:])
wildcard_tld = '*.' + tld # wildcard_tld = '*.' + tld
exception_tld = '!' + tld # exception_tld = '!' + tld
if exception_tld in PSL: # if exception_tld in PSL:
domain = '.'.join(parts[:i+1]) # domain = '.'.join(parts[:i+1])
tld = '.'.join(parts[i+1:]) # tld = '.'.join(parts[i+1:])
break # break
if tld in PSL: # if tld in PSL:
domain = '.'.join(parts[:i]) # domain = '.'.join(parts[:i])
break # break
if wildcard_tld in PSL: # if wildcard_tld in PSL:
domain = '.'.join(parts[:i-1]) # domain = '.'.join(parts[:i-1])
tld = '.'.join(parts[i-1:]) # tld = '.'.join(parts[i-1:])
break # break
if '.' in domain: # if '.' in domain:
(subdomain, domain) = domain.rsplit('.', 1) # (subdomain, domain) = domain.rsplit('.', 1)
return subdomain, domain, tld # return subdomain, domain, tld
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment