Skip to content

Commit

Permalink
Added an optional param default_scheme to the url_normalize so that the user may provide a default scheme (e.g., 'http') instead of 'https'.
Browse files Browse the repository at this point in the history
  • Loading branch information
niksite committed Dec 8, 2018
1 parent 27261b7 commit 994873e
Show file tree
Hide file tree
Showing 6 changed files with 39 additions and 23 deletions.
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ Type "help", "copyright", "credits" or "license" for more information.

History:

* 1.4.1: Added an optional param default_scheme to the url_normalize
* 1.4.0: A bit of code refactoring and cleanup
* 1.3.3: Support empty string and double slash urls (//domain.tld)
* 1.3.2: The same code supports both Python 3 and Python 2.
Expand All @@ -45,7 +46,7 @@ History:
* 1.1.2: support for shebang (#!) urls
* 1.1.1: using 'http' schema by default when appropriate
* 1.1.0: added handling of IDN domains
* 1.0.0: code pep8-zation
* 1.0.0: code pep8
* 0.1.0: forked from Sam Ruby's urlnorm.py

License: "Python" (PSF) License
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "url-normalize"
version = "1.4.0"
version = "1.4.1"
description = "URL normalization for Python"
authors = ["Nikolay Panov <[email protected]>"]
license = "PSF"
Expand Down
10 changes: 10 additions & 0 deletions tests/test_provide_url_scheme.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,13 @@ def test_provide_url_scheme_result_is_expected():
result = provide_url_scheme(url)

assert result == expected, url


def test_provide_url_scheme_accept_default_scheme_param():
    """Check that a caller-supplied default_scheme is used for a '//' url."""
    # A url starting with "//" carries no scheme of its own, so the
    # default_scheme argument decides which one is prepended.
    result = provide_url_scheme("//site/path", default_scheme="http")

    assert result == "http://site/path"
10 changes: 10 additions & 0 deletions tests/test_url_normalize.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,3 +92,13 @@ def test_url_normalize_results():
"""Assert url_normalize return expected results."""
for value, expected in EXPECTED_RESULTS.items():
assert expected == url_normalize(value), value


def test_url_normalize_with_http_scheme():
    """Check that url_normalize honours default_scheme='http' for a '//' url."""
    # The input has no explicit scheme, so the normalized result should carry
    # the scheme passed via default_scheme.
    normalized = url_normalize("//www.foo.com/", default_scheme="http")

    assert normalized == "http://www.foo.com/"
13 changes: 1 addition & 12 deletions url_normalize/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,24 +20,13 @@
http://intertwingly.net/blog/2004/08/04/Urlnorm
This fork author: Nikolay Panov (<[email protected]>)
History:
* 1.4.0: A bit of code refactoring and cleanup
* 1.3.3: Support empty string and double slash urls (//domain.tld)
* 1.3.2: Same code support both Python 3 and Python 2.
* 1.3.1: Python 3 compatibility
* 1.2.1: PEP8, setup.py
* 1.1.2: support for shebang (#!) urls
* 1.1.1: using 'http' schema by default when appropriate
* 1.1.0: added handling of IDN domains
* 1.0.0: code pep8-zation
* 0.1.0: forked from Sam Ruby's urlnorm.py
"""

from __future__ import absolute_import

from .url_normalize import url_normalize

__license__ = "Python"
__version__ = "1.4.0"
__version__ = "1.4.1"

__all__ = ["url_normalize"]
24 changes: 15 additions & 9 deletions url_normalize/url_normalize.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,27 +17,29 @@
"ws": "80",
"wss": "443",
}
DEFAULT_CHARSET = "utf-8"
DEFAULT_SCHEME = "https"


def provide_url_scheme(url):
def provide_url_scheme(url, default_scheme=DEFAULT_SCHEME):
"""Make sure we have valid url scheme.
Params:
url : string : the URL
default_scheme : string : default scheme to use, e.g. 'https'
Returns:
string : updated url with validated/attached scheme
"""
has_scheme = ":" in url[:7]
is_default_scheme = url.startswith("//")
is_file_path = url == "-" or (url.startswith("/") and not is_default_scheme)
is_universal_scheme = url.startswith("//")
is_file_path = url == "-" or (url.startswith("/") and not is_universal_scheme)
if not url or has_scheme or is_file_path:
return url
if is_default_scheme:
return DEFAULT_SCHEME + ":" + url
return DEFAULT_SCHEME + "://" + url
if is_universal_scheme:
return default_scheme + ":" + url
return default_scheme + "://" + url


def generic_url_cleanup(url):
Expand Down Expand Up @@ -86,7 +88,7 @@ def normalize_userinfo(userinfo):
return userinfo


def normalize_host(host, charset="utf-8"):
def normalize_host(host, charset=DEFAULT_CHARSET):
"""Normalize host part of the url.
Lowercase and strip off the final dot.
Expand Down Expand Up @@ -204,7 +206,7 @@ def normalize_query(query):
return query


def url_normalize(url, charset="utf-8"):
def url_normalize(url, charset=DEFAULT_CHARSET, default_scheme=DEFAULT_SCHEME):
"""URI normalization routine.
Sometimes you get an URL by a user that just isn't a real
Expand All @@ -218,10 +220,14 @@ def url_normalize(url, charset="utf-8"):
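Params:
charset : string : optional
The target charset for the URL if the url was given as unicode string.
default_scheme : string : optional
The scheme to attach when the url has none, e.g. 'http' (defaults to 'https').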
Returns:
string : a normalized url
"""
if not url:
return url
url = provide_url_scheme(url)
url = provide_url_scheme(url, default_scheme)
url = generic_url_cleanup(url)
url_elements = deconstruct_url(url)
url_elements = url_elements._replace(
Expand Down

0 comments on commit 994873e

Please sign in to comment.