1234567891011121314151617181920212223242526272829303132 |
- from urllib.parse import quote
def encode_url_without_last_slash(url):
    """Percent-encode *url* and return it in two letter-case variants.

    One trailing ``/`` is removed first. The remaining text is
    percent-encoded, leaving ``:``, ``/`` and ``%`` untouched so that the
    scheme/host separators survive and escapes that are already present in
    the input are not encoded a second time.

    Two strings are returned because keys in the link-title mapping appear
    with either lowercase or uppercase hex digits, e.g.::

        http://127.0.0.1:5000/itest/zh/环境配置
        -> "http://127.0.0.1:5000/itest/zh/%e7%8e%af%e5%a2%83%e9%85%8d%e7%bd%ae"
        -> "http://127.0.0.1:5000/itest/zh/%E7%8E%AF%E5%A2%83%E9%85%8D%E7%BD%AE"

    Args:
        url: The URL to encode; may or may not end with ``/``.

    Returns:
        A ``(quoted_lower, quoted)`` tuple. ``quoted`` is the encoded URL as
        produced by ``quote`` (uppercase hex digits for newly encoded bytes).
        ``quoted_lower`` is the same string with everything after the last
        ``/`` lowercased; note this lowercases ordinary letters in that
        segment as well as the hex digits.

    Limitations:
        Only the last path segment is lowercased, so non-ASCII text in a
        middle path segment keeps uppercase escapes in both results.
    """
    # Drop a single trailing slash so both results end with the last segment.
    trimmed = url[:-1] if url.endswith('/') else url

    # '%' is kept safe so an already-encoded URL is returned unchanged
    # instead of being turned into '%25..' sequences.
    quoted = quote(trimmed, safe=':/%')

    # rpartition yields ('', '', quoted) when there is no '/', in which case
    # the whole string counts as the last segment and is lowercased; slicing
    # with rfind's -1 would have lowercased only the final character.
    head, sep, tail = quoted.rpartition('/')
    quoted_lower = head + sep + tail.lower()

    # Lowercase variant first, then the variant as produced by quote().
    return quoted_lower, quoted
if __name__ == '__main__':
    # Self-check 1: a blog URL whose last path segment is Chinese and which
    # ends with a trailing slash. The expected key uses lowercase escapes.
    url = "http://linezing.ruoguschool.com/2012/12/一淘网数据部数据工程-了解前端内存泄露/"
    expected = "http://linezing.ruoguschool.com/2012/12/%e4%b8%80%e6%b7%98%e7%bd%91%e6%95%b0%e6%8d%ae%e9%83%a8%e6%95%b0%e6%8d%ae%e5%b7%a5%e7%a8%8b-%e4%ba%86%e8%a7%a3%e5%89%8d%e7%ab%af%e5%86%85%e5%ad%98%e6%b3%84%e9%9c%b2"
    variants = encode_url_without_last_slash(url)
    print(variants)

    # Show what is produced for an input that is already percent-encoded.
    encoded_input = "http://127.0.0.1:5000/itest/zh/%E7%8E%AF%E5%A2%83%E9%85%8D%E7%BD%AE"
    print(encode_url_without_last_slash(encoded_input))

    # The expected key must be one of the two returned variants.
    assert expected in encode_url_without_last_slash(url)

    # Self-check 2: a local URL without a trailing slash; the expected key
    # uses uppercase escapes. A failure here means the Chinese path segment
    # was converted incorrectly.
    url = "http://127.0.0.1:5000/itest/zh/环境配置"
    expected = "http://127.0.0.1:5000/itest/zh/%E7%8E%AF%E5%A2%83%E9%85%8D%E7%BD%AE"
    assert expected in encode_url_without_last_slash(url)
|