# url_util.py
from urllib.parse import quote
  2. def encode_url_without_last_slash(url):
  3. """ 输入URL,返回 Path部分quote过的两个URL,一个大写,一个小写:
  4. http://linezing.ruoguschool.com/2012/12/一淘网数据部数据工程-了解前端内存泄露/
  5. "http://linezing.ruoguschool.com/2012/12/%e4%b8%80%e6%b7%98%e7%bd%91%e6%95%b0%e6%8d%ae%e9%83%a8%e6%95%b0%e6%8d%ae%e5%b7%a5%e7%a8%8b-%e4%ba%86%e8%a7%a3%e5%89%8d%e7%ab%af%e5%86%85%e5%ad%98%e6%b3%84%e9%9c%b2" : "一淘网数据部数据工程-了解前端内存泄露",
  6. """
  7. url = url[:-1] if url.endswith('/') else url
  8. ## safe指定不做转换的字符串,:和/不做转换,只转换Path中的部分,不转换Host的部分
  9. quoted = quote(url, safe=':/')
  10. # 处理这种URL情况(URL最后路径为中文) http://127.0.0.1:5000/itest/zh/环境配置
  11. # 在link_title映射中是这样的: "http://taobaotest.ruoguschool.com/itest/zh/%E7%8E%AF%E5%A2%83%E9%85%8D%E7%BD%AE" : "环境配置",
  12. # XXX: 对于 Path的中间路径有中文的情况暂不支持
  13. pos = quoted.rfind('/')
  14. quoted_lower = "{}{}".format(quoted[0:pos],quoted[pos:].lower())
  15. # 大小写,不包含/
  16. return quoted_lower, quoted
  17. if __name__ == '__main__':
  18. url = "http://linezing.ruoguschool.com/2012/12/一淘网数据部数据工程-了解前端内存泄露/"
  19. expected = "http://linezing.ruoguschool.com/2012/12/%e4%b8%80%e6%b7%98%e7%bd%91%e6%95%b0%e6%8d%ae%e9%83%a8%e6%95%b0%e6%8d%ae%e5%b7%a5%e7%a8%8b-%e4%ba%86%e8%a7%a3%e5%89%8d%e7%ab%af%e5%86%85%e5%ad%98%e6%b3%84%e9%9c%b2"
  20. print(encode_url_without_last_slash(url))
  21. print(encode_url_without_last_slash("http://127.0.0.1:5000/itest/zh/%E7%8E%AF%E5%A2%83%E9%85%8D%E7%BD%AE"))
  22. assert( expected in encode_url_without_last_slash(url))
  23. url = "http://127.0.0.1:5000/itest/zh/环境配置"
  24. expected = "http://127.0.0.1:5000/itest/zh/%E7%8E%AF%E5%A2%83%E9%85%8D%E7%BD%AE"
  25. assert (expected in encode_url_without_last_slash(url)) #"路径的部分为中文时转换不正确"