import re
from typing import Tuple
from urllib.parse import quote

# Matches one already percent-encoded byte, e.g. ``%E7`` or ``%e7``.
# The capturing group makes ``re.split`` keep the escapes in its result.
_PERCENT_ESCAPE = re.compile(r'(%[0-9A-Fa-f]{2})')


def _quote_keeping_escapes(text: str) -> str:
    """Percent-encode *text* without re-encoding existing ``%XX`` escapes."""
    # Splitting on a capturing group yields literal text at even indexes and
    # the matched escapes at odd indexes.
    pieces = _PERCENT_ESCAPE.split(text)
    return ''.join(
        # Existing escapes are uppercased so they match what quote() emits;
        # everything else is quoted, keeping ':' and '/' as-is so the scheme,
        # port and path separators survive.
        piece.upper() if index % 2 else quote(piece, safe=':/')
        for index, piece in enumerate(pieces)
    )


def encode_url_without_last_slash(url: str) -> Tuple[str, str]:
    """Return two percent-encoded spellings of *url* without its trailing slash.

    One trailing ``/`` is removed (if present) and the URL is percent-encoded
    with ``:`` and ``/`` left untouched. Valid ``%XX`` escapes already present
    in the input are preserved rather than encoded a second time, so the
    function gives the same result for a raw URL and for its encoded form.

    Example::

        http://127.0.0.1:5000/itest/zh/环境配置
        -> ("http://127.0.0.1:5000/itest/zh/%e7%8e%af%e5%a2%83%e9%85%8d%e7%bd%ae",
            "http://127.0.0.1:5000/itest/zh/%E7%8E%AF%E5%A2%83%E9%85%8D%E7%BD%AE")

    Args:
        url: The URL to encode; may be raw or already percent-encoded.

    Returns:
        A ``(quoted_lower, quoted)`` tuple. ``quoted`` uses uppercase hex
        digits in its escapes. ``quoted_lower`` is the same string with the
        last path segment (everything after the final ``/``, or the whole
        string when there is no ``/``) lowercased.

    Note:
        Only the last segment is lowercased, so ``quoted_lower`` still has
        uppercase escapes when an intermediate path segment contains
        non-ASCII characters. Lowercasing applies to every letter of the last
        segment, not only to the hex digits of its escapes.
    """
    url = url[:-1] if url.endswith('/') else url

    quoted = _quote_keeping_escapes(url)

    # rpartition returns ('', '', quoted) when there is no '/', so the whole
    # string is then treated as the last segment.
    head, slash, last_segment = quoted.rpartition('/')
    quoted_lower = head + slash + last_segment.lower()

    return quoted_lower, quoted


if __name__ == '__main__':
    # Trailing slash plus a non-ASCII last segment: expect the lowercase form.
    url = "http://linezing.ruoguschool.com/2012/12/一淘网数据部数据工程-了解前端内存泄露/"
    expected = "http://linezing.ruoguschool.com/2012/12/%e4%b8%80%e6%b7%98%e7%bd%91%e6%95%b0%e6%8d%ae%e9%83%a8%e6%95%b0%e6%8d%ae%e5%b7%a5%e7%a8%8b-%e4%ba%86%e8%a7%a3%e5%89%8d%e7%ab%af%e5%86%85%e5%ad%98%e6%b3%84%e9%9c%b2"
    print(encode_url_without_last_slash(url))
    # An already-encoded URL must not be encoded a second time.
    print(encode_url_without_last_slash("http://127.0.0.1:5000/itest/zh/%E7%8E%AF%E5%A2%83%E9%85%8D%E7%BD%AE"))
    assert expected in encode_url_without_last_slash(url)

    # No trailing slash: expect the uppercase form.
    url = "http://127.0.0.1:5000/itest/zh/环境配置"
    expected = "http://127.0.0.1:5000/itest/zh/%E7%8E%AF%E5%A2%83%E9%85%8D%E7%BD%AE"
    # Fails if a non-ASCII path segment is converted incorrectly.
    assert expected in encode_url_without_last_slash(url)