# david / 404-intelligent-service
							from urllib.parse import quote

def encode_url_without_last_slash(url):
    """Return two percent-encoded variants of *url* without its trailing slash.

    A single trailing ``/`` is removed, then the whole string is passed to
    ``urllib.parse.quote`` with ``safe=':/'``: ``:`` and ``/`` are kept
    verbatim so the scheme, host and path separators survive, while other
    reserved or non-ASCII characters are escaped as UTF-8 ``%XX`` sequences.

    Two spellings are produced because lookup tables may store the escapes of
    the final path segment in either case, e.g. for
    ``http://127.0.0.1:5000/itest/zh/环境配置``::

        .../itest/zh/%e7%8e%af%e5%a2%83%e9%85%8d%e7%bd%ae   (lower variant)
        .../itest/zh/%E7%8E%AF%E5%A2%83%E9%85%8D%E7%BD%AE   (quote() output)

    Limitations:
        * Only the last path segment is lower-cased; escapes in intermediate
          segments keep the upper-case form emitted by ``quote``.
        * The entire last segment is lower-cased, including any plain ASCII
          letters in it, not just the hex digits of the escapes.
        * ``%`` is not in the safe set, so an already percent-encoded URL is
          encoded again (``%E7`` becomes ``%25E7``).

    Args:
        url: The URL (or path) string to encode.

    Returns:
        A ``(quoted_lower, quoted)`` tuple. ``quoted`` is the direct result
        of ``quote``; ``quoted_lower`` is the same string with its last path
        segment lower-cased. Neither ends with the stripped trailing slash.
    """
    if url.endswith('/'):
        url = url[:-1]

    # Keep ':' and '/' literal so only the text between separators is escaped.
    quoted = quote(url, safe=':/')

    # rpartition copes with input that has no '/': the whole string is then
    # the "last segment".  Slicing at rfind()'s -1 would instead lower-case
    # just the final character.
    head, sep, last_segment = quoted.rpartition('/')
    quoted_lower = head + sep + last_segment.lower()

    return quoted_lower, quoted


if __name__ == '__main__':
    # Ad-hoc self-checks, executed only when the file is run as a script.
    blog_url = "http://linezing.ruoguschool.com/2012/12/一淘网数据部数据工程-了解前端内存泄露/"
    blog_expected = "http://linezing.ruoguschool.com/2012/12/%e4%b8%80%e6%b7%98%e7%bd%91%e6%95%b0%e6%8d%ae%e9%83%a8%e6%95%b0%e6%8d%ae%e5%b7%a5%e7%a8%8b-%e4%ba%86%e8%a7%a3%e5%89%8d%e7%ab%af%e5%86%85%e5%ad%98%e6%b3%84%e9%9c%b2"
    upper_quoted = "http://127.0.0.1:5000/itest/zh/%E7%8E%AF%E5%A2%83%E9%85%8D%E7%BD%AE"

    # Show the result for a raw URL, then for an already percent-encoded one
    # (the latter is printed for manual inspection only, never asserted).
    blog_variants = encode_url_without_last_slash(blog_url)
    print(blog_variants)
    print(encode_url_without_last_slash(upper_quoted))

    # The trailing slash must be gone and the lower-case spelling present.
    assert blog_expected in blog_variants

    # A Chinese final path segment must yield the upper-case escaped form;
    # a failure here means the path part was converted incorrectly.
    chinese_path_url = "http://127.0.0.1:5000/itest/zh/环境配置"
    assert upper_quoted in encode_url_without_last_slash(chinese_path_url)