我注意到 Java 的 URI 类有一些奇怪的行为。我试着写了一个带注释的示例来说明我遇到的问题。
使用 `new URI(...)` 重新创建 URI 时，如果该 URI 经过了双重编码，则 `toString()` 的结果会有所不同。
// Sample text: multi-byte UTF-8 characters with a single space in the middle.
final String raw = "€亞£€€¥€€¥£€ £££亞";

// First encoding pass. The result is:
// %E2%82%AC%E4%BA%9E%C2%A3%E2%82%AC%E2%82%AC%C2%A5%E2%82%AC%E2%82%AC%C2%A5%C2%A3%E2%82%AC%20%C2%A3%C2%A3%C2%A3%E4%BA%9E
final String encodedRaw = encodePathSegment(raw);

// Second encoding pass over the already-encoded text (each '%' turns into "%25"). The result is:
// %25E2%2582%25AC%25E4%25BA%259E%25C2%25A3%25E2%2582%25AC%25E2%2582%25AC%25C2%25A5%25E2%2582%25AC%25E2%2582%25AC%25C2%25A5%25C2%25A3%25E2%2582%25AC%2520%25C2%25A3%25C2%25A3%25C2%25A3%25E4%25BA%259E
final String doubleEncodedRaw = encodePathSegment(encodedRaw);

// Parse both encoded forms as URIs.
URI encodedUri = URI.create(encodedRaw);
URI doubleEncodedUri = URI.create(doubleEncodedRaw);

// Rebuild each URI through the seven-argument constructor, supplying only the path
// reported by getPath() on the parsed URI.
final String pathOfEncodedUri = encodedUri.getPath();
URI recreateSingleEncodedUri = new URI(null, null, null, 0, pathOfEncodedUri, null, null);
final String pathOfDoubleEncodedUri = doubleEncodedUri.getPath();
URI recreateDoubleEncodingUri = new URI(null, null, null, 0, pathOfDoubleEncodedUri, null, null);

// Single-encoded case: getPath() of the rebuilt URI gives back the raw text.
final String rebuiltSinglePath = recreateSingleEncodedUri.getPath();
Assertions.assertThat(rebuiltSinglePath).isEqualTo(raw);

// Single-encoded case: toString() of the rebuilt URI shows the non-ASCII characters
// unescaped, yet the space still appears as %20. This is the first surprise.
final String rebuiltSingleText = recreateSingleEncodedUri.toString();
Assertions.assertThat(rebuiltSingleText).isEqualTo("€亞£€€¥€€¥£€%20£££亞");

// Double-encoded case: getPath() of the rebuilt URI gives the single-encoded text.
final String rebuiltDoublePath = recreateDoubleEncodingUri.getPath();
Assertions.assertThat(rebuiltDoublePath).isEqualTo(encodedRaw);

// Double-encoded case: toString() of the rebuilt URI is the double-encoded text again,
// unlike the single-encoded case above. This is the second surprise.
final String rebuiltDoubleText = recreateDoubleEncodingUri.toString();
Assertions.assertThat(rebuiltDoubleText).isEqualTo(doubleEncodedRaw);