使用 Microsoft Translator v3 REST 服务将 HTML 字符串从英语翻译为德语。以下是示例 HTML 字符串,实际 HTML 字符串的长度超过 90k 个字符。翻译前后的 HTML 字符串有所不同,例如译文中多出了换行符“\n”,此外还有其他改动。我该如何避免这些多余的字符?
System.out.println(HTMLStringBeforeTranslation); //利用以下事实:同一句子可以具有两种不同的结构。格劳乔·马克思(Groucho Marx)的这一著名笑话假设大多数人期望第一部分的结构是
System.out.println(HTMLStringAfterTranslation); // \n Nutzen Sie die Tatsache, dass der gleiche Satz zwei verschiedene Strukturen haben kann. Dieser berühmte Witz von Groucho Marx geht davon aus, dass die meisten Menschen erwarten, dass die Struktur des ersten Teils \n
代码:
public class Translator {
static String subscriptionKey = "Xxxxxxxxxxxxxxx";
static String host = "https://api.cognitive.microsofttranslator.com";
static String path = "/translate?api-version=3.0";
public static class RequestBody {
String Text;
public RequestBody(String textpass) {
this.Text = textpass; // text; // htmlText;
}
}
public static String TranslateHtml (String rawHtml) throws Exception {
URL url = new URL (host + path + params);
List<RequestBody> objList = new ArrayList<RequestBody>();
objList.add(new RequestBody(rawHtml));
String content = new Gson().toJson(objList);
return Posthtml(url, content).toString();
}
public static JsonElement Posthtml (URL url, String content) throws Exception {
HttpsURLConnection connection = (HttpsURLConnection) url.openConnection();
connection.setRequestMethod("POST");
//connection.setRequestProperty("textType", "html");
connection.setRequestProperty("Content-Type", "application/json"); // "application/json" "text/html"
connection.setRequestProperty("Content-Length", content.length() + "");
connection.setRequestProperty("Ocp-Apim-Subscription-Key", subscriptionKey);
connection.setRequestProperty("X-ClientTraceId", java.util.UUID.randomUUID().toString());
connection.setDoOutput(true);
DataOutputStream wr = new DataOutputStream(connection.getOutputStream());
byte[] encoded_content = content.getBytes("UTF-8");
wr.write(encoded_content, 0, encoded_content.length);
wr.flush();
wr.close();
StringBuilder response = new StringBuilder ();
BufferedReader in = new BufferedReader(new InputStreamReader(connection.getInputStream(), "UTF-8"));
String line;
while ((line = in.readLine()) != null) {
response.append(line);
}
in.close();
JsonElement element = new JsonParser().parse(response.toString());
JsonArray array = element.getAsJsonArray();
JsonParser parser = new JsonParser();
JsonObject obj = (JsonObject) parser.parse(array.get(0).toString());
JsonElement element2 = obj.get("translations");
JsonObject obj2 = (JsonObject) parser.parse(element2.getAsJsonArray().get(0).toString());
return obj2.get("text");
}
public static void main(String[] args) {
static String text2 = "Take advantage of the fact that the same sentence can have two different structures. This famous joke from Groucho Marx assumes that most people expect the structure of the first part to be";
static String htmlText = new String("<!DOCTYPE html><html><body><font color= 'red'>" + text2 + "</font></body></html>");
try {
System.out.println(TranslateHtml (htmlText));
} catch (Exception e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
}