OCaml:如何解码unicode-escape字符串?

时间:2017-02-23 17:40:06

标签: unicode encoding functional-programming ocaml unicode-escapes

给出如下的str:

package com.example.navigationcalculator;

import android.app.Activity;
import android.content.Context;
import android.content.Intent;
import android.content.SharedPreferences;
import android.os.Bundle;
import android.util.Log;
import android.view.View;
import android.widget.EditText;

import static android.content.ContentValues.TAG;
import static com.example.navigationcalculator.R.id.direction_1000;

/**
 * Activity that reads the wind direction entered for 1000 ft and stores it in
 * the shared-preferences file {@link #PREFS_NAME} before opening
 * {@link InputNavData}.
 */
public class GetWind extends Activity {

    /** Name of the shared-preferences file written by this activity. */
    public static final String PREFS_NAME = "com.example.myfirstapp.sharedPref";

    /** Preference key under which the 1000 ft wind direction is stored. */
    public final static String DIR_1000 = "com.example.myfirstapp.DIR_1000";
    /** Preference key for the 1000 ft wind speed; this class declares it but never writes it. */
    public final static String SPEED_1000 = "com.example.myfirstapp.SPEED_1000";

    SharedPreferences sharedpreferences;

    @Override
    protected void onCreate(Bundle savedInstanceState) {
        super.onCreate(savedInstanceState);
        setContentView(R.layout.activity_get_wind);
    }

    /**
     * Reads the 1000 ft wind direction from the form, saves it under
     * {@link #DIR_1000} and then opens {@link InputNavData}.
     *
     * <p>If the field does not contain a valid integer, an error is shown on
     * the field and nothing is stored or started.
     *
     * @param view the view that triggered the call (unused)
     */
    public void input_nav_data(View view) {
        // 1000 feet
        EditText dirField = (EditText) findViewById(R.id.direction_1000);
        final int direction;
        try {
            direction = Integer.parseInt(dirField.getText().toString().trim());
        } catch (NumberFormatException e) {
            // Empty or non-numeric input used to crash the activity here.
            dirField.setError("Enter a whole number");
            return;
        }

        sharedpreferences = getSharedPreferences(PREFS_NAME, Context.MODE_PRIVATE);
        SharedPreferences.Editor editor = sharedpreferences.edit();
        editor.putInt(DIR_1000, direction);
        editor.apply();

        // Read back with the SAME key that was written; the previous code
        // passed PREFS_NAME (the file name) as the key and therefore always
        // received the default value.
        int storedDirection = sharedpreferences.getInt(DIR_1000, -1);
        Log.d("direction 1000", String.valueOf(storedDirection));

        // Launch the next screen only after the value has been stored.
        startActivity(new Intent(this, InputNavData.class));
    }
}

如何在OCaml中把包含unicode转义序列(例如 \u003C)的ASCII字符串解码为对应的Unicode字符串?

在python中我可以很容易地做到

package com.example.navigationcalculator;

import android.app.Activity;
import android.content.Context;
import android.content.Intent;
import android.content.SharedPreferences;
import android.os.Bundle;
import android.util.Log;
import android.view.View;
import android.widget.EditText;

import static com.example.navigationcalculator.GetWind.PREFS_NAME;
import static com.example.navigationcalculator.R.id.direction_1000;



/**
 * Activity that reads the altitude entered by the user, stores it in the
 * shared-preferences file named by {@code GetWind.PREFS_NAME} and then opens
 * {@link CalculateData}.
 */
public class InputNavData extends Activity {

    /** Preference key under which the entered altitude is stored. */
    public final static String altitude_static_string = "com.example.myfirstapp.altitude";

    SharedPreferences sharedpreferences;

    @Override
    protected void onCreate(Bundle savedInstanceState) {
        super.onCreate(savedInstanceState);
        setContentView(R.layout.activity_input_nav_data);
    }

    /**
     * Reads the altitude from the form, saves it under
     * {@link #altitude_static_string}, logs the stored altitude and the
     * 1000 ft wind direction saved by {@link GetWind}, then opens
     * {@link CalculateData}.
     *
     * <p>If the field does not contain a valid integer, an error is shown on
     * the field and nothing is stored or started.
     *
     * @param view the view that triggered the call (unused)
     */
    public void calculate_data(View view) {
        // Read the actual altitude
        EditText altitudeField = (EditText) findViewById(R.id.altitude);
        final int altitude;
        try {
            altitude = Integer.parseInt(altitudeField.getText().toString().trim());
        } catch (NumberFormatException e) {
            // Empty or non-numeric input used to crash the activity here.
            altitudeField.setError("Enter a whole number");
            return;
        }

        sharedpreferences = getSharedPreferences(PREFS_NAME, Context.MODE_PRIVATE);
        SharedPreferences.Editor editor = sharedpreferences.edit();
        editor.putInt(altitude_static_string, altitude);
        editor.apply();

        // Read back with the key that was actually written; PREFS_NAME is the
        // file name, not a key, so the old lookup only ever returned its default.
        int storedAltitude = sharedpreferences.getInt(altitude_static_string, -1);
        Log.d("altitude", String.valueOf(storedAltitude));

        // The direction is stored by GetWind under GetWind.DIR_1000. The old
        // code looked up PREFS_NAME and used the resource id
        // R.id.direction_1000 as the default, so it logged a resource id.
        int storedDirection = sharedpreferences.getInt(GetWind.DIR_1000, -1);
        Log.d("dir 1000 in inputnavdata", String.valueOf(storedDirection));

        // Launch the next screen only after the value has been stored.
        startActivity(new Intent(this, CalculateData.class));
    }

}

1 个答案:

答案 0 :(得分:3)

如果如你所说,字符串中嵌入的转义序列始终只编码ASCII字符,那么可以先找出这些转义序列,再把它们逐个替换为解码后的字符:

(* [decode s] replaces every four-hex-digit escape "\uXXXX" in [s] whose
   value fits in a single byte (0x00-0xFF) with that byte. It is meant for
   input whose escapes only encode ASCII characters.

   Escapes above 0xFF are left in the output unchanged: [Char.chr] raises
   [Invalid_argument] for such values, which previously made the whole
   call fail on input like "\u20AC". *)
let decode s =
    let re = Str.regexp "\\\\u[0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F]" in
    let subst = function
    | Str.Delim u ->
        (* [u] is the full match "\uXXXX"; the hex digits start at index 2. *)
        let n = int_of_string ("0x" ^ String.sub u 2 4) in
        if n <= 0xFF then String.make 1 (Char.chr n) else u
    | Str.Text t -> t
    in
    String.concat "" (List.map subst (Str.full_split re s))

这适用于您的示例:

val decode : string -> string = <fun>
# decode "#include \\u003Cunordered_map\\u003E\\u000D\\u000A";;
- : string = "#include <unordered_map>\r\n"

事实上,Python内置支持解码这些序列。

**更新**

要通过转换为UTF-8来支持所有四位十六进制转义序列"\uXXXX",您可以使用以下代码:

(* [utf8encode s] returns the UTF-8 encoding of the code point written in
   hexadecimal in [s] (no "0x" prefix), e.g. [utf8encode "20AC"] is the
   three-byte encoding of the euro sign.

   Code points up to 0x10FFFF are supported (1 to 4 bytes).
   Raises [Invalid_argument] if the value is outside 0..0x10FFFF and
   [Failure] if [s] is not a hexadecimal number. *)
let utf8encode s =
    (* Lead-byte prefixes for 1-, 2-, 3- and 4-byte sequences. *)
    let prefs = [| 0x0; 0xc0; 0xe0; 0xf0 |] in
    let s1 n = String.make 1 (Char.chr n) in
    (* [k] continuation bytes have been emitted into [sofar]; [resid] holds
       the bits that are still to be encoded. *)
    let rec ienc k sofar resid =
        (* Number of payload bits available in the lead byte. *)
        let bct = if k = 0 then 7 else 6 - k in
        if resid < 1 lsl bct then
            (s1 (prefs.(k) + resid)) ^ sofar
        else
            ienc (k + 1) (s1 (0x80 + resid mod 64) ^ sofar) (resid / 64)
    in
    let cp = int_of_string ("0x" ^ s) in
    if cp < 0 || cp > 0x10FFFF then
        invalid_arg "utf8encode: code point out of range"
    else
        ienc 0 "" cp

(* [decode2 s] replaces every four-hex-digit escape "\uXXXX" in [s] with the
   UTF-8 encoding of the character it denotes.

   A high surrogate escape (D800-DBFF) immediately followed by a low
   surrogate escape (DC00-DFFF) is combined into one supplementary code
   point and encoded as a single 4-byte sequence. A surrogate escape that
   is not part of such a pair cannot be represented in UTF-8 and is left
   in the output as literal text. *)
let decode2 s =
    let re = Str.regexp "\\\\u[0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F]" in
    (* [u] is the full match "\uXXXX"; the hex digits start at index 2. *)
    let hex u = String.sub u 2 4 in
    let code u = int_of_string ("0x" ^ hex u) in
    let is_high c = c >= 0xD800 && c <= 0xDBFF in
    let is_low c = c >= 0xDC00 && c <= 0xDFFF in
    let rec go acc = function
    | [] -> List.rev acc
    | Str.Delim hi :: Str.Delim lo :: rest
      when is_high (code hi) && is_low (code lo) ->
        let cp =
            0x10000 + ((code hi - 0xD800) lsl 10) + (code lo - 0xDC00)
        in
        go (utf8encode (Printf.sprintf "%X" cp) :: acc) rest
    | Str.Delim u :: rest when is_high (code u) || is_low (code u) ->
        (* Lone surrogate: keep the escape text rather than emit invalid UTF-8. *)
        go (u :: acc) rest
    | Str.Delim u :: rest -> go (utf8encode (hex u) :: acc) rest
    | Str.Text t :: rest -> go (t :: acc) rest
    in
    String.concat "" (go [] (Str.full_split re s))

它也适用于您的示例和其他一些示例:

val utf8encode : string -> string = <fun>
val decode2 : string -> string = <fun>
# decode2 "#include \\u003Cunordered_map\\u003E\\u000D\\u000A";;
- : string = "#include <unordered_map>\r\n"
# print_endline (decode2 "\\u00A2");;
¢
- : unit = ()
# print_endline (decode2 "\\u20AC");;
€
- : unit = ()