
时间:2015-05-20 22:48:19

标签: machine-learning artificial-intelligence neural-network backpropagation

我实现了一个反向传播神经网络,并用我的数据对它进行了训练。数据中的句子在英语和南非荷兰语(Afrikaans)之间交替出现。神经网络应该识别出输入句子的语言。

网络结构为 27 × 16 × 2。输入层有 26 个输入,分别对应字母表中的每个字母,另外再加上一个偏置单元。





enter image description here


public void train() throws NumberFormatException, IOException{

    // Training Accuracy
    double at = 0;

    int epoch = 0;

    int tNum = 0;

    for(; epoch < epochMax; epoch++){

        // Reads stock files from TestPackage package in existing project
        BufferedReader br = new BufferedReader(new InputStreamReader(this.getClass().
                getResourceAsStream("/TrainingData/" + trainingData.getName())));

        while ((line = br.readLine()) != null) {

            Boolean classified = false;


            // Set the correct classification Tk
            t[0] = Integer.parseInt(line.split("\t")[0]); //Africaans
            t[1] = (t[0] == 0) ? 1 : 0; // English

            // Convert training string to char array
            char trainingLine[] = line.split("\t")[1].toLowerCase().toCharArray();

            // Increment idx of input layer z, that matches
            // the position of the char in the alphabet
            // a == 0, b == 2, etc.....
            for(int l = 0; l < trainingLine.length; l++){
                if((int)trainingLine[l] >= 97 && (int)trainingLine[l] <= 122)
                    z[(int)trainingLine[l] % 97]++;

            /*System.out.println("Z   " + Arrays.toString(z));

            // Scale Z
            for(int i = 0; i < z.length-1; i++){
                z[i] = scale(z[i], 0, trainingLine.length, -Math.sqrt(3),Math.sqrt(3));

          *                  SET NET HIDDEN LAYER 
          * Each ith unit of the hidden Layer = 
          * each ith unit of the input layer
          * multiplied by every j in the ith level of the weights matrix ij*/

            for(int j = 0; j < ij.length; j++){  // 3
                double[] dotProduct = multiplyVectors(z, ij[j]);
                y[j] = sumVector(dotProduct);   


             *                 SET ACTIVATION HIDDEN LAYER 

            for(int j = 0; j < y.length-1; j++){
                y[j] = sigmoid(y[j], .3, .7);

             *                       SET NET OUTPUT LAYER 
             * Each jth unit of the hidden Layer = 
             * each jth unit of the input layer
             * multiplied by every k in the jth level of the weights matrix jk*/

            for(int k = 0; k < jk.length; k++){  // 3
                double[] dotProduct = multiplyVectors(y, jk[k]);
                o[k] = sumVector(dotProduct);

             *                   SET ACTIVATION OUTPUT LAYER

            for(int k = 0; k < o.length; k++){
                o[k] = sigmoid(o[k], .3, .7);

             *                     SET OUTPUT ERROR
             * For each traing example, evalute the error.
             * Error is defined as (Tk - Ok)
             * Correct classifications will result in zero error:
             *          (1 - 1) = 0
             *          (0 - 0) = 0

            for(int k = 0; k < o.length; k++){
                oError[k] = t[k] - o[k];

             *                     SET TRAINING ACCURACY
             * If error is 0, then a 1 indicates a succesful prediction.
             * If error is 1, then a 0 indicates an unsucessful prediction.

            if(quantize(o[0],.3, .7) == t[0] && quantize(o[1], .3, .7) == t[1]){
                classified = true;
                at += 1;

            // Only compute errors and change weiths for classification errors

             *                  CALCULATE OUTPUT SIGNAL ERROR
             *                 Error of ok = -(tk - ok)(1 - ok)ok

            for(int k = 0; k < o.length; k++){
                oError[k] = outputError(t[k], o[k]);


             *                  CALCULATE HIDDEN LAYER SIGNAL ERROR

            // The term (1-yk)yk is expanded to yk - yk squared

            // For each k-th output unit, multiply it by the
            // summed dot product of the two terms (1-yk)yk and jk[k]

            for(int j = 0; j < y.length; j++){
                for(int k = 0; k < o.length; k++){
                    yError[j] +=  oError[k] * jk[k][j] * (1 -  y[j]) * y[j];

             *                  CALCULATE NEW WIGHTS FOR HIDDEN-JK-OUTPUT

            for(int k = 0; k < o.length; k++){
                for(int j = 0; j < y.length; j++){
                    djk[k][j] = (-1*learningRate)*oError[k]*y[j] + momentum*djk[k][j];

                    // Old weights = themselves + new delta weight
                    jk[k][j] += djk[k][j]; 



            for(int j = 0; j < y.length-1; j++){
                for(int i = 0; i < z.length; i++){

                    dij[j][i] = (-1*learningRate)*yError[j]*z[i] + momentum*dij[j][i];

                    // Old weights = themselves + new delta weight
                    ij[j][i] += dij[j][i]; 

    // Accuracy Percentage
    double at_prec = (at/tNum) * 100;

    System.out.println("Training Accuracy: " + at_prec);    

1 个答案:

答案 0 :(得分:2)




从概念上讲,我会增加一个完整的预处理阶段,用来提取一些统计特征。我一时能想到的(我并不懂这两种语言)可以计算的特征有:
- 句子中字母“a”与字母“c”出现次数的比例
- 句子中字母“d”与字母“p”出现次数的比例
- 句子中单词的平均长度
