Question

我正在尝试将纯文本文件制作成一个数组（它们已经按照经文组织）。我希望将每节经文作为数组上的项目。我当前的代码只返回文本文件中的最后一节。 print_r证明它认为最后一节是数组中唯一的项目。我错过了一些明显的东西，它是什么？

代码：

from __future__ import print_function

import sys
import os
import time
import pickle

import numpy as np
import theano
import theano.tensor as T

import lasagne

def load_dataset(sample_data, sample_label):
    """Load the test images and their labels from two gzip archives.

    Args:
        sample_data: Path of the gzip archive holding the images.
        sample_label: Path of the gzip archive holding the labels.

    Returns:
        A tuple ``(X_test, y_test)`` with the arrays produced by
        `load_test_images` and `load_test_labels` respectively.
    """
    # The caller unpacks two values, so both arrays must be returned here.
    X_test = load_test_images(sample_data)
    y_test = load_test_labels(sample_label)
    return X_test, y_test

def download(filename, source='http://yann.lecun.com/exdb/mnist'):
    """Fetch `filename` from the `source` base URL.

    Args:
        filename: Name of the remote file; the same string is used as the
            local path the file is written to.
        source: Base URL hosting the file, with or without a trailing slash.
    """
    # Imported locally: no module-level import provides `urlretrieve`.
    if sys.version_info[0] == 2:
        from urllib import urlretrieve
    else:
        from urllib.request import urlretrieve

    print("Downloading %s" % filename)
    # The default `source` has no trailing slash, so plain concatenation
    # would glue the file name onto the last path component of the URL.
    urlretrieve(source.rstrip('/') + '/' + filename, filename)

import gzip

# ##################### Build the neural network model                     #######################
# This script supports three types of models. For each one, we define a
# function that takes a Theano variable representing the input and returns
# the output layer of a neural network model built in Lasagne.

def build_mlp(input_var=None):
    """Build a two-hidden-layer MLP and return its output layer.

    The network takes input of shape (batch, 1, 28, 28), applies 20% dropout
    to it, then two 800-unit rectifier layers each followed by 50% dropout,
    and ends in a 10-unit softmax layer.

    Args:
        input_var: Optional Theano variable to bind to the input layer.

    Returns:
        The softmax output layer; the preceding layers are reachable from it.
    """
    layers = lasagne.layers
    relu = lasagne.nonlinearities.rectify

    # Input of unspecified batch size, 1 channel, 28 rows and 28 columns,
    # followed by 20% dropout.
    net = layers.InputLayer(shape=(None, 1, 28, 28), input_var=input_var)
    net = layers.DropoutLayer(net, p=0.2)

    # First hidden layer: 800 rectifier units with explicit Glorot-uniform
    # weights, then 50% dropout.
    net = layers.DenseLayer(net, num_units=800, nonlinearity=relu,
                            W=lasagne.init.GlorotUniform())
    net = layers.DropoutLayer(net, p=0.5)

    # Second hidden layer: same size, default weight initialisation, then
    # 50% dropout.
    net = layers.DenseLayer(net, num_units=800, nonlinearity=relu)
    net = layers.DropoutLayer(net, p=0.5)

    # Output: 10 softmax units.
    return layers.DenseLayer(net, num_units=10,
                             nonlinearity=lasagne.nonlinearities.softmax)


def build_custom_mlp(input_var=None, depth=2, width=800, drop_input=.2,
                 drop_hidden=.5):
    """Build an MLP whose depth, width and dropout rates are configurable.

    With the default arguments this yields the same architecture as
    `build_mlp`: input dropout, then `depth` rectifier layers of `width`
    units, each followed by dropout, and a 10-unit softmax output.

    Args:
        input_var: Optional Theano variable to bind to the input layer.
        depth: Number of hidden layers.
        width: Number of units in each hidden layer.
        drop_input: Dropout probability on the input; falsy disables it.
        drop_hidden: Dropout probability after every hidden layer; falsy
            disables it.

    Returns:
        The softmax output layer of the network.
    """
    # Every layer is bound to the same name because only the last one is
    # returned.
    network = lasagne.layers.InputLayer(shape=(None, 1, 28, 28),
                                        input_var=input_var)
    if drop_input:
        network = lasagne.layers.dropout(network, p=drop_input)

    # Hidden layers, each followed by its own dropout. The dropout has to sit
    # inside the loop; applying it once after the loop would only regularise
    # the last hidden layer and would not match `build_mlp`.
    nonlin = lasagne.nonlinearities.rectify
    for _ in range(depth):
        network = lasagne.layers.DenseLayer(
            network, width, nonlinearity=nonlin)
        if drop_hidden:
            network = lasagne.layers.dropout(network, p=drop_hidden)

    # Output layer:
    softmax = lasagne.nonlinearities.softmax
    network = lasagne.layers.DenseLayer(network, 10, nonlinearity=softmax)
    return network


def build_cnn(input_var=None):
    """Build a small CNN and return its output layer.

    The network has two convolution + max-pooling stages, a 256-unit
    rectifier layer and a 10-unit softmax layer; 50% dropout is applied to
    the inputs of both dense layers.

    Args:
        input_var: Optional Theano variable to bind to the input layer.

    Returns:
        The softmax output layer of the network.
    """
    layers = lasagne.layers
    relu = lasagne.nonlinearities.rectify

    # Input of shape (batch, 1, 28, 28); no dropout is applied to it here.
    net = layers.InputLayer(shape=(None, 1, 28, 28), input_var=input_var)

    # Stage 1: 32 kernels of 5x5 with Glorot-uniform weights, then 2x2
    # max-pooling.
    net = layers.Conv2DLayer(net, num_filters=32, filter_size=(5, 5),
                             nonlinearity=relu,
                             W=lasagne.init.GlorotUniform())
    net = layers.MaxPool2DLayer(net, pool_size=(2, 2))

    # Stage 2: another 32 kernels of 5x5 (default weights), then 2x2
    # max-pooling.
    net = layers.Conv2DLayer(net, num_filters=32, filter_size=(5, 5),
                             nonlinearity=relu)
    net = layers.MaxPool2DLayer(net, pool_size=(2, 2))

    # Dense layer of 256 rectifier units fed through 50% dropout.
    dropped = layers.dropout(net, p=.5)
    net = layers.DenseLayer(dropped, num_units=256, nonlinearity=relu)

    # Output layer of 10 softmax units, also fed through 50% dropout.
    dropped = layers.dropout(net, p=.5)
    net = layers.DenseLayer(dropped, num_units=10,
                            nonlinearity=lasagne.nonlinearities.softmax)

    return net
def load_test_images(filename):
    """Read images from a gzip archive, downloading it first if absent.

    Args:
        filename: Path of the gzip archive.

    Returns:
        An array of shape (N, 1, 28, 28) holding the byte values divided by
        256.
    """
    if not os.path.exists(filename):
        download(filename)

    with gzip.open(filename, 'rb') as archive:
        raw = archive.read()

    # Skip the first 16 bytes; the remainder is read as uint8 values and
    # arranged as (examples, channels, rows, columns).
    pixels = np.frombuffer(raw, dtype=np.uint8, offset=16)
    images = pixels.reshape(-1, 1, 28, 28)

    scale = np.float32(256)
    return images / scale

def load_test_labels(filename):
    """Read labels from a gzip archive, downloading it first if absent.

    Args:
        filename: Path of the gzip archive.

    Returns:
        A one-dimensional uint8 array with one entry per byte after the
        first 8 bytes of the decompressed file.
    """
    if not os.path.exists(filename):
        download(filename)

    with gzip.open(filename, 'rb') as archive:
        payload = archive.read()

    # Skip the first 8 bytes; every following byte is one value.
    return np.frombuffer(payload, dtype=np.uint8, offset=8)

# NOTE(review): these statements sit at module level, where `sample_data` and
# `sample_label` are not defined and `return` is not allowed. They look like
# the tail of `load_dataset` that lost its indentation -- confirm and re-nest.
X_test = load_test_images(sample_data)
y_test = load_test_labels(sample_label)

return X_test, y_test

def predict_label(sample, model='model.npz'):
    """Predict a class index for every example in `sample`.

    Builds the `build_mlp` network, loads its parameters from the `.npz`
    archive at `model` and runs a deterministic forward pass.

    Args:
        sample: Input batch passed to the compiled Theano function.
        model: Path of an ``.npz`` archive whose arrays are stored under the
            keys ``arr_0``, ``arr_1``, ... in parameter order.

    Returns:
        The argmax over axis 1 of the network output for `sample`.
    """
    inputs = T.tensor4('sample')
    net = build_mlp(inputs)

    # Collect the stored arrays in index order.
    weights = []
    with np.load(model) as archive:
        for idx in range(len(archive.files)):
            weights.append(archive['arr_%d' % idx])
    lasagne.layers.set_all_param_values(net, weights)

    # deterministic=True is passed so the forward pass is not stochastic.
    probabilities = lasagne.layers.get_output(net, deterministic=True)
    classify = theano.function([inputs], T.argmax(probabilities, axis=1))

    return classify(sample)

def main(model='mlp'):
    """Predict labels for the test archives and save the results.

    Loads ``t10k-images-idx3-ubyte.gz`` and ``t10k-labels-idx1-ubyte.gz``,
    predicts a label for every image and writes three files to the current
    directory: ``predict.pkl`` (pickled predictions), ``predict.txt`` (one
    prediction per line) and ``groundtruth.pkl`` (pickled true labels).

    Args:
        model: Accepted for command-line compatibility; the body does not
            read it, and `predict_label` is called with its defaults.
    """
    # load the test dataset
    print("Loading data...")

    sample_data = 't10k-images-idx3-ubyte.gz'
    sample_label = 't10k-labels-idx1-ubyte.gz'
    X_test, y_test = load_dataset(sample_data, sample_label)

    print("Evaluating ...")
    label = predict_label(X_test)

    print("Saving result ...")
    # pickle writes bytes, so the pickle targets are opened in binary mode;
    # the `with` blocks make sure every file is flushed and closed.
    with open('predict.pkl', 'wb') as out_predict:
        pickle.dump(label, out_predict)

    with open('predict.txt', 'w') as thefile:
        for item in label:
            thefile.write('%s\n' % item)

    with open('groundtruth.pkl', 'wb') as out_groundtruth:
        pickle.dump(y_test, out_groundtruth)

if __name__ == '__main__':
    print("This script will predict the lables based on the models leared via CNN, MLP")
    print("Reference: https://lasagne.readthedocs.io/en/latest/user/tutorial.html#run-the-mnist-example")

    # Forward the first command-line argument, when given, as `model`;
    # otherwise let `main` fall back to its default.
    cli_args = sys.argv[1:]
    if cli_args:
        main(model=cli_args[0])
    else:
        main()

示例文字：

<?php

// Path of the plain-text file to read.
$url = 'homily1burgundio.txt';

// file() returns the file as an array with one element per line.
$homily = file($url);

// $line_array is reassigned on every pass, so once the loop ends it holds
// only the result of splitting the last line.
foreach ($homily as $line) {
    $line_array = preg_split( '/\r\n|\r|\n/', $line );
}
echo "<p>" . $line_array[0] . "</p>";
print_r($line_array);
?>
        </body>

Answer 1

可能是因为你在循环中遍历每一行，但$line_array只是一个普通变量，每次循环都会被覆盖。如果你把它改成$line_array[]（如下所示），就可以了。

// Appending with [] keeps the split result of every line instead of
// overwriting the previous one.
foreach ($homily as $line) {
    $line_array[] = preg_split( '/\r\n|\r|\n/', $line );
}

这样$line_array就会是一个包含每一行拆分结果的数组。

注意：我没有检查过，这应该有效。谢谢！

更新

根据你的评论，我为代码添加了一些验证：因为你需要的是有文字的行而不是空行，下面的代码可以解决这个问题。

<?php
$url = 'homily1burgundio.txt';
$homily = file($url);

// Start from an empty array so the output below is well-defined even when
// the file has no non-blank lines or cannot be read.
$line_array = array();

if ($homily !== false) {
    foreach ($homily as $line) {
        // PREG_SPLIT_NO_EMPTY drops the empty pieces left by the line ending
        // and, unlike array_filter(), keeps a line whose text is "0".
        $result = preg_split( '/\r\n|\r|\n/', $line, -1, PREG_SPLIT_NO_EMPTY );
        if ( isset($result[0]) ) {
            $line_array[] = $result[0];
        }
    }
}

echo "<pre>".print_r($line_array, true)."</pre>";
?>

基本上，这会过滤掉文件中的空行，只有非空的有效行才会被存入$line_array。

希望这对你有用！谢谢！

Answer 2

$url = 'homily1burgundio.txt';

// Read the file as one string: preg_split() needs a string subject, whereas
// file() returns an array of lines.
$homily = file_get_contents($url);
// Fall back to an empty array when the file cannot be read.
$lines = ($homily === false) ? array() : preg_split( '/\n/', $homily );
//you got every line as an element of the array
var_dump($lines);

如果您想要更具体的单行，请执行此操作

// Split every entry of $lines again; each result is appended, so
// $multi_dim_array ends up as an array of arrays.
$multi_dim_array=array();
foreach ($lines as $line) {
    $multi_dim_array[] = preg_split( '/\r\n|\r|\n/', $line );        
}
var_dump($multi_dim_array);

使用换行符从字符串创建PHP数组

2 个答案:

更新