我在下面编写了用于计算不同单词和句子的概率的代码,但是即使我更改了要评估的文本,它也给了我相同的数字,我不知道为什么!(我复制了完整的代码,因此您可以复制并运行它。
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace T_2060_ParserEstadoDeCuenta
{
class Program
{
static void Main(string[] args)
{
Console.WriteLine("Parsing the csv file");
List<clsEstadoCuenta> resp = new List<clsEstadoCuenta>();
var lines = File.ReadAllLines("d:\\ztemp\\parseEstcta.csv");
for (int i = 1; i < lines.Count(); i++)
{
try
{
/*
*/
var campos = lines[i].Split(',');
clsEstadoCuenta nR = new clsEstadoCuenta();
nR.NumeroCuenta = (String.IsNullOrEmpty(campos[1])) ? "" : campos[1];
nR.CodigoPais = 504;
nR.Banco = "Fichosa";
nR.Moneda = (String.IsNullOrEmpty(campos[2])) ? "" : campos[2];
nR.TasaCambio = 24.6;
var tmpFecha = campos[0].Split('/');
nR.FechaTransaccion = new DateTime(Convert.ToInt32(tmpFecha[2]), Convert.ToInt32(tmpFecha[1]), Convert.ToInt32(tmpFecha[0]));
nR.Descripcion = (String.IsNullOrEmpty(campos[1])) ? "" : campos[1];
nR.Referencia = (String.IsNullOrEmpty(campos[2])) ? "" : campos[2];
nR.Debito = (String.IsNullOrEmpty(campos[4])) ? 0 : Convert.ToDouble(campos[4]);
nR.Credito = (String.IsNullOrEmpty(campos[5])) ? 0 : Convert.ToDouble(campos[5]);
nR.Payee = "A";
}
catch (Exception ex)
{
Console.WriteLine("error on line {0} : {1}", i, ex.Message);
continue;
}
}
Console.WriteLine("Parsing has ended, we have {0} rows \n", resp.Count);
foreach (var item in resp)
{
Console.WriteLine(item.NumeroCuenta+"\t" +item.CodigoPais+"\t"+item.Banco+"t"+item.Moneda+"\t"+item.Debito);
}
Console.ReadLine();
}
class clsEstadoCuenta
{
private string _NumeroCuenta;
public string NumeroCuenta
{
get { return _NumeroCuenta; }
set { _NumeroCuenta = value; }
}
private int _CodigoPais;
public int CodigoPais
{
get { return _CodigoPais; }
set { _CodigoPais = value; }
}
private string _Banco;
public string Banco
{
get { return _Banco; }
set { _Banco = value; }
}
private string _Moneda;
public string Moneda
{
get { return _Moneda; }
set { _Moneda = value; }
}
private double _TasaCambio;
public double TasaCambio
{
get { return _TasaCambio; }
set { _TasaCambio = value; }
}
private double _Debito;
public double Debito
{
get { return _Debito; }
set { _Debito = value; }
}
private double _Credito;
public double Credito
{
get { return _Credito; }
set { _Credito = value; }
}
private DateTime _FechaTrasaccion;
public DateTime FechaTransaccion
{
get { return _FechaTrasaccion; }
set { _FechaTrasaccion = value; }
}
private string _Payee;
public string Payee
{
get { return _Payee; }
set { _Payee = value; }
}
private string _Descripcion;
public string Descripcion
{
get { return _Descripcion; }
set { _Descripcion = value; }
}
private string _Referencia;
public string Referencia
{
get { return _Referencia; }
set { _Referencia = value; }
}
private string _CodigoBancario;
public string CodigoBancario
{
get { return _CodigoBancario; }
set { _CodigoBancario = value; }
}
private string _Categoria;
public string Categoria
{
get { return _Categoria; }
set { _Categoria = value; }
}
private string _Sector;
public string Sector
{
get { return _Sector; }
set { _Sector = value; }
}
private double _ValorLocal;
public double ValorLocal
{
get
{
_ValorLocal = Credito - Debito;
return _ValorLocal;
}
//set { _ValorLocal = value; }
}
private double _ValorDolares;
public double ValorDolares
{
get
{
_ValorDolares = ValorLocal / TasaCambio;
return _ValorDolares;
}
// set { _ValorDolares = value; }
}
private string _NombreEmpresa;
public string NombreEmpresa
{
get { return _NombreEmpresa; }
set { _NombreEmpresa = value; }
}
}
}
}
无论插入文本from nltk.util import bigrams
from nltk.lm.preprocessing import pad_both_ends
from nltk.util import everygrams
from nltk.lm import Vocabulary
from nltk.lm import MLE
from nltk.lm import Laplace
from nltk.lm.preprocessing import flatten
import codecs
import re
from nltk.lm.preprocessing import padded_everygram_pipeline
from nltk.lm.preprocessing import padded_everygrams
from nltk.tokenize import word_tokenize
from nltk import ngrams
s = "The Internet may be overflowing with new technology but crime in cyberspace is still of the old-fashioned variety."\
"The National Consumers League said Wednesday that the most popular scam on the Internet was the pyramid scheme, in which early investors in a bogus fund are paid off with deposits of later investors."
max_len_ngram = 2;
paddedLine =list(pad_both_ends(word_tokenize(s),n=2));
vocab = Vocabulary(word_tokenize(s),1);
train = [everygrams(paddedLine,max_len = max_len_ngram)];
lm = MLE(max_len_ngram);
lm.fit(train,vocab);
print(lm.score("WHY?"));
是什么,它总是返回0.03636363636363636,这是绝对错误的!我该如何解决?
我怎样才能得分完整的句子而不是单词?