我试图用C#中的Aforge.Net通过感知器进行OCR。我用九个30 * 30二进制图片学习了我的网络。但在结果中,它将所有内容都识别为“C”。 这是代码:
private void button1_Click(object sender, EventArgs e)
{
AForge.Neuro.ActivationNetwork network = new AForge.Neuro.ActivationNetwork(new AForge.Neuro.BipolarSigmoidFunction(2), 900, 3);
network.Randomize();
AForge.Neuro.Learning.PerceptronLearning learning = new AForge.Neuro.Learning.PerceptronLearning(network);
learning.LearningRate =1 ;
double[][] input = new double[9][];
for (int i = 0; i < 9; i++)
{
input[i] = new double[900];
}
//Reading A images
for (int i = 1; i <= 3; i++)
{
Bitmap a = AForge.Imaging.Image.FromFile(path + "\\a" + i + ".bmp");
for (int j = 0; j < 30; j++)
for (int k = 0; k < 30; k++)
{
if (a.GetPixel(j, k).ToKnownColor() == KnownColor.White)
{
input[i-1][j * 10 + k] = -1;
}
else
input[i-1][j * 10 + k] = 1;
}
// showImage(a);
}
//Reading B images
for (int i = 1; i <= 3; i++)
{
Bitmap a = AForge.Imaging.Image.FromFile(path + "\\b" + i + ".bmp");
for (int j = 0; j < 30; j++)
for (int k = 0; k < 30; k++)
{
if (a.GetPixel(j , k).ToKnownColor() == KnownColor.White)
{
input[i + 2][j * 10 + k] = -1;
}
else
input[i + 2][j * 10 + k] = 1;
}
// showImage(a);
}
//Reading C images
for (int i = 1; i <= 3; i++)
{
Bitmap a = AForge.Imaging.Image.FromFile(path + "\\c" + i + ".bmp");
for (int j = 0; j < 30; j++)
for (int k = 0; k < 30; k++)
{
if (a.GetPixel(j , k ).ToKnownColor() == KnownColor.White)
{
input[i + 5][j * 10 + k] = -1;
}
else
input[i + 5][j * 10 + k] = 1;
}
// showImage(a);
}
bool needToStop = false;
int iteration = 0;
while (!needToStop)
{
double error = learning.RunEpoch(input, new double[9][] { new double[3] { 1, -1, -1 },new double[3] { 1, -1, -1 },new double[3] { 1, -1, -1 },//A
new double[3] { -1, 1, -1 },new double[3] { -1, 1, -1 },new double[3] { -1, 1, -1 },//B
new double[3] { -1, -1, 1 },new double[3] { -1, -1, 1 },new double[3] { -1, -1, 1 } }//C
/*new double[9][]{ input[0],input[0],input[0],input[1],input[1],input[1],input[2],input[2],input[2]}*/
);
//learning.LearningRate -= learning.LearningRate / 1000;
if (error == 0)
break;
else if (iteration < 1000)
iteration++;
else
needToStop = true;
System.Diagnostics.Debug.WriteLine("{0} {1}", error, iteration);
}
Bitmap b = AForge.Imaging.Image.FromFile(path + "\\b1.bmp");
//Reading A Sample to test Netwok
double[] sample = new double[900];
for (int j = 0; j < 30; j++)
for (int k = 0; k < 30; k++)
{
if (b.GetPixel(j , k ).ToKnownColor() == KnownColor.White)
{
sample[j * 30 + k] = -1;
}
else
sample[j * 30 + k] = 1;
}
foreach (double d in network.Compute(sample))
System.Diagnostics.Debug.WriteLine(d);//Output is Always C = {-1,-1,1}
}
我真的很想知道它为什么回答错误。
答案 0 :(得分:3)
将初始30x30图像加载到input
结构中的双[900]数组时,您正在使用以下计算:
for (int j = 0; j < 30; j++)
for (int k = 0; k < 30; k++)
{
if (a.GetPixel(j, k).ToKnownColor() == KnownColor.White)
input[i-1][j * 10 + k] = -1;
else
input[i-1][j * 10 + k] = 1;
}
此处的偏移计算错误。您需要将j * 10 + k
更改为j * 30 + k
,否则您将获得无效结果。稍后您在加载测试图像时使用正确的偏移计算,这就是为什么它与损坏的样本没有正确匹配的原因。
您应该编写一个方法来将位图加载到double[900]
数组中并为每个图像调用它,而不是多次编写相同的代码。这有助于减少这样的问题,其中两个代码应该返回相同的结果给出不同的结果。
答案 1 :(得分:2)
我尝试了你的代码。它也帮助了我,并为此感谢。我可以通过对图像中的位数组进行一些更改来使代码正常工作。这是我使用的方法。
`
private double[] GetImageData(Bitmap bmp)
{
double[] imageData = null;
//Make the image grayscale
Grayscale filter = new Grayscale(0.2125, 0.7154, 0.0721);
bmp = filter.Apply(bmp);
//Binarize the image
AForge.Imaging.Filters.Threshold thFilter = new AForge.Imaging.Filters.Threshold(128);
thFilter.ApplyInPlace(bmp);
int height = bmp.Height;
int width = bmp.Width;
imageData = new double[height * width];
int imagePointer = 0;
System.Diagnostics.Debug.WriteLine("Height : " + height);
System.Diagnostics.Debug.WriteLine("Width : " + width);
for (int i = 0; i < height; i++)
{
for (int j = 0; j < width; j++)
{
System.Diagnostics.Debug.Write(string.Format("({0} , {1}) Color : {2}\n", i, j, bmp.GetPixel(i, j)));
//Identify the black points of the image
if (bmp.GetPixel(i, j) == Color.FromArgb(255, 0, 0, 0))
{
imageData[imagePointer] = 1;
}
else
{
imageData[imagePointer] = 0;
}
imagePointer++;
}
System.Diagnostics.Debug.WriteLine("");
}
System.Diagnostics.Debug.WriteLine("Bits : " + imagePointer );
return imageData;
}`
希望这会有所帮助。谢谢。
答案 2 :(得分:0)
试试这个
double error = learning.RunEpoch(input, new double[9][] { new double[3] **{ 1, -1, -1 }**,new double[3] **{ -1, 1, -1 }**,new double[3] **{ -1, -1, 1 }**,//A
new double[3] **{ 1, -1, -1 }**,new double[3] **{ -1, 1, -1 }**,new double[3] **{ -1, -1, 1 }**,//B
new double[3] **{ 1, -1, -1 }**,new double[3] **{ -1, 1, -1 }**,new double[3] **{ -1, -1, 1 }** }//C
);
或者这样
double[][] output = new double[patterns][];
for (int j = 0; j < patterns; j++)
{
output[j] = new double[patterns];
for (int i = 0; i < patterns; i++)
{
if (i != j)
{
output[j][i] = -1;
}
else
{
output[j][i] = 1;
}
}
}
double error = learning.RunEpoch(input,output)
double[] netout = neuralNet.Compute(pattern);
int maxIndex = 0;
double max = netout[0];
for (int i = 1; i < netout.Length; i++)
{
if (netout[i] > max)
{
max = netout[i];
maxIndex = i;
}
}
如果maxIndex = 0答案是A
如果maxIndex = 1答案为B
如果maxIndex = 2答案是C
我认为你必须从图像中创建矩阵并将其用作模式,例如20/20或15/15或小,你的30/30很大。
我使用不同的方式获取Image Scheme。 I分割图像20/20并且如果矩形中的一个像素是黑色(或您想要的其他颜色),则在矩阵中保存1,否则为0.
我在此之后更换所有像素我只有两种颜色,白色和黑色,我可以用轮廓进行操作。
private void Cmd_ReplaceColors(ref WriteableBitmap Par_WriteableBitmap,int Par_Limit=180)
{
for (int y = 0; y < Par_WriteableBitmap.PixelHeight; y++)
{
for (int x = 0; x < Par_WriteableBitmap.PixelWidth; x++)
{
Color color = Par_WriteableBitmap.GetPixel(x, y);
if (color == Colors.White)
{
}
else
{
if (color.R < Par_Limit)
{
Par_WriteableBitmap.SetPixel(x, y, Colors.Black);
}
else
{
Par_WriteableBitmap.SetPixel(x, y, Colors.White);
}
}
}
}
Par_WriteableBitmap.Invalidate();
}
我认为1000次迭代很小,更好的10万次:))