Question

我正在尝试实现一种结构搜索机制，查找块并将它们包装在一个块中。

我是机器学习的新手，起初我从 brain.js 入手。这个库非常简单明了，我第一次使用就弄明白了它的工作方式，它适合处理简单的任务。

但遗憾的是，这个库的功能不够用。我之前问过如何找到块：How to take the data?

我决定尝试 TensorFlow，但这个库比较难懂，我仍然不明白它是如何学习的——也就是输入是什么、期望的结果应该是什么。

这是我用 brain.js 尝试进行搜索的一个示例：

<html>
<head>
    <script src="https://cdn.rawgit.com/BrainJS/brain.js/5797b875/browser.js"></script>

</head>
<body>

<div>
    <button onclick="train()">train</button><button onclick="Generate.next(); Generate.draw();">generate</button><button onclick="calculate()">calculate</button>
</div>

<canvas id="generate" style="border: 1px solid #000"></canvas>

</body>

<script type="text/javascript">
    // Accumulated training samples; Generate.next() pushes one { input, output } pair per call.
    var trainData = [];

    /**
     * Returns a random number in [min - 0.5, max + 0.5).
     *
     * Despite the name the result is NOT rounded; callers that need an integer
     * apply Math.round() themselves.
     *
     * @param {number} min - lower bound of the intended integer range
     * @param {number} max - upper bound of the intended integer range
     * @returns {number} unrounded random value
     */
    function randomInteger(min, max) {
        const span = max - min + 1;

        return min - 0.5 + Math.random() * span;
    }

    /**
     * Builds a random CSS colour of the form "#RRGGBB" (upper-case hex digits).
     *
     * @returns {string} seven-character colour string
     */
    function getRandomColor() {
        const HEX_DIGITS = '0123456789ABCDEF';

        // One Math.random() draw per hex digit, six digits in total.
        const digits = Array.from({ length: 6 }, () =>
            HEX_DIGITS.charAt(Math.floor(Math.random() * 16))
        );

        return '#' + digits.join('');
    }


    /**
     * Singleton that fabricates synthetic box layouts, records them as training
     * samples in `trainData`, and renders them on the #generate canvas.
     *
     * Every box is stored as four consecutive numbers [x, y, width, height],
     * divided by the canvas size (500 x 250). A sample's input is padded with
     * zero boxes up to 5 boxes and its output up to 2 boxes.
     */
    var Generate = new function () {
        var canvas = document.getElementById('generate');
        var ctx = canvas.getContext('2d');

        // Most recently built sample (flat arrays of normalized boxes).
        var elem = { input: [], output: [] };

        canvas.width = 500;
        canvas.height = 250;

        // Returns a callback that outlines one pixel-space box in the given colour.
        function outlineIn(colour) {
            return function (box) {
                ctx.strokeStyle = colour;

                ctx.strokeRect(box[0], box[1], box[2], box[3]);
            };
        }

        /** Builds a fresh sample and appends it to the global trainData list. */
        this.next = function () {
            this.build();

            var sample = { input: elem.input, output: elem.output };

            trainData.push(sample);
        };

        /** Wipes the whole canvas. */
        this.clear = function () {
            ctx.clearRect(0, 0, canvas.width, canvas.height);
        };

        /** Redraws the current sample: input boxes in green, output boxes in blue. */
        this.draw = function () {
            this.clear();
            this.item(elem.input, outlineIn("green"));
            this.item(elem.output, outlineIn("blue"));
        };

        /**
         * Walks a flat array four numbers at a time and hands each box,
         * converted back to pixel units, to the callback.
         *
         * @param {ArrayLike<number>} where - flat list of normalized boxes
         * @param {function(number[]): void} call - invoked once per box
         */
        this.item = function (where, call) {
            for (var start = 0; start < where.length; start += 4) {
                var box = [where[start], where[start + 1], where[start + 2], where[start + 3]];

                this.denormalize(box);
                call(box);
            }
        };

        /** Scales a pixel-space box in place by dividing by the canvas size. */
        this.normalize = function (input) {
            input[0] /= 500;
            input[1] /= 250;
            input[2] /= 500;
            input[3] /= 250;
        };

        /** Inverse of normalize(): scales a box in place back to pixel units. */
        this.denormalize = function (input) {
            input[0] *= 500;
            input[1] *= 250;
            input[2] *= 500;
            input[3] *= 250;
        };

        /**
         * Returns padding made of `add` all-zero boxes (4 * add zeros); an
         * empty array when `add` is not positive.
         */
        this.empty = function (add) {
            var zeros = [];

            for (var n = 0; n < add; n++) {
                zeros.push(0, 0, 0, 0);
            }

            return zeros;
        };

        /**
         * Generates a new random layout and stores it in `elem`.
         *
         * One row of 1-3 columns is laid out left to right; each column yields
         * an input box inset by 10px on every side, and the row yields one
         * output box spanning all of its columns.
         */
        this.build = function () {
            var rowCount = 1; // the row count is fixed at one row
            var areaWidth = randomInteger(100, 500);
            var areaHeight = randomInteger(50, 250);

            var innerBoxes = [];
            var wrapperBoxes = [];
            var offsetY = 0;

            for (var row = 0; row < rowCount; row++) {
                var rowHeight = randomInteger(30, Math.round(areaHeight / rowCount));
                var columnCount = Math.round(randomInteger(1, 3));
                var offsetX = 0;

                for (var col = 0; col < columnCount; col++) {
                    var columnWidth = randomInteger(30, Math.round(areaWidth / columnCount));
                    var inner = [offsetX + 10, offsetY + 10, columnWidth - 20, rowHeight - 20];

                    this.normalize(inner);
                    Array.prototype.push.apply(innerBoxes, inner);

                    offsetX += columnWidth;
                }

                // After the column loop offsetX equals the total width of the row.
                var wrapper = [0, offsetY, offsetX, rowHeight];

                this.normalize(wrapper);
                Array.prototype.push.apply(wrapperBoxes, wrapper);

                offsetY += rowHeight + 10;
            }

            // Pad to the fixed sizes: 5 input boxes, 2 output boxes.
            elem.input = innerBoxes.concat(this.empty(5 - Math.round(innerBoxes.length / 4)));
            elem.output = wrapperBoxes.concat(this.empty(2 - Math.round(wrapperBoxes.length / 4)));
        };

        /** Returns the normalized input boxes of the current sample. */
        this.get = function () {
            return elem.input;
        };

        /**
         * Logs a network result and draws its boxes in red on top of the
         * current drawing (the canvas is not cleared first).
         *
         * @param {ArrayLike<number>} result - flat list of normalized boxes
         * @param {*} stat - unused
         */
        this.calculate = function (result, stat) {
            console.log('brain:',result);

            this.item(result, outlineIn("red"));
        };

        /** Generates 1400 samples, each of which is appended to trainData. */
        this.train = function () {
            for (var n = 0; n < 1400; n++) {
                this.next();
            }
        };
    }

    // Pre-populate trainData with generated samples as soon as the script loads.
    Generate.train();

    // NOTE(review): `log` is not read anywhere in this snippet; confirm it is still needed.
    Generate.log = true;

    // Assigned by train(): the brain.js network and the value returned by net.train().
    var net,stat;


    /**
     * Creates a new brain.js network, trains it on everything collected in
     * trainData, and stores the network and the training result in the
     * globals `net` and `stat`.
     */
    function train(){
        var networkOptions = {
            hiddenLayers: [4],
            activation: 'tanh'
        };
        var trainingOptions = {
            log: true,
            iterations: 1250,
            learningRate: 0.0001,
            errorThresh: 0.0005
        };

        net = new brain.NeuralNetwork(networkOptions);
        stat = net.train(trainData, trainingOptions);

        console.log('stat:',stat)
    }

    /**
     * Runs the trained network on the currently generated sample and passes
     * the prediction to Generate.calculate() for drawing.
     *
     * If train() has not been run yet there is no network to query, so the
     * call only emits a console warning instead of throwing a TypeError.
     */
    function calculate(){
        if (!net) {
            console.warn('calculate: the network has not been trained yet - click "train" first');
            return;
        }

        Generate.calculate(net.run(Generate.get()));
    }


</script>
</html>

我的目标是训练网络以查找元素并显示其大小。

操作步骤：先单击 train 进行训练，再单击 generate 生成数据，最后单击 calculate 进行计算。

蓝色块包裹了绿色块，这是期望得到的结果；红色块表示神经网络实际找到的结果。

这就是我感兴趣的：

tensorflow可以找到块吗？
数据应该是图片还是数字数据？
您如何建议开始？

如果有人在如何接收数据，以什么格式和如何训练方面举一个小例子，我将不胜感激。

修改

我给出了绿色块的大小和位置，目的是找到绿色块的位置及其总大小，例如，蓝色块显示了这一点。

Answer 1

神经网络

神经网络的输入大小是固定的，由绿色块的数量决定。假设我们要在图片中找到 3 个块，则模型的 inputShape 为 [3, 4]，每个块有 4 个坐标 (x, y, w, h)。预测的框可以表示为 min(x), min(y), max(x+w), max(y+h)，这个边界框会把所有这些框包裹起来。样本数据可以是：

features = [[[1, 2, 3, 4], [2, 4, 5, 6], [3, 4, 2, 2]]]
labels = [[1, 2, 7, 10]]

/** Returns a random integer in [0, 99]. */
const random = () => Math.floor(Math.random() * 100);

/**
 * Builds one synthetic training sample.
 *
 * `features` holds three boxes, each as [x, y, w, h]. `labels` is the box that
 * wraps all of them, expressed as [min(x), min(y), max(x + w), max(y + h)].
 *
 * @returns {{features: number[][], labels: number[]}} one sample
 */
const generate = () => {
  const xarr = Array.from({length: 3}, () => random());
  const yarr = Array.from({length: 3}, () => random());

  // Width and height are drawn independently of the position so that every
  // row really is (x, y, w, h).
  const features = xarr.map((x, i) => [x, yarr[i], random(), random()]);

  // Fold from a neutral element so the accumulator always has the label
  // layout [minX, minY, maxRight, maxBottom] and is never mistaken for a box.
  const labels = features.reduce(
    (acc, [x, y, w, h]) => [
      Math.min(acc[0], x),
      Math.min(acc[1], y),
      Math.max(acc[2], x + w),
      Math.max(acc[3], y + h),
    ],
    [Infinity, Infinity, -Infinity, -Infinity]
  );

  return {features, labels};
};

(async () => {
  const SAMPLE_COUNT = 5;
  const PROBE = [[[1, 2, 3, 4], [2, 4, 5, 6], [3, 4, 2, 2]]];

  // Network: a dense layer applied to the [3, 4] input, its 3 x 20 activations
  // reshaped into one vector of 60, then a dense layer with 4 outputs.
  const regressor = tf.sequential();
  regressor.add(tf.layers.dense({inputShape: [3, 4], units: 20, activation: 'relu'}));
  regressor.add(tf.layers.reshape({targetShape: [60]}));
  regressor.add(tf.layers.dense({units: 4, activation: 'relu'}));

  regressor.summary();

  // Mean squared error between predicted and expected boxes, optimised with Adam.
  regressor.compile({optimizer: 'adam', loss: 'meanSquaredError'});

  // Synthetic training set: one generate() call per sample.
  const samples = Array.from({length: SAMPLE_COUNT}, () => generate());
  const inputs = tf.tensor3d(samples.map((sample) => sample.features));
  const targets = tf.tensor2d(samples.map((sample) => sample.labels));
  console.log(inputs.shape);
  console.log(targets.shape);

  // Show the data, fit the model, then predict on a hand-written sample.
  inputs.print();
  targets.print();
  await regressor.fit(inputs, targets, {epochs: 100});

  regressor.predict(tf.tensor(PROBE)).print();
})();

<html>
  <head>
    <!-- Load TensorFlow.js -->
    <script src="https://cdn.jsdelivr.net/npm/@tensorflow/tfjs@latest"> </script>
  </head>

  <body>
  </body>
</html>

卷积过滤器

前面的模型会生成一个框，把坐标已提供给模型的那些框包裹起来。但是，如果要找出与某个框相匹配的位置，则可以使用卷积过滤器。

让我们假设要在张量中匹配以下数据[[1, 2], [5, 6]]。此数据可以是裁剪后的图片，我们希望查看它是否存在于大图片中，如果是，则查看它出现了多少次。使用[[1, 1], [1, 1]]的卷积过滤器，在匹配项的左上角坐标(x, y)处将得到14的结果。对此值（14）进行过滤将返回感兴趣坐标的索引。

(async () => {
  // tf.setBackend('cpu')
  const arr = Array.from({length: 16}, (_, k) => k + 1);

  // 8x4 input "image": the values 1..16 followed by 16..1. The reversed half
  // is built from a copy so `arr` itself is never mutated.
  const x = tf.tensor([...arr, ...[...arr].reverse()], [8, 4]);
  x.print();

  // All-ones 2x2 kernel: every output cell is the sum of a 2x2 window of x.
  const filter = tf.ones([2, 2]);

  const conv = x.reshape([1, ...x.shape, 1])
    .conv2d(filter.reshape([...filter.shape, 1, 1]), 1, 'same')
    .squeeze();
  conv.print();

  const part = tf.tensor([[1, 2], [5, 6]]); // pattern searched for in x

  // Candidates: positions whose window sum equals the sum of `part`. An equal
  // sum does not prove equal values, so each candidate is verified below.
  const mask = conv.equal(part.sum()).asType('bool');
  const coords = await tf.whereAsync(mask);
  coords.print();

  // Number of candidates (not yet verified matches).
  console.log(coords.shape[0]);

  // With 'same' padding, windows near the border include padding. Keep only
  // candidates where a full `part`-sized window fits inside x.
  const [a, b] = coords
    .lessEqual(x.shape.map((dim, i) => dim - part.shape[i]))
    .split(2, 1);

  const filterMask = a.mul(b);
  const filterCoords = await tf.whereAsync(filterMask);
  filterCoords.print();

  // reshape([-1]) flattens however many candidates survived, instead of
  // assuming there are exactly two.
  const newCoords = coords.gather(filterCoords.split(2, 1)[0].reshape([-1]));
  newCoords.print();

  // Verify candidates one by one. A plain loop is used because an async
  // reduce callback would receive a Promise as its accumulator.
  const matchIndexes = [];
  for (const c of newCoords.unstack()) {
    const begin = await c.data();
    const cropped = x.slice(begin, part.shape);
    const sameElements = await tf.whereAsync(cropped.equal(part).asType('bool'));

    // One row per equal element: a true match has as many rows as `part` has elements.
    if (sameElements.shape[0] === tf.util.sizeFromShape(part.shape)) {
      matchIndexes.push(begin);
    }
  }
  console.log('matching index', matchIndexes);
})();

<html>
  <head>
    <!-- Load TensorFlow.js -->
    <script src="https://cdn.jsdelivr.net/npm/@tensorflow/tfjs@latest"> </script>
  </head>

  <body>
  </body>
</html>

更一般地说，仅靠卷积过滤器不足以判断是否存在匹配项。实际上，张量中值为 [[5, 6], [2, 1]] 的部分同样会输出 14。为确保只输出正确的索引，可以在给定坐标处对输入张量进行切片：当张量不大时，尽可能逐元素检查各个值；否则只抽查其中的若干元素。

Tensorflow结构搜索

1 个答案: