如何加速我的表转换算法?

时间:2017-10-23 12:09:58

标签: java algorithm

我有一项任务是将字符串表从一种格式转换为另一种格式。 enter image description here

我用这个类转换表:

import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.Map;

class TableConverter
{
    public String[] Entities; //here I store descriptive fields like DescField1, DescField2...
    public ArrayList<String> ConvertedList; //here I store converted table rows as separate string lines 

    public TableConverter(ArrayList<String> lines) //we receive table rows as separate string lines
    {
        String[] splitted_first_line = lines.get(0).split("\t"); //split first row to get descriptive fields
        this.Entities = new String[splitted_first_line.length - 2]; //allocate size to hold all descriptive fields. -2 because last two columns is Date and Total
        System.arraycopy(splitted_first_line, 0, this.Entities, 0, this.Entities.length); //copy descriptive fields into my arr     

        //--

        int lines_sz = lines.size(); //save lines size to not recalculate it every iteration 
        Map<String, Integer> k_d_map = new HashMap<String, Integer>(); //map to store indecies of every Date column

        for (int i = 1; i < lines_sz; i++)
        {
            if (lines.get(i).isEmpty())
                continue;

            String[] splitted_line = lines.get(i).split("\t"); //splitted line on values    

            if (!k_d_map.containsKey(splitted_line[splitted_line.length - 2])) //if my map does not contain such date
                k_d_map.put(splitted_line[splitted_line.length - 2], 0); //then add it
        }

        String[] known_dates = k_d_map.keySet().toArray(new String[k_d_map.size()]);
        SortStrDates(known_dates); //I sort dates by ASC 
        k_d_map.clear(); //clear map to fill it again with correct indecies

        for (int i = 0; i < known_dates.length; i++) //refilling map and now we know every date index
            k_d_map.put(known_dates[i], i);

        //--

        Map<String, EntitySales> ESs_map = new HashMap<String, EntitySales>(); //map for rows

        for (int i = 1; i < lines_sz; i++)
        {
            if (lines.get(i).isEmpty())
                continue;

            String[] splitted_line = lines.get(i).split("\t"); //split row  
            String curr_entity = GetEntityFromLine(splitted_line); //I get set of descriptive fields separated by \t. It looks like this: asd\tqwe\t...\tzxc
            int dti = k_d_map.get(splitted_line[splitted_line.length - 2]); //I get date column index for Date stored in this row (if it was 02.2017 then index will be 0) 

            if (ESs_map.containsKey(curr_entity)) //I check if map contains row with such descriptive fields set
                ESs_map.get(curr_entity).SalesAmounts[dti] = splitted_line[splitted_line.length - 1]; //if contains, we set sale amount at date index (set 5 to 02.2017 column for example)
            else
            {
                EntitySales es = new EntitySales(curr_entity, known_dates.length); //else we create new object to hold row          
                es.SalesAmounts[dti] = splitted_line[splitted_line.length - 1]; //set sales amount at date
                ESs_map.put(curr_entity, es); //and add to map
            }
        }

        //--

        String first_row = ""; //here and below I build first row text representation, I add stored DescFields and unique dates
        this.ConvertedList = new ArrayList<String>();               

        for (int i = 0; i < this.Entities.length; i++)
            first_row += this.Entities[i] + "\t";

        for (int i = 0; i < known_dates.length; i++)
            first_row += i < known_dates.length - 1 ? known_dates[i] + "\t" : known_dates[i];

        this.ConvertedList.add(first_row);

        //--

        for (EntitySales es : ESs_map.values()) //Here I get rows as separate lines 
            this.ConvertedList.add(es.GetAsLine());
    }

    public String GetEntityFromLine(String[] line)
    {
        String[] entities = new String[line.length - 2];
        System.arraycopy(line, 0, entities, 0, entities.length);

        String entity = "";

        for (int i = 0; i < entities.length; i++)
            entity += i < entities.length - 1 ? entities[i] + "\t" : entities[i];

        return entity;
    }

    public void SortStrDates(String[] dates)
    {
        for (int i = 0; i < dates.length; i++)
            for (int j = i + 1; j < dates.length; j++)
            {
                Date dt_i = MyJunk.ConvertStrToDate(dates[i]);
                Date dt_j = MyJunk.ConvertStrToDate(dates[j]);

                if (dt_j.before(dt_i))
                {
                    String temp_i = dates[i];
                    dates[i] = dates[j];
                    dates[j] = temp_i;
                }
            }
    }
}

class EntitySales
{
    public String Entity;
    public String[] SalesAmounts;

    public EntitySales(String entity, int sales_amounts_size)
    {
        this.Entity = entity;
        this.SalesAmounts = new String[sales_amounts_size];
    }

    public String GetAsLine()
    {
        String line = this.Entity + "\t";

        for (int i = 0; i < this.SalesAmounts.length; i++)
        {
            String val = this.SalesAmounts[i] == null || this.SalesAmounts[i].isEmpty() ? "0" : this.SalesAmounts[i];
            line += i < this.SalesAmounts.length - 1 ? val + "\t" : val;
        }

        return line;
    }
}

它有效,但它最终会因为巨大的桌子而变慢。 我等了1小时20分钟转换800k行表并取消了任务。 200k行仅在3分钟内转换。我不知道为什么会这么慢,但问题是如何加速我的算法呢? 我尝试将Integer值分配给每组描述性字段(asd \ tqwe \ t ... \ tzxc - &gt; 0,某些\ telse - &gt; 1)并比较不带Maps的整数,但它只是慢了。

1 个答案:

答案 0 :(得分:1)

虽然您可以改进整体算法,但主要减速可能在GetAsLine函数中:

public String GetAsLine()
{
    String line = this.Entity + "\t";

    for (int i = 0; i < this.SalesAmounts.length; i++)
    {
        String val = this.SalesAmounts[i] == null || this.SalesAmounts[i].isEmpty() ? "0" : this.SalesAmounts[i];
        line += i < this.SalesAmounts.length - 1 ? val + "\t" : val;
    }

    return line;
}

在这里,您在循环中使用字符串连接来构建密钥。这非常低效,因为它每次循环都会分配一个新字符串。这涉及为新字符串分配内存并将现有字符串复制到新字符串。你的垃圾收集器得到了很多锻炼。

为了改善这一点,你想要做的是创建一个StringBuilder,然后在那里构建字符串:

StringBuilder line = new StringBuilder();
for (int i = 0; i < this.SalesAmounts.length; i++)
{
    String val = this.SalesAmounts[i] == null || this.SalesAmounts[i].isEmpty() ? "0" : this.SalesAmounts[i];
    line.append(val+"\t");
}
// remove final tab character
line.remove(line.length()-1, line.length()-1);

return line.toString();

这个更快的原因是因为StringBuilder每次附加某些内容时都不会创建新字符串。因此,您可以减少对字符串的复制。