使用C#从文本文件中提取字段名称和最大长度

时间:2012-08-03 21:08:57

标签: c# c#-3.0

我有一个文件,它是一个保存为文本文件的SQL Server结果集。

以下是文件的示例:

RWS_DMP_ID      RV1_DMP_NUM      CUS_NAME
3192            3957             THE ACME COMPANY                          
3192            3957             THE ACME COMPANY                          
3192            3957             THE ACME COMPANY   

我想创建一个读取此文件的C#程序并创建以下数据表:

     Field       MaxSize
     -----       -------
 RWS_DMP_ID  17 
RV1_DMP_NUM 17 
CUS_NAME    42

这是字段名称及其最大长度的列表。最大长度是指从字段起始位置到下一个字段起始位置之前的字符数(包括两者之间的空格)。

顺便说一句,我不关心代码性能。这是一个很少使用的文件处理实用程序。

我用以下代码解决了这个问题:

// Reads the result-set text file and fills strProcessedData:
//   - line 1 (the header) supplies each field name (column 0) and its width (column 1);
//   - line 3 supplies one sample value per field, sliced using those widths.
// The reader is wrapped in a using statement so the file handle is released even
// when parsing throws.
// NOTE(review): bolNewField, bolFieldEnd, intFieldCnt, intCharCnt and strProcessedData
// are declared and initialised outside this snippet — confirm their starting values.
using (objFile = new StreamReader(strPath + strFileName))
{
    strLine = objFile.ReadLine();
    intLineCnt = 0;
    while (strLine != null)
    {
        intLineCnt++;

        // Only the first three lines are inspected; later lines are read and ignored.
        if (intLineCnt <= 3)
        {
            if (intLineCnt == 1)
            {
                // Header line: the names come from SplitWords, the widths from the
                // character-by-character scan below.
                strWords = SplitWords(strLine);
                intNumberOfFields = strWords.Length;
                foreach (char c in strLine)
                {
                    // The previous character started a new field: we are inside a name again.
                    if (bolNewField == true)
                    {
                        bolFieldEnd = false;
                        bolNewField = false;
                    }
                    if (bolFieldEnd == false)
                    {
                        // Inside a field name; the first space marks the start of the padding.
                        if (c == ' ')
                        {
                            bolFieldEnd = true;
                        }
                    }
                    else
                    {
                        // Inside the padding; the first non-space character starts the next field.
                        if (c != ' ')
                        {
                            // Record the field that just ended (name plus padding).
                            if (intFieldCnt < strWords.Length)
                            {
                                strProcessedData[intFieldCnt, 0] = strWords[intFieldCnt];
                                strProcessedData[intFieldCnt, 1] = (intCharCnt - 1).ToString();
                            }
                            intFieldCnt++;
                            intCharCnt = 1;
                            bolNewField = true;
                        }
                    }
                    // The first character of a new field is already covered by the reset to 1 above.
                    if (bolNewField == false)
                    {
                        intCharCnt++;
                    }
                }
                // The last field has no following field to trigger the store, so store it here.
                strProcessedData[intFieldCnt, 0] = strWords[intFieldCnt];
                strProcessedData[intFieldCnt, 1] = intCharCnt.ToString();
            }
            else if (intLineCnt == 3)
            {
                // Data line: cut one value per field using the widths found on the header line.
                intLine2Cnt = 0;
                intTotalLength = 0;
                while (intLine2Cnt < intNumberOfFields)
                {
                    intSize = Convert.ToInt32(strProcessedData[intLine2Cnt, 1]);
                    if (intTotalLength >= strLine.Length)
                    {
                        // The line ends before this column starts; Substring would throw
                        // on the negative length, so treat the value as empty.
                        strField = string.Empty;
                    }
                    else
                    {
                        // Clamp the final slice to what is actually left on the line.
                        if (intSize + intTotalLength > strLine.Length)
                        {
                            intSize = strLine.Length - intTotalLength;
                        }
                        strField = strLine.Substring(intTotalLength, intSize);
                        strField = strField.Trim();
                    }
                    strProcessedData[intLine2Cnt, intLineCnt - 1] = strField;
                    intTotalLength = intTotalLength + intSize + 1;

                    intLine2Cnt++;
                }
            }
        }
        strLine = objFile.ReadLine();
    }
}

我知道这段代码完全是拼凑出来的权宜之计(hack)。我正在寻找一种更好的方法来解决这个问题。

有没有更好的方法来解决这个问题?

感谢

3 个答案:

答案 0 :(得分:0)

我不确定这种做法的内存效率如何,但我认为它更清晰(假设您的字段以制表符分隔):

// Computes the widest trimmed value of every column in a tab-delimited text file
// whose first line holds the column names, then prints one "<name> <width>" line
// per column.
var colDelimiter = new[] { '\t' };
string[] lines = File.ReadAllLines(strPath + strFileName);

// Split the header exactly like the data rows (no RemoveEmptyEntries) so that
// position i in the header always refers to position i in every row.
string[] headerCells = lines.Length > 0 ? lines[0].Split(colDelimiter) : new string[0];

// jagged array of the data rows (everything after the header row)
string[][] rows = lines.Skip(1).Select(l => l.Split(colDelimiter)).ToArray();

// dictionary of column name -> max trimmed length
var max = new Dictionary<string, int>();

for (int i = 0; i < headerCells.Length; i++)
{
    string field = headerCells[i];

    // An empty header cell is not a field, but it still occupies a column position.
    if (field.Length == 0)
    {
        continue;
    }

    int columnIndex = i;

    // Rows that are too short (e.g. blank lines) simply do not contribute to this
    // column; DefaultIfEmpty keeps Max from throwing when no row contributes.
    int widest = rows
        .Where(row => columnIndex < row.Length)
        .Select(row => row[columnIndex].Trim().Length)
        .DefaultIfEmpty(0)
        .Max();

    // Add still throws ArgumentException when two columns share the same name.
    max.Add(field, widest);
}

// output per requirement
Console.WriteLine(string.Join(Environment.NewLine,
        max.Keys.Select(field => field + " " + max[field])
    ));

答案 1 :(得分:0)

    /// <summary>
    /// Scans tab-delimited text and tracks, for every column named on the first
    /// line, the length of the longest cell found on the following lines.
    /// </summary>
    /// <param name="reader">Reader positioned at the header line of the data.</param>
    void MaximumWidth(StreamReader reader)
    {
        string[] columns = null;
        int[] maxWidth = null;

        string line;

        while ((line = reader.ReadLine()) != null)
        {
            string[] cols = line.Split('\t');

            if (columns == null)
            {
                // The first line is the header: remember the names and size the result array.
                columns = cols;
                maxWidth = new int[cols.Length];
                continue;
            }

            // A data line may hold fewer cells than the header (for example a blank
            // line); only visit the cells that exist so cols[i] never goes out of range.
            int cellCount = cols.Length < columns.Length ? cols.Length : columns.Length;

            for (int i = 0; i < cellCount; i++)
            {
                int width = cols[i].Length;

                if (maxWidth[i] < width)
                {
                    maxWidth[i] = width;
                }
            }
        }

        // ... (placeholder kept from the original: consume columns / maxWidth here)
    }

答案 2 :(得分:0)

这是我想出的。最重要的是使用IndexOf字符串函数。

 /// <summary>
 /// Console utility that prints every field name found on the header line of a
 /// fixed-width text file together with the width of that field.
 /// </summary>
 class Program
    {
        /// <summary>
        /// Entry point. Expects the path of the text file as the first argument.
        /// </summary>
        /// <param name="args">Command-line arguments; args[0] is the file path.</param>
        static void Main(string[] args)
        {
            if (args.Length == 0)
            {
                Console.WriteLine("Usage: pass the path of the result-set text file as the first argument.");
                return;
            }

            String strFilePath = args[0];

            // Read the file once; no reader is left open afterwards.
            string[] lines = File.ReadAllLines(strFilePath);
            if (lines.Length == 0)
            {
                Console.WriteLine("The file is empty.");
                return;
            }

            // The longest line bounds the width of the last field.
            Int32 intMaxLineSize = lines.Max(line => line.Length);

            // The first line holds the field names.
            GetFieldNameAndFieldLengh(lines[0], intMaxLineSize);

            Console.WriteLine("Press <enter> to continue.");
            Console.ReadLine();
        }

        /// <summary>
        /// Writes one "name width" line to the console for every field on the header line.
        /// A field's width is the distance from its first character to the first character
        /// of the next field; the last field extends to <paramref name="intMaxSize"/>.
        /// </summary>
        /// <param name="strLine">Header line containing the whitespace-separated field names.</param>
        /// <param name="intMaxSize">Length of the longest line in the file.</param>
        /// <exception cref="ArgumentNullException"><paramref name="strLine"/> is null.</exception>
        public static void GetFieldNameAndFieldLengh(String strLine, Int32 intMaxSize)
        {
            if (strLine == null)
            {
                throw new ArgumentNullException("strLine");
            }

            string[] fields = SplitWords(strLine);

            // Locate every field start, always searching after the previous field so a
            // name that also occurs inside an earlier name (e.g. "ID" in "RWS_DMP_ID")
            // is not matched at the wrong position.
            int[] starts = new int[fields.Length];
            int searchFrom = 0;
            for (int i = 0; i < fields.Length; i++)
            {
                starts[i] = strLine.IndexOf(fields[i], searchFrom, StringComparison.Ordinal);
                searchFrom = starts[i] + fields[i].Length;
            }

            for (int i = 0; i < fields.Length; i++)
            {
                int end;
                if (i < fields.Length - 1)
                {
                    end = starts[i + 1];
                }
                else
                {
                    // The last field runs to the longest line, but is never reported
                    // narrower than its own name.
                    end = Math.Max(intMaxSize, starts[i] + fields[i].Length);
                }

                Console.WriteLine("{0} {1}", fields[i], end - starts[i]);
            }
        }

        /// <summary>
        /// Splits a line into its whitespace-separated tokens, ignoring leading,
        /// trailing and repeated whitespace.
        /// </summary>
        /// <param name="s">The line to split.</param>
        /// <returns>The non-empty tokens in order of appearance.</returns>
        static string[] SplitWords(string s)
        {
            // A null separator array makes Split use white-space characters as delimiters.
            return s.Split((char[])null, StringSplitOptions.RemoveEmptyEntries);
        }
    }