C#:导入的CSV文件,输入字符串格式不正确

时间:2018-01-25 04:29:24

标签: c# regex list csv parsing

此代码尝试从CSV文件中获取两列数值。

using System;
using System.Collections.Generic;
using System.Linq;
using System.IO;

using System.Text.RegularExpressions;
using System.Globalization;

namespace PredominantValues
{
    /// <summary>
    /// Console entry point: loads price / year-built pairs from a CSV file and
    /// prints the highest, lowest and average value of each column.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Reads the inventory CSV (skipping its header row), converts each line to a
        /// <see cref="DataValues"/> record and writes the statistics to the console.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            List<DataValues> values = File.ReadAllLines("C:\\Users\\yetih\\Desktop\\Firm_Inventory.csv")
                                           .Skip(1)
                                           .Select(v => DataValues.FromCSV(v))
                                           .ToList();

            // Bounds used to discard implausible records before taking max/min.
            int YEARHIGH    = 2050;
            int YEARLOW     = 1700;
            int PRICEHIGH   = 900000;
            int PRICELOW    = 5000;

            // NOTE: the DataValues.Get* helpers remove out-of-range records from
            // `values` while they search, so each average below is computed over
            // whatever records are still in the list at that point.
            int maxYearBuilt = DataValues.GetMaxYearBuilt(values, YEARHIGH);
            int minYearBuilt = DataValues.GetMinYearBuilt(values, YEARLOW);
            int avgYearBuilt = Convert.ToInt32(values.Average(r => r.yearBuilt));

            int maxPrice = DataValues.GetMaxPrice(values, PRICEHIGH);
            int minPrice = DataValues.GetMinPrice(values, PRICELOW);
            int avgPrice = Convert.ToInt32(values.Average(r => r.price));

            // "Low" reports the minimum and "High" the maximum (the two year-built
            // values were previously printed under each other's label).
            Console.WriteLine("Low      - Year Built: " + minYearBuilt);
            Console.WriteLine("High     - Year Built: " + maxYearBuilt);
            Console.WriteLine("Average  - Year Built: " + avgYearBuilt);
            Console.WriteLine("High     - Price:      " + maxPrice);
            Console.WriteLine("Low      - Price:      " + minPrice);
            Console.WriteLine("Average  - Price:      " + avgPrice);

            // Keep the console window open until the user presses Enter.
            Console.ReadLine();
        }
    }

    /// <summary>
    /// One inventory record (price and year built) together with helpers for
    /// parsing a CSV line and for finding in-range maxima / minima in a list.
    /// </summary>
    public class DataValues
    {
        /// <summary>Year the property was built.</summary>
        public int yearBuilt;

        /// <summary>Price in whole currency units.</summary>
        public int price;

        // Splits on commas that are not inside a double-quoted field.
        // Built once instead of once per parsed line.
        private static readonly Regex CSVParser = new Regex(",(?=(?:[^\"]*\"[^\"]*\")*(?![^\"]*\"))");

        /// <summary>
        /// Parses one CSV line whose first field is a price and whose second field
        /// is a year. The price may be plain (<c>350000</c>) or a quoted currency
        /// value such as <c>"$350,000.00 "</c>.
        /// </summary>
        /// <param name="csvLine">A single data line from the CSV file.</param>
        /// <returns>A record holding the parsed price (rounded to a whole number) and year.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="csvLine"/> is null.</exception>
        /// <exception cref="FormatException">The line has fewer than two fields or a field is not numeric.</exception>
        /// <exception cref="OverflowException">A value does not fit in an <see cref="int"/>.</exception>
        public static DataValues FromCSV(string csvLine)
        {
            if (csvLine == null)
            {
                throw new ArgumentNullException("csvLine");
            }

            string[] values = CSVParser.Split(csvLine);
            if (values.Length < 2)
            {
                throw new FormatException("Expected at least two comma-separated fields (price, year built).");
            }

            // The split keeps the quotes that enclose a quoted field; they must be
            // removed before the text can be parsed as a number. The "$" is removed
            // by hand because the invariant culture uses a different currency symbol.
            string priceText = Unquote(values[0]).Replace("$", "");

            // NumberStyles.Currency accepts thousands separators, a decimal point,
            // surrounding white space and parentheses; parsing as decimal first
            // allows a fractional part such as ".00".
            decimal parsedPrice = decimal.Parse(priceText, NumberStyles.Currency, CultureInfo.InvariantCulture);

            DataValues dataValues = new DataValues();
            dataValues.price = Convert.ToInt32(parsedPrice);
            dataValues.yearBuilt = int.Parse(Unquote(values[1]), NumberStyles.Integer, CultureInfo.InvariantCulture);
            return dataValues;
        }

        /// <summary>Trims white space and one layer of enclosing double quotes from a CSV field.</summary>
        private static string Unquote(string field)
        {
            return field.Trim().Trim('"').Trim();
        }

        /// <summary>
        /// Removes, in place, every record whose year built is at or above
        /// <paramref name="YEARHIGH"/> and returns the largest remaining year.
        /// </summary>
        /// <param name="list">Records to search; out-of-range records are removed from it.</param>
        /// <param name="YEARHIGH">Exclusive upper bound for an accepted year.</param>
        /// <returns>The highest year built below the bound.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="list"/> is null.</exception>
        /// <exception cref="InvalidOperationException">No record is left after filtering.</exception>
        public static int GetMaxYearBuilt(List<DataValues> list, int YEARHIGH)
        {
            if (list == null)
            {
                throw new ArgumentNullException("list");
            }

            // Single pass; leaves the list in the same state as repeatedly
            // removing the current maximum while it is out of range.
            list.RemoveAll(r => r.yearBuilt >= YEARHIGH);
            return list.Max(r => r.yearBuilt);
        }

        /// <summary>
        /// Removes, in place, every record whose year built is at or below
        /// <paramref name="YEARLOW"/> and returns the smallest remaining year.
        /// </summary>
        /// <param name="list">Records to search; out-of-range records are removed from it.</param>
        /// <param name="YEARLOW">Exclusive lower bound for an accepted year.</param>
        /// <returns>The lowest year built above the bound.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="list"/> is null.</exception>
        /// <exception cref="InvalidOperationException">No record is left after filtering.</exception>
        public static int GetMinYearBuilt(List<DataValues> list, int YEARLOW)
        {
            if (list == null)
            {
                throw new ArgumentNullException("list");
            }

            list.RemoveAll(r => r.yearBuilt <= YEARLOW);
            return list.Min(r => r.yearBuilt);
        }

        /// <summary>
        /// Removes, in place, every record whose price is at or above
        /// <paramref name="PRICEHIGH"/> and returns the largest remaining price.
        /// </summary>
        /// <param name="list">Records to search; out-of-range records are removed from it.</param>
        /// <param name="PRICEHIGH">Exclusive upper bound for an accepted price.</param>
        /// <returns>The highest price below the bound.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="list"/> is null.</exception>
        /// <exception cref="InvalidOperationException">No record is left after filtering.</exception>
        public static int GetMaxPrice(List<DataValues> list, int PRICEHIGH)
        {
            if (list == null)
            {
                throw new ArgumentNullException("list");
            }

            list.RemoveAll(r => r.price >= PRICEHIGH);
            return list.Max(r => r.price);
        }

        /// <summary>
        /// Removes, in place, every record whose price is at or below
        /// <paramref name="PRICELOW"/> and returns the smallest remaining price.
        /// </summary>
        /// <param name="list">Records to search; out-of-range records are removed from it.</param>
        /// <param name="PRICELOW">Exclusive lower bound for an accepted price.</param>
        /// <returns>The lowest price above the bound.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="list"/> is null.</exception>
        /// <exception cref="InvalidOperationException">No record is left after filtering.</exception>
        public static int GetMinPrice(List<DataValues> list, int PRICELOW)
        {
            if (list == null)
            {
                throw new ArgumentNullException("list");
            }

            list.RemoveAll(r => r.price <= PRICELOW);
            return list.Min(r => r.price);
        }
    }
}

到目前为止,只有先打开CSV文件,把第一列的格式从“货币(Currency)”改为“常规(General)”,我才能成功导入文件并将其拆分为列表。这两列分别是“Price”和“Year Built”。如果保留“货币”格式,就会抛出异常:“输入字符串的格式不正确”。

我尝试解析字符串以删除货币符号,但是我遇到了相同的结果。

dataValues.price = int.Parse(values[0], NumberStyles.Currency);

我想把 values[0] 中的数值存储到 dataValues.price 中。

1 个答案:

答案 0 :(得分:0)

首先,让我们清理FromCSV()方法:

/// <summary>
/// One inventory record (price and year built) with a factory that parses it
/// from a CSV line.
/// </summary>
public class DataValues
{
    /// <summary>Year the property was built.</summary>
    public int yearBuilt;

    /// <summary>Price, kept as a decimal so any fractional part is preserved.</summary>
    public decimal price;

    // Splits on commas that are not inside a double-quoted field.
    private static readonly Regex CSVParser = new Regex(",(?=(?:[^\"]*\"[^\"]*\")*(?![^\"]*\"))");

    /// <summary>
    /// Parses one CSV line whose first field is a price (optionally quoted, with
    /// a "$" sign and thousands separators) and whose second field is a year.
    /// </summary>
    /// <param name="csvLine">A single data line from the CSV file.</param>
    /// <returns>The parsed record.</returns>
    /// <exception cref="FormatException">The line has fewer than two fields or a field is not numeric.</exception>
    public static DataValues FromCSV(string csvLine)
    {
        var result = new DataValues();

        string[] values = CSVParser.Split(csvLine);
        if (values.Length < 2)
        {
            throw new FormatException("Expected at least two comma-separated fields (price, year built).");
        }

        // The split keeps the quotes around a quoted field (e.g. "$350,000.00 "),
        // so strip them along with the separators and the currency sign.
        string priceText = values[0].Trim().Trim('"').Replace(",", "").Replace("$", "");
        string yearText = values[1].Trim().Trim('"');

        // File data is parsed with the invariant culture so the result does not
        // depend on the regional settings of the machine running the program.
        result.price = decimal.Parse(priceText, NumberStyles.Number, CultureInfo.InvariantCulture);
        result.yearBuilt = int.Parse(yearText, NumberStyles.Integer, CultureInfo.InvariantCulture);

        return result;
    }
}

清理之后,仍然不清楚您究竟想实现什么。从代码来看,您编写了求两个字段最大值和最小值的方法,但所用的过程会从列表中删除超出某个范围的记录,这种做法有时会影响另一个字段的结果,有时又不会。我猜您的意图是以下两者之一:要么在记录的任何字段超出范围时排除整条记录;要么只排除超出范围的那个字段的值,并且不希望 GetMax/GetMin 系列方法产生副作用。但问题中并没有说明您想要哪一种,因此下面两种情况的代码我都给出了。

首先,如果要排除整个记录:

/// <summary>
/// Whole-record variant: a record is dropped entirely when either its year
/// built or its price lies outside the accepted range, and all statistics are
/// computed over the records that remain.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
static void Main(string[] args)
{
    // Exclusive bounds for an accepted record.
    int YEARHIGH        = 2050;
    int YEARLOW         = 1700;
    decimal PRICEHIGH   = 900000m;
    decimal PRICELOW    = 5000m;

    var values = File.ReadLines(@"C:\Users\yetih\Desktop\Firm_Inventory.csv")
             .Skip(1)
             .Select(v => DataValues.FromCSV(v))
             .Where(r => r.yearBuilt > YEARLOW && r.yearBuilt < YEARHIGH && r.price > PRICELOW && r.price < PRICEHIGH)
             .ToList();

    // Max/Min/Average throw on an empty sequence, so report that case explicitly.
    if (values.Count == 0)
    {
        Console.WriteLine("No records within the accepted ranges.");
        Console.ReadKey(true);
        return;
    }

    int maxYearBuilt = values.Max(r => r.yearBuilt);
    int minYearBuilt = values.Min(r => r.yearBuilt);
    int avgYearBuilt = Convert.ToInt32(values.Average(r => r.yearBuilt));

    decimal maxPrice = values.Max(r => r.price);
    decimal minPrice = values.Min(r => r.price);
    decimal avgPrice = values.Average(r => r.price);

    // "Low" reports the minimum and "High" the maximum.
    Console.WriteLine("Low      - Year Built: {0}", minYearBuilt);
    Console.WriteLine("High     - Year Built: {0}", maxYearBuilt);
    Console.WriteLine("Average  - Year Built: {0}", avgYearBuilt);
    Console.WriteLine("High     - Price: {0,9:C}", maxPrice);
    Console.WriteLine("Low      - Price: {0,9:C}", minPrice);
    Console.WriteLine("Average  - Price: {0,9:C}", avgPrice);

    // Keep the console window open until a key is pressed.
    Console.ReadKey(true);
}

其次,如果您只想针对每个字段分别排除超出范围的值:

/// <summary>
/// Per-field variant: every record is kept, and each column's statistics are
/// computed only over that column's values that lie inside its accepted range.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
static void Main(string[] args)
{
    // Exclusive bounds for an accepted value.
    int YEARHIGH        = 2050;
    int YEARLOW         = 1700;
    decimal PRICEHIGH   = 900000m;
    decimal PRICELOW    = 5000m;

    var values = File.ReadLines(@"C:\Users\yetih\Desktop\Firm_Inventory.csv")
             .Skip(1)
             .Select(v => DataValues.FromCSV(v))
             .ToList();

    // Filter each column once with both bounds. A one-sided filter (for example
    // "below the high bound" only) could report a value that is under the low
    // bound when no in-range value exists.
    var years = values.Select(r => r.yearBuilt)
                      .Where(y => y > YEARLOW && y < YEARHIGH)
                      .ToList();
    var prices = values.Select(r => r.price)
                       .Where(p => p > PRICELOW && p < PRICEHIGH)
                       .ToList();

    // Max/Min/Average throw on an empty sequence, so each column is guarded.
    if (years.Count > 0)
    {
        int maxYearBuilt = years.Max();
        int minYearBuilt = years.Min();
        int avgYearBuilt = Convert.ToInt32(years.Average());

        // "Low" reports the minimum and "High" the maximum.
        Console.WriteLine("Low      - Year Built: {0}", minYearBuilt);
        Console.WriteLine("High     - Year Built: {0}", maxYearBuilt);
        Console.WriteLine("Average  - Year Built: {0}", avgYearBuilt);
    }
    else
    {
        Console.WriteLine("No year-built values within the accepted range.");
    }

    if (prices.Count > 0)
    {
        decimal maxPrice = prices.Max();
        decimal minPrice = prices.Min();
        decimal avgPrice = prices.Average();

        Console.WriteLine("High     - Price: {0,9:C}", maxPrice);
        Console.WriteLine("Low      - Price: {0,9:C}", minPrice);
        Console.WriteLine("Average  - Price: {0,9:C}", avgPrice);
    }
    else
    {
        Console.WriteLine("No prices within the accepted range.");
    }

    // Keep the console window open until a key is pressed.
    Console.ReadKey(true);
}