如何将C#代码转换为python?

时间:2018-01-28 04:28:51

标签: c# python dataframe

我正在尝试读取大型数据文件。我在C#中有以下代码,它可以获取满足要求的行并将它们保存到单独的文件中。整个代码大约需要20分钟才能运行。

*miniArray

我在python中写了一些东西,可以使用chunksize实现相同的功能,但运行需要25个小时。我想知道是否有一种方法可以根据上面的C#代码在python中编写类似的代码。

在我的Python代码中,将行转换为DataFrame的命令占用的时间最多(Python代码未包含在本文中)。上文提到的C#代码如下:

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.IO;

namespace ConsoleApplication1 {
    /// <summary>
    /// Splits a comma-separated text file into one output file per distinct integer
    /// value found in a chosen column (the "feature" column).
    /// </summary>
    /// <remarks>
    /// All of the work is done by the constructor: it reads the header line, streams
    /// through the input once, and closes every output file before returning.
    /// Each output file is named <c>file&lt;id&gt;.csv</c>, starts with the input's header
    /// line, and then holds every accepted input line whose feature value equals that id.
    /// Lines are split on ',' only; quoted fields containing commas are not recognised.
    /// </remarks>
    public class CSVFileConverter {
        // Inclusive range of feature values that are exported; anything outside it is
        // reported as invalid and skipped.
        private const int MinFeatureId = 0;
        private const int MaxFeatureId = 100;

        /// <summary>Path of the input file.</summary>
        public string FilePath { get; set; }

        /// <summary>Directory in which the per-id output files are created.</summary>
        public string OutputDirectory { get; set; }

        /// <summary>
        /// Zero-based index of the first line that is processed; lines with a smaller
        /// index are skipped. The header is line 0, so a value of 1 skips just the header.
        /// </summary>
        public int numStartLine { get; set; }

        /// <summary>Name of the header column whose value selects the output file.</summary>
        public string strFeature { get; set; }

        /// <summary>Column names obtained by splitting the header line on ','.</summary>
        public string[] colNames { get; set; }

        // Raw header line, copied to the top of every output file.
        private string strcolNames;

        // Zero-based index of the line most recently read; -1 before reading starts.
        private int m_numofLines = -1;

        /// <summary>Zero-based index of the last line read from the input (-1 if none).</summary>
        public int numofLines {
            get {
                return this.m_numofLines;
            }
        }

        // Position of strFeature inside colNames.
        private int colIndex;

        // One open writer per feature id encountered so far.
        private Dictionary<int, StreamWriter> dictSW = new Dictionary<int, StreamWriter>();

        /// <summary>
        /// Stores the settings and immediately runs the whole conversion.
        /// </summary>
        /// <param name="filepath">Path of the input file.</param>
        /// <param name="output">Directory that receives the output files.</param>
        /// <param name="feature">Header name of the column to split by.</param>
        /// <param name="numstartline">Zero-based index of the first line to process.</param>
        public CSVFileConverter(string filepath, string output, string feature, int numstartline) {
            this.FilePath = filepath;
            this.OutputDirectory = output;
            this.strFeature = feature;
            this.numStartLine = numstartline;
            this.GetColNames();
            try {
                this.Converter();
            } finally {
                // Close (and thereby flush) every writer even when the conversion fails part-way.
                this.CloseStreams();
            }
        }

        /// <summary>
        /// Reads the header line and locates the feature column. If the file cannot be
        /// read, is empty, or lacks the column, the exception is printed to the console
        /// and the process exits with code 0.
        /// </summary>
        private void GetColNames() {
            try {
                strcolNames = ReadCSVLines(this.FilePath).First();
                colNames = strcolNames.Split(',');
                // A single IndexOf call serves as both the membership test and the lookup.
                this.colIndex = Array.IndexOf(colNames, this.strFeature);
                if (this.colIndex < 0) {
                    throw new Exception(string.Format("The File doesn't have this specified feature: {0}", this.strFeature));
                }
            } catch (Exception ex) {
                Console.WriteLine(ex);
                Environment.Exit(0);
            }
        }

        /// <summary>
        /// Streams through the input and appends each accepted line to the output file
        /// that belongs to its feature id, creating that file (with the header) on first use.
        /// Lines whose feature value is missing, is not an integer, or lies outside
        /// 0..100 are reported on the console and skipped.
        /// </summary>
        private void Converter() {
            foreach (var strline in ReadCSVLines(this.FilePath)) {
                m_numofLines++;
                if (m_numofLines < this.numStartLine) {
                    continue; // skip lines
                }

                string[] strsplit = strline.Split(',');

                // A short line has no feature column at all; treat it as an empty value so it
                // is reported like any other unparsable input instead of raising an index error.
                string rawValue = this.colIndex < strsplit.Length ? strsplit[this.colIndex] : string.Empty;

                int id;
                if (!int.TryParse(rawValue, out id)) {
                    Console.WriteLine("Line {0} is invalid input: {1} = {2}.", m_numofLines, this.strFeature, rawValue);
                    continue;
                }

                // Validate BEFORE writing, so out-of-range ids neither create files nor
                // leak their rows into the output.
                if (id < MinFeatureId || id > MaxFeatureId) {
                    Console.WriteLine("Line {0} is invalid input: {1} = {2}.", m_numofLines, this.strFeature, id);
                    continue;
                }

                StreamWriter sw;
                if (!dictSW.TryGetValue(id, out sw)) {
                    string filename = Path.Combine(OutputDirectory ?? string.Empty, "file" + id.ToString() + ".csv");
                    sw = new StreamWriter(filename);
                    dictSW.Add(id, sw);
                    sw.WriteLine(this.strcolNames);
                }
                // No per-line Flush: the writer buffers, and CloseStreams flushes on dispose.
                sw.WriteLine(strline);

                if ((m_numofLines % 10000) == 0) {
                    Console.WriteLine("numLines = {0}", m_numofLines);
                }
            }
        }

        /// <summary>Disposes every open output writer, flushing buffered data to disk.</summary>
        private void CloseStreams() {
            foreach (var sw in dictSW.Values) {
                sw.Dispose();
            }
            dictSW.Clear();
        }

        /// <summary>Lazily yields the lines of the file at <paramref name="filepath"/>, one at a time.</summary>
        /// <param name="filepath">Path of the file to read.</param>
        /// <returns>A deferred sequence of lines; the file is opened on first enumeration.</returns>
        private static IEnumerable<string> ReadCSVLines(string filepath) {
            using (StreamReader sr = new StreamReader(filepath)) {
                string strline;
                while ((strline = sr.ReadLine()) != null) {
                    yield return strline;
                }
            }
        }

    }
}
namespace ConsoleApplication1 {
    /// <summary>
    /// Console entry point: runs one CSV split with fixed paths and then waits for a key press.
    /// </summary>
    class Program {
        /// <summary>
        /// Splits the training CSV by column "a", starting at line 1, into the processed-files folder.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args) {
            const string inputCsv = @"D:\data\train.csv";
            const string outputFolder = @"D:\data\processedfiles\";

            // The CSVFileConverter constructor performs the entire conversion,
            // so the instance itself does not need to be kept.
            new CSVFileConverter(inputCsv, outputFolder, "a", 1);

            // Keep the console window open until the user presses a key.
            Console.ReadKey();
        }
    }
}

谢谢。

0 个答案:

没有答案