我正在尝试对包含密码的大约一千万行的文本文件执行一些分析。我这样做是通过读取文件的每一行,创建一个以值作为参数的类,然后将该类添加到列表中。在4,000,000行后,我得到一个内存不足异常。如果没有将所有内容存储在SQL数据库中,还有什么可以做的吗?
编辑:我要做的是获取密码,将其添加到Credential对象,然后将其添加到列表中。
public class Credential
{
public string Password { get; set; }
public static readonly List<string> specialCharacters = new List<string> { "@", "!", "~", "*", "^", "&", "\\", "/", "#", "$", "%", "<", ">", ".", ",", "?", ")", "(", "'", "\"", "+", "=", "_", "-", ";", ":", "{", "}", "]", "[", };
public Credential(string password)
{
this.Password = password;
this.Mapping = new Dictionary<int, CredentialValueType>();
for (var i = 0; i < this.Length; i++)
{
this.Mapping.Add(i, new CredentialValueType(this.Password[i]));
}
}
public Dictionary<int, CredentialValueType> Mapping { get; private set; }
public int Length
{
get
{
return this.Password.Length;
}
}
public bool HasUppercase
{
get
{
return this.Password.Any(c => char.IsUpper(c));
}
}
public bool HasLowercase
{
get
{
return this.Password.Any(c => char.IsLower(c));
}
}
public bool HasNumber
{
get
{
return this.Password.Any(c => char.IsNumber(c));
}
}
public bool HasSpecialCharacter
{
get //Verify that this works right...
{
return this.Password.Where(a => specialCharacters.Contains(a.ToString())).Count() > 0;
}
}
}
public struct CredentialValueType
{
public char Value { get; set; }
public ValueType ValueType { get; set; }
public CredentialValueType(char val)
{
this = new CredentialValueType();
this.Value = val;
if (char.IsUpper(val)) this.ValueType = ValueType.UpperCase;
else if (char.IsLower(val)) this.ValueType = PasswordStats.ValueType.LowerCase;
else if (char.IsNumber(val)) this.ValueType = PasswordStats.ValueType.Number;
else this.ValueType = PasswordStats.ValueType.SpecialCharacter;
}
}
我的功能如下:
public class PasswordAnalyzer
{
public IList<Credential> Credentials { get; private set; }
public PasswordAnalyzer(string file, int passwordField = 0, Delimiter delim = Delimiter.Comma)
{
this.Credentials = new List<Credential>();
using (var fileReader = File.OpenText(file)) //Verify UTF-8
{
using (var csvReader = new CsvHelper.CsvReader(fileReader))
{
csvReader.Configuration.Delimiter = "\t";
while (csvReader.Read())
{
var record = csvReader.GetField<string>(passwordField);
this.Credentials.Add(new Credential(record));
System.Diagnostics.Debug.WriteLine(this.Credentials.Count);
}
}
}
}
}
答案 0 :(得分:2)
您可以将Mapping存储在数组中,而不是创建400万个字典。我确信这会节省很多空间,但如果没有关于消耗多少内存的更多信息等等,很难判断这是否能解决您的问题。
我假设您显示的代码不是您的实际代码,但如果您只是需要遍历这些行,请使用IEnumerable,并生成每个结果。你对内存的要求会更好,因为你只有一个&#34; line&#34;在记忆中一次。