我有一个二进制文件,其中包含许多 24 字节的数据包,每个数据包的前 8 个字节是一个序列化的 DateTime 类型时间戳。
所有数据包都按时间戳升序排列。我想实现一个二分查找算法:读取数据包的前 8 个字节,反序列化出时间戳,并将其与目标时间戳进行比较。
目标是在二进制文件中找到与目标时间戳相匹配的那个序列化时间戳的起始位置。
修改
数据存放在二进制文件中,而不是内存中的数据结构,因此 List<T>.BinarySearch() 对我不适用。
不过,是否可以借助自定义比较器(CustomComparer)在 Stream 上进行二分查找(BinarySearch)?
该文件包含数十亿个这样的数据包,因此逐个遍历整个文件的效率会非常低。所以我考虑采用二分查找。
答案 0 :(得分:3)
尚未经过测试,但要点是:先读取文件中间位置的 8 个字节,然后根据读到的时间戳把中点向右或向左移动,并重复这一过程(代码不算最整洁)。时间复杂度为 O(log N)。
/// <summary>
/// Binary-searches a file made of fixed-size packets, each starting with an
/// 8-byte serialized timestamp, for the packet whose timestamp equals a given value.
/// The packets must be sorted by timestamp in ascending order.
/// </summary>
public class BinaryFinder
{
    /// <summary>Number of leading bytes of every packet that hold the timestamp.</summary>
    private const int TimestampSize = 8;

    private readonly long _packagesCount;
    private readonly int _packageSize;
    private readonly FileStream _reader;

    /// <summary>
    /// Creates a finder over an already opened, seekable file.
    /// </summary>
    /// <param name="reader">Stream positioned anywhere; it is re-positioned on every probe.</param>
    /// <param name="packageSize">Size of one packet in bytes (24 in the described format).</param>
    /// <exception cref="ArgumentNullException"><paramref name="reader"/> is null.</exception>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="packageSize"/> is not positive.</exception>
    public BinaryFinder(FileStream reader, int packageSize)
    {
        if (reader == null)
        {
            throw new ArgumentNullException(nameof(reader));
        }
        if (packageSize <= 0)
        {
            throw new ArgumentOutOfRangeException(nameof(packageSize), "Package size must be positive.");
        }

        _reader = reader;
        _packageSize = packageSize;
        // Integer division: a trailing partial packet is ignored.
        _packagesCount = reader.Length / packageSize;
    }

    /// <summary>
    /// Looks up the packet whose timestamp equals <paramref name="dateToSearch"/>.
    /// </summary>
    /// <param name="dateToSearch">Timestamp to look for.</param>
    /// <returns>
    /// Zero-based index of a matching packet, or -1 when there is none.
    /// The byte offset of the packet in the file is <c>index * packageSize</c>.
    /// </returns>
    public long Find(DateTime dateToSearch)
    {
        // The upper bound is inclusive, so the last valid index is count - 1.
        // For an empty file this yields -1 and the loop below never reads.
        return Find(0, _packagesCount - 1, dateToSearch);
    }

    /// <summary>
    /// Iterative binary search over the inclusive packet-index range
    /// [<paramref name="minPosition"/>, <paramref name="maxPosition"/>].
    /// </summary>
    private long Find(long minPosition, long maxPosition, DateTime dateToSearch)
    {
        while (minPosition <= maxPosition)
        {
            // Written as min + (max - min) / 2 so the midpoint cannot overflow.
            var newPosition = minPosition + (maxPosition - minPosition) / 2;
            var readDate = ReadDateAt(newPosition);

            if (readDate == dateToSearch)
            {
                return newPosition;
            }

            if (dateToSearch < readDate)
            {
                maxPosition = newPosition - 1;
            }
            else
            {
                minPosition = newPosition + 1;
            }
        }

        return -1;
    }

    /// <summary>
    /// Reads and decodes the timestamp of the packet at the given index.
    /// </summary>
    /// <param name="middlePosition">Zero-based packet index (not a byte offset).</param>
    /// <exception cref="EndOfStreamException">The file ends before 8 bytes could be read.</exception>
    private DateTime ReadDateAt(long middlePosition)
    {
        var buffer = new byte[TimestampSize];

        // Convert the packet index into a byte offset before seeking.
        _reader.Seek(middlePosition * _packageSize, SeekOrigin.Begin);

        // Stream.Read may legally return fewer bytes than requested, so keep
        // reading until the buffer is full or the stream ends.
        var filled = 0;
        while (filled < buffer.Length)
        {
            var read = _reader.Read(buffer, filled, buffer.Length - filled);
            if (read == 0)
            {
                throw new EndOfStreamException("Unexpected end of file while reading a timestamp.");
            }
            filled += read;
        }

        return ConvertBytesToDate(buffer);
    }

    /// <summary>
    /// Placeholder for decoding the 8 timestamp bytes into a <see cref="DateTime"/>.
    /// NOTE(review): must mirror however the file was written (for example
    /// DateTime.FromBinary over BitConverter.ToInt64 if DateTime.ToBinary was used) — confirm
    /// against the writer before implementing.
    /// </summary>
    private static DateTime ConvertBytesToDate(byte[] dateBytes)
    {
        throw new NotImplementedException();
    }
}
答案 1 :(得分:1)
好的,下面用代码实现一个有点疯狂的想法,可以看一下:它会返回所查找时间戳对应的数据包(结构)索引。
只需实例化 new FileStructList(fileName, headerSize),然后调用 list.BinarySearchIndexOf(theTimeStamp);
你甚至可以给它传入自己的比较器 :)
代码中自带了二分查找的实现;不过由于它实现了 IList,你也可以使用任何适用于集合的查找方法。
/// <summary>
/// Read-only <see cref="IList{T}"/> view over a file of 24-byte packets that
/// exposes the first 8 bytes of every packet as a <see cref="long"/>.
/// The values are expected to be sorted ascending, because <see cref="IndexOf"/>
/// and <see cref="Contains"/> are implemented with a binary search.
/// </summary>
public class FileStructList : IList<long>, IDisposable
{
    /// <summary>Size in bytes of one packet in the file.</summary>
    private const int PacketSize = 24;

    Stream baseStream;
    BinaryReader reader;
    int length;
    int headerSize;

    /// <summary>
    /// Opens the file for reading and computes the number of whole packets it holds.
    /// </summary>
    /// <param name="FileName">Path of the packet file.</param>
    /// <param name="HeaderSize">Number of bytes to skip at the start of the file; defaults to 0.</param>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="HeaderSize"/> is negative.</exception>
    /// <exception cref="NotSupportedException">
    /// The file holds more packets than an <see cref="int"/>-indexed list can address.
    /// </exception>
    public FileStructList(string FileName, int HeaderSize = 0)
    {
        if (HeaderSize < 0)
        {
            throw new ArgumentOutOfRangeException(nameof(HeaderSize), "Header size must not be negative.");
        }

        baseStream = File.OpenRead(FileName);
        try
        {
            long packetCount = (baseStream.Length - HeaderSize) / PacketSize;
            if (packetCount > int.MaxValue)
            {
                // IList<T> is indexed by int; silently truncating the count
                // would make searches return wrong answers.
                throw new NotSupportedException("The file contains more packets than an IList can index.");
            }

            // A file shorter than its header simply contains no packets.
            length = (int)Math.Max(packetCount, 0L);
        }
        catch
        {
            // Do not leak the file handle when construction fails.
            baseStream.Dispose();
            throw;
        }

        reader = new BinaryReader(baseStream);
        headerSize = HeaderSize;
    }

    /// <summary>
    /// Gets the 8-byte value stored at the start of the packet at <paramref name="index"/>.
    /// Setting is not implemented.
    /// </summary>
    /// <exception cref="ArgumentOutOfRangeException">The index is outside [0, Count).</exception>
    public long this[int index]
    {
        get
        {
            if (index < 0 || index >= length)
            {
                throw new ArgumentOutOfRangeException(nameof(index));
            }

            // 64-bit arithmetic: the offset exceeds int.MaxValue once the file passes 2 GiB.
            baseStream.Seek((long)PacketSize * index + headerSize, SeekOrigin.Begin);
            return reader.ReadInt64();
        }
        set
        {
            throw new NotImplementedException();
        }
    }

    /// <summary>Number of whole packets in the file.</summary>
    public int Count
    {
        get
        {
            return length;
        }
    }

    /// <summary>Always true; every mutating member throws.</summary>
    public bool IsReadOnly
    {
        get
        {
            return true;
        }
    }

    /// <summary>Not implemented; the list is read-only.</summary>
    public void Add(long item)
    {
        throw new NotImplementedException();
    }

    /// <summary>Not implemented; the list is read-only.</summary>
    public void Clear()
    {
        throw new NotImplementedException();
    }

    /// <summary>Returns true when a binary search finds <paramref name="item"/>.</summary>
    public bool Contains(long item)
    {
        return BinarySearchIndexOf(item) != -1;
    }

    /// <summary>
    /// Copies every value into <paramref name="array"/> starting at <paramref name="arrayIndex"/>.
    /// </summary>
    /// <exception cref="ArgumentNullException"><paramref name="array"/> is null.</exception>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="arrayIndex"/> is negative.</exception>
    /// <exception cref="ArgumentException">The destination does not have enough room.</exception>
    public void CopyTo(long[] array, int arrayIndex)
    {
        if (array == null)
        {
            throw new ArgumentNullException(nameof(array));
        }
        if (arrayIndex < 0)
        {
            throw new ArgumentOutOfRangeException(nameof(arrayIndex));
        }
        if (array.Length - arrayIndex < length)
        {
            throw new ArgumentException("Destination array is not long enough.", nameof(array));
        }

        for (int i = 0; i < length; i++)
        {
            array[arrayIndex + i] = this[i];
        }
    }

    /// <summary>Enumerates the values in file order, reading each one on demand.</summary>
    public IEnumerator<long> GetEnumerator()
    {
        for (int i = 0; i < length; i++)
        {
            yield return this[i];
        }
    }

    /// <summary>Returns the index found by a binary search for <paramref name="item"/>, or -1.</summary>
    public int IndexOf(long item)
    {
        return BinarySearchIndexOf(item);
    }

    /// <summary>Not implemented; the list is read-only.</summary>
    public void Insert(int index, long item)
    {
        throw new NotImplementedException();
    }

    /// <summary>Not implemented; the list is read-only.</summary>
    public bool Remove(long item)
    {
        throw new NotImplementedException();
    }

    /// <summary>Not implemented; the list is read-only.</summary>
    public void RemoveAt(int index)
    {
        throw new NotImplementedException();
    }

    IEnumerator IEnumerable.GetEnumerator()
    {
        return GetEnumerator();
    }

    /// <summary>
    /// Binary-searches the file for <paramref name="value"/>.
    /// </summary>
    /// <param name="value">Value to look for.</param>
    /// <param name="comparer">
    /// Ordering to use; must agree with the order of the data in the file.
    /// Defaults to <see cref="Comparer{T}.Default"/>.
    /// </param>
    /// <returns>Index of a matching packet, or -1 when none matches.</returns>
    public Int32 BinarySearchIndexOf(long value, IComparer<long> comparer = null)
    {
        comparer = comparer ?? Comparer<long>.Default;
        Int32 lower = 0;
        Int32 upper = length - 1;
        while (lower <= upper)
        {
            // lower + (upper - lower) / 2 avoids overflow of lower + upper.
            Int32 middle = lower + (upper - lower) / 2;
            Int32 comparisonResult = comparer.Compare(value, this[middle]);
            if (comparisonResult == 0)
            {
                return middle;
            }
            else if (comparisonResult < 0)
            {
                upper = middle - 1;
            }
            else
            {
                lower = middle + 1;
            }
        }
        return -1;
    }

    /// <summary>Closes the reader and the underlying file.</summary>
    public void Dispose()
    {
        // Disposing the BinaryReader also disposes the stream it wraps.
        reader.Dispose();
    }
}