我正在尝试生成6个字符或更少的随机base32数字。这应该提供大约10亿种不同的组合。
我创建了一个程序来生成这些“随机”数字。但是，它似乎平均每生成约 40,000 个数字就会出现一次重复。
当存在超过十亿种不同的组合时,为什么这些“随机”数字经常重复?
这是我的代码:
/// <summary>
/// Runs 1000 trials. Each trial draws random numbers, encodes them as Crockford Base32
/// strings and counts how many distinct strings were produced before the first duplicate.
/// Prints the count of every trial and, at the end, the average over all trials.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
{
    int seed = Environment.TickCount;
    Random r = new Random(seed);
    // A single encoder is reused for every draw instead of allocating one per number.
    CrockfordBase32Encoding encoding = new CrockfordBase32Encoding();
    Dictionary<int, int> resultDictionary = new Dictionary<int, int>();

    // The loop header is the only place that advances x; an extra x++ in the body
    // would silently cut the experiment down to 500 trials.
    for (int x = 1; x <= 1000; x++)
    {
        HashSet<string> seen = new HashSet<string>();
        while (true)
        {
            // Random.Next treats the upper bound as exclusive.
            int rand = r.Next(0, 1073741823);
            string encodedRand = encoding.Encode((ulong)rand, false);

            // HashSet.Add returns false for a value that is already present, so the
            // duplicate is detected without throwing and catching an exception.
            if (!seen.Add(encodedRand))
            {
                break;
            }
        }

        Console.WriteLine("{0} - {1}", x, seen.Count);
        resultDictionary.Add(x, seen.Count);
    }

    Console.WriteLine();
    Console.WriteLine("Average Number Before Duplicate: " + resultDictionary.Average(entry => entry.Value));
    Console.ReadLine();
}
答案 0 :(得分:117)
这类似于生日问题（Birthday Problem）：给定一组 n 个人，其中有两人生日相同的概率是多少？[1] 这个概率比你想象的要高。
在你的情况下，问题就是：在 0 到 1,073,741,823 之间随机选取 n 次数字，出现重复的概率是多少？
上述链接给出的一个近似公式为 $1 - \exp\left(-\frac{n^2}{2d}\right)$，其中 d 是可能取值的总数。当 n = 40,000 时，出现重复的概率约为 52.5%，因此平均在大约 40,000 次选取之后看到重复是合理的。
[1] 假设生日服从均匀分布——实际情况并非如此，但已“足够接近”，而且能让数学推导更简单。
答案 1 :(得分:40)
这被称为Birthday Problem,只是基本的概率论。
在 1 到 K 的范围内取 N 个随机数而不出现重复的概率是：
$$p(N, K) = \frac{K!}{K^{N}\,(K-N)!}$$
要得到至少出现一次重复的概率，用 1 减去该值即可：$P(N, K) = 1 - p(N, K)$。
在你的情况下,它评估为
P(40000, 1073741823) = 1 - p(40000, 1073741823)
使用Wolfram Alpha进行计算,结果为
0.5252888122305790
这意味着您获得重复的可能性略高于50%。随着您产生更多数字,您将越来越多地获得重复数据。
以下是对N的更多评价:
N Result
40000 0.5253
100000 0.9905
200000 0.9999
答案 2 :(得分:5)
框架中自带的随机数生成器是伪随机的，并不保证随机数的分布。如果您关心分布模式，请参考以下文章：http://www.codeproject.com/Articles/15102/NET-random-number-generators-and-distributions
尽管如此，我的统计学教授们（不止一位）曾经说过：“世上有小谎言、大谎言，还有统计数据。”
首先是完整的代码,所以人们不必在互联网上搜索要测试的类实现:
using System;
using System.Collections.Generic;
using System.Linq;
using System.Threading;
namespace ConsoleApplication2
{
/// <summary>
/// Console experiment that measures how many random Crockford Base32 strings can be
/// generated before the first duplicate appears.
/// </summary>
class Program
{
    /// <summary>
    /// Runs 1000 trials. Each trial draws random numbers, encodes them and counts the
    /// distinct strings seen before the first duplicate. Prints the count of every trial
    /// followed by the average, minimum, maximum and median over all trials.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    static void Main(string[] args)
    {
        var r = RandomProvider.GetThreadRandom();
        // Encode only reads the class's static lookup tables, so one encoder serves every draw.
        CrockfordBase32Encoding encoding = new CrockfordBase32Encoding();
        Dictionary<int, int> resultDictionary = new Dictionary<int, int>();

        // The loop header is the only place that advances x; an extra x++ in the body
        // would silently cut the experiment down to 500 trials.
        for (int x = 1; x <= 1000; x++)
        {
            HashSet<string> seen = new HashSet<string>();
            while (true)
            {
                // Random.Next treats the upper bound as exclusive.
                int rand = r.Next(0, 1073741823);
                string encodedRand = encoding.Encode((ulong)rand, false);

                // HashSet.Add returns false for a value that is already present, so the
                // duplicate is detected without throwing and catching an exception.
                if (!seen.Add(encodedRand))
                {
                    break;
                }
            }

            Console.WriteLine("{0} - {1}", x, seen.Count);
            resultDictionary.Add(x, seen.Count);
        }

        Console.WriteLine();
        Console.WriteLine("Average Number Before Duplicate: " + resultDictionary.Average(entry => entry.Value));
        Console.WriteLine("Minimum Number Before Duplicate: " + resultDictionary.Min(entry => entry.Value));
        Console.WriteLine("Maximum Number Before Duplicate: " + resultDictionary.Max(entry => entry.Value));
        Console.WriteLine(" Median Number Before Duplicate: " + resultDictionary.Select(entry => entry.Value).Median());
        Console.ReadLine();
    }
}
/// <summary>
/// LINQ-style helper extensions.
/// </summary>
public static class Extensions
{
    /// <summary>
    /// Computes the median of a sequence after converting every element to
    /// <see cref="double"/> with <see cref="Convert.ToDouble(object)"/>.
    /// </summary>
    /// <typeparam name="T">Element type; must be convertible by <c>Convert.ToDouble</c>.</typeparam>
    /// <param name="list">The values to take the median of.</param>
    /// <returns>
    /// The middle value for an odd number of elements, or the mean of the two middle
    /// values for an even number of elements.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="list"/> is null.</exception>
    /// <exception cref="InvalidOperationException"><paramref name="list"/> is empty.</exception>
    public static double Median<T>(this IEnumerable<T> list)
    {
        if (list == null)
        {
            throw new ArgumentNullException("list");
        }

        List<double> orderedList = list.Select(s => Convert.ToDouble(s))
                                       .OrderBy(number => number)
                                       .ToList();
        int listSize = orderedList.Count;

        // An empty sequence has no median; fail with a clear message instead of an
        // index-out-of-range error from looking up element -1.
        if (listSize == 0)
        {
            throw new InvalidOperationException("Sequence contains no elements");
        }

        // For an odd count integer division already yields the middle index;
        // for an even count it yields the upper of the two middle indices.
        int midIndex = listSize / 2;
        if (listSize % 2 == 0)
        {
            return (orderedList[midIndex - 1] + orderedList[midIndex]) / 2;
        }

        return orderedList[midIndex];
    }
}
/// <summary>
/// Supplies a <see cref="Random"/> instance per thread through a <see cref="ThreadLocal{T}"/>,
/// giving each newly created instance its own seed.
/// </summary>
public static class RandomProvider
{
    // Seed counter: starts at the current tick count and is incremented atomically
    // each time a new generator is created.
    private static int nextSeed = Environment.TickCount;

    // Lazily creates the calling thread's generator on first access.
    private static ThreadLocal<Random> perThreadRandom = new ThreadLocal<Random>(CreateRandom);

    /// <summary>
    /// Returns the <see cref="Random"/> that belongs to the calling thread.
    /// </summary>
    /// <returns>The calling thread's generator; repeated calls on one thread return the same instance.</returns>
    public static Random GetThreadRandom()
    {
        Random current = perThreadRandom.Value;
        return current;
    }

    // Factory used by the ThreadLocal: takes the next seed value and builds a generator from it.
    private static Random CreateRandom()
    {
        int seedForThisThread = Interlocked.Increment(ref nextSeed);
        return new Random(seedForThisThread);
    }
}
/// <summary>
/// Encodes unsigned 64-bit integers to Base32 strings and decodes them back, using the
/// symbol tables supplied by <c>SymbolDefinitions</c>. An optional trailing check
/// character carries the value modulo 37.
/// </summary>
public class CrockfordBase32Encoding
{
    const int Base = 32;
    const int CheckDigitBase = 37;

    // Lookup tables, built once in the static constructor.
    static readonly IDictionary<int, char> valueEncodings;
    static readonly IDictionary<int, char> checkDigitEncodings;
    static readonly IDictionary<char, int> valueDecodings;
    static readonly IDictionary<char, int> checkDigitDecodings;

    static CrockfordBase32Encoding()
    {
        var symbols = new SymbolDefinitions();
        valueEncodings = symbols.ValueEncodings;
        checkDigitEncodings = symbols.CheckDigitEncodings;
        valueDecodings = symbols.ValueDecodings;
        checkDigitDecodings = symbols.CheckDigitDecodings;
    }

    /// <summary>
    /// Encodes a number as a Base32 string, most significant digit first.
    /// </summary>
    /// <param name="input">The number to encode.</param>
    /// <param name="includeCheckDigit">
    /// When true, one extra character encoding <c>input % 37</c> is appended.
    /// </param>
    /// <returns>The encoded string; zero encodes to a single digit.</returns>
    public string Encode(ulong input, bool includeCheckDigit)
    {
        var chunks = SplitInto5BitChunks(input);
        var characters = chunks.Select(chunk => valueEncodings[chunk]);
        if (includeCheckDigit)
        {
            var checkValue = (int)(input % CheckDigitBase);
            characters = characters.Concat(new[] { checkDigitEncodings[checkValue] });
        }
        return new string(characters.ToArray());
    }

    /// <summary>
    /// Splits a number into 5-bit groups, most significant group first.
    /// </summary>
    /// <param name="input">The number to split.</param>
    /// <returns>At least one chunk (a single 0 for an input of 0); each chunk is in 0..31.</returns>
    internal static IEnumerable<byte> SplitInto5BitChunks(ulong input)
    {
        const int bitsPerChunk = 5;
        const ulong chunkMask = (1UL << bitsPerChunk) - 1;
        var chunks = new List<byte>();
        do
        {
            // Collect least-significant chunk first; appending avoids shifting the
            // whole list on every insert at the front.
            chunks.Add((byte)(input & chunkMask));
            input >>= bitsPerChunk;
        } while (input > 0);

        // Callers expect most-significant chunk first.
        chunks.Reverse();
        return chunks;
    }

    /// <summary>
    /// Decodes a Base32 string back into a number.
    /// </summary>
    /// <param name="encodedString">The text to decode.</param>
    /// <param name="treatLastCharacterAsCheckDigit">
    /// When true, the final character is validated as a check character instead of being
    /// decoded as part of the value.
    /// </param>
    /// <returns>
    /// The decoded number, or null when the string is empty, contains an unknown symbol,
    /// fails the check-character validation, or encodes a value that does not fit in a
    /// <see cref="ulong"/>.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="encodedString"/> is null.</exception>
    public ulong? Decode(string encodedString, bool treatLastCharacterAsCheckDigit)
    {
        if (encodedString == null)
            throw new ArgumentNullException("encodedString");
        if (encodedString.Length == 0)
            return null;

        string valueCharacters = encodedString;
        int? expectedCheckValue = null;
        if (treatLastCharacterAsCheckDigit)
        {
            int lastIndex = encodedString.Length - 1;
            int checkValue;
            if (!checkDigitDecodings.TryGetValue(encodedString[lastIndex], out checkValue))
                return null;
            expectedCheckValue = checkValue;
            valueCharacters = encodedString.Substring(0, lastIndex);
        }

        // Largest accumulator value that can still take one more base-32 digit without
        // exceeding ulong.MaxValue. Unchecked arithmetic would otherwise wrap around and
        // return a wrong number for over-long input.
        const ulong maxBeforeNextDigit = ulong.MaxValue / Base;

        ulong number = 0;
        foreach (char character in valueCharacters)
        {
            int value;
            if (!valueDecodings.TryGetValue(character, out value))
                return null;
            if (number > maxBeforeNextDigit)
                return null;
            number = number * Base + (ulong)value;
        }

        if (expectedCheckValue.HasValue &&
            (int)(number % CheckDigitBase) != expectedCheckValue)
            return null;
        return number;
    }
}
/// <summary>
/// The symbol table: a list of the 32 value symbols (values 0..31) plus a private list of
/// five extra symbols (values 32..36) that are only used for check digits. The properties
/// project these lists into encode and decode lookup dictionaries.
/// </summary>
internal class SymbolDefinitions : List<SymbolDefinition>
{
    // Symbols that are valid as a check digit but not as a value digit.
    readonly List<SymbolDefinition> extraCheckDigits = new List<SymbolDefinition>();

    /// <summary>
    /// Fills the value-symbol list (this instance) and the extra check-digit list.
    /// </summary>
    public SymbolDefinitions()
    {
        AddRange(new[]
        {
            Define(0, '0', '0', 'O', 'o'),
            Define(1, '1', '1', 'I', 'i', 'L', 'l'),
            Define(2, '2', '2'),
            Define(3, '3', '3'),
            Define(4, '4', '4'),
            Define(5, '5', '5'),
            Define(6, '6', '6'),
            Define(7, '7', '7'),
            Define(8, '8', '8'),
            Define(9, '9', '9'),
            Define(10, 'A', 'A', 'a'),
            Define(11, 'B', 'B', 'b'),
            Define(12, 'C', 'C', 'c'),
            Define(13, 'D', 'D', 'd'),
            Define(14, 'E', 'E', 'e'),
            Define(15, 'F', 'F', 'f'),
            Define(16, 'G', 'G', 'g'),
            Define(17, 'H', 'H', 'h'),
            Define(18, 'J', 'J', 'j'),
            Define(19, 'K', 'K', 'k'),
            Define(20, 'M', 'M', 'm'),
            Define(21, 'N', 'N', 'n'),
            Define(22, 'P', 'P', 'p'),
            Define(23, 'Q', 'Q', 'q'),
            Define(24, 'R', 'R', 'r'),
            Define(25, 'S', 'S', 's'),
            Define(26, 'T', 'T', 't'),
            Define(27, 'V', 'V', 'v'),
            Define(28, 'W', 'W', 'w'),
            Define(29, 'X', 'X', 'x'),
            Define(30, 'Y', 'Y', 'y'),
            Define(31, 'Z', 'Z', 'z'),
        });

        extraCheckDigits.AddRange(new[]
        {
            Define(32, '*', '*'),
            Define(33, '~', '~'),
            Define(34, '$', '$'),
            Define(35, '=', '='),
            Define(36, 'U', 'U', 'u'),
        });
    }

    /// <summary>Maps each value (0..31) to the character used when encoding it.</summary>
    public IDictionary<int, char> ValueEncodings
    {
        get { return BuildEncodings(this); }
    }

    /// <summary>Maps each check value (0..36) to the character used when encoding it.</summary>
    public IDictionary<int, char> CheckDigitEncodings
    {
        get { return BuildEncodings(this.Union(extraCheckDigits)); }
    }

    /// <summary>Maps every accepted value character to the value it decodes to.</summary>
    public IDictionary<char, int> ValueDecodings
    {
        get { return BuildDecodings(this); }
    }

    /// <summary>Maps every accepted check character to the check value it decodes to.</summary>
    public IDictionary<char, int> CheckDigitDecodings
    {
        get { return BuildDecodings(this.Union(extraCheckDigits)); }
    }

    // Builds one table entry; the decode symbols are every character accepted for the value.
    static SymbolDefinition Define(int value, char encodeSymbol, params char[] decodeSymbols)
    {
        return new SymbolDefinition
        {
            Value = value,
            EncodeSymbol = encodeSymbol,
            DecodeSymbols = decodeSymbols
        };
    }

    // value -> encode symbol; a repeated value is rejected by Dictionary.Add.
    static IDictionary<int, char> BuildEncodings(IEnumerable<SymbolDefinition> symbols)
    {
        var table = new Dictionary<int, char>();
        foreach (SymbolDefinition symbol in symbols)
        {
            table.Add(symbol.Value, symbol.EncodeSymbol);
        }
        return table;
    }

    // decode symbol -> value, one entry per accepted character of every definition.
    static IDictionary<char, int> BuildDecodings(IEnumerable<SymbolDefinition> symbols)
    {
        var table = new Dictionary<char, int>();
        foreach (SymbolDefinition symbol in symbols)
        {
            foreach (char decodeSymbol in symbol.DecodeSymbols)
            {
                table.Add(decodeSymbol, symbol.Value);
            }
        }
        return table;
    }
}
/// <summary>
/// One entry of the symbol table: a numeric value, the character written when encoding
/// it, and the characters accepted when decoding it.
/// </summary>
internal class SymbolDefinition
{
    /// <summary>The numeric value this symbol stands for.</summary>
    public int Value { get; set; }

    /// <summary>The character emitted when <see cref="Value"/> is encoded.</summary>
    public char EncodeSymbol { get; set; }

    /// <summary>The characters that decode to <see cref="Value"/>.</summary>
    public IEnumerable<char> DecodeSymbols { get; set; }
}
}
我添加了几个额外的输出行:
Average Number Before Duplicate: 41043.954
Minimum Number Before Duplicate: 2498
Maximum Number Before Duplicate: 127683
Median Number Before Duplicate: 37860
有意思的是：虽然平均值大约是 40k，但看看最小值和最大值，两者相差了两个数量级。
随机性并不保证均匀分布。连续掷两次骰子，两次都掷出 4，这仍然是随机的。一生中两次甚至多次中彩票大奖的事情也曾发生过。
如果你需要让每个线程拥有更独立的随机分布，我在代码中包含了取自 Jon Skeet 那本非常出色的书（most excellent book）的 RandomProvider 示例（是的，我是他的粉丝）。
**更新**
用于并行执行的小重写,因为折磨基于硅的生命形式很有趣:
/// <summary>
/// Parallel variant of the experiment: 1000 independent trials are dispatched through
/// Parallel.For. Each trial draws random numbers, encodes them and stops at the first
/// encoded string that was already seen. Prints the count of every trial followed by
/// the average, minimum, maximum and median over all trials.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
{
    var runLengths = new ConcurrentDictionary<int, int>();

    Parallel.For(0, 1000, run =>
    {
        // Fetched inside the body so each worker thread uses its own generator.
        var generator = RandomProvider.GetThreadRandom();
        var seen = new ConcurrentDictionary<string, int>();

        bool wasNew;
        do
        {
            int candidate = generator.Next(0, 1073741823);
            var encoder = new CrockfordBase32Encoding();
            string key = encoder.Encode((ulong)candidate, false);

            // TryAdd reports false once the key is already present, which ends the trial.
            wasNew = seen.TryAdd(key, candidate);
        } while (wasNew);

        Console.WriteLine("{0} - {1}", run, seen.Count);
        runLengths.TryAdd(run, seen.Count);
    });

    // All trials have finished here, so the collected counts can be read once and reused.
    var counts = runLengths.Values;

    Console.WriteLine();
    Console.WriteLine("Average Number Before Duplicate: " + counts.Average());
    Console.WriteLine("Minimum Number Before Duplicate: " + counts.Min());
    Console.WriteLine("Maximum Number Before Duplicate: " + counts.Max());
    Console.WriteLine(" Median Number Before Duplicate: " + counts.Median());
    Console.ReadLine();
}
和结果:
Average Number Before Duplicate: 41826.375
Minimum Number Before Duplicate: 1655
Maximum Number Before Duplicate: 134671
Median Number Before Duplicate: 39119
更新2
所以CodeProject文章的作者将他的作品发布为NuGet包:
Install-Package Troschuetz.Random
我使用相同的示例代码来测试不同的生成器:
StandardGenerator
Average Number Before Duplicate: 40434.148
Minimum Number Before Duplicate: 978
Maximum Number Before Duplicate: 136248
Median Number Before Duplicate: 38845
ALFGenerator
Average Number Before Duplicate: 40395.845
Minimum Number Before Duplicate: 828
Maximum Number Before Duplicate: 125705
Median Number Before Duplicate: 38042
MT19937Generator
Average Number Before Duplicate: 40478.174
Minimum Number Before Duplicate: 2723
Maximum Number Before Duplicate: 121367
Median Number Before Duplicate: 38279
XorShift128Generator
Average Number Before Duplicate: 41463.732
Minimum Number Before Duplicate: 878
Maximum Number Before Duplicate: 111206
Median Number Before Duplicate: 39013.5
好了，就是这些。仅供参考，希望对你有用。