正如这些问题所指出的,由于U-SQL强制要求确定性,Guid.NewGuid会为所有行返回相同的值;也就是说,在横向扩展执行时,如果某个元素(顶点)需要重试,它应当返回相同的值……
Guid.NewGuid() always return same Guid for all rows
但是……用户定义提取器的官方文档中的代码示例却有意使用了Guid.NewGuid()。
我并不是在质疑上述问题的答案的有效性,因为它们来自权威来源(U-SQL的项目经理,非常权威!)。但是,我想知道:在提取器中使用它,是否意味着可以正常使用NewGuid?是否只有在U-SQL的C#表达式和用户定义的函数中,NewGuid才是不安全的?
/// <summary>
/// U-SQL user-defined extractor that splits the input into rows and columns,
/// writes a freshly generated GUID into column 0, upper-cases column 2, and
/// copies every other column through unchanged as a string.
/// </summary>
/// <remarks>
/// NOTE(review): <see cref="Guid.NewGuid"/> is non-deterministic. If the runtime
/// re-executes this extractor (for example on a vertex retry), the regenerated
/// rows will carry different GUIDs — confirm that this is acceptable for the job.
/// </remarks>
[SqlUserDefinedExtractor(AtomicFileProcessing = true)]
public class FullDescriptionExtractor : IExtractor
{
    // Encoding used both to decode row text and to encode the row delimiter.
    private readonly Encoding _encoding;

    // Row delimiter, pre-encoded once so Extract does not re-encode it per call.
    private readonly byte[] _row_delim;

    // Character that separates columns within a row.
    private readonly char _col_delim;

    /// <summary>
    /// Creates the extractor.
    /// </summary>
    /// <param name="encoding">Text encoding of the input; UTF-8 is used when null.</param>
    /// <param name="row_delim">Row delimiter; defaults to CR LF.</param>
    /// <param name="col_delim">Column delimiter; defaults to a tab.</param>
    public FullDescriptionExtractor(Encoding encoding, string row_delim = "\r\n", char col_delim = '\t')
    {
        _encoding = encoding ?? Encoding.UTF8;
        _row_delim = _encoding.GetBytes(row_delim);
        _col_delim = col_delim;
    }

    /// <summary>
    /// Reads the input row by row and yields one output row per input row.
    /// </summary>
    /// <param name="input">Reader over the unstructured input.</param>
    /// <param name="output">Updatable row that is filled in and snapshotted for each input row.</param>
    /// <returns>A lazily produced sequence of read-only rows.</returns>
    public override IEnumerable<IRow> Extract(IUnstructuredReader input, IUpdatableRow output)
    {
        // Split on the configured row delimiter (previously a hard-coded "\r\n"
        // was used here, so the constructor's row_delim argument was ignored).
        foreach (Stream current in input.Split(_row_delim))
        {
            using (StreamReader streamReader = new StreamReader(current, _encoding))
            {
                string line = streamReader.ReadToEnd().Trim();

                // Split the row into columns by the column delimiter.
                string[] parts = line.Split(_col_delim);

                for (int column = 0; column < parts.Length; column++)
                {
                    if (column == 0)
                    {
                        // Column "guid": discard the incoming value and generate a new GUID.
                        output.Set<Guid>(column, Guid.NewGuid());
                    }
                    else if (column == 2)
                    {
                        // Column "user": convert to upper case. The invariant culture is
                        // used so the result does not depend on the executing node's culture.
                        output.Set<string>(column, parts[column].ToUpperInvariant());
                    }
                    else
                    {
                        // Keep the rest of the columns as-is.
                        output.Set<string>(column, parts[column]);
                    }
                }
            }

            yield return output.AsReadOnly();
        }
    }
}