我正在开发一个Visual Studio扩展，为一种自定义语言提供一些功能。我已经完成了简单的语法高亮，接下来希望实现语法错误高亮、大括号匹配、大纲（outlining）等功能。我目前关注的主要问题是：这些功能各自需要不同的标签（tag）类型，而（据我所知）这意味着需要不同的标记器（tagger）。但是，我看不到任何直观的方式在标记器之间共享信息，而我又确实需要这样做，因为这三件事都可以在对内容的一次解析中完成。当然，我可以把内容解析三次，但这听起来不是一个好的解决方案。
如何从一个标记器返回多种标签类型（也许使用ITag？），或者在多个标记器之间共享信息？
我目前的结构如下:
/// <summary>
/// Classification tagger that lexes a whole text snapshot with <c>Lexer</c> and
/// exposes keywords, comments and literals as <see cref="ClassificationTag"/> spans.
/// </summary>
/// <remarks>
/// A snapshot is lexed at most once; the resulting tag list is cached per snapshot.
/// The tagger subscribes to the buffer's <c>Changed</c> event and unsubscribes again
/// in <see cref="Dispose"/>.
/// </remarks>
internal class HighlightWordTagger : ITagger<ClassificationTag>, IDisposable
{
    ITextBuffer TextBuffer;
    IClassificationType Keyword;
    IClassificationType Comment;
    IClassificationType Literal;

    // Per-snapshot cache of lexer results. A ConditionalWeakTable is used instead of a
    // Dictionary so that an entry does not keep its snapshot alive: keys are held weakly,
    // and the cached tag spans (which reference the snapshot) do not count as strong
    // references to the key. Entries vanish once nothing else references the snapshot.
    System.Runtime.CompilerServices.ConditionalWeakTable<ITextSnapshot, List<TagSpan<ClassificationTag>>> SnapshotResults =
        new System.Runtime.CompilerServices.ConditionalWeakTable<ITextSnapshot, List<TagSpan<ClassificationTag>>>();

    /// <summary>Creates a tagger for <paramref name="sourceBuffer"/>.</summary>
    /// <param name="sourceBuffer">Buffer whose snapshots are lexed and tagged.</param>
    /// <param name="typeService">Registry used to look up the "WideKeyword",
    /// "WideComment" and "WideLiteral" classification types.</param>
    public HighlightWordTagger(ITextBuffer sourceBuffer, IClassificationTypeRegistryService typeService)
    {
        TextBuffer = sourceBuffer;
        Keyword = typeService.GetClassificationType("WideKeyword");
        Comment = typeService.GetClassificationType("WideComment");
        Literal = typeService.GetClassificationType("WideLiteral");

        // Subscribe last so the handler never observes half-initialized fields.
        TextBuffer.Changed += OnBufferChanged;
    }

    /// <summary>
    /// Returns every cached tag that intersects at least one of the requested spans.
    /// Each tag is returned at most once, even when it touches several requested spans.
    /// </summary>
    /// <param name="spans">Requested spans; a null or empty collection yields no tags.</param>
    public IEnumerable<ITagSpan<ClassificationTag>> GetTags(NormalizedSnapshotSpanCollection spans)
    {
        if (spans == null || spans.Count == 0)
        {
            yield break;
        }

        foreach (var tag in LexSnapshot(spans[0].Snapshot))
        {
            foreach (var requested in spans)
            {
                if (tag.Span.IntersectsWith(requested))
                {
                    yield return tag;
                    break; // do not report the same tag again for another requested span
                }
            }
        }
    }

    /// <summary>Detaches this tagger from the buffer's <c>Changed</c> event.</summary>
    public void Dispose()
    {
        TextBuffer.Changed -= OnBufferChanged;
    }

    // Lexes the new snapshot eagerly and reports the whole snapshot as changed.
    void OnBufferChanged(object sender, TextContentChangedEventArgs args)
    {
        LexSnapshot(args.After);
        TagsChanged(this, new SnapshotSpanEventArgs(new SnapshotSpan(args.After, new Span(0, args.After.Length))));
    }

    // Converts a lexer range (begin/end offsets) into an editor Span (start/length).
    Span SpanFromLexer(Lexer.Range range)
    {
        return new Span((int)range.begin.offset, (int)(range.end.offset - range.begin.offset));
    }

    // Returns the tag list for the snapshot, lexing it only if it is not cached yet.
    List<TagSpan<ClassificationTag>> LexSnapshot(ITextSnapshot shot)
    {
        return SnapshotResults.GetValue(shot, BuildTags);
    }

    // Runs the lexer over the full snapshot text and collects classification tags.
    List<TagSpan<ClassificationTag>> BuildTags(ITextSnapshot shot)
    {
        var lexer = new Lexer();
        var list = new List<TagSpan<ClassificationTag>>();
        lexer.Read(
            shot.GetText(),
            // Failure callback: an unterminated comment or string literal is classified
            // from the failure position to the end of the snapshot.
            (position, failure) =>
            {
                if (failure == Lexer.Failure.UnlexableCharacter)
                {
                    return false;
                }

                // Clamp the start so the span length can never become negative; the
                // comment callback below shows lexer offsets may exceed the snapshot length.
                int start = Math.Min((int)position.offset, shot.Length);
                var loc = new Span(start, shot.Length - start);
                if (failure == Lexer.Failure.UnterminatedComment)
                {
                    list.Add(new TagSpan<ClassificationTag>(new SnapshotSpan(shot, loc), new ClassificationTag(Comment)));
                }
                if (failure == Lexer.Failure.UnterminatedStringLiteral)
                {
                    list.Add(new TagSpan<ClassificationTag>(new SnapshotSpan(shot, loc), new ClassificationTag(Literal)));
                }
                return false;
            },
            // Comment callback.
            range =>
            {
                // Clamp this so it doesn't go over the end when we add \n in the lexer.
                range.end.offset = range.end.offset > shot.Length ? (uint)(shot.Length) : range.end.offset;
                var loc = SpanFromLexer(range);
                list.Add(new TagSpan<ClassificationTag>(new SnapshotSpan(shot, loc), new ClassificationTag(Comment)));
            },
            // Token callback: string and integer tokens are literals; keyword tokens are keywords.
            token =>
            {
                var location = SpanFromLexer(token.location);
                if (token.type == Lexer.TokenType.String || token.type == Lexer.TokenType.Integer)
                {
                    list.Add(new TagSpan<ClassificationTag>(new SnapshotSpan(shot, location), new ClassificationTag(Literal)));
                }
                if (lexer.IsKeyword(token.type))
                {
                    list.Add(new TagSpan<ClassificationTag>(new SnapshotSpan(shot, location), new ClassificationTag(Keyword)));
                }
                return false;
            }
        );
        return list;
    }

    /// <summary>Raised with a span covering the whole new snapshot after every buffer change.</summary>
    public event EventHandler<SnapshotSpanEventArgs> TagsChanged = delegate { };
}
我或许可以做得更好，避免如此频繁地重新进行词法分析，但那是另一个问题了。
答案 0 :(得分:5)
我最终不得不把这些关注点分离开来。您可以使用ITextBuffer.Properties.GetOrCreateSingletonProperty
将您选择的任意对象与文本缓冲区相关联。我最终创建了一个单独的lexer类，将它与文本缓冲区相关联，并让它执行除了打标签之外的几乎所有逻辑。然后在每个标记器（tagger）的实现中，我只需向lexer查询结果，再据此生成标签。这样多个标记器就可以依赖同一个词法分析器实例。
考虑到大多数词法分析器和解析器都会产生多种标签，我很惊讶VS竟然需要用这么别扭的办法才能得到这种结果。
答案 1 :(得分:3)
正如Puppy所说，你可以使用多个单独的标记器（tagger），它们可以通过GetOrCreateSingletonProperty
相互关联，但我认为标记器的创建顺序是没有保证的，所以初始化过程可能不太方便。
与此同时，我成功地将多个标记器组合到了一个类中，我甚至制作了一个完整的“示例语言”来演示这种技术。代码如下：
/// <summary>MEF factory that registers the "Sample Language" content type, maps the
/// .samplelang file extension to it, and hands out the per-buffer
/// <see cref="SampleLanguageForVS"/> instance both as a classifier and as a tagger.</summary>
[Export(typeof(IClassifierProvider))]
[Export(typeof(ITaggerProvider))]
[TagType(typeof(ClassificationTag))]
[TagType(typeof(ErrorTag))]
[ContentType("Sample Language")]
internal class SampleLanguageForVSProvider : IClassifierProvider, ITaggerProvider
{
    // Exported content type definition; the field exists only to carry the attributes.
    [Export]
    [Name("Sample Language")] // Must match the [ContentType] attributes
    [BaseDefinition("code")]
    internal static ContentTypeDefinition _ = null;

    // Exported mapping from the .samplelang extension to the content type above.
    [Export]
    [FileExtension(".samplelang")]
    [ContentType("Sample Language")]
    internal static FileExtensionToContentTypeDefinition _1 = null;

    [Import] IClassificationTypeRegistryService _registry = null; // Set via MEF

    /// <summary>Returns the <see cref="SampleLanguageForVS"/> stored in the buffer's
    /// property bag, creating and storing it on first request.</summary>
    /// <param name="registry">Registry passed to a newly created instance.</param>
    /// <param name="buffer">Buffer that owns the shared instance.</param>
    public static SampleLanguageForVS Get(IClassificationTypeRegistryService registry, ITextBuffer buffer)
    {
        var properties = buffer.Properties;
        return properties.GetOrCreateSingletonProperty<SampleLanguageForVS>(
            () => new SampleLanguageForVS(registry, buffer));
    }

    /// <summary>Returns the buffer's shared instance as its classifier.</summary>
    public IClassifier GetClassifier(ITextBuffer buffer)
    {
        IClassifier classifier = Get(_registry, buffer);
        return classifier;
    }

    /// <summary>Returns the buffer's shared instance as a tagger for <typeparamref name="T"/>,
    /// or null when the instance does not implement <c>ITagger&lt;T&gt;</c>.</summary>
    public ITagger<T> CreateTagger<T>(ITextBuffer buffer) where T : ITag
    {
        SampleLanguageForVS shared = Get(_registry, buffer);
        return shared as ITagger<T>;
    }
}
/// <summary>
/// Sample language service that combines three roles in one object per text buffer:
/// a line-based comment classifier, a classification tagger for outermost parentheses
/// and brackets, and an error tagger for backslashes and unmatched closers.
/// Both taggers are fed by a single analysis pass (<see cref="RunAnalysis"/>).
/// </summary>
internal class SampleLanguageForVS : IClassifier,
    ITagger<ClassificationTag>,
    ITagger<ErrorTag>,
    IBackgroundAnalyzerImpl<object, IList<ITagSpan<ITag>>>
{
    protected IClassificationTypeRegistryService _registry;
    protected ITextBuffer _buffer;
    protected IClassificationType _commentType;
    protected ClassificationTag _outerParenTag;
    // Tags from the most recent successful analysis; null until the first one completes.
    protected IList<ITagSpan<ITag>> _resultTags;
    protected BackgroundAnalyzerForVS<object, IList<ITagSpan<ITag>>> _parseHelper;

    /// <summary>Creates the language service for <paramref name="buffer"/>.</summary>
    /// <param name="registry">Registry used to resolve classification types.</param>
    /// <param name="buffer">Buffer to classify and analyze.</param>
    public SampleLanguageForVS(IClassificationTypeRegistryService registry, ITextBuffer buffer)
    {
        _registry = registry;
        _buffer = buffer;
        _commentType = registry.GetClassificationType(PredefinedClassificationTypeNames.Comment);
        _outerParenTag = MakeTag(PredefinedClassificationTypeNames.Keyword);
        _parseHelper = new BackgroundAnalyzerForVS<object, IList<ITagSpan<ITag>>>(buffer, this, true);
    }

    // Builds a classification tag for the named classification type.
    ClassificationTag MakeTag(string name)
    {
        return new ClassificationTag(_registry.GetClassificationType(name));
    }

    #region Classifier (lexical analysis)

    public event EventHandler<ClassificationChangedEventArgs> ClassificationChanged;

    /// <summary>Classifies every line that intersects <paramref name="span"/>.</summary>
    /// <returns>One whole-line comment span per comment line; never null.</returns>
    public IList<ClassificationSpan> GetClassificationSpans(SnapshotSpan span)
    {
        List<ClassificationSpan> spans = new List<ClassificationSpan>();
        var line = span.Snapshot.GetLineFromPosition(span.Start);
        do
        {
            var cspan = GetLineClassification(line);
            if (cspan != null)
            {
                spans.Add(cspan);
            }
            if (line.EndIncludingLineBreak.Position >= span.Snapshot.Length)
            {
                break;
            }
            line = span.Snapshot.GetLineFromPosition(line.EndIncludingLineBreak.Position);
            // Continue while the next line STARTS inside the requested span, so a final
            // line that is only partially covered by the span is still classified.
        } while (line.Start.Position < span.End.Position);
        return spans;
    }

    /// <summary>
    /// Returns a comment classification covering the whole line when its first
    /// non-whitespace characters are '#' or "//"; otherwise returns null.
    /// </summary>
    public ClassificationSpan GetLineClassification(ITextSnapshotLine line)
    {
        var snapshot = line.Snapshot;
        // Scan only up to the end of this line (line break excluded). Line-break
        // characters are whitespace too, so scanning to the snapshot end would let a
        // blank line pick up the comment marker of the following line.
        int lineEnd = line.End.Position;
        int i = line.Start.Position;
        while (i < lineEnd && char.IsWhiteSpace(snapshot[i]))
        {
            i++;
        }

        bool isComment = i < lineEnd &&
            (snapshot[i] == '#' ||
             (snapshot[i] == '/' && i + 1 < lineEnd && snapshot[i + 1] == '/'));
        if (!isComment)
        {
            return null;
        }

        var sspan = new SnapshotSpan(snapshot, new Span(line.Start.Position, line.Length));
        return new ClassificationSpan(sspan, _commentType);
    }

    #endregion

    #region Background analysis (the two taggers)

    public object GetInputSnapshot()
    {
        return null; // this example has no state to pass to the analysis thread.
    }

    /// <summary>
    /// Scans the snapshot once and produces classification tags for outermost
    /// [(parens)], a warning tag for each backslash, and a syntax error tag for each
    /// closer without a matching opener.
    /// </summary>
    /// <remarks><paramref name="input"/> and <paramref name="cancelToken"/> are not
    /// used by this sample.</remarks>
    public IList<ITagSpan<ITag>> RunAnalysis(ITextSnapshot snapshot, object input, System.Threading.CancellationToken cancelToken)
    {
        List<ITagSpan<ITag>> results = new List<ITagSpan<ITag>>();
        // On analysis thread: produce classification tags for nested [(parens)]
        // and warning tags for backslashes.
        int parenLevel = 0;
        for (int i = 0; i < snapshot.Length; i++)
        {
            char c = snapshot[i];
            if (c == '\\')
            {
                results.Add(new TagSpan<ErrorTag>(
                    new SnapshotSpan(snapshot, new Span(i, 1)),
                    new ErrorTag("compiler warning", "Caution: that's not really a slash, it's a backslash!!")));
            }

            bool open = (c == '[' || c == '(');
            bool close = (c == ']' || c == ')');
            if (close)
            {
                if (parenLevel > 0)
                {
                    parenLevel--;
                }
                else
                {
                    // Unmatched closer: flag up to two characters, clipped at the snapshot end.
                    results.Add(new TagSpan<ErrorTag>(
                        new SnapshotSpan(snapshot, new Span(i, Math.Min(2, snapshot.Length - i))),
                        new ErrorTag("syntax error", "Caution: closing parenthesis without matching opener")));
                }
            }

            // Level 0 here means an outermost opener, or a closer that returned to level 0.
            if ((open || close) && parenLevel == 0)
            {
                results.Add(new TagSpan<ClassificationTag>(
                    new SnapshotSpan(snapshot, new Span(i, 1)),
                    _outerParenTag));
            }

            if (open)
            {
                parenLevel++;
            }
        }
        return results;
    }

    /// <summary>Stores the new analysis results and reports the whole buffer as changed.</summary>
    public void OnRunSucceeded(IList<ITagSpan<ITag>> results)
    {
        _resultTags = results;
        // We don't know which tags changed unless we do some fancy diff, so
        // act as if everything changed.
        var handler = TagsChanged; // local copy: the subscriber list cannot change between check and call
        if (handler != null) // should always be true
        {
            // Read CurrentSnapshot once so the snapshot and the span length always match.
            var current = _buffer.CurrentSnapshot;
            handler(this, new SnapshotSpanEventArgs(new SnapshotSpan(current, new Span(0, current.Length))));
        }
    }

    #endregion

    #region ITagger<ClassificationTag> and ITagger<ErrorTag> Members

    IEnumerable<ITagSpan<ErrorTag>> ITagger<ErrorTag>.GetTags(NormalizedSnapshotSpanCollection spans)
    {
        return GetTags<ErrorTag>(spans);
    }

    IEnumerable<ITagSpan<ClassificationTag>> ITagger<ClassificationTag>.GetTags(NormalizedSnapshotSpanCollection spans)
    {
        return GetTags<ClassificationTag>(spans);
    }

    /// <summary>
    /// Returns the analysis tags of type <typeparamref name="TTag"/> that fall within the
    /// range from the start of the first requested span to the end of the last one.
    /// </summary>
    /// <returns>An empty sequence (never null) when no analysis has completed yet or
    /// when <paramref name="spans"/> is null or empty.</returns>
    public IEnumerable<ITagSpan<TTag>> GetTags<TTag>(NormalizedSnapshotSpanCollection spans) where TTag : ITag
    {
        var results = _resultTags; // read the field once; OnRunSucceeded may replace it
        if (results == null || spans == null || spans.Count == 0)
        {
            return Enumerable.Empty<ITagSpan<TTag>>();
        }

        // TODO: make more efficient for large files with e.g. binary search
        // NOTE(review): positions are compared without translating between snapshots;
        // if results come from an older snapshot than the request, offsets may be stale - confirm.
        int start = spans[0].Start.Position, end = spans[spans.Count - 1].End.Position;
        return results.Where(ts => ts.Span.End >= start && ts.Span.Start <= end).OfType<ITagSpan<TTag>>();
    }

    public event EventHandler<SnapshotSpanEventArgs> TagsChanged;

    #endregion
}
上面的代码省略了using
指令（请参阅complete source file）以及BackgroundAnalyzerForVS
类。如果将这些代码放入一个vsix项目中，您将得到“词法分析”、延迟执行的“解析”，以及警告和错误标签。演示文件：
Open this in Visual Studio to see "sample" syntax highlighting.
// Backslashes are underlined.
\\ <-- Such as those ones.
When you start a parenthetical (like this) the parens are highlighted,
but ([nested parens (like this)]) are not highlighted.
# Do not write a closing ")" without an opening "(".