让我们有一系列数字。说{1,2,3,4,5}。和一组带有权重的子序列替换。例如:
{1} - > {3}:0.2 - 巫婆意味着1可以被3代替,重量为0.1
{2,3,4} - > {4,3}:0.3
{5} - > {2}:0.4
我需要找到我们可以使用具有限制权重的替换获得的所有序列。
限制的作用是这样的:任何n(3)项(连续)的替换权重之和应该小于给定数e <= 0.5。
对于我们的例子,结果是:
{1,2,3,4,5}:替换权重:{0,0,0,0,0}。连续3个项目的总和小于0.5
{3,2,3,4,5}:替换权重:{0.2,0,0,0,0}
{1,4,3,5}:替换权重:{0,0.3 / 3,0.3 / 3,0.3 / 3,0} 3个符号的序列所以/ 3
{3,4,3,5}替代权重:{0.2,0.3 / 3,0.3 / 3,0.3 / 3,0}
{1,2,3,4,2}替换权重:{0,0,0,0,0.4}
{3,2,3,4,2}替换权重:{0.2,0,0,0,0.4}
我们不允许{1,4,3,2},因为替换权重{0,0.3 / 3,0.3 / 3,0.3 / 3,0.4}有3个最后项目,总和权重= 0.6。
在实例中,子序列替换集很大。几乎任何短的子序列都有替代品。 很明显,任务可以使用蛮力来完成。但我正在寻找一种快速做到这一点的方法。任何帮助,将不胜感激。
已更新
实际上,我处理字符串而不是数字序列。到目前为止,我想出了以下算法(用C#实现):
public class Substitution
{
public string SubstituteWhat { get; private set; }
public string SubstituteTo { get; private set; }
public double TotalWeight { get; private set; }
public double SymbolWeight { get; private set; }
public Substitution(string substituteWhat, string substituteTo, double totalWeight)
{
SubstituteWhat = substituteWhat;
SubstituteTo = substituteTo;
TotalWeight = totalWeight;
SymbolWeight = TotalWeight/SubstituteWhat.Length;
}
public override string ToString()
{
return string.Format("SubstituteWhat: {0}, SubstituteTo: {1}, TotalWeight: {2}", SubstituteWhat, SubstituteTo, TotalWeight);
}
}
class SubstitutedStringWindowImpl
{
public string OriginalPhrase { get; set; }
public string SubstitutedPhrase { get; set; }
private double[] weightVector;
private double windowSum;
private int addItemPointer;
private int substructItemPoiner;
public SubstitutedStringWindowImpl(string phrase, int windowLength)
{
this.OriginalPhrase = phrase;
SubstitutedPhrase = String.Empty;
weightVector = new double[phrase.Length + windowLength ];
windowSum = 0;
substructItemPoiner = 0;
addItemPointer = windowLength;
}
public bool PerformSubstitution(
Substitution substitution,
double maxWeight,
out SubstitutedStringWindowImpl substitutedString)
{
substitutedString = MemberwiseClone() as SubstitutedStringWindowImpl;
substitutedString.weightVector = weightVector.ToArray();
for (int i = 0; i < substitution.SubstituteWhat.Length; i++)
{
substitutedString.weightVector[substitutedString.addItemPointer] = substitution.SymbolWeight;
substitutedString.windowSum = substitutedString.windowSum -
substitutedString.weightVector[substitutedString.substructItemPoiner] +
substitutedString.weightVector[substitutedString.addItemPointer];
substitutedString.substructItemPoiner++;
substitutedString.addItemPointer++;
if (substitutedString.windowSum > maxWeight)
return false;
if (substitutedString.addItemPointer == substitutedString.weightVector.Length)
break;
}
substitutedString.SubstitutedPhrase = SubstitutedPhrase + substitution.SubstituteTo;
return true;
}
}
internal class SubstitutionManagerWindowImpl
{
private readonly Dictionary<string, List<Substitution>> substitutionsDict;
private readonly double maxWeight;
private readonly int maxSubstitutionLength;
private readonly int windowLength;
public SubstitutionManagerWindowImpl(
List<Substitution> substitutions,
double maxWeight,
int windowLength)
{
this.substitutionsDict = substitutions.GroupBy(x => x.SubstituteWhat)
.ToDictionary(x => x.Key, x => x.ToList());
this.maxWeight = maxWeight;
this.windowLength = windowLength;
maxSubstitutionLength = substitutions.Max(x => x.SubstituteWhat.Length);
}
private List<SubstitutedStringWindowImpl> GetAllSubstitutionsPrivate(
SubstitutedStringWindowImpl stringToHandle, int symbolCount)
{
if (stringToHandle.OriginalPhrase.Length == symbolCount)
return new List<SubstitutedStringWindowImpl> {stringToHandle};
var result = new List<SubstitutedStringWindowImpl>();
for (int i = 1;
i <= Math.Min(maxSubstitutionLength, stringToHandle.OriginalPhrase.Length - symbolCount);
i++)
{
var subphraseToSubstitute = stringToHandle.OriginalPhrase.Substring(symbolCount, i);
List<Substitution> appropriateSubstitutions;
if (!substitutionsDict.TryGetValue(subphraseToSubstitute, out appropriateSubstitutions))
continue;
foreach (var substitution in appropriateSubstitutions)
{
SubstitutedStringWindowImpl handledString;
if (!stringToHandle.PerformSubstitution(substitution, maxWeight, out handledString))
continue;
result.AddRange(GetAllSubstitutionsPrivate(handledString,
symbolCount + substitution.SubstituteWhat.Length));
}
}
return result;
}
// this is the entry function
public List<string> GetAllSubstitutions(string phrase)
{
var res = GetAllSubstitutionsPrivate(new SubstitutedStringWindowImpl(phrase,windowLength), 0);
return res.Select(x => x.SubstitutedPhrase).ToList();
}
}
但似乎它没有快速完成工作。有任何建议如何改进吗?
答案 0 :(得分:2)
似乎并不需要能够用多项式延迟进行枚举。定义递归过程
all_substitutions(substitutions, first_half, last_n_weights, second_half)
其中first_half
是输入的替换前缀,second_half
是输入的未取代后缀,使得两半组成整个输入。 all_substitutions
的正文应该测试second_half
是否为空。如果是,请收益first_half
并返回。否则,对于second_half
开头的每个可行替换,使其适当递归(我们计算将单个替换为自身作为权重0的可行替换)。
如果这些方面的程序不够快,那么查看实际代码会很有帮助,因为进一步的改进将是数据结构的。
答案 1 :(得分:0)
我试图看看这个,并且被我似乎无法缓存结果的事实所困扰,这通常是一种非常有效的优化。经过一些调整后我发现了一种方法。
代码尽快替换序列,然后以递归方式替换右侧的剩余序列。它不打扰传递到替换先前替换成本的子序列,因为子序列将在稍后的时间重新分析而没有第一次替换,因此改变了先前的成本值。
然而,当父序列在子序列中接收到替换结果时,会考虑成本:它只保留那些满足其替换成本不会超出成本限制的事实。当然,就像在每个缓存机制中一样,你可以用内存来换取速度,但缓存在这里非常有用。 (理想情况下,缓存应该独立于CappedCostSubstitutionEnumerator;使用AOP可以在需要时注入)
我们首先定义Sequence
和Substitution
对象
public class Sequence<T>: List<T>
{
public double Cost { get; set; }
public override string ToString() // for the cache key
{
return string.Join("|", this.Select(t => t.ToString()));
}
public Sequence(IEnumerable<T> sequence)
{
this.AddRange(sequence);
Cost = 0;
}
}
public class Substitution<T>
{
public IEnumerable<T> OriginalSequence { get; set; }
public IEnumerable<T> SwappedSequence { get; set; }
public double Cost { get; set; }
}
现在我们可以定义替换枚举器
public class CappedCostSubstitutionEnumerator<T>
{
private static Dictionary<string, List<Sequence<T>>> Cache { get; set; }
static CappedCostSubstitutionEnumerator()
{
Cache = new Dictionary<string, List<Sequence<T>>>();
}
public double AuthorizedCost { get; set; }
public List<Substitution<T>> Substitutions { get; set; }
public CappedCostSubstitutionEnumerator(double maxAuthorizedCost)
{
Substitutions = new List<Substitution<T>>();
AuthorizedCost = maxAuthorizedCost;
}
public List<List<T>> EnumerateSequenceSubstitutions(List<T> sequence)
{
var values = EnumerateSequenceSubstitutions(new Sequence<T>(sequence));
return values.Select(s => s as List<T>).ToList();
}
private List<Sequence<T>> EnumerateSequenceSubstitutions(Sequence<T> sourceSequence)
{
var sourceSequenceKey = sourceSequence.ToString();
if (Cache.ContainsKey(sourceSequenceKey))
{
return Cache[sourceSequenceKey];
}
else
{
var sequenceSubstitutionsResults = new List<Sequence<T>> { sourceSequence };
foreach (var substitution in Substitutions.Where(substitution => substitution.Cost <= AuthorizedCost))
{
SubstituteWhereOriginalSubstitutionSequenceIsFound(sourceSequence, substitution, sequenceSubstitutionsResults);
}
Cache.Add(sourceSequenceKey, sequenceSubstitutionsResults);
return sequenceSubstitutionsResults;
}
}
private void SubstituteWhereOriginalSubstitutionSequenceIsFound(Sequence<T> sourceSequence, Substitution<T> substitution,
List<Sequence<T>> sequenceSubstitutionsResults)
{
var indexInSequence = 0;
var originalSequenceLength = substitution.OriginalSequence.Count();
var upperIndexInSequence = sourceSequence.Count() - originalSequenceLength;
// we are going to look for the substitution pattern at each possible place in the source sequence
while (indexInSequence <= upperIndexInSequence)
{
var evaluatedMatch = sourceSequence.Skip(indexInSequence).Take(originalSequenceLength);
if (evaluatedMatch.SequenceEqual<T>(substitution.OriginalSequence))
{
var start = sourceSequence.Take(indexInSequence);
var substituted = substitution.SwappedSequence;
var remain = sourceSequence.Skip(indexInSequence + originalSequenceLength);
// now we want the subsequences without taking cost into account
var subSequences = EnumerateSequenceSubstitutions(new Sequence<T>(remain));
// we filter the subsequences on the total cost here
foreach (
var subSequence in
subSequences.Where(
subSeq => (subSeq.Cost + substitution.Cost) <= (AuthorizedCost - sourceSequence.Cost)))
{
sequenceSubstitutionsResults.Add(
new Sequence<T>(start.Concat(substituted).Concat(subSequence))
{
Cost = substitution.Cost + subSequence.Cost
});
}
}
indexInSequence++;
}
}
}
有些观点:
要使用它,请执行以下操作:
private static void Main(string[] args)
{
var p = new CappedCostSubstitutionEnumerator<int>(0.5);
p.Substitutions.Add(new Substitution<int>() {OriginalSequence = new int[] {1}, SwappedSequence = new int[] {3}, Cost = 0.2});
p.Substitutions.Add(new Substitution<int>() { OriginalSequence = new int[] { 2,3,4 }, SwappedSequence = new int[] { 4,3 }, Cost = 0.3 });
p.Substitutions.Add(new Substitution<int>() { OriginalSequence = new int[] { 5 }, SwappedSequence = new int[] { 2 }, Cost = 0.4 });
p.Substitutions.Add(new Substitution<int>() { OriginalSequence = new int[] { 5,1 }, SwappedSequence = new int[] { 4, 3 }, Cost = 0.3 });
p.Substitutions.Add(new Substitution<int>() { OriginalSequence = new int[] { 2,3 }, SwappedSequence = new int[] { 4, 3 }, Cost = 0.3 });
p.Substitutions.Add(new Substitution<int>() { OriginalSequence = new int[] { 2 }, SwappedSequence = new int[] { 4, 3 }, Cost = 0.1 });
p.Substitutions.Add(new Substitution<int>() { OriginalSequence = new int[] { 4 }, SwappedSequence = new int[] { 4, 3 }, Cost = 0.3 });
var results = p.EnumerateSequenceSubstitutions(new List<int>() { 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 5 });
// results contains 5390 values
}
答案 2 :(得分:0)
这是我的解决方案,以及一些测试。 在我的计算机上以不到一秒的速度计算30个令牌序列的替换。
它作为二叉树工作,并在上下文对象中缓存中间结果,因此所有分支共享缓存。 问题是将源分成两个子树并不容易,因为有各种令牌长度。它是基于每个可能的令牌长度在不同点重复拆分而解决的,因此每个令牌长度都可以在两个子树中获得。
涉及很多优化,包括通过“预算”广告位预过滤替换令牌(无法按每个预算计算)。
请注意,原始项目不包含在结果中作为递归函数的优化。 此外,我已经用小数替换了双倍以避免舍入错误,因为它从预算中减去了成本。 实际上,我建议用不同范围的整数替换成本。
using System;
using System.Collections.Generic;
using System.Linq;
using Microsoft.VisualStudio.TestTools.UnitTesting;
namespace SubstitutionsWithBudget
{
[TestClass]
public class SubstitutionTest
{
private static readonly List<Substitution> Substitutions = new List<Substitution>
{
new Substitution("1", "3", 0.2m),
new Substitution("234", "43", 0.3m),
new Substitution("5", "2", 0.4m)
};
private readonly SubstitutionsDefinition substitutionsDefinition = new SubstitutionsDefinition(Substitutions);
[TestMethod]
public void OriginalQuestion()
{
string source = "12345";
var variants = EnumerateSubstitutions(new Context(), substitutionsDefinition, source, 0.6m);
foreach (var variant in variants)
{
Console.WriteLine(variant);
}
Assert.AreEqual(5, variants.Count());
}
[TestMethod]
public void MultiplicityTest()
{
const int multiplicity = 6;
string source = string.Join("", Enumerable.Repeat("12345", multiplicity));
var variants = EnumerateSubstitutions(new Context(), substitutionsDefinition, source, multiplicity * 0.6m).ToList();
foreach (var variant in variants.Take(10))
{
Console.WriteLine(variant);
}
}
[TestMethod]
public void SomeUsefulApplication()
{
var substitutions = new List<Substitution>
{
new Substitution("monkey", "elephant", 2m),
new Substitution("monkey", "shark", 3m),
new Substitution("banana", "apple", 1m),
new Substitution("feed", "kill", 4m),
};
var resultingSubstitutions = EnumerateSubstitutions(
new Context(),
new SubstitutionsDefinition(substitutions),
"feed monkey with banana",
4)
.OrderBy(s => s.Item2).ToList();
foreach (var substitution in resultingSubstitutions)
{
Console.WriteLine(substitution);
}
Assert.IsTrue(resultingSubstitutions.Any(
s => s.Item1 == "feed elephant with banana"));
Assert.IsFalse(resultingSubstitutions.Any(
s => s.Item1 == "kill shark with banana"));
}
IEnumerable<Tuple<string, decimal>> EnumerateSubstitutions(Context context, SubstitutionsDefinition substitutionsDefinition, string source, decimal maxCost)
{
if (source.Length == 0)
{
yield break;
}
var possibleSubstitutions = substitutionsDefinition.GetSubstitutions(source, maxCost).ToList();
// find substitutions of whole string
foreach (var possibleSubstitution in possibleSubstitutions)
{
yield return Tuple.Create(possibleSubstitution.Destination, possibleSubstitution.Weight);
}
// Variable split boundary to accomodate tokens of different length
var middle = source.Length / 2;
var window = substitutionsDefinition.MaxLength - 1;
if (window > source.Length - 2)
{
window = source.Length - 2;
}
var min = middle - window / 2;
var returned = new HashSet<Tuple<string, decimal>> { Tuple.Create(source, 0m) };
for (var i = 0; i <= window; i++)
{
var divideAt = min + i;
var left = source.Substring(0, divideAt);
var right = source.Substring(divideAt, source.Length - divideAt);
var q =
from leftSubstitution in context.GetCachedResult(Tuple.Create(left, maxCost),
() => EnumerateSubstitutions(context, substitutionsDefinition, left, maxCost)).Concat(new[] { Tuple.Create(left, 0m) })
let leftCost = leftSubstitution.Item2
from rightSubstitution in context.GetCachedResult(Tuple.Create(right, maxCost - leftCost),
() => EnumerateSubstitutions(context, substitutionsDefinition, right, maxCost - leftCost)).Concat(new[] { Tuple.Create(right, 0m) })
where leftCost + rightSubstitution.Item2 <= maxCost
select new { leftSubstitution, rightSubstitution };
q = q.ToList();
foreach (var item in q.Select(pair => Tuple.Create(pair.leftSubstitution.Item1 + pair.rightSubstitution.Item1,
pair.leftSubstitution.Item2 + pair.rightSubstitution.Item2)).Where(item => !returned.Contains(item)))
{
yield return item;
returned.Add(item);
}
}
}
}
public struct Substitution
{
public readonly string Souce;
public readonly string Destination;
public readonly decimal Weight;
public Substitution(string souce, string destination, decimal weight)
{
Souce = souce;
Destination = destination;
Weight = weight;
}
}
public class Context
{
private readonly Dictionary<Tuple<string, decimal>, List<Tuple<string, decimal>>> cache = new Dictionary<Tuple<string, decimal>, List<Tuple<string, decimal>>>();
public IEnumerable<Tuple<string, decimal>> GetCachedResult(Tuple<string, decimal> key, Func<IEnumerable<Tuple<string, decimal>>> create)
{
List<Tuple<string, decimal>> result;
cache.TryGetValue(key, out result);
if (result != null) return result;
result = create().ToList();
cache.Add(key, result);
return result;
}
public void AddToCache(Tuple<string, decimal> key, List<Tuple<string, decimal>> result)
{
cache.Add(key, result);
}
}
public class SubstitutionsDefinition
{
private readonly decimal maxCost;
private const int Granularity = 10;
/// <summary>
/// Holds substitutions lookups based on budget slots.
/// A slot with higher budget also holds values of all lower-budget slots.
/// </summary>
private readonly ILookup<int, ILookup<string, Substitution>> byCost;
private readonly int maxLength;
private readonly ILookup<string, Substitution> simple;
private bool simpleMode;
public int MaxLength { get { return maxLength; } }
// Really helpful if there are a lot of substitutions
public IEnumerable<Substitution> GetSubstitutions(string token, decimal budget)
{
return simpleMode
? GetSubstitutionsSmallSet(token, budget)
: GetSubstitutionsLargeSet(token, budget);
}
private IEnumerable<Substitution> GetSubstitutionsLargeSet(string token, decimal budget)
{
return byCost[GetSlot(budget)].SelectMany(i => i[token]).Where(s => s.Weight <= budget);
}
public IEnumerable<Substitution> GetSubstitutionsSmallSet(string token, decimal budget)
{
return simple[token].Where(s => s.Weight <= budget);
}
public SubstitutionsDefinition(IEnumerable<Substitution> substitutions)
{
var list = substitutions.ToList();
simpleMode = list.Count < 20; // Need to be found experimentally.
simple = list.ToLookup(i => i.Souce);
maxCost = list.Max(s => s.Weight);
maxLength = list.Max(s => s.Souce.Length);
var byLength = list.ToLookup(s => s.Souce.Length);
var q =
from length in list.Select(i => i.Souce.Length).Distinct()
from budgetSlot in Enumerable.Range(0, Granularity + 1)
from item in byLength[length]
where item.Weight <= GetCost(budgetSlot)
group item by budgetSlot;
byCost = q.ToLookup(i => i.Key, i => i.ToLookup(j => j.Souce));
}
private decimal GetCost(int slot)
{
var d = maxCost * slot;
return d / Granularity;
}
private int GetSlot(decimal weight)
{
var maxWeight = Math.Min(weight, maxCost);
return (int)(Granularity * maxWeight / maxCost);
}
}
}