从字典中找到句子字谜的有效方法?

时间:2015-12-11 09:31:16

标签: python optimization combinations anagram

我需要制作一个程序,该程序将带有字典和任意字符串的文件作为输入,然后输出该字典中构成给定字符串字符的所有单词组合。 例如,使用100个最流行的英语单词和字符串[' on it work', ' into work', ' not i work', ' know or it', ' work it no', ' to work in'],我应该得到类似

def sortstring(string):
    return ''.join(sorted(string))

def simplify(all_strings):
    possible_strings = defaultdict(list)
    for string in all_strings:
        possible_strings[sortstring(string).strip()].append(string)
    return possible_strings

def generate(database, length,curstring="", curdata=set()):
    if len(curstring.replace(" ", "")) > length:
        return set()
    if len((curstring).replace(" ", "")) == length:
        return curdata.union(set([curstring]))
    for i in database:
        if len((curstring+i).replace(" ", "")) <= length:
            curdata = curdata.union(generate(database.difference(set([i])), length, curstring+" "+i, curdata))
            database = database.difference(set([i]))
    return curdata

def analyse(database, input_string):
    cletters = countstring(input_string)
    strings = simplify(generate(database, cletters))
    data = list()
    sorted_string = sortstring(input_string).strip()
    if sorted_string in strings.keys():
        data = strings[sorted_string]
    return len(strings.values()), data

def countstring(string):
    a = countletters(string)
    return sum(a.values())

def countletters(string):
    result = {}
    for i in ascii_lowercase:
        result[i] = string.count(i)
    return result

的内容,我会这样做。

问题是我的程序效率太低:字典中只有100个单词时,字符串长度的实际上限是7个字符,再长就需要太长时间。我尝试寻找与此相关的各种算法,但无济于事。

以下是我搜索字谜的方法:

<beans:beans xmlns="http://www.springframework.org/schema/security"
xmlns:beans="http://www.springframework.org/schema/beans" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns:jee="http://www.springframework.org/schema/jee" 
xsi:schemaLocation="
    http://www.springframework.org/schema/beans http://www.springframework.org/schema/beans/spring-beans-3.0.xsd
    http://www.springframework.org/schema/jee http://www.springframework.org/schema/jee/spring-jee-3.0.xsd 
    http://www.springframework.org/schema/security http://www.springframework.org/schema/security/spring-security-4.0.xsd
">

<!-- Web-layer security: custom filters, per-URL access rules, session handling,
     response headers, form login/logout and port mapping. -->
<http auto-config="true" use-expressions="true">
    <!-- Register the two project filter beans (declared further down in this file) in the filter chain. -->
    <custom-filter after="EXCEPTION_TRANSLATION_FILTER" ref="ajaxTimeoutRedirectFilter"/>

    <custom-filter position="SWITCH_USER_FILTER" ref="switchUserProcessingFilter" />
    <!-- The switch-user URL is restricted to ROLE_SUPERVISOR. -->
    <intercept-url pattern="/j_spring_security_switch_user" access="hasRole('ROLE_SUPERVISOR')"/>

    <!-- Invalid sessions are sent to the login page with invalidSession=1; max-sessions is 10 and
         going over that limit is configured as an error (error-if-maximum-exceeded="true"). -->
    <session-management  invalid-session-url="/login.html?invalidSession=1" session-fixation-protection="newSession">
        <concurrency-control max-sessions="10" error-if-maximum-exceeded="true" />
    </session-management>

    <!-- URL access rules: the login page is for ROLE_ANONYMOUS, /resources/** is open to everyone on
         any channel, /admin** needs ROLE_ADMIN, the remaining patterns need ROLE_USER; every rule
         except /resources/** requires https.
         NOTE(review): rule order presumably matters (first match wins); confirm before reordering. -->
    <intercept-url pattern="/login.html" access="hasRole('ROLE_ANONYMOUS')" requires-channel="https"/>
    <intercept-url pattern="/resources/**" access="permitAll" requires-channel="any"/>
    <intercept-url pattern="/admin**" access="hasRole('ROLE_ADMIN')" requires-channel="https"/>
    <intercept-url pattern="/rest/**" access="hasRole('ROLE_USER')" requires-channel="https"/>
    <intercept-url pattern="/index" access="hasRole('ROLE_USER')" requires-channel="https"/>
    <intercept-url pattern="/upload/**" access="hasRole('ROLE_USER')" requires-channel="https"/>

    <!-- Response headers: xss-protection with block="false", frame-options disabled,
         cache-control left at its defaults. -->
    <headers>
        <xss-protection block="false"/>
        <frame-options disabled="true"/>
        <cache-control/>
    </headers>

    <!-- access denied page -->
    <access-denied-handler error-page="/403" />
    <!-- Form login served at /login.html; always-use-default-target sends every successful login
         to /index, failures return to the login page with error=1. -->
    <form-login 
        login-page="/login.html" 
        default-target-url="/index" 
        always-use-default-target="true"
        authentication-failure-url="/login.html?error=1" 
        username-parameter="username" 
        password-parameter="password"/>
    <!-- Logout redirects to /login.html?logout=1 and deletes the JSESSIONID cookie.
         NOTE(review): invalidate-session is "false"; confirm that keeping the session on logout is intended. -->
    <logout logout-success-url="/login.html?logout=1" invalidate-session="false" delete-cookies="JSESSIONID"/>
    <!-- enable csrf protection -->
    <!-- <csrf disabled="true" /> -->

    <!-- Pairs http port 8080 with https port 8443 (presumably used by the requires-channel redirects; confirm). -->
    <port-mappings>
        <port-mapping http="8080" https="8443"/>
    </port-mappings>
</http>

<!-- Project filter referenced by the custom-filter element in the http block.
     customSessionExpiredErrorCode is set to 419; presumably the status returned to AJAX callers
     whose session has expired (confirm in AjaxTimeoutRedirectFilter). -->
<beans:bean id="ajaxTimeoutRedirectFilter" class="com.finconsgroup.mens.springsecurity.AjaxTimeoutRedirectFilter">
    <beans:property name="customSessionExpiredErrorCode" value="419"/>
</beans:bean>

<!-- Project switch-user filter placed at SWITCH_USER_FILTER in the http block.
     Users are loaded through mensAuthenticationService; switchUserUrl matches the intercept-url
     rule that restricts it to ROLE_SUPERVISOR, and targetUrl is /index. -->
<beans:bean id="switchUserProcessingFilter" class="com.finconsgroup.mens.springsecurity.MensSwitchUserFilter">
    <beans:property name="userDetailsService" ref="mensAuthenticationService"/>
    <beans:property name="switchUserUrl" value="/j_spring_security_switch_user"/>
    <beans:property name="exitUserUrl" value="/j_spring_security_exit_user"/>
    <beans:property name="targetUrl" value="/index"/>
</beans:bean>

<!-- BCrypt password encoder used by the authentication provider below. -->
<beans:bean name="bcryptEncoder" class="org.springframework.security.crypto.bcrypt.BCryptPasswordEncoder"/>
<!-- Select users and user_roles from database -->
<!-- Authentication manager (alias authenticationManager): a single provider backed by
     mensAuthenticationService, with passwords checked through bcryptEncoder. -->
<authentication-manager alias="authenticationManager">
    <authentication-provider user-service-ref="mensAuthenticationService">
        <password-encoder ref="bcryptEncoder"/>
    </authentication-provider>
</authentication-manager>
<!-- Project user-details service used by the authentication provider and the switch-user filter.
     It reads from mensDataSource (not defined in the visible part of this file) with group lookups enabled.
     NOTE(review): all three query properties hold the literal "my_query", which looks like a
     placeholder; supply the real SQL before use. -->
<beans:bean id="mensAuthenticationService" class="com.finconsgroup.mens.springsecurity.MensAuthenticationProvider">
    <beans:property name="dataSource" ref="mensDataSource"/>
    <beans:property name="usersByUsernameQuery" value="my_query"/>
    <beans:property name="authoritiesByUsernameQuery" value="my_query"/>
    <beans:property name="groupAuthoritiesByUsernameQuery" value="my_query"/>
    <beans:property name="enableGroups" value="true"/>
</beans:bean>

<!-- Spring Security -->

<!-- Project permission evaluator, constructed with the aclService bean declared further down. -->
<beans:bean id="mensPermissionEvaluator" class="com.finconsgroup.mens.springsecurity.MensPermissionEvaluator">
    <beans:constructor-arg ref="aclService"/>
</beans:bean>

<!-- Method-security expression handler whose permissionEvaluator is mensPermissionEvaluator;
     it is plugged into global-method-security below. -->
<beans:bean id="securityExpressionHandler" 
        class="org.springframework.security.access.expression.method.DefaultMethodSecurityExpressionHandler">
    <beans:property name="permissionEvaluator" ref="mensPermissionEvaluator"/>
</beans:bean>

<!-- Method-level security: pre/post annotations and secured annotations are both enabled,
     using authenticationManager and the securityExpressionHandler bean. -->
<global-method-security
    authentication-manager-ref="authenticationManager"
    pre-post-annotations="enabled"
    secured-annotations="enabled">
    <expression-handler ref="securityExpressionHandler"/>
</global-method-security>

<!-- ================================================================== -->
<!-- ACL service                                                        -->
<!-- ================================================================== -->

<!-- JDBC-backed mutable ACL service. Constructor arguments, in order: the data source
     (mensDataSource, not defined in the visible part of this file), the lookupStrategy bean
     and the aclCache bean. -->
<beans:bean id="aclService" class="org.springframework.security.acls.jdbc.JdbcMutableAclService">
    <beans:constructor-arg ref="mensDataSource" />
    <beans:constructor-arg ref="lookupStrategy" />
    <beans:constructor-arg ref="aclCache" />
</beans:bean>

<!-- ACL cache bean, built from three constructor arguments (in order):
     1. a cache named "aclCache" produced by EhCacheFactoryBean from mensEhCacheManager
        (that manager is not defined in the visible part of this file);
     2. a DefaultPermissionGrantingStrategy that logs through ConsoleAuditLogger;
     3. an AclAuthorizationStrategyImpl given the single authority ROLE_ACL_ADMIN.
     NOTE(review): lookupStrategy builds its own AclAuthorizationStrategyImpl from the adminRole
     bean ("ADMIN") instead of ROLE_ACL_ADMIN; confirm the two are meant to differ. -->
<beans:bean id="aclCache" class="org.springframework.security.acls.domain.EhCacheBasedAclCache">
    <beans:constructor-arg>
        <beans:bean class="org.springframework.cache.ehcache.EhCacheFactoryBean">
            <beans:property name="cacheManager">
                <beans:ref bean="mensEhCacheManager"/>
            </beans:property>
            <beans:property name="cacheName" value="aclCache"/>
        </beans:bean>
    </beans:constructor-arg>
    <beans:constructor-arg>
        <beans:bean class="org.springframework.security.acls.domain.DefaultPermissionGrantingStrategy">
            <beans:constructor-arg>
                <beans:bean class="org.springframework.security.acls.domain.ConsoleAuditLogger"/>
            </beans:constructor-arg>
        </beans:bean>
    </beans:constructor-arg>
    <beans:constructor-arg>
        <beans:bean class="org.springframework.security.acls.domain.AclAuthorizationStrategyImpl">
            <beans:constructor-arg>
                <beans:list>
                    <beans:bean class="org.springframework.security.core.authority.SimpleGrantedAuthority">
                        <beans:constructor-arg value="ROLE_ACL_ADMIN"/>
                    </beans:bean>
                </beans:list>
            </beans:constructor-arg>
        </beans:bean>
    </beans:constructor-arg>
</beans:bean>

<!-- ACL lookup strategy used by aclService. Constructor arguments, in order: the data source
     (mensDataSource), the aclCache bean, an AclAuthorizationStrategyImpl and a ConsoleAuditLogger. -->
<beans:bean id="lookupStrategy" class="org.springframework.security.acls.jdbc.BasicLookupStrategy">
    <beans:constructor-arg ref="mensDataSource" />
    <beans:constructor-arg ref="aclCache" />
    <beans:constructor-arg>
        <!-- Decides whether current principal can make ACL changes. See
             AclAuthorizationStrategyImpl Javadoc for the rules involved. -->
        <beans:bean class="org.springframework.security.acls.domain.AclAuthorizationStrategyImpl">
            <beans:constructor-arg>
                <!-- The same adminRole bean is supplied for all three positions of the list. -->
                <beans:list>
                    <!-- Role required to change ACL ownership -->
                    <beans:ref bean="adminRole" />
                    <!-- Role required to change auditing details -->
                    <beans:ref bean="adminRole" />
                    <!-- Role required to change other ACL/ACE details -->
                    <beans:ref bean="adminRole" />
                </beans:list>
            </beans:constructor-arg>
        </beans:bean>
    </beans:constructor-arg>
    <beans:constructor-arg>
        <beans:bean class="org.springframework.security.acls.domain.ConsoleAuditLogger" />
    </beans:constructor-arg>
</beans:bean>

<!-- Authority referenced three times by lookupStrategy.
     NOTE(review): the value is "ADMIN" without the ROLE_ prefix used by the hasRole(...) rules
     in the http block (e.g. ROLE_ADMIN); confirm this is the authority string users actually hold. -->
<beans:bean id="adminRole" class="org.springframework.security.core.authority.SimpleGrantedAuthority">
    <beans:constructor-arg value="ADMIN" />
</beans:bean>

有人可以提出改进方法吗?不过我认为我使用的算法应该完全丢弃,因为从它的运行速度来看,其复杂度似乎太高了。 以防万一:该程序应该足够高效,以支持包含成千上万个单词的字典和最多几十个字符的字符串。这比我目前做到的要好得多。

2 个答案:

答案 0 :(得分:2)

我自己解决了部分问题。 解决了生成器代码中的for-if反模式:

def generate(database, length, letters, curstring="", curdata=None):
    """Recursively collect word combinations that use exactly ``length`` letters.

    Each combination is a string in which every word is preceded by a single
    space (e.g. ``" on it work"``). A branch is abandoned as soon as the
    partial string uses any letter more often than ``letters`` allows.

    Args:
        database: Set of candidate words; each word is used at most once per
            combination. Must support ``difference``.
        length: Target number of non-space characters.
        letters: Mapping from every character of ``ascii_lowercase`` to the
            number of times it may be used.
        curstring: Partial combination built so far.
        curdata: Combinations found so far. ``None`` (the default) starts
            with a fresh empty set, so no set object is shared between calls.

    Returns:
        A set of combination strings: ``curdata`` plus everything found below
        this call, or an empty set when ``curstring`` is already too long or
        over-uses a letter.
    """
    if curdata is None:
        curdata = set()

    used = len(curstring.replace(" ", ""))
    if used > length:
        return set()
    if used == length:
        return curdata.union({curstring})

    t = countletters(curstring)
    if any(t[c] > letters[c] for c in ascii_lowercase):
        return set()

    # Rebinding ``database`` below does not disturb this loop: the iterator
    # keeps walking the original set object.
    for i in database:
        t = countletters(curstring + i)
        if any(t[c] > letters[c] for c in ascii_lowercase):
            continue
        if sum(t.values()) <= length:
            curdata = curdata.union(
                generate(database.difference({i}), length, letters,
                         curstring + " " + i, curdata))
            # Words already explored as the next word are withheld from the
            # remaining branches at this level.
            database = database.difference({i})
    return curdata

现在它快得多,但如果字典包含数万个单词和/或输入字符串很长,它仍然很慢。

答案 1 :(得分:0)

这是一个实现我在评论中建议的树方法的递归方法:

def frequencyDict(s):
    """Count how often each alphabetic character occurs in *s*.

    The string is lower-cased first, so the tally is case-insensitive.
    Characters for which ``str.isalpha()`` is false are skipped.

    Returns a plain ``dict`` mapping each letter to its number of occurrences.
    """
    counts = {}
    for ch in s.lower():
        if not ch.isalpha():
            continue
        counts[ch] = counts.get(ch, 0) + 1
    return counts

def canMake(w, fdict):
    """Tell whether word *w* can be spelled from the letters counted in *fdict*.

    *fdict* maps a letter to how many copies are available; a letter missing
    from it counts as zero. Only the characters tallied by ``frequencyDict``
    are checked.
    """
    needed = frequencyDict(w)
    for letter, count in needed.items():
        if count > fdict.get(letter, 0):
            return False
    return True

def candidates(wlist, fdict):
    """Return, in their original order, the words of *wlist* that ``canMake`` accepts for *fdict*."""
    usable = []
    for word in wlist:
        if canMake(word, fdict):
            usable.append(word)
    return usable

def anagrams(wlist, fdict):
    """Find every ordered sequence of words from *wlist* that uses up *fdict*.

    Args:
        wlist: Candidate words. A word may appear several times in one result
            as long as enough letters remain.
        fdict: Mapping of letter -> number of copies still available, as
            produced by ``frequencyDict``. It is not modified.

    Returns:
        A list of strings, each a space-separated word sequence that consumes
        every letter of *fdict* exactly, or the string ``"no anagrams"`` when
        there is none. Different orderings of the same words are separate
        results.
    """
    if len(wlist) == 0 or len(fdict) == 0:
        return "no anagrams"
    firstWords = candidates(wlist, fdict)
    hits = []
    for w in firstWords:
        # Reduce by the letters frequencyDict counts for the word, i.e. the
        # same letters canMake checked. Iterating the raw characters instead
        # raises KeyError on uppercase letters or punctuation in the word.
        used = frequencyDict(w)
        if not used:
            # A word without letters (e.g. the '' left by a trailing newline)
            # always "fits" but consumes nothing; recursing on it would never
            # terminate.
            continue
        d = fdict.copy()
        for c, n in used.items():
            d[c] -= n
            if d[c] == 0:
                del d[c]
        if len(d) == 0:
            # Nothing left over: this word completes an anagram on its own.
            hits.append(w)
        else:
            # Every word that fits the reduced letters also fits fdict, so
            # filtering the already-reduced candidate list gives the same
            # result as filtering the whole of wlist.
            rlist = [v for v in firstWords if canMake(v, d)]
            tails = anagrams(rlist, d)
            if tails != "no anagrams":
                hits.extend(w + " " + t for t in tails)
    if len(hits) == 0:
        return "no anagrams"
    return hits

def findAnagrams(wlist, s):
    """Return the multi-word anagrams of *s* that can be built from *wlist*.

    The letters of *s* are tallied case-insensitively and the search is
    delegated to ``anagrams``, whose return value is passed through unchanged.
    """
    letter_counts = frequencyDict(s.lower())
    return anagrams(wlist, letter_counts)

# Load the word list (one word per line). The context manager closes the file
# even if reading fails.
with open("linuxwords.txt") as f:
    words = f.read().split('\n')
# Normalize to stripped lower case and drop hyphenated entries. Blank lines
# (such as the empty string left by a trailing newline) are dropped as well:
# an empty "word" fits any set of letters without consuming one.
words = [w.strip().lower() for w in words if '-' not in w and w.strip()]
test = findAnagrams(words, "Donald Trump")

大约需要20秒才能从旧的Linux单词列表中找出"Donald Trump"的全部730个字谜。我最喜欢的是"潮湿的坚果领主"(英文原文大概是 "damp nut lord")。