我试图制作一个python脚本,它让我获得给定矩阵中最长的重复字符(水平和垂直)。
示例:
我有这个矩阵:
afaaf
rbaca
rlaff
将此矩阵用于输入,结果应为: a 3
你可以看到矩阵的第3列充满了一个并且它也是矩阵中重复次数最多的字符。
我有什么:
#!/bin/python2.7
#Longest string in matrix
#Given a matrix filled with letters. Find the longest string, containing only the same letter, which can be obtained by starting
#with any position and then moving horizontally and vertically (each cell can be visited no more than 1 time).
# Settings here
# -------------
string_matrix = """
afaaf
rbaca
rlaff
"""
pos = (0,0)
# -------------
import pdb
import time
import collections
from collections import defaultdict
import re
rows = 0
columns = 0
matrix = []
matrix2 = []
counter = 0
res_l = []
i = 0
c = ''
# if matrix2 is full of 1's, stop
def stop():
for i in range(0, rows):
for j in range(0, columns):
if matrix2[i][j] == 0:
return False
return True
# checks the points, and returns the most repeated char and length
def check_points(points1, points2):
r = []
r.append(-1)
r.append('')
# create strings from matrix
s1 = ''
s2 = ''
for point in points1:
s1 += matrix[point[0]][point[1]]
for point in points2:
s2 += matrix[point[0]][point[1]]
rr = {}
for c in s1:
rr[c] = 0
for c in s2:
rr[c] = 0
for i in range(0, len(s1)):
k = 1
for j in range(i+1, len(s1)):
if s1[i] == s1[j]:
k += 1
else:
break
if k > rr[s1[i]]:
rr[s1[i]] = k
for i in range(0, len(s2)):
k = 1
for j in range(i+1, len(s2)):
if s2[i] == s2[j]:
k += 1
else:
break
if k > rr[s2[i]]:
rr[s2[i]] = k
m = -1
c = ''
for key,value in rr.iteritems():
if value > m:
m = value
c = key
return m, c
# Depth-first search, recursive
def search(pos):
global res_l
global matrix2
global c
counter = 0
x = pos[0]
y = pos[1]
c = matrix[x][y]
# base clause
# when matrix2 is all checked
if stop():
return counter, c
points1 = []
points2 = []
allpoints = []
for i in range(0, columns):
if matrix2[x][i] != 1:
points1.append([x, i])
allpoints.append([x, i])
for i in range(0, rows):
if matrix2[i][x] != 1:
points2.append([i, x])
allpoints.append([i, x])
r = check_points(points1, points2)
if r[0] > counter:
counter = r[0]
c = r[1]
matrix2[x][y] = 1
for point in allpoints:
rr = search(point)
if rr[0] > counter:
counter = int(rr[0])
c = rr[1]
#print 'c: ' + str(c) + ' - k: ' + str(counter)
return counter, c
def main():
# create the matrix from string
string_matrix_l = string_matrix.strip()
splited = string_matrix_l.split('\n')
global rows
global columns
global matrix
global matrix2
rows = len(splited)
columns = len(splited[1])
# initialize matrixes with 0
matrix = [[0 for x in range(columns)] for x in range(rows)]
matrix2 = [[0 for x in range(columns)] for x in range(rows)]
# string to matrix
i = 0
for s in splited:
s = s.strip()
if s == '':
continue
j = 0
for c in s:
try:## Heading ##
matrix[i][j] = c
#print 'ok: ' + str(i) + ' ' + str(j) + ' ' + c
except:
print 'fail: index out of range matrix[' + str(i) + '][' + str(j)+'] ' + c
j = j + 1
i = i + 1
# print some info
print 'Given matrix: ' + str(matrix) + '\n'
print 'Start position: ' + str(pos)
print 'Start character: ' + str(matrix[pos[0]][pos[1]])
# get the result
res = search(pos)
print '-------------------------------------'
print '\nChar: ' + str(res[1]) + '\nLength: ' + str(res[0])
if __name__ == "__main__":
main()
这是我的源代码。 上面给出的示例也用在源代码中。给出的结果是: r 2 这是错误的......再次, a 3
它有4个功能:main,search,stop和check_points。
什么行不通:
大部分时间都给了我错误的性格,甚至认为计数有时可能是正确的。有时候它会在水平方向上工作,有时却不会。我确信我做错了什么,但是......现在已经超过1周了,因为我试图弄清楚如何做到这一点。在stackoverflow上问了另一个问题,有点进一步但是......仍然卡住了。
任何建议都表示赞赏。
答案 0 :(得分:2)
您可以使用itertools.groupby
快速查找某些字符的重复次数,并使用izip_longest(*matrix)
转置矩阵(交换其行和列)。
from itertools import groupby, izip_longest
matrix_string = """
afaaf
rbaca
rlaff
"""
def longest_repetition(row):
return max((sum(1 for item in group), letter)
for letter, group in groupby(row)
if letter is not None)
def main():
matrix = [[letter for letter in row.strip()]
for row in matrix_string.strip().split('\n')]
count, letter = max(
max(longest_repetition(row) for row in matrix),
max(longest_repetition(col) for col in izip_longest(*matrix))
)
print letter, count
if __name__ == '__main__':
main()
由于您已更新了此要求,因此这是一个代码的递归版本,并附有一些解释。如果它不是一个任务,并且这个任务出现在一些现实生活中的问题,你应该真的使用过第一个版本。
matrix_string = """
afaaf
rbaca
rlaff
"""
def find_longest_repetition(matrix):
rows = len(matrix)
cols = len(matrix[0])
# row, col - row and column of the current character.
# direction - 'h' if we are searching for repetitions in horizontal direction (i.e., in a row).
# 'v' if we are searching in vertical direction.
# result - (count, letter) of the longest repetition we have seen by now.
# This order allows to compare results directly and use `max` to get the better one
# current - (count, letter) of the repetition we have seen just before the current character.
def recurse(row, col, direction, result, current=(0, None)):
# Check if we need to start a new row, new column,
# new direction, or finish the recursion.
if direction == 'h': # If we are moving horizontally
if row >= rows: # ... and finished all rows
return recurse( # restart from the (0, 0) position in vertical direction.
0, 0,
'v',
result
)
if col >= cols: # ... and finished all columns in the current row
return recurse( # start the next row.
row + 1, 0,
direction,
result
)
else: # If we are moving vertically
if col >= cols: # ... and finished all columns
return result # then we have analysed all possible repetitions.
if row >= rows: # ... and finished all rows in the current column
return recurse( # start the next column.
0, col + 1,
direction,
result
)
# Figure out where to go next in the current direction
d_row, d_col = (0, 1) if direction == 'h' else (1, 0)
# Try to add current character to the current repetition
count, letter = current
if matrix[row][col] == letter:
updated_current = count + 1, letter
else:
updated_current = 1, matrix[row][col]
# Go on with the next character in the current direction
return recurse(
row + d_row,
col + d_col,
direction,
max(updated_current, result), # Update the result, if necessary
updated_current
)
return recurse(0, 0, 'h', (0, None))
def main():
matrix = [[letter for letter in row.strip()]
for row in matrix_string.strip().split('\n')]
count, letter = find_longest_repetition(matrix)
print letter, count
if __name__ == '__main__':
main()
答案 1 :(得分:1)
您还可以尝试使用collections.Counter(string).most_common()来获得最重复的角色。
from collections import Counter
string_matrix = """
afaaf
rbaca
rlaff
"""
def GetMostRepetitions(pos):
mc = []
for ii in range(pos[0],len(working_mat)):
mc.extend(Counter(working_mat[ii]).most_common(1))
for jj in range(pos[1],len(working_mat[0])):
column = []
for kk in range(ii,len(working_mat)):
column.append(working_mat[kk][jj])
mc.extend(Counter(column).most_common(1))
m = 0
for item in mc:
if item[1] > m:
m = item[1]
k = item[0]
print(k, m)
working_mat = string_matrix.strip().split('\n')
for ii in range(len(working_mat)):
for jj in range(len(working_mat[0])):
pos = (ii,jj)
GetMostRepetitions(pos)
正如Kolmar所述,您还可以使用更好的方式transpose the matrix。