哦,不!你不小心把一篇长文章中的空格、标点都删掉了,并且大写也变成了小写。例如句子 "I reset the computer. It still didn’t boot!" 已经变成了 "iresetthecomputeritstilldidntboot"。在处理标点符号和大小写之前,你得先把它断成词语。当然,你有一本厚厚的词典 dictionary,不过有些词不在词典里。假设文章用 sentence 表示,请设计一个算法把文章断开,使未识别的字符数最少,并返回未识别的字符数。
注意:本题相对原题稍作改动,只需返回未识别的字符数
示例:
输入:
dictionary = ["looked","just","like","her","brother"]
sentence = "jesslookedjustliketimherbrother"
输出: 7
解释: 断句后为"jess looked just like tim her brother",共7个未识别字符。
提示:
- 0 <= len(sentence) <= 1000
- dictionary中总字符数不超过 150000。
- 你可以认为dictionary和sentence中只包含小写字母。
Python 解答:
1.暴力递归超时
class Solution:
    """Re-space solver that tries every dictionary word at every position."""

    def respace(self, dictionary: List[str], sentence: str) -> int:
        """Return the minimum number of unrecognised characters.

        The sentence is split so that as many characters as possible are
        covered by words from ``dictionary``; characters not covered by any
        word count as unrecognised.

        Args:
            dictionary: Known words.
            sentence: Text with all separators removed.

        Returns:
            The smallest achievable count of unrecognised characters
            (``0`` for an empty sentence, ``len(sentence)`` when no word
            ever matches).
        """
        # An empty word never consumes a character, so it cannot help;
        # dropping it (and duplicates) also avoids pointless comparisons.
        words = {word for word in dictionary if word}
        length = len(sentence)

        # fewest[i] is the answer for the suffix sentence[i:].  Filling the
        # table from the end gives every suffix exactly one evaluation and
        # needs no recursion, so long sentences cannot exhaust the stack.
        fewest = [0] * (length + 1)
        for start in range(length - 1, -1, -1):
            # Option 1: treat sentence[start] as an unrecognised character.
            best = 1 + fewest[start + 1]
            # Option 2: consume any dictionary word that begins here.
            for word in words:
                if sentence.startswith(word, start):
                    best = min(best, fewest[start + len(word)])
            fewest[start] = best
        return fewest[0]
2.动态规划
class Solution:
    """Re-space solver using prefix dynamic programming over a word set."""

    def respace(self, dictionary: List[str], sentence: str) -> int:
        """Return the minimum number of unrecognised characters.

        Args:
            dictionary: Known words.
            sentence: Text with all separators removed.

        Returns:
            The smallest achievable count of characters of ``sentence``
            that are not covered by a dictionary word.
        """
        words = set(dictionary)
        # No substring longer than the longest word can be in the set, so
        # the inner scan only needs to look back this many characters.
        longest = max(map(len, words), default=0)
        length = len(sentence)

        # dp[i] is the answer for the prefix sentence[:i]; dp[0] is 0.
        dp = [0] * (length + 1)
        for end in range(1, length + 1):
            # Baseline: the last character of the prefix is unrecognised.
            best = dp[end - 1] + 1
            for start in range(max(0, end - longest), end):
                # Check the cheap comparison first to skip useless slices.
                if dp[start] < best and sentence[start:end] in words:
                    best = dp[start]
            dp[end] = best
        return dp[length]
3.动态规划+前缀树
class Solution:
    """Re-space solver using dynamic programming plus a reversed-word trie."""

    class Trie:
        """Trie that stores every word reversed (last character first)."""

        class Node:
            """Single trie node."""

            def __init__(self):
                # Maps a character to the child Node reached through it.
                self.node = {}
                # True when the path from the root to here spells a word.
                self.isword = False

        def __init__(self):
            self.root = self.Node()

        def add(self, word):
            """Insert ``word`` into the trie, walking it back to front."""
            cur = self.root
            for c in reversed(word):
                if c not in cur.node:
                    cur.node[c] = self.Node()
                cur = cur.node[c]
            cur.isword = True

    def respace(self, dictionary: List[str], sentence: str) -> int:
        """Return the minimum number of unrecognised characters.

        Args:
            dictionary: Known words.
            sentence: Text with all separators removed.

        Returns:
            The smallest achievable count of characters of ``sentence``
            that are not covered by a dictionary word.
        """
        trie = self.Trie()
        for word in dictionary:
            trie.add(word)

        length = len(sentence)
        # dp[i] is the answer for the prefix sentence[:i]; dp[0] is 0.
        dp = [0] * (length + 1)
        for end in range(1, length + 1):
            # Baseline: the last character of the prefix is unrecognised.
            best = dp[end - 1] + 1
            cur = trie.root
            # Walk backwards from the prefix's last character; because words
            # are stored reversed, each step extends a candidate word
            # sentence[start:end] by one character on its left.
            for start in range(end - 1, -1, -1):
                cur = cur.node.get(sentence[start])
                if cur is None:
                    # No dictionary word ends with this suffix; stop early.
                    break
                if cur.isword:
                    best = min(best, dp[start])
            dp[end] = best
        return dp[length]
留言