import nltk
from nltk.tokenize import word_tokenize
from nltk.tag import pos_tag
# Fetch the NLTK data packages this script asks for before any tokenizing or
# tagging is attempted.
# NOTE(review): newer NLTK releases may look for differently named resources
# (e.g. "punkt_tab") — confirm against the installed version.
for _resource in ("punkt", "averaged_perceptron_tagger"):
    nltk.download(_resource)
def remove_prepositional_phrases(sentence, prepositions):
    """Drop selected prepositions, and the token after each, from a sentence.

    The sentence is tokenized with ``word_tokenize`` and tagged with
    ``pos_tag``. Every token that is tagged ``"IN"`` and whose lower-cased
    form appears in ``prepositions`` is removed together with the single
    token that immediately follows it. Only that one following token is
    skipped, so for ``"in a desk"`` the words ``in`` and ``a`` are dropped
    while ``desk`` is kept.

    Args:
        sentence: The text to filter.
        prepositions: Iterable of preposition strings to remove. Matching is
            case-insensitive on both the token and these entries.

    Returns:
        The remaining tokens joined with single spaces. Because the result is
        rebuilt from tokens, the original spacing is not preserved.
    """
    # Normalize once so that entries such as "In" match, and so that each
    # membership test is a set lookup rather than a list scan.
    targets = {preposition.lower() for preposition in prepositions}

    tagged_tokens = iter(pos_tag(word_tokenize(sentence)))
    kept = []
    for word, pos in tagged_tokens:
        if pos == "IN" and word.lower() in targets:
            # Consume the token right after the preposition as well; the
            # default avoids StopIteration when the preposition is last.
            next(tagged_tokens, None)
            continue
        kept.append(word)
    return " ".join(kept)
# Demo: remove the prepositions "in" / "on" / "behind" (and the word right
# after each one) from a sample sentence, then print what is left.
input_sentence = "a apple in a desk on a table behind a chair"
prepositions_to_remove = [
    "in",
    "on",
    "behind",
]
filtered_sentence = remove_prepositional_phrases(
    sentence=input_sentence,
    prepositions=prepositions_to_remove,
)
print(filtered_sentence)
在这个示例中,我添加了一个名为 `prepositions_to_remove` 的列表,其中包含你希望去除的介词(例如 "in"、"on"、"behind")。`remove_prepositional_phrases` 函数现在会检查每个词的词性是否为介词(IN),以及该词是否在指定的介词列表中;如果两者都满足,就跳过这个介词及其后面紧跟的一个单词。
你可以根据需要添加或修改 `prepositions_to_remove` 列表,以包含你希望去除的所有介词,然后使用这个函数来处理句子,去除指定的介词及其后面紧跟的一个单词。