You are on page 1 of 1

import nltk

from nltk import ngrams, pos_tag

from nltk.tokenize import word_tokenize

# Demo: tokenize a sentence, POS-tag it, and print the bigrams and trigrams
# built over the resulting (word, tag) pairs.

text = "la Caputxeta vermella va veure el llop i li va donar una carbassa."

n = 2  # bigrams
m = 3  # trigrams

# Tokenize the text into words.
# NOTE(review): word_tokenize and pos_tag are called with their defaults,
# which per the NLTK docs target English; the sample sentence is not English,
# so the resulting tags are presumably unreliable -- confirm whether a
# language-specific tokenizer/tagger should be used instead.
words = word_tokenize(text)

# Tag the words with their part of speech; yields a list of (word, tag) pairs.
tagged_words = pos_tag(words)

# Extract bigrams and trigrams over the (word, tag) pairs (not over the bare
# tags). ngrams() returns a one-shot iterator, so each result can be consumed
# only once -- here by the corresponding print loop below.
tagged_bigrams = ngrams(tagged_words, n)
tagged_trigrams = ngrams(tagged_words, m)

# Print the bigrams and trigrams, one n-gram per line.
print("Bigrams:")
for bg in tagged_bigrams:
    print(bg)

print("\nTrigrams:")
for tg in tagged_trigrams:
    print(tg)

You might also like