I am getting "ValueError: too many values to unpack (expected 4)" with the code below. Please help me! I am trying to lemmatize the text, cut off the most common words, add a library that can identify common words, and find the relationships between words.
def build_dataset(words, vocabulary_size):
    """Build an integer-encoded dataset and vocabulary mappings from text.

    Each entry of ``words`` is lower-cased, tokenized with the module-level
    ``word_tokenize`` and lemmatized with the module-level ``lemmatizer``
    (presumably NLTK's ``word_tokenize`` / ``WordNetLemmatizer`` -- confirm
    against the file's imports). Only lemmas whose frequency is strictly
    between 50 and 5000 are eligible for the vocabulary; everything else is
    encoded as the ``'unk'`` entry at index 0.

    Args:
        words: Iterable of strings (e.g. lines or documents).
        vocabulary_size: Maximum vocabulary size, including the ``'unk'``
            entry.

    Returns:
        A 4-tuple ``(data, count, dictionary, reverse_dictionary)``:
        ``data`` is the lemmatized token stream as a list of indices,
        ``count`` is ``[['unk', n_unknown], (word, frequency), ...]``,
        ``dictionary`` maps word -> index and ``reverse_dictionary`` maps
        index -> word.
    """
    from collections import Counter

    # Tokenize and lemmatize every input string into one flat token stream.
    lexicon = []
    for line in words:
        lexicon.extend(word_tokenize(line.lower()))
    lexicon = [lemmatizer.lemmatize(token) for token in lexicon]
    w_counts = Counter(lexicon)

    # Keep mid-frequency lemmas only: drops very common and very rare words.
    kept = Counter({w: c for w, c in w_counts.items() if 5000 > c > 50})
    print(len(kept))

    # NOTE: the original returned the filtered word list right here, which
    # made the 4-tuple return below unreachable and caused the caller's
    # "ValueError: too many values to unpack (expected 4)".

    # Rank by the real frequencies. Counting the already-deduplicated word
    # list (as before) would give every word a count of 1.
    count = [['unk', -1]]
    count.extend(kept.most_common(vocabulary_size - 1))

    dictionary = {}
    for entry, _ in count:
        dictionary[entry] = len(dictionary)

    # Encode the token stream; anything outside the vocabulary becomes index
    # 0 ('unk') and is tallied so count[0] holds a real token count.
    data = []
    unk_count = 0
    for token in lexicon:
        if token in dictionary:
            index = dictionary[token]
        else:
            index = 0
            unk_count += 1
        data.append(index)
    count[0][1] = unk_count

    reverse_dictionary = dict(zip(dictionary.values(), dictionary.keys()))
    return data, count, dictionary, reverse_dictionary


data, count, dictionary, reverse_dictionary = build_dataset(words, vocabulary_size)
Comments
Post a Comment