taggeddocument gensim example
def test_mixed_tag_types(self):
"""Ensure alternating int/string tags don't share indexes in doctag_syn0"""
mixed_tag_corpus = [doc2vec.TaggedDocument(words, [i, words[0]]) for i, words in enumerate(raw_sentences)]
model = doc2vec.Doc2Vec()
model.build_vocab(mixed_tag_corpus)
expected_length = len(sentences) + len(model.docvecs.doctags) # 9 sentences, 7 unique first tokens
self.assertEquals(len(model.docvecs.doctag_syn0), expected_length)