# Induce a PCFG grammar from the treebank data. Assuming yourself to be Mr. P, implement the above problem.
# Collect the productions of every parsed sentence in the first two treebank
# files; these are the raw counts a PCFG is induced from.
# NOTE(review): the grammar itself is presumably built from `productions` by
# code that follows this block (e.g. nltk.induce_pcfg) -- confirm.
print("Induce PCFG grammar from treebank data:")
productions = []
for item in treebank.fileids()[:2]:
    for tree in treebank.parsed_sents(item):
        # Optional tree transformations, applied to the tree in place
        # before its productions are read off:
        # - collapse unary chains (A -> B -> C becomes A+B -> C);
        #   collapsePOS=False leaves the POS-tag level uncollapsed.
        tree.collapse_unary(collapsePOS=False)
        # - binarize n-ary rules (A -> B C D becomes A -> B A|<C-D>);
        #   horzMarkov=2 caps the sibling context kept in the new labels.
        tree.chomsky_normal_form(horzMarkov=2)
        productions.extend(tree.productions())