[[TopPage]]
 
 *** NLTK TFIDF [#s63f9719]
 +http://nltk.googlecode.com/svn/trunk/doc/api/nltk.text.TextCollection-class.html
 --import nltk
 --from nltk.book import *
 +NLTKを利用して、TFIDFを求める。
 	#! /usr/bin/env python
 	#encoding: utf-8
 	import commands
 	import glob
 	
 	import nltk
 	
 	# Collect the input files: everything in the current directory matching Tw*.
 	# glob avoids parsing shell output, which would hand back the ls error
 	# message as a "file name" when nothing matches; sorted() keeps the
 	# processing order deterministic.
 	files = sorted(glob.glob('Tw*'))
 	if not files:
 	    raise SystemExit('no input files matching Tw*')
 	
 	documents = []  # one token list per input file
 	all_words = []  # every token from every file, concatenated
 	
 	# Read each file into its own token list.
 	# Every matching file is processed: idf compares a term across documents,
 	# so a collection holding a single document would not be informative.
 	for path in files:
 	    print(path)
 	    tokens = []
 	    with open(path, 'r') as handle:
 	        for line in handle:
 	            # Strip only the newline; slicing off the last character would
 	            # eat real data when the final line has no trailing newline.
 	            # split(" ") is kept on purpose, so consecutive spaces and
 	            # blank lines still yield empty-string tokens.
 	            tokens.extend(line.rstrip('\n').split(" "))
 	    documents.append(tokens)
 	    all_words.extend(tokens)
 	    print(len(tokens))
 	
 	# Build the collection from the per-file token lists.
 	collection = nltk.TextCollection(documents)
 	
 	print(len(documents))
 	# Term frequency of "RT" over all tokens, then its inverse document frequency.
 	print(collection.tf("RT", all_words))
 	print(collection.idf("RT"))