#!/usr/bin/env python
# -*- encoding: utf-8 -*-
# @Date : 2015-03-07 12:41:14
# @Author : NSSimacer
# @Email : wuxiaoqiang1020@gmail.com
# @Version : 1.0
import re
from collections import Counter
def count_words(file_name):
    '''
    Count the words in a plain English text file.

    A "word" is any maximal run of ASCII letters and/or digits; every other
    character (whitespace, punctuation, apostrophes, non-ASCII text) acts as
    a separator. Matching is case-sensitive, so "Hello" and "hello" are
    counted as two different words.

    Args:
        file_name: path of the text file to read.

    Returns:
        A 4-tuple ``(lines_count, words_count, words, words_dict)``:
        the number of lines in the file, the total number of words, the
        list of all words in order of appearance, and a dict mapping each
        distinct word to its number of occurrences.

    Any error raised by ``open()`` (e.g. the file does not exist) is
    propagated to the caller.
    '''
    # Compiled once so the pattern lookup is not repeated for every line.
    word_pattern = re.compile(r'[a-zA-Z0-9]+')

    words = []
    lines_count = 0  # stays 0 when the file is empty
    with open(file_name, 'r') as f:
        for lines_count, line in enumerate(f, 1):
            # Keep only alphanumeric runs; no strip() is needed because
            # whitespace can never be part of a match.
            words.extend(word_pattern.findall(line))

    # Counter does the tallying; convert back to a plain dict so callers
    # keep receiving exactly the type they did before.
    words_dict = dict(Counter(words))
    return lines_count, len(words), words, words_dict
if __name__ == '__main__':
    # Script entry point: analyse a fixed sample file and print a report.
    file_name = 'plain_text.txt'

    # Unpack the result tuple so each value has a readable name.
    lines_count, words_count, words, words_dict = count_words(file_name)

    # Single-argument print(...) calls with %-formatting produce the same
    # output as the old Python 2 print statements and also run on Python 3.
    print('In file: %s' % file_name)
    print('Total Lines: %s' % lines_count)
    print('Total Words: %s' % words_count)
    print('Words: %s' % ', '.join(words))
    print('Frequency of each word:')
    for key, value in words_dict.items():
        print('%s: %s' % (key, value))