#!/usr/bin/env python
# -*- encoding: utf-8 -*-
# @Date    : 2015-03-07 12:41:14
# @Author  : NSSimacer
# @Email   : wuxiaoqiang1020@gmail.com
# @Version : 1.0

import re
from collections import Counter

def count_words(file_name):
    """Count the lines and alphanumeric words in a text file.

    A "word" is any maximal run of ASCII letters and digits
    (``[a-zA-Z0-9]+``); every other character acts as a separator.
    Matching is case-sensitive, so ``The`` and ``the`` are distinct words.

    Args:
        file_name: Path of the text file to read.

    Returns:
        A 4-tuple ``(lines_count, words_count, words, words_dict)``:
        the number of lines read, the total number of words found, the
        list of words in order of appearance, and a plain ``dict`` mapping
        each distinct word to its number of occurrences.

    Raises:
        Whatever ``open()`` raises when the file cannot be opened.
    """
    # Compile once instead of re-resolving the pattern for every line.
    word_pattern = re.compile(r'[a-zA-Z0-9]+')

    words = []
    lines_count = 0

    with open(file_name, 'r') as f:
        for line in f:
            lines_count += 1
            # No strip() needed: whitespace never matches the pattern.
            words.extend(word_pattern.findall(line))

    # Convert back to a plain dict so callers get the same type as before.
    words_dict = dict(Counter(words))

    return lines_count, len(words), words, words_dict

if __name__ == '__main__':
    # Script entry point: report statistics for a fixed sample file in the
    # current working directory.
    file_name = 'plain_text.txt'

    lines_count, words_count, words, words_dict = count_words(file_name)

    # Each print(...) call takes a single pre-formatted string so the output
    # is identical under the Python 2 print statement and the Python 3
    # print function (the original statement form is a SyntaxError on 3.x).
    print('In file: %s' % file_name)
    print('Total Lines: %s' % lines_count)
    print('Total Words: %s' % words_count)
    print('Words: %s' % ', '.join(words))
    print('Frequency of each word:')

    for word, count in words_dict.items():
        print('%s: %s' % (word, count))