# Download the following two text files (famous English books):
#
#   http://www.samyzaf.com/braude/PYTHON/projects/jude.txt
#   http://www.samyzaf.com/braude/PYTHON/projects/oliver_twist.txt
#
# Write a Python function letter_frequency(file) for counting English
# letters frequency in a text file.
# Your program output should look like:
#   a 0.07660
#   b 0.01401
#   c 0.02301
#   d 0.04541
#   e 0.12475
#   f 0.01936
#   g 0.02069
#   h 0.06534
#   i 0.06234
#   ...
#   A 0.00222
#   B 0.00108
#   C 0.00075
#   D 0.00050
#   E 0.00058
#   F 0.00045
#   G 0.00049
#   ...
# - The frequency of a letter is defined as the ratio between the number
#   of its occurrences and the total number of letters in the text
#   (make sure to ignore characters that are not English letters!).
# - Print the frequency tables for the two books.
# - Do you notice any similarities between the two tables?
# - Hints: Import the string module and look at string.letters data member
#   Use a dictionary to hold a mapping between a letter and its number of occurrences.

import random
import string


def letter_frequency(textfile):
    """Print and return the relative frequency of each English letter in a file.

    Only the 52 ASCII letters (``a-z`` then ``A-Z``) are counted; every other
    character is ignored. One line per letter is printed in the form
    ``a 0.07660`` (five decimals), in ``string.ascii_letters`` order.

    Args:
        textfile: Path of the text file to analyse.

    Returns:
        A dict mapping each of the 52 ASCII letters to its frequency, i.e.
        its number of occurrences divided by the total number of letters.
        If the file contains no letters at all, every frequency is 0.0.
    """
    # string.ascii_letters exists on both Python 2 and 3 and, unlike the
    # Python-2-only string.letters, does not depend on the locale.
    counts = dict.fromkeys(string.ascii_letters, 0)
    total_letters = 0
    with open(textfile, 'r') as f:
        for line in f:
            for char in line:
                if char in counts:
                    counts[char] += 1
                    total_letters += 1

    frequencies = {}
    for letter in string.ascii_letters:
        if total_letters:
            # float() keeps this a true division on Python 2 as well.
            frequencies[letter] = counts[letter] / float(total_letters)
        else:
            # No letters in the file: avoid dividing by zero.
            frequencies[letter] = 0.0
        print("%s %.5f" % (letter, frequencies[letter]))
    return frequencies


def random_cipher():
    """Return a random substitution cipher over the 52 ASCII letters.

    Returns:
        A dict mapping every ASCII letter to a distinct ASCII letter (a
        random permutation of ``string.ascii_letters``). Case is not
        preserved: a lowercase letter may map to an uppercase one.
    """
    shuffled = list(string.ascii_letters)
    random.shuffle(shuffled)
    return dict(zip(string.ascii_letters, shuffled))


def file_encrypt(filename, outfile, cipher):
    """Write a copy of a text file with its characters substituted via cipher.

    Args:
        filename: Path of the input text file.
        outfile: Path of the output file (created or overwritten).
        cipher: Dict mapping a character to its replacement; characters
            that are not keys of the dict are copied unchanged.
    """
    with open(filename, 'r') as src:
        with open(outfile, 'w') as dst:
            for line in src:
                # Substitute a whole line at a time instead of issuing one
                # write call per character.
                dst.write("".join([cipher.get(char, char) for char in line]))


def main():
    """Print the letter frequencies of jude.txt and write an encrypted copy."""
    letter_frequency("D:/workspace/jude.txt")
    cipher = random_cipher()
    file_encrypt("d:/workspace/jude.txt", "d:/workspace/jude_encripted.txt", cipher)


if __name__ == '__main__':
    main()