|
| 1 | +from collections import Counter |
| 2 | +from fractions import Fraction |
| 3 | +from math import log |
| 4 | + |
| 5 | + |
def get_letter_chances(message: str) -> tuple:
    """Return the symbol probabilities of *message* and its Shannon entropy.

    Args:
        message: Text whose characters are treated as the source alphabet.

    Returns:
        A ``(letter_chances, entropy)`` pair. ``letter_chances`` maps each
        distinct character to its relative frequency as a ``Fraction``,
        ordered from most to least frequent (ties keep first-seen order).
        ``entropy`` is the entropy in bits per symbol, rounded to one
        decimal place. An empty message yields ``({}, 0.0)``.
    """
    message_length = len(message)
    # most_common() already sorts by count, descending, and is stable for
    # equal counts, so no further sorting is needed.
    letter_chances = {
        symbol: Fraction(count, message_length)
        for symbol, count in Counter(message).most_common()
    }
    # Base-2 logarithm: the result is later compared with the length of a
    # binary code, so the entropy has to be measured in bits, not nats.
    entropy = sum(-chance * log(chance, 2) for chance in letter_chances.values())
    entropy = round(float(entropy), 1)
    return letter_chances, entropy
| 15 | + |
| 16 | + |
# Module-level accumulator filled by splitter(): a list of
# ((symbol, weight), code_string) pairs. It is never reset here, so callers
# that run several encodings must clear it between runs.
Code = []


def _split_in_half(group: list) -> tuple:
    """Cut *group* after the first item at which the running weight reaches half.

    The cut is clamped so that a group of two or more items always yields two
    non-empty parts; this keeps the recursion in splitter() finite even when
    the group is not sorted by descending weight.
    """
    half = sum(item[1] for item in group) / 2
    running = 0
    # Fallback cut: everything but the last item goes left (or the only item).
    cut = max(1, len(group) - 1)
    for index, item in enumerate(group[:-1]):
        running += item[1]
        if running >= half:
            cut = index + 1
            break
    return group[:cut], group[cut:]


def splitter(group: list, code=None, where=None) -> None:
    """Recursively assign binary prefix codes to *group*, appending to ``Code``.

    The group is cut in two parts of roughly equal total weight; the first
    part gets the current prefix plus ``'0'``, the second the prefix plus
    ``'1'``. Parts with more than one item are split again.

    Args:
        group: ``(symbol, weight)`` pairs, expected in descending weight order.
        code: Prefix already assigned to this group; ``None`` means no prefix
            (top-level call).
        where: Only affects the order in which results are appended to
            ``Code``. ``'r'`` emits the second part before the first, any
            other value emits the first part first. At the top level
            (``None``) the first part is processed as ``'l'`` and the second
            as ``'r'``; nested calls inherit the tag.

    Returns:
        None. Each result is appended to the module-level ``Code`` list as
        ``((symbol, weight), code_string)``.
    """
    if not group:
        return
    prefix = '' if code is None else code
    left, right = _split_in_half(group)
    branches = [
        (left, prefix + '0', where or 'l'),
        (right, prefix + '1', where or 'r'),
    ]
    if where == 'r':
        # The right-hand subtree reports its second part first.
        branches.reverse()
    for part, part_code, side in branches:
        if len(part) == 1:
            Code.append((part[0], part_code))
        elif part:
            splitter(part, part_code, side)
| 78 | + |
| 79 | + |
def encode(letter_chances: dict, entropy: float) -> dict:
    """Build a binary prefix code for the symbols and print its statistics.

    Args:
        letter_chances: Mapping of symbol to probability, ordered from most
            to least probable (the order splitter() expects).
        entropy: Entropy of the source, used for the efficiency ratio.

    Returns:
        Mapping of symbol to its code string, ordered by descending
        probability.

    Side effects:
        Resets and refills the module-level ``Code`` list, and prints the
        expected code length and the code efficiency.
    """
    # Code is shared module state: drop whatever an earlier run left behind,
    # otherwise stale entries leak into this result.
    Code.clear()
    splitter(list(letter_chances.items()))
    encoded_chances = sorted(Code, key=lambda entry: entry[0][1], reverse=True)
    # Expected length of an encoded letter: each code length weighted by the
    # probability of its symbol, not by how many codes share that length.
    math_expect = sum(chance * len(code) for (_, chance), code in encoded_chances)
    math_expect = round(float(math_expect), 3)
    # With no symbols there is no code; report 0.0 instead of dividing by zero.
    code_economy = round(entropy / math_expect, 3) if math_expect else 0.0
    print("Математическое ожидание длины закодированной буквы равно", math_expect)
    print("Экономность кода равна", code_economy)
    return {symbol: code for (symbol, _), code in encoded_chances}
| 94 | + |
| 95 | + |
def get_codes(codes: dict) -> None:
    """Print one table row per symbol showing the code assigned to it.

    Args:
        codes: Mapping of symbol to its code string; rows are printed in the
            mapping's iteration order.
    """
    row_template = "|Символ {0:1} закодирован как {1:10}|"
    for symbol, code in codes.items():
        print(row_template.format(symbol, code))
| 99 | + |
| 100 | + |
| 101 | + |
# Demo run: analyse a sample message, build its code and print the table.
test0, test1 = get_letter_chances(message='дверь_коридор_лестница')
get_codes(codes=encode(letter_chances=test0, entropy=test1))
0 commit comments