#! /usr/bin/env python
#
#   >->->->->->->->->->->->->->->->->->->->->->->->->->->->->->->->->->->->->
#    huffman.py v0.04, Heiko Stamer <stamer@informatik.uni-leipzig.de>
#                      http://stinfwww.informatik.uni-leipzig.de/~mai97ixb
#
#
#    Ph.D. Raof Hamzoui: Indroduction to data compression, WS2000/2001
#                        (Ex.1: construct D-ary huffman coding tree)
#
#    References:
#
#        [1] David A. Huffman: A Method for the Construction of
#                              Minimum-Redundancy Codes
#            Proceedings of the IRE, Vol. 40(10), pp. 1098-1101, 1952
#
#   >->->->->->->->->->->->->->->->->->->->->->->->->->->->->->->->->->->->->
#
# Copyright (C) 2000-until_the_end_of_the_world  <Heiko Stamer>
#
#   This program is free software; you can redistribute it and/or modify
#   it under the terms of the GNU General Public License as published by
#   the Free Software Foundation; either version 2 of the License, or
#   (at your option) any later version.
#
#   This program is distributed in the hope that it will be useful,
#   but WITHOUT ANY WARRANTY; without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
#   GNU General Public License for more details.
#
#   You should have received a copy of the GNU General Public License
#   along with this program; if not, write to the Free Software
#   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

class HuffmanNode:
	"a node of the huffman tree"
	
	def __init__(self, p, s):
		self.probability, self.joined = p, 0
		self.symbol, self.codeword = s, ''
		self.sons = []
		
	def __repr__(self):
		return "Symbol: %s Probability: %s Codeword: %s Sons: %s"\
		% (self.symbol, self.probability, self.codeword, self.sons)

class HuffmanTree:
	"the huffman tree"
	
	# Konstruktor der Klasse
	def __init__(self, INITIAL_NODE_LIST, CODE_ALPHABET):
		self.TREE = []
		for i in INITIAL_NODE_LIST:
			self.TREE.append(HuffmanNode(i[0], i[1]))
		self.C = CODE_ALPHABET

	# rekursive Ausgabe
	def show(self, idx, level):
		A = "TREE[%s] - %s\n" % (idx, self.TREE[idx])
		for i in self.TREE[idx].sons:
			A = A + "   " * level + self.show(i, level + 1)
		return A

	# Ausgabefunktion der Klasse
	def __repr__(self):
		return self.show(len(self.TREE) - 1, 1)
		
	def joinable_probabilitys(self):
		PROB_LIST, IDX_LIST, idx = [], [], 0
		for i in self.TREE:
			if (not i.joined):
				PROB_LIST.append(i.probability)
				IDX_LIST.append(idx)
			idx = idx + 1
		return PROB_LIST, IDX_LIST
	
	# join(n): fuegt neuen Knoten (aus Vereinigung von n Knoten) in Baum ein
	def join(self, n):
		J, joined_prob  = [], 0.0
		for i in range(0, n):
			# minimale Wahrscheinlichkeiten suchen
			PL, IX = self.joinable_probabilitys()
			idx = IX[PL.index(min(PL))]
			joined_prob = joined_prob + self.TREE[idx].probability
			self.TREE[idx].joined = 1
			J.append(idx)
		# Knoten eintragen
		print "join TREE%s => TREE[%s]" % (J, len(self.TREE))
		self.TREE.append(HuffmanNode(joined_prob, '-'))
		for i in J:
			self.TREE[-1].sons.append(i)

	# join_all(): Fuehrt den ersten Teil des Algorithmus - das Zusammen-
	#             fassen von Knoten mit kleinster Wahrscheinlichkeit - aus.
	def join_all(self):
		# 0te Stufe: Nach Formel aus [1] berechnen, wieviele Knoten
		# im ersten Schritt vereinigt werden muessen (n0).
		D, N, n0 = len(self.C), len(self.TREE), 0
		for i in range(2, D + 1):
			if (((N - i) % (D - 1)) == 0):
				n0 = i
		
		# Wenn so eine Zahl gefunden wird, soviele Knoten zusammenfuehren.
		if (n0 != 0):
			self.join(n0)
				
		# 1te bis n-te Stufe: Solange noch Knoten zum vereinigen da sind,
		# werden |C| Knoten (Kardinalitaet Codealphabet) zusammengefuehrt.
		while (len(self.joinable_probabilitys()[0]) > 1):
			self.join(len(self.C))
	
	# code(idx): alle Knoten unterhalb von TREE[idx] kodieren
	#
	# Wenn wir diese Funktion mit dem Wurzelindex len(TREE) - 1 aufrufen,
	# wir der gesamte Huffmanbaum kodiert. (Rekursion !)
	def	code(self, idx):
		c = 0
		for i in self.TREE[idx].sons:
			# Codewort aus Vaterknoten und Codesymbol zusammensetzen
			self.TREE[i].codeword = self.TREE[idx].codeword + C[c]
			# rekursiv mit den Sohnknoten fortfahren
			self.code(i)
			c = c + 1

################################################################################
print "Beispiele (C = Codealphabet, P = Wahrscheinlichkeitsverteilung) "
print " TREE[i] ist der i-te Knoten, wobei i eindeutige Knotennummer ist."
print " Wenn Sons = [], dann handelt es sich um ein Blatt des Baumes, "
print " das dann letztendlich die Kodierung von Symbol->Codeword realisiert.\n"

P = [ (0.25, 'A'), (0.21, 'B'), (0.18, 'C'), (0.10, 'D'), (0.14, 'E'),\
(0.11, 'F'), (0.01, 'G') ]
C = [ '0', '1' ]

print "Huffman Kodierungsbaum fuer C = %s und\n P = %s:\n" % (C, P)
EXAMPLE1 = HuffmanTree(P, C)
EXAMPLE1.join_all()
EXAMPLE1.code(len(EXAMPLE1.TREE) - 1)
print EXAMPLE1
del EXAMPLE1

C = [ '0', '1', '2' ]

print "Huffman Kodierungsbaum fuer C = %s und\n P = %s:\n" % (C, P)
EXAMPLE2 = HuffmanTree(P, C)
EXAMPLE2.join_all()
EXAMPLE2.code(len(EXAMPLE2.TREE) - 1)
print EXAMPLE2
del EXAMPLE2

# Wahrscheinlichkeitsverteilung `deutsche Texte`
P = [ (0.0540, 'A'), (0.0189, 'B'), (0.0315, 'C'), (0.0517, 'D'), (0.1810, 'E'),\
(0.0160, 'F'), (0.0315, 'G'), (0.0514, 'H'), (0.0752, 'I'), (0.0019, 'J'),\

(0.0113, 'K'), (0.0345, 'L'), (0.0251, 'M'), (0.1042, 'N'), (0.0224, 'O'),\
(0.0059, 'P'), (0.0001, 'Q'), (0.0808, 'R'), (0.0635, 'S'), (0.0557, 'T'),\
(0.0410, 'U'), (0.0087, 'V'), (0.0167, 'W'), (0.0001, 'X'), (0.0002, 'Y'),\
(0.0167, 'Z') ]
C = [ '0', '1' ]

print "Huffman Kodierungsbaum fuer C = %s und\n P = %s:\n" % (C, P)
EXAMPLE3 = HuffmanTree(P, C)
EXAMPLE3.join_all()
EXAMPLE3.code(len(EXAMPLE3.TREE) - 1)
print EXAMPLE3
del EXAMPLE3

