You are on page 1of 15

public class runle { FileOutputStream out = null; FileInputStream in = null; byte[] array = new byte[1]; public void compress(String

input_file, String output_file) { printf("Original file: " + input_file); printf("Compressed file: " + output_file);

try { in = new FileInputStream(input_file); out = new FileOutputStream(output_file); byte[] _array = new byte[1]; int last = 1; int lastbo = 2; int c; while ((c = getc()) != -1) { /* Read the next character. */ System.out.println(" Value of c : "+c); putc(c); // output it...

if (lastbo == last) { if (c == last) { int count = 0; while (( c = getc()) >= 0 ) { if (c == last) { count++; } else { break; }

} if ( c < 0 ) { /* eof handling */ if ((count >= 256) && (count % 256 == 0)) { count -=256; } else { if ( count == 0 ) count = -1; } } if (count >=0) { putc(count); } while ((count = count - 256) >= 0) { putc(last); } if (c >= 0) { putc(c); } } }

lastbo = last; last = c; } in.close(); out.close(); } catch(Exception e) {

printf("Error while processing file:"); } if (true) { File f1 = new File(input_file); File f2 = new File(output_file); long f1l = f1.length(); long f2l = f2.length(); printf("Original file size: " + f1l + " bytes."); printf("Compressed file size: " + f2l + " bytes."); if (f1l > f2l) { printf("Compressed by " + (f1l - f2l) + " bytes."); } else { printf("*** File gained in size by " + (f2l - f1l) + " bytes. ***"); }

} } int getc() throws IOException { int rb; if ((rb = in.read(array, 0, 1)) == -1) {/* Read the next character. */ return -1; // EOF flag... } System.out.println(" ARRAY[0] : "+array[0]); return array[0]; } void putc(int i) throws IOException { out.write((byte)i); }

static void printf(String s) { System.out.println(s); } public static void main(String[] args) throws IOException { runle ca = new runle(); String input_file = null; String output_file = null; printf(" **** RUN LENGTH ENCODING ****");

if (args.length > 0) { input_file = args[0]; } else { printf("Error: No input file specified"); System.exit(0); } if (args.length > 1) { output_file = args[1]; } else { printf("Error: No output file specified"); System.exit(0); } ca.compress(input_file, output_file); } }

Plain Text Attachment [ Scan and Save to Computer ]


The Laws of Cryptography: Huffman Coding Algorithm Copyright 2002 by Neal R. Wagner. All rights reserved.

NOTE: This site is obsolete. See book draft (in PDF):

The Laws of Cryptography with Java Code. Here is a Huffman code program in 6 files, coded in Java. The program either reads a file directly from standard input, or if the file name is on the command line, it uses that as the input. The program analyzes input file to get the symbol frequencies, and then calculates the code words for each symbol. It also actually creates the output coded file. However this file is a string of 0 and 1 ascii characters, not binary numbers. To actually produce a binary file would require more code. The code here does not create and transmit the decoding tree or information needed to create the decoding tree such as the frequency table. The encode algorithm (function encode inside Huffman.java) just uses sequential search, although the corresponding decode algorithm makes efficient use of the Huffman tree. The priority queue (implemented in the file PQueue.java) just uses a simple list and sequential search, whereas a good priority queue should be implemented with a heap. With the listing below I am experimenting with a program to color code the Java listing: Java source. ------------------------------------------------------------------------------// Entry.java: entry in the code frequency table class Entry { public char symb; // character to be encoded public double weight; // probability of occurrence of the character public String rep; // string giving 0-1 Huffman codeword for the char } ------------------------------------------------------------------------------// Table.java: Huffman code frequency table import java.io.*; class Table { public final int MAXT = 100; // maximum number of different symbols public int currTableSize; // current size as table is constructed public Entry[] tab; // the table array, not allocated private Reader in; // internal file name for input stream String file = ""; // the whole input file as a String private boolean fileOpen = false; // is the file open yet?

private String fileName; // name of input file, if present private int totalChars = 0; // total number of chars read char markerChar = '@'; // sentinal at end of file // Table: constructor, input parameter: input file name or null public Table(String f) { fileName = f; currTableSize = 0; tab = new Entry[MAXT]; } // dumpTable: debug dump of contents of Table public void dumpTable() { int i; System.out.println("\nDump of Table ----->"); System.out.println(" Size: " + currTableSize); for (i = 0; i < currTableSize; i++) { System.out.println("Entry " + i + ". Symbol: " + tab[i].symb + ", Weight: " + tab[i].weight + ", Representation: " + tab[i].rep); } System.out.println("----> End Dump of Table\n"); } // getNextChar: fetches next char. Also opens input file private char getNextChar() { char ch = ' '; // = ' ' to keep compiler happy if (!fileOpen) { fileOpen = true; if (fileName == null) in = new InputStreamReader(System.in); else { try { in = new FileReader(fileName); } catch (IOException e) { System.out.println("Exception opening " + fileName); } } } try { ch = (char)in.read(); } catch (IOException e) { System.out.println("Exception reading character"); } return ch;

} // buildTable: fetch each character and build the Table public void buildTable() { char ch = getNextChar(); while (ch != 65535 && ch != markerChar) { // EOF or special sentinal # totalChars++; file += ch; int i = lookUp(ch); if (i == -1) { // new entry tab[currTableSize] = new Entry(); tab[currTableSize].symb = ch; tab[currTableSize].weight = 1.0; tab[currTableSize].rep = ""; currTableSize++; } else { // existing entry tab[i].weight += 1.0; } // System.out.print(ch); // for debug ch = getNextChar(); } // while // finish calculating the weights for (int j = 0; j < currTableSize; j++) tab[j].weight /= (double)totalChars; // System.out.println(); // for debug } // lookUp: loop up the next char in the Table tab public int lookUp(char ch) { for (int j = 0; j < currTableSize; j++) if (tab[j].symb == ch) return j; return -1; } // log2: Logarithm base 2 public double log2(double d) { return Math.log(d) / Math.log(2.0); } // entropy: calculate entropy of the Table public double entropy() { double res = 0.0; for (int i = 0; i < currTableSize; i++)

res += tab[i].weight * log2(1.0/tab[i].weight); return res; } // aveCodeLen: calculate average code length public double aveCodeLen() { double res = 0.0; for (int i = 0; i < currTableSize; i++) res += tab[i].weight * tab[i].rep.length(); return res; } } ------------------------------------------------------------------------------// TreeNode.java: node in the Huffman tree, used for encode/decode class TreeNode { public double weight; // probability of symb occurring public char symb; // the symbol to be encoded public String rep; // string of 0's and 1's, the huffman code word public TreeNode left, right; // tree pointeres public int step; // step number in construction (just for displaying tree) } ------------------------------------------------------------------------------// ListNode.java: node in the linked list of trees, initially root node trees class ListNode { public TreeNode hufftree; public ListNode next; } ------------------------------------------------------------------------------// PQueue.java: implement a priority queue as a linked list of trees // Initialize it as a linked list of singleton trees class PQueue { ListNode list = null; // this points to the main list // insert: insert new entry into the list public void insert(TreeNode t) { ListNode l = new ListNode(); l.hufftree = t; l.next = list; list = l; }

// buildList: create the initial list with singleton trees public void buildList(Entry[] tab, int n) { int i; TreeNode tNode; for (i = 0; i < n; i++) { tNode = new TreeNode(); tNode.weight = tab[i].weight; tNode.left = tNode.right = null; tNode.symb = tab[i].symb; tNode.rep = ""; insert(tNode); } } // dumpList: debug dump of the list public void dumpList() { System.out.println("\nDump of List ----->"); ListNode l = list; while (l != null) { System.out.print("Symb: " + l.hufftree.symb); System.out.println(", Weight: " + l.hufftree.weight); l = l.next; } System.out.println("----> End Dump of List\n"); } // least: Remove and return from the list tree with greatest root weight // sort of a pain in the ass to write public TreeNode least() { ListNode l, oldl, minl = null, oldminl = null; // = null: for compiler double minw = 1000000; oldl = list; l = list; while (l != null) { if (l.hufftree.weight < minw) { minw = l.hufftree.weight; oldminl = oldl; minl = l; } oldl = l; l = l.next; }

if (minl == oldminl) { list = list.next; return minl.hufftree; } oldminl.next = minl.next; return minl.hufftree; } } ------------------------------------------------------------------------------// Huffman.java: the Huffman tree algorithm import java.text.DecimalFormat; class Huffman { public TreeNode tree; // the decoding tree public Table t; // the frequency and encoding table public PQueue p; // priority queue for building the Huffman tree private int depth; // depth variable for debug printing of tree String encodedFile, decodedFile; // files as Strings char markerChar = '@'; // sentinal at end of file public DecimalFormat fourDigits = new DecimalFormat("0.0000"); // Huffman: constructor, does all the work public Huffman(String fileName) { t = new Table(fileName); t.buildTable(); // t.dumpTable(); p = new PQueue(); p.buildList(t.tab, t.currTableSize); // p.dumpList(); tree = huffman(t.currTableSize); insertRep(tree, t.tab, t.currTableSize, ""); displayTree(tree); t.dumpTable(); // System.out.println("\nInput file (as a String):"); // System.out.println(t.file); encodedFile = encode(t.file); // System.out.println("\nEncoded file (as a String):"); // System.out.println(encodedFile); // decodedFile = decode(encodedFile); // System.out.println("\nDecoded file (as a String):"); // System.out.println(decodedFile); System.out.println("Entropy: " + t.entropy() + ", Ave. Code Length: " + t.aveCodeLen()); }

// encode: translate the input file to binary Huffman file public String encode(String file) { String returnFile = ""; // encoded file to return (as a String) for (int i = 0; i < file.length(); i++) { int loc = t.lookUp(file.charAt(i)); if (loc == -1) { System.out.println("Error in encode: can't find: " + file.charAt(i)); System.exit(0); } returnFile += t.tab[loc].rep; } return returnFile; } // decode: translate the binary file (as a string) back to chars public String decode(String file) { String returnFile = ""; // decoded file to return (as a String) TreeNode treeRef; // local tree variable to keep chasing into tree int i = 0; // index in the Huffman String while (i < file.length()) { // keep going to end of String treeRef = tree; // start at root of tree while (true) { if (treeRef.symb != markerChar) { // at a leaf node returnFile += treeRef.symb; break; } else if (file.charAt(i) == '0') { // go left with '0' treeRef = treeRef.left; i++; } else { // go right with '1' treeRef = treeRef.right; i++; } } // while (true) } // while return returnFile; } // huffman: construct the Huffman tree, for decoding public TreeNode huffman(int n) { int i; TreeNode tree = null; // = null for compiler

for (i = 0; i < n-1; i++) { tree = new TreeNode(); tree.left = p.least(); tree.left.step = i + 1; // just for displaying tree tree.right = p.least(); tree.right.step = i + 1; // just for displaying tree tree.weight = tree.left.weight + tree.right.weight; tree.symb = markerChar; // must not use '@' in input file tree.rep = ""; p.insert(tree); } return tree; } // displayTree: print out the tree, with initial and final comments public void displayTree(TreeNode tree) { System.out.println("\nDisplay of Huffman coding tree\n"); depth = 0; displayTreeRecurs(tree); } // displayTreeRecurs: need recursive function for inorder traveral public void displayTreeRecurs(TreeNode tree) { depth++; // depth of recursion String s = ""; if (tree != null) { s = display(tree.rep + "0"); System.out.println(s); displayTreeRecurs(tree.left); s = display(tree.rep); System.out.print(s + "+---"); if (depth != 1) { if (tree.symb == markerChar) System.out.print("+---"); } System.out.print(tree.symb + ": " + fourDigits.format(tree.weight) + ", " + tree.rep); if (depth != 1) System.out.println(" (step " + tree.step + ")"); else System.out.println(); displayTreeRecurs(tree.right); s = display(tree.rep + "1"); System.out.println(s); } depth--;

} // display: output blanks and verical lines to display tree private String display(String rep) { // tricky use of rep string to display correctly String s = " "; for (int i = 0; i < rep.length() - 1; i++) { // initial chars if (rep.charAt(i) != rep.charAt(i+1) ) s += "|"; else s += " "; s += " "; } return s; } // insertRep: tricky function to use Huffman tree to create representation public void insertRep(TreeNode tree, Entry tab[], int n, String repr) { //recursive function to insert Huffman codewords at each node. // this could just insert at the leaves. String s1, s2; tree.rep = repr; if ((tree.left) == null && (tree.right) == null) { for (int i = 0; i < n; i++) if (tree.symb == tab[i].symb) tab[i].rep = tree.rep; return; } s1 = repr; s1 += "0"; insertRep(tree.left, tab, n, s1); // recursive call to the left s2 = repr; s2 += "1"; insertRep(tree.right, tab, n, s2); // recursive call to the right } // main: doesn't do much; just feeds in input file name public static void main(String[] args) { Huffman huff; // pass an input file name if present on command line if (args.length > 0) huff = new Huffman(args[0]); else huff = new Huffman(null); }

} ------------------------------------------------------------------------------Here is an input file: ------------------------------------------------------------------------------[localhost:~/Documents/laws/huffman] neal% cat Testit.txt aaaaabbbbbcccccccccccccccdddddddddddddddddeeeeeeeeeeeeeeeeeefffffffffff fffffffffffffffffffffffffffff# ------------------------------------------------------------------------------Here is the output with various debug dumps: ------------------------------------------------------------------------------[localhost:~/Documents/laws/huffman] neal% java Huffman Testit.txt Display of Huffman coding tree +---f: 0.4000, 0 (step 5) | +---@: 1.0000, | | +---b: 0.0500, 1000 (step 1) | | | +---+---@: 0.1000, 100 (step 2) | | | | | +---a: 0.0500, 1001 (step 1) | | | +---+---@: 0.2500, 10 (step 4) | | | | | +---c: 0.1500, 101 (step 2) | | +---+---@: 0.6000, 1 (step 5) | | +---d: 0.1700, 110 (step 3) | | +---+---@: 0.3500, 11 (step 4) | +---e: 0.1800, 111 (step 3) Dump of Table -----> Size: 6 Entry 0. Symbol: a, Weight: Entry 1. Symbol: b, Weight: Entry 2. Symbol: c, Weight: Entry 3. Symbol: d, Weight: Entry 4. Symbol: e, Weight:

0.05, 0.05, 0.15, 0.17, 0.18,

Representation: Representation: Representation: Representation: Representation:

1001 1000 101 110 111

Entry 5. Symbol: f, Weight: 0.4, Representation: 0 ----> End Dump of Table Entropy: 2.251403369717592, Ave. Code Length: 2.3 Input file (as a String): aaaaabbbbbcccccccccccccccdddddddddddddddddeeeeeeeeeeeeeeeeeefffffffffff fffffffffffffffffffffffffffff Encoded file (as a String): 10011001100110011001100010001000100010001011011011011011011011011011011 01101101101101110110110110110110110110110110110110110110110110110111111 11111111111111111111111111111111111111111111111100000000000000000000000 00000000000000000 Decoded file (as a String): aaaaabbbbbcccccccccccccccdddddddddddddddddeeeeeeeeeeeeeeeeeefffffffffff fffffffffffffffffffffffffffff ten42%

You might also like