diff --git a/README.md b/README.md
index 99ef394..4628134 100644
--- a/README.md
+++ b/README.md
@@ -4,6 +4,10 @@ A Java text editor program that will become a coding IDE among others. It can be
 
 ## Compression
 
+### Huffman Coding (1951)
+
+Huffman coding is a lossless data compression algorithm. The idea is to assign variable-length codes to input characters; the length of each code is based on the frequency of the corresponding character. The most frequent character gets the shortest code and the least frequent character gets the longest code.
+
 ### RLE (1967)
 
 Run-length encoding (RLE) is a form of lossless data compression in which runs of data (consecutive occurrences of the same data value) are stored as a single occurrence of that data value and a count of its consecutive occurrences, rather than as the original run. As an imaginary example of the concept, when encoding an image built up from colored dots, the sequence "green green green green green green green green green" is shortened to "green x 9". This is most efficient on data that contains many such runs, for example, simple graphic images such as icons, line drawings, games, and animations. For files that do not have many runs, encoding them with RLE could increase the file size.
diff --git a/src/main/java/com/ankhorage/eddy/TextEditor.java b/src/main/java/com/ankhorage/eddy/TextEditor.java
index c4a3784..36a62c7 100644
--- a/src/main/java/com/ankhorage/eddy/TextEditor.java
+++ b/src/main/java/com/ankhorage/eddy/TextEditor.java
@@ -7,6 +7,7 @@ import com.ankhorage.eddy.encryption.CaesarCipher;
 import com.ankhorage.eddy.encryption.EncryptionException;
 
 import com.ankhorage.eddy.compression.RLECompression;
+import com.ankhorage.eddy.compression.HuffmanCompression;
 import com.ankhorage.eddy.compression.CompressionException;
 
 public class TextEditor extends JFrame {
@@ -14,11 +15,13 @@ public class TextEditor extends JFrame {
     private JTextArea textArea;
     private CaesarCipher caesarCipher;
     private RLECompression rleCompression;
+    private HuffmanCompression huffmanCompression;
 
     public TextEditor() {
         // Initialize our algorithms
         caesarCipher = new CaesarCipher();
         rleCompression = new RLECompression();
+        huffmanCompression = new HuffmanCompression();
 
         // Setup window
         setTitle("Java Text Editor");
@@ -95,14 +98,21 @@ private JMenu createSecurityMenu() {
     private JMenu createCompressionMenu() {
         JMenu compressionMenu = new JMenu("Compression");
 
-        JMenuItem compressItem = new JMenuItem("Compress (RLE)");
-        JMenuItem decompressItem = new JMenuItem("Decompress (RLE)");
-
-        compressItem.addActionListener(e -> handleCompression(true));
-        decompressItem.addActionListener(e -> handleCompression(false));
-
-        compressionMenu.add(compressItem);
-        compressionMenu.add(decompressItem);
+        JMenuItem rleCompressItem = new JMenuItem("Compress (RLE)");
+        JMenuItem rleDecompressItem = new JMenuItem("Decompress (RLE)");
+        rleCompressItem.addActionListener(e -> handleRLECompression(true));
+        rleDecompressItem.addActionListener(e -> handleRLECompression(false));
+
+        JMenuItem huffmanCompressItem = new JMenuItem("Compress (Huffman)");
+        JMenuItem huffmanDecompressItem = new JMenuItem("Decompress (Huffman)");
+        huffmanCompressItem.addActionListener(e -> handleHuffmanCompression(true));
+        huffmanDecompressItem.addActionListener(e -> handleHuffmanCompression(false));
+
+        compressionMenu.add(rleCompressItem);
+        compressionMenu.add(rleDecompressItem);
+        compressionMenu.addSeparator();
+        compressionMenu.add(huffmanCompressItem);
+        compressionMenu.add(huffmanDecompressItem);
 
         return compressionMenu;
     }
@@ -127,7 +137,7 @@ private void handleCaesarOperation(boolean isEncrypt) {
         }
     }
 
-    private void handleCompression(boolean isCompress) {
+    private void handleRLECompression(boolean isCompress) {
         try {
             String text = getSelectedOrAllText();
             if (isCompress) {
@@ -142,8 +152,29 @@ private void handleCompression(boolean isCompress) {
             String operation = isCompress ? "Compression" : "Decompression";
             showError(operation + " error: " + ex.getMessage());
         } catch (IllegalArgumentException ex) {
-            // This catches Base64 decoding errors
-            showError("Invalid compressed data format");
+            showError("Invalid RLE compressed data format");
+        }
+    }
+
+    private void handleHuffmanCompression(boolean isCompress) {
+        try {
+            String text = getSelectedOrAllText();
+            if (isCompress) {
+                byte[] compressed = huffmanCompression.compress(
+                    text.getBytes(java.nio.charset.StandardCharsets.UTF_8));
+                updateText(Base64.getEncoder().encodeToString(compressed));
+            } else {
+                byte[] decompressed = huffmanCompression.decompress(
+                    Base64.getDecoder().decode(text));
+                updateText(new String(
+                    decompressed, java.nio.charset.StandardCharsets.UTF_8));
+            }
+        } catch (CompressionException ex) {
+            String operation = isCompress ? "Compression" : "Decompression";
+            showError(operation + " error: " + ex.getMessage());
+        } catch (IllegalArgumentException ex) {
+            // Base64.Decoder.decode rejects malformed input with this exception.
+            showError("Invalid Huffman compressed data format");
         }
     }
 
diff --git a/src/main/java/com/ankhorage/eddy/compression/HuffmanCompression.java b/src/main/java/com/ankhorage/eddy/compression/HuffmanCompression.java
new file mode 100644
index 0000000..cf7d118
--- /dev/null
+++ b/src/main/java/com/ankhorage/eddy/compression/HuffmanCompression.java
@@ -0,0 +1,284 @@
+package com.ankhorage.eddy.compression;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.DataInputStream;
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.PriorityQueue;
+
+/**
+ * Lossless byte-oriented compression using Huffman coding.
+ *
+ * <p>Compressed layout (big-endian, written with {@link DataOutputStream}):
+ * symbol count (int), one (symbol byte, frequency int) pair per distinct
+ * symbol, the number of payload bits (long), then the packed code bits, most
+ * significant bit first and zero-padded to a whole byte.
+ *
+ * <p>The header deliberately avoids Java object serialization so that decoding
+ * untrusted input never deserializes arbitrary objects.
+ */
+public class HuffmanCompression implements CompressionAlgorithm {
+
+    private static final int ALGORITHM_YEAR = 1951;
+
+    /** Number of distinct byte values, i.e. the largest possible symbol table. */
+    private static final int MAX_SYMBOLS = 256;
+
+    /**
+     * Compresses {@code data} with a Huffman code built from its byte frequencies.
+     *
+     * @param data bytes to compress; may be {@code null} or empty
+     * @return the encoded header and payload, or an empty array for null/empty input
+     * @throws CompressionException if the output cannot be assembled
+     */
+    @Override
+    public byte[] compress(byte[] data) throws CompressionException {
+        if (data == null || data.length == 0) {
+            return new byte[0];
+        }
+
+        Map<Byte, Integer> frequencyTable = buildFrequencyTable(data);
+        Node root = buildHuffmanTree(frequencyTable);
+        Map<Byte, String> huffmanCodes = generateHuffmanCodes(root);
+
+        BitStream bitStream = new BitStream();
+        for (byte b : data) {
+            bitStream.write(huffmanCodes.get(b));
+        }
+
+        try (ByteArrayOutputStream byteOut = new ByteArrayOutputStream();
+             DataOutputStream out = new DataOutputStream(byteOut)) {
+            out.writeInt(frequencyTable.size());
+            for (Map.Entry<Byte, Integer> entry : frequencyTable.entrySet()) {
+                out.writeByte(entry.getKey());
+                out.writeInt(entry.getValue());
+            }
+            out.writeLong(bitStream.getBitCount());
+            out.write(bitStream.toByteArray());
+            // Flush before snapshotting so no wrapper-held bytes are lost.
+            out.flush();
+            return byteOut.toByteArray();
+        } catch (IOException e) {
+            throw new CompressionException("Error during Huffman compression", e);
+        }
+    }
+
+    /**
+     * Restores the bytes produced by {@link #compress(byte[])}.
+     *
+     * @param data compressed bytes; may be {@code null} or empty
+     * @return the original bytes, or an empty array for null/empty input
+     * @throws CompressionException if {@code data} is truncated or malformed
+     */
+    @Override
+    public byte[] decompress(byte[] data) throws CompressionException {
+        if (data == null || data.length == 0) {
+            return new byte[0];
+        }
+
+        try (DataInputStream in = new DataInputStream(new ByteArrayInputStream(data))) {
+            int symbolCount = in.readInt();
+            if (symbolCount < 1 || symbolCount > MAX_SYMBOLS) {
+                throw new IOException("Invalid symbol count: " + symbolCount);
+            }
+
+            Map<Byte, Integer> frequencyTable = new HashMap<>();
+            long expectedLength = 0;
+            for (int i = 0; i < symbolCount; i++) {
+                byte symbol = in.readByte();
+                int frequency = in.readInt();
+                if (frequency <= 0 || frequencyTable.put(symbol, frequency) != null) {
+                    throw new IOException("Invalid frequency entry for symbol " + symbol);
+                }
+                expectedLength += frequency;
+            }
+
+            long bitCount = in.readLong();
+            // DataInputStream does not buffer, so available() on the wrapped
+            // ByteArrayInputStream is exactly the number of payload bytes left.
+            byte[] payload = new byte[in.available()];
+            in.readFully(payload);
+            if (bitCount < 0 || payload.length != (bitCount + 7) / 8) {
+                throw new IOException("Bit count " + bitCount + " does not match payload size");
+            }
+
+            Node root = buildHuffmanTree(frequencyTable);
+            byte[] decoded = decode(root, bitCount, payload);
+            if (decoded.length != expectedLength) {
+                throw new IOException("Decoded length does not match the frequency table");
+            }
+            return decoded;
+        } catch (IOException e) {
+            throw new CompressionException("Error during Huffman decompression", e);
+        }
+    }
+
+    @Override
+    public String getAlgorithmName() {
+        return "Huffman Coding";
+    }
+
+    /** Returns the year the algorithm was devised. */
+    public int getAlgorithmYear() {
+        return ALGORITHM_YEAR;
+    }
+
+    /** Counts how often each byte value occurs in {@code data}. */
+    private Map<Byte, Integer> buildFrequencyTable(byte[] data) {
+        Map<Byte, Integer> frequencyTable = new HashMap<>();
+        for (byte b : data) {
+            frequencyTable.merge(b, 1, Integer::sum);
+        }
+        return frequencyTable;
+    }
+
+    /**
+     * Builds the Huffman tree for a non-empty frequency table.
+     *
+     * <p>Ties on frequency are broken by the smallest symbol in each subtree.
+     * Subtrees in the queue never share symbols, so the ordering is total and
+     * the tree depends only on the table contents, not on map iteration order;
+     * compress and decompress therefore always rebuild the same tree.
+     */
+    private Node buildHuffmanTree(Map<Byte, Integer> frequencyTable) {
+        PriorityQueue<Node> priorityQueue = new PriorityQueue<>(
+            Comparator.comparingLong((Node n) -> n.frequency)
+                .thenComparingInt(n -> n.order)
+        );
+        for (Map.Entry<Byte, Integer> entry : frequencyTable.entrySet()) {
+            priorityQueue.add(new Node(entry.getKey(), entry.getValue()));
+        }
+
+        while (priorityQueue.size() > 1) {
+            Node left = priorityQueue.poll();
+            Node right = priorityQueue.poll();
+            priorityQueue.add(new Node(left, right));
+        }
+        return priorityQueue.poll();
+    }
+
+    /** Maps every symbol to its code, written as a string of '0' and '1'. */
+    private Map<Byte, String> generateHuffmanCodes(Node root) {
+        Map<Byte, String> huffmanCodes = new HashMap<>();
+        if (root.isLeaf()) {
+            // A lone symbol would otherwise get the empty code and emit no bits.
+            huffmanCodes.put(root.symbol, "0");
+        } else {
+            generateCodesRecursive(root, "", huffmanCodes);
+        }
+        return huffmanCodes;
+    }
+
+    private void generateCodesRecursive(Node node, String code, Map<Byte, String> huffmanCodes) {
+        if (node.isLeaf()) {
+            huffmanCodes.put(node.symbol, code);
+            return;
+        }
+        generateCodesRecursive(node.left, code + "0", huffmanCodes);
+        generateCodesRecursive(node.right, code + "1", huffmanCodes);
+    }
+
+    /**
+     * Walks the tree bit by bit and emits a symbol at every leaf.
+     *
+     * @throws IOException if the bits end in the middle of a code
+     */
+    private byte[] decode(Node root, long bitCount, byte[] compressedData) throws IOException {
+        ByteArrayOutputStream decodedBytes = new ByteArrayOutputStream();
+        boolean singleSymbol = root.isLeaf();
+        Node current = root;
+
+        for (long bitIndex = 0; bitIndex < bitCount; bitIndex++) {
+            if (singleSymbol) {
+                // Each bit of the one-symbol code stands for one occurrence.
+                decodedBytes.write(root.symbol);
+                continue;
+            }
+            int packed = compressedData[(int) (bitIndex >>> 3)];
+            int bit = (packed >> (7 - (int) (bitIndex & 7))) & 1;
+            current = (bit == 0) ? current.left : current.right;
+            if (current.isLeaf()) {
+                decodedBytes.write(current.symbol);
+                current = root;
+            }
+        }
+        if (current != root) {
+            throw new IOException("Compressed data ends in the middle of a code");
+        }
+        return decodedBytes.toByteArray();
+    }
+
+    /** Tree node; leaves carry a symbol, internal nodes carry two children. */
+    private static final class Node {
+        final byte symbol;
+        final long frequency;
+        /** Smallest symbol in this subtree; unique tie-breaker for the queue. */
+        final int order;
+        final Node left;
+        final Node right;
+
+        Node(byte symbol, long frequency) {
+            this.symbol = symbol;
+            this.frequency = frequency;
+            this.order = symbol;
+            this.left = null;
+            this.right = null;
+        }
+
+        Node(Node left, Node right) {
+            this.symbol = 0;
+            this.frequency = left.frequency + right.frequency;
+            this.order = Math.min(left.order, right.order);
+            this.left = left;
+            this.right = right;
+        }
+
+        boolean isLeaf() {
+            return left == null && right == null;
+        }
+    }
+
+    /** Accumulates bits most-significant-first into a byte buffer. */
+    private static final class BitStream {
+        private final ByteArrayOutputStream byteStream = new ByteArrayOutputStream();
+        private long bitCount = 0;
+        private int currentByte = 0;
+
+        void write(String bits) {
+            for (int i = 0; i < bits.length(); i++) {
+                write(bits.charAt(i) == '1' ? 1 : 0);
+            }
+        }
+
+        void write(int bit) {
+            currentByte = (currentByte << 1) | bit;
+            bitCount++;
+            if (bitCount % 8 == 0) {
+                byteStream.write(currentByte);
+                currentByte = 0;
+            }
+        }
+
+        /** Returns the bytes written so far; a partial last byte is zero-padded. */
+        byte[] toByteArray() {
+            byte[] full = byteStream.toByteArray();
+            int pending = (int) (bitCount % 8);
+            if (pending == 0) {
+                return full;
+            }
+            // Pad a copy so repeated calls never append the partial byte twice.
+            byte[] padded = Arrays.copyOf(full, full.length + 1);
+            padded[full.length] = (byte) (currentByte << (8 - pending));
+            return padded;
+        }
+
+        long getBitCount() {
+            return bitCount;
+        }
+    }
+}