基本术语:
路径和路径长度:
在一棵树中,从一个结点往下可以达到的孩子或孙子结点之间的通路,称为路径。通路中分支的数目称为路径长度。若规定根结点的层数为1,则从根结点 到第L层结点的路径长度为L-1。
结点的权及带权路径长度:
若将树中结点赋给一个有着某种含义的数值,则这个数值称为该结点的权。结点的带权路径长度为:从根结点到该结点之间的路径长度与该结点的权的乘 积。
树的带权路径长度:
树的带权路径长度规定为所有叶子结点的带权路径长度之和,记为WPL。
哈夫曼树:
给定N个权值作为N个叶子节点,构造一棵二叉树,若该树的带权路径长度达到最小,称这样的二叉树为最优二叉树,也称为哈夫曼树(Huffman Tree)。 哈夫曼树是带权路径长度最短的树,权值较大的结点离根较近。
package data_structure_and_algorithm.huffman; import lombok.Data; /** * 哈夫曼节点 */ @Data class HuffmanNode implements Comparable<HuffmanNode> { /** * 字符 */ private Byte character; /** * 权重 */ private int weight; /** * 左子节点 */ private HuffmanNode left; /** * 右子节点 */ private HuffmanNode right; public HuffmanNode(int weight, Byte character) { this.weight = weight; this.character = character; } @Override public String toString() { return "HuffmanNode{" + "weight=" + weight + "character=" + character + '}'; } @Override public int compareTo(HuffmanNode o) { return this.weight - o.weight; } }
package data_structure_and_algorithm.huffman; import cn.hutool.core.collection.CollUtil; import lombok.Data; import java.util.Collections; import java.util.List; /** * 哈夫曼树 */ @Data public class HuffmanTree { /** * 根节点 */ private HuffmanNode root; /** * 构建哈夫曼树 * @param huffmanNodes */ public void createHuffmanTree(List<HuffmanNode> huffmanNodes) { if (CollUtil.isEmpty(huffmanNodes)) { return; } //当集合中只剩下一个节点,则哈夫曼树构建完成,且剩下的那个节点就是哈夫曼树的根节点 while (huffmanNodes.size() > 1) { //将集合变得有序(从小到大) Collections.sort(huffmanNodes); //取其中最小的两个节点作为子节点,构成一棵二叉树,该二叉树根节点的权重为两个子节点权重之和 HuffmanNode right = huffmanNodes.get(0); HuffmanNode left = huffmanNodes.get(1); HuffmanNode fatherNode = new HuffmanNode(left.getWeight() + right.getWeight(), null); fatherNode.setLeft(left); fatherNode.setRight(right); //从集合中删除这两个节点并且添加fatherNode节点 huffmanNodes.remove(left); huffmanNodes.remove(right); huffmanNodes.add(fatherNode); } this.root = huffmanNodes.get(0); } /** * 前序遍历 */ public void preorderTraversal() { preorderTraversal(root); } public void preorderTraversal(HuffmanNode node) { System.out.println(node); if (node.getLeft() != null) { preorderTraversal(node.getLeft()); } if (node.getRight() != null) { preorderTraversal(node.getRight()); } } }
哈夫曼编码:
Huffman于1952年提出一种编码方法,是可变字长编码(VLC)的一种,该方法完全依据字符出现概率来构造异字头的平均长度最短的码字,有时称之为最佳编码,一般就叫做Huffman编码(有时也称为霍夫曼编码)。
package data_structure_and_algorithm.huffman; import cn.hutool.core.collection.CollUtil; import cn.hutool.core.util.StrUtil; import lombok.Data; import lombok.NoArgsConstructor; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; /** * 哈夫曼编码 */ @Data @NoArgsConstructor public class HuffmanCode { /** * 哈夫曼码是否被8整除 */ private boolean integralDivision; /** * 原字节和哈夫曼码的映射 */ private Map<Byte, String> huffmanCodeMap; //哈夫曼编码:设定左分支为0,右分支为1 private static final String LEFT_BRANCH = "0"; private static final String RIGHT_BRANCH = "1"; /** * 创建原字节和权重的映射 * @param srcBytes * @return */ private Map<Byte, Integer> createWeightMap(byte[] srcBytes) { Map<Byte, Integer> weightMap = new HashMap<>(); for (byte srcByte : srcBytes) { if (weightMap.get(srcByte) == null) { weightMap.put(srcByte, 1); } else { weightMap.put(srcByte, weightMap.get(srcByte) + 1); } } return weightMap; } /** * 构建哈夫曼树 * @param weightMap * @return */ private HuffmanTree createHuffmanTree(Map<Byte, Integer> weightMap) { HuffmanTree huffmanTree = new HuffmanTree(); List<HuffmanNode> huffmanNodes = new ArrayList<>(weightMap.size()); for (Map.Entry<Byte, Integer> entry : weightMap.entrySet()) { HuffmanNode huffmanNode = new HuffmanNode(entry.getValue(), entry.getKey()); huffmanNodes.add(huffmanNode); } huffmanTree.createHuffmanTree(huffmanNodes); return huffmanTree; } /** * 原字节和哈夫曼码的映射 * @param root * @param srcBytes */ private void createHuffmanCodeMap(HuffmanNode root, byte[] srcBytes) { huffmanCodeMap = new HashMap<>(srcBytes.length); //原文件只有一个字符 if (root.getLeft() == null && root.getRight() == null) { huffmanCodeMap.put(root.getCharacter(), "0"); return; } StringBuilder huffmanCode = new StringBuilder(); createHuffmanCodeMap(root, huffmanCode, ""); } private void createHuffmanCodeMap(HuffmanNode node, StringBuilder huffmanCode, String branch) { StringBuilder huffmanCodeMethod = new StringBuilder(huffmanCode); huffmanCodeMethod.append(branch); if (node.getLeft() != null) { createHuffmanCodeMap(node.getLeft(), huffmanCodeMethod, LEFT_BRANCH); } if (node.getRight() != null) { createHuffmanCodeMap(node.getRight(), huffmanCodeMethod, RIGHT_BRANCH); } //叶子节点与哈夫曼码的映射 if (node.getCharacter() == null) { return; } huffmanCodeMap.put(node.getCharacter(), huffmanCodeMethod.toString()); } /** * 生成哈夫曼码(补码) * @param srcBytes * @return * @throws Exception */ private String generateHuffmanCodes(byte[] srcBytes) throws Exception { StringBuilder huffmanCodes = new StringBuilder(); for (byte srcByte : srcBytes) { if (StrUtil.isBlank(huffmanCodeMap.get(srcByte))) { throw new Exception("压缩失败,请重试!"); } huffmanCodes.append(huffmanCodeMap.get(srcByte)); } return huffmanCodes.toString(); } /** * 压缩后的字节数组 * @param huffmanCodes * @return */ private byte[] zipBytes(String huffmanCodes) { if (StrUtil.isBlank(huffmanCodes)) { return new byte[0]; } if (huffmanCodes.length() % 8 == 0) { integralDivision = true; } else { integralDivision = false; } //将哈夫曼码转为字节数组之后的长度 int length = (huffmanCodes.length() + 7) / 8; byte[] zipBytes = new byte[length]; //每八个bit组成一个byte(1 byte = 8 bit) int j = 0; for (int i = 0; i < length; i++) { if (j + 8 > huffmanCodes.length()) { //余下的哈夫曼补码不足8位 zipBytes[i] = (byte)Integer.parseUnsignedInt(huffmanCodes.substring(j), 2); } else { zipBytes[i] = (byte)Integer.parseUnsignedInt(huffmanCodes.substring(j, j + 8), 2); j += 8; } } return zipBytes; } /** * 生成压缩后的字节数组 * @param srcBytes * @return * @throws Exception */ public byte[] generateZipBytes(byte[] srcBytes) throws Exception { if (srcBytes == null || srcBytes.length == 0) { throw new Exception("压缩失败,请检查原文件!"); } System.out.println("========原文件与频次的映射========="); Map<Byte, Integer> weightMap = createWeightMap(srcBytes); for (Map.Entry<Byte, Integer> entry : weightMap.entrySet()) { System.out.print(entry.getKey() + "->" + entry.getValue() + " "); } System.out.println(); //System.out.println("========根据原文件构建的哈夫曼树========="); HuffmanTree huffmanTree = createHuffmanTree(weightMap); //huffmanTree.preorderTraversal(); System.out.println("========原字节和哈夫曼码的映射========="); createHuffmanCodeMap(huffmanTree.getRoot(), srcBytes); for (Map.Entry<Byte, String> entry : huffmanCodeMap.entrySet()) { System.out.print(entry.getKey() + "->" + entry.getValue() + " "); } System.out.println(); System.out.println("========最终的哈夫曼码(补码)========="); String huffmanCodes = generateHuffmanCodes(srcBytes); System.out.println(huffmanCodes + "(长度:" + huffmanCodes.length() + ")"); System.out.println("========压缩后的字节数组========="); byte[] zipBytes = zipBytes(huffmanCodes); return zipBytes; } /** * 解码 * @param zipBytes * @return */ public byte[] decode(byte[] zipBytes) throws Exception { if (zipBytes == null || zipBytes.length == 0) { return new byte[0]; } String huffmanCodes = changeBytesToCodes(zipBytes); return changeCodesToBytes(huffmanCodes); } /** * 将压缩的字节数组转为哈夫曼码(补码) * @param zipBytes * @return */ private String changeBytesToCodes(byte[] zipBytes) { StringBuilder huffmanCodes = new StringBuilder(); for (int i = 0; i < zipBytes.length; i++) { int temp = zipBytes[i]; String binaryString = null; if (temp >= 0) { //如果是正数则需要根据情况补0 if (i == zipBytes.length - 1 && !integralDivision) { //不用补0 binaryString = Integer.toBinaryString(temp); } else { temp |= 256; binaryString = Integer.toBinaryString(temp); binaryString = binaryString.substring(binaryString.length() - 8); } } else { //如果是负数则需要截取后8位 binaryString = Integer.toBinaryString(temp); binaryString = binaryString.substring(binaryString.length() - 8); } huffmanCodes.append(binaryString); } return huffmanCodes.toString(); } /** * 将哈夫曼码转为原字节数组 * @param haffmanCodes * @return */ private byte[] changeCodesToBytes(String haffmanCodes) throws Exception { if (CollUtil.isEmpty(huffmanCodeMap)) { throw new Exception("未压缩,无法解压!"); } List<Byte> srcBytes = new ArrayList<>(); //哈夫曼码和原字节的映射 Map<String, Byte> srcBytesMap = new HashMap<>(huffmanCodeMap.size()); for (Map.Entry<Byte, String> entry : huffmanCodeMap.entrySet()) { srcBytesMap.put(entry.getValue(), entry.getKey()); } //待匹配的尾下标 int count = 1; for (int i = 0; i < haffmanCodes.length();) { Byte srcByte = srcBytesMap.get(haffmanCodes.substring(i, count)); if (srcByte != null) { srcBytes.add(srcByte); i = count; } count++; } byte[] bytes = new byte[srcBytes.size()]; for (int i = 0; i < srcBytes.size(); i++) { bytes[i] = srcBytes.get(i); } return bytes; } }
package data_structure_and_algorithm.huffman; import java.io.FileInputStream; import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; import java.io.ObjectInputStream; import java.io.ObjectOutputStream; import java.io.OutputStream; import java.util.Map; /** * 哈夫曼文件压缩和解压 */ public class HuffmanFile { /** * 压缩文件 * @param srcFile * @param dstFile */ public static void zipFile(String srcFile, String dstFile) { OutputStream os = null; ObjectOutputStream oos = null; FileInputStream is = null; try { is = new FileInputStream(srcFile); byte[] srcBytes = new byte[is.available()]; is.read(srcBytes); //压缩 HuffmanCode huffmanCode = new HuffmanCode(); byte[] zipBytes = huffmanCode.generateZipBytes(srcBytes); os = new FileOutputStream(dstFile); oos = new ObjectOutputStream(os); oos.writeObject(zipBytes); oos.writeObject(huffmanCode.getHuffmanCodeMap()); } catch (Exception e) { System.out.println(e.getMessage()); } finally { if (oos != null) { try { oos.close(); } catch (IOException e) { e.printStackTrace(); } } if (os != null) { try { os.close(); } catch (IOException e) { e.printStackTrace(); } } if (is != null) { try { is.close(); } catch (IOException e) { e.printStackTrace(); } } } } /** * 解压 * @param zipFile * @param dstFile */ public static void unZipFile(String zipFile, String dstFile) { InputStream is = null; ObjectInputStream ois = null; OutputStream os = null; try { is = new FileInputStream(zipFile); ois = new ObjectInputStream(is); byte[] zipBytes = (byte[]) ois.readObject(); Map<Byte, String> huffmanCodeMap = (Map<Byte, String>) ois.readObject(); //解压 HuffmanCode huffmanCode = new HuffmanCode(); huffmanCode.setHuffmanCodeMap(huffmanCodeMap); byte[] decodes = huffmanCode.decode(zipBytes); os = new FileOutputStream(dstFile); os.write(decodes); } catch (Exception e) { System.out.println(e.getMessage()); } finally { if (os != null) { try { os.close(); } catch (IOException e) { e.printStackTrace(); } } if (ois != null) { try { ois.close(); } catch (IOException e) { e.printStackTrace(); } } if (is != null) { try { is.close(); } catch (IOException e) { e.printStackTrace(); } } } } }
标签:编码,return,哈夫曼,srcBytes,new,byte,null From: https://www.cnblogs.com/xueseng/p/17306344.html