压缩算法(Compression)
行程长度压缩算法(Run-Length Encoding (RLE))
RLE可能是进行压缩的最简单方法。假设数据如下:
aaaaabbbcdeeeeeeef...
复制代码
然后RLE将其编码为:
5a3b1c1d7e1f...
复制代码
数据有很多重复字节时,RLE可以节省相当多的空间。它在图像上运行良好。
压缩代码
extension Data {
public func compressRLE() -> Data {
var data = Data()
self.withUnsafeBytes { (uPtr: UnsafePointer<UInt8>) in
var ptr = uPtr
let end = ptr + count
while ptr < end {
var count = 0
var byte = ptr.pintee
var next = byte
while next == byte && ptr < end && count < 64 {
ptr = ptr.advanced(by: 1)
next = ptr.pointee
count += 1
}
if count > 1 || byte >= 192 {
var size = 191 + UInt8(count)
data.append(&size, count: 1)
data.append(&size, count: 1)
} else {
data.append(&byte, count: 1)
}
}
}
return data
}
}
复制代码
解压缩代码
public func decompressRLE() -> Data {
var data = Data()
self.withUnsafeBytes { (uPtr: UnsafePointer<UInt8>) in
var ptr = uPtr
let end = ptr + count
while ptr < end {
// Read the next byte. This is either a single value less than 192,
// or the start of a byte run.
var byte = ptr.pointee
ptr = ptr.advanced(by: 1)
if byte < 192 {
data.append(&byte, count: 1)
} else if ptr < end {
// Read the actual data value.
var value = ptr.pointee
ptr = ptr.advanced(by: 1)
// And write it out repeatedly.
for _ in 0 ..< byte - 191 {
data.append(&value, count: 1)
}
}
}
}
return data
}
复制代码
霍夫曼编码(Huffman Coding)
举例子是最直观简单的说明
假设有以下文本
so much words wow many compression
复制代码
计算每个字节出现的频率
space: 5 u: 1
0: 5 h: 1
s: 4 d: 1
m: 3 a: 1
w: 3 y: 1
c: 2 p: 1
r: 2 e: 1
n: 2 i: 1
复制代码
可以这样来编码
sapce: 5 010 u: 1 11001
0: 5 000 h: 1 10001
s: 4 101 d: 1 11010
m: 3 111 a: 1 11011
w: 3 0010 y: 1 01111
c: 2 0011 p: 1 11000
r: 2 1001 e: 1 01110
n: 2 0110 i: 1 10000
复制代码
101 000 010 111 11001 0011 10001 010 0010 000 1001 11010 101
s o _ m u c h _ w o r d s
010 0010 000 0010 010 111 11011 0110 01111 010 0011 000 111
_ w o w _ m a n y _ c o m
11000 1001 01110 101 101 10000 000 0110 0
p r e s s i o n
复制代码
在小输入上使用霍夫曼编码是不利的。
辅助代码
NSData
理解的最小数据是字节,但我们处理的是比特,所以我们需要在两者之间进行转换
Npublic class BitWriter {
public var data = NSMutableData()
var outByte: UInt8 = 0
var outCount = 0
public func writeBit(bit: Bool) {
if outCount == 8 {
data.append(&outByte, length: 1)
outCount = 0
}
outByte = (outByte << 1) | (bit ? 1 : 0)
outCount += 1
}
public func flush() {
if outCount > 0 {
if outCount < 8 {
let diff = UInt8(8 - outCount)
outByte <<= diff
}
data.append(&outByte, length: 1)
}
}
}
复制代码
用于从NSData
读取各个位:
public class BitReader {
var ptr: UnsafePointer<UInt8>
var inByte: UInt8 = 0
var inCount = 8
public init(data: NSData) {
ptr = data.bytes.assumingMemoryBound(to: UInt8.self)
}
public func readBit() -> Bool {
if inCount == 8 {
inByte = ptr.pointee // load the next byte
inCount = 0
ptr = ptr.successor()
}
let bit = inByte & 0x80 // read the next bit
inByte <<= 1
inCount += 1
return bit == 0 ? false : true
}
}
复制代码
class Huffman {
typealias NodeIndex = Int
struct Node {
var count = 0
var index: NodeIndex = -1
var parent: NodeIndex = -1
var left: NodeIndex = -1
var right: NodeIndex = -1
}
var tree = [Node](repeating: Node(), count: 256)
var root: NodeIndex = -1
}
复制代码
计算输入数据中每个字节出现的频率
fileprivate func countByteFrequency(inData data: NSData) {
var ptr = data.bytes.assumingMemoryBound(to: UInt8.self)
for _ in 0..<data.length {
let i = Int(ptr.pointee)
tree[i].count += 1
tree[i].index = i
ptr = ptr.successor()
}
}
复制代码
导出频率表
struct Freq {
var byte: UInt8 = 0
var count = 0
}
func frequencyTable() -> [Freq] {
var a = [Freq]()
for i in 0..<256 where tree[i].count > 0 {
a.append(Freq(byte: UInt8(i), count: tree[i].count))
}
return a
}
复制代码
使用优先级队列
fileprivate func buildTree() {
var queue = PriortyQueue<Node>(sort: { $0.count < $1.count })
for node in tree where node.count > 0 {
queue.enqueue(node)
}
while queue.count > 1 {
let node1 = queue.dequeue()!
let node2 = queue.dequeue()!
var parentNode = Node()
parentNode.count = node1.count + node2.count
parentNode.left = node1.index
parentNode.right = node2.index
parentNode.index = tree.count
tree.append(parentNode)
tree[node1.index].parent = parentNode.index
tree[node2.index].parent = parentNode.index
queue.enqueue(parentNode)
}
let rootNode = queue.dequeue()!
root = rootNode.index
}
复制代码
现在我们知道如何从频率表构建压缩树,我们可以用它来压缩NSData
对象的内容。
public func compressData(data: NSData) -> NSData {
countByteFrequency(inData: data)
buildTree()
let writer = BitWriter()
var ptr = data.bytes.assumingMemoryBound(to: UInt8.self)
for _ in 0..<data.length {
let c = ptr.pointee
let i = Int(c)
traverseTree(writer: writer, nodeIndex: i, childIndex: -1)
ptr = ptr.successor()
}
writer.flush()
return writer.data
}
复制代码
注意:压缩总是需要两次遍历整个输入数据: 首先构建频率表,然后将字节转换为压缩的位序列。
有趣的东西发生在traverseTree()
中。这是一种递归方法:
private func traverseTree(writer: BitWriter, nodeIndex h: Int, childIndex child: Int) {
if tree[h].parent != -1 {
traverseTree(writer: writer, nodeIndex: tree[h].parent, childIndex: h)
}
if child != -1 {
if child == tree[h].left {
writer.writeBit(bit: true)
} else if child == tree[h].right {
writer.writeBit(bit: false)
}
}
}
复制代码
像这样使用compressData()
方法:
let s1 = "so much words wow many compression"
if let originalData = s1.dataUsingEncoding(NSUTF8StringEncoding) {
let huffman1 = Huffman()
let compressedData = huffman1.compressData(originalData)
print(compressedData.length)
}
复制代码
解压缩
首先需要一些方法将[Freq]
数组转换回压缩树:
fileprivate func restoreTree(fromTable frequencyTable: [Freq]) {
for freq in frequencyTable {
let i = Int(freq.byte)
tree[i].count = freq.count
tree[i].index = i
}
buildTree()
}
复制代码
func decompressData(data: NSData, frequencyTable: [Freq]) -> NSData {
restoreTree(fromTable: frequencyTable)
let reader = BitReader(data: data)
let outData = NSMutableData()
let byteCount = tree[root].count
var i = 0
while i < byteCount {
var b = findLeafNode(reader: reader, nodeIndex: root)
outData.append(&b, length: 1)
i += 1
}
return outData
}
复制代码
也是使用辅助方法遍历树
private func findLeafNode(reader reader: BitReader, nodeIndex: Int) -> UInt8 {
var h = nodeIndex
while tree[h].right != -1 {
if reader.readBit() {
h = tree[h].left
} else {
h = tree[h].right
}
}
return UInt8(h)
}
复制代码
如何使用解压缩的方法:
let frequencyTable = huffman1.frequencyTable()
let huffman2 = Huffman()
let decompressedData = huffman2.decompressData(compressedData, frequencyTable: frequencyTable: frequencyTable)
let s2 = String(data: decompressedData, encoding: NSUTF8StringEncoding)!
复制代码
© 版权声明
文章版权归作者所有,未经允许请勿转载。
THE END