VBA解析VBAProject 04——run length encoding

时间:2022-07-25
本文章向大家介绍VBA解析VBAProject 04——run length encoding,主要内容包括其使用实例、应用技巧、基本知识点总结和需要注意事项,具有一定的参考价值,需要的朋友可以参考一下。

在VBAProject中,dir流以及VBA模块代码流都使用了run length encoding的算法进行压缩。

run length encoding请参考官方文档的2.4.1 Compression and Decompression。

代码实现:

'run length encoding
Private Type RLE
    cpBytes() As Byte
    cpBytesLen As Long
    pcp As Long
    
    uncpBytes() As Byte
    puncp As Long
    uncpBytesLen As Long
    
    cpChunkStart As Long
    cpChunkEnd As Long
    uncpChunkStart As Long
End Type

Private r As RLE

Function UnCompress(b() As Byte, ret() As Byte) As String
    r.cpBytes = b
'    // SignatureByte 压缩标识为0x1才是压缩过的
    If r.cpBytes(0) <> 1 Then
        r.uncpBytes = r.cpBytes
        Exit Function
    End If

    r.cpBytesLen = UBound(r.cpBytes) + 1
    
    r.uncpBytesLen = 2 * r.cpBytesLen
    ReDim r.uncpBytes(r.uncpBytesLen - 1) As Byte
    r.pcp = r.pcp + 1
    Do While r.pcp < r.cpBytesLen - 1
        r.cpChunkStart = r.pcp
        Chunk
    Loop
    
    ReDim Preserve r.uncpBytes(r.puncp - 1) As Byte
    
    ret = r.uncpBytes
End Function


Private Function Chunk() As String
'    // 每个输出块前面都有一个两个字节的头,表示块中的字节数和块的格式。
'    // 每个压缩块被解码成4096字节的未压缩数据,被写入输出。
'    // 对于每个块,从块header中提取大小和格式样式。然后根据header题中指定的格式读取和解码该块
    Dim header As Integer
    header = Bytes2Int(r.cpBytes, r.pcp)
    
    r.pcp = r.pcp + 2
    
'    获得压缩数据块的大小
    Dim chunksize As Integer
    chunksize = (header And &HFFF) + 3
    
    Dim i As Long
    Dim iend As Long
    
'    // 获取数据块压缩标识,1是压缩,0是没有压缩
    Dim flag As Integer
    flag = header And &H8000
    If flag = &H8000 Then
        '压缩数据块的最后位置
        If r.cpBytesLen - 1 > (r.cpChunkStart + chunksize) Then
            r.cpChunkEnd = r.cpChunkStart + chunksize
        Else
            r.cpChunkEnd = r.cpBytesLen - 1
        End If
        
        Do While r.pcp < r.cpChunkEnd
            TokenSequence
        Loop
    Else
'        // 未压缩的块,直接读取
        chunksize = 4096
        iend = r.pcp + chunksize
        If iend >= r.cpBytesLen Then iend = r.cpBytesLen - 1
        chunksize = iend - r.pcp
        
        For i = 0 To chunksize - 1
            r.uncpBytes(r.puncp) = r.cpBytes(r.pcp)
            r.pcp = r.pcp + 1
            puncpAdd
        Next
    End If
    
    r.cpChunkStart = r.pcp
    r.uncpChunkStart = r.puncp
End Function

Private Function TokenSequence() As String
'    // flagByte的8位对应了8个Tokens
'    // 0表示没有压缩,1表示是1个copyToken
    Dim flagbyte As Byte
    flagbyte = r.cpBytes(r.pcp)
    r.pcp = r.pcp + 1
    
    Dim i As Long
    For i = 0 To 8 - 1
        If r.pcp < r.cpChunkEnd Then ' // 有可能没有8个token
'            // CALL Decompressing a Token (section 2.4.1.3.5) with index and Byte
            Token i, flagbyte
        End If
    Next
    
    
End Function

Private Function Token(index As Long, flagbyte As Byte) As String
    Dim flag As Boolean
    flag = ((flagbyte  (2 ^ index)) And 1) > 0
    
    Dim itoken As Integer
    Dim Offset As Integer, Length As Integer
    Dim i_start As Long, i_end As Long
    Dim i As Long
    If flag Then
        itoken = Bytes2Int(r.cpBytes, r.pcp)
        
        unpackCopyToken itoken, Offset, Length
'        // SET CopySource TO DecompressedCurrent - Offset
'        // CALL Byte Copy (section 2.4.1.3.11) with CopySource, DecompressedCurrent, and Length
        i_start = r.puncp - Offset
        i_end = r.puncp - Offset + Length

        For i = i_start To i_end - 1
            r.uncpBytes(r.puncp) = r.uncpBytes(i)
            puncpAdd
        Next
'
        r.pcp = r.pcp + 2
    Else
'        COPY the byte at CompressedCurrent TO DecompressedCurrent
        r.uncpBytes(r.puncp) = r.cpBytes(r.pcp)
        r.pcp = r.pcp + 1
        puncpAdd
    End If
End Function

Private Function unpackCopyToken(Token As Integer, ByRef Offset As Integer, ByRef Length As Integer) As String
'    // 2.4.1.3.19.2 Unpack CopyToken
'    // Offset (2 bytes): An unsigned 16-bit integer that specifies the beginning of a CopySequence (section 2.4.1.3.19).
'    // Length (2 bytes): An unsigned 16-bit integer that specifies the length of a CopySequence
'
'    //1.    CALL CopyToken Help (section 2.4.1.3.19.1) returning LengthMask, OffsetMask, and BitCount.
    Dim LengthMask As Integer, OffsetMask As Integer, BitCount As Integer
    copyTokenHelp LengthMask, OffsetMask, BitCount, 0
'    //2.    SET Length TO (Token BITWISE AND LengthMask) PLUS 3.
    Length = (Token And LengthMask) + 3
'    //3.    SET temp1 TO Token BITWISE AND OffsetMask.
    Dim temp1 As Integer
    temp1 = Token And OffsetMask
'    //4.    SET temp2 TO 16 MINUS BitCount.
    Dim temp2 As Integer
    temp2 = 16 - BitCount
'    //5.    SET Offset TO (temp1 RIGHT SHIFT BY temp2) PLUS 1.
    Offset = BitMoveRightInt(temp1, VBA.CLng(temp2)) + 1
End Function

Private Function copyTokenHelp(LengthMask As Integer, OffsetMask As Integer, BitCount As Integer, MaximumLength As Integer) As String
'    // LengthMask (2 bytes): An unsigned 16-bit integer. A bitmask used to access CopyToken.Length.
'    // OffsetMask (2 bytes): An unsigned 16-bit integer. A bitmask used to access CopyToken.Offset.
'    // BitCount (2 bytes): An unsigned 16-bit integer. The number of bits set to 0b1 in OffsetMask.
'    // MaximumLength (2 bytes): An unsigned 16-bit integer. The largest possible integral value that can fit into CopyToken.Length
'
'    //§    SET difference TO DecompressedCurrent MINUS DecompressedChunkStart
    Dim difference As Long
    difference = r.puncp - r.uncpChunkStart
'    //§    SET BitCount TO the smallest integer that is GREATER THAN OR EQUAL TO LOGARITHM base 2 of difference
'    // 大于或者等于log2(different)的最小整数,要向上取整
    BitCount = VBA.CInt(Application.WorksheetFunction.RoundUp(Math.Log(difference) / Math.Log(2), 0))
'
'    //§    SET BitCount TO the maximum of BitCount and 4
    If BitCount < 4 Then
        BitCount = 4
    End If
'
'    //§    SET LengthMask TO 0xFFFF RIGHT SHIFT BY BitCount
    LengthMask = &HFFFF
    LengthMask = BitMoveRightInt(LengthMask, VBA.CLng(BitCount))
'    //§    SET OffsetMask TO BITWISE NOT LengthMask
    OffsetMask = Not LengthMask
'    //§    SET MaximumLength TO (0xFFFF RIGHT SHIFT BY BitCount) PLUS 3
    MaximumLength = &HFFFF
    MaximumLength = BitMoveRightInt(MaximumLength, VBA.CLng(BitCount)) + 3
End Function

Private Function puncpAdd() As Long
    If r.puncp = r.uncpBytesLen - 1 Then
        r.uncpBytesLen = r.uncpBytesLen * 1.2
        ReDim Preserve r.uncpBytes(r.uncpBytesLen - 1) As Byte
    End If
    
    r.puncp = 1 + r.puncp
End Function