Ticket #171: parser.patch
| File parser.patch, 141.5 kB (added by nneonneo <nneonneo@gmail.com>, 1 year ago) |
|---|
-
archive/__init__.py
old new 9 9 from hachoir_parser.archive.rpm import RpmFile 10 10 from hachoir_parser.archive.sevenzip import SevenZipParser 11 11 from hachoir_parser.archive.mar import MarFile 12 12 from hachoir_parser.archive.mozilla_ar import MozillaArchive 13 from hachoir_parser.archive.zlib import ZlibData -
archive/bzip2_parser.py
old new 1 1 """ 2 2 BZIP2 archive file 3 3 4 Author: Victor Stinner 4 Author: Victor Stinner, Robert Xiao 5 5 """ 6 6 7 7 from hachoir_parser import Parser 8 from hachoir_core.field import (ParserError, String, 9 Bytes, Character, UInt8, UInt32, CompressedField) 10 from hachoir_core.endian import LITTLE_ENDIAN 8 from hachoir_core.tools import paddingSize 9 from hachoir_core.field import (Field, FieldSet, GenericVector, 10 ParserError, String, 11 PaddingBits, Bit, Bits, Character, 12 UInt32, Enum, CompressedField) 13 from hachoir_core.endian import BIG_ENDIAN 11 14 from hachoir_core.text_handler import textHandler, hexadecimal 15 from hachoir_parser.archive.zlib import build_tree, HuffmanCode 12 16 13 17 try: 14 18 from bz2 import BZ2Decompressor … … 27 31 except ImportError: 28 32 has_deflate = False 29 33 34 class ZeroTerminatedNumber(Field): 35 """Zero (bit) terminated number: e.g. 11110 is 4.""" 36 def __init__(self, parent, name, description=None): 37 Field.__init__(self, parent, name, 0, description) 38 39 endian = self.parent.endian 40 stream = self.parent.stream 41 addr = self.absolute_address 42 43 value = 0 44 while True: 45 bit = stream.readBits(addr, 1, endian) 46 addr += 1 47 self._size += 1 48 if not bit: 49 break 50 value += 1 51 self.createValue = lambda: value 52 53 def move_to_front(l, c): 54 l[:] = l[c:c+1] + l[0:c] + l[c+1:] 55 56 class Bzip2Bitmap(FieldSet): 57 def __init__(self, parent, name, nb_items, start_index, *args, **kwargs): 58 FieldSet.__init__(self, parent, name, *args, **kwargs) 59 self.nb_items = nb_items 60 self.start_index = start_index 61 62 def createFields(self): 63 for i in xrange(self.start_index, self.start_index+self.nb_items): 64 yield Bit(self, "symbol_used[%i]"%i, "Is the symbol %i (%r) used?"%(i, chr(i))) 65 66 class Bzip2Lengths(FieldSet): 67 def __init__(self, parent, name, symbols, *args, **kwargs): 68 FieldSet.__init__(self, parent, name, *args, **kwargs) 69 self.symbols = symbols 70 71 def createFields(self): 72 
yield Bits(self, "start_length", 5) 73 length = self["start_length"].value 74 lengths = [] 75 for i in xrange(self.symbols): 76 while True: 77 bit = Bit(self, "change_length[%i][]"%i, "Should the length be changed for symbol %i?"%i) 78 yield bit 79 if not bit.value: 80 break 81 else: 82 bit = Enum(Bit(self, "length_decrement[%i][]"%i, "Decrement the value?"), {True: "Decrement", False: "Increment"}) 83 yield bit 84 if bit.value: 85 length -= 1 86 else: 87 length += 1 88 lengths.append(length) 89 self.final_length = length 90 self.tree = build_tree(lengths) 91 92 class Bzip2Selectors(FieldSet): 93 def __init__(self, parent, name, ngroups, *args, **kwargs): 94 FieldSet.__init__(self, parent, name, *args, **kwargs) 95 self.groups = range(ngroups) 96 97 def createFields(self): 98 for i in xrange(self["../selectors_used"].value): 99 field = ZeroTerminatedNumber(self, "selector_list[]") 100 move_to_front(self.groups, field.value) 101 field.realvalue = self.groups[0] 102 field._description = "MTF'ed selector index: raw value %i, real value %i"%(field.value, field.realvalue) 103 yield field 104 105 class Bzip2Block(FieldSet): 106 def createFields(self): 107 yield textHandler(Bits(self, "blockheader", 48, "Block header"), hexadecimal) 108 if self["blockheader"].value != 0x314159265359: # pi 109 raise ParserError("Invalid block header!") 110 yield textHandler(UInt32(self, "crc32", "CRC32 for this block"), hexadecimal) 111 yield Bit(self, "randomized", "Is this block randomized?") 112 yield Bits(self, "orig_bwt_pointer", 24, "Starting pointer into BWT after untransform") 113 yield GenericVector(self, "huffman_used_map", 16, Bit, 'block_used', "Bitmap showing which blocks (representing 16 literals each) are in use") 114 symbols_used = [] 115 for index, block_used in enumerate(self["huffman_used_map"].array('block_used')): 116 if block_used.value: 117 start_index = index*16 118 field = Bzip2Bitmap(self, "huffman_used_bitmap[%i]"%index, 16, start_index, "Bitmap for block %i 
(literals %i to %i) showing which symbols are in use"%(index, start_index, start_index + 15)) 119 yield field 120 for i, used in enumerate(field): 121 if used.value: 122 symbols_used.append(start_index + i) 123 yield Bits(self, "huffman_groups", 3, "Number of different Huffman tables in use") 124 yield Bits(self, "selectors_used", 15, "Number of times the Huffman tables are switched") 125 yield Bzip2Selectors(self, "selectors_list", self["huffman_groups"].value) 126 trees = [] 127 for group in xrange(self["huffman_groups"].value): 128 field = Bzip2Lengths(self, "huffman_lengths[]", len(symbols_used)+2) 129 yield field 130 trees.append(field.tree) 131 counter = 0 132 rle_run = 0 133 selector_tree = None 134 while True: 135 if counter%50 == 0: 136 select_id = self["selectors_list"].array("selector_list")[counter//50].realvalue 137 selector_tree = trees[select_id] 138 field = HuffmanCode(self, "huffman_code[]", selector_tree) 139 if field.realvalue in [0, 1]: 140 # RLE codes 141 if rle_run == 0: 142 rle_power = 1 143 rle_run += (field.realvalue + 1) * rle_power 144 rle_power <<= 1 145 field._description = "RLE Run Code %i (for %r); Total accumulated run %i (Huffman Code %i)" % (field.realvalue, chr(symbols_used[0]), rle_run, field.value) 146 elif field.realvalue == len(symbols_used)+1: 147 field._description = "Block Terminator (%i) (Huffman Code %i)"%(field.realvalue, field.value) 148 yield field 149 break 150 else: 151 rle_run = 0 152 move_to_front(symbols_used, field.realvalue-1) 153 field._description = "Literal %r (value %i) (Huffman Code %i)"%(chr(symbols_used[0]), field.realvalue, field.value) 154 yield field 155 if field.realvalue == len(symbols_used)+1: 156 break 157 counter += 1 158 159 class Bzip2Stream(FieldSet): 160 START_BLOCK = 0x314159265359 # pi 161 END_STREAM = 0x177245385090 # sqrt(pi) 162 def createFields(self): 163 end = False 164 while not end: 165 marker = self.stream.readBits(self.absolute_address + self.current_size, 48, self.endian) 166 if 
marker == self.START_BLOCK: 167 yield Bzip2Block(self, "block[]") 168 elif marker == self.END_STREAM: 169 yield textHandler(Bits(self, "stream_end", 48, "End-of-stream marker"), hexadecimal) 170 yield textHandler(UInt32(self, "crc32", "CRC32 for entire stream"), hexadecimal) 171 padding = paddingSize(self.current_size, 8) 172 if padding: 173 yield PaddingBits(self, "padding[]", padding) 174 end = True 175 else: 176 raise ParserError("Invalid marker 0x%02X!"%marker) 177 30 178 class Bzip2Parser(Parser): 31 179 PARSER_TAGS = { 32 180 "id": "bzip2", … … 37 185 "magic": (('BZh', 0),), 38 186 "description": "bzip2 archive" 39 187 } 40 endian = LITTLE_ENDIAN188 endian = BIG_ENDIAN 41 189 42 190 def validate(self): 43 191 if self.stream.readBytes(0, 3) != 'BZh': … … 50 198 yield String(self, "id", 3, "Identifier (BZh)", charset="ASCII") 51 199 yield Character(self, "blocksize", "Block size (KB of memory needed to uncompress)") 52 200 53 yield UInt8(self, "blockheader", "Block header")54 if self["blockheader"].value == 0x17:55 yield String(self, "id2", 4, "Identifier2 (re8P)", charset="ASCII")56 yield UInt8(self, "id3", "Identifier3 (0x90)")57 elif self["blockheader"].value == 0x31:58 yield String(self, "id2", 5, "Identifier 2 (AY&SY)", charset="ASCII")59 if self["id2"].value != "AY&SY":60 raise ParserError("Invalid identifier 2 (AY&SY)!")61 else:62 raise ParserError("Invalid block header!")63 yield textHandler(UInt32(self, "crc32", "CRC32"), hexadecimal)64 65 201 if self._size is None: # TODO: is it possible to handle piped input? 66 202 raise NotImplementedError 67 203 … … 73 209 break 74 210 else: 75 211 filename = None 76 data = B ytes(self, "file", size)212 data = Bzip2Stream(self, "file", size=size*8) 77 213 if has_deflate: 78 214 CompressedField(self, Bunzip2) 79 215 def createInputStream(**args): -
archive/cab.py
old new 1 1 """ 2 2 Microsoft Cabinet (CAB) archive. 3 3 4 Author: Victor Stinner 4 Author: Victor Stinner, Robert Xiao 5 5 Creation date: 31 january 2007 6 7 - Microsoft Cabinet SDK 8 http://msdn2.microsoft.com/en-us/library/ms974336.aspx 6 9 """ 7 10 from __future__ import absolute_import 8 11 from hachoir_parser import Parser 9 12 from hachoir_core.field import (FieldSet, Enum, 10 13 CString, String, 11 UInt 16, UInt32, Bit, Bits, PaddingBits, NullBits,14 UInt8, UInt16, UInt32, Bit, Bits, PaddingBits, NullBits, 12 15 DateTimeMSDOS32, RawBytes) 13 from hachoir_parser.common.msdos import MSDOSFileAttr1614 16 from hachoir_core.text_handler import textHandler, hexadecimal, filesizeHandler 15 17 from hachoir_core.endian import LITTLE_ENDIAN 18 from hachoir_core.tools import paddingSize 19 from hachoir_core.stream import StringInputStream 20 from hachoir_parser.archive.lzx import LZXStream, lzx_decompress 21 from hachoir_parser.archive.zlib import DeflateBlock 16 22 17 23 MAX_NB_FOLDER = 30 18 24 … … 26 32 27 33 class Folder(FieldSet): 28 34 def createFields(self): 29 yield UInt32(self, "off _data", "Offset of data")30 yield UInt16(self, " cf_data")35 yield UInt32(self, "offset", "Offset to data (from file start)") 36 yield UInt16(self, "data_blocks", "Number of data blocks which are in this cabinet") 31 37 yield Enum(Bits(self, "compr_method", 4, "Compression method"), COMPRESSION_NAME) 32 yield Bits(self, "compr_level", 5, "Compression level") 33 yield PaddingBits(self, "padding", 7) 38 if self["compr_method"].value in [2, 3]: # Quantum or LZX use compression level 39 yield PaddingBits(self, "padding[]", 4) 40 yield Bits(self, "compr_level", 5, "Compression level") 41 yield PaddingBits(self, "padding[]", 3) 42 else: 43 yield PaddingBits(self, "padding[]", 12) 44 if self["../flags/has_reserved"].value and self["../reserved_folder_size"].value: 45 yield RawBytes(self, "reserved_folder", self["../reserved_folder_size"].value, "Per-folder reserved area") 34 46 35 47 def 
createDescription(self): 36 48 text= "Folder: compression %s" % self["compr_method"].display 37 if self["compr_method"].value != COMPRESSION_NONE:38 text += " (level %u )" % self["compr_level"].value49 if self["compr_method"].value in [2, 3]: # Quantum or LZX use compression level 50 text += " (level %u: window size %u)" % (self["compr_level"].value, 2**self["compr_level"].value) 39 51 return text 40 52 53 class CabFileAttributes(FieldSet): 54 def createFields(self): 55 yield Bit(self, "readonly") 56 yield Bit(self, "hidden") 57 yield Bit(self, "system") 58 yield Bits(self, "reserved[]", 2) 59 yield Bit(self, "archive", "Has the file been modified since the last backup?") 60 yield Bit(self, "exec", "Run file after extraction?") 61 yield Bit(self, "name_is_utf", "Is the filename using UTF-8?") 62 yield Bits(self, "reserved[]", 8) 63 41 64 class File(FieldSet): 42 65 def createFields(self): 43 66 yield filesizeHandler(UInt32(self, "filesize", "Uncompressed file size")) 44 yield UInt32(self, "offset", "File offset after decompression") 45 yield UInt16(self, "iFolder", "file control id") 67 yield UInt32(self, "folder_offset", "File offset in uncompressed folder") 68 yield Enum(UInt16(self, "folder_index", "Containing folder ID (index)"), { 69 0xFFFD:"Folder continued from previous cabinet (real folder ID = 0)", 70 0xFFFE:"Folder continued to next cabinet (real folder ID = %i)" % (self["../nb_folder"].value - 1), 71 0xFFFF:"Folder spanning previous, current and next cabinets (real folder ID = 0)"}) 46 72 yield DateTimeMSDOS32(self, "timestamp") 47 yield MSDOSFileAttr16(self, "attributes") 48 yield CString(self, "filename", charset="ASCII") 73 yield CabFileAttributes(self, "attributes") 74 if self["attributes/name_is_utf"].value: 75 yield CString(self, "filename", charset="UTF-8") 76 else: 77 yield CString(self, "filename", charset="ASCII") 49 78 50 79 def createDescription(self): 51 80 return "File %s (%s)" % ( 52 81 self["filename"].display, self["filesize"].display) 
53 82 54 class Reserved(FieldSet):55 def createFields(self):56 yield UInt32(self, "size")57 size = self["size"].value58 if size:59 yield RawBytes(self, "data", size)60 61 83 class Flags(FieldSet): 62 84 static_size = 16 63 85 def createFields(self): … … 66 88 yield Bit(self, "has_reserved") 67 89 yield NullBits(self, "padding", 13) 68 90 91 class FragmentGroup: 92 def __init__(self, parser): 93 self.items = [] 94 self.parser = parser 95 self.args = {} 96 97 def add(self, item): 98 self.items.append(item) 99 100 def createInputStream(self): 101 # FIXME: Use lazy stream creation 102 data = [] 103 for item in self.items: 104 data.append( item["rawdata"].value ) 105 data = "".join(data) 106 107 # FIXME: Use smarter code to send arguments 108 self.args["compr_level"] = self.items[0].parent.parent.folder["compr_level"].value 109 tags = {"class": self.parser, "args": self.args} 110 tags = tags.iteritems() 111 return StringInputStream(data, "<fragment group>", tags=tags) 112 113 class CustomFragment(FieldSet): 114 def __init__(self, parent, name, size, parser, description=None, group=None): 115 FieldSet.__init__(self, parent, name, description, size=size) 116 if not group: 117 group = FragmentGroup(parser) 118 self.field_size = size 119 self.group = group 120 self.group.add(self) 121 122 def createFields(self): 123 yield RawBytes(self, "rawdata", self.field_size//8) 124 125 def _createInputStream(self, **args): 126 return self.group.createInputStream() 127 128 class DataBlock(FieldSet): 129 def __init__(self, *args, **kwargs): 130 FieldSet.__init__(self, *args, **kwargs) 131 size = (self["size"].value + 8) * 8 # +8 for header values 132 if self["/flags/has_reserved"].value: 133 size += self["/reserved_data_size"].value * 8 134 self._size = size 135 136 def createFields(self): 137 yield textHandler(UInt32(self, "crc32"), hexadecimal) 138 yield UInt16(self, "size") 139 yield UInt16(self, "uncompressed_size", "If this is 0, this block is continued in a subsequent cabinet") 
140 if self["/flags/has_reserved"].value and self["/reserved_data_size"].value: 141 yield RawBytes(self, "reserved_data", self["/reserved_data_size"].value, "Per-datablock reserved area") 142 compr_method = self.parent.folder["compr_method"].value 143 if compr_method == 0: # Uncompressed 144 yield RawBytes(self, "data", self["size"].value, "Folder Data") 145 self.parent.uncompressed_data += self["data"].value 146 elif compr_method == 1: # MSZIP 147 yield String(self, "mszip_signature", 2, "MSZIP Signature (CK)") 148 yield DeflateBlock(self, "deflate_block", self.parent.uncompressed_data) 149 padding = paddingSize(self.current_size, 8) 150 if padding: 151 yield PaddingBits(self, "padding[]", padding) 152 self.parent.uncompressed_data = self["deflate_block"].uncomp_data 153 elif compr_method == 2: # Quantum 154 yield RawBytes(self, "compr_data", self["size"].value, "Compressed Folder Data") 155 elif compr_method == 3: # LZX 156 group = getattr(self.parent.folder, "lzx_group", None) 157 field = CustomFragment(self, "data", self["size"].value*8, LZXStream, "LZX data fragment", group) 158 self.parent.folder.lzx_group = field.group 159 yield field 160 161 class FolderParser(Parser): 162 endian = LITTLE_ENDIAN 163 def createFields(self): 164 for file in sorted(self.files, key=lambda x:x["folder_offset"].value): 165 padding = self.seekByte(file["folder_offset"].value) 166 if padding: 167 yield padding 168 yield RawBytes(self, "file[]", file["filesize"].value, file.description) 169 170 class FolderData(FieldSet): 171 def __init__(self, parent, name, folder, files, *args, **kwargs): 172 FieldSet.__init__(self, parent, name, *args, **kwargs) 173 def createInputStream(cis, source=None, **args): 174 stream = cis(source=source) 175 tags = args.setdefault("tags",[]) 176 tags.extend(stream.tags) 177 tags.append(( "class", FolderParser )) 178 tags.append(( "args", {'files': files} )) 179 for unused in self: 180 pass 181 if folder["compr_method"].value == 3: # LZX 182 
self.uncompressed_data = lzx_decompress(self["block[0]/data"].getSubIStream(), folder["compr_level"].value) 183 return StringInputStream(self.uncompressed_data, source=source, **args) 184 self.setSubIStream(createInputStream) 185 self.files = files 186 self.folder = folder # Folder fieldset 187 188 def createFields(self): 189 self.uncompressed_data = "" 190 for index in xrange(self.folder["data_blocks"].value): 191 block = DataBlock(self, "block[]") 192 for i in block: 193 pass 194 yield block 195 69 196 class CabFile(Parser): 70 197 endian = LITTLE_ENDIAN 71 198 MAGIC = "MSCF" … … 82 209 def validate(self): 83 210 if self.stream.readBytes(0, 4) != self.MAGIC: 84 211 return "Invalid magic" 85 if self[" cab_version"].value != 0x0103:86 return "Unknown version (% s)" % self["cab_version"].display212 if self["major_version"].value != 1 or self["minor_version"].value != 3: 213 return "Unknown version (%i.%i)" % (self["major_version"].value, self["minor_version"].value) 87 214 if not (1 <= self["nb_folder"].value <= MAX_NB_FOLDER): 88 215 return "Invalid number of folder (%s)" % self["nb_folder"].value 89 216 return True … … 95 222 yield textHandler(UInt32(self, "fld_checksum", "Folders checksum (0 if not used)"), hexadecimal) 96 223 yield UInt32(self, "off_file", "Offset of first file") 97 224 yield textHandler(UInt32(self, "files_checksum", "Files checksum (0 if not used)"), hexadecimal) 98 yield textHandler(UInt16(self, "cab_version", "Cabinet version"), hexadecimal) 225 yield UInt8(self, "minor_version", "Minor version (should be 3)") 226 yield UInt8(self, "major_version", "Major version (should be 1)") 99 227 yield UInt16(self, "nb_folder", "Number of folders") 100 228 yield UInt16(self, "nb_files", "Number of files") 101 229 yield Flags(self, "flags") 102 230 yield UInt16(self, "setid") 103 yield UInt16(self, " number", "Zero-based cabinet number")231 yield UInt16(self, "cabinet_serial", "Zero-based cabinet number") 104 232 105 # --- TODO: Support flags106 233 if 
self["flags/has_reserved"].value: 107 yield Reserved(self, "reserved") 108 #(3) Previous cabinet name, if CAB_HEADER.flags & CAB_FLAG_HASPREV 109 #(4) Previous disk name, if CAB_HEADER.flags & CAB_FLAG_HASPREV 110 #(5) Next cabinet name, if CAB_HEADER.flags & CAB_FLAG_HASNEXT 111 #(6) Next disk name, if CAB_HEADER.flags & CAB_FLAG_HASNEXT 112 # ---- 234 yield UInt16(self, "reserved_header_size", "Size of per-cabinet reserved area") 235 yield UInt8(self, "reserved_folder_size", "Size of per-folder reserved area") 236 yield UInt8(self, "reserved_data_size", "Size of per-datablock reserved area") 237 if self["reserved_header_size"].value: 238 yield RawBytes(self, "reserved_header", self["reserved_header_size"].value, "Per-cabinet reserved area") 239 if self["flags/has_previous"].value: 240 yield CString(self, "previous_cabinet", "File name of previous cabinet", charset="ASCII") 241 yield CString(self, "previous_disk", "Description of disk/media on which previous cabinet resides", charset="ASCII") 242 if self["flags/has_next"].value: 243 yield CString(self, "next_cabinet", "File name of next cabinet", charset="ASCII") 244 yield CString(self, "next_disk", "Description of disk/media on which next cabinet resides", charset="ASCII") 113 245 114 246 for index in xrange(self["nb_folder"].value): 115 247 yield Folder(self, "folder[]") 116 248 for index in xrange(self["nb_files"].value): 117 249 yield File(self, "file[]") 118 250 251 folders = sorted(enumerate(self.array("folder")), key=lambda x:x[1]["offset"].value) 252 253 for i in xrange(len(folders)): 254 index, folder = folders[i] 255 padding = self.seekByte(folder["offset"].value) 256 if padding: 257 yield padding 258 files = [] 259 for file in self.array("file"): 260 if file["folder_index"].value == index: 261 files.append(file) 262 if i+1 == len(folders): 263 size = (self.size // 8) - folder["offset"].value 264 else: 265 size = (folders[i+1][1]["offset"].value) - folder["offset"].value 266 yield FolderData(self, 
"folder_data[%i]" % index, folder, files, size=size*8) 267 119 268 end = self.seekBit(self.size, "endraw") 120 269 if end: 121 270 yield end -
archive/lzx.py
"""LZX data stream parser.

Also includes a decompression function (slow!!) which can decompress
LZX data stored in a Hachoir stream.

Author: Robert Xiao
Creation date: July 18, 2007
"""
from hachoir_parser import Parser
from hachoir_core.field import (FieldSet,
    UInt32, Bit, Bits, PaddingBits,
    RawBytes)
from hachoir_core.endian import BIG_ENDIAN, LITTLE_ENDIAN
from hachoir_core.tools import paddingSize, alignValue
from hachoir_parser.archive.zlib import build_tree, HuffmanCode, extend_data
from hachoir_core.bits import str2long
import new # for instancemethod

# Pseudo-endian marker: bytes are swapped pairwise inside each 16-bit word.
LZX_ENDIAN = "BADC"

def readLZXBits(self, address, nbits, endian):
    """Replacement for a stream's readBits() that understands LZX_ENDIAN.

    Installed on the stream object by LZXStream.createFields(), which keeps
    the original method as self.oldReadBits and sets self.lzx_start.

    address -- absolute bit address in the stream
    nbits   -- number of bits to read
    endian  -- BIG_ENDIAN / LITTLE_ENDIAN (delegated to the original
               readBits) or LZX_ENDIAN

    Returns the value as an integer. Raises ReadStreamError when the stream
    cannot supply all the requested data.
    """
    def Flip16Bits(data):
        """Flip adjacent bytes, so as to convert little-endian to big
        and vice versa, over 16 bits"""
        result = [] # faster to join than to += strings
        assert len(data) % 2 == 0
        while len(data) >= 2:
            result.append(data[1::-1]) # [1::-1] is [1] + [0]
            data = data[2:]
        result.append(data)
        return ''.join(result)

    if endian in (BIG_ENDIAN, LITTLE_ENDIAN):
        return self.oldReadBits(address, nbits, endian)

    assert endian == LZX_ENDIAN
    assert hasattr(self, "lzx_start")
    # lzx_start is the # of bits from the start of the LZX block

    address_from_start = address - self.lzx_start
    words_from_start, remainder = divmod(address_from_start, 16)
    complete_nbits = alignValue(remainder + nbits, 16)

    # get a full multiple of 2 bytes
    unused, data, missing = self.read(words_from_start*16 + self.lzx_start, complete_nbits)
    shift = remainder
    if missing:
        # The name was previously used without being imported, which turned
        # this error path into a NameError.
        # NOTE(review): module path assumed from hachoir_core's layout -- confirm.
        from hachoir_core.stream.input import ReadStreamError
        raise ReadStreamError(nbits, address)
    data = Flip16Bits(data)
    value = str2long(data, BIG_ENDIAN) # the flipping above gives BE data
    value >>= len(data) * 8 - shift - nbits
    return value & ((1 << nbits) - 1)

class LZXPreTreeEncodedTree(FieldSet):
    """Code-length table for a Huffman tree, itself encoded with a "pre-tree".

    The decoded lengths are kept in self.lengths and are also stored on the
    root field set (attribute "lzx_tree_lengths_<name>"), because every code
    is a delta against the value left there by the previous table of the
    same name.
    """
    def __init__(self, parent, name, num_elements, *args, **kwargs):
        FieldSet.__init__(self, parent, name, *args, **kwargs)
        self.num_elements = num_elements

    def createFields(self):
        # 20 four-bit lengths describe the pre-tree
        for i in xrange(20):
            yield Bits(self, "pretree_lengths[]", 4)
        pre_tree = build_tree([x.value for x in self.array("pretree_lengths")])
        if not hasattr(self.root, "lzx_tree_lengths_"+self.name):
            self.lengths = [0] * self.num_elements
            setattr(self.root, "lzx_tree_lengths_"+self.name, self.lengths)
        else:
            self.lengths = getattr(self.root, "lzx_tree_lengths_"+self.name)
        i = 0
        while i < self.num_elements:
            field = HuffmanCode(self, "tree_code[]", pre_tree)
            if field.realvalue <= 16:
                self.lengths[i] = (self.lengths[i] - field.realvalue) % 17
                field._description = "Literal tree delta length %i (new length value %i for element %i) (Huffman Code %i)" % (
                    field.realvalue, self.lengths[i], i, field.value)
                i += 1
                yield field
            elif field.realvalue == 17:
                field._description = "Tree Code 17: Zeros for 4-19 elements (Huffman Code %i)" % field.value
                yield field
                extra = Bits(self, "extra[]", 4)
                zeros = 4 + extra.value
                extra._description = "Extra bits: zeros for %i elements (elements %i through %i)" % (zeros, i, i+zeros-1)
                yield extra
                self.lengths[i:i+zeros] = [0] * zeros
                i += zeros
            elif field.realvalue == 18:
                field._description = "Tree Code 18: Zeros for 20-51 elements (Huffman Code %i)" % field.value
                yield field
                extra = Bits(self, "extra[]", 5)
                zeros = 20 + extra.value
                extra._description = "Extra bits: zeros for %i elements (elements %i through %i)" % (zeros, i, i+zeros-1)
                yield extra
                self.lengths[i:i+zeros] = [0] * zeros
                i += zeros
            elif field.realvalue == 19:
                field._description = "Tree Code 19: Same code for 4-5 elements"
                yield field
                extra = Bits(self, "extra[]", 1)
                run = 4 + extra.value
                extra._description = "Extra bits: run for %i elements (elements %i through %i)" % (run, i, i+run-1)
                yield extra
                newfield = HuffmanCode(self, "extra_tree_code[]", pre_tree)
                assert newfield.realvalue <= 16
                new_length = (self.lengths[i] - newfield.realvalue) % 17
                newfield._description = "Run delta length %i (new length value %i for elements %i through %i) (Huffman Code %i)" % (
                    newfield.realvalue, new_length, i, i+run-1, newfield.value)
                # The code must be yielded so that its bits are consumed;
                # otherwise the next field is created at the same address.
                yield newfield
                self.lengths[i:i+run] = [new_length] * run
                i += run

class LZXBlock(FieldSet):
    """One LZX block: 3-bit type, 24-bit uncompressed size, then the payload.

    Decoded bytes are appended to self.parent.uncompressed_data and the
    repeated-offset registers r0/r1/r2 are kept on the parent.
    Only block types 1 (verbatim) and 3 (uncompressed) are decoded.
    """
    # compression level (window bits, read from root.compr_level)
    #   -> value multiplied by 8 to size the second part of the main tree
    WINDOW_SIZE = {15:30,
                   16:32,
                   17:34,
                   18:36,
                   19:38,
                   20:42,
                   21:50}
    # position slot -> (first value, last value, number of extra bits)
    POSITION_SLOTS = {0:(0,0,0),
                      1:(1,1,0),
                      2:(2,2,0),
                      3:(3,3,0),
                      4:(4,5,1),
                      5:(6,7,1),
                      6:(8,11,2),
                      7:(12,15,2),
                      8:(16,23,3),
                      9:(24,31,3),
                      10:(32,47,4),
                      11:(48,63,4),
                      12:(64,95,5),
                      13:(96,127,5),
                      14:(128,191,6),
                      15:(192,255,6),
                      16:(256,383,7),
                      17:(384,511,7),
                      18:(512,767,8),
                      19:(768,1023,8),
                      20:(1024,1535,9),
                      21:(1536,2047,9),
                      22:(2048,3071,10),
                      23:(3072,4095,10),
                      24:(4096,6143,11),
                      25:(6144,8191,11),
                      26:(8192,12287,12),
                      27:(12288,16383,12),
                      28:(16384,24575,13),
                      29:(24576,32767,13),
                      30:(32768,49151,14),
                      31:(49152,65535,14),
                      32:(65536,98303,15),
                      33:(98304,131071,15),
                      34:(131072,196607,16),
                      35:(196608,262143,16),
                      36:(262144,393215,17),
                      37:(393216,524287,17),
                      38:(524288,655359,17),
                      39:(655360,786431,17),
                      40:(786432,917503,17),
                      41:(917504,1048575,17),
                      42:(1048576,1179647,17),
                      43:(1179648,1310719,17),
                      44:(1310720,1441791,17),
                      45:(1441792,1572863,17),
                      46:(1572864,1703935,17),
                      47:(1703936,1835007,17),
                      48:(1835008,1966079,17),
                      49:(1966080,2097151,17),
                      }
    def createFields(self):
        yield Bits(self, "block_type", 3)
        yield Bits(self, "block_size", 24)
        self.uncompressed_size = self["block_size"].value
        self.compression_level = self.root.compr_level
        self.window_size = self.WINDOW_SIZE[self.compression_level]
        if self["block_type"].value == 1: # Verbatim block
            yield LZXPreTreeEncodedTree(self, "main_tree_start", 256)
            yield LZXPreTreeEncodedTree(self, "main_tree_rest", self.window_size * 8)
            main_tree = build_tree(self["main_tree_start"].lengths + self["main_tree_rest"].lengths)
            yield LZXPreTreeEncodedTree(self, "length_tree", 249)
            length_tree = build_tree(self["length_tree"].lengths)
            current_decoded_size = 0
            while current_decoded_size < self.uncompressed_size:
                # re-align to a 16-bit boundary after every 32768 decoded bytes
                if current_decoded_size % 32768 == 0 and current_decoded_size != 0:
                    padding = paddingSize(self.address + self.current_size, 16)
                    if padding:
                        yield PaddingBits(self, "padding[]", padding)
                field = HuffmanCode(self, "main_code[]", main_tree)
                if field.realvalue < 256:
                    field._description = "Literal value %r (Huffman Code %i)" % (chr(field.realvalue), field.value)
                    current_decoded_size += 1
                    self.parent.uncompressed_data += chr(field.realvalue)
                    yield field
                    continue
                # match: symbol encodes a position slot and a length header
                position_header, length_header = divmod(field.realvalue - 256, 8)
                info = self.POSITION_SLOTS[position_header]
                if info[2] == 0:
                    if info[0] == 0:
                        position = self.parent.r0
                        field._description = "Position Slot %i, Position [R0] (%i)" % (position_header, position)
                    elif info[0] == 1:
                        position = self.parent.r1
                        self.parent.r1 = self.parent.r0
                        self.parent.r0 = position
                        field._description = "Position Slot %i, Position [R1] (%i)" % (position_header, position)
                    elif info[0] == 2:
                        position = self.parent.r2
                        self.parent.r2 = self.parent.r0
                        self.parent.r0 = position
                        field._description = "Position Slot %i, Position [R2] (%i)" % (position_header, position)
                    else:
                        position = info[0] - 2
                        self.parent.r2 = self.parent.r1
                        self.parent.r1 = self.parent.r0
                        self.parent.r0 = position
                        field._description = "Position Slot %i, Position %i" % (position_header, position)
                else:
                    # Three specifiers, three arguments: the Huffman code is
                    # appended by the length description below. A fourth
                    # argument here raised TypeError.
                    field._description = "Position Slot %i, Positions %i to %i" % (position_header, info[0] - 2, info[1] - 2)
                if length_header == 7:
                    field._description += ", Length Values 9 and up (Huffman Code %i)"%field.value
                    yield field
                    length_field = HuffmanCode(self, "length_code[]", length_tree)
                    length = length_field.realvalue + 9
                    length_field._description = "Length Code %i, total length %i (Huffman Code %i)" % (length_field.realvalue, length, length_field.value)
                    yield length_field
                else:
                    field._description += ", Length Value %i (Huffman Code %i)"%(length_header + 2, field.value)
                    yield field
                    length = length_header + 2
                if info[2]:
                    # reuse the index of the main code in the extra field's name
                    extrafield = Bits(self, "position_extra[%s" % field.name.split('[')[1], info[2])
                    position = extrafield.value + info[0] - 2
                    self.parent.r2 = self.parent.r1
                    self.parent.r1 = self.parent.r0
                    self.parent.r0 = position
                    extrafield._description = "Position Extra Bits (%i), total position %i"%(extrafield.value, position)
                    yield extrafield
                self.parent.uncompressed_data = extend_data(self.parent.uncompressed_data, length, position)
                current_decoded_size += length
        elif self["block_type"].value == 2: # Aligned offset block
            # not decoded: no fields are produced for the payload
            pass
        elif self["block_type"].value == 3: # Uncompressed block
            padding = paddingSize(self.address + self.current_size, 16)
            if padding:
                yield PaddingBits(self, "padding[]", padding)
            else:
                yield PaddingBits(self, "padding[]", 16)
            self.endian = LITTLE_ENDIAN
            yield UInt32(self, "r[]", "New value of R0")
            yield UInt32(self, "r[]", "New value of R1")
            yield UInt32(self, "r[]", "New value of R2")
            self.parent.r0 = self["r[0]"].value
            self.parent.r1 = self["r[1]"].value
            self.parent.r2 = self["r[2]"].value
            yield RawBytes(self, "data", self.uncompressed_size)
            self.parent.uncompressed_data+=self["data"].value
            # an odd-sized payload is followed by one padding byte
            if self["block_size"].value % 2:
                yield PaddingBits(self, "padding", 8)

class LZXStream(Parser):
    """Parser for a whole LZX stream; collects output in uncompressed_data.

    The attribute compr_level must be set on the instance before the fields
    are generated (see lzx_decompress), since LZXBlock reads root.compr_level.
    """
    endian = LZX_ENDIAN
    def createFields(self):
        # Patch the stream once so that it can read LZX_ENDIAN bit fields.
        if not hasattr(self.stream, "oldReadBits"):
            self.stream.oldReadBits = self.stream.readBits
            self.stream.readBits = new.instancemethod(readLZXBits, self.stream, self.stream.__class__)
            self.stream.lzx_start = 0
        self.uncompressed_data = ""
        self.r0 = 1
        self.r1 = 1
        self.r2 = 1
        yield Bit(self, "filesize_indicator")
        if self["filesize_indicator"].value:
            yield UInt32(self, "filesize")
        while self.current_size < self.size:
            block = LZXBlock(self, "block[]")
            yield block
            # fewer than 16 bits left: pad to the word boundary and stop
            if self.size - self.current_size < 16:
                padding = paddingSize(self.address + self.current_size, 16)
                if padding:
                    yield PaddingBits(self, "padding[]", padding)
                break

def lzx_decompress(stream, window_bits):
    """Parse `stream` as LZX data and return the decompressed string.

    window_bits is stored as the parser's compr_level; it must be a key of
    LZXBlock.WINDOW_SIZE.
    """
    data = LZXStream(stream)
    data.compr_level = window_bits
    # iterating over the parser forces every field to be created
    for unused in data:
        pass
    return data.uncompressed_data
archive/mozilla_ar.py
"""MAR (Mozilla ARchive) parser

Author: Robert Xiao
Creation date: July 10, 2007

"""

from hachoir_core.endian import BIG_ENDIAN
from hachoir_core.field import (RootSeekableFieldSet, FieldSet,
    String, CString, UInt32, RawBytes)
from hachoir_core.text_handler import displayHandler, filesizeHandler
from hachoir_core.tools import humanUnixAttributes
from hachoir_parser import HachoirParser

class IndexEntry(FieldSet):
    """One record of the archive index: offset, length, flags and name."""

    def createFields(self):
        yield UInt32(self, "offset", "Offset in bytes relative to start of archive")
        length = UInt32(self, "length", "Length in bytes")
        yield filesizeHandler(length)
        flags = UInt32(self, "flags")
        yield displayHandler(flags, humanUnixAttributes)
        yield CString(self, "name", "Filename (byte array)")

    def createDescription(self):
        shown = (self["name"].display,
                 self["length"].display,
                 self["flags"].display)
        return 'File %s, Size %s, Mode %s' % shown

class MozillaArchive(HachoirParser, RootSeekableFieldSet):
    """Seekable root parser for "MAR1" archives.

    Reads the header, jumps to the index, and for each index entry emits the
    entry itself followed by the raw file bytes it points to.
    """
    MAGIC = "MAR1"
    PARSER_TAGS = {
        "id": "mozilla_ar",
        "category": "archive",
        "file_ext": ("mar",),
        "min_size": (8+4+13)*8, # Header, Index Header, 1 Index Entry
        "magic": ((MAGIC, 0),),
        "description": "Mozilla Archive",
    }
    endian = BIG_ENDIAN

    def __init__(self, stream, **args):
        RootSeekableFieldSet.__init__(self, None, "root", stream, None, stream.askSize(self))
        HachoirParser.__init__(self, stream, **args)

    def validate(self):
        """Return True when the file starts with MAGIC, else an error text."""
        if self.stream.readBytes(0, 4) == self.MAGIC:
            return True
        return "Invalid magic"

    def createFields(self):
        yield String(self, "magic", 4, "File signature (MAR1)", charset="ASCII")
        yield UInt32(self, "index_offset", "Offset to index relative to file start")
        index_start = self["index_offset"].value
        self.seekByte(index_start, False)
        yield UInt32(self, "index_size", "size of index in bytes")
        index_size = self["index_size"].value
        consumed = 0 # bytes of index parsed so far
        while consumed < index_size:
            # plus 4 compensates for index_size
            self.seekByte(index_start + consumed + 4, False)
            entry = IndexEntry(self, "index_entry[]")
            yield entry
            consumed += entry.size // 8
            # jump to the file data described by this entry
            self.seekByte(entry["offset"].value, False)
            yield RawBytes(self, "file[]", entry["length"].value)
archive/zlib.py
"""Detailed ZLIB parser

Author: Robert Xiao
Creation date: July 9 2007

"""

from hachoir_parser import Parser
from hachoir_core.field import (Bit, Bits, Field, Int16, UInt32,
    Enum, FieldSet, GenericFieldSet,
    PaddingBits, ParserError, RawBytes)
from hachoir_core.endian import LITTLE_ENDIAN
from hachoir_core.text_handler import textHandler, hexadecimal
from hachoir_core.tools import paddingSize, alignValue

def extend_data(data, length, offset):
    """Extend data using a length and an offset.

    Appends `length` items copied from `data`, starting `offset` items
    before its end. When length >= offset the last `offset` items are
    repeated as often as needed and the result is cut to `length`.

    Returns a new sequence; `data` itself is not modified.
    """
    if length >= offset:
        # repeat the tail enough times to cover `length`, then truncate
        new_data = data[-offset:] * (alignValue(length, offset) // offset)
        return data + new_data[:length]
    else:
        return data + data[-offset:-offset+length]

def build_tree(lengths):
    """Build a Huffman code table from a list of code lengths.

    lengths[i] is the code length in bits of symbol i; 0 means the symbol
    is unused. Codes are assigned in increasing order within each length,
    with the counting scheme of RFC 1951 section 3.2.2.

    Returns a dict mapping (length, code) to the symbol index. An empty
    `lengths` gives an empty dict instead of failing inside max().
    """
    if not lengths:
        return {}
    max_length = max(lengths) + 1
    bit_counts = [0]*max_length
    next_code = [0]*max_length
    tree = {}
    # count how many symbols use each code length
    for i in lengths:
        if i:
            bit_counts[i] += 1
    # first code of each length
    code = 0
    for i in range(1, len(bit_counts)):
        next_code[i] = code = (code + bit_counts[i-1]) << 1
    # hand out consecutive codes to the symbols, in symbol order
    for i, ln in enumerate(lengths):
        if ln:
            tree[(ln, next_code[ln])] = i
            next_code[ln] += 1
    return tree