Ticket #138: ole2_util.py

File ole2_util.py, 3.2 kB (added by nneonneo <nneonneo@gmail.com>, 1 year ago)

hachoir-parser/hachoir_parser/misc/ole2_util.py

Line 
1 from hachoir_core.endian import LITTLE_ENDIAN
2 from hachoir_core.field import Int8, RawBytes, RootSeekableFieldSet, FieldSet, ParserError
3 from hachoir_core.stream import StringInputStream
4 from hachoir_parser import HachoirParser
5
class OLE2FragmentParser(HachoirParser, RootSeekableFieldSet):
    """Base parser for a sub-fragment of a Microsoft Office document.

    Two class-level flags, both off by default, enable optional work in
    __init__:

    * ENDIAN_CHECK: look up the "endian" field and set ``self.endian``
      from its two-byte marker.
    * OS_CHECK: look up the "os" field and build ``self.osconfig`` from it.

    The fields are fetched by name (``self["endian"]`` / ``self["os"]``), so
    a subclass that turns a flag on has to produce a field with that name.
    """
    tags = {
        "description": "Microsoft Office document subfragments",
    }
    endian = LITTLE_ENDIAN

    # Value of the "os" field that selects the Macintosh configuration.
    OS_MAC = 1
    OS_NAME = {
        0: "Windows 16-bit",
        1: "Macintosh",
        2: "Windows 32-bit",
    }
    ENDIAN_CHECK = False
    OS_CHECK = False

    def __init__(self, stream, **args):
        """Initialise both base classes, then run the enabled checks.

        Raises ParserError when ENDIAN_CHECK is set and the "endian" field
        holds neither of the two known markers.
        """
        # Only LITTLE_ENDIAN is imported at module level; without this
        # import the big-endian branch below fails with a NameError.
        from hachoir_core.endian import BIG_ENDIAN

        RootSeekableFieldSet.__init__(
            self, None, "root", stream, None, stream.askSize(self))
        HachoirParser.__init__(self, stream, **args)
        if self.ENDIAN_CHECK:
            marker = self["endian"].value
            if marker == "\xFF\xFE":
                self.endian = BIG_ENDIAN
            elif marker == "\xFE\xFF":
                self.endian = LITTLE_ENDIAN
            else:
                raise ParserError("OLE2: Invalid endian value")
        if self.OS_CHECK:
            # OSConfig receives True exactly when the "os" field is OS_MAC.
            self.osconfig = OSConfig(self["os"].value == self.OS_MAC)

    def validate(self):
        """Return True, or an error message if the endian marker is unknown.

        The marker is only inspected when ENDIAN_CHECK is set.
        """
        if self.ENDIAN_CHECK:
            marker = self["endian"].value
            if marker not in ["\xFF\xFE", "\xFE\xFF"]:
                return "Unknown endian value %s" % marker.encode('hex')
        return True
40
class RawParser(OLE2FragmentParser):
    """Fragment parser that exposes its content as undecoded bytes.

    NOTE(review): ``self.datasize`` is not assigned anywhere in the visible
    code; presumably whoever creates the parser sets it -- confirm.
    """
    ENDIAN_CHECK = False
    OS_CHECK = False

    def createFields(self):
        """Yield "rawdata", then "slack_space" for any bytes left over."""
        yield RawBytes(self, "rawdata", self.datasize)
        # self.size is divided by 8 to get a byte count for RawBytes.
        leftover = (self.size // 8) - self.datasize
        if leftover > 0:
            yield RawBytes(self, "slack_space", leftover)
47
class OSConfig:
    """Pair of text encodings (``charset`` and ``utf16``) picked by one flag."""

    def __init__(self, big_endian):
        """Store the encoding names selected by the truthiness of big_endian.

        A truthy flag selects "MacRoman" / "UTF-16-BE"; a falsy one selects
        "ISO-8859-1" / "UTF-16-LE".
        """
        self.charset, self.utf16 = (
            ("MacRoman", "UTF-16-BE") if big_endian
            else ("ISO-8859-1", "UTF-16-LE")
        )
56
class Bool(Int8):
    """Int8 field whose value is True only when the stored integer is -1."""

    def createValue(self):
        """Return whether the underlying Int8 value equals -1."""
        return Int8.createValue(self) == -1
61
class FragmentGroup:
    """Ordered collection of fragments that are parsed together as one stream.

    ``parser`` is passed on as the "class" tag of the combined stream and
    ``args`` as its "args" tag.
    """

    def __init__(self, parser):
        """Start an empty group bound to the given parser."""
        self.parser = parser
        self.args = {}
        self.items = []

    def add(self, item):
        """Append one fragment to the group."""
        self.items.append(item)

    def createInputStream(self):
        """Concatenate every item's "rawdata" value into a single input stream.

        The root of the first item is stored under ``self.args["ole2"]``
        before the stream is built, so the group must not be empty.
        """
        # FIXME: Use lazy stream creation
        payload = "".join([member["rawdata"].value for member in self.items])

        # FIXME: Use smarter code to send arguments
        self.args["ole2"] = self.items[0].root
        stream_tags = {"class": self.parser, "args": self.args}.iteritems()
        return StringInputStream(payload, "<fragment group>", tags=stream_tags)
83
class CustomFragment(FieldSet):
    """Field set holding one raw fragment and registering it in a FragmentGroup."""

    def __init__(self, parent, name, size, parser, description=None, group=None):
        """Create the field set and join ``group``.

        When ``group`` is falsy a new FragmentGroup is created for ``parser``.
        """
        FieldSet.__init__(self, parent, name, description, size=size)
        self.group = group or FragmentGroup(parser)
        self.group.add(self)

    def createFields(self):
        """Yield the whole fragment as a single "rawdata" field."""
        # self.size is divided by 8 to get a byte count for RawBytes.
        byte_count = self.size // 8
        yield RawBytes(self, "rawdata", byte_count)

    def _createInputStream(self, **args):
        """Return the group's combined input stream; ``args`` is not used."""
        return self.group.createInputStream()