gnucash_python/gnc_html_document.py at master · davidjo/gnucash_python · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312

import xml.etree.ElementTree as ET

import pdb
import traceback


from stylesheets import StyleTable


class HtmlDoc(object):
    """Small holder for one ElementTree tree plus an optional DOCTYPE string.

    The first element created through Element becomes the root (docobj);
    later Element calls append children to that root.
    """

    def __init__ (self):
        # DOCTYPE text emitted in front of the serialized tree (None = omit)
        self.dtdstr = None
        # consulted by HtmlDocument.render before it emits a <style> element
        self.engine_supports_css = True
        # element registered through HeaderElement (None until then)
        self.header = None
        # root element of the tree (None until the first Element call)
        self.docobj = None

    def dtd (self, dtdstr):
        """Store the DOCTYPE string that tostring emits before the tree."""
        self.dtdstr = dtdstr

    def tostring (self, encoding):
        """Serialize the tree, prefixed by the DOCTYPE string when one is set.

        encoding is passed straight to ET.tostring: "unicode" yields a str,
        any real codec name yields bytes.  The DOCTYPE prefix is converted
        to match, so the concatenation works for both kinds of result.
        """
        body = ET.tostring(self.docobj, encoding=encoding)
        if self.dtdstr is None:
            return body
        prefix = self.dtdstr
        if isinstance(body, bytes) and isinstance(prefix, str):
            # ET.tostring returned bytes - str + bytes would raise TypeError
            prefix = prefix.encode(encoding)
        return prefix + body

    def Element (self, tag, attrib=None, **extra):
        """Create an element; it becomes the root, or a child of the root.

        Returns the new element in both cases.
        """
        newobj = ET.Element(tag, {} if attrib is None else attrib, **extra)
        if self.docobj is None:
            # first element made through this document is the root
            self.docobj = newobj
        else:
            self.docobj.append(newobj)
        return newobj

    def SubElement (self, parent, tag, attrib=None, **extra):
        """Create an element as the last child of parent and return it."""
        return ET.SubElement(parent, tag, {} if attrib is None else attrib, **extra)

    def HeaderElement (self, tag, attrib=None, **extra):
        """Create an element via Element and remember it as the header.

        The remembered element is how the style sheet header is found
        later so that its text can be updated.
        """
        subobj = self.Element(tag, {} if attrib is None else attrib, **extra)
        self.header = subobj
        return subobj


class HtmlDocument(object):
    """Python stand-in for the scheme html-document record.

    This emulates some of the scheme code, but the document content is
    kept as an xml DOM (ElementTree wrapped in an HtmlDoc) instead of a
    list of objects.
    """

    def __init__ (self,stylesheet=None):
        # this emulates the scheme html-document record
        # (style-sheet style-stack style style-text title headline objects)
        # note that "objects" is now the doc variable
        # style_text is written by render as the text of the <style> element
        self.style_sheet = stylesheet
        self.style_stack = None
        self.style = StyleTable()
        self.style_text = None
        self.title = None
        self.headline = None
        self.doc = HtmlDoc()
        # every document starts out with a <body> root
        tmpobj = self.doc.Element("body")
        tmpobj.text = "\n"
        tmpobj.tail = "\n"

    def get_doc (self):
        """Return the HtmlDoc wrapper holding the tree."""
        return self.doc

    def get_xml (self):
        """Return the root element of the tree (None when there is none)."""
        return self.doc.docobj

    def tostring (self, encoding):
        """Serialize the tree, prefixed by the DOCTYPE string when one is set.

        encoding is passed straight to ET.tostring: "unicode" yields a str,
        any real codec name yields bytes.  The DOCTYPE prefix is converted
        to match, so the concatenation works for both kinds of result.
        """
        body = ET.tostring(self.doc.docobj, encoding=encoding)
        prefix = self.doc.dtdstr
        if prefix is None:
            return body
        if isinstance(body, bytes) and isinstance(prefix, str):
            # ET.tostring returned bytes - str + bytes would raise TypeError
            prefix = prefix.encode(encoding)
        return prefix + body

    # new idea - use this function to add the body object with style
    def add_body_style (self, attr_dict=None):
        """Make sure the document has a <body> and apply styling to it.

        attr_dict['body'] maps keys to dicts: the "attribute" entry is
        copied onto the body element itself, every other key creates a
        sub element of the body carrying the given attributes.  A missing
        attr_dict or a missing 'body' entry only ensures the body exists.
        """
        root = self.doc.docobj
        if root is None:
            bodyobj = self.doc.Element("body")
        elif root.tag == "body":
            # find() only looks at children, so the root has to be
            # checked explicitly - __init__ makes body the root
            bodyobj = root
        else:
            bodyobj = root.find("body")
            if bodyobj is None:
                htmlobj = root if root.tag == "html" else root.find("html")
                if htmlobj is not None:
                    bodyobj = self.doc.SubElement(htmlobj,"body")
                else:
                    # we have elements but no body - nor html
                    # make an outer body object and install it as the root
                    bodyobj = ET.Element("body")
                    bodyobj.append(root)
                    self.doc.docobj = bodyobj

        body_styles = (attr_dict or {}).get('body') or {}
        for key,val in body_styles.items():
            if key == "attribute":
                target = bodyobj
            else:
                target = self.doc.SubElement(bodyobj,key)
            for key1,val1 in val.items():
                target.attrib[key1] = val1

    def set_stylesheet (self, stylesheet):
        """Replace the style sheet used by render."""
        self.style_sheet = stylesheet

    def StyleElement (self, tag, attrib=None, parent=None):
        """Create an element through the style table and add it to the tree.

        The element is appended to parent when given, otherwise to the
        root.  When the document has no root yet the new element becomes
        the root and parent is not used.
        """
        # hand the style table a fresh dict per call rather than one
        # shared default object
        newelm = self.style.make_html(tag, {} if attrib is None else attrib)
        if self.doc.docobj is None:
            self.doc.docobj = newelm
        elif parent is not None:
            parent.append(newelm)
        else:
            self.doc.docobj.append(newelm)
        return newelm

    def StyleSubElement (self, parent, tag, attrib=None):
        """Same as StyleElement with the ElementTree argument order."""
        # this simply makes report writing more consistent with Elementtree
        return self.StyleElement(tag, attrib=attrib, parent=parent)

    def _render_head (self, docxml):
        """Set the DOCTYPE and build <html><head>; return (html, title) elements."""
        # the dtd is not part of the tree - it is added at string generation
        docxml.dtd('<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" \n"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">\n')
        htmlobj = docxml.Element('html', attrib={'xmlns' : "http://www.w3.org/1999/xhtml"})
        htmlobj.text = "\n"
        headobj = docxml.SubElement(htmlobj,"head")
        headobj.text = "\n"
        headobj.tail = "\n"
        metaobj = docxml.SubElement(headobj,'meta', attrib={'http-equiv' : "content-type", 'content' : "text/html; charset=utf-8"})
        metaobj.text = "\n"
        metaobj.tail = "\n"

        if docxml.engine_supports_css and self.style_text:
            cssobj = docxml.SubElement(headobj,'style', attrib={'type' : "text/css", })
            cssobj.text = self.style_text
            cssobj.tail = "\n"

        # the report title is not known yet - the text of this element
        # is filled in once the report has been rendered
        titlobj = docxml.SubElement(headobj,"title")
        return htmlobj, titlobj

    def render (self, report, headers=None):
        """Render report and return the document as a unicode string.

        With a style sheet set, the work is delegated to
        self.style_sheet.render(report, self, headers) and its result is
        returned unchanged.  Otherwise report.report_type.renderer(report)
        is called and its document is merged into this one; when headers
        is true the result is wrapped in DOCTYPE/html/head, when it is
        false only the body is serialized.

        Raises RuntimeError when the report renderer fails or returns
        None, or when the document cannot be serialized.
        """
        # need to deal with fact this is called sort of recusively
        # - if have stylesheet we go through here and call self.style_sheet.render
        # which calls this function again
        # need to ensure we have a fresh docxml object here
        parent_docobj = None
        if self.doc.docobj is not None:
            parent_docobj = self.doc
            self.doc = HtmlDoc()

        # now going using a python xml dom model - currently ElementTree
        docxml = self.doc

        # make sure this is defined
        report.stylesheet_document = None

        # so thats how this works - the scheme does a first call to the stylesheet
        # render which then creates a new HtmlDocument object where this render
        # function is called with an undefined style_sheet - but a defined style!!
        if self.style_sheet is not None:
            # self.style_sheet.render calls this function again and that
            # call returns the document string - so this is the string too
            return self.style_sheet.render(report,self,headers)

        # do trivial render

        htmlobj = None
        titlobj = None
        if headers:
            htmlobj, titlobj = self._render_head(docxml)

        # placeholder body - used when the report root is not a body
        bodyobj = ET.Element("body")
        bodyobj.text = "\n"
        bodyobj.tail = "\n"

        renderer = report.report_type.renderer

        # we need to pass the style table (unlike scheme way)
        # do it through the report for the moment
        report.style = self.style

        try:
            subdoc = renderer(report)
        except Exception as errexc:
            traceback.print_exc()
            # ensure GUI reactivated
            report.report_type.cleanup_gui()
            raise RuntimeError("Invalid Html subdoc") from errexc

        if subdoc is None:
            raise RuntimeError("Invalid Html subdoc")

        # copy in the report title (only exists when headers were made)
        title = getattr(subdoc,"title",None)
        if titlobj is not None:
            titlobj.text = title if title is not None else "\n"

        # figure the headline - falls back to the title, then to ""
        headline = getattr(subdoc,"headline",None)
        if headline is None:
            headline = title
        if headline is None:
            headline = ""

        # add header from ssdoc and update text
        # note this assumes the only thing in parent_docobj is the header
        hdrobj = None
        if parent_docobj is not None and parent_docobj.header is not None and headline != "":
            hdrobj = parent_docobj.header
            hdrobj.text = headline

        subroot = subdoc.doc.docobj
        if subroot is not None and subroot.tag == "body":
            # the report supplied its own body - use it as is
            finalbody = subroot
            if hdrobj is not None:
                subroot.insert(0,hdrobj)
                # bit weird but only way I can see to do this
                # we move the bodyobj text to the tail of the header
                # as header needs to come first
                if subroot.text is not None:
                    hdrobj.tail = "\n"+subroot.text
                    subroot.text = None
        else:
            # assume body not added if not first element - wrap the
            # report content (if any) in the placeholder body
            finalbody = bodyobj
            if hdrobj is not None:
                bodyobj.append(hdrobj)
            if subroot is not None:
                bodyobj.append(subroot)

        if htmlobj is not None:
            htmlobj.append(finalbody)
        else:
            # no html wrapper was requested - the body is the document
            docxml.docobj = finalbody

        try:
            # in python 3 to keep in python unicode we need to use unicode encoding
            # - default is to convert to bytestring
            docstr = docxml.tostring(encoding="unicode")
        except Exception as errexc:
            traceback.print_exc()
            try:
                # best effort diagnostic - shows the tree up to the bad node
                ET.dump(docxml.docobj)
            except Exception:
                pass
            raise RuntimeError("Invalid Html doc") from errexc

        if docstr is None:
            raise RuntimeError("Invalid Html doc")

        # debugging dump - must never stop the report being returned
        try:
            with open("junk.html","w",encoding="utf-8") as fds:
                fds.write(docstr)
        except OSError:
            traceback.print_exc()

        return docstr