Construction of an html_filter object feeds the parser with an initial <HTML> tag. Termination semantics are as follows: if the external data source becomes exhausted, processing of buffered data should be forced by calling the close method of the sgml_wrapper or html_filter object.
If a </HTML> ending tag is detected, an eoi exception is thrown.
In either case, reset() may be called to reinitialise the object state, or the object can be destroyed. Note that the current implementation dispatches tags to global methods, rather than to a weaver bound to the object. This is to permit bindings to Interscript operations other than weaving.
Embedded Python and Tcl (if supported) can be executed in the Interscript environment using the <SCRIPT> tag as follows:
<SCRIPT LANGUAGE="Python"><!-- print "Hello World" #--> </SCRIPT> <SCRIPT LANGUAGE="Tcl"><!-- puts "Hello World" #--> </SCRIPT> Note that the use of comments is _not_ optional. If an error is detected during execution, a diagnostic is printed, but processing of the document continues beyond the ending SCRIPT tag.
1: #line 39 "html_parser.ipk" 2: from interscript.drivers.sources.base import eoi 3: import string 4: import traceback 5: class sgml_wrapper: 6: def __init__(self, sgml): 7: self.sgml = sgml 8: 9: def writeline(self,data,file,count): 10: self.sgml.feed(data) 11: 12: def close(): 13: self.sgml.close(self) 14: 15: def reset(self): 16: self.sgml.reset() 17: 18: # this is a hack: sgmllib needs to be imported here 19: # so the class SGMLParser defined in it can be used as a base 20: import sgmllib 21: 22: class html_filter(sgmllib.SGMLParser): 23: def __init__(self, input_frame): 24: sgmllib.SGMLParser.__init__(self) 25: self.save_data = 0 26: self.script_language = '' 27: self.input_frame = input_frame 28: self.weaver = input_frame.get_weaver() 29: self.process = input_frame.process 30: # feeding <HTML> in here is a hack to get around a bug in sgmllib, 31: # which fails to process unbalanced end tags correctly 32: self.feed('<HTML>') 33: 34: def _save(self): 35: self.save_data = 1 36: self.saved_data = '' 37: def _saved(self): 38: self.save_data = 0 39: return self.saved_data 40: 41: def handle_data(self,data): 42: new_data = '' 43: for ch in data: 44: if ch == '\n': ch = ' \n' 45: new_data = new_data + ch 46: if self.save_data: 47: self.saved_data = self.saved_data + new_data 48: else: 49: self.weaver.write(new_data) 50: 51: def handle_comment(self,data): 52: if 'parsers' in self.process.trace: 53: print 'SGML comment',data 54: if self.script_language != '': 55: self.saved_comments = self.saved_comments + data 56: 57: def start_html(self, attributes): pass 58: def start_head(self, attributes): pass 59: def end_head(self): pass 60: def start_body(self, attributes): pass 61: def end_body(self): pass 62: def end_html(self): 63: del self.input_frame 64: del self.weaver 65: raise eoi 66: 67: # fonts 68: def start_b(self,attributes): self.weaver.begin_bold() 69: def end_b(self): self.weaver.end_bold() 70: 71: def start_i(self,attributes): self.weaver.begin_italic() 72: def 
end_i(self): self.weaver.end_italic() 73: 74: def start_em(self,attributes): self.weaver.begin_emphasize() 75: def end_em(self): self.weaver.end_emphasize() 76: 77: def start_strong(self,attributes): self.weaver.begin_strong() 78: def end_strong(self): self.weaver.end_strong() 79: 80: def start_small(self,attributes): self.weaver.begin_small() 81: def end_small(self): self.weaver.end_small() 82: 83: def start_big(self,attributes): self.weaver.begin_big() 84: def end_big(self): self.weaver.end_big() 85: 86: def start_code(self,attributes): self.weaver.begin_code() 87: def end_code(self): self.weaver.end_code() 88: 89: # paragraphs 90: def start_p(self,attributes): self.weaver.prose() 91: def end_p(self): self.weaver.eop() 92: 93: # displays 94: def start_pre(self,attributes): self.weaver.begin_displayed_code() 95: def end_pre(self): self.weaver.end_displayed_code() 96: 97: #lists 98: def start_ol(self,attributes): 99: self.weaver.begin_numbered_list() 100: self.list_kind = 'ol' 101: def end_ol(self): 102: self.weaver.end_numbered_list() 103: 104: def start_dl(self,attributes): 105: self.weaver.begin_keyed_list() 106: self.list_kind = 'dl' 107: def end_dl(self): 108: self.weaver.end_keyed_list() 109: 110: def start_ul(self,attributes): 111: self.weaver.begin_bullet_list() 112: self.list_kind = 'ul' 113: def end_ul(self): 114: self.weaver.end_bullet_list() 115: 116: #list items 117: def start_li(self,attributes): 118: if self.list_kind == 'ol': 119: self.weaver.begin_numbered_list_item() 120: else: 121: self.weaver.begin_bullet_list_item() 122: 123: def end_li(self): 124: if self.list_kind == 'ol': 125: self.weaver.end_numbered_list_item() 126: else: 127: self.weaver.end_bullet_list_item() 128: 129: def start_dt(self,attributes): self._save() 130: def end_dt(self): 131: self.weaver.begin_keyed_list_item(self._saved()) 132: 133: def start_dd(self,attributes): pass 134: def end_dd(self): self.weaver.end_keyed_list_item() 135: 136: #headings 137: def 
start_h1(self,attributes): self._save() 138: def end_h1(self): self.weaver.head(1,self._saved()) 139: 140: def start_h2(self,attributes): self._save() 141: def end_h2(self): self.weaver.head(2,self._saved()) 142: 143: def start_h3(self,attributes): self._save() 144: def end_h3(self): self.weaver.head(3,self._saved()) 145: 146: def start_h4(self,attributes): self._save() 147: def end_h4(self): self.weaver.head(4,self._saved()) 148: 149: def start_h5(self,attributes): self._save() 150: def end_h5(self): self.weaver.head(5,self._saved()) 151: 152: def start_h6(self,attributes): self._save() 153: def end_h6(self): self.weaver.head(6,self._saved()) 154: 155: def unknown_starttag(self,tag,attributes): 156: print 'UNKNOWN START TAG',tag,attributes 157: 158: def unknown_endtag(self,tag): 159: print 'UNKNOWN END TAG',tag 160: 161: def unknown_charref(self,ref): 162: print 'BAD CHAR REF',ref 163: 164: def unknown_entityref(self,ref): 165: print 'UNKNOWN ENTITY REF',ref 166: 167: # due to a bug in sgmllib, this routine will 168: # never be called 169: def report_unbalanced(self,tag): 170: print 'LONELY ENDTAG',tag 171: 172: def start_script(self,attributes): 173: if 'parsers' in self.process.trace: 174: print 'start of script' 175: for param, value in attributes: 176: if string.lower(param) == 'language': 177: self.script_language = string.lower(value) 178: self.saved_comments = '' 179: 180: def end_script(self): 181: if 'parsers' in self.process.trace: 182: print 'end of script' 183: if self.script_language == 'python': 184: try: 185: exec self.saved_comments in globals(),self.input_frame.userdict 186: except: 187: print "Error executing python <SCRIPT>" 188: traceback.print_exc() 189: else: 190: print 'Sorry',self.script_language,'not available'