|
本帖最后由 mylife18 于 2017-11-8 11:03 编辑
import os
from xml.sax import parse
from xml.sax.handler import ContentHandler
from xml.sax.saxutils import escape
class Dispatcher:
    """Route SAX element events to handler methods chosen by tag name.

    For an element ``foo`` the start event is sent to ``startFoo(attrs)`` and
    the end event to ``endFoo()`` when such a method exists.  Otherwise the
    event falls back to ``defaultStart(name, attrs)`` / ``defaultEnd(name)``.
    If neither exists the event is silently ignored.
    """

    def dispatch(self, prefix, name, attrs=None):
        """Call the handler for one element event.

        prefix -- 'start' or 'end'.
        name   -- element name; it is passed through ``str.capitalize`` to
                  build the method name, so only the first letter is upper
                  case ('myTag' looks up 'startMytag').
        attrs  -- attributes object; forwarded only for 'start' events.
        """
        specific_name = prefix + name.capitalize()
        fallback_name = 'default' + prefix.capitalize()

        method = getattr(self, specific_name, None)
        if callable(method):
            # Tag-specific handlers already know which element they serve.
            args = ()
        else:
            # Default handlers receive the element name as first argument.
            method = getattr(self, fallback_name, None)
            args = (name,)

        if prefix == 'start':
            args += (attrs,)

        if callable(method):
            method(*args)

    def startElement(self, name, attrs):
        """SAX callback: an element was opened."""
        self.dispatch('start', name, attrs)

    def endElement(self, name):
        """SAX callback: an element was closed."""
        self.dispatch('end', name)
class WebsiteConstructor(Dispatcher, ContentHandler):
    """SAX handler that turns a site description into a tree of HTML files.

    ``directory`` elements (attribute ``name``) create nested directories,
    ``page`` elements (attributes ``name`` and ``title``) open
    ``<name>.html`` in the current directory, and every other element and
    text node seen while a page is open is copied into that file.
    """

    # True only between startPage and endPage; gates all pass-through output.
    passthrough = False

    def __init__(self, directory):
        """Start building under *directory*, creating it if needed."""
        ContentHandler.__init__(self)
        # Stack of path components; joined, it names the current directory.
        self.directory = [directory]
        self.ensureDirectory()

    def ensureDirectory(self):
        """Create the directory named by the current stack if it is missing."""
        path = os.path.join(*self.directory)
        print(path)
        print('----')
        if not os.path.isdir(path):
            os.makedirs(path)

    def characters(self, chars):
        """Copy text content into the open page file."""
        if self.passthrough:
            # The parser delivers decoded text, so &, < and > have to be
            # escaped again to keep the generated HTML well formed.
            self.out.write(escape(chars))

    def defaultStart(self, name, attrs):
        """Handle the start of any element without its own start handler."""
        print("Start---name:  %s" % (name,))
        print("Start---attrs:  %s" % (attrs,))
        print("Start---attrs.items():  %s" % (attrs.items(),))
        if self.passthrough:
            self.out.write('<' + name)
            for key, val in attrs.items():
                # Values are written inside double quotes, so escape those
                # as well as the usual markup characters.
                quoted = escape(val, {'"': '&quot;'})
                self.out.write(' %s="%s"' % (key, quoted))
            self.out.write('>')

    def test1(self, name, attrs):
        """Debug helper that is never invoked.

        Nothing in this class calls it, and Dispatcher.dispatch only looks up
        methods named 'start<Tag>', 'end<Tag>', 'defaultStart' and
        'defaultEnd', so these prints never run during parsing.
        """
        print("test---name:  %s" % (name,))
        print("test---attrs:  %s" % (attrs,))

    def defaultEnd(self, name):
        """Handle the end of any element without its own end handler."""
        print("defaultEnd---name:  %s" % (name,))
        if self.passthrough:
            self.out.write('</%s>' % name)

    def startDirectory(self, attrs):
        """Enter a sub-directory named by the element's 'name' attribute."""
        self.directory.append(attrs['name'])
        self.ensureDirectory()

    def endDirectory(self):
        """Leave the current sub-directory."""
        print('endDirectory')
        self.directory.pop()

    def startPage(self, attrs):
        """Open '<name>.html' in the current directory and start copying."""
        print('startPage')
        filename = os.path.join(*self.directory + [attrs['name'] + '.html'])
        print("filename: %s" % (filename,))
        # NOTE(review): the file uses the platform default encoding; non-ASCII
        # page text may fail to encode (notably on Python 2) -- confirm
        # whether an explicit encoding is wanted.
        self.out = open(filename, 'w')
        self.writeHeader(attrs['title'])
        self.passthrough = True

    def endPage(self):
        """Finish the current page and close its file."""
        print('endPage')
        self.passthrough = False
        try:
            self.writeFooter()
        finally:
            # Release the file handle even if writing the footer fails.
            self.out.close()

    def writeHeader(self, title):
        """Write the HTML preamble, with *title* escaped, to the open page."""
        self.out.write('<html>\n <head>\n <title>')
        self.out.write(escape(title))
        self.out.write('</title>\n </head>\n <body>\n')

    def writeFooter(self):
        """Write the closing body/html tags to the open page."""
        self.out.write('\n </body>\n</html>\n')
# Script entry: read website.xml from the current working directory and
# generate the site under ./public_html.
parse(
    'website.xml',
    WebsiteConstructor('public_html'),
)
版本:Python 2.7.14 (v2.7.14:84471935ed, Sep 16 2017, 20:25:58) [MSC v.1500 64 bit (AMD64)]
我在尝试读懂这个简单的网页生成脚本,py 和 xml 文件都在附件中。
问题一:
在powershell中,为何函数「test1」不能打印参数 name 和 attrs 呢?
我的分析:
一,函数「test1」上下的函数「defaultStart」和「defaultEnd」都能打印各自的参数(defaultStart 打印 name 和 attrs,defaultEnd 打印 name),反而处于中间的函数「test1」不能,我很懵逼。
二,函数「test1」上下的函数「defaultStart」和「defaultEnd」并不是引用或内建的函数,是用户自定义的,这没什么特别
三,xml文件也是简单的,没什么特别
问题二:
print attrs的结果是:<xml.sax.xmlreader.AttributesImpl instance at 0x00000000029B60C8>
为什么能直接引用作为一个AttributesImpl的实例的attrs,它从哪来呢?
我的分析:
一,
from xml.sax.handler import ContentHandler
from xml.sax import parse
import os
--------------------------------------
导入的模块或函数根本就没有涉及xml.sax.xmlreader.AttributesImpl,它是从哪里来的呢?
二,根据官方文档,我查了导入的 ContentHandler 和 parse,都没有提到 AttributesImpl。
新手一枚,望指导!
|
|