Commit cffdaaae authored by Alexander Gehrke's avatar Alexander Gehrke
Browse files

More cleaning up of generated code

parent a8f39510
...@@ -16,7 +16,7 @@ def new_document_from_image(path: str, metadata: Optional[ds.MetadataType] = Non ...@@ -16,7 +16,7 @@ def new_document_from_image(path: str, metadata: Optional[ds.MetadataType] = Non
im = Image.open(path) im = Image.open(path)
w = im.width w = im.width
h = im.height h = im.height
xres, yres = im.info['dpi'] xres, yres = im.info['dpi'] if 'dpi' in im.info else (None, None)
return ds.PcGtsTypeSub( return ds.PcGtsTypeSub(
metadata=metadata if metadata is not None else ds.MetadataTypeSub.default(), metadata=metadata if metadata is not None else ds.MetadataTypeSub.default(),
......
This diff is collapsed.
This diff is collapsed.
import sys
from pypagexml.ds import PcGtsType
from pypagexml.ds.gdscommon import Tag_pattern_, GdsCollector_, parsexml_, SaveElementTreeNode, parsexmlstring_
try:
from lxml import etree as etree_
except ImportError:
from xml.etree import ElementTree as etree_
GDSClassesMapping = {
'PcGts': PcGtsType,
}
USAGE_TEXT = """
Usage: python <Parser>.py [ -s ] <in_xml_file>
"""
def usage():
print(USAGE_TEXT)
sys.exit(1)
def get_root_tag(node):
tag = Tag_pattern_.match(node.tag).groups()[-1]
rootClass = GDSClassesMapping.get(tag)
if rootClass is None:
rootClass = globals().get(tag)
return tag, rootClass
def get_required_ns_prefix_defs(rootNode):
"""Get all name space prefix definitions required in this XML doc.
Return a dictionary of definitions and a char string of definitions.
"""
nsmap = {
prefix: uri
for node in rootNode.iter()
for (prefix, uri) in node.nsmap.items()
if prefix is not None
}
namespacedefs = ' '.join([
'xmlns:{}="{}"'.format(prefix, uri)
for prefix, uri in nsmap.items()
])
return nsmap, namespacedefs
def parse(inFileName, silence=False, print_warnings=True):
global CapturedNsmap_
gds_collector = GdsCollector_()
parser = None
doc = parsexml_(inFileName, parser)
rootNode = doc.getroot()
rootTag, rootClass = get_root_tag(rootNode)
if rootClass is None:
rootTag = 'PcGtsType'
rootClass = PcGtsType
rootObj = rootClass.factory()
rootObj.build(rootNode, gds_collector_=gds_collector)
CapturedNsmap_, namespacedefs = get_required_ns_prefix_defs(rootNode)
if not SaveElementTreeNode:
pass
if not silence:
sys.stdout.write('<?xml version="1.0" ?>\n')
rootObj.export(
sys.stdout, 0, name_=rootTag,
namespacedef_=namespacedefs,
pretty_print=True)
if print_warnings and len(gds_collector.get_messages()) > 0:
separator = ('-' * 50) + '\n'
sys.stderr.write(separator)
sys.stderr.write('----- Warnings -- count: {} -----\n'.format(
len(gds_collector.get_messages()), ))
gds_collector.write_messages(sys.stderr)
sys.stderr.write(separator)
return rootObj
def parseEtree(inFileName, silence=False, print_warnings=True,
mapping=None, nsmap=None):
parser = None
doc = parsexml_(inFileName, parser)
gds_collector = GdsCollector_()
rootNode = doc.getroot()
rootTag, rootClass = get_root_tag(rootNode)
if rootClass is None:
rootTag = 'PcGtsType'
rootClass = PcGtsType
rootObj = rootClass.factory()
rootObj.build(rootNode, gds_collector_=gds_collector)
# Enable Python to collect the space used by the DOM.
if mapping is None:
mapping = {}
rootElement = rootObj.to_etree(
None, name_=rootTag, mapping_=mapping, nsmap_=nsmap)
reverse_mapping = rootObj.gds_reverse_node_mapping(mapping)
if not SaveElementTreeNode:
pass
if not silence:
content = etree_.tostring(
rootElement, pretty_print=True,
xml_declaration=True, encoding="utf-8")
sys.stdout.write(str(content))
sys.stdout.write('\n')
if print_warnings and len(gds_collector.get_messages()) > 0:
separator = ('-' * 50) + '\n'
sys.stderr.write(separator)
sys.stderr.write('----- Warnings -- count: {} -----\n'.format(
len(gds_collector.get_messages()), ))
gds_collector.write_messages(sys.stderr)
sys.stderr.write(separator)
return rootObj, rootElement, mapping, reverse_mapping
def parseString(inString, silence=False, print_warnings=True):
"""Parse a string, create the object tree, and export it.
Arguments:
- inString -- A string. This XML fragment should not start
with an XML declaration containing an encoding.
- silence -- A boolean. If False, export the object.
Returns -- The root object in the tree.
"""
parser = None
rootNode = parsexmlstring_(inString, parser)
gds_collector = GdsCollector_()
rootTag, rootClass = get_root_tag(rootNode)
if rootClass is None:
rootTag = 'PcGtsType'
rootClass = PcGtsType
rootObj = rootClass.factory()
rootObj.build(rootNode, gds_collector_=gds_collector)
if not SaveElementTreeNode:
pass
if not silence:
sys.stdout.write('<?xml version="1.0" ?>\n')
rootObj.export(
sys.stdout, 0, name_=rootTag,
namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"')
if print_warnings and len(gds_collector.get_messages()) > 0:
separator = ('-' * 50) + '\n'
sys.stderr.write(separator)
sys.stderr.write('----- Warnings -- count: {} -----\n'.format(
len(gds_collector.get_messages()), ))
gds_collector.write_messages(sys.stderr)
sys.stderr.write(separator)
return rootObj
def parseLiteral(inFileName, silence=False, print_warnings=True):
parser = None
doc = parsexml_(inFileName, parser)
gds_collector = GdsCollector_()
rootNode = doc.getroot()
rootTag, rootClass = get_root_tag(rootNode)
if rootClass is None:
rootTag = 'PcGtsType'
rootClass = PcGtsType
rootObj = rootClass.factory()
rootObj.build(rootNode, gds_collector_=gds_collector)
# Enable Python to collect the space used by the DOM.
if not SaveElementTreeNode:
pass
if not silence:
sys.stdout.write('#from generated import *\n\n')
sys.stdout.write('import generated as model_\n\n')
sys.stdout.write('rootObj = model_.rootClass(\n')
rootObj.exportLiteral(sys.stdout, 0, name_=rootTag)
sys.stdout.write(')\n')
if print_warnings and len(gds_collector.get_messages()) > 0:
separator = ('-' * 50) + '\n'
sys.stderr.write(separator)
sys.stderr.write('----- Warnings -- count: {} -----\n'.format(
len(gds_collector.get_messages()), ))
gds_collector.write_messages(sys.stderr)
sys.stderr.write(separator)
return rootObj
def main():
args = sys.argv[1:]
if len(args) == 1:
parse(args[0])
else:
usage()
if __name__ == '__main__':
# import pdb; pdb.set_trace()
main()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment