Views: 102
JP PINT 0.9.3のダウンロードファイル

画面左下の「Download resources」ボタンをクリックするとzipファイルがダウンロードできます。
このzipファイルを解凍すると、次のようなファイルがあります。
.
├── common
│   └── docs
│       ├── bis.pdf
│       ├── compliance.pdf
│       └── release-notes.pdf
└── trn-invoice
    ├── codelist
    │   ├── Aligned-TaxCategoryCodes.gc
    │   ├── Aligned-TaxCategoryCodes.yaml
    │   ├── Aligned-TaxExemptionCodes.gc
    │   ├── Aligned-TaxExemptionCodes.yaml
    │   ├── ICD.gc
    │   ├── ICD.yaml
    │   ├── ISO3166.gc
    │   ├── ISO3166.yaml
    │   ├── ISO4217.gc
    │   ├── ISO4217.yaml
    │   ├── MimeCode.gc
    │   ├── MimeCode.yaml
    │   ├── SEPA.gc
    │   ├── SEPA.yaml
    │   ├── UNCL1001-inv.gc
    │   ├── UNCL1001-inv.yaml
    │   ├── UNCL1153.gc
    │   ├── UNCL1153.yaml
    │   ├── UNCL2005.gc
    │   ├── UNCL2005.yaml
    │   ├── UNCL4461.gc
    │   ├── UNCL4461.yaml
    │   ├── UNCL5189.gc
    │   ├── UNCL5189.yaml
    │   ├── UNCL7143.gc
    │   ├── UNCL7143.yaml
    │   ├── UNCL7161.gc
    │   ├── UNCL7161.yaml
    │   ├── UNECERec20.gc
    │   ├── UNECERec20.yaml
    │   ├── eas.gc
    │   └── eas.yaml
    ├── schematron
    │   ├── PINT-UBL-validation-preprocessed.sch
    │   └── PINT-jurisdiction-aligned-rules.sch
    ├── semantic-model.yaml
    └── syntax-binding.yaml
解凍したファイルには、2つのディレクトリがあります。
common/docs/bis.pdf は、
Peppol International (PINT) model for Billing
をPDF出力したものです。
trn-invoiceディレクトリには、デジタルインボイス関連のファイルが格納されています。
codelistサブディレクトリには、.gcファイルと.yamlファイルがあります。Aligned-TaxCategoryCodes.gcは、genericode形式のXML文書です。Aligned-TaxCategoryCodes.yamlは、同じコードリストをYAML形式で記述した文書です。
Aligned tax category codes
D.16B
Identifier
Name
Description
AA
Lower rate
Tax rate is lower than standard rate.
E
Exempt from tax
Code specifying that taxes are not applicable.
G
Free export item, tax not charged
Code specifying that the item is free export and taxes are not charged.
O
Outside scope of tax
Code specifying not subject to tax.
S
Standard rate
Code specifying the standard rate.
metadata: title: en: Aligned tax category codes description: en: Codes specifying tax categories in jurisdictions. identifier: AlignedTaxCat agency: UN/CEFACT language: en version: identifier: D.16B status: published content: - id: AA name: en: Lower rate description: en: Tax rate is lower than standard rate. - id: E name: en: Exempt from tax description: en: Code specifying that taxes are not applicable. - id: G name: en: Free export item, tax not charged description: en: Code specifying that the item is free export and taxes are not charged. - id: O name: en: Outside scope of tax description: en: Code specifying not subject to tax. - id: S name: en: Standard rate description: en: Code specifying the standard rate.
そして、schematronサブディレクトリにあるのが、スキーマトロンファイルです。PINT-UBL-validation-preprocessed.schがシェアドルール、PINT-jurisdiction-aligned-rules.schがアラインドルールです。
.
└── trn-invoice
    ├── schematron
    │   ├── PINT-UBL-validation-preprocessed.sch
    │   └── PINT-jurisdiction-aligned-rules.sch
schematronファイルをjsonに変換するプログラム
最近どうもXML技術者が少なくなっているようなので、馴染みのあるjsonに変換するプログラムを試作しました。
#!/usr/bin/env python3 #coding: utf-8 # # generate JSON from XML Schematron # # designed by SAMBUICHI, Nobuyuki (Sambuichi Professional Engineers Office) # written by SAMBUICHI, Nobuyuki (Sambuichi Professional Engineers Office) # # MIT License # # Copyright (c) 2021 SAMBUICHI Nobuyuki (Sambuichi Professional Engineers Office) # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in all # copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. 
import xml.etree.ElementTree as ET
from collections import defaultdict
import re
import json
import sys
import os
import argparse

dictID = defaultdict(type(''))
dictXpath = defaultdict(type(''))

# Business term / business group identifiers quoted in assertion messages.
# Compiled once because they are applied to every assertion.
_TERMS_RE = re.compile('I?BT-[0-9]*', flags=re.IGNORECASE)
_GROUPS_RE = re.compile('I?BG-[0-9]*', flags=re.IGNORECASE)

# Column names of one output record, in the order _setup_record emits them.
_HEADER = ['Context', 'Identifier', 'Flag', 'Test', 'Message', 'BG', 'BT']


def file_path(pathname):
    """Return *pathname* unchanged if absolute, else resolved against this script's directory."""
    if os.path.isabs(pathname):
        return pathname
    base_dir = os.path.dirname(__file__)
    return os.path.join(base_dir, pathname)


def _as_list(value):
    """Wrap *value* in a list unless it already is one; ``None`` becomes ``[]``.

    The XML-to-dict conversion stores a single child element as a bare value
    and repeated children as a list, so every consumer has to accept both.
    """
    if value is None:
        return []
    if isinstance(value, list):
        return value
    return [value]


def _setup_record(context, id_, flag, test, text):
    """Build one output row from the parts of a single ``sch:assert``."""
    # split()/join collapses every run of whitespace (including newlines)
    # and trims both ends.
    test = ' '.join(test.split())
    text = ' '.join(text.split())
    terms = ' '.join(_TERMS_RE.findall(text))
    groups = ' '.join(_GROUPS_RE.findall(text))
    return [context, id_, flag, test, text, groups, terms]


def _process_pattern(tsv, pattern, verbose=False):
    """Append one record per ``sch:assert`` found in *pattern* to *tsv*."""
    if not isinstance(pattern, dict):
        return tsv
    for rule in _as_list(pattern.get('sch:rule')):
        try:
            context = rule['@context']
            asserts = rule['sch:assert']
        except (KeyError, TypeError) as expt:
            # Rule without a context or without assertions: nothing to record.
            if verbose:
                print(expt.args)
            continue
        for item in _as_list(asserts):
            if not isinstance(item, dict):
                if verbose:
                    print(json.dumps(item))
                continue
            try:
                record = _setup_record(
                    context,
                    item['@id'],
                    item['@flag'],
                    item['@test'],
                    item['#text'],
                )
            except KeyError as expt:
                # Skip only the incomplete assertion, not its siblings.
                if verbose:
                    print(expt.args)
                continue
            tsv.append(record)
    return tsv


def dict_to_tsv(tsv, root, verbose=False):
    """Collect Schematron assertions from *root* into *tsv*.

    Args:
        tsv: List that receives one
            ``[context, id, flag, test, message, groups, terms]`` record per
            assertion. It is extended in place.
        root: Dict form of the Schematron document. Patterns are looked up
            under ``root['sch:pattern']`` or, failing that, under
            ``root['sch:schema']['sch:pattern']``. A single pattern (dict) and
            several patterns (list) are both accepted.
        verbose: When true, print the reason an element was skipped.

    Returns:
        The same *tsv* list, for convenience.
    """
    if 'sch:pattern' in root:
        patterns = root['sch:pattern']
    else:
        schema = root.get('sch:schema')
        patterns = schema.get('sch:pattern') if isinstance(schema, dict) else None
    for pattern in _as_list(patterns):
        _process_pattern(tsv, pattern, verbose)
    return tsv


def main():
    """Command-line entry point: convert a Schematron file to a JSON rule list."""
    # Imported here so the conversion helpers above stay importable without
    # the dic2etree package being on the path.
    from dic2etree import etree_to_dict, ns

    # Create the parser
    parser = argparse.ArgumentParser(prog='invoice2tsv',
                                     usage='%(prog)s [options] pintFile -o out_file',
                                     description='スキーマトロンファイルをjsonファイルに変換')
    # Add the arguments
    parser.add_argument('pintFile', metavar='pintfile', type=str, help='入力スキーマトロンファイル')
    parser.add_argument('-o', '--out')
    parser.add_argument('-v', '--verbose', action='store_true')
    args = parser.parse_args()

    pint_file = file_path(args.pintFile)
    pre, _ext = os.path.splitext(pint_file)
    if args.out:
        out_file = file_path(args.out.lstrip())
    else:
        out_file = pre + '.json'
    verbose = args.verbose

    # Check if infile exists
    if not os.path.isfile(pint_file):
        print('入力ファイルがありません')
        sys.exit(1)
    if verbose:
        print('** START ** ', __file__)

    pint_root = ET.parse(pint_file).getroot()
    pint_dict = etree_to_dict(pint_root)

    # ElementTree spells qualified names as '{namespace-uri}local'. Rewrite
    # them to prefixed names on the serialized form; plain string replacement
    # is used because the URIs contain regex metacharacters.
    dic_json = json.dumps(pint_dict)
    for prefix, replacement in (('', ''), ('cac', 'cac:'), ('cbc', 'cbc:'), ('sch', 'sch:')):
        dic_json = dic_json.replace('{' + ns[prefix] + '}', replacement)
    pint_dict2 = json.loads(dic_json)

    pint_tsv = dict_to_tsv([], pint_dict2, verbose)

    # Drop duplicate records while keeping document order, so repeated runs
    # produce identical output.
    unique = dict.fromkeys(json.dumps(record) for record in pint_tsv)
    rules = [dict(zip(_HEADER, json.loads(encoded))) for encoded in unique]

    with open(out_file, 'w', newline='', encoding='utf-8') as f:
        json.dump(rules, f, indent=4)

    if verbose:
        print(f'** END ** {out_file}')


if __name__ == '__main__':
    main()
実行時には、解析対象のスキーマトロンファイルを指定します。
"args":["trn-invoice/schematron/PINT-UBL-validation-preprocessed.sch","-v"]
XMLとPython dictのデータ変換ライブラリ
XMLの解析には、`xml.etree.ElementTree`を使用しています。ElementTreeを直接プログラムで操作しなくても済ますために、dic2etreeライブラリを stackoverflow converting xml to dictionary using elementtree を参考に作成しました。
├── dic2etree
│   ├── __init__.py
│   └── dic2etree.py
# Package interface: the two converters plus the namespace prefix table.
from .dic2etree import dict_to_etree, etree_to_dict

# Prefix -> namespace URI. The empty prefix and 'ubl' both name the UBL
# Invoice namespace.
ns = {
    '': 'urn:oasis:names:specification:ubl:schema:xsd:Invoice-2',
    'xsd': 'http://www.w3.org/2001/XMLSchema',
    'xsi': 'http://www.w3.org/2001/XMLSchema-instance',
    'ext': 'urn:oasis:names:specification:ubl:schema:xsd:CommonExtensionComponents-2',
    'cac': 'urn:oasis:names:specification:ubl:schema:xsd:CommonAggregateComponents-2',
    'cbc': 'urn:oasis:names:specification:ubl:schema:xsd:CommonBasicComponents-2',
    'qdt': 'urn:oasis:names:specification:ubl:schema:xsd:QualifiedDataTypes-2',
    'udt': 'urn:oasis:names:specification:ubl:schema:xsd:UnqualifiedDataTypes-2',
    'ccts': 'urn:un:unece:uncefact:documentation:2',
    'cn': 'urn:oasis:names:specification:ubl:schema:xsd:CreditNote-2',
    'ubl': 'urn:oasis:names:specification:ubl:schema:xsd:Invoice-2',
    'sch': 'http://purl.oclc.org/dsdl/schematron',
}

__all__ = ['dict_to_etree', 'etree_to_dict', 'ns']
# convert between ElementTree from/to dict # # designed by SAMBUICHI, Nobuyuki (Sambuichi Professional Engineers Office) # written by SAMBUICHI, Nobuyuki (Sambuichi Professional Engineers Office) # # MIT License # # Copyright (c) 2021 SAMBUICHI Nobuyuki (Sambuichi Professional Engineers Office) # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in all # copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. 
import xml.etree.ElementTree as ET
from collections import defaultdict
import pprint

ET.register_namespace('cac', 'urn:oasis:names:specification:ubl:schema:xsd:CommonAggregateComponents-2')
ET.register_namespace('cbc', 'urn:oasis:names:specification:ubl:schema:xsd:CommonBasicComponents-2')
ET.register_namespace('qdt', 'urn:oasis:names:specification:ubl:schema:xsd:QualifiedDataTypes-2')
ET.register_namespace('udt', 'urn:oasis:names:specification:ubl:schema:xsd:UnqualifiedDataTypes-2')
ET.register_namespace('ccts', 'urn:un:unece:uncefact:documentation:2')
ET.register_namespace('', 'urn:oasis:names:specification:ubl:schema:xsd:Invoice-2')


# https://stackoverflow.com/questions/7684333/converting-xml-to-dictionary-using-elementtree
def etree_to_dict(t):
    """Convert element *t* and its subtree into a nested dict.

    The result is ``{tag: content}``. Attributes are stored under
    ``'@name'`` keys and stripped element text under ``'#text'``. A child tag
    that occurs once maps to its value, a repeated tag maps to a list. An
    element with no attributes and no children maps to its stripped text, or
    to ``None`` when it has no text.
    """
    tag = t.tag
    attrs = t.attrib
    kids = list(t)

    result = {tag: {} if attrs else None}

    if kids:
        grouped = defaultdict(list)
        for child in kids:
            for key, value in etree_to_dict(child).items():
                grouped[key].append(value)
        result = {
            tag: {
                key: (values[0] if len(values) == 1 else values)
                for key, values in grouped.items()
            }
        }

    if attrs:
        result[tag].update(('@' + name, value) for name, value in attrs.items())

    if t.text:
        stripped = t.text.strip()
        if not (kids or attrs):
            # Leaf without attributes: the text is the whole value.
            result[tag] = stripped
        elif stripped:
            result[tag]['#text'] = stripped

    return result


def dict_to_etree(d, root):
    """Populate *root* from the single-entry dict *d* and return *root*.

    *d* must have the shape produced by ``etree_to_dict``: exactly one
    top-level key whose value describes the content. Only that value is used;
    the top-level key itself is not applied to *root*.
    """
    def _fill(content, element):
        if not content:
            return
        if isinstance(content, str):
            element.text = content
            return
        if not isinstance(content, dict):
            assert content == 'invalid type', (type(content), content)
            return
        for key, value in content.items():
            assert isinstance(key, str)
            marker = key[:1]
            if marker == '#':
                try:
                    assert key == '#text' and isinstance(value, str)
                    element.text = value
                except (Exception, ValueError, TypeError) as err:
                    print(err, value)
            elif marker == '@':
                # Attribute values that are not strings are left out.  # 2021-06-05
                if isinstance(value, str):
                    element.set(key[1:], value)
            elif isinstance(value, list):
                for item in value:
                    _fill(item, ET.SubElement(element, key))
            else:
                _fill(value, ET.SubElement(element, key))

    assert isinstance(d, dict) and len(d) == 1
    body = next(iter(d.values()))
    _fill(body, root)
    return root