Views: 106
JP PINT 0.9.3のダウンロードファイル
画面左下の「Download resources」ボタンをクリックするとzipファイルがダウンロードできます。
このzipファイルを解凍すると、次のようなファイルがあります。
.
├── common
│ └── docs
│ ├── bis.pdf
│ ├── compliance.pdf
│ └── release-notes.pdf
└── trn-invoice
├── codelist
│ ├── Aligned-TaxCategoryCodes.gc
│ ├── Aligned-TaxCategoryCodes.yaml
│ ├── Aligned-TaxExemptionCodes.gc
│ ├── Aligned-TaxExemptionCodes.yaml
│ ├── ICD.gc
│ ├── ICD.yaml
│ ├── ISO3166.gc
│ ├── ISO3166.yaml
│ ├── ISO4217.gc
│ ├── ISO4217.yaml
│ ├── MimeCode.gc
│ ├── MimeCode.yaml
│ ├── SEPA.gc
│ ├── SEPA.yaml
│ ├── UNCL1001-inv.gc
│ ├── UNCL1001-inv.yaml
│ ├── UNCL1153.gc
│ ├── UNCL1153.yaml
│ ├── UNCL2005.gc
│ ├── UNCL2005.yaml
│ ├── UNCL4461.gc
│ ├── UNCL4461.yaml
│ ├── UNCL5189.gc
│ ├── UNCL5189.yaml
│ ├── UNCL7143.gc
│ ├── UNCL7143.yaml
│ ├── UNCL7161.gc
│ ├── UNCL7161.yaml
│ ├── UNECERec20.gc
│ ├── UNECERec20.yaml
│ ├── eas.gc
│ └── eas.yaml
├── schematron
│ ├── PINT-UBL-validation-preprocessed.sch
│ └── PINT-jurisdiction-aligned-rules.sch
├── semantic-model.yaml
└── syntax-binding.yaml
解凍したファイルには、2つのディレクトリがあります。
common/docs/bis.pdf は、
Peppol International (PINT) model for Billing
をPDF出力したものです。
trn-invoiceディレクトリには、デジタルインボイス関連のファイルが格納されています。
codelistサブディレクトリには、.gcファイルと.yamlファイルがあります。Aligned-TaxCategoryCodes.gcは、genericodeのXML文書です。Aligned-TaxCategoryCodes.yamlは、同じコードリストをYAML形式で記述した文書です。
Aligned tax category codes
D.16B
Identifier
Name
Description
AA
Lower rate
Tax rate is lower than standard rate.
E
Exempt from tax
Code specifying that taxes are not applicable.
G
Free export item, tax not charged
Code specifying that the item is free export and taxes are not charged.
O
Outside scope of tax
Code specifying not subject to tax.
S
Standard rate
Code specifying the standard rate.
metadata:
title:
en: Aligned tax category codes
description:
en: Codes specifying tax categories in jurisdictions.
identifier: AlignedTaxCat
agency: UN/CEFACT
language: en
version:
identifier: D.16B
status: published
content:
- id: AA
name:
en: Lower rate
description:
en: Tax rate is lower than standard rate.
- id: E
name:
en: Exempt from tax
description:
en: Code specifying that taxes are not applicable.
- id: G
name:
en: Free export item, tax not charged
description:
en: Code specifying that the item is free export and taxes are not charged.
- id: O
name:
en: Outside scope of tax
description:
en: Code specifying not subject to tax.
- id: S
name:
en: Standard rate
description:
en: Code specifying the standard rate.
そして、schematronサブディレクトリにあるのが、スキーマトロンファイルです。PINT-UBL-validation-preprocessed.schがシェアドルール、PINT-jurisdiction-aligned-rules.schがアラインドルールです。
.
└── trn-invoice
├── schematron
│ ├── PINT-UBL-validation-preprocessed.sch
│ └── PINT-jurisdiction-aligned-rules.sch
schematronファイルをjsonに変換するプログラム
最近どうもXML技術者が少なくなっているようなので、馴染みのあるjsonに変換するプログラムを試作しました。
#!/usr/bin/env python3
#coding: utf-8
#
# generate JSON from XML Schematron
#
# designed by SAMBUICHI, Nobuyuki (Sambuichi Professional Engineers Office)
# written by SAMBUICHI, Nobuyuki (Sambuichi Professional Engineers Office)
#
# MIT License
#
# Copyright (c) 2021 SAMBUICHI Nobuyuki (Sambuichi Professional Engineers Office)
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
import xml.etree.ElementTree as ET
from collections import defaultdict
import re
import json
import sys
import os
import argparse
from dic2etree import *
dictID = defaultdict(type(''))
dictXpath = defaultdict(type(''))
def file_path(pathname):
    """Return *pathname* resolved relative to this script's directory.

    A path whose first character is '/' is returned unchanged; anything
    else is joined onto the directory that contains this source file.
    """
    if pathname[:1] == '/':
        return pathname
    script_dir = os.path.dirname(__file__)
    return os.path.join(script_dir, pathname)
def dict_to_tsv(tsv, root):
    """Flatten the assertions of a Schematron dict into rows appended to *tsv*.

    *root* is the dict form of a Schematron document whose keys already use
    the ``sch:`` prefix.  It may either carry ``sch:pattern`` directly or
    wrap it in ``sch:schema``.  Patterns, rules and asserts may each be a
    single dict or a list of dicts.

    Every ``sch:assert`` yields one row::

        [context, id, flag, test, message, BG ids, BT ids]

    where ``test`` and ``message`` have their whitespace collapsed and the
    BG/BT columns hold the space-joined ``(I)BG-nnn`` / ``(I)BT-nnn``
    identifiers found in the message.

    A rule that cannot be read (missing ``@context``, ``sch:assert``,
    ``@id``, ``@flag``, ``@test`` or ``#text``) is skipped from the failing
    assert onwards; it is reported only when a module-level ``verbose``
    flag is truthy.

    Args:
        tsv: list that receives the rows (mutated in place).
        root: dict representation of the Schematron document.

    Returns:
        The same *tsv* list, for convenience.
    """
    terms_re = re.compile(r'I?BT-[0-9]*', flags=re.IGNORECASE)
    groups_re = re.compile(r'I?BG-[0-9]*', flags=re.IGNORECASE)

    def is_verbose():
        # The script sets a module-level ``verbose``; when this function is
        # used as a library that name may not exist, so default to quiet.
        return bool(globals().get('verbose', False))

    def as_items(value):
        # A single child arrives as a dict, repeated children as a list;
        # anything else (None, plain text) carries nothing to process.
        if isinstance(value, list):
            return value
        if isinstance(value, dict):
            return [value]
        return []

    def setup_record(context, rule_id, flag, test, text):
        test = ' '.join(test.split())
        text = ' '.join(text.split())
        terms = ' '.join(terms_re.findall(text))
        groups = ' '.join(groups_re.findall(text))
        return [context, rule_id, flag, test, text, groups, terms]

    def process_pattern(tsv, pattern):
        if not isinstance(pattern, dict):
            return tsv
        for rule in as_items(pattern.get('sch:rule')):
            try:
                context = rule['@context']
                asserts = rule['sch:assert']
                if isinstance(asserts, (list, dict)):
                    for item in as_items(asserts):
                        tsv.append(setup_record(context, item['@id'], item['@flag'],
                                                item['@test'], item['#text']))
                elif is_verbose():
                    print(json.dumps(asserts))
            except (KeyError, TypeError, AttributeError) as expt:
                # Best effort: an incomplete rule must not abort the run.
                if is_verbose():
                    print(expt.args)
        return tsv

    if 'sch:pattern' in root:
        patterns = root['sch:pattern']
    else:
        schema = root.get('sch:schema')
        patterns = schema.get('sch:pattern') if isinstance(schema, dict) else None
    for pattern in as_items(patterns):
        process_pattern(tsv, pattern)
    return tsv
if __name__ == '__main__':
    # Command-line entry point: parse a Schematron file, flatten its
    # assertions with dict_to_tsv() and write them out as a JSON array.
    parser = argparse.ArgumentParser(prog='invoice2tsv',
                                     usage='%(prog)s [options] pintFile -o out_file',
                                     description='スキーマトロンファイルをjsonファイルに変換')
    parser.add_argument('pintFile', metavar='pintfile', type=str, help='入力スキーマトロンファイル')
    parser.add_argument('-o', '--out')
    parser.add_argument('-v', '--verbose', action='store_true')
    args = parser.parse_args()

    pint_file = file_path(args.pintFile)
    pre, ext = os.path.splitext(pint_file)
    if args.out:
        out_file = file_path(args.out.lstrip())
    else:
        # Default output: same location and base name as the input, .json suffix.
        out_file = pre + '.json'
    # Module-level flag; dict_to_tsv() consults it when reporting skipped rules.
    verbose = args.verbose

    if not os.path.isfile(pint_file):
        print('入力ファイルがありません', file=sys.stderr)
        sys.exit(1)  # non-zero status so callers can detect the failure
    if verbose:
        print('** START ** ', __file__)

    pint_tree = ET.parse(pint_file)
    pint_root = pint_tree.getroot()
    pint_dict = etree_to_dict(pint_root)

    # ElementTree reports tags as "{namespace-uri}local"; rewrite them to
    # "prefix:local" on the serialized JSON text.  Plain string replacement
    # is used because the URIs contain regex metacharacters such as '.'.
    dicJson = json.dumps(pint_dict)
    for prefix, replacement in (('', ''), ('cac', 'cac:'), ('cbc', 'cbc:'), ('sch', 'sch:')):
        dicJson = dicJson.replace('{' + ns[prefix] + '}', replacement)
    pint_dict2 = json.loads(dicJson)

    pint_tsv = []
    dict_to_tsv(pint_tsv, pint_dict2)

    # Drop duplicate rows while keeping first-seen order, so that repeated
    # runs on the same input produce the same file.
    unique_rows = list(dict.fromkeys(tuple(row) for row in pint_tsv))

    header = ['Context', 'Identifier', 'Flag', 'Test', 'Message', 'BG', 'BT']
    rules = [dict(zip(header, row)) for row in unique_rows]

    with open(out_file, 'w', newline='', encoding='utf-8') as f:
        json.dump(rules, f, indent=4)
    if verbose:
        print(f'** END ** {out_file}')
実行時には、解析対象のスキーマトロンファイルを指定します。
"args":["trn-invoice/schematron/PINT-UBL-validation-preprocessed.sch","-v"]
XMLとPython dictのデータ変換ライブラリ
XMLの解析には、`xml.etree.ElementTree`を使用しています。ElementTreeを直接プログラムで操作しなくても済ますために、dic2etreeライブラリを stackoverflow converting xml to dictionary using elementtree を参考に作成しました。
├── dic2etree
│   ├── __init__.py
│   └── dic2etree.py
from .dic2etree import dict_to_etree
from .dic2etree import etree_to_dict
# Namespace prefix -> namespace URI table exported by this package.
# The empty prefix '' and 'ubl' both map to the UBL Invoice-2 namespace;
# 'sch' is the Schematron namespace.
ns = {
'': 'urn:oasis:names:specification:ubl:schema:xsd:Invoice-2',
'xsd': 'http://www.w3.org/2001/XMLSchema',
'xsi': 'http://www.w3.org/2001/XMLSchema-instance',
'ext': 'urn:oasis:names:specification:ubl:schema:xsd:CommonExtensionComponents-2',
'cac': 'urn:oasis:names:specification:ubl:schema:xsd:CommonAggregateComponents-2',
'cbc': 'urn:oasis:names:specification:ubl:schema:xsd:CommonBasicComponents-2',
'qdt': 'urn:oasis:names:specification:ubl:schema:xsd:QualifiedDataTypes-2',
'udt': 'urn:oasis:names:specification:ubl:schema:xsd:UnqualifiedDataTypes-2',
'ccts': 'urn:un:unece:uncefact:documentation:2',
'cn': 'urn:oasis:names:specification:ubl:schema:xsd:CreditNote-2',
'ubl': 'urn:oasis:names:specification:ubl:schema:xsd:Invoice-2',
'sch': 'http://purl.oclc.org/dsdl/schematron'
}
# Names re-exported by ``from dic2etree import *``.
__all__ = ['dict_to_etree', 'etree_to_dict', 'ns']
# convert between ElementTree from/to dict
#
# designed by SAMBUICHI, Nobuyuki (Sambuichi Professional Engineers Office)
# written by SAMBUICHI, Nobuyuki (Sambuichi Professional Engineers Office)
#
# MIT License
#
# Copyright (c) 2021 SAMBUICHI Nobuyuki (Sambuichi Professional Engineers Office)
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
import xml.etree.ElementTree as ET
from collections import defaultdict
import pprint
# Register the UBL prefixes with ElementTree at import time so that
# serialized documents use these prefixes for the listed namespaces.
_UBL_PREFIXES = (
    ('cac', 'urn:oasis:names:specification:ubl:schema:xsd:CommonAggregateComponents-2'),
    ('cbc', 'urn:oasis:names:specification:ubl:schema:xsd:CommonBasicComponents-2'),
    ('qdt', 'urn:oasis:names:specification:ubl:schema:xsd:QualifiedDataTypes-2'),
    ('udt', 'urn:oasis:names:specification:ubl:schema:xsd:UnqualifiedDataTypes-2'),
    ('ccts', 'urn:un:unece:uncefact:documentation:2'),
    ('', 'urn:oasis:names:specification:ubl:schema:xsd:Invoice-2'),
)
for _prefix, _uri in _UBL_PREFIXES:
    ET.register_namespace(_prefix, _uri)
# https://stackoverflow.com/questions/7684333/converting-xml-to-dictionary-using-elementtree
def etree_to_dict(t):
    """Convert element *t* and its subtree into a one-key nested dict.

    The result is ``{t.tag: body}`` where *body* is:

    * ``None`` for an element with no children, attributes or text;
    * the stripped text for an element that has only text;
    * otherwise a dict holding the child elements keyed by tag (a list
      when a tag repeats, the bare value when it occurs once), the
      attributes under ``'@name'`` keys and, when non-blank, the
      element's own text under ``'#text'``.
    """
    tag = t.tag
    kids = list(t)
    has_structure = bool(kids) or bool(t.attrib)

    if kids:
        grouped = defaultdict(list)
        for kid in kids:
            for name, value in etree_to_dict(kid).items():
                grouped[name].append(value)
        body = {}
        for name, values in grouped.items():
            # Collapse one-element lists to the bare value.
            body[name] = values if len(values) != 1 else values[0]
    elif t.attrib:
        body = {}
    else:
        body = None

    for attr_name, attr_value in t.attrib.items():
        body['@' + attr_name] = attr_value

    if t.text:
        stripped = t.text.strip()
        if not has_structure:
            body = stripped
        elif stripped:
            body['#text'] = stripped

    return {tag: body}
def dict_to_etree(d, root):
    """Populate element *root* from the one-key dict *d* and return *root*.

    *d* must be a dict with exactly one entry; only its value is used, the
    key is ignored.  Within the value, ``'#text'`` sets the element text,
    ``'@name'`` keys set attributes (string values only), and every other
    key creates child elements, one per list item or a single one for a
    non-list value.
    """
    def _fill(node, element):
        # Empty values (None, '', {}, []) leave the element untouched.
        if not node:
            return
        if isinstance(node, str):
            element.text = node
            return
        if not isinstance(node, dict):
            assert node == 'invalid type', (type(node), node)
            return
        for key, value in node.items():
            assert isinstance(key, str)
            if key.startswith('#'):
                try:
                    assert key == '#text' and isinstance(value, str)
                    element.text = value
                except Exception as err:
                    # A malformed text entry is reported, not fatal.
                    print(err, value)
            elif key.startswith('@'):
                # Non-string attribute values are ignored.
                if isinstance(value, str):
                    element.set(key[1:], value)
            else:
                members = value if isinstance(value, list) else [value]
                for member in members:
                    _fill(member, ET.SubElement(element, key))

    assert isinstance(d, dict) and len(d) == 1
    (_, body), = d.items()
    _fill(body, root)
    return root

