diff options
| | | |
|---|---|---|
| author | Leif Johansson <leifj@sunet.se> | 2012-10-20 23:24:16 +0200 |
| committer | Leif Johansson <leifj@sunet.se> | 2012-10-20 23:24:16 +0200 |
| commit | 519b09da29a0ab620e85f1dc32816dd248f5f12d (patch) | |
| tree | 1c35d3345b5bf0dc964d5cc122ed259fc27b5ebd /meetingtools/ac/api.py | |
| parent | c1655608c032cbef274943a67352958b47aa123d (diff) | |
sanitize broken xml from AC
Diffstat (limited to 'meetingtools/ac/api.py')
-rw-r--r-- | meetingtools/ac/api.py | 23 |
1 files changed, 22 insertions, 1 deletions
```diff
diff --git a/meetingtools/ac/api.py b/meetingtools/ac/api.py
index 230bae8..2f53b74 100644
--- a/meetingtools/ac/api.py
+++ b/meetingtools/ac/api.py
@@ -30,10 +30,31 @@ def _first_or_none(x):
         return None
     return x[0]
 
+def strip_control_characters(input):
+
+    if input:
+
+        import re
+
+        # unicode invalid characters
+        RE_XML_ILLEGAL = u'([\u0000-\u0008\u000b-\u000c\u000e-\u001f\ufffe-\uffff])' + \
+                         u'|' + \
+                         u'([%s-%s][^%s-%s])|([^%s-%s][%s-%s])|([%s-%s]$)|(^[%s-%s])' % \
+                         (unichr(0xd800),unichr(0xdbff),unichr(0xdc00),unichr(0xdfff),
+                          unichr(0xd800),unichr(0xdbff),unichr(0xdc00),unichr(0xdfff),
+                          unichr(0xd800),unichr(0xdbff),unichr(0xdc00),unichr(0xdfff),
+                          )
+        input = re.sub(RE_XML_ILLEGAL, "", input)
+
+        # ascii control characters
+        input = re.sub(r"[\x01-\x1F\x7F]", "", input)
+
+    return input
+
 class ACPResult():
 
     def __init__(self,content):
-        self.et = etree.fromstring(content)
+        self.et = etree.fromstring(strip_control_characters(content))
         self.status = _first_or_none(self.et.xpath('//status'))
 
     def is_error(self):
```