ValueError: All strings must be XML compatible: Unicode or ASCII, no NULL bytes or control characters

classic Classic list List threaded Threaded
1 message Options
Reply | Threaded
Open this post in threaded view
|

ValueError: All strings must be XML compatible: Unicode or ASCII, no NULL bytes or control characters

Ron Gaw <ronmlgaw at yahoo dot com>


I'm getting errors from the very basic statements "from lxml import etree" and "import lxml.html", but *not* from "import lxml.etree" (which tells me a little something, but not clearly enough to solve the mystery).  I need a little help, since I do not (yet) quite understand whether MSYS python2.7 or the lxml code itself is causing the Unicode error.

The non-debug output comes first, to illustrate the issue.  Below it, I've added the output from running python2 -vvvvv and executing "from lxml import etree" (with the lines starting with "#" edited out for brevity), because I suspect there is a clue in that output that I'm not getting - hopefully I'm not just wasting bytes.
$ python2
Python 2.7.11 (default, Mar 4 2016, 10:54:03)
[GCC 4.9.2] on msys
Type "help", "copyright", "credits" or "license" for more information.
>>> import sys
>>> from lxml import etree
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "src/lxml/xmlschema.pxi", line 22, in init lxml.etree (src/lxml/lxml.etree.c:227961)
cdef XPath _check_for_default_attributes = XPath(
File "src/lxml/xpath.pxi", line 414, in lxml.etree.XPath.__init__ (src/lxml/lxml.etree.c:170397)
_XPathEvaluatorBase.__init__(self, namespaces, extensions,
File "src/lxml/xpath.pxi", line 133, in lxml.etree._XPathEvaluatorBase.__init__ (src/lxml/lxml.etree.c:166613)
self._context = _XPathContext(namespaces, extensions, self._error_log,
File "src/lxml/xpath.pxi", line 57, in lxml.etree._XPathContext.__init__ (src/lxml/lxml.etree.c:165569)
_BaseContext.__init__(self, namespaces, extensions, error_log, enable_regexp,
File "src/lxml/extensions.pxi", line 89, in lxml.etree._BaseContext.__init__ (src/lxml/lxml.etree.c:154497)
prefix_utf = self._to_utf(prefix)
File "src/lxml/extensions.pxi", line 128, in lxml.etree._BaseContext._to_utf (src/lxml/lxml.etree.c:155103)
utf = _utf8(s)
File "src/lxml/apihelpers.pxi", line 1443, in lxml.etree._utf8 (src/lxml/lxml.etree.c:31496)
raise ValueError(
ValueError: All strings must be XML compatible: Unicode or ASCII, no NULL bytes or control characters
>>> import lxml.etree
>>> import lxml.html
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/usr/lib/python2.7/site-packages/lxml-3.7.1-x86_64.egg/lxml/html/__init__.py", line 89, in <module>
namespaces={'x':XHTML_NAMESPACE})
File "src/lxml/xpath.pxi", line 414, in lxml.etree.XPath.__init__ (src/lxml/lxml.etree.c:170397)
_XPathEvaluatorBase.__init__(self, namespaces, extensions,
File "src/lxml/xpath.pxi", line 133, in lxml.etree._XPathEvaluatorBase.__init__ (src/lxml/lxml.etree.c:166613)
self._context = _XPathContext(namespaces, extensions, self._error_log,
File "src/lxml/xpath.pxi", line 57, in lxml.etree._XPathContext.__init__ (src/lxml/lxml.etree.c:165569)
_BaseContext.__init__(self, namespaces, extensions, error_log, enable_regexp,
File "src/lxml/extensions.pxi", line 89, in lxml.etree._BaseContext.__init__ (src/lxml/lxml.etree.c:154497)
prefix_utf = self._to_utf(prefix)
File "src/lxml/extensions.pxi", line 128, in lxml.etree._BaseContext._to_utf (src/lxml/lxml.etree.c:155103)
utf = _utf8(s)
File "src/lxml/apihelpers.pxi", line 1443, in lxml.etree._utf8 (src/lxml/lxml.etree.c:31496)
raise ValueError(
ValueError: All strings must be XML compatible: Unicode or ASCII, no NULL bytes or control characters


Debug output (NOTE: I've manually deleted all lines starting with "#" to simplify and shorten this):

$ python2 -vvvvv
import zipimport # builtin
import site # precompiled from /usr/lib/python2.7/site.pyc
import os # precompiled from /usr/lib/python2.7/os.pyc
import errno # builtin
import posix # builtin
import posixpath # precompiled from /usr/lib/python2.7/posixpath.pyc
import stat # precompiled from /usr/lib/python2.7/stat.pyc
import genericpath # precompiled from /usr/lib/python2.7/genericpath.pyc
import warnings # precompiled from /usr/lib/python2.7/warnings.pyc
import linecache # precompiled from /usr/lib/python2.7/linecache.pyc
import types # precompiled from /usr/lib/python2.7/types.pyc
import UserDict # precompiled from /usr/lib/python2.7/UserDict.pyc
import _abcoll # precompiled from /usr/lib/python2.7/_abcoll.pyc
import abc # precompiled from /usr/lib/python2.7/abc.pyc
import _weakrefset # precompiled from /usr/lib/python2.7/_weakrefset.pyc
import _weakref # builtin
import copy_reg # precompiled from /usr/lib/python2.7/copy_reg.pyc
import traceback # precompiled from /usr/lib/python2.7/traceback.pyc
import sysconfig # precompiled from /usr/lib/python2.7/sysconfig.pyc
import re # precompiled from /usr/lib/python2.7/re.pyc
import sre_compile # precompiled from /usr/lib/python2.7/sre_compile.pyc
import _sre # builtin
import sre_parse # precompiled from /usr/lib/python2.7/sre_parse.pyc
import sre_constants # precompiled from /usr/lib/python2.7/sre_constants.pyc
dlopen("/usr/lib/python2.7/lib-dynload/_locale.dll", 2);
import _locale # dynamically loaded from /usr/lib/python2.7/lib-dynload/_locale.dll
import _sysconfigdata # precompiled from /usr/lib/python2.7/_sysconfigdata.pyc
import encodings # directory /usr/lib/python2.7/encodings
import encodings # precompiled from /usr/lib/python2.7/encodings/__init__.pyc
import codecs # precompiled from /usr/lib/python2.7/codecs.pyc
import _codecs # builtin
import encodings.aliases # precompiled from /usr/lib/python2.7/encodings/aliases.pyc
import encodings.utf_8 # precompiled from /usr/lib/python2.7/encodings/utf_8.pyc
Python 2.7.11 (default, Mar  4 2016, 10:54:03)
[GCC 4.9.2] on msys
Type "help", "copyright", "credits" or "license" for more information.
dlopen("/usr/lib/python2.7/lib-dynload/readline.dll", 2);
import readline # dynamically loaded from /usr/lib/python2.7/lib-dynload/readline.dll
>>> import sys
>>> from lxml import etree
import lxml # directory /usr/lib/python2.7/site-packages/lxml-3.7.1-x86_64.egg/lxml
import lxml # precompiled from /usr/lib/python2.7/site-packages/lxml-3.7.1-x86_64.egg/lxml/__init__.pyc
dlopen("/usr/lib/python2.7/site-packages/lxml-3.7.1-x86_64.egg/lxml/etree.dll", 2);
import collections # precompiled from /usr/lib/python2.7/collections.pyc
dlopen("/usr/lib/python2.7/lib-dynload/_collections.dll", 2);
import _collections # dynamically loaded from /usr/lib/python2.7/lib-dynload/_collections.dll
dlopen("/usr/lib/python2.7/lib-dynload/operator.dll", 2);
import operator # dynamically loaded from /usr/lib/python2.7/lib-dynload/operator.dll
import keyword # precompiled from /usr/lib/python2.7/keyword.pyc
import heapq # precompiled from /usr/lib/python2.7/heapq.pyc
dlopen("/usr/lib/python2.7/lib-dynload/itertools.dll", 2);
import itertools # dynamically loaded from /usr/lib/python2.7/lib-dynload/itertools.dll
dlopen("/usr/lib/python2.7/lib-dynload/_heapq.dll", 2);
import _heapq # dynamically loaded from /usr/lib/python2.7/lib-dynload/_heapq.dll
import thread # builtin
import io # precompiled from /usr/lib/python2.7/io.pyc
dlopen("/usr/lib/python2.7/lib-dynload/_io.dll", 2);
import _io # dynamically loaded from /usr/lib/python2.7/lib-dynload/_io.dll
import lxml._elementpath # precompiled from /usr/lib/python2.7/site-packages/lxml-3.7.1-x86_64.egg/lxml/_elementpath.pyc
import encodings.ascii # from /usr/lib/python2.7/encodings/ascii.py
import inspect # precompiled from /usr/lib/python2.7/inspect.pyc
import string # precompiled from /usr/lib/python2.7/string.pyc
dlopen("/usr/lib/python2.7/lib-dynload/strop.dll", 2);
import strop # dynamically loaded from /usr/lib/python2.7/lib-dynload/strop.dll
import dis # precompiled from /usr/lib/python2.7/dis.pyc
import opcode # precompiled from /usr/lib/python2.7/opcode.pyc
import imp # builtin
import tokenize # precompiled from /usr/lib/python2.7/tokenize.pyc
import token # precompiled from /usr/lib/python2.7/token.pyc
import gzip # from /usr/lib/python2.7/gzip.py
import struct # precompiled from /usr/lib/python2.7/struct.pyc
dlopen("/usr/lib/python2.7/lib-dynload/_struct.dll", 2);
import _struct # dynamically loaded from /usr/lib/python2.7/lib-dynload/_struct.dll
dlopen("/usr/lib/python2.7/lib-dynload/time.dll", 2);
import time # dynamically loaded from /usr/lib/python2.7/lib-dynload/time.dll
dlopen("/usr/lib/python2.7/lib-dynload/zlib.dll", 2);
import zlib # dynamically loaded from /usr/lib/python2.7/lib-dynload/zlib.dll
import rply.errors # loaded from Zip /usr/lib/python2.7/site-packages/rply-0.7.4-py2.7.egg/rply/errors.pyc
import rply.token # loaded from Zip /usr/lib/python2.7/site-packages/rply-0.7.4-py2.7.egg/rply/token.pyc
import rply.lexer # loaded from Zip /usr/lib/python2.7/site-packages/rply-0.7.4-py2.7.egg/rply/lexer.pyc
import rply.lexergenerator # loaded from Zip /usr/lib/python2.7/site-packages/rply-0.7.4-py2.7.egg/rply/lexergenerator.pyc
import hashlib # precompiled from /usr/lib/python2.7/hashlib.pyc
dlopen("/usr/lib/python2.7/lib-dynload/_hashlib.dll", 2);
import _hashlib # dynamically loaded from /usr/lib/python2.7/lib-dynload/_hashlib.dll
import json # directory /usr/lib/python2.7/json
import json # from /usr/lib/python2.7/json/__init__.py
import json.decoder # from /usr/lib/python2.7/json/decoder.py
import json.scanner # from /usr/lib/python2.7/json/scanner.py
dlopen("/usr/lib/python2.7/lib-dynload/_json.dll", 2);
import _json # dynamically loaded from /usr/lib/python2.7/lib-dynload/_json.dll
import json.encoder # from /usr/lib/python2.7/json/encoder.py
import random # precompiled from /usr/lib/python2.7/random.pyc
import __future__ # precompiled from /usr/lib/python2.7/__future__.pyc
dlopen("/usr/lib/python2.7/lib-dynload/math.dll", 2);
import math # dynamically loaded from /usr/lib/python2.7/lib-dynload/math.dll
dlopen("/usr/lib/python2.7/lib-dynload/binascii.dll", 2);
import binascii # dynamically loaded from /usr/lib/python2.7/lib-dynload/binascii.dll
dlopen("/usr/lib/python2.7/lib-dynload/_random.dll", 2);
import _random # dynamically loaded from /usr/lib/python2.7/lib-dynload/_random.dll
import appdirs # loaded from Zip /usr/lib/python2.7/site-packages/appdirs-1.4.0-py2.7.egg/appdirs.pyc
import rply.utils # loaded from Zip /usr/lib/python2.7/site-packages/rply-0.7.4-py2.7.egg/rply/utils.pyc
import rply.grammar # loaded from Zip /usr/lib/python2.7/site-packages/rply-0.7.4-py2.7.egg/rply/grammar.pyc
import rply.parser # loaded from Zip /usr/lib/python2.7/site-packages/rply-0.7.4-py2.7.egg/rply/parser.pyc
import rply.parsergenerator # loaded from Zip /usr/lib/python2.7/site-packages/rply-0.7.4-py2.7.egg/rply/parsergenerator.pyc
import rply # loaded from Zip /usr/lib/python2.7/site-packages/rply-0.7.4-py2.7.egg/rply/__init__.pyc
import rnc2rng.parser # loaded from Zip /usr/lib/python2.7/site-packages/rnc2rng-2.1-py2.7.egg/rnc2rng/parser.pyc
import cgi # from /usr/lib/python2.7/cgi.py
import urlparse # precompiled from /usr/lib/python2.7/urlparse.pyc
import mimetools # precompiled from /usr/lib/python2.7/mimetools.pyc
import tempfile # precompiled from /usr/lib/python2.7/tempfile.pyc
dlopen("/usr/lib/python2.7/lib-dynload/cStringIO.dll", 2);
import cStringIO # dynamically loaded from /usr/lib/python2.7/lib-dynload/cStringIO.dll
dlopen("/usr/lib/python2.7/lib-dynload/fcntl.dll", 2);
import fcntl # dynamically loaded from /usr/lib/python2.7/lib-dynload/fcntl.dll
import rfc822 # precompiled from /usr/lib/python2.7/rfc822.pyc
import rnc2rng.serializer # loaded from Zip /usr/lib/python2.7/site-packages/rnc2rng-2.1-py2.7.egg/rnc2rng/serializer.pyc
import rnc2rng # loaded from Zip /usr/lib/python2.7/site-packages/rnc2rng-2.1-py2.7.egg/rnc2rng/__init__.pyc
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "src/lxml/xmlschema.pxi", line 22, in init lxml.etree (src/lxml/lxml.etree.c:227961)
    cdef XPath _check_for_default_attributes = XPath(
  File "src/lxml/xpath.pxi", line 414, in lxml.etree.XPath.__init__ (src/lxml/lxml.etree.c:170397)
    _XPathEvaluatorBase.__init__(self, namespaces, extensions,
  File "src/lxml/xpath.pxi", line 133, in lxml.etree._XPathEvaluatorBase.__init__ (src/lxml/lxml.etree.c:166613)
    self._context = _XPathContext(namespaces, extensions, self._error_log,
  File "src/lxml/xpath.pxi", line 57, in lxml.etree._XPathContext.__init__ (src/lxml/lxml.etree.c:165569)
    _BaseContext.__init__(self, namespaces, extensions, error_log, enable_regexp,
  File "src/lxml/extensions.pxi", line 89, in lxml.etree._BaseContext.__init__ (src/lxml/lxml.etree.c:154497)
    prefix_utf = self._to_utf(prefix)
  File "src/lxml/extensions.pxi", line 128, in lxml.etree._BaseContext._to_utf (src/lxml/lxml.etree.c:155103)
    utf = _utf8(s)
  File "src/lxml/apihelpers.pxi", line 1443, in lxml.etree._utf8 (src/lxml/lxml.etree.c:31496)
    raise ValueError(
ValueError: All strings must be XML compatible: Unicode or ASCII, no NULL bytes or control characters





------------------------------------------------------------------------------
Check out the vibrant tech community on one of the world's most
engaging tech sites, SlashDot.org! http://sdm.link/slashdot
_______________________________________________
MinGW-users mailing list
[hidden email]

This list observes the Etiquette found at
http://www.mingw.org/Mailing_Lists.
We ask that you be polite and do the same.  Disregard for the list etiquette may cause your account to be moderated.

_______________________________________________
You may change your MinGW Account Options or unsubscribe at:
https://lists.sourceforge.net/lists/listinfo/mingw-users
Also: mailto:[hidden email]?subject=unsubscribe