Updating PyYAML to 3.08

This commit is contained in:
Ryan Hitchman 2009-03-14 22:17:46 -06:00
parent 92345146c7
commit 6ced5a0baa
12 changed files with 169 additions and 172 deletions

View File

@ -8,10 +8,13 @@ from nodes import *
from loader import *
from dumper import *
__version__ = '3.08'
try:
from cyaml import *
__with_libyaml__ = True
except ImportError:
pass
__with_libyaml__ = False
def scan(stream, Loader=Loader):
"""
@ -35,8 +38,7 @@ def compose(stream, Loader=Loader):
and produce the corresponding representation tree.
"""
loader = Loader(stream)
if loader.check_node():
return loader.get_node()
return loader.get_single_node()
def compose_all(stream, Loader=Loader):
"""
@ -47,6 +49,14 @@ def compose_all(stream, Loader=Loader):
while loader.check_node():
yield loader.get_node()
def load(stream, Loader=Loader):
    """
    Parse a stream holding one YAML document
    and return the Python object built from it.

    The stream is wrapped in the given Loader class and the result of
    its get_single_data() method is returned unchanged.
    NOTE(review): the Loader class decides which tags get resolved; for
    input you do not control, safe_load() (which passes SafeLoader and
    resolves only basic YAML tags) is presumably the safer entry point.
    """
    document_loader = Loader(stream)
    data = document_loader.get_single_data()
    return data
def load_all(stream, Loader=Loader):
"""
Parse all YAML documents in a stream
@ -56,14 +66,13 @@ def load_all(stream, Loader=Loader):
while loader.check_data():
yield loader.get_data()
def load(stream, Loader=Loader):
def safe_load(stream):
"""
Parse the first YAML document in a stream
and produce the corresponding Python object.
Resolve only basic YAML tags.
"""
loader = Loader(stream)
if loader.check_data():
return loader.get_data()
return load(stream, SafeLoader)
def safe_load_all(stream):
"""
@ -73,14 +82,6 @@ def safe_load_all(stream):
"""
return load_all(stream, SafeLoader)
def safe_load(stream):
    """
    Parse the first YAML document in a stream
    and return the corresponding Python object.

    This is load() with SafeLoader chosen as the loader class,
    so only basic YAML tags are resolved.
    """
    return load(stream, Loader=SafeLoader)
def emit(events, stream=None, Dumper=Dumper,
canonical=None, indent=None, width=None,
allow_unicode=None, line_break=None):
@ -90,10 +91,7 @@ def emit(events, stream=None, Dumper=Dumper,
"""
getvalue = None
if stream is None:
try:
from cStringIO import StringIO
except ImportError:
from StringIO import StringIO
from StringIO import StringIO
stream = StringIO()
getvalue = stream.getvalue
dumper = Dumper(stream, canonical=canonical, indent=indent, width=width,
@ -114,10 +112,10 @@ def serialize_all(nodes, stream=None, Dumper=Dumper,
"""
getvalue = None
if stream is None:
try:
from cStringIO import StringIO
except ImportError:
if encoding is None:
from StringIO import StringIO
else:
from cStringIO import StringIO
stream = StringIO()
getvalue = stream.getvalue
dumper = Dumper(stream, canonical=canonical, indent=indent, width=width,
@ -150,10 +148,10 @@ def dump_all(documents, stream=None, Dumper=Dumper,
"""
getvalue = None
if stream is None:
try:
from cStringIO import StringIO
except ImportError:
if encoding is None:
from StringIO import StringIO
else:
from cStringIO import StringIO
stream = StringIO()
getvalue = stream.getvalue
dumper = Dumper(stream, default_style=default_style,

View File

@ -26,6 +26,27 @@ class Composer(object):
if not self.check_event(StreamEndEvent):
return self.compose_document()
def get_single_node(self):
    """
    Compose the single document of the stream.

    Returns the result of compose_document(), or None when the stream
    ends right after STREAM-START.  Raises ComposerError when something
    other than STREAM-END follows the first document.
    """
    # Discard the STREAM-START event.
    self.get_event()

    # An empty stream goes straight to STREAM-END; otherwise compose
    # the document that is there.
    node = None
    if not self.check_event(StreamEndEvent):
        node = self.compose_document()

    # The only acceptable continuation is STREAM-END: discard it and
    # hand the composed document back.
    if self.check_event(StreamEndEvent):
        self.get_event()
        return node

    # Anything else is the start of a second document.
    extra = self.get_event()
    raise ComposerError("expected a single document in the stream",
            node.start_mark, "but found another document",
            extra.start_mark)
def compose_document(self):
# Drop the DOCUMENT-START event.
self.get_event()

View File

@ -37,6 +37,13 @@ class BaseConstructor(object):
if self.check_node():
return self.construct_document(self.get_node())
def get_single_data(self):
    """
    Construct the object for the single document of the stream.

    The node comes from get_single_node(); when that yields None,
    None is returned and construct_document() is not called.
    """
    root = self.get_single_node()
    if root is None:
        return None
    return self.construct_document(root)
def construct_document(self, node):
data = self.construct_object(node)
while self.state_generators:
@ -61,7 +68,6 @@ class BaseConstructor(object):
"found unconstructable recursive node", node.start_mark)
self.recursive_objects[node] = None
constructor = None
state_constructor = None
tag_suffix = None
if node.tag in self.yaml_constructors:
constructor = self.yaml_constructors[node.tag]
@ -314,7 +320,10 @@ class SafeConstructor(BaseConstructor):
second = int(values['second'])
fraction = 0
if values['fraction']:
fraction = int(values['fraction'][:6].ljust(6, '0'))
fraction = values['fraction'][:6]
while len(fraction) < 6:
fraction += '0'
fraction = int(fraction)
delta = None
if values['tz_sign']:
tz_hour = int(values['tz_hour'])

View File

@ -41,7 +41,7 @@ class CBaseDumper(CEmitter, BaseRepresenter, BaseResolver):
encoding=None, explicit_start=None, explicit_end=None,
version=None, tags=None):
CEmitter.__init__(self, stream, canonical=canonical,
indent=indent, width=width,
indent=indent, width=width, encoding=encoding,
allow_unicode=allow_unicode, line_break=line_break,
explicit_start=explicit_start, explicit_end=explicit_end,
version=version, tags=tags)
@ -58,7 +58,7 @@ class CSafeDumper(CEmitter, SafeRepresenter, Resolver):
encoding=None, explicit_start=None, explicit_end=None,
version=None, tags=None):
CEmitter.__init__(self, stream, canonical=canonical,
indent=indent, width=width,
indent=indent, width=width, encoding=encoding,
allow_unicode=allow_unicode, line_break=line_break,
explicit_start=explicit_start, explicit_end=explicit_end,
version=version, tags=tags)
@ -75,7 +75,7 @@ class CDumper(CEmitter, Serializer, Representer, Resolver):
encoding=None, explicit_start=None, explicit_end=None,
version=None, tags=None):
CEmitter.__init__(self, stream, canonical=canonical,
indent=indent, width=width,
indent=indent, width=width, encoding=encoding,
allow_unicode=allow_unicode, line_break=line_break,
explicit_start=explicit_start, explicit_end=explicit_end,
version=version, tags=tags)

View File

@ -16,7 +16,7 @@ class BaseDumper(Emitter, Serializer, BaseRepresenter, BaseResolver):
version=None, tags=None):
Emitter.__init__(self, stream, canonical=canonical,
indent=indent, width=width,
allow_uncode=allow_unicode, line_break=line_break)
allow_unicode=allow_unicode, line_break=line_break)
Serializer.__init__(self, encoding=encoding,
explicit_start=explicit_start, explicit_end=explicit_end,
version=version, tags=tags)

View File

@ -11,8 +11,6 @@ __all__ = ['Emitter', 'EmitterError']
from error import YAMLError
from events import *
import re
class EmitterError(YAMLError):
pass
@ -78,6 +76,9 @@ class Emitter(object):
self.whitespace = True
self.indention = True
# Whether the document requires an explicit document indicator
self.open_ended = False
# Formatting details.
self.canonical = canonical
self.allow_unicode = allow_unicode
@ -153,7 +154,7 @@ class Emitter(object):
def expect_stream_start(self):
if isinstance(self.event, StreamStartEvent):
if self.event.encoding:
if self.event.encoding and not getattr(self.stream, 'encoding', None):
self.encoding = self.event.encoding
self.write_stream_start()
self.state = self.expect_first_document_start
@ -171,6 +172,9 @@ class Emitter(object):
def expect_document_start(self, first=False):
if isinstance(self.event, DocumentStartEvent):
if (self.event.version or self.event.tags) and self.open_ended:
self.write_indicator(u'...', True)
self.write_indent()
if self.event.version:
version_text = self.prepare_version(self.event.version)
self.write_version_directive(version_text)
@ -194,6 +198,9 @@ class Emitter(object):
self.write_indent()
self.state = self.expect_document_root
elif isinstance(self.event, StreamEndEvent):
if self.open_ended:
self.write_indicator(u'...', True)
self.write_indent()
self.write_stream_end()
self.state = self.expect_nothing
else:
@ -538,7 +545,7 @@ class Emitter(object):
raise EmitterError("tag handle must start and end with '!': %r"
% (handle.encode('utf-8')))
for ch in handle[1:-1]:
if not (u'0' <= ch <= u'9' or u'A' <= ch <= 'Z' or u'a' <= ch <= 'z' \
if not (u'0' <= ch <= u'9' or u'A' <= ch <= u'Z' or u'a' <= ch <= u'z' \
or ch in u'-_'):
raise EmitterError("invalid character %r in the tag handle: %r"
% (ch.encode('utf-8'), handle.encode('utf-8')))
@ -553,7 +560,7 @@ class Emitter(object):
end = 1
while end < len(prefix):
ch = prefix[end]
if u'0' <= ch <= u'9' or u'A' <= ch <= 'Z' or u'a' <= ch <= 'z' \
if u'0' <= ch <= u'9' or u'A' <= ch <= u'Z' or u'a' <= ch <= u'z' \
or ch in u'-;/?!:@&=+$,_.~*\'()[]':
end += 1
else:
@ -583,7 +590,7 @@ class Emitter(object):
start = end = 0
while end < len(suffix):
ch = suffix[end]
if u'0' <= ch <= u'9' or u'A' <= ch <= 'Z' or u'a' <= ch <= 'z' \
if u'0' <= ch <= u'9' or u'A' <= ch <= u'Z' or u'a' <= ch <= u'z' \
or ch in u'-;/?:@&=+$,_.~*\'()[]' \
or (ch == u'!' and handle != u'!'):
end += 1
@ -606,7 +613,7 @@ class Emitter(object):
if not anchor:
raise EmitterError("anchor must not be empty")
for ch in anchor:
if not (u'0' <= ch <= u'9' or u'A' <= ch <= 'Z' or u'a' <= ch <= 'z' \
if not (u'0' <= ch <= u'9' or u'A' <= ch <= u'Z' or u'a' <= ch <= u'z' \
or ch in u'-_'):
raise EmitterError("invalid character %r in the anchor: %r"
% (ch.encode('utf-8'), anchor.encode('utf-8')))
@ -627,15 +634,13 @@ class Emitter(object):
line_breaks = False
special_characters = False
# Whitespaces.
inline_spaces = False # non-space space+ non-space
inline_breaks = False # non-space break+ non-space
leading_spaces = False # ^ space+ (non-space | $)
leading_breaks = False # ^ break+ (non-space | $)
trailing_spaces = False # (^ | non-space) space+ $
trailing_breaks = False # (^ | non-space) break+ $
inline_breaks_spaces = False # non-space break+ space+ non-space
mixed_breaks_spaces = False # anything else
# Important whitespace combinations.
leading_space = False
leading_break = False
trailing_space = False
trailing_break = False
break_space = False
space_break = False
# Check document indicators.
if scalar.startswith(u'---') or scalar.startswith(u'...'):
@ -643,32 +648,23 @@ class Emitter(object):
flow_indicators = True
# First character or preceded by a whitespace.
preceeded_by_space = True
preceeded_by_whitespace = True
# Last character or followed by a whitespace.
followed_by_space = (len(scalar) == 1 or
followed_by_whitespace = (len(scalar) == 1 or
scalar[1] in u'\0 \t\r\n\x85\u2028\u2029')
# The current series of whitespaces contain plain spaces.
spaces = False
# The previous character is a space.
previous_space = False
# The current series of whitespaces contain line breaks.
breaks = False
# The current series of whitespaces contain a space followed by a
# break.
mixed = False
# The current series of whitespaces start at the beginning of the
# scalar.
leading = False
# The previous character is a break.
previous_break = False
index = 0
while index < len(scalar):
ch = scalar[index]
# Check for indicators.
if index == 0:
# Leading indicators are special characters.
if ch in u'#,[]{}&*!|>\'\"%@`':
@ -676,9 +672,9 @@ class Emitter(object):
block_indicators = True
if ch in u'?:':
flow_indicators = True
if followed_by_space:
if followed_by_whitespace:
block_indicators = True
if ch == u'-' and followed_by_space:
if ch == u'-' and followed_by_whitespace:
flow_indicators = True
block_indicators = True
else:
@ -687,14 +683,13 @@ class Emitter(object):
flow_indicators = True
if ch == u':':
flow_indicators = True
if followed_by_space:
if followed_by_whitespace:
block_indicators = True
if ch == u'#' and preceeded_by_space:
if ch == u'#' and preceeded_by_whitespace:
flow_indicators = True
block_indicators = True
# Check for line breaks, special, and unicode characters.
if ch in u'\n\x85\u2028\u2029':
line_breaks = True
if not (ch == u'\n' or u'\x20' <= ch <= u'\x7E'):
@ -706,65 +701,33 @@ class Emitter(object):
else:
special_characters = True
# Spaces, line breaks, and how they are mixed. State machine.
# Start or continue series of whitespaces.
if ch in u' \n\x85\u2028\u2029':
if spaces and breaks:
if ch != u' ': # break+ (space+ break+) => mixed
mixed = True
elif spaces:
if ch != u' ': # (space+ break+) => mixed
breaks = True
mixed = True
elif breaks:
if ch == u' ': # break+ space+
spaces = True
else:
leading = (index == 0)
if ch == u' ': # space+
spaces = True
else: # break+
breaks = True
# Series of whitespaces ended with a non-space.
elif spaces or breaks:
if leading:
if spaces and breaks:
mixed_breaks_spaces = True
elif spaces:
leading_spaces = True
elif breaks:
leading_breaks = True
else:
if mixed:
mixed_breaks_spaces = True
elif spaces and breaks:
inline_breaks_spaces = True
elif spaces:
inline_spaces = True
elif breaks:
inline_breaks = True
spaces = breaks = mixed = leading = False
# Series of whitespaces reach the end.
if (spaces or breaks) and (index == len(scalar)-1):
if spaces and breaks:
mixed_breaks_spaces = True
elif spaces:
trailing_spaces = True
if leading:
leading_spaces = True
elif breaks:
trailing_breaks = True
if leading:
leading_breaks = True
spaces = breaks = mixed = leading = False
# Detect important whitespace combinations.
if ch == u' ':
if index == 0:
leading_space = True
if index == len(scalar)-1:
trailing_space = True
if previous_break:
break_space = True
previous_space = True
previous_break = False
elif ch in u'\n\x85\u2028\u2029':
if index == 0:
leading_break = True
if index == len(scalar)-1:
trailing_break = True
if previous_space:
space_break = True
previous_space = False
previous_break = True
else:
previous_space = False
previous_break = False
# Prepare for the next character.
index += 1
preceeded_by_space = (ch in u'\0 \t\r\n\x85\u2028\u2029')
followed_by_space = (index+1 >= len(scalar) or
preceeded_by_whitespace = (ch in u'\0 \t\r\n\x85\u2028\u2029')
followed_by_whitespace = (index+1 >= len(scalar) or
scalar[index+1] in u'\0 \t\r\n\x85\u2028\u2029')
# Let's decide what styles are allowed.
@ -774,28 +737,28 @@ class Emitter(object):
allow_double_quoted = True
allow_block = True
# Leading and trailing whitespace are bad for plain scalars. We also
# do not want to mess with leading whitespaces for block scalars.
if leading_spaces or leading_breaks or trailing_spaces:
allow_flow_plain = allow_block_plain = allow_block = False
# Trailing breaks are fine for block scalars, but unacceptable for
# plain scalars.
if trailing_breaks:
# Leading and trailing whitespaces are bad for plain scalars.
if (leading_space or leading_break
or trailing_space or trailing_break):
allow_flow_plain = allow_block_plain = False
# The combination of (space+ break+) is only acceptable for block
# We do not permit trailing spaces for block scalars.
if trailing_space:
allow_block = False
# Spaces at the beginning of a new line are only acceptable for block
# scalars.
if inline_breaks_spaces:
if break_space:
allow_flow_plain = allow_block_plain = allow_single_quoted = False
# Mixed spaces and breaks, as well as special character are only
# Spaces followed by breaks, as well as special characters, are only
# allowed for double quoted scalars.
if mixed_breaks_spaces or special_characters:
if space_break or special_characters:
allow_flow_plain = allow_block_plain = \
allow_single_quoted = allow_block = False
# We don't emit multiline plain scalars.
# Although the plain scalar writer supports breaks, we never emit
# multiline plain scalars.
if line_breaks:
allow_flow_plain = allow_block_plain = False
@ -838,6 +801,7 @@ class Emitter(object):
self.whitespace = whitespace
self.indention = self.indention and indention
self.column += len(data)
self.open_ended = False
if self.encoding:
data = data.encode(self.encoding)
self.stream.write(data)
@ -1008,25 +972,26 @@ class Emitter(object):
end += 1
self.write_indicator(u'"', False)
def determine_chomp(self, text):
    """
    Pick the chomping indicator for a block scalar from its last two
    characters: u'-' when the text does not end with a line break,
    u'+' when it ends with two of them, and u'' otherwise.
    """
    # Left-pad with spaces so that texts shorter than two characters
    # still provide both positions to inspect.
    before_last, last = text[-2:].rjust(2)
    if last not in u'\n\x85\u2028\u2029':
        return u'-'
    if before_last in u'\n\x85\u2028\u2029':
        return u'+'
    return u''
def determine_block_hints(self, text):
    """
    Build the hint string appended to a block scalar indicator.

    The result is empty for empty text.  Otherwise it holds
    self.best_indent (as text) when the scalar starts with a space or a
    line break, followed by u'-' when the scalar does not end with a
    line break, or u'+' when it consists of a single line break or ends
    with two of them.
    """
    if not text:
        return u''
    line_breaks = u'\n\x85\u2028\u2029'
    hints = u''
    # Leading whitespace: spell out the indentation explicitly.
    if text[0] in u' \n\x85\u2028\u2029':
        hints += unicode(self.best_indent)
    # Trailing line breaks decide between strip, clip (no hint) and keep.
    ends_with_break = text[-1] in line_breaks
    if not ends_with_break:
        hints += u'-'
    elif len(text) == 1 or text[-2] in line_breaks:
        hints += u'+'
    return hints
def write_folded(self, text):
chomp = self.determine_chomp(text)
self.write_indicator(u'>'+chomp, True)
self.write_indent()
leading_space = False
hints = self.determine_block_hints(text)
self.write_indicator(u'>'+hints, True)
if hints[-1:] == u'+':
self.open_ended = True
self.write_line_break()
leading_space = True
spaces = False
breaks = False
breaks = True
start = end = 0
while end <= len(text):
ch = None
@ -1072,10 +1037,12 @@ class Emitter(object):
end += 1
def write_literal(self, text):
chomp = self.determine_chomp(text)
self.write_indicator(u'|'+chomp, True)
self.write_indent()
breaks = False
hints = self.determine_block_hints(text)
self.write_indicator(u'|'+hints, True)
if hints[-1:] == u'+':
self.open_ended = True
self.write_line_break()
breaks = True
start = end = 0
while end <= len(text):
ch = None
@ -1105,6 +1072,8 @@ class Emitter(object):
end += 1
def write_plain(self, text, split=True):
if self.root_context:
self.open_ended = True
if not text:
return
if not self.whitespace:
@ -1113,7 +1082,7 @@ class Emitter(object):
if self.encoding:
data = data.encode(self.encoding)
self.stream.write(data)
self.writespace = False
self.whitespace = False
self.indention = False
spaces = False
breaks = False
@ -1126,7 +1095,7 @@ class Emitter(object):
if ch != u' ':
if start+1 == end and self.column > self.best_width and split:
self.write_indent()
self.writespace = False
self.whitespace = False
self.indention = False
else:
data = text[start:end]

View File

@ -72,8 +72,6 @@ class ParserError(MarkedYAMLError):
class Parser(object):
# Since writing a recursive-descent parser is a straightforward task, we
# do not give many comments here.
# Note that we use Python generators. If you rewrite the parser in another
# language, you may replace all 'yield'-s with event handler calls.
DEFAULT_TAGS = {
u'!': u'!',

View File

@ -74,7 +74,7 @@ class ReaderError(YAMLError):
else:
return "unacceptable character #x%04x: %s\n" \
" in \"%s\", position %d" \
% (ord(self.character), self.reason,
% (self.character, self.reason,
self.name, self.position)
class Reader(object):
@ -175,7 +175,7 @@ class Reader(object):
if match:
character = match.group()
position = self.index+(len(self.buffer)-self.pointer)+match.start()
raise ReaderError(self.name, position, character,
raise ReaderError(self.name, position, ord(character),
'unicode', "special characters are not allowed")
def update(self, length):

View File

@ -286,6 +286,7 @@ SafeRepresenter.add_representer(set,
SafeRepresenter.add_representer(datetime.date,
SafeRepresenter.represent_date)
SafeRepresenter.add_representer(datetime.datetime,
SafeRepresenter.represent_datetime)

View File

@ -173,7 +173,8 @@ Resolver.add_implicit_resolver(
Resolver.add_implicit_resolver(
u'tag:yaml.org,2002:float',
re.compile(ur'''^(?:[-+]?(?:[0-9][0-9_]*)?\.[0-9_]*(?:[eE][-+][0-9]+)?
re.compile(ur'''^(?:[-+]?(?:[0-9][0-9_]*)\.[0-9_]*(?:[eE][-+][0-9]+)?
|\.[0-9_]+(?:[eE][-+][0-9]+)?
|[-+]?[0-9][0-9_]*(?::[0-5]?[0-9])+\.[0-9_]*
|[-+]?\.(?:inf|Inf|INF)
|\.(?:nan|NaN|NAN))$''', re.X),
@ -191,7 +192,7 @@ Resolver.add_implicit_resolver(
Resolver.add_implicit_resolver(
u'tag:yaml.org,2002:merge',
re.compile(ur'^(?:<<)$'),
['<'])
[u'<'])
Resolver.add_implicit_resolver(
u'tag:yaml.org,2002:null',
@ -212,7 +213,7 @@ Resolver.add_implicit_resolver(
Resolver.add_implicit_resolver(
u'tag:yaml.org,2002:value',
re.compile(ur'^(?:=)$'),
['='])
[u'='])
# The following resolver is only for documentation purposes. It cannot work
# because plain scalars cannot start with '!', '&', or '*'.

View File

@ -807,7 +807,7 @@ class Scanner(object):
# See the specification for details.
length = 0
ch = self.peek(length)
while u'0' <= ch <= u'9' or u'A' <= ch <= 'Z' or u'a' <= ch <= 'z' \
while u'0' <= ch <= u'9' or u'A' <= ch <= u'Z' or u'a' <= ch <= u'z' \
or ch in u'-_':
length += 1
ch = self.peek(length)
@ -846,7 +846,7 @@ class Scanner(object):
def scan_yaml_directive_number(self, start_mark):
# See the specification for details.
ch = self.peek()
if not (u'0' <= ch <= '9'):
if not (u'0' <= ch <= u'9'):
raise ScannerError("while scanning a directive", start_mark,
"expected a digit, but found %r" % ch.encode('utf-8'),
self.get_mark())
@ -912,14 +912,14 @@ class Scanner(object):
# Therefore we restrict aliases to numbers and ASCII letters.
start_mark = self.get_mark()
indicator = self.peek()
if indicator == '*':
if indicator == u'*':
name = 'alias'
else:
name = 'anchor'
self.forward()
length = 0
ch = self.peek(length)
while u'0' <= ch <= u'9' or u'A' <= ch <= 'Z' or u'a' <= ch <= 'z' \
while u'0' <= ch <= u'9' or u'A' <= ch <= u'Z' or u'a' <= ch <= u'z' \
or ch in u'-_':
length += 1
ch = self.peek(length)
@ -1368,7 +1368,7 @@ class Scanner(object):
length = 1
ch = self.peek(length)
if ch != u' ':
while u'0' <= ch <= u'9' or u'A' <= ch <= 'Z' or u'a' <= ch <= 'z' \
while u'0' <= ch <= u'9' or u'A' <= ch <= u'Z' or u'a' <= ch <= u'z' \
or ch in u'-_':
length += 1
ch = self.peek(length)
@ -1388,7 +1388,7 @@ class Scanner(object):
chunks = []
length = 0
ch = self.peek(length)
while u'0' <= ch <= u'9' or u'A' <= ch <= 'Z' or u'a' <= ch <= 'z' \
while u'0' <= ch <= u'9' or u'A' <= ch <= u'Z' or u'a' <= ch <= u'z' \
or ch in u'-;/?:@&=+$,_.!~*\'()[]%':
if ch == u'%':
chunks.append(self.prefix(length))

View File

@ -55,7 +55,7 @@ class Serializer(object):
self.emit(DocumentEndEvent(explicit=self.use_explicit_end))
self.serialized_nodes = {}
self.anchors = {}
self.last_alias_id = 0
self.last_anchor_id = 0
def anchor_node(self, node):
if node in self.anchors: