Fix twitter plugin replied-to feature.

This commit is contained in:
Hamled 2010-01-16 20:21:28 -08:00
parent c8df4dfb38
commit f43c5238c2
68 changed files with 16735 additions and 0 deletions

View File

@ -0,0 +1,93 @@
import urllib
import htmlentitydefs
import re
import json
from util import hook
########### from http://effbot.org/zone/re-sub.htm#unescape-html #############
def unescape(text):
def fixup(m):
text = m.group(0)
if text[:2] == "&#":
# character reference
try:
if text[:3] == "&#x":
return unichr(int(text[3:-1], 16))
else:
return unichr(int(text[2:-1]))
except ValueError:
pass
else:
# named entity
try:
text = unichr(htmlentitydefs.name2codepoint[text[1:-1]])
except KeyError:
pass
return text # leave as is
return re.sub("&#?\w+;", fixup, text)
##############################################################################
languages = 'ja fr de ko ru zh'.split()
language_pairs = zip(languages[:-1], languages[1:])
def goog_trans(text, slang, tlang):
req_url = 'http://ajax.googleapis.com/ajax/services/language/translate' \
'?v=1.0&q=%s&langpair=%s'
url = req_url % (urllib.quote(text, safe=''), slang + '%7C' + tlang)
page = urllib.urlopen(url).read()
parsed = json.loads(page)
if not 200 <= parsed['responseStatus'] < 300:
raise IOError('error with the translation server: %d: %s' % (
parsed['responseStatus'], ''))
return unescape(parsed['responseData']['translatedText'])
def babel_gen(inp):
for language in languages:
inp = inp.encode('utf8')
trans = goog_trans(inp, 'en', language).encode('utf8')
inp = goog_trans(trans, language, 'en')
yield language, trans, inp
@hook.command
def babel(inp):
".babel <sentence> -- translates <sentence> through multiple languages"
if not inp:
return babel.__doc__
try:
return list(babel_gen(inp))[-1][2]
except IOError, e:
return e
@hook.command
def babelext(inp):
".babelext <sentence> -- like .babel, but with more detailed output"
if not inp:
return babelext.__doc__
try:
babels = list(babel_gen(inp))
except IOError, e:
return e
out = u''
for lang, trans, text in babels:
out += '%s:"%s", ' % (lang, text.decode('utf8'))
out += 'en:"' + babels[-1][2].decode('utf8') + '"'
if len(out) > 300:
out = out[:150] + ' ... ' + out[-150:]
return out

87
plugins_available/bf.py Normal file
View File

@ -0,0 +1,87 @@
'''brainfuck interpreter adapted from (public domain) code at
http://brainfuck.sourceforge.net/brain.py'''
import re
import random
from util import hook
BUFFER_SIZE = 5000
MAX_STEPS = 1000000
@hook.command
def bf(inp):
".bf <prog> -- executes brainfuck program <prog>"""
if not inp:
return bf.__doc__
program = re.sub('[^][<>+-.,]', '', inp)
# create a dict of brackets pairs, for speed later on
brackets={}
open_brackets=[]
for pos in range(len(program)):
if program[pos] == '[':
open_brackets.append(pos)
elif program[pos] == ']':
if len(open_brackets) > 0:
brackets[pos] = open_brackets[-1]
brackets[open_brackets[-1]] = pos
open_brackets.pop()
else:
return 'unbalanced brackets'
if len(open_brackets) != 0:
return 'unbalanced brackets'
# now we can start interpreting
ip = 0 # instruction pointer
mp = 0 # memory pointer
steps = 0
memory = [0] * BUFFER_SIZE #initial memory area
rightmost = 0
output = "" #we'll save the output here
# the main program loop:
while ip < len(program):
c = program[ip]
if c == '+':
memory[mp] = memory[mp] + 1 % 256
elif c == '-':
memory[mp] = memory[mp] - 1 % 256
elif c == '>':
mp += 1
if mp > rightmost:
rightmost = mp
if mp >= len(memory):
# no restriction on memory growth!
memory.extend([0]*BUFFER_SIZE)
elif c == '<':
mp = mp - 1 % len(memory)
elif c == '.':
output += chr(memory[mp])
if len(output) > 500:
break
elif c == ',':
memory[mp] = random.randint(1, 255)
elif c == '[':
if memory[mp] == 0:
ip = brackets[ip]
elif c == ']':
if memory[mp] != 0:
ip = brackets[ip]
ip += 1
steps += 1
if steps > MAX_STEPS:
output += "Maximum number of steps exceeded"
break
output = '/'.join(output.splitlines())
if output == '':
return 'no output'
return unicode(output, 'iso-8859-1')[:430]

View File

@ -0,0 +1,27 @@
import urllib
from util import hook
@hook.command('god')
@hook.command
def bible(inp):
".bible <passage> -- gets <passage> from the Bible (ESV)"
if not inp:
return bible.__doc__
base_url = 'http://www.esvapi.org/v2/rest/passageQuery?key=IP&' \
'output-format=plain-text&include-heading-horizontal-lines&' \
'include-headings=false&include-passage-horizontal-lines=false&' \
'include-passage-references=false&include-short-copyright=false&' \
'include-footnotes=false&line-length=0&passage='
text = urllib.urlopen(base_url + urllib.quote(inp)).read()
text = ' '.join(text.split())
if len(text) > 400:
text = text[:text.rfind(' ', 0, 400)] + '...'
return text

View File

@ -0,0 +1,19 @@
import re
import random
from util import hook
@hook.command
def choose(inp):
".choose <choice1>, <choice2>, ... <choicen> -- makes a decision"
if not inp:
return choose.__doc__
c = re.findall(r'([^,]+)', inp)
if len(c) == 1:
c = re.findall(r'(\S+)', inp)
if len(c) == 1:
return 'the decision is up to you'
return random.choice(c).strip()

59
plugins_available/dice.py Normal file
View File

@ -0,0 +1,59 @@
"""
dice.py: written by Scaevolus 2008, updated 2009
simulates dicerolls
"""
import re
import random
from util import hook
whitespace_re = re.compile(r'\s+')
valid_diceroll_re = re.compile(r'^[+-]?(\d+|\d*d\d+)([+-](\d+|\d*d\d+))*$')
sign_re = re.compile(r'[+-]?(?:\d*d)?\d+')
split_re = re.compile(r'([\d+-]*)d?(\d*)')
def nrolls(count, n):
"roll an n-sided die count times"
if n < 2: #it's a coin
if count < 5000:
return sum(random.randint(0, 1) for x in xrange(count))
else: #fake it
return int(random.normalvariate(.5*count, (.75*count)**.5))
else:
if count < 5000:
return sum(random.randint(1, n) for x in xrange(count))
else: #fake it
return int(random.normalvariate(.5*(1+n)*count,
(((n+1)*(2*n+1)/6.-(.5*(1+n))**2)*count)**.5))
@hook.command
def dice(inp):
".dice <diceroll> -- simulates dicerolls, e.g. .dice 2d20-d5+4 roll 2 " \
"D20s, subtract 1D5, add 4"
if not inp.strip():
return dice.__doc__
spec = whitespace_re.sub('', inp)
if not valid_diceroll_re.match(spec):
return "Invalid diceroll"
sum = 0
groups = sign_re.findall(spec)
for roll in groups:
count, side = split_re.match(roll).groups()
if side == "":
sum += int(count)
else:
count = int(count) if count not in" +-" else 1
side = int(side)
try:
if count > 0:
sum += nrolls(count, side)
else:
sum -= nrolls(abs(count), side)
except OverflowError:
return "Thanks for overflowing a float, jerk >:["
return str(sum)

View File

@ -0,0 +1,96 @@
"dotnetpad.py: by sklnd, because gobiner wouldn't shut up"
import urllib
import httplib
import socket
import json
from util import hook
def dotnetpad(lang, code):
"Posts a provided snippet of code in a provided langugage to dotnetpad.net"
code = code.encode('utf8')
params = urllib.urlencode({'language': lang, 'code': code})
headers = {"Content-type": "application/x-www-form-urlencoded",
"Accept": "text/plain"}
try:
conn = httplib.HTTPConnection("dotnetpad.net:80")
conn.request("POST", "/Skybot", params, headers)
response = conn.getresponse()
except httplib.HTTPException:
conn.close()
return 'error: dotnetpad is broken somehow'
except socket.error:
return 'error: unable to connect to dotnetpad'
try:
result = json.loads(response.read())
except ValueError:
conn.close()
return 'error: dotnetpad is broken somehow'
conn.close()
if result['Errors']:
return 'First error: %s' % (result['Errors'][0]['ErrorText'])
elif result['Output']:
return result['Output'].lstrip()
else:
return 'No output'
@hook.command
def fs(inp):
".fs -- post a F# code snippet to dotnetpad.net and print the results"
if not inp:
return fs.__doc__
return dotnetpad('fsharp', inp)
@hook.command
def cs(snippet):
".cs -- post a C# code snippet to dotnetpad.net and print the results"
if not snippet:
return cs.__doc__
file_template = ('using System; '
'using System.Linq; '
'using System.Collections.Generic; '
'using System.Text; '
'%(class)s')
class_template = ('public class Default '
'{ '
' %(main)s '
'}')
main_template = ('public static void Main(String[] args) '
'{ '
' %(snippet)s '
'}')
# There are probably better ways to do the following, but I'm feeling lazy
# if no main is found in the snippet, then we use the template with Main in it
if 'public static void Main' not in snippet:
code = main_template % { 'snippet': snippet }
code = class_template % { 'main': code }
code = file_template % { 'class' : code }
# if Main is found, check for class and see if we need to use the classed template
elif 'class' not in snippet:
code = class_template % { 'main': snippet }
code = file_template % { 'class' : code }
return 'Error using dotnetpad'
# if we found class, then use the barebones template
else:
code = file_template % { 'class' : snippet }
return dotnetpad('csharp', code)

27
plugins_available/down.py Normal file
View File

@ -0,0 +1,27 @@
import urllib2
import urlparse
from util import hook
@hook.command
def down(inp):
'''.down <url> -- checks to see if the site is down'''
inp = inp.strip()
if not inp:
return down.__doc__
if 'http://' not in inp:
inp = 'http://' + inp
inp = 'http://' + urlparse.urlparse(inp).netloc
# http://mail.python.org/pipermail/python-list/2006-December/589854.html
try:
request = urllib2.Request(inp)
request.get_method = lambda: "HEAD"
http_file = urllib2.urlopen(request)
head = http_file.read()
return inp + ' seems to be up'
except urllib2.URLError:
return inp + ' seems to be down'

15
plugins_available/explain.py Executable file
View File

@ -0,0 +1,15 @@
from util import hook
from pycparser.cdecl import explain_c_declaration
@hook.command('explain')
def explain(inp):
".explain <c expression> -- gives an explanation of C expression"
if not inp:
return explain.__doc__
inp = inp.encode('utf8', 'ignore')
try:
return explain_c_declaration(inp.rstrip())
except Exception, e:
return 'error: %s' % e

View File

@ -0,0 +1,59 @@
import urllib
import random
from lxml import html
import json
from util import hook
def api_get(kind, query):
req_url = 'http://ajax.googleapis.com/ajax/services/search/%s?' \
'v=1.0&safe=off&q=%s'
query = query.encode('utf8')
url = req_url % (kind, urllib.quote(query, safe=''))
page = urllib.urlopen(url).read()
return json.loads(page)
@hook.command
def gis(inp):
'''.gis <term> -- returns first google image result (safesearch off)'''
if not inp:
return gis.__doc__
parsed = api_get('images', inp)
if not 200 <= parsed['responseStatus'] < 300:
raise IOError('error searching for images: %d: %s' % (
parsed['responseStatus'], ''))
if not parsed['responseData']['results']:
return 'no images found'
return random.choice(parsed['responseData']['results'][:10]
)['unescapedUrl'] # squares is dumb
@hook.command
@hook.command('g')
def google(inp):
'''.g/.google <query> -- returns first google search result'''
if not inp:
return google.__doc__
parsed = api_get('web', inp)
if not 200 <= parsed['responseStatus'] < 300:
raise IOError('error searching for pages: %d: %s' % (
parsed['responseStatus'], ''))
if not parsed['responseData']['results']:
return 'no results found'
result = parsed['responseData']['results'][0]
title, content = map(lambda x: html.fromstring(x).text_content(),
(result['titleNoFormatting'], result['content']))
out = '%s -- \x02%s\x02: "%s"' % (result['unescapedUrl'], title, content)
out = ' '.join(out.split())
if len(out) > 300:
out = out[:out.rfind(' ')] + '..."'
return out

View File

@ -0,0 +1,8 @@
from util import hook
#Scaevolus: factormystic if you commit a re-enabled goonsay I'm going to revoke your commit access
#@hook.command
def goonsay(bot, input):
input.say(' __________ /')
input.say('(--[. ]-[ .] /')
input.say('(_______o__)')

20
plugins_available/hash.py Normal file
View File

@ -0,0 +1,20 @@
import hashlib
from util import hook
@hook.command
def md5(inp):
return hashlib.md5(inp).hexdigest()
@hook.command
def sha1(inp):
return hashlib.sha1(inp).hexdigest()
@hook.command
def hash(inp):
".hash <text> -- returns hashes of <text>"
return ', '.join(x + ": " + getattr(hashlib, x)(inp).hexdigest()
for x in 'md5 sha1 sha256'.split())

17
plugins_available/help.py Normal file
View File

@ -0,0 +1,17 @@
from util import hook
@hook.command
def help(bot, input):
".help [command] -- gives a list of commands/help for a command"
funcs = {}
for csig, func, args in bot.plugs['command']:
if args['hook'] != r'(.*)':
if func.__doc__ is not None:
funcs[csig[1]] = func
if not input.inp.strip():
input.pm('available commands: ' + ' '.join(sorted(funcs)))
else:
if input.inp in funcs:
input.pm(funcs[input.inp].__doc__)

108
plugins_available/log.py Normal file
View File

@ -0,0 +1,108 @@
"""
log.py: written by Scaevolus 2009
"""
import os
import thread
import codecs
import time
import re
from util import hook
lock = thread.allocate_lock()
log_fds = {} # '%(net)s %(chan)s' : (filename, fd)
timestamp_format = '%H:%M:%S'
formats = {'PRIVMSG': '<%(nick)s> %(msg)s',
'PART': '-!- %(nick)s [%(user)s@%(host)s] has left %(chan)s',
'JOIN': '-!- %(nick)s [%(user)s@%(host)s] has joined %(param0)s',
'MODE': '-!- mode/%(chan)s [%(param_tail)s] by %(nick)s',
'KICK': '-!- %(param1)s was kicked from %(chan)s by %(nick)s [%(msg)s]',
'TOPIC': '-!- %(nick)s changed the topic of %(chan)s to: %(msg)s',
'QUIT': '-!- %(nick)s has quit [%(msg)s]',
'PING': '',
'NOTICE': ''
}
ctcp_formats = {'ACTION': '* %(nick)s %(ctcpmsg)s'}
irc_color_re = re.compile(r'(\x03(\d+,\d+|\d)|[\x0f\x02\x16\x1f])')
def get_log_filename(dir, server, chan):
return os.path.join(dir, 'log', gmtime('%Y'), server,
gmtime('%%s.%m-%d.log') % chan).lower()
def gmtime(format):
return time.strftime(format, time.gmtime())
def beautify(input):
format = formats.get(input.command, '%(raw)s')
args = vars(input)
leng = len(args['paraml'])
for n, p in enumerate(args['paraml']):
args['param' + str(n)] = p
args['param_' + str(abs(n - leng))] = p
args['param_tail'] = ' '.join(args['paraml'][1:])
args['msg'] = irc_color_re.sub('', args['msg'])
if input.command == 'PRIVMSG' and input.msg.count('\x01') >= 2:
#ctcp
ctcp = input.msg.split('\x01', 2)[1].split(' ', 1)
if len(ctcp) == 1:
ctcp += ['']
args['ctcpcmd'], args['ctcpmsg'] = ctcp
format = ctcp_formats.get(args['ctcpcmd'],
'%(nick)s [%(user)s@%(host)s] requested unknown CTCP '
'%(ctcpcmd)s from %(chan)s: %(ctcpmsg)s')
return format % args
def get_log_fd(dir, server, chan):
fn = get_log_filename(dir, server, chan)
cache_key = '%s %s' % (server, chan)
filename, fd = log_fds.get(cache_key, ('', 0))
if fn != filename: # we need to open a file for writing
if fd != 0: # is a valid fd
fd.flush()
fd.close()
dir = os.path.split(fn)[0]
if not os.path.exists(dir):
os.makedirs(dir)
fd = codecs.open(fn, 'a', 'utf-8')
log_fds[cache_key] = (fn, fd)
return fd
@hook.tee
def log(bot, input):
with lock:
timestamp = gmtime(timestamp_format)
fd = get_log_fd(bot.persist_dir, input.server, 'raw')
fd.write(timestamp + ' ' + input.raw + '\n')
if input.command == 'QUIT': # these are temporary fixes until proper
input.chan = 'quit' # presence tracking is implemented
if input.command == 'NICK':
input.chan = 'nick'
beau = beautify(input)
if beau == '': # don't log this
return
if input.chan:
fd = get_log_fd(bot.persist_dir, input.server, input.chan)
fd.write(timestamp + ' ' + beau + '\n')
print timestamp, input.chan, beau.encode('utf8', 'ignore')

30
plugins_available/misc.py Normal file
View File

@ -0,0 +1,30 @@
from util import hook
import socket
socket.setdefaulttimeout(5) # global setting
#autorejoin channels
@hook.event('KICK')
def rejoin(bot, input):
if input.paraml[1] == input.conn.nick:
if input.paraml[0] in input.conn.channels:
input.conn.join(input.paraml[0])
#join channels when invited
@hook.event('INVITE')
def invite(bot, input):
if input.command == 'INVITE':
input.conn.join(input.inp)
#join channels when server says hello & identify bot
@hook.event('004')
def onjoin(bot, input):
for channel in input.conn.channels:
input.conn.join(channel)
nickserv_password = input.conn.conf.get('nickserv_password', '')
nickserv_name = input.conn.conf.get('nickserv_name', 'nickserv')
nickserv_command = input.conn.conf.get('nickserv_command', 'IDENTIFY %s')
if nickserv_password:
input.conn.msg(nickserv_name, nickserv_command % nickserv_password)

170
plugins_available/mtg.py Normal file
View File

@ -0,0 +1,170 @@
from lxml import html
import re
import urllib2
import sys
from util import hook
@hook.command
def mtg(inp):
url = 'http://magiccards.info/query.php?cardname='
url += urllib2.quote(inp, safe='')
h = html.parse(url)
name = h.find('/body/table/tr/td/table/tr/td/h1')
if name is None:
return "no cards found"
card = name.getparent()
text = card.find('p')
type = text.text
text = text.find('b').text_content()
text = re.sub(r'\(.*?\)', '', text) # strip parenthetical explanations
text = re.sub(r'\.(\S)', r'. \1', text) # fix spacing
printings = card.find('table/tr/td/img').getparent().text_content()
printings = re.findall(r'\s*(.+?(?: \([^)]+\))*) \((.*?)\)',
' '.join(printings.split()))
printing_out = ', '.join('%s (%s)' % (set_abbrevs.get(x[0], x[0]),
rarity_abbrevs.get(x[1], x[1]))
for x in printings)
name.make_links_absolute()
link = name.find('a').attrib['href']
name = name.text_content().strip()
type = type.strip()
text = ' '.join(text.split())
return ' | '.join((name, type, text, printing_out, link))
set_abbrevs = {
'15th Anniversary': '15ANN',
'APAC Junior Series': 'AJS',
'Alara Reborn': 'ARB',
'Alliances': 'AI',
'Anthologies': 'AT',
'Antiquities': 'AQ',
'Apocalypse': 'AP',
'Arabian Nights': 'AN',
'Arena League': 'ARENA',
'Asia Pacific Land Program': 'APAC',
'Battle Royale': 'BR',
'Beatdown': 'BD',
'Betrayers of Kamigawa': 'BOK',
'Celebration Cards': 'UQC',
'Champions of Kamigawa': 'CHK',
'Champs': 'CP',
'Chronicles': 'CH',
'Classic Sixth Edition': '6E',
'Coldsnap': 'CS',
'Coldsnap Theme Decks': 'CSTD',
'Conflux': 'CFX',
'Core Set - Eighth Edition': '8E',
'Core Set - Ninth Edition': '9E',
'Darksteel': 'DS',
'Deckmasters': 'DM',
'Dissension': 'DI',
'Dragon Con': 'DRC',
'Duel Decks: Divine vs. Demonic': 'DVD',
'Duel Decks: Elves vs. Goblins': 'EVG',
'Duel Decks: Garruk vs. Liliana': 'GVL',
'Duel Decks: Jace vs. Chandra': 'JVC',
'Eighth Edition Box Set': '8EB',
'European Land Program': 'EURO',
'Eventide': 'EVE',
'Exodus': 'EX',
'Fallen Empires': 'FE',
'Fifth Dawn': '5DN',
'Fifth Edition': '5E',
'Fourth Edition': '4E',
'Friday Night Magic': 'FNMP',
'From the Vault: Dragons': 'FVD',
'From the Vault: Exiled': 'FVE',
'Future Sight': 'FUT',
'Gateway': 'GRC',
'Grand Prix': 'GPX',
'Guildpact': 'GP',
'Guru': 'GURU',
'Happy Holidays': 'HHO',
'Homelands': 'HL',
'Ice Age': 'IA',
'Introductory Two-Player Set': 'ITP',
'Invasion': 'IN',
'Judge Gift Program': 'JR',
'Judgment': 'JU',
'Junior Series': 'JSR',
'Legend Membership': 'DCILM',
'Legends': 'LG',
'Legions': 'LE',
'Limited Edition (Alpha)': 'AL',
'Limited Edition (Beta)': 'BE',
'Lorwyn': 'LW',
'MTGO Masters Edition': 'MED',
'MTGO Masters Edition II': 'ME2',
'MTGO Masters Edition III': 'ME3',
'Magic 2010': 'M10',
'Magic Game Day Cards': 'MGDC',
'Magic Player Rewards': 'MPRP',
'Magic Scholarship Series': 'MSS',
'Magic: The Gathering Launch Parties': 'MLP',
'Media Inserts': 'MBP',
'Mercadian Masques': 'MM',
'Mirage': 'MR',
'Mirrodin': 'MI',
'Morningtide': 'MT',
'Multiverse Gift Box Cards': 'MGBC',
'Nemesis': 'NE',
'Ninth Edition Box Set': '9EB',
'Odyssey': 'OD',
'Onslaught': 'ON',
'Planar Chaos': 'PC',
'Planechase': 'PCH',
'Planeshift': 'PS',
'Portal': 'PO',
'Portal Demogame': 'POT',
'Portal Second Age': 'PO2',
'Portal Three Kingdoms': 'P3K',
'Premium Deck Series: Slivers': 'PDS',
'Prerelease Events': 'PTC',
'Pro Tour': 'PRO',
'Prophecy': 'PR',
'Ravnica: City of Guilds': 'RAV',
'Release Events': 'REP',
'Revised Edition': 'RV',
'Saviors of Kamigawa': 'SOK',
'Scourge': 'SC',
'Seventh Edition': '7E',
'Shadowmoor': 'SHM',
'Shards of Alara': 'ALA',
'Starter': 'ST',
'Starter 2000 Box Set': 'ST2K',
'Stronghold': 'SH',
'Summer of Magic': 'SOM',
'Super Series': 'SUS',
'Tempest': 'TP',
'Tenth Edition': '10E',
'The Dark': 'DK',
'Time Spiral': 'TS',
'Time Spiral Timeshifted': 'TSTS',
'Torment': 'TR',
'Two-Headed Giant Tournament': 'THGT',
'Unglued': 'UG',
'Unhinged': 'UH',
'Unhinged Alternate Foils': 'UHAA',
'Unlimited Edition': 'UN',
"Urza's Destiny": 'UD',
"Urza's Legacy": 'UL',
"Urza's Saga": 'US',
'Visions': 'VI',
'Weatherlight': 'WL',
'Worlds': 'WRL',
'WotC Online Store': 'WOTC',
'Zendikar': 'ZEN'}
rarity_abbrevs = {
'Common': 'C',
'Uncommon': 'UC',
'Rare': 'R',
'Special': 'S',
'Mythic Rare': 'MR'}

View File

@ -0,0 +1,13 @@
# for crusty old rotor
from util import hook
@hook.command
def profile(inp):
".profile <username> -- links to <username>'s profile on SA"
if not inp:
return profile.__doc__
return 'http://forums.somethingawful.com/member.php?action=getinfo' + \
'&username=' + '+'.join(inp.split())

View File

View File

@ -0,0 +1,98 @@
#-----------------------------------------------------------------
# pycparser: cdecl.py
#
# Example of the CDECL tool using pycparser. CDECL "explains"
# C type declarations in plain English.
#
# The AST generated by pycparser from the given declaration is
# traversed recursively to build the explanation.
# Note that the declaration must be a valid external declaration
# in C. All the types used in it must be defined with typedef,
# or parsing will fail. The definition can be arbitrary, it isn't
# really used - by pycparser must know which tokens are types.
#
# For example:
#
# 'typedef int Node; const Node* (*ar)[10];'
# =>
# ar is a pointer to array[10] of pointer to const Node
#
# Copyright (C) 2008, Eli Bendersky
# License: LGPL
#-----------------------------------------------------------------
import sys
from pycparser import c_parser, c_ast
def explain_c_declaration(c_decl):
""" Parses the declaration in c_decl and returns a text
explanation as a string.
The last external node of the string is used, to allow
earlier typedefs for used types.
"""
parser = c_parser.CParser()
node = parser.parse(c_decl, filename='<stdin>')
if ( not isinstance(node, c_ast.FileAST) or
not isinstance(node.ext[-1], c_ast.Decl)):
return "Last external node is invalid type"
return _explain_decl_node(node.ext[-1])
def _explain_decl_node(decl_node):
""" Receives a c_ast.Decl note and returns its explanation in
English.
"""
#~ print decl_node.show()
storage = ' '.join(decl_node.storage) + ' ' if decl_node.storage else ''
return (decl_node.name +
" is a " +
storage +
_explain_type(decl_node.type))
def _explain_type(decl):
""" Recursively explains a type decl node
"""
typ = type(decl)
if typ == c_ast.TypeDecl:
quals = ' '.join(decl.quals) + ' ' if decl.quals else ''
return quals + _explain_type(decl.type)
elif typ == c_ast.Typename or typ == c_ast.Decl:
return _explain_type(decl.type)
elif typ == c_ast.IdentifierType:
return ' '.join(decl.names)
elif typ == c_ast.PtrDecl:
quals = ' '.join(decl.quals) + ' ' if decl.quals else ''
return quals + 'pointer to ' + _explain_type(decl.type)
elif typ == c_ast.ArrayDecl:
arr = 'array'
if decl.dim: arr += '[%s]' % decl.dim.value
return arr + " of " + _explain_type(decl.type)
elif typ == c_ast.FuncDecl:
if decl.args:
params = [_explain_type(param) for param in decl.args.params]
args = ', '.join(params)
else:
args = ''
return ('function(%s) returning ' % (args) +
_explain_type(decl.type))
if __name__ == "__main__":
if len(sys.argv) > 1:
c_decl = sys.argv[1]
else:
c_decl = "char *(*(**foo[][8])())[];"
print "Explaining the declaration:", c_decl
print "\n", explain_c_declaration(c_decl)

View File

@ -0,0 +1,9 @@
# pycparser.lextab.py. This file automatically created by PLY (version 3.3). Don't edit!
_tabversion = '3.3'
_lextokens = {'VOID': 1, 'LBRACKET': 1, 'WCHAR_CONST': 1, 'FLOAT_CONST': 1, 'MINUS': 1, 'RPAREN': 1, 'LONG': 1, 'PLUS': 1, 'ELLIPSIS': 1, 'GT': 1, 'GOTO': 1, 'ENUM': 1, 'PERIOD': 1, 'GE': 1, 'INT_CONST_DEC': 1, 'ARROW': 1, 'DOUBLE': 1, 'MINUSEQUAL': 1, 'INT_CONST_OCT': 1, 'TIMESEQUAL': 1, 'OR': 1, 'SHORT': 1, 'RETURN': 1, 'RSHIFTEQUAL': 1, 'STATIC': 1, 'SIZEOF': 1, 'UNSIGNED': 1, 'UNION': 1, 'COLON': 1, 'WSTRING_LITERAL': 1, 'DIVIDE': 1, 'FOR': 1, 'PLUSPLUS': 1, 'EQUALS': 1, 'ELSE': 1, 'EQ': 1, 'AND': 1, 'TYPEID': 1, 'LBRACE': 1, 'PPHASH': 1, 'INT': 1, 'SIGNED': 1, 'CONTINUE': 1, 'NOT': 1, 'OREQUAL': 1, 'MOD': 1, 'RSHIFT': 1, 'DEFAULT': 1, 'CHAR': 1, 'WHILE': 1, 'DIVEQUAL': 1, 'EXTERN': 1, 'CASE': 1, 'LAND': 1, 'REGISTER': 1, 'MODEQUAL': 1, 'NE': 1, 'SWITCH': 1, 'INT_CONST_HEX': 1, 'PLUSEQUAL': 1, 'STRUCT': 1, 'CONDOP': 1, 'BREAK': 1, 'VOLATILE': 1, 'ANDEQUAL': 1, 'DO': 1, 'LNOT': 1, 'CONST': 1, 'LOR': 1, 'CHAR_CONST': 1, 'LSHIFT': 1, 'RBRACE': 1, 'LE': 1, 'SEMI': 1, 'LT': 1, 'COMMA': 1, 'TYPEDEF': 1, 'XOR': 1, 'AUTO': 1, 'TIMES': 1, 'LPAREN': 1, 'MINUSMINUS': 1, 'ID': 1, 'IF': 1, 'STRING_LITERAL': 1, 'FLOAT': 1, 'XOREQUAL': 1, 'LSHIFTEQUAL': 1, 'RBRACKET': 1}
_lexreflags = 0
_lexliterals = ''
_lexstateinfo = {'ppline': 'exclusive', 'INITIAL': 'inclusive'}
_lexstatere = {'ppline': [('(?P<t_ppline_FILENAME>"([^"\\\\\\n]|(\\\\(([a-zA-Z\\\\?\'"])|([0-7]{1,3})|(x[0-9a-fA-F]+))))*")|(?P<t_ppline_LINE_NUMBER>(0(([uU][lL])|([lL][uU])|[uU]|[lL])?)|([1-9][0-9]*(([uU][lL])|([lL][uU])|[uU]|[lL])?))|(?P<t_ppline_NEWLINE>\\n)|(?P<t_ppline_PPLINE>line)', [None, ('t_ppline_FILENAME', 'FILENAME'), None, None, None, None, None, None, ('t_ppline_LINE_NUMBER', 'LINE_NUMBER'), None, None, None, None, None, None, None, None, ('t_ppline_NEWLINE', 'NEWLINE'), ('t_ppline_PPLINE', 'PPLINE')])], 'INITIAL': [('(?P<t_PPHASH>[ \\t]*\\#)|(?P<t_NEWLINE>\\n+)|(?P<t_FLOAT_CONST>((((([0-9]*\\.[0-9]+)|([0-9]+\\.))([eE][-+]?[0-9]+)?)|([0-9]+([eE][-+]?[0-9]+)))[FfLl]?))|(?P<t_INT_CONST_HEX>0[xX][0-9a-fA-F]+(([uU][lL])|([lL][uU])|[uU]|[lL])?)|(?P<t_BAD_CONST_OCT>0[0-7]*[89])|(?P<t_INT_CONST_OCT>0[0-7]*(([uU][lL])|([lL][uU])|[uU]|[lL])?)|(?P<t_INT_CONST_DEC>(0(([uU][lL])|([lL][uU])|[uU]|[lL])?)|([1-9][0-9]*(([uU][lL])|([lL][uU])|[uU]|[lL])?))|(?P<t_CHAR_CONST>\'([^\'\\\\\\n]|(\\\\(([a-zA-Z\\\\?\'"])|([0-7]{1,3})|(x[0-9a-fA-F]+))))\')|(?P<t_WCHAR_CONST>L\'([^\'\\\\\\n]|(\\\\(([a-zA-Z\\\\?\'"])|([0-7]{1,3})|(x[0-9a-fA-F]+))))\')|(?P<t_UNMATCHED_QUOTE>(\'([^\'\\\\\\n]|(\\\\(([a-zA-Z\\\\?\'"])|([0-7]{1,3})|(x[0-9a-fA-F]+))))*\\n)|(\'([^\'\\\\\\n]|(\\\\(([a-zA-Z\\\\?\'"])|([0-7]{1,3})|(x[0-9a-fA-F]+))))*$))|(?P<t_BAD_CHAR_CONST>(\'([^\'\\\\\\n]|(\\\\(([a-zA-Z\\\\?\'"])|([0-7]{1,3})|(x[0-9a-fA-F]+))))[^\'\n]+\')|(\'\')|(\'([\\\\][^a-zA-Z\\\\?\'"x0-7])[^\'\\n]*\'))|(?P<t_WSTRING_LITERAL>L"([^"\\\\\\n]|(\\\\(([a-zA-Z\\\\?\'"])|([0-7]{1,3})|(x[0-9a-fA-F]+))))*")|(?P<t_BAD_STRING_LITERAL>"([^"\\\\\\n]|(\\\\(([a-zA-Z\\\\?\'"])|([0-7]{1,3})|(x[0-9a-fA-F]+))))*([\\\\][^a-zA-Z\\\\?\'"x0-7])([^"\\\\\\n]|(\\\\(([a-zA-Z\\\\?\'"])|([0-7]{1,3})|(x[0-9a-fA-F]+))))*")|(?P<t_ID>[a-zA-Z_][0-9a-zA-Z_]*)|(?P<t_STRING_LITERAL>"([^"\\\\\\n]|(\\\\(([a-zA-Z\\\\?\'"])|([0-7]{1,3})|(x[0-9a-fA-F]+))))*")', [None, ('t_PPHASH', 'PPHASH'), ('t_NEWLINE', 'NEWLINE'), ('t_FLOAT_CONST', 'FLOAT_CONST'), None, None, None, None, None, None, None, None, None, ('t_INT_CONST_HEX', 'INT_CONST_HEX'), None, None, None, ('t_BAD_CONST_OCT', 'BAD_CONST_OCT'), ('t_INT_CONST_OCT', 'INT_CONST_OCT'), None, None, None, ('t_INT_CONST_DEC', 'INT_CONST_DEC'), None, None, None, None, None, None, None, None, ('t_CHAR_CONST', 'CHAR_CONST'), None, None, None, None, None, None, ('t_WCHAR_CONST', 'WCHAR_CONST'), None, None, None, None, None, None, ('t_UNMATCHED_QUOTE', 'UNMATCHED_QUOTE'), None, None, None, None, None, None, None, None, None, None, None, None, None, None, ('t_BAD_CHAR_CONST', 'BAD_CHAR_CONST'), None, None, None, None, None, None, None, None, None, None, ('t_WSTRING_LITERAL', 'WSTRING_LITERAL'), None, None, None, None, None, None, ('t_BAD_STRING_LITERAL', 'BAD_STRING_LITERAL'), None, None, None, None, None, None, None, None, None, None, None, None, None, ('t_ID', 'ID'), (None, 'STRING_LITERAL')]), ('(?P<t_ELLIPSIS>\\.\\.\\.)|(?P<t_PLUSPLUS>\\+\\+)|(?P<t_LOR>\\|\\|)|(?P<t_OREQUAL>\\|=)|(?P<t_LSHIFTEQUAL><<=)|(?P<t_RSHIFTEQUAL>>>=)|(?P<t_TIMESEQUAL>\\*=)|(?P<t_PLUSEQUAL>\\+=)|(?P<t_XOREQUAL>^=)|(?P<t_PLUS>\\+)|(?P<t_MODEQUAL>%=)|(?P<t_LBRACE>\\{)|(?P<t_DIVEQUAL>/=)|(?P<t_RBRACKET>\\])|(?P<t_CONDOP>\\?)', [None, (None, 'ELLIPSIS'), (None, 'PLUSPLUS'), (None, 'LOR'), (None, 'OREQUAL'), (None, 'LSHIFTEQUAL'), (None, 'RSHIFTEQUAL'), (None, 'TIMESEQUAL'), (None, 'PLUSEQUAL'), (None, 'XOREQUAL'), (None, 'PLUS'), (None, 'MODEQUAL'), (None, 'LBRACE'), (None, 'DIVEQUAL'), (None, 'RBRACKET'), (None, 'CONDOP')]), ('(?P<t_XOR>\\^)|(?P<t_LSHIFT><<)|(?P<t_LE><=)|(?P<t_LPAREN>\\()|(?P<t_ARROW>->)|(?P<t_EQ>==)|(?P<t_RBRACE>\\})|(?P<t_NE>!=)|(?P<t_MINUSMINUS>--)|(?P<t_OR>\\|)|(?P<t_TIMES>\\*)|(?P<t_LBRACKET>\\[)|(?P<t_GE>>=)|(?P<t_RPAREN>\\))|(?P<t_LAND>&&)|(?P<t_RSHIFT>>>)|(?P<t_ANDEQUAL>&=)|(?P<t_MINUSEQUAL>-=)|(?P<t_PERIOD>\\.)|(?P<t_EQUALS>=)|(?P<t_LT><)|(?P<t_COMMA>,)|(?P<t_DIVIDE>/)|(?P<t_AND>&)|(?P<t_MOD>%)|(?P<t_SEMI>;)|(?P<t_MINUS>-)|(?P<t_GT>>)|(?P<t_COLON>:)|(?P<t_NOT>~)|(?P<t_LNOT>!)', [None, (None, 'XOR'), (None, 'LSHIFT'), (None, 'LE'), (None, 'LPAREN'), (None, 'ARROW'), (None, 'EQ'), (None, 'RBRACE'), (None, 'NE'), (None, 'MINUSMINUS'), (None, 'OR'), (None, 'TIMES'), (None, 'LBRACKET'), (None, 'GE'), (None, 'RPAREN'), (None, 'LAND'), (None, 'RSHIFT'), (None, 'ANDEQUAL'), (None, 'MINUSEQUAL'), (None, 'PERIOD'), (None, 'EQUALS'), (None, 'LT'), (None, 'COMMA'), (None, 'DIVIDE'), (None, 'AND'), (None, 'MOD'), (None, 'SEMI'), (None, 'MINUS'), (None, 'GT'), (None, 'COLON'), (None, 'NOT'), (None, 'LNOT')])]}
_lexstateignore = {'ppline': ' \t', 'INITIAL': ' \t'}
_lexstateerrorf = {'ppline': 't_ppline_error', 'INITIAL': 't_error'}

View File

@ -0,0 +1,75 @@
#-----------------------------------------------------------------
# pycparser: __init__.py
#
# This package file exports some convenience functions for
# interacting with pycparser
#
# Copyright (C) 2008-2009, Eli Bendersky
# License: LGPL
#-----------------------------------------------------------------
__all__ = ['c_lexer', 'c_parser', 'c_ast']
__version__ = '1.05'
from subprocess import Popen, PIPE
from types import ListType
from c_parser import CParser
def parse_file( filename, use_cpp=False,
cpp_path='cpp', cpp_args=''):
""" Parse a C file using pycparser.
filename:
Name of the file you want to parse.
use_cpp:
Set to True if you want to execute the C pre-processor
on the file prior to parsing it.
cpp_path:
If use_cpp is True, this is the path to 'cpp' on your
system. If no path is provided, it attempts to just
execute 'cpp', so it must be in your PATH.
cpp_args:
If use_cpp is True, set this to the command line
arguments strings to cpp. Be careful with quotes -
it's best to pass a raw string (r'') here.
For example:
r'-I../utils/fake_libc_include'
If several arguments are required, pass a list of
strings.
When successful, an AST is returned. ParseError can be
thrown if the file doesn't parse successfully.
Errors from cpp will be printed out.
"""
if use_cpp:
path_list = [cpp_path]
if isinstance(cpp_args, ListType):
path_list += cpp_args
elif cpp_args != '':
path_list += [cpp_args]
path_list += [filename]
# Note the use of universal_newlines to treat all newlines
# as \n for Python's purpose
#
pipe = Popen( path_list,
stdout=PIPE,
universal_newlines=True)
text = pipe.communicate()[0]
else:
text = open(filename).read()
parser = CParser()
return parser.parse(text, filename)
if __name__ == "__main__":
pass

View File

@ -0,0 +1,249 @@
#-----------------------------------------------------------------
# _ast_gen.py
#
# Generates the AST Node classes from a specification given in
# a .yaml file
#
# The design of this module was inspired by astgen.py from the
# Python 2.5 code-base.
#
# Copyright (C) 2008-2009, Eli Bendersky
# License: LGPL
#-----------------------------------------------------------------
import pprint
from string import Template
import yaml
class ASTCodeGenerator(object):
def __init__(self, cfg_filename='_c_ast.yaml'):
""" Initialize the code generator from a configuration
file.
"""
self.cfg_filename = cfg_filename
cfg = yaml.load(open(cfg_filename).read())
self.node_cfg = [NodeCfg(name, cfg[name]) for name in cfg]
#~ pprint.pprint(self.node_cfg)
#~ print ''
def generate(self, file=None):
""" Generates the code into file, an open file buffer.
"""
src = Template(_PROLOGUE_COMMENT).substitute(
cfg_filename=self.cfg_filename)
src += _PROLOGUE_CODE
for node_cfg in self.node_cfg:
src += node_cfg.generate_source() + '\n\n'
file.write(src)
class NodeCfg(object):
def __init__(self, name, contents):
self.name = name
self.all_entries = []
self.attr = []
self.child = []
self.seq_child = []
for entry in contents:
clean_entry = entry.rstrip('*')
self.all_entries.append(clean_entry)
if entry.endswith('**'):
self.seq_child.append(clean_entry)
elif entry.endswith('*'):
self.child.append(clean_entry)
else:
self.attr.append(entry)
def generate_source(self):
src = self._gen_init()
src += '\n' + self._gen_children()
src += '\n' + self._gen_show()
return src
def _gen_init(self):
src = "class %s(Node):\n" % self.name
if self.all_entries:
args = ', '.join(self.all_entries)
arglist = '(self, %s, coord=None)' % args
else:
arglist = '(self, coord=None)'
src += " def __init__%s:\n" % arglist
for name in self.all_entries + ['coord']:
src += " self.%s = %s\n" % (name, name)
return src
def _gen_children(self):
src = ' def children(self):\n'
if self.all_entries:
src += ' nodelist = []\n'
template = ('' +
' if self.%s is not None:' +
' nodelist.%s(self.%s)\n')
for child in self.child:
src += template % (
child, 'append', child)
for seq_child in self.seq_child:
src += template % (
seq_child, 'extend', seq_child)
src += ' return tuple(nodelist)\n'
else:
src += ' return ()\n'
return src
def _gen_show(self):
src = ' def show(self, buf=sys.stdout, offset=0, attrnames=False, showcoord=False):\n'
src += " lead = ' ' * offset\n"
src += " buf.write(lead + '%s: ')\n\n" % self.name
if self.attr:
src += " if attrnames:\n"
src += " attrstr = ', '.join('%s=%s' % nv for nv in ["
src += ', '.join('("%s", repr(%s))' % (nv, 'self.%s' % nv) for nv in self.attr)
src += '])\n'
src += " else:\n"
src += " attrstr = ', '.join('%s' % v for v in ["
src += ', '.join('self.%s' % v for v in self.attr)
src += '])\n'
src += " buf.write(attrstr)\n\n"
src += " if showcoord:\n"
src += " buf.write(' (at %s)' % self.coord)\n"
src += " buf.write('\\n')\n\n"
src += " for c in self.children():\n"
src += " c.show(buf, offset + 2, attrnames, showcoord)\n"
return src
_PROLOGUE_COMMENT = \
r'''#-----------------------------------------------------------------
# ** ATTENTION **
# This code was automatically generated from the file:
# $cfg_filename
#
# Do not modify it directly. Modify the configuration file and
# run the generator again.
# ** ** *** ** **
#
# pycparser: c_ast.py
#
# AST Node classes.
#
# Copyright (C) 2008, Eli Bendersky
# License: LGPL
#-----------------------------------------------------------------
'''
_PROLOGUE_CODE = r'''
import sys
class Node(object):
""" Abstract base class for AST nodes.
"""
def children(self):
""" A sequence of all children that are Nodes
"""
pass
def show(self, buf=sys.stdout, offset=0, attrnames=False, showcoord=False):
""" Pretty print the Node and all its attributes and
children (recursively) to a buffer.
file:
Open IO buffer into which the Node is printed.
offset:
Initial offset (amount of leading spaces)
attrnames:
True if you want to see the attribute names in
name=value pairs. False to only see the values.
showcoord:
Do you want the coordinates of each Node to be
displayed.
"""
pass
class NodeVisitor(object):
""" A base NodeVisitor class for visiting c_ast nodes.
Subclass it and define your own visit_XXX methods, where
XXX is the class name you want to visit with these
methods.
For example:
class ConstantVisitor(NodeVisitor):
def __init__(self):
self.values = []
def visit_Constant(self, node):
self.values.append(node.value)
Creates a list of values of all the constant nodes
encountered below the given node. To use it:
cv = ConstantVisitor()
cv.visit(node)
Notes:
* generic_visit() will be called for AST nodes for which
no visit_XXX method was defined.
* The children of nodes for which a visit_XXX was
defined will not be visited - if you need this, call
generic_visit() on the node.
You can use:
NodeVisitor.generic_visit(self, node)
* Modeled after Python's own AST visiting facilities
(the ast module of Python 3.0)
"""
def visit(self, node):
""" Visit a node.
"""
method = 'visit_' + node.__class__.__name__
visitor = getattr(self, method, self.generic_visit)
return visitor(node)
def generic_visit(self, node):
""" Called if no explicit visitor function exists for a
node. Implements preorder visiting of the node.
"""
for c in node.children():
self.visit(c)
'''
if __name__ == "__main__":
import sys
ast_gen = ASTCodeGenerator('_c_ast.yaml')
ast_gen.generate(open('c_ast.py', 'w'))

View File

@ -0,0 +1,31 @@
#-----------------------------------------------------------------
# pycparser: _build_tables.py
#
# A dummy for generating the lexing/parsing tables and and
# compiling them into .pyc for faster execution in optimized mode.
# Also generates AST code from the _c_ast.yaml configuration file.
#
# Copyright (C) 2008, Eli Bendersky
# License: LGPL
#-----------------------------------------------------------------
# Generate c_ast.py
#
from _ast_gen import ASTCodeGenerator
ast_gen = ASTCodeGenerator('_c_ast.yaml')
ast_gen.generate(open('c_ast.py', 'w'))
import c_parser
# Generates the tables
#
c_parser.CParser(
lex_optimize=True,
yacc_debug=False,
yacc_optimize=True)
# Load to compile into .pyc
#
import lextab
import yacctab
import c_ast

View File

@ -0,0 +1,164 @@
#-----------------------------------------------------------------
# pycparser: _c_ast_gen.yaml
#
# Defines the AST Node classes used in pycparser.
#
# Each entry is a Node sub-class name, listing the attributes
# and child nodes of the class:
# <name>* - a child node
# <name>** - a sequence of child nodes
# <name> - an attribute
#
# Copyright (C) 2008-2009, Eli Bendersky
# License: LGPL
#-----------------------------------------------------------------
ArrayDecl: [type*, dim*]
ArrayRef: [name*, subscript*]
# op: =, +=, /= etc.
#
Assignment: [op, lvalue*, rvalue*]
BinaryOp: [op, left*, right*]
Break: []
Case: [expr*, stmt*]
Cast: [to_type*, expr*]
# Compound statement: { declarations... statements...}
#
Compound: [decls**, stmts**]
# type: int, char, float, etc. see CLexer for constant token types
#
Constant: [type, value]
Continue: []
# name: the variable being declared
# quals: list of qualifiers (const, volatile)
# storage: list of storage specifiers (extern, register, etc.)
# type: declaration type (probably nested with all the modifiers)
# init: initialization value, or None
# bitsize: bit field size, or None
#
Decl: [name, quals, storage, type*, init*, bitsize*]
Default: [stmt*]
DoWhile: [cond*, stmt*]
# Represents the ellipsis (...) parameter in a function
# declaration
#
EllipsisParam: []
# Enumeration type specifier
# name: an optional ID
# values: an EnumeratorList
#
Enum: [name, values*]
# A name/value pair for enumeration values
#
Enumerator: [name, value*]
# A list of enumerators
#
EnumeratorList: [enumerators**]
# a list of comma separated expressions
#
ExprList: [exprs**]
# This is the top of the AST, representing a single C file (a
# translation unit in K&R jargon). It contains a list of
# "external-declaration"s, which is either declarations (Decl),
# Typedef or function definitions (FuncDef).
#
FileAST: [ext**]
# for (init; cond; next) stmt
#
For: [init*, cond*, next*, stmt*]
# name: Id
# args: ExprList
#
FuncCall: [name*, args*]
# type <decl>(args)
#
FuncDecl: [args*, type*]
# Function definition: a declarator for the function name and
# a body, which is a compound statement.
# There's an optional list of parameter declarations for old
# K&R-style definitions
#
FuncDef: [decl*, param_decls**, body*]
Goto: [name]
ID: [name]
# Holder for types that are a simple identifier (e.g. the built
# ins void, char etc. and typedef-defined types)
#
IdentifierType: [names]
If: [cond*, iftrue*, iffalse*]
Label: [name, stmt*]
# a list of comma separated function parameter declarations
#
ParamList: [params**]
PtrDecl: [quals, type*]
Return: [expr*]
# name: struct tag name
# decls: declaration of members
#
Struct: [name, decls**]
# type: . or ->
# name.field or name->field
#
StructRef: [name*, type, field*]
Switch: [cond*, stmt*]
# cond ? iftrue : iffalse
#
TernaryOp: [cond*, iftrue*, iffalse*]
# A base type declaration
#
TypeDecl: [declname, quals, type*]
# A typedef declaration.
# Very similar to Decl, but without some attributes
#
Typedef: [name, quals, storage, type*]
Typename: [quals, type*]
UnaryOp: [op, expr*]
# name: union tag name
# decls: declaration of members
#
Union: [name, decls**]
While: [cond*, stmt*]

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,443 @@
#-----------------------------------------------------------------
# pycparser: clex.py
#
# CLexer class: lexer for the C language
#
# Copyright (C) 2008, Eli Bendersky
# License: LGPL
#-----------------------------------------------------------------
import re
import sys
import ply.lex
from ply.lex import TOKEN
class CLexer(object):
""" A lexer for the C language. After building it, set the
input text with input(), and call token() to get new
tokens.
The public attribute filename can be set to an initial
filaneme, but the lexer will update it upon #line
directives.
"""
def __init__(self, error_func, type_lookup_func):
""" Create a new Lexer.
error_func:
An error function. Will be called with an error
message, line and column as arguments, in case of
an error during lexing.
type_lookup_func:
A type lookup function. Given a string, it must
return True IFF this string is a name of a type
that was defined with a typedef earlier.
"""
self.error_func = error_func
self.type_lookup_func = type_lookup_func
self.filename = ''
# Allow either "# line" or "# <num>" to support GCC's
# cpp output
#
self.line_pattern = re.compile('([ \t]*line\W)|([ \t]*\d+)')
def build(self, **kwargs):
""" Builds the lexer from the specification. Must be
called after the lexer object is created.
This method exists separately, because the PLY
manual warns against calling lex.lex inside
__init__
"""
self.lexer = ply.lex.lex(object=self, **kwargs)
def reset_lineno(self):
""" Resets the internal line number counter of the lexer.
"""
self.lexer.lineno = 1
def input(self, text):
self.lexer.input(text)
def token(self):
g = self.lexer.token()
return g
######################-- PRIVATE --######################
##
## Internal auxiliary methods
##
def _error(self, msg, token):
location = self._make_tok_location(token)
self.error_func(msg, location[0], location[1])
self.lexer.skip(1)
def _find_tok_column(self, token):
i = token.lexpos
while i > 0:
if self.lexer.lexdata[i] == '\n': break
i -= 1
return (token.lexpos - i) + 1
def _make_tok_location(self, token):
return (token.lineno, self._find_tok_column(token))
##
## Reserved keywords
##
keywords = (
'AUTO', 'BREAK', 'CASE', 'CHAR', 'CONST', 'CONTINUE',
'DEFAULT', 'DO', 'DOUBLE', 'ELSE', 'ENUM', 'EXTERN',
'FLOAT', 'FOR', 'GOTO', 'IF', 'INT', 'LONG', 'REGISTER',
'RETURN', 'SHORT', 'SIGNED', 'SIZEOF', 'STATIC', 'STRUCT',
'SWITCH', 'TYPEDEF', 'UNION', 'UNSIGNED', 'VOID',
'VOLATILE', 'WHILE',
)
keyword_map = {}
for r in keywords:
keyword_map[r.lower()] = r
##
## All the tokens recognized by the lexer
##
tokens = keywords + (
# Identifiers
'ID',
# Type identifiers (identifiers previously defined as
# types with typedef)
'TYPEID',
# constants
'INT_CONST_DEC', 'INT_CONST_OCT', 'INT_CONST_HEX',
'FLOAT_CONST',
'CHAR_CONST',
'WCHAR_CONST',
# String literals
'STRING_LITERAL',
'WSTRING_LITERAL',
# Operators
'PLUS', 'MINUS', 'TIMES', 'DIVIDE', 'MOD',
'OR', 'AND', 'NOT', 'XOR', 'LSHIFT', 'RSHIFT',
'LOR', 'LAND', 'LNOT',
'LT', 'LE', 'GT', 'GE', 'EQ', 'NE',
# Assignment
'EQUALS', 'TIMESEQUAL', 'DIVEQUAL', 'MODEQUAL',
'PLUSEQUAL', 'MINUSEQUAL',
'LSHIFTEQUAL','RSHIFTEQUAL', 'ANDEQUAL', 'XOREQUAL',
'OREQUAL',
# Increment/decrement
'PLUSPLUS', 'MINUSMINUS',
# Structure dereference (->)
'ARROW',
# Conditional operator (?)
'CONDOP',
# Delimeters
'LPAREN', 'RPAREN', # ( )
'LBRACKET', 'RBRACKET', # [ ]
'LBRACE', 'RBRACE', # { }
'COMMA', 'PERIOD', # . ,
'SEMI', 'COLON', # ; :
# Ellipsis (...)
'ELLIPSIS',
# pre-processor
'PPHASH', # '#'
)
##
## Regexes for use in tokens
##
##
# valid C identifiers (K&R2: A.2.3)
identifier = r'[a-zA-Z_][0-9a-zA-Z_]*'
# integer constants (K&R2: A.2.5.1)
integer_suffix_opt = r'(([uU][lL])|([lL][uU])|[uU]|[lL])?'
decimal_constant = '(0'+integer_suffix_opt+')|([1-9][0-9]*'+integer_suffix_opt+')'
octal_constant = '0[0-7]*'+integer_suffix_opt
hex_constant = '0[xX][0-9a-fA-F]+'+integer_suffix_opt
bad_octal_constant = '0[0-7]*[89]'
# character constants (K&R2: A.2.5.2)
# Note: a-zA-Z are allowed as escape chars to support #line
# directives with Windows paths as filenames (\dir\file...)
#
simple_escape = r"""([a-zA-Z\\?'"])"""
octal_escape = r"""([0-7]{1,3})"""
hex_escape = r"""(x[0-9a-fA-F]+)"""
bad_escape = r"""([\\][^a-zA-Z\\?'"x0-7])"""
escape_sequence = r"""(\\("""+simple_escape+'|'+octal_escape+'|'+hex_escape+'))'
cconst_char = r"""([^'\\\n]|"""+escape_sequence+')'
char_const = "'"+cconst_char+"'"
wchar_const = 'L'+char_const
unmatched_quote = "('"+cconst_char+"*\\n)|('"+cconst_char+"*$)"
bad_char_const = r"""('"""+cconst_char+"""[^'\n]+')|('')|('"""+bad_escape+r"""[^'\n]*')"""
# string literals (K&R2: A.2.6)
string_char = r"""([^"\\\n]|"""+escape_sequence+')'
string_literal = '"'+string_char+'*"'
wstring_literal = 'L'+string_literal
bad_string_literal = '"'+string_char+'*'+bad_escape+string_char+'*"'
# floating constants (K&R2: A.2.5.3)
exponent_part = r"""([eE][-+]?[0-9]+)"""
fractional_constant = r"""([0-9]*\.[0-9]+)|([0-9]+\.)"""
floating_constant = '(((('+fractional_constant+')'+exponent_part+'?)|([0-9]+'+exponent_part+'))[FfLl]?)'
##
## Lexer states
##
states = (
# ppline: preprocessor line directives
#
('ppline', 'exclusive'),
)
def t_PPHASH(self, t):
r'[ \t]*\#'
m = self.line_pattern.match(
t.lexer.lexdata, pos=t.lexer.lexpos)
if m:
t.lexer.begin('ppline')
self.pp_line = self.pp_filename = None
#~ print "ppline starts on line %s" % t.lexer.lineno
else:
t.type = 'PPHASH'
return t
##
## Rules for the ppline state
##
@TOKEN(string_literal)
def t_ppline_FILENAME(self, t):
if self.pp_line is None:
self._error('filename before line number in #line', t)
else:
self.pp_filename = t.value.lstrip('"').rstrip('"')
#~ print "PP got filename: ", self.pp_filename
@TOKEN(decimal_constant)
def t_ppline_LINE_NUMBER(self, t):
if self.pp_line is None:
self.pp_line = t.value
else:
# Ignore: GCC's cpp sometimes inserts a numeric flag
# after the file name
pass
def t_ppline_NEWLINE(self, t):
r'\n'
if self.pp_line is None:
self._error('line number missing in #line', t)
else:
self.lexer.lineno = int(self.pp_line)
if self.pp_filename is not None:
self.filename = self.pp_filename
t.lexer.begin('INITIAL')
def t_ppline_PPLINE(self, t):
r'line'
pass
t_ppline_ignore = ' \t'
def t_ppline_error(self, t):
msg = 'invalid #line directive'
self._error(msg, t)
##
## Rules for the normal state
##
t_ignore = ' \t'
# Newlines
def t_NEWLINE(self, t):
r'\n+'
t.lexer.lineno += t.value.count("\n")
# Operators
t_PLUS = r'\+'
t_MINUS = r'-'
t_TIMES = r'\*'
t_DIVIDE = r'/'
t_MOD = r'%'
t_OR = r'\|'
t_AND = r'&'
t_NOT = r'~'
t_XOR = r'\^'
t_LSHIFT = r'<<'
t_RSHIFT = r'>>'
t_LOR = r'\|\|'
t_LAND = r'&&'
t_LNOT = r'!'
t_LT = r'<'
t_GT = r'>'
t_LE = r'<='
t_GE = r'>='
t_EQ = r'=='
t_NE = r'!='
# Assignment operators
t_EQUALS = r'='
t_TIMESEQUAL = r'\*='
t_DIVEQUAL = r'/='
t_MODEQUAL = r'%='
t_PLUSEQUAL = r'\+='
t_MINUSEQUAL = r'-='
t_LSHIFTEQUAL = r'<<='
t_RSHIFTEQUAL = r'>>='
t_ANDEQUAL = r'&='
t_OREQUAL = r'\|='
t_XOREQUAL = r'^='
# Increment/decrement
t_PLUSPLUS = r'\+\+'
t_MINUSMINUS = r'--'
# ->
t_ARROW = r'->'
# ?
t_CONDOP = r'\?'
# Delimeters
t_LPAREN = r'\('
t_RPAREN = r'\)'
t_LBRACKET = r'\['
t_RBRACKET = r'\]'
t_LBRACE = r'\{'
t_RBRACE = r'\}'
t_COMMA = r','
t_PERIOD = r'\.'
t_SEMI = r';'
t_COLON = r':'
t_ELLIPSIS = r'\.\.\.'
t_STRING_LITERAL = string_literal
# The following floating and integer constants are defined as
# functions to impose a strict order (otherwise, decimal
# is placed before the others because its regex is longer,
# and this is bad)
#
@TOKEN(floating_constant)
def t_FLOAT_CONST(self, t):
return t
@TOKEN(hex_constant)
def t_INT_CONST_HEX(self, t):
return t
@TOKEN(bad_octal_constant)
def t_BAD_CONST_OCT(self, t):
msg = "Invalid octal constant"
self._error(msg, t)
@TOKEN(octal_constant)
def t_INT_CONST_OCT(self, t):
return t
@TOKEN(decimal_constant)
def t_INT_CONST_DEC(self, t):
return t
# Must come before bad_char_const, to prevent it from
# catching valid char constants as invalid
#
@TOKEN(char_const)
def t_CHAR_CONST(self, t):
return t
@TOKEN(wchar_const)
def t_WCHAR_CONST(self, t):
return t
@TOKEN(unmatched_quote)
def t_UNMATCHED_QUOTE(self, t):
msg = "Unmatched '"
self._error(msg, t)
@TOKEN(bad_char_const)
def t_BAD_CHAR_CONST(self, t):
msg = "Invalid char constant %s" % t.value
self._error(msg, t)
@TOKEN(wstring_literal)
def t_WSTRING_LITERAL(self, t):
return t
# unmatched string literals are caught by the preprocessor
@TOKEN(bad_string_literal)
def t_BAD_STRING_LITERAL(self, t):
msg = "String contains invalid escape code"
self._error(msg, t)
@TOKEN(identifier)
def t_ID(self, t):
t.type = self.keyword_map.get(t.value, "ID")
if t.type == 'ID' and self.type_lookup_func(t.value):
t.type = "TYPEID"
return t
def t_error(self, t):
msg = 'Illegal character %s' % repr(t.value[0])
self._error(msg, t)
if __name__ == "__main__":
filename = '../zp.c'
text = open(filename).read()
#~ text = '"'+r"""ka \p ka"""+'"'
text = r"""
546
#line 66 "kwas\df.h"
id 4
# 5
dsf
"""
def errfoo(msg, a, b):
print msg
sys.exit()
def typelookup(namd):
return False
clex = CLexer(errfoo, typelookup)
clex.build()
clex.input(text)
while 1:
tok = clex.token()
if not tok: break
#~ print type(tok)
print "-", tok.value, tok.type, tok.lineno, clex.filename, tok.lexpos

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,4 @@
# PLY package
# Author: David Beazley (dave@dabeaz.com)
__all__ = ['lex','yacc']

View File

@ -0,0 +1,898 @@
# -----------------------------------------------------------------------------
# cpp.py
#
# Author: David Beazley (http://www.dabeaz.com)
# Copyright (C) 2007
# All rights reserved
#
# This module implements an ANSI-C style lexical preprocessor for PLY.
# -----------------------------------------------------------------------------
from __future__ import generators
# -----------------------------------------------------------------------------
# Default preprocessor lexer definitions. These tokens are enough to get
# a basic preprocessor working. Other modules may import these if they want
# -----------------------------------------------------------------------------
tokens = (
'CPP_ID','CPP_INTEGER', 'CPP_FLOAT', 'CPP_STRING', 'CPP_CHAR', 'CPP_WS', 'CPP_COMMENT', 'CPP_POUND','CPP_DPOUND'
)
literals = "+-*/%|&~^<>=!?()[]{}.,;:\\\'\""
# Whitespace
def t_CPP_WS(t):
r'\s+'
t.lexer.lineno += t.value.count("\n")
return t
t_CPP_POUND = r'\#'
t_CPP_DPOUND = r'\#\#'
# Identifier
t_CPP_ID = r'[A-Za-z_][\w_]*'
# Integer literal
def CPP_INTEGER(t):
r'(((((0x)|(0X))[0-9a-fA-F]+)|(\d+))([uU]|[lL]|[uU][lL]|[lL][uU])?)'
return t
t_CPP_INTEGER = CPP_INTEGER
# Floating literal
t_CPP_FLOAT = r'((\d+)(\.\d+)(e(\+|-)?(\d+))? | (\d+)e(\+|-)?(\d+))([lL]|[fF])?'
# String literal
def t_CPP_STRING(t):
r'\"([^\\\n]|(\\(.|\n)))*?\"'
t.lexer.lineno += t.value.count("\n")
return t
# Character constant 'c' or L'c'
def t_CPP_CHAR(t):
r'(L)?\'([^\\\n]|(\\(.|\n)))*?\''
t.lexer.lineno += t.value.count("\n")
return t
# Comment
def t_CPP_COMMENT(t):
r'(/\*(.|\n)*?\*/)|(//.*?\n)'
t.lexer.lineno += t.value.count("\n")
return t
def t_error(t):
t.type = t.value[0]
t.value = t.value[0]
t.lexer.skip(1)
return t
import re
import copy
import time
import os.path
# -----------------------------------------------------------------------------
# trigraph()
#
# Given an input string, this function replaces all trigraph sequences.
# The following mapping is used:
#
# ??= #
# ??/ \
# ??' ^
# ??( [
# ??) ]
# ??! |
# ??< {
# ??> }
# ??- ~
# -----------------------------------------------------------------------------
_trigraph_pat = re.compile(r'''\?\?[=/\'\(\)\!<>\-]''')
_trigraph_rep = {
'=':'#',
'/':'\\',
"'":'^',
'(':'[',
')':']',
'!':'|',
'<':'{',
'>':'}',
'-':'~'
}
def trigraph(input):
return _trigraph_pat.sub(lambda g: _trigraph_rep[g.group()[-1]],input)
# ------------------------------------------------------------------
# Macro object
#
# This object holds information about preprocessor macros
#
# .name - Macro name (string)
# .value - Macro value (a list of tokens)
# .arglist - List of argument names
# .variadic - Boolean indicating whether or not variadic macro
# .vararg - Name of the variadic parameter
#
# When a macro is created, the macro replacement token sequence is
# pre-scanned and used to create patch lists that are later used
# during macro expansion
# ------------------------------------------------------------------
class Macro(object):
def __init__(self,name,value,arglist=None,variadic=False):
self.name = name
self.value = value
self.arglist = arglist
self.variadic = variadic
if variadic:
self.vararg = arglist[-1]
self.source = None
# ------------------------------------------------------------------
# Preprocessor object
#
# Object representing a preprocessor. Contains macro definitions,
# include directories, and other information
# ------------------------------------------------------------------
class Preprocessor(object):
def __init__(self,lexer=None):
if lexer is None:
lexer = lex.lexer
self.lexer = lexer
self.macros = { }
self.path = []
self.temp_path = []
# Probe the lexer for selected tokens
self.lexprobe()
tm = time.localtime()
self.define("__DATE__ \"%s\"" % time.strftime("%b %d %Y",tm))
self.define("__TIME__ \"%s\"" % time.strftime("%H:%M:%S",tm))
self.parser = None
# -----------------------------------------------------------------------------
# tokenize()
#
# Utility function. Given a string of text, tokenize into a list of tokens
# -----------------------------------------------------------------------------
def tokenize(self,text):
tokens = []
self.lexer.input(text)
while True:
tok = self.lexer.token()
if not tok: break
tokens.append(tok)
return tokens
# ---------------------------------------------------------------------
# error()
#
# Report a preprocessor error/warning of some kind
# ----------------------------------------------------------------------
def error(self,file,line,msg):
print >>sys.stderr,"%s:%d %s" % (file,line,msg)
# ----------------------------------------------------------------------
# lexprobe()
#
# This method probes the preprocessor lexer object to discover
# the token types of symbols that are important to the preprocessor.
# If this works right, the preprocessor will simply "work"
# with any suitable lexer regardless of how tokens have been named.
# ----------------------------------------------------------------------
def lexprobe(self):
# Determine the token type for identifiers
self.lexer.input("identifier")
tok = self.lexer.token()
if not tok or tok.value != "identifier":
print "Couldn't determine identifier type"
else:
self.t_ID = tok.type
# Determine the token type for integers
self.lexer.input("12345")
tok = self.lexer.token()
if not tok or int(tok.value) != 12345:
print "Couldn't determine integer type"
else:
self.t_INTEGER = tok.type
self.t_INTEGER_TYPE = type(tok.value)
# Determine the token type for strings enclosed in double quotes
self.lexer.input("\"filename\"")
tok = self.lexer.token()
if not tok or tok.value != "\"filename\"":
print "Couldn't determine string type"
else:
self.t_STRING = tok.type
# Determine the token type for whitespace--if any
self.lexer.input(" ")
tok = self.lexer.token()
if not tok or tok.value != " ":
self.t_SPACE = None
else:
self.t_SPACE = tok.type
# Determine the token type for newlines
self.lexer.input("\n")
tok = self.lexer.token()
if not tok or tok.value != "\n":
self.t_NEWLINE = None
print "Couldn't determine token for newlines"
else:
self.t_NEWLINE = tok.type
self.t_WS = (self.t_SPACE, self.t_NEWLINE)
# Check for other characters used by the preprocessor
chars = [ '<','>','#','##','\\','(',')',',','.']
for c in chars:
self.lexer.input(c)
tok = self.lexer.token()
if not tok or tok.value != c:
print "Unable to lex '%s' required for preprocessor" % c
# ----------------------------------------------------------------------
# add_path()
#
# Adds a search path to the preprocessor.
# ----------------------------------------------------------------------
def add_path(self,path):
self.path.append(path)
# ----------------------------------------------------------------------
# group_lines()
#
# Given an input string, this function splits it into lines. Trailing whitespace
# is removed. Any line ending with \ is grouped with the next line. This
# function forms the lowest level of the preprocessor---grouping into text into
# a line-by-line format.
# ----------------------------------------------------------------------
def group_lines(self,input):
lex = self.lexer.clone()
lines = [x.rstrip() for x in input.splitlines()]
for i in xrange(len(lines)):
j = i+1
while lines[i].endswith('\\') and (j < len(lines)):
lines[i] = lines[i][:-1]+lines[j]
lines[j] = ""
j += 1
input = "\n".join(lines)
lex.input(input)
lex.lineno = 1
current_line = []
while True:
tok = lex.token()
if not tok:
break
current_line.append(tok)
if tok.type in self.t_WS and '\n' in tok.value:
yield current_line
current_line = []
if current_line:
yield current_line
# ----------------------------------------------------------------------
# tokenstrip()
#
# Remove leading/trailing whitespace tokens from a token list
# ----------------------------------------------------------------------
def tokenstrip(self,tokens):
i = 0
while i < len(tokens) and tokens[i].type in self.t_WS:
i += 1
del tokens[:i]
i = len(tokens)-1
while i >= 0 and tokens[i].type in self.t_WS:
i -= 1
del tokens[i+1:]
return tokens
# ----------------------------------------------------------------------
# collect_args()
#
# Collects comma separated arguments from a list of tokens. The arguments
# must be enclosed in parenthesis. Returns a tuple (tokencount,args,positions)
# where tokencount is the number of tokens consumed, args is a list of arguments,
# and positions is a list of integers containing the starting index of each
# argument. Each argument is represented by a list of tokens.
#
# When collecting arguments, leading and trailing whitespace is removed
# from each argument.
#
# This function properly handles nested parenthesis and commas---these do not
# define new arguments.
# ----------------------------------------------------------------------
def collect_args(self,tokenlist):
args = []
positions = []
current_arg = []
nesting = 1
tokenlen = len(tokenlist)
# Search for the opening '('.
i = 0
while (i < tokenlen) and (tokenlist[i].type in self.t_WS):
i += 1
if (i < tokenlen) and (tokenlist[i].value == '('):
positions.append(i+1)
else:
self.error(self.source,tokenlist[0].lineno,"Missing '(' in macro arguments")
return 0, [], []
i += 1
while i < tokenlen:
t = tokenlist[i]
if t.value == '(':
current_arg.append(t)
nesting += 1
elif t.value == ')':
nesting -= 1
if nesting == 0:
if current_arg:
args.append(self.tokenstrip(current_arg))
positions.append(i)
return i+1,args,positions
current_arg.append(t)
elif t.value == ',' and nesting == 1:
args.append(self.tokenstrip(current_arg))
positions.append(i+1)
current_arg = []
else:
current_arg.append(t)
i += 1
# Missing end argument
self.error(self.source,tokenlist[-1].lineno,"Missing ')' in macro arguments")
return 0, [],[]
# ----------------------------------------------------------------------
# macro_prescan()
#
# Examine the macro value (token sequence) and identify patch points
# This is used to speed up macro expansion later on---we'll know
# right away where to apply patches to the value to form the expansion
# ----------------------------------------------------------------------
def macro_prescan(self,macro):
macro.patch = [] # Standard macro arguments
macro.str_patch = [] # String conversion expansion
macro.var_comma_patch = [] # Variadic macro comma patch
i = 0
while i < len(macro.value):
if macro.value[i].type == self.t_ID and macro.value[i].value in macro.arglist:
argnum = macro.arglist.index(macro.value[i].value)
# Conversion of argument to a string
if i > 0 and macro.value[i-1].value == '#':
macro.value[i] = copy.copy(macro.value[i])
macro.value[i].type = self.t_STRING
del macro.value[i-1]
macro.str_patch.append((argnum,i-1))
continue
# Concatenation
elif (i > 0 and macro.value[i-1].value == '##'):
macro.patch.append(('c',argnum,i-1))
del macro.value[i-1]
continue
elif ((i+1) < len(macro.value) and macro.value[i+1].value == '##'):
macro.patch.append(('c',argnum,i))
i += 1
continue
# Standard expansion
else:
macro.patch.append(('e',argnum,i))
elif macro.value[i].value == '##':
if macro.variadic and (i > 0) and (macro.value[i-1].value == ',') and \
((i+1) < len(macro.value)) and (macro.value[i+1].type == self.t_ID) and \
(macro.value[i+1].value == macro.vararg):
macro.var_comma_patch.append(i-1)
i += 1
macro.patch.sort(key=lambda x: x[2],reverse=True)
# ----------------------------------------------------------------------
# macro_expand_args()
#
# Given a Macro and list of arguments (each a token list), this method
# returns an expanded version of a macro. The return value is a token sequence
# representing the replacement macro tokens
# ----------------------------------------------------------------------
def macro_expand_args(self,macro,args):
# Make a copy of the macro token sequence
rep = [copy.copy(_x) for _x in macro.value]
# Make string expansion patches. These do not alter the length of the replacement sequence
str_expansion = {}
for argnum, i in macro.str_patch:
if argnum not in str_expansion:
str_expansion[argnum] = ('"%s"' % "".join([x.value for x in args[argnum]])).replace("\\","\\\\")
rep[i] = copy.copy(rep[i])
rep[i].value = str_expansion[argnum]
# Make the variadic macro comma patch. If the variadic macro argument is empty, we get rid
comma_patch = False
if macro.variadic and not args[-1]:
for i in macro.var_comma_patch:
rep[i] = None
comma_patch = True
# Make all other patches. The order of these matters. It is assumed that the patch list
# has been sorted in reverse order of patch location since replacements will cause the
# size of the replacement sequence to expand from the patch point.
expanded = { }
for ptype, argnum, i in macro.patch:
# Concatenation. Argument is left unexpanded
if ptype == 'c':
rep[i:i+1] = args[argnum]
# Normal expansion. Argument is macro expanded first
elif ptype == 'e':
if argnum not in expanded:
expanded[argnum] = self.expand_macros(args[argnum])
rep[i:i+1] = expanded[argnum]
# Get rid of removed comma if necessary
if comma_patch:
rep = [_i for _i in rep if _i]
return rep
# ----------------------------------------------------------------------
# expand_macros()
#
# Given a list of tokens, this function performs macro expansion.
# The expanded argument is a dictionary that contains macros already
# expanded. This is used to prevent infinite recursion.
# ----------------------------------------------------------------------
def expand_macros(self,tokens,expanded=None):
if expanded is None:
expanded = {}
i = 0
while i < len(tokens):
t = tokens[i]
if t.type == self.t_ID:
if t.value in self.macros and t.value not in expanded:
# Yes, we found a macro match
expanded[t.value] = True
m = self.macros[t.value]
if not m.arglist:
# A simple macro
ex = self.expand_macros([copy.copy(_x) for _x in m.value],expanded)
for e in ex:
e.lineno = t.lineno
tokens[i:i+1] = ex
i += len(ex)
else:
# A macro with arguments
j = i + 1
while j < len(tokens) and tokens[j].type in self.t_WS:
j += 1
if tokens[j].value == '(':
tokcount,args,positions = self.collect_args(tokens[j:])
if not m.variadic and len(args) != len(m.arglist):
self.error(self.source,t.lineno,"Macro %s requires %d arguments" % (t.value,len(m.arglist)))
i = j + tokcount
elif m.variadic and len(args) < len(m.arglist)-1:
if len(m.arglist) > 2:
self.error(self.source,t.lineno,"Macro %s must have at least %d arguments" % (t.value, len(m.arglist)-1))
else:
self.error(self.source,t.lineno,"Macro %s must have at least %d argument" % (t.value, len(m.arglist)-1))
i = j + tokcount
else:
if m.variadic:
if len(args) == len(m.arglist)-1:
args.append([])
else:
args[len(m.arglist)-1] = tokens[j+positions[len(m.arglist)-1]:j+tokcount-1]
del args[len(m.arglist):]
# Get macro replacement text
rep = self.macro_expand_args(m,args)
rep = self.expand_macros(rep,expanded)
for r in rep:
r.lineno = t.lineno
tokens[i:j+tokcount] = rep
i += len(rep)
del expanded[t.value]
continue
elif t.value == '__LINE__':
t.type = self.t_INTEGER
t.value = self.t_INTEGER_TYPE(t.lineno)
i += 1
return tokens
# ----------------------------------------------------------------------
# evalexpr()
#
# Evaluate an expression token sequence for the purposes of evaluating
# integral expressions.
# ----------------------------------------------------------------------
def evalexpr(self,tokens):
# tokens = tokenize(line)
# Search for defined macros
i = 0
while i < len(tokens):
if tokens[i].type == self.t_ID and tokens[i].value == 'defined':
j = i + 1
needparen = False
result = "0L"
while j < len(tokens):
if tokens[j].type in self.t_WS:
j += 1
continue
elif tokens[j].type == self.t_ID:
if tokens[j].value in self.macros:
result = "1L"
else:
result = "0L"
if not needparen: break
elif tokens[j].value == '(':
needparen = True
elif tokens[j].value == ')':
break
else:
self.error(self.source,tokens[i].lineno,"Malformed defined()")
j += 1
tokens[i].type = self.t_INTEGER
tokens[i].value = self.t_INTEGER_TYPE(result)
del tokens[i+1:j+1]
i += 1
tokens = self.expand_macros(tokens)
for i,t in enumerate(tokens):
if t.type == self.t_ID:
tokens[i] = copy.copy(t)
tokens[i].type = self.t_INTEGER
tokens[i].value = self.t_INTEGER_TYPE("0L")
elif t.type == self.t_INTEGER:
tokens[i] = copy.copy(t)
# Strip off any trailing suffixes
tokens[i].value = str(tokens[i].value)
while tokens[i].value[-1] not in "0123456789abcdefABCDEF":
tokens[i].value = tokens[i].value[:-1]
expr = "".join([str(x.value) for x in tokens])
expr = expr.replace("&&"," and ")
expr = expr.replace("||"," or ")
expr = expr.replace("!"," not ")
try:
result = eval(expr)
except StandardError:
self.error(self.source,tokens[0].lineno,"Couldn't evaluate expression")
result = 0
return result
# ----------------------------------------------------------------------
# parsegen()
#
# Parse an input string/
# ----------------------------------------------------------------------
def parsegen(self,input,source=None):
# Replace trigraph sequences
t = trigraph(input)
lines = self.group_lines(t)
if not source:
source = ""
self.define("__FILE__ \"%s\"" % source)
self.source = source
chunk = []
enable = True
iftrigger = False
ifstack = []
for x in lines:
for i,tok in enumerate(x):
if tok.type not in self.t_WS: break
if tok.value == '#':
# Preprocessor directive
for tok in x:
if tok in self.t_WS and '\n' in tok.value:
chunk.append(tok)
dirtokens = self.tokenstrip(x[i+1:])
if dirtokens:
name = dirtokens[0].value
args = self.tokenstrip(dirtokens[1:])
else:
name = ""
args = []
if name == 'define':
if enable:
for tok in self.expand_macros(chunk):
yield tok
chunk = []
self.define(args)
elif name == 'include':
if enable:
for tok in self.expand_macros(chunk):
yield tok
chunk = []
oldfile = self.macros['__FILE__']
for tok in self.include(args):
yield tok
self.macros['__FILE__'] = oldfile
self.source = source
elif name == 'undef':
if enable:
for tok in self.expand_macros(chunk):
yield tok
chunk = []
self.undef(args)
elif name == 'ifdef':
ifstack.append((enable,iftrigger))
if enable:
if not args[0].value in self.macros:
enable = False
iftrigger = False
else:
iftrigger = True
elif name == 'ifndef':
ifstack.append((enable,iftrigger))
if enable:
if args[0].value in self.macros:
enable = False
iftrigger = False
else:
iftrigger = True
elif name == 'if':
ifstack.append((enable,iftrigger))
if enable:
result = self.evalexpr(args)
if not result:
enable = False
iftrigger = False
else:
iftrigger = True
elif name == 'elif':
if ifstack:
if ifstack[-1][0]: # We only pay attention if outer "if" allows this
if enable: # If already true, we flip enable False
enable = False
elif not iftrigger: # If False, but not triggered yet, we'll check expression
result = self.evalexpr(args)
if result:
enable = True
iftrigger = True
else:
self.error(self.source,dirtokens[0].lineno,"Misplaced #elif")
elif name == 'else':
if ifstack:
if ifstack[-1][0]:
if enable:
enable = False
elif not iftrigger:
enable = True
iftrigger = True
else:
self.error(self.source,dirtokens[0].lineno,"Misplaced #else")
elif name == 'endif':
if ifstack:
enable,iftrigger = ifstack.pop()
else:
self.error(self.source,dirtokens[0].lineno,"Misplaced #endif")
else:
# Unknown preprocessor directive
pass
else:
# Normal text
if enable:
chunk.extend(x)
for tok in self.expand_macros(chunk):
yield tok
chunk = []
# ----------------------------------------------------------------------
# include()
#
# Implementation of file-inclusion
# ----------------------------------------------------------------------
def include(self,tokens):
# Try to extract the filename and then process an include file
if not tokens:
return
if tokens:
if tokens[0].value != '<' and tokens[0].type != self.t_STRING:
tokens = self.expand_macros(tokens)
if tokens[0].value == '<':
# Include <...>
i = 1
while i < len(tokens):
if tokens[i].value == '>':
break
i += 1
else:
print "Malformed #include <...>"
return
filename = "".join([x.value for x in tokens[1:i]])
path = self.path + [""] + self.temp_path
elif tokens[0].type == self.t_STRING:
filename = tokens[0].value[1:-1]
path = self.temp_path + [""] + self.path
else:
print "Malformed #include statement"
return
for p in path:
iname = os.path.join(p,filename)
try:
data = open(iname,"r").read()
dname = os.path.dirname(iname)
if dname:
self.temp_path.insert(0,dname)
for tok in self.parsegen(data,filename):
yield tok
if dname:
del self.temp_path[0]
break
except IOError,e:
pass
else:
print "Couldn't find '%s'" % filename
# ----------------------------------------------------------------------
# define()
#
# Define a new macro
# ----------------------------------------------------------------------
def define(self,tokens):
if isinstance(tokens,(str,unicode)):
tokens = self.tokenize(tokens)
linetok = tokens
try:
name = linetok[0]
if len(linetok) > 1:
mtype = linetok[1]
else:
mtype = None
if not mtype:
m = Macro(name.value,[])
self.macros[name.value] = m
elif mtype.type in self.t_WS:
# A normal macro
m = Macro(name.value,self.tokenstrip(linetok[2:]))
self.macros[name.value] = m
elif mtype.value == '(':
# A macro with arguments
tokcount, args, positions = self.collect_args(linetok[1:])
variadic = False
for a in args:
if variadic:
print "No more arguments may follow a variadic argument"
break
astr = "".join([str(_i.value) for _i in a])
if astr == "...":
variadic = True
a[0].type = self.t_ID
a[0].value = '__VA_ARGS__'
variadic = True
del a[1:]
continue
elif astr[-3:] == "..." and a[0].type == self.t_ID:
variadic = True
del a[1:]
# If, for some reason, "." is part of the identifier, strip off the name for the purposes
# of macro expansion
if a[0].value[-3:] == '...':
a[0].value = a[0].value[:-3]
continue
if len(a) > 1 or a[0].type != self.t_ID:
print "Invalid macro argument"
break
else:
mvalue = self.tokenstrip(linetok[1+tokcount:])
i = 0
while i < len(mvalue):
if i+1 < len(mvalue):
if mvalue[i].type in self.t_WS and mvalue[i+1].value == '##':
del mvalue[i]
continue
elif mvalue[i].value == '##' and mvalue[i+1].type in self.t_WS:
del mvalue[i+1]
i += 1
m = Macro(name.value,mvalue,[x[0].value for x in args],variadic)
self.macro_prescan(m)
self.macros[name.value] = m
else:
print "Bad macro definition"
except LookupError:
print "Bad macro definition"
# ----------------------------------------------------------------------
# undef()
#
# Undefine a macro
# ----------------------------------------------------------------------
def undef(self,tokens):
id = tokens[0].value
try:
del self.macros[id]
except LookupError:
pass
# ----------------------------------------------------------------------
# parse()
#
# Parse input text.
# ----------------------------------------------------------------------
def parse(self,input,source=None,ignore={}):
self.ignore = ignore
self.parser = self.parsegen(input,source)
# ----------------------------------------------------------------------
# token()
#
# Method to return individual tokens
# ----------------------------------------------------------------------
def token(self):
try:
while True:
tok = self.parser.next()
if tok.type not in self.ignore: return tok
except StopIteration:
self.parser = None
return None
if __name__ == '__main__':
import ply.lex as lex
lexer = lex.lex()
# Run a preprocessor
import sys
f = open(sys.argv[1])
input = f.read()
p = Preprocessor(lexer)
p.parse(input,sys.argv[1])
while True:
tok = p.token()
if not tok: break
print p.source, tok

View File

@ -0,0 +1,133 @@
# ----------------------------------------------------------------------
# ctokens.py
#
# Token specifications for symbols in ANSI C and C++. This file is
# meant to be used as a library in other tokenizers.
# ----------------------------------------------------------------------
# Reserved words
tokens = [
# Literals (identifier, integer constant, float constant, string constant, char const)
'ID', 'TYPEID', 'ICONST', 'FCONST', 'SCONST', 'CCONST',
# Operators (+,-,*,/,%,|,&,~,^,<<,>>, ||, &&, !, <, <=, >, >=, ==, !=)
'PLUS', 'MINUS', 'TIMES', 'DIVIDE', 'MOD',
'OR', 'AND', 'NOT', 'XOR', 'LSHIFT', 'RSHIFT',
'LOR', 'LAND', 'LNOT',
'LT', 'LE', 'GT', 'GE', 'EQ', 'NE',
# Assignment (=, *=, /=, %=, +=, -=, <<=, >>=, &=, ^=, |=)
'EQUALS', 'TIMESEQUAL', 'DIVEQUAL', 'MODEQUAL', 'PLUSEQUAL', 'MINUSEQUAL',
'LSHIFTEQUAL','RSHIFTEQUAL', 'ANDEQUAL', 'XOREQUAL', 'OREQUAL',
# Increment/decrement (++,--)
'PLUSPLUS', 'MINUSMINUS',
# Structure dereference (->)
'ARROW',
# Ternary operator (?)
'TERNARY',
# Delimeters ( ) [ ] { } , . ; :
'LPAREN', 'RPAREN',
'LBRACKET', 'RBRACKET',
'LBRACE', 'RBRACE',
'COMMA', 'PERIOD', 'SEMI', 'COLON',
# Ellipsis (...)
'ELLIPSIS',
]
# Operators
t_PLUS = r'\+'
t_MINUS = r'-'
t_TIMES = r'\*'
t_DIVIDE = r'/'
t_MODULO = r'%'
t_OR = r'\|'
t_AND = r'&'
t_NOT = r'~'
t_XOR = r'\^'
t_LSHIFT = r'<<'
t_RSHIFT = r'>>'
t_LOR = r'\|\|'
t_LAND = r'&&'
t_LNOT = r'!'
t_LT = r'<'
t_GT = r'>'
t_LE = r'<='
t_GE = r'>='
t_EQ = r'=='
t_NE = r'!='
# Assignment operators
t_EQUALS = r'='
t_TIMESEQUAL = r'\*='
t_DIVEQUAL = r'/='
t_MODEQUAL = r'%='
t_PLUSEQUAL = r'\+='
t_MINUSEQUAL = r'-='
t_LSHIFTEQUAL = r'<<='
t_RSHIFTEQUAL = r'>>='
t_ANDEQUAL = r'&='
t_OREQUAL = r'\|='
t_XOREQUAL = r'^='
# Increment/decrement
t_INCREMENT = r'\+\+'
t_DECREMENT = r'--'
# ->
t_ARROW = r'->'
# ?
t_TERNARY = r'\?'
# Delimeters
t_LPAREN = r'\('
t_RPAREN = r'\)'
t_LBRACKET = r'\['
t_RBRACKET = r'\]'
t_LBRACE = r'\{'
t_RBRACE = r'\}'
t_COMMA = r','
t_PERIOD = r'\.'
t_SEMI = r';'
t_COLON = r':'
t_ELLIPSIS = r'\.\.\.'
# Identifiers
t_ID = r'[A-Za-z_][A-Za-z0-9_]*'
# Integer literal
t_INTEGER = r'\d+([uU]|[lL]|[uU][lL]|[lL][uU])?'
# Floating literal
t_FLOAT = r'((\d+)(\.\d+)(e(\+|-)?(\d+))? | (\d+)e(\+|-)?(\d+))([lL]|[fF])?'
# String literal
t_STRING = r'\"([^\\\n]|(\\.))*?\"'
# Character constant 'c' or L'c'
t_CHARACTER = r'(L)?\'([^\\\n]|(\\.))*?\''
# Comment (C-Style)
def t_COMMENT(t):
r'/\*(.|\n)*?\*/'
t.lexer.lineno += t.value.count('\n')
return t
# Comment (C++-Style)
def t_CPPCOMMENT(t):
r'//.*\n'
t.lexer.lineno += 1
return t

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,67 @@
#-----------------------------------------------------------------
# plyparser.py
#
# PLYParser class and other utilites for simplifying programming
# parsers with PLY
#
# Copyright (C) 2008-2009, Eli Bendersky
# License: LGPL
#-----------------------------------------------------------------
class Coord(object):
""" Coordinates of a syntactic element. Consists of:
- File name
- Line number
- (optional) column number, for the Lexer
"""
def __init__(self, file, line, column=None):
self.file = file
self.line = line
self.column = column
def __str__(self):
str = "%s:%s" % (self.file, self.line)
if self.column: str += ":%s" % self.column
return str
class ParseError(Exception): pass
class PLYParser(object):
def _create_opt_rule(self, rulename):
""" Given a rule name, creates an optional ply.yacc rule
for it. The name of the optional rule is
<rulename>_opt
"""
optname = rulename + '_opt'
def optrule(self, p):
p[0] = p[1]
optrule.__doc__ = '%s : empty\n| %s' % (optname, rulename)
optrule.__name__ = 'p_%s' % optname
setattr(self.__class__, optrule.__name__, optrule)
def _coord(self, lineno, column=None):
return Coord(
file=self.clex.filename,
line=lineno,
column=column)
def _parse_error(self, msg, coord):
raise ParseError("%s: %s" % (coord, msg))
if __name__ == '__main__':
pp = PLYParser()
pp._create_opt_rule('java')
ar = [4, 6]
pp.p_java_opt(ar)
print ar
print pp.p_java_opt.__doc__
print dir(pp)

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,25 @@
import urllib
import re
from util import hook
re_lineends = re.compile(r'[\r\n]*')
@hook.command
def py(inp):
".py <prog> -- executes python code <prog>"
if not inp:
return py.__doc__
res = urllib.urlopen("http://eval.appspot.com/eval?statement=%s" %
urllib.quote(inp.strip(), safe='')).readlines()
if len(res) == 0:
return
res[0] = re_lineends.split(res[0])[0]
if not res[0] == 'Traceback (most recent call last):':
return res[0]
else:
return res[-1]

View File

@ -0,0 +1,89 @@
"""
remember.py: written by Scaevolus 2009
"""
import os
import thread
import codecs
from util import hook
lock = thread.allocate_lock()
memory = {}
def load_memory(filename, mtimes={}):
if not os.path.exists(filename):
return {}
mtime = os.stat(filename).st_mtime
if mtimes.get(filename, 0) != mtime:
mtimes[filename] = mtime
return dict((x.split(None, 1)[0].lower(), x.strip()) for x in
codecs.open(filename, 'r', 'utf-8'))
def save_memory(filename, memory):
out = codecs.open(filename, 'w', 'utf-8')
out.write('\n'.join(sorted(memory.itervalues())))
out.flush()
out.close()
def make_filename(dir, chan):
return os.path.join(dir, 'memory')
@hook.command
def remember(bot, input):
".remember <word> <data> -- maps word to data in the memory"
with lock:
filename = make_filename(bot.persist_dir, input.chan)
memory.setdefault(filename, load_memory(filename))
try:
head, tail = input.inp.split(None, 1)
except ValueError:
return remember.__doc__
tail = tail.strip()
low = head.lower()
if low not in memory[filename]:
input.reply("done.")
else:
input.reply('forgetting that "%s", remembering this instead.' %
memory[filename][low])
memory[filename][low] = input.inp.strip()
save_memory(filename, memory[filename])
@hook.command
def forget(bot, input):
".forget <word> -- forgets the mapping that word had"
with lock:
filename = make_filename(bot.persist_dir, input.chan)
memory.setdefault(filename, load_memory(filename))
if not input.inp.strip():
return forget.__doc__
low = input.inp.strip().lower()
if low not in memory[filename]:
return "I don't know about that."
if not hasattr(input, 'chan'):
return "I won't forget anything in private."
input.say("Forgot that %s" % memory[filename][low])
del memory[filename][low]
save_memory(filename, memory[filename])
@hook.command(hook='\?(.+)', prefix=False)
def question(bot, input):
"?<word> -- shows what data is associated with word"
with lock:
filename = make_filename(bot.persist_dir, input.chan)
memory.setdefault(filename, load_memory(filename))
word = input.inp.split()[0].lower()
if word in memory[filename]:
input.say("%s" % memory[filename][word])

77
plugins_available/seen.py Normal file
View File

@ -0,0 +1,77 @@
" seen.py: written by sklnd in about two beers July 2009"
import os
import time
from datetime import datetime
import sqlite3
from util import hook, timesince
dbname = "skybot.db"
def adapt_datetime(ts):
return time.mktime(ts.timetuple())
sqlite3.register_adapter(datetime, adapt_datetime)
@hook.tee
def seeninput(bot, input):
if input.command != 'PRIVMSG':
return
dbpath = os.path.join(bot.persist_dir, dbname)
conn = dbconnect(dbpath)
cursor = conn.cursor()
cursor.execute("INSERT OR REPLACE INTO seen(name, date, quote, chan)"
"values(?,?,?,?)", (input.nick.lower(), datetime.now(),
input.msg, input.chan))
conn.commit()
conn.close()
@hook.command
def seen(bot, input):
".seen <nick> -- Tell when a nickname was last in active in irc"
if len(input.msg) < 6:
return seen.__doc__
query = input.inp.strip()
if query.lower() == input.nick.lower():
return "Have you looked in a mirror lately?"
dbpath = os.path.join(bot.persist_dir, dbname)
conn = dbconnect(dbpath)
cursor = conn.cursor()
command = "SELECT date, quote FROM seen WHERE name LIKE ? AND chan = ?" \
"ORDER BY date DESC"
cursor.execute(command, (query, input.chan))
results = cursor.fetchone()
conn.close()
if(results != None):
reltime = timesince.timesince(datetime.fromtimestamp(results[0]))
return '%s was last seen %s ago saying: %s' % \
(query, reltime, results[1])
else:
return "I've never seen %s" % query
def dbconnect(db):
"check to see that our db has the the seen table and return a connection."
conn = sqlite3.connect(db)
conn.execute("CREATE TABLE IF NOT EXISTS "
"seen(name varchar(30) not null, date datetime not null, "
"quote varchar(250) not null, chan varchar(32) not null, "
"primary key(name, chan));")
conn.commit()
return conn

View File

@ -0,0 +1,29 @@
import re
from util import hook
@hook.sieve
def sieve_suite(bot, input, func, args):
events = args.get('events', ['PRIVMSG'])
if input.command not in events and events != '*':
return None
if input.nick.lower()[-3:] == 'bot' and args.get('ignorebots', True):
return None
hook = args.get('hook', r'(.*)')
if args.get('prefix', True):
# add a prefix, unless it's a private message
hook = (r'^(?:[.!]|' if input.chan != input.nick else r'^(?:[.!]?|') \
+ input.conn.nick + r'[:,]*\s*)' + hook
input.re = re.match(hook, input.msg, flags=re.I)
if input.re is None:
return None
input.inp = ' '.join(input.re.groups())
return input

View File

@ -0,0 +1,36 @@
import random
import urllib
import urllib2
import re
import json
from util import hook
@hook.command
def suggest(inp):
".suggest [#n] <phrase> -- gets a random/the nth suggested google search"
if not inp.strip():
return suggest.__doc__
m = re.match('^#(\d+) (.+)$', inp)
if m:
num, inp = m.groups()
num = int(num)
if num > 10:
return 'can only get first ten suggestions'
else:
num = 0
url = 'http://google.com/complete/search?q=' + urllib.quote(inp, safe='')
page = urllib2.urlopen(url).read()
page_json = page.split('(', 1)[1][:-1]
suggestions = json.loads(page_json)[1]
if not suggestions:
return 'no suggestions found'
if num:
if len(suggestions) + 1 <= num:
return 'only got %d suggestions' % len(suggestions)
out = suggestions[num - 1]
else:
out = random.choice(suggestions)
return '#%d: %s (%s)' % (int(out[2][0]) + 1, out[0], out[1])

131
plugins_available/tell.py Normal file
View File

@ -0,0 +1,131 @@
" tell.py: written by sklnd in July 2009"
import os
import time
from datetime import datetime
import sqlite3
from util import hook, timesince
dbname = "skybot.db"
def adapt_datetime(ts):
return time.mktime(ts.timetuple())
sqlite3.register_adapter(datetime, adapt_datetime)
@hook.command(hook=r'(.*)', prefix=False, ignorebots=True)
def tellinput(bot, input):
dbpath = os.path.join(bot.persist_dir, dbname)
conn = dbconnect(dbpath)
cursor = conn.cursor()
command = "select count(name) from tell where name LIKE ? and chan = ?"
results = cursor.execute(command, (input.nick, input.chan)).fetchone()
if results[0] > 0:
command = "select id, user_from, quote, date from tell " \
"where name LIKE ? and chan = ? limit 1"
tell = cursor.execute(command, (input.nick, input.chan)).fetchall()[0]
more = results[0] - 1
reltime = timesince.timesince(datetime.fromtimestamp(tell[3]))
reply = "%(teller)s said %(reltime)s ago: %(quote)s" % \
{"teller": tell[1], "quote": tell[2], "reltime": reltime}
if more:
reply += " (+%(more)d more, to view say .showtells)" % {"more": more}
input.reply(reply)
command = "delete from tell where id = ?"
cursor.execute(command, (tell[0], ))
conn.commit()
conn.close()
@hook.command
def showtells(bot, input):
".showtells -- view all pending tell messages (sent in PM)."
dbpath = os.path.join(bot.persist_dir, dbname)
conn = dbconnect(dbpath)
cursor = conn.cursor()
command = "SELECT id, user_from, quote, date FROM tell " \
"WHERE name LIKE ? and chan = ?"
tells = cursor.execute(command, (input.nick, input.chan)).fetchall()
if(len(tells) > 0):
for tell in tells:
reltime = timesince.timesince(datetime.fromtimestamp(tell[3]))
input.msg(input.nick, '%(teller)s said %(time)s ago: %(quote)s' %
{'teller': tell[1], 'quote': tell[2], 'time': reltime})
command = "delete from tell where id = ?"
cursor.execute(command, (tell[0], ))
conn.commit()
else:
input.pm("You have no pending tells.")
conn.close()
@hook.command
def tell(bot, input):
".tell <nick> <message> -- relay <message> to <nick> when <nick> is around"
if len(input.msg) < 6:
return tell.__doc__
query = input.msg[6:].strip().partition(" ")
if query[0] == input.nick:
return "No."
if query[2] != "":
dbpath = os.path.join(bot.persist_dir, dbname)
conn = dbconnect(dbpath)
command = "select count(*) from tell_probation where name=? and chan=?"
if conn.execute(command, (input.nick, input.chan)).fetchone()[0] > 0:
return "No."
command = "select count(*) from tell where name=? and user_from=?"
if conn.execute(command, (query[0], input.nick)).fetchone()[0] >= 3:
return "You've told that person too many things."
cursor = conn.cursor()
command = "insert into tell(name, user_from, quote, chan, date) " \
"values(?,?,?,?,?)"
cursor.execute(command, (query[0], input.nick, query[2], input.chan,
datetime.now()))
conn.commit()
conn.close()
return "I'll pass that along."
else:
return tell.__doc__
def dbconnect(db):
"check to see that our db has the tell table and return a connection."
conn = sqlite3.connect(db)
conn.execute("CREATE TABLE IF NOT EXISTS tell(id integer primary key "
"autoincrement, name text not null, user_from text not null,"
"quote text not null, chan text not null, "
"date datetime not null);")
conn.execute("CREATE TABLE IF NOT EXISTS "
"tell_probation(name text, chan text,"
"primary key(name, chan));")
conn.commit()
return conn

View File

@ -0,0 +1,18 @@
import re
import urllib2
from util import hook
tinyurl_re = re.compile(r'http://(?:www\.)?tinyurl.com/([A-Za-z0-9\-]+)',
flags=re.IGNORECASE)
@hook.command(hook=r'(.*)', prefix=False)
def tinyurl(inp):
tumatch = tinyurl_re.search(inp)
if tumatch:
try:
return urllib2.urlopen(tumatch.group()).url.strip()
except urllib2.URLError:
pass

View File

@ -0,0 +1,137 @@
"""
twitter.py: written by Scaevolus 2009
retrieves most recent tweets
"""
import re
import random
import urllib2
from lxml import etree
from time import strptime, strftime
from util import hook
def unescape_xml(string):
# unescape the 5 chars that might be escaped in xml
# gratuitously functional
# return reduce(lambda x, y: x.replace(*y), (string,
# zip('&gt; &lt; &apos; &quote; &amp'.split(), '> < \' " &'.split()))
# boring, normal
return string.replace('&gt;', '>').replace('&lt;', '<').replace('&apos;',
"'").replace('&quote;', '"').replace('&amp;', '&')
history = []
history_max_size = 250
@hook.command
def twitter(inp):
".twitter <user>/<user> <n>/<id>/#<hashtag>/@<user> -- gets last/<n>th tweet from"\
"<user>/gets tweet <id>/gets random tweet with #<hashtag>/gets replied tweet from @<user>"
inp = inp.strip()
if not inp:
return twitter.__doc__
def add_reply(reply_name, reply_id):
if len(history) == history_max_size:
history.pop()
history.insert(0, (reply_name, reply_id))
def find_reply(reply_name):
for name, id in history:
if name == reply_name:
return id
if inp[0] == '@':
reply_id = find_reply(inp[1:])
if reply_id == None:
return 'error: no replies to %s found' % inp
inp = reply_id
url = 'http://twitter.com'
getting_nth = False
getting_id = False
searching_hashtag = False
time = 'status/created_at'
text = 'status/text'
reply_name = 'status/in_reply_to_screen_name'
reply_id = 'status/in_reply_to_status_id'
if re.match(r'^\d+$', inp):
getting_id = True
url += '/statuses/show/%s.xml' % inp
screen_name = 'user/screen_name'
time = 'created_at'
text = 'text'
reply_name = 'in_reply_to_screen_name'
reply_id = 'in_reply_to_status_id'
elif re.match(r'^\w{1,15}$', inp):
url += '/users/show/%s.xml' % inp
screen_name = 'screen_name'
elif re.match(r'^\w{1,15}\s+\d+$', inp):
getting_nth = True
name, num = inp.split()
if int(num) > 3200:
return 'error: only supports up to the 3200th tweet'
url += '/statuses/user_timeline/%s.xml?count=1&page=%s' % (name, num)
screen_name = 'status/user/screen_name'
elif re.match(r'^#\w+$', inp):
url = 'http://search.twitter.com/search.atom?q=%23' + inp[1:]
searching_hashtag = True
else:
return 'error: invalid request'
try:
xml = urllib2.urlopen(url).read()
except urllib2.HTTPError, e:
errors = {400 : 'bad request (ratelimited?)',
401: 'tweet is private',
404: 'invalid user/id',
500: 'twitter is broken',
502: 'twitter is down ("getting upgraded")',
503: 'twitter is overloaded (lol, RoR)'}
if e.code == 404:
return 'error: invalid ' + ['username', 'tweet id'][getting_id]
if e.code in errors:
return 'error: ' + errors[e.code]
return 'error: unknown'
except urllib2.URLerror, e:
return 'error: timeout'
tweet = etree.fromstring(xml)
if searching_hashtag:
ns = '{http://www.w3.org/2005/Atom}'
tweets = tweet.findall(ns + 'entry/' + ns + 'id')
if not tweets:
return 'error: hashtag not found'
id = random.choice(tweets).text
id = id[id.rfind(':') + 1:]
print id
return twitter(id)
if getting_nth:
if tweet.find('status') is None:
return 'error: user does not have that many tweets'
time = tweet.find(time)
if time is None:
return 'error: user has no tweets'
reply_name = tweet.find(reply_name).text
reply_id = tweet.find(reply_id).text
if reply_name is not None and reply_id is not None:
add_reply(reply_name, reply_id)
time = strftime('%Y-%m-%d %H:%M:%S',
strptime(time.text,
'%a %b %d %H:%M:%S +0000 %Y'))
text = unescape_xml(tweet.find(text).text.replace('\n', ''))
screen_name = tweet.find(screen_name).text
return "%s %s: %s" % (time, screen_name, text)

View File

@ -0,0 +1,29 @@
from lxml import html
import urllib
from util import hook
@hook.command('u')
@hook.command
def urban(inp):
'''.u/.urban <phrase> -- looks up <phrase> on urbandictionary.com'''
if not inp.strip():
return urban.__doc__
url = 'http://www.urbandictionary.com/define.php?term=' + \
urllib.quote(inp.strip(), safe='')
page = html.parse(url)
words = page.xpath("//td[@class='word']")
defs = page.xpath("//div[@class='definition']")
if not defs:
return 'no definitions found'
out = words[0].text_content().strip() + ': ' + ' '.join(
defs[0].text_content().split())
if len(out) > 400:
out = out[:out.rfind(' ', 0, 400)] + '...'
return out

View File

View File

@ -0,0 +1,94 @@
import Queue
import thread
import traceback
def _isfunc(x):
if type(x) == type(_isfunc):
return True
return False
def _hook_add(func, add):
if not hasattr(func, '_skybot_hook'):
func._skybot_hook = []
func._skybot_hook.append(add)
def _make_sig(f):
return f.func_code.co_filename, f.func_name, f.func_code.co_firstlineno
def sieve(func):
if func.func_code.co_argcount != 4:
raise ValueError(
'sieves must take 4 arguments: (bot, input, func, args)')
_hook_add(func, ['sieve', (_make_sig(func), func)])
return func
def command(func=None, hook=None, **kwargs):
args = {}
def command_wrapper(func):
if func.func_code.co_argcount not in (1, 2):
raise ValueError(
'commands must take 1 or 2 arguments: (inp) or (bot, input)')
args.setdefault('name', func.func_name)
args.setdefault('hook', args['name'] + r'(?:\s+|$)(.*)')
_hook_add(func, ['command', (_make_sig(func), func, args)])
return func
if hook is not None or kwargs or not _isfunc(func):
if func is not None:
args['name'] = func
if hook is not None:
args['hook'] = hook
args.update(kwargs)
return command_wrapper
else:
return command_wrapper(func)
def event(arg=None, **kwargs):
args = kwargs
def event_wrapper(func):
if func.func_code.co_argcount != 2:
raise ValueError('events must take 2 arguments: (bot, input)')
args['name'] = func.func_name
args['prefix'] = False
args.setdefault('events', '*')
_hook_add(func, ['event', (_make_sig(func), func, args)])
return func
if _isfunc(arg):
return event_wrapper(arg, kwargs)
else:
if arg is not None:
args['events'] = arg.split()
return event_wrapper
def tee(func, **kwargs):
"passes _all_ input lines to function, in order (skips sieves)"
if func.func_code.co_argcount != 2:
raise ValueError('tees must take 2 arguments: (bot, input)')
_hook_add(func, ['tee', (_make_sig(func), func, kwargs)])
func._iqueue = Queue.Queue()
def trampoline(func):
input = None
while True:
input = func._iqueue.get()
if input == StopIteration:
return
try:
func(*input)
except Exception:
traceback.print_exc(Exception)
thread.start_new_thread(trampoline, (func,))
return func

View File

@ -0,0 +1,96 @@
# Copyright (c) Django Software Foundation and individual contributors.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without modification,
# are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# 3. Neither the name of Django nor the names of its contributors may be used
# to endorse or promote products derived from this software without
# specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
# ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import datetime
import time
def timesince(d, now=None):
"""
Takes two datetime objects and returns the time between d and now
as a nicely formatted string, e.g. "10 minutes". If d occurs after now,
then "0 minutes" is returned.
Units used are years, months, weeks, days, hours, and minutes.
Seconds and microseconds are ignored. Up to two adjacent units will be
displayed. For example, "2 weeks, 3 days" and "1 year, 3 months" are
possible outputs, but "2 weeks, 3 hours" and "1 year, 5 days" are not.
Adapted from http://blog.natbat.co.uk/archive/2003/Jun/14/time_since
"""
chunks = (
(60 * 60 * 24 * 365, ('year', 'years')),
(60 * 60 * 24 * 30, ('month', 'months')),
(60 * 60 * 24 * 7, ('week', 'weeks')),
(60 * 60 * 24, ('day', 'days')),
(60 * 60, ('hour', 'hours')),
(60, ('minute', 'minutes'))
)
# Convert datetime.date to datetime.datetime for comparison.
if not isinstance(d, datetime.datetime):
d = datetime.datetime(d.year, d.month, d.day)
if now and not isinstance(now, datetime.datetime):
now = datetime.datetime(now.year, now.month, now.day)
if not now:
now = datetime.datetime.now()
# ignore microsecond part of 'd' since we removed it from 'now'
delta = now - (d - datetime.timedelta(0, 0, d.microsecond))
since = delta.days * 24 * 60 * 60 + delta.seconds
if since <= 0:
# d is in the future compared to now, stop processing.
return u'0 ' + 'minutes'
for i, (seconds, name) in enumerate(chunks):
count = since // seconds
if count != 0:
break
if count == 1:
s = '%(number)d %(type)s' % {'number': count, 'type': name[0]}
else:
s = '%(number)d %(type)s' % {'number': count, 'type': name[1]}
if i + 1 < len(chunks):
# Now get the second item
seconds2, name2 = chunks[i + 1]
count2 = (since - (seconds * count)) // seconds2
if count2 != 0:
if count2 == 1:
s += ', %(number)d %(type)s' % {'number': count2, 'type': name2[0]}
else:
s += ', %(number)d %(type)s' % {'number': count2, 'type': name2[1]}
return s
def timeuntil(d, now=None):
"""
Like timesince, but returns a string measuring the time until
the given time.
"""
if not now:
now = datetime.datetime.now()
return timesince(now, d)

View File

@ -0,0 +1,19 @@
Copyright (c) 2006 Kirill Simonov
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

View File

@ -0,0 +1,36 @@
Metadata-Version: 1.0
Name: PyYAML
Version: 3.09
Summary: YAML parser and emitter for Python
Home-page: http://pyyaml.org/wiki/PyYAML
Author: Kirill Simonov
Author-email: xi@resolvent.net
License: MIT
Download-URL: http://pyyaml.org/download/pyyaml/PyYAML-3.09.tar.gz
Description: YAML is a data serialization format designed for human readability
and interaction with scripting languages. PyYAML is a YAML parser
and emitter for Python.
PyYAML features a complete YAML 1.1 parser, Unicode support, pickle
support, capable extension API, and sensible error messages. PyYAML
supports standard YAML tags and provides Python-specific tags that
allow to represent an arbitrary Python object.
PyYAML is applicable for a broad range of tasks from complex
configuration files to object serialization and persistance.
Platform: Any
Classifier: Development Status :: 5 - Production/Stable
Classifier: Intended Audience :: Developers
Classifier: License :: OSI Approved :: MIT License
Classifier: Operating System :: OS Independent
Classifier: Programming Language :: Python
Classifier: Programming Language :: Python :: 2
Classifier: Programming Language :: Python :: 2.3
Classifier: Programming Language :: Python :: 2.4
Classifier: Programming Language :: Python :: 2.5
Classifier: Programming Language :: Python :: 2.6
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.0
Classifier: Programming Language :: Python :: 3.1
Classifier: Topic :: Software Development :: Libraries :: Python Modules
Classifier: Topic :: Text Processing :: Markup

View File

@ -0,0 +1,35 @@
PyYAML - The next generation YAML parser and emitter for Python.
To install, type 'python setup.py install'.
By default, the setup.py script checks whether LibYAML is installed
and if so, builds and installs LibYAML bindings. To skip the check
and force installation of LibYAML bindings, use the option '--with-libyaml':
'python setup.py --with-libyaml install'. To disable the check and
skip building and installing LibYAML bindings, use '--without-libyaml':
'python setup.py --without-libyaml install'.
When LibYAML bindings are installed, you may use fast LibYAML-based
parser and emitter as follows:
>>> yaml.load(stream, Loader=yaml.CLoader)
>>> yaml.dump(data, Dumper=yaml.CDumper)
PyYAML includes a comprehensive test suite. To run the tests,
type 'python setup.py test'.
For more information, check the PyYAML homepage:
'http://pyyaml.org/wiki/PyYAML'.
For PyYAML tutorial and reference, see:
'http://pyyaml.org/wiki/PyYAMLDocumentation'.
Post your questions and opinions to the YAML-Core mailing list:
'http://lists.sourceforge.net/lists/listinfo/yaml-core'.
Submit bug reports and feature requests to the PyYAML bug tracker:
'http://pyyaml.org/newticket?component=pyyaml'.
PyYAML is written by Kirill Simonov <xi@resolvent.net>. It is released
under the MIT license. See the file LICENSE for more details.

View File

@ -0,0 +1,288 @@
from error import *
from tokens import *
from events import *
from nodes import *
from loader import *
from dumper import *
__version__ = '3.09'
try:
from cyaml import *
__with_libyaml__ = True
except ImportError:
__with_libyaml__ = False
def scan(stream, Loader=Loader):
"""
Scan a YAML stream and produce scanning tokens.
"""
loader = Loader(stream)
while loader.check_token():
yield loader.get_token()
def parse(stream, Loader=Loader):
"""
Parse a YAML stream and produce parsing events.
"""
loader = Loader(stream)
while loader.check_event():
yield loader.get_event()
def compose(stream, Loader=Loader):
"""
Parse the first YAML document in a stream
and produce the corresponding representation tree.
"""
loader = Loader(stream)
return loader.get_single_node()
def compose_all(stream, Loader=Loader):
"""
Parse all YAML documents in a stream
and produce corresponding representation trees.
"""
loader = Loader(stream)
while loader.check_node():
yield loader.get_node()
def load(stream, Loader=Loader):
"""
Parse the first YAML document in a stream
and produce the corresponding Python object.
"""
loader = Loader(stream)
return loader.get_single_data()
def load_all(stream, Loader=Loader):
"""
Parse all YAML documents in a stream
and produce corresponding Python objects.
"""
loader = Loader(stream)
while loader.check_data():
yield loader.get_data()
def safe_load(stream):
"""
Parse the first YAML document in a stream
and produce the corresponding Python object.
Resolve only basic YAML tags.
"""
return load(stream, SafeLoader)
def safe_load_all(stream):
"""
Parse all YAML documents in a stream
and produce corresponding Python objects.
Resolve only basic YAML tags.
"""
return load_all(stream, SafeLoader)
def emit(events, stream=None, Dumper=Dumper,
canonical=None, indent=None, width=None,
allow_unicode=None, line_break=None):
"""
Emit YAML parsing events into a stream.
If stream is None, return the produced string instead.
"""
getvalue = None
if stream is None:
from StringIO import StringIO
stream = StringIO()
getvalue = stream.getvalue
dumper = Dumper(stream, canonical=canonical, indent=indent, width=width,
allow_unicode=allow_unicode, line_break=line_break)
for event in events:
dumper.emit(event)
if getvalue:
return getvalue()
def serialize_all(nodes, stream=None, Dumper=Dumper,
canonical=None, indent=None, width=None,
allow_unicode=None, line_break=None,
encoding='utf-8', explicit_start=None, explicit_end=None,
version=None, tags=None):
"""
Serialize a sequence of representation trees into a YAML stream.
If stream is None, return the produced string instead.
"""
getvalue = None
if stream is None:
if encoding is None:
from StringIO import StringIO
else:
from cStringIO import StringIO
stream = StringIO()
getvalue = stream.getvalue
dumper = Dumper(stream, canonical=canonical, indent=indent, width=width,
allow_unicode=allow_unicode, line_break=line_break,
encoding=encoding, version=version, tags=tags,
explicit_start=explicit_start, explicit_end=explicit_end)
dumper.open()
for node in nodes:
dumper.serialize(node)
dumper.close()
if getvalue:
return getvalue()
def serialize(node, stream=None, Dumper=Dumper, **kwds):
"""
Serialize a representation tree into a YAML stream.
If stream is None, return the produced string instead.
"""
return serialize_all([node], stream, Dumper=Dumper, **kwds)
def dump_all(documents, stream=None, Dumper=Dumper,
default_style=None, default_flow_style=None,
canonical=None, indent=None, width=None,
allow_unicode=None, line_break=None,
encoding='utf-8', explicit_start=None, explicit_end=None,
version=None, tags=None):
"""
Serialize a sequence of Python objects into a YAML stream.
If stream is None, return the produced string instead.
"""
getvalue = None
if stream is None:
if encoding is None:
from StringIO import StringIO
else:
from cStringIO import StringIO
stream = StringIO()
getvalue = stream.getvalue
dumper = Dumper(stream, default_style=default_style,
default_flow_style=default_flow_style,
canonical=canonical, indent=indent, width=width,
allow_unicode=allow_unicode, line_break=line_break,
encoding=encoding, version=version, tags=tags,
explicit_start=explicit_start, explicit_end=explicit_end)
dumper.open()
for data in documents:
dumper.represent(data)
dumper.close()
if getvalue:
return getvalue()
def dump(data, stream=None, Dumper=Dumper, **kwds):
"""
Serialize a Python object into a YAML stream.
If stream is None, return the produced string instead.
"""
return dump_all([data], stream, Dumper=Dumper, **kwds)
def safe_dump_all(documents, stream=None, **kwds):
"""
Serialize a sequence of Python objects into a YAML stream.
Produce only basic YAML tags.
If stream is None, return the produced string instead.
"""
return dump_all(documents, stream, Dumper=SafeDumper, **kwds)
def safe_dump(data, stream=None, **kwds):
"""
Serialize a Python object into a YAML stream.
Produce only basic YAML tags.
If stream is None, return the produced string instead.
"""
return dump_all([data], stream, Dumper=SafeDumper, **kwds)
def add_implicit_resolver(tag, regexp, first=None,
Loader=Loader, Dumper=Dumper):
"""
Add an implicit scalar detector.
If an implicit scalar value matches the given regexp,
the corresponding tag is assigned to the scalar.
first is a sequence of possible initial characters or None.
"""
Loader.add_implicit_resolver(tag, regexp, first)
Dumper.add_implicit_resolver(tag, regexp, first)
def add_path_resolver(tag, path, kind=None, Loader=Loader, Dumper=Dumper):
"""
Add a path based resolver for the given tag.
A path is a list of keys that forms a path
to a node in the representation tree.
Keys can be string values, integers, or None.
"""
Loader.add_path_resolver(tag, path, kind)
Dumper.add_path_resolver(tag, path, kind)
def add_constructor(tag, constructor, Loader=Loader):
"""
Add a constructor for the given tag.
Constructor is a function that accepts a Loader instance
and a node object and produces the corresponding Python object.
"""
Loader.add_constructor(tag, constructor)
def add_multi_constructor(tag_prefix, multi_constructor, Loader=Loader):
"""
Add a multi-constructor for the given tag prefix.
Multi-constructor is called for a node if its tag starts with tag_prefix.
Multi-constructor accepts a Loader instance, a tag suffix,
and a node object and produces the corresponding Python object.
"""
Loader.add_multi_constructor(tag_prefix, multi_constructor)
def add_representer(data_type, representer, Dumper=Dumper):
"""
Add a representer for the given type.
Representer is a function accepting a Dumper instance
and an instance of the given data type
and producing the corresponding representation node.
"""
Dumper.add_representer(data_type, representer)
def add_multi_representer(data_type, multi_representer, Dumper=Dumper):
"""
Add a representer for the given type.
Multi-representer is a function accepting a Dumper instance
and an instance of the given data type or subtype
and producing the corresponding representation node.
"""
Dumper.add_multi_representer(data_type, multi_representer)
class YAMLObjectMetaclass(type):
"""
The metaclass for YAMLObject.
"""
def __init__(cls, name, bases, kwds):
super(YAMLObjectMetaclass, cls).__init__(name, bases, kwds)
if 'yaml_tag' in kwds and kwds['yaml_tag'] is not None:
cls.yaml_loader.add_constructor(cls.yaml_tag, cls.from_yaml)
cls.yaml_dumper.add_representer(cls, cls.to_yaml)
class YAMLObject(object):
"""
An object that can dump itself to a YAML stream
and load itself from a YAML stream.
"""
__metaclass__ = YAMLObjectMetaclass
__slots__ = () # no direct instantiation, so allow immutable subclasses
yaml_loader = Loader
yaml_dumper = Dumper
yaml_tag = None
yaml_flow_style = None
def from_yaml(cls, loader, node):
"""
Convert a representation node to a Python object.
"""
return loader.construct_yaml_object(node, cls)
from_yaml = classmethod(from_yaml)
def to_yaml(cls, dumper, data):
"""
Convert a Python object to a representation node.
"""
return dumper.represent_yaml_object(cls.yaml_tag, data, cls,
flow_style=cls.yaml_flow_style)
to_yaml = classmethod(to_yaml)

View File

@ -0,0 +1,139 @@
__all__ = ['Composer', 'ComposerError']
from error import MarkedYAMLError
from events import *
from nodes import *
class ComposerError(MarkedYAMLError):
pass
class Composer(object):
def __init__(self):
self.anchors = {}
def check_node(self):
# Drop the STREAM-START event.
if self.check_event(StreamStartEvent):
self.get_event()
# If there are more documents available?
return not self.check_event(StreamEndEvent)
def get_node(self):
# Get the root node of the next document.
if not self.check_event(StreamEndEvent):
return self.compose_document()
def get_single_node(self):
# Drop the STREAM-START event.
self.get_event()
# Compose a document if the stream is not empty.
document = None
if not self.check_event(StreamEndEvent):
document = self.compose_document()
# Ensure that the stream contains no more documents.
if not self.check_event(StreamEndEvent):
event = self.get_event()
raise ComposerError("expected a single document in the stream",
document.start_mark, "but found another document",
event.start_mark)
# Drop the STREAM-END event.
self.get_event()
return document
def compose_document(self):
# Drop the DOCUMENT-START event.
self.get_event()
# Compose the root node.
node = self.compose_node(None, None)
# Drop the DOCUMENT-END event.
self.get_event()
self.anchors = {}
return node
def compose_node(self, parent, index):
if self.check_event(AliasEvent):
event = self.get_event()
anchor = event.anchor
if anchor not in self.anchors:
raise ComposerError(None, None, "found undefined alias %r"
% anchor.encode('utf-8'), event.start_mark)
return self.anchors[anchor]
event = self.peek_event()
anchor = event.anchor
if anchor is not None:
if anchor in self.anchors:
raise ComposerError("found duplicate anchor %r; first occurence"
% anchor.encode('utf-8'), self.anchors[anchor].start_mark,
"second occurence", event.start_mark)
self.descend_resolver(parent, index)
if self.check_event(ScalarEvent):
node = self.compose_scalar_node(anchor)
elif self.check_event(SequenceStartEvent):
node = self.compose_sequence_node(anchor)
elif self.check_event(MappingStartEvent):
node = self.compose_mapping_node(anchor)
self.ascend_resolver()
return node
def compose_scalar_node(self, anchor):
event = self.get_event()
tag = event.tag
if tag is None or tag == u'!':
tag = self.resolve(ScalarNode, event.value, event.implicit)
node = ScalarNode(tag, event.value,
event.start_mark, event.end_mark, style=event.style)
if anchor is not None:
self.anchors[anchor] = node
return node
def compose_sequence_node(self, anchor):
start_event = self.get_event()
tag = start_event.tag
if tag is None or tag == u'!':
tag = self.resolve(SequenceNode, None, start_event.implicit)
node = SequenceNode(tag, [],
start_event.start_mark, None,
flow_style=start_event.flow_style)
if anchor is not None:
self.anchors[anchor] = node
index = 0
while not self.check_event(SequenceEndEvent):
node.value.append(self.compose_node(node, index))
index += 1
end_event = self.get_event()
node.end_mark = end_event.end_mark
return node
def compose_mapping_node(self, anchor):
start_event = self.get_event()
tag = start_event.tag
if tag is None or tag == u'!':
tag = self.resolve(MappingNode, None, start_event.implicit)
node = MappingNode(tag, [],
start_event.start_mark, None,
flow_style=start_event.flow_style)
if anchor is not None:
self.anchors[anchor] = node
while not self.check_event(MappingEndEvent):
#key_event = self.peek_event()
item_key = self.compose_node(node, None)
#if item_key in node.value:
# raise ComposerError("while composing a mapping", start_event.start_mark,
# "found duplicate key", key_event.start_mark)
item_value = self.compose_node(node, item_key)
#node.value[item_key] = item_value
node.value.append((item_key, item_value))
end_event = self.get_event()
node.end_mark = end_event.end_mark
return node

View File

@ -0,0 +1,684 @@
__all__ = ['BaseConstructor', 'SafeConstructor', 'Constructor',
'ConstructorError']
from error import *
from nodes import *
import datetime
try:
set
except NameError:
from sets import Set as set
import binascii, re, sys, types
class ConstructorError(MarkedYAMLError):
pass
class BaseConstructor(object):
yaml_constructors = {}
yaml_multi_constructors = {}
def __init__(self):
self.constructed_objects = {}
self.recursive_objects = {}
self.state_generators = []
self.deep_construct = False
def check_data(self):
# If there are more documents available?
return self.check_node()
def get_data(self):
# Construct and return the next document.
if self.check_node():
return self.construct_document(self.get_node())
def get_single_data(self):
# Ensure that the stream contains a single document and construct it.
node = self.get_single_node()
if node is not None:
return self.construct_document(node)
return None
def construct_document(self, node):
data = self.construct_object(node)
while self.state_generators:
state_generators = self.state_generators
self.state_generators = []
for generator in state_generators:
for dummy in generator:
pass
self.constructed_objects = {}
self.recursive_objects = {}
self.deep_construct = False
return data
def construct_object(self, node, deep=False):
if deep:
old_deep = self.deep_construct
self.deep_construct = True
if node in self.constructed_objects:
return self.constructed_objects[node]
if node in self.recursive_objects:
raise ConstructorError(None, None,
"found unconstructable recursive node", node.start_mark)
self.recursive_objects[node] = None
constructor = None
tag_suffix = None
if node.tag in self.yaml_constructors:
constructor = self.yaml_constructors[node.tag]
else:
for tag_prefix in self.yaml_multi_constructors:
if node.tag.startswith(tag_prefix):
tag_suffix = node.tag[len(tag_prefix):]
constructor = self.yaml_multi_constructors[tag_prefix]
break
else:
if None in self.yaml_multi_constructors:
tag_suffix = node.tag
constructor = self.yaml_multi_constructors[None]
elif None in self.yaml_constructors:
constructor = self.yaml_constructors[None]
elif isinstance(node, ScalarNode):
constructor = self.__class__.construct_scalar
elif isinstance(node, SequenceNode):
constructor = self.__class__.construct_sequence
elif isinstance(node, MappingNode):
constructor = self.__class__.construct_mapping
if tag_suffix is None:
data = constructor(self, node)
else:
data = constructor(self, tag_suffix, node)
if isinstance(data, types.GeneratorType):
generator = data
data = generator.next()
if self.deep_construct:
for dummy in generator:
pass
else:
self.state_generators.append(generator)
self.constructed_objects[node] = data
del self.recursive_objects[node]
if deep:
self.deep_construct = old_deep
return data
def construct_scalar(self, node):
if not isinstance(node, ScalarNode):
raise ConstructorError(None, None,
"expected a scalar node, but found %s" % node.id,
node.start_mark)
return node.value
def construct_sequence(self, node, deep=False):
if not isinstance(node, SequenceNode):
raise ConstructorError(None, None,
"expected a sequence node, but found %s" % node.id,
node.start_mark)
return [self.construct_object(child, deep=deep)
for child in node.value]
def construct_mapping(self, node, deep=False):
if not isinstance(node, MappingNode):
raise ConstructorError(None, None,
"expected a mapping node, but found %s" % node.id,
node.start_mark)
mapping = {}
for key_node, value_node in node.value:
key = self.construct_object(key_node, deep=deep)
try:
hash(key)
except TypeError, exc:
raise ConstructorError("while constructing a mapping", node.start_mark,
"found unacceptable key (%s)" % exc, key_node.start_mark)
value = self.construct_object(value_node, deep=deep)
mapping[key] = value
return mapping
def construct_pairs(self, node, deep=False):
if not isinstance(node, MappingNode):
raise ConstructorError(None, None,
"expected a mapping node, but found %s" % node.id,
node.start_mark)
pairs = []
for key_node, value_node in node.value:
key = self.construct_object(key_node, deep=deep)
value = self.construct_object(value_node, deep=deep)
pairs.append((key, value))
return pairs
def add_constructor(cls, tag, constructor):
if not 'yaml_constructors' in cls.__dict__:
cls.yaml_constructors = cls.yaml_constructors.copy()
cls.yaml_constructors[tag] = constructor
add_constructor = classmethod(add_constructor)
def add_multi_constructor(cls, tag_prefix, multi_constructor):
if not 'yaml_multi_constructors' in cls.__dict__:
cls.yaml_multi_constructors = cls.yaml_multi_constructors.copy()
cls.yaml_multi_constructors[tag_prefix] = multi_constructor
add_multi_constructor = classmethod(add_multi_constructor)
class SafeConstructor(BaseConstructor):
def construct_scalar(self, node):
if isinstance(node, MappingNode):
for key_node, value_node in node.value:
if key_node.tag == u'tag:yaml.org,2002:value':
return self.construct_scalar(value_node)
return BaseConstructor.construct_scalar(self, node)
def flatten_mapping(self, node):
merge = []
index = 0
while index < len(node.value):
key_node, value_node = node.value[index]
if key_node.tag == u'tag:yaml.org,2002:merge':
del node.value[index]
if isinstance(value_node, MappingNode):
self.flatten_mapping(value_node)
merge.extend(value_node.value)
elif isinstance(value_node, SequenceNode):
submerge = []
for subnode in value_node.value:
if not isinstance(subnode, MappingNode):
raise ConstructorError("while constructing a mapping",
node.start_mark,
"expected a mapping for merging, but found %s"
% subnode.id, subnode.start_mark)
self.flatten_mapping(subnode)
submerge.append(subnode.value)
submerge.reverse()
for value in submerge:
merge.extend(value)
else:
raise ConstructorError("while constructing a mapping", node.start_mark,
"expected a mapping or list of mappings for merging, but found %s"
% value_node.id, value_node.start_mark)
elif key_node.tag == u'tag:yaml.org,2002:value':
key_node.tag = u'tag:yaml.org,2002:str'
index += 1
else:
index += 1
if merge:
node.value = merge + node.value
def construct_mapping(self, node, deep=False):
if isinstance(node, MappingNode):
self.flatten_mapping(node)
return BaseConstructor.construct_mapping(self, node, deep=deep)
def construct_yaml_null(self, node):
self.construct_scalar(node)
return None
bool_values = {
u'yes': True,
u'no': False,
u'true': True,
u'false': False,
u'on': True,
u'off': False,
}
def construct_yaml_bool(self, node):
value = self.construct_scalar(node)
return self.bool_values[value.lower()]
def construct_yaml_int(self, node):
value = str(self.construct_scalar(node))
value = value.replace('_', '')
sign = +1
if value[0] == '-':
sign = -1
if value[0] in '+-':
value = value[1:]
if value == '0':
return 0
elif value.startswith('0b'):
return sign*int(value[2:], 2)
elif value.startswith('0x'):
return sign*int(value[2:], 16)
elif value[0] == '0':
return sign*int(value, 8)
elif ':' in value:
digits = [int(part) for part in value.split(':')]
digits.reverse()
base = 1
value = 0
for digit in digits:
value += digit*base
base *= 60
return sign*value
else:
return sign*int(value)
inf_value = 1e300
while inf_value != inf_value*inf_value:
inf_value *= inf_value
nan_value = -inf_value/inf_value # Trying to make a quiet NaN (like C99).
def construct_yaml_float(self, node):
value = str(self.construct_scalar(node))
value = value.replace('_', '').lower()
sign = +1
if value[0] == '-':
sign = -1
if value[0] in '+-':
value = value[1:]
if value == '.inf':
return sign*self.inf_value
elif value == '.nan':
return self.nan_value
elif ':' in value:
digits = [float(part) for part in value.split(':')]
digits.reverse()
base = 1
value = 0.0
for digit in digits:
value += digit*base
base *= 60
return sign*value
else:
return sign*float(value)
def construct_yaml_binary(self, node):
value = self.construct_scalar(node)
try:
return str(value).decode('base64')
except (binascii.Error, UnicodeEncodeError), exc:
raise ConstructorError(None, None,
"failed to decode base64 data: %s" % exc, node.start_mark)
timestamp_regexp = re.compile(
ur'''^(?P<year>[0-9][0-9][0-9][0-9])
-(?P<month>[0-9][0-9]?)
-(?P<day>[0-9][0-9]?)
(?:(?:[Tt]|[ \t]+)
(?P<hour>[0-9][0-9]?)
:(?P<minute>[0-9][0-9])
:(?P<second>[0-9][0-9])
(?:\.(?P<fraction>[0-9]*))?
(?:[ \t]*(?P<tz>Z|(?P<tz_sign>[-+])(?P<tz_hour>[0-9][0-9]?)
(?::(?P<tz_minute>[0-9][0-9]))?))?)?$''', re.X)
def construct_yaml_timestamp(self, node):
value = self.construct_scalar(node)
match = self.timestamp_regexp.match(node.value)
values = match.groupdict()
year = int(values['year'])
month = int(values['month'])
day = int(values['day'])
if not values['hour']:
return datetime.date(year, month, day)
hour = int(values['hour'])
minute = int(values['minute'])
second = int(values['second'])
fraction = 0
if values['fraction']:
fraction = values['fraction'][:6]
while len(fraction) < 6:
fraction += '0'
fraction = int(fraction)
delta = None
if values['tz_sign']:
tz_hour = int(values['tz_hour'])
tz_minute = int(values['tz_minute'] or 0)
delta = datetime.timedelta(hours=tz_hour, minutes=tz_minute)
if values['tz_sign'] == '-':
delta = -delta
data = datetime.datetime(year, month, day, hour, minute, second, fraction)
if delta:
data -= delta
return data
def construct_yaml_omap(self, node):
# Note: we do not check for duplicate keys, because it's too
# CPU-expensive.
omap = []
yield omap
if not isinstance(node, SequenceNode):
raise ConstructorError("while constructing an ordered map", node.start_mark,
"expected a sequence, but found %s" % node.id, node.start_mark)
for subnode in node.value:
if not isinstance(subnode, MappingNode):
raise ConstructorError("while constructing an ordered map", node.start_mark,
"expected a mapping of length 1, but found %s" % subnode.id,
subnode.start_mark)
if len(subnode.value) != 1:
raise ConstructorError("while constructing an ordered map", node.start_mark,
"expected a single mapping item, but found %d items" % len(subnode.value),
subnode.start_mark)
key_node, value_node = subnode.value[0]
key = self.construct_object(key_node)
value = self.construct_object(value_node)
omap.append((key, value))
def construct_yaml_pairs(self, node):
# Note: the same code as `construct_yaml_omap`.
pairs = []
yield pairs
if not isinstance(node, SequenceNode):
raise ConstructorError("while constructing pairs", node.start_mark,
"expected a sequence, but found %s" % node.id, node.start_mark)
for subnode in node.value:
if not isinstance(subnode, MappingNode):
raise ConstructorError("while constructing pairs", node.start_mark,
"expected a mapping of length 1, but found %s" % subnode.id,
subnode.start_mark)
if len(subnode.value) != 1:
raise ConstructorError("while constructing pairs", node.start_mark,
"expected a single mapping item, but found %d items" % len(subnode.value),
subnode.start_mark)
key_node, value_node = subnode.value[0]
key = self.construct_object(key_node)
value = self.construct_object(value_node)
pairs.append((key, value))
def construct_yaml_set(self, node):
data = set()
yield data
value = self.construct_mapping(node)
data.update(value)
def construct_yaml_str(self, node):
value = self.construct_scalar(node)
try:
return value.encode('ascii')
except UnicodeEncodeError:
return value
def construct_yaml_seq(self, node):
data = []
yield data
data.extend(self.construct_sequence(node))
def construct_yaml_map(self, node):
data = {}
yield data
value = self.construct_mapping(node)
data.update(value)
def construct_yaml_object(self, node, cls):
data = cls.__new__(cls)
yield data
if hasattr(data, '__setstate__'):
state = self.construct_mapping(node, deep=True)
data.__setstate__(state)
else:
state = self.construct_mapping(node)
data.__dict__.update(state)
def construct_undefined(self, node):
raise ConstructorError(None, None,
"could not determine a constructor for the tag %r" % node.tag.encode('utf-8'),
node.start_mark)
SafeConstructor.add_constructor(
u'tag:yaml.org,2002:null',
SafeConstructor.construct_yaml_null)
SafeConstructor.add_constructor(
u'tag:yaml.org,2002:bool',
SafeConstructor.construct_yaml_bool)
SafeConstructor.add_constructor(
u'tag:yaml.org,2002:int',
SafeConstructor.construct_yaml_int)
SafeConstructor.add_constructor(
u'tag:yaml.org,2002:float',
SafeConstructor.construct_yaml_float)
SafeConstructor.add_constructor(
u'tag:yaml.org,2002:binary',
SafeConstructor.construct_yaml_binary)
SafeConstructor.add_constructor(
u'tag:yaml.org,2002:timestamp',
SafeConstructor.construct_yaml_timestamp)
SafeConstructor.add_constructor(
u'tag:yaml.org,2002:omap',
SafeConstructor.construct_yaml_omap)
SafeConstructor.add_constructor(
u'tag:yaml.org,2002:pairs',
SafeConstructor.construct_yaml_pairs)
SafeConstructor.add_constructor(
u'tag:yaml.org,2002:set',
SafeConstructor.construct_yaml_set)
SafeConstructor.add_constructor(
u'tag:yaml.org,2002:str',
SafeConstructor.construct_yaml_str)
SafeConstructor.add_constructor(
u'tag:yaml.org,2002:seq',
SafeConstructor.construct_yaml_seq)
SafeConstructor.add_constructor(
u'tag:yaml.org,2002:map',
SafeConstructor.construct_yaml_map)
SafeConstructor.add_constructor(None,
SafeConstructor.construct_undefined)
class Constructor(SafeConstructor):
def construct_python_str(self, node):
return self.construct_scalar(node).encode('utf-8')
def construct_python_unicode(self, node):
return self.construct_scalar(node)
def construct_python_long(self, node):
return long(self.construct_yaml_int(node))
def construct_python_complex(self, node):
return complex(self.construct_scalar(node))
def construct_python_tuple(self, node):
return tuple(self.construct_sequence(node))
def find_python_module(self, name, mark):
if not name:
raise ConstructorError("while constructing a Python module", mark,
"expected non-empty name appended to the tag", mark)
try:
__import__(name)
except ImportError, exc:
raise ConstructorError("while constructing a Python module", mark,
"cannot find module %r (%s)" % (name.encode('utf-8'), exc), mark)
return sys.modules[name]
def find_python_name(self, name, mark):
if not name:
raise ConstructorError("while constructing a Python object", mark,
"expected non-empty name appended to the tag", mark)
if u'.' in name:
# Python 2.4 only
#module_name, object_name = name.rsplit('.', 1)
items = name.split('.')
object_name = items.pop()
module_name = '.'.join(items)
else:
module_name = '__builtin__'
object_name = name
try:
__import__(module_name)
except ImportError, exc:
raise ConstructorError("while constructing a Python object", mark,
"cannot find module %r (%s)" % (module_name.encode('utf-8'), exc), mark)
module = sys.modules[module_name]
if not hasattr(module, object_name):
raise ConstructorError("while constructing a Python object", mark,
"cannot find %r in the module %r" % (object_name.encode('utf-8'),
module.__name__), mark)
return getattr(module, object_name)
def construct_python_name(self, suffix, node):
value = self.construct_scalar(node)
if value:
raise ConstructorError("while constructing a Python name", node.start_mark,
"expected the empty value, but found %r" % value.encode('utf-8'),
node.start_mark)
return self.find_python_name(suffix, node.start_mark)
def construct_python_module(self, suffix, node):
value = self.construct_scalar(node)
if value:
raise ConstructorError("while constructing a Python module", node.start_mark,
"expected the empty value, but found %r" % value.encode('utf-8'),
node.start_mark)
return self.find_python_module(suffix, node.start_mark)
class classobj: pass
def make_python_instance(self, suffix, node,
args=None, kwds=None, newobj=False):
if not args:
args = []
if not kwds:
kwds = {}
cls = self.find_python_name(suffix, node.start_mark)
if newobj and isinstance(cls, type(self.classobj)) \
and not args and not kwds:
instance = self.classobj()
instance.__class__ = cls
return instance
elif newobj and isinstance(cls, type):
return cls.__new__(cls, *args, **kwds)
else:
return cls(*args, **kwds)
def set_python_instance_state(self, instance, state):
if hasattr(instance, '__setstate__'):
instance.__setstate__(state)
else:
slotstate = {}
if isinstance(state, tuple) and len(state) == 2:
state, slotstate = state
if hasattr(instance, '__dict__'):
instance.__dict__.update(state)
elif state:
slotstate.update(state)
for key, value in slotstate.items():
setattr(object, key, value)
def construct_python_object(self, suffix, node):
# Format:
# !!python/object:module.name { ... state ... }
instance = self.make_python_instance(suffix, node, newobj=True)
yield instance
deep = hasattr(instance, '__setstate__')
state = self.construct_mapping(node, deep=deep)
self.set_python_instance_state(instance, state)
def construct_python_object_apply(self, suffix, node, newobj=False):
# Format:
# !!python/object/apply # (or !!python/object/new)
# args: [ ... arguments ... ]
# kwds: { ... keywords ... }
# state: ... state ...
# listitems: [ ... listitems ... ]
# dictitems: { ... dictitems ... }
# or short format:
# !!python/object/apply [ ... arguments ... ]
# The difference between !!python/object/apply and !!python/object/new
# is how an object is created, check make_python_instance for details.
if isinstance(node, SequenceNode):
args = self.construct_sequence(node, deep=True)
kwds = {}
state = {}
listitems = []
dictitems = {}
else:
value = self.construct_mapping(node, deep=True)
args = value.get('args', [])
kwds = value.get('kwds', {})
state = value.get('state', {})
listitems = value.get('listitems', [])
dictitems = value.get('dictitems', {})
instance = self.make_python_instance(suffix, node, args, kwds, newobj)
if state:
self.set_python_instance_state(instance, state)
if listitems:
instance.extend(listitems)
if dictitems:
for key in dictitems:
instance[key] = dictitems[key]
return instance
def construct_python_object_new(self, suffix, node):
return self.construct_python_object_apply(suffix, node, newobj=True)
Constructor.add_constructor(
u'tag:yaml.org,2002:python/none',
Constructor.construct_yaml_null)
Constructor.add_constructor(
u'tag:yaml.org,2002:python/bool',
Constructor.construct_yaml_bool)
Constructor.add_constructor(
u'tag:yaml.org,2002:python/str',
Constructor.construct_python_str)
Constructor.add_constructor(
u'tag:yaml.org,2002:python/unicode',
Constructor.construct_python_unicode)
Constructor.add_constructor(
u'tag:yaml.org,2002:python/int',
Constructor.construct_yaml_int)
Constructor.add_constructor(
u'tag:yaml.org,2002:python/long',
Constructor.construct_python_long)
Constructor.add_constructor(
u'tag:yaml.org,2002:python/float',
Constructor.construct_yaml_float)
Constructor.add_constructor(
u'tag:yaml.org,2002:python/complex',
Constructor.construct_python_complex)
Constructor.add_constructor(
u'tag:yaml.org,2002:python/list',
Constructor.construct_yaml_seq)
Constructor.add_constructor(
u'tag:yaml.org,2002:python/tuple',
Constructor.construct_python_tuple)
Constructor.add_constructor(
u'tag:yaml.org,2002:python/dict',
Constructor.construct_yaml_map)
Constructor.add_multi_constructor(
u'tag:yaml.org,2002:python/name:',
Constructor.construct_python_name)
Constructor.add_multi_constructor(
u'tag:yaml.org,2002:python/module:',
Constructor.construct_python_module)
Constructor.add_multi_constructor(
u'tag:yaml.org,2002:python/object:',
Constructor.construct_python_object)
Constructor.add_multi_constructor(
u'tag:yaml.org,2002:python/object/apply:',
Constructor.construct_python_object_apply)
Constructor.add_multi_constructor(
u'tag:yaml.org,2002:python/object/new:',
Constructor.construct_python_object_new)

View File

@ -0,0 +1,85 @@
__all__ = ['CBaseLoader', 'CSafeLoader', 'CLoader',
'CBaseDumper', 'CSafeDumper', 'CDumper']
from _yaml import CParser, CEmitter
from constructor import *
from serializer import *
from representer import *
from resolver import *
class CBaseLoader(CParser, BaseConstructor, BaseResolver):
def __init__(self, stream):
CParser.__init__(self, stream)
BaseConstructor.__init__(self)
BaseResolver.__init__(self)
class CSafeLoader(CParser, SafeConstructor, Resolver):
def __init__(self, stream):
CParser.__init__(self, stream)
SafeConstructor.__init__(self)
Resolver.__init__(self)
class CLoader(CParser, Constructor, Resolver):
def __init__(self, stream):
CParser.__init__(self, stream)
Constructor.__init__(self)
Resolver.__init__(self)
class CBaseDumper(CEmitter, BaseRepresenter, BaseResolver):
def __init__(self, stream,
default_style=None, default_flow_style=None,
canonical=None, indent=None, width=None,
allow_unicode=None, line_break=None,
encoding=None, explicit_start=None, explicit_end=None,
version=None, tags=None):
CEmitter.__init__(self, stream, canonical=canonical,
indent=indent, width=width, encoding=encoding,
allow_unicode=allow_unicode, line_break=line_break,
explicit_start=explicit_start, explicit_end=explicit_end,
version=version, tags=tags)
Representer.__init__(self, default_style=default_style,
default_flow_style=default_flow_style)
Resolver.__init__(self)
class CSafeDumper(CEmitter, SafeRepresenter, Resolver):
def __init__(self, stream,
default_style=None, default_flow_style=None,
canonical=None, indent=None, width=None,
allow_unicode=None, line_break=None,
encoding=None, explicit_start=None, explicit_end=None,
version=None, tags=None):
CEmitter.__init__(self, stream, canonical=canonical,
indent=indent, width=width, encoding=encoding,
allow_unicode=allow_unicode, line_break=line_break,
explicit_start=explicit_start, explicit_end=explicit_end,
version=version, tags=tags)
SafeRepresenter.__init__(self, default_style=default_style,
default_flow_style=default_flow_style)
Resolver.__init__(self)
class CDumper(CEmitter, Serializer, Representer, Resolver):
def __init__(self, stream,
default_style=None, default_flow_style=None,
canonical=None, indent=None, width=None,
allow_unicode=None, line_break=None,
encoding=None, explicit_start=None, explicit_end=None,
version=None, tags=None):
CEmitter.__init__(self, stream, canonical=canonical,
indent=indent, width=width, encoding=encoding,
allow_unicode=allow_unicode, line_break=line_break,
explicit_start=explicit_start, explicit_end=explicit_end,
version=version, tags=tags)
Representer.__init__(self, default_style=default_style,
default_flow_style=default_flow_style)
Resolver.__init__(self)

View File

@ -0,0 +1,62 @@
__all__ = ['BaseDumper', 'SafeDumper', 'Dumper']
from emitter import *
from serializer import *
from representer import *
from resolver import *
class BaseDumper(Emitter, Serializer, BaseRepresenter, BaseResolver):
def __init__(self, stream,
default_style=None, default_flow_style=None,
canonical=None, indent=None, width=None,
allow_unicode=None, line_break=None,
encoding=None, explicit_start=None, explicit_end=None,
version=None, tags=None):
Emitter.__init__(self, stream, canonical=canonical,
indent=indent, width=width,
allow_unicode=allow_unicode, line_break=line_break)
Serializer.__init__(self, encoding=encoding,
explicit_start=explicit_start, explicit_end=explicit_end,
version=version, tags=tags)
Representer.__init__(self, default_style=default_style,
default_flow_style=default_flow_style)
Resolver.__init__(self)
class SafeDumper(Emitter, Serializer, SafeRepresenter, Resolver):
def __init__(self, stream,
default_style=None, default_flow_style=None,
canonical=None, indent=None, width=None,
allow_unicode=None, line_break=None,
encoding=None, explicit_start=None, explicit_end=None,
version=None, tags=None):
Emitter.__init__(self, stream, canonical=canonical,
indent=indent, width=width,
allow_unicode=allow_unicode, line_break=line_break)
Serializer.__init__(self, encoding=encoding,
explicit_start=explicit_start, explicit_end=explicit_end,
version=version, tags=tags)
SafeRepresenter.__init__(self, default_style=default_style,
default_flow_style=default_flow_style)
Resolver.__init__(self)
class Dumper(Emitter, Serializer, Representer, Resolver):
def __init__(self, stream,
default_style=None, default_flow_style=None,
canonical=None, indent=None, width=None,
allow_unicode=None, line_break=None,
encoding=None, explicit_start=None, explicit_end=None,
version=None, tags=None):
Emitter.__init__(self, stream, canonical=canonical,
indent=indent, width=width,
allow_unicode=allow_unicode, line_break=line_break)
Serializer.__init__(self, encoding=encoding,
explicit_start=explicit_start, explicit_end=explicit_end,
version=version, tags=tags)
Representer.__init__(self, default_style=default_style,
default_flow_style=default_flow_style)
Resolver.__init__(self)

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,75 @@
__all__ = ['Mark', 'YAMLError', 'MarkedYAMLError']
class Mark(object):
def __init__(self, name, index, line, column, buffer, pointer):
self.name = name
self.index = index
self.line = line
self.column = column
self.buffer = buffer
self.pointer = pointer
def get_snippet(self, indent=4, max_length=75):
if self.buffer is None:
return None
head = ''
start = self.pointer
while start > 0 and self.buffer[start-1] not in u'\0\r\n\x85\u2028\u2029':
start -= 1
if self.pointer-start > max_length/2-1:
head = ' ... '
start += 5
break
tail = ''
end = self.pointer
while end < len(self.buffer) and self.buffer[end] not in u'\0\r\n\x85\u2028\u2029':
end += 1
if end-self.pointer > max_length/2-1:
tail = ' ... '
end -= 5
break
snippet = self.buffer[start:end].encode('utf-8')
return ' '*indent + head + snippet + tail + '\n' \
+ ' '*(indent+self.pointer-start+len(head)) + '^'
def __str__(self):
snippet = self.get_snippet()
where = " in \"%s\", line %d, column %d" \
% (self.name, self.line+1, self.column+1)
if snippet is not None:
where += ":\n"+snippet
return where
class YAMLError(Exception):
pass
class MarkedYAMLError(YAMLError):
def __init__(self, context=None, context_mark=None,
problem=None, problem_mark=None, note=None):
self.context = context
self.context_mark = context_mark
self.problem = problem
self.problem_mark = problem_mark
self.note = note
def __str__(self):
lines = []
if self.context is not None:
lines.append(self.context)
if self.context_mark is not None \
and (self.problem is None or self.problem_mark is None
or self.context_mark.name != self.problem_mark.name
or self.context_mark.line != self.problem_mark.line
or self.context_mark.column != self.problem_mark.column):
lines.append(str(self.context_mark))
if self.problem is not None:
lines.append(self.problem)
if self.problem_mark is not None:
lines.append(str(self.problem_mark))
if self.note is not None:
lines.append(self.note)
return '\n'.join(lines)

View File

@ -0,0 +1,86 @@
# Abstract classes.
class Event(object):
def __init__(self, start_mark=None, end_mark=None):
self.start_mark = start_mark
self.end_mark = end_mark
def __repr__(self):
attributes = [key for key in ['anchor', 'tag', 'implicit', 'value']
if hasattr(self, key)]
arguments = ', '.join(['%s=%r' % (key, getattr(self, key))
for key in attributes])
return '%s(%s)' % (self.__class__.__name__, arguments)
class NodeEvent(Event):
def __init__(self, anchor, start_mark=None, end_mark=None):
self.anchor = anchor
self.start_mark = start_mark
self.end_mark = end_mark
class CollectionStartEvent(NodeEvent):
def __init__(self, anchor, tag, implicit, start_mark=None, end_mark=None,
flow_style=None):
self.anchor = anchor
self.tag = tag
self.implicit = implicit
self.start_mark = start_mark
self.end_mark = end_mark
self.flow_style = flow_style
class CollectionEndEvent(Event):
pass
# Implementations.
class StreamStartEvent(Event):
def __init__(self, start_mark=None, end_mark=None, encoding=None):
self.start_mark = start_mark
self.end_mark = end_mark
self.encoding = encoding
class StreamEndEvent(Event):
pass
class DocumentStartEvent(Event):
def __init__(self, start_mark=None, end_mark=None,
explicit=None, version=None, tags=None):
self.start_mark = start_mark
self.end_mark = end_mark
self.explicit = explicit
self.version = version
self.tags = tags
class DocumentEndEvent(Event):
def __init__(self, start_mark=None, end_mark=None,
explicit=None):
self.start_mark = start_mark
self.end_mark = end_mark
self.explicit = explicit
class AliasEvent(NodeEvent):
pass
class ScalarEvent(NodeEvent):
def __init__(self, anchor, tag, implicit, value,
start_mark=None, end_mark=None, style=None):
self.anchor = anchor
self.tag = tag
self.implicit = implicit
self.value = value
self.start_mark = start_mark
self.end_mark = end_mark
self.style = style
class SequenceStartEvent(CollectionStartEvent):
pass
class SequenceEndEvent(CollectionEndEvent):
pass
class MappingStartEvent(CollectionStartEvent):
pass
class MappingEndEvent(CollectionEndEvent):
pass

View File

@ -0,0 +1,40 @@
__all__ = ['BaseLoader', 'SafeLoader', 'Loader']
from reader import *
from scanner import *
from parser import *
from composer import *
from constructor import *
from resolver import *
class BaseLoader(Reader, Scanner, Parser, Composer, BaseConstructor, BaseResolver):
def __init__(self, stream):
Reader.__init__(self, stream)
Scanner.__init__(self)
Parser.__init__(self)
Composer.__init__(self)
BaseConstructor.__init__(self)
BaseResolver.__init__(self)
class SafeLoader(Reader, Scanner, Parser, Composer, SafeConstructor, Resolver):
def __init__(self, stream):
Reader.__init__(self, stream)
Scanner.__init__(self)
Parser.__init__(self)
Composer.__init__(self)
SafeConstructor.__init__(self)
Resolver.__init__(self)
class Loader(Reader, Scanner, Parser, Composer, Constructor, Resolver):
def __init__(self, stream):
Reader.__init__(self, stream)
Scanner.__init__(self)
Parser.__init__(self)
Composer.__init__(self)
Constructor.__init__(self)
Resolver.__init__(self)

View File

@ -0,0 +1,49 @@
class Node(object):
def __init__(self, tag, value, start_mark, end_mark):
self.tag = tag
self.value = value
self.start_mark = start_mark
self.end_mark = end_mark
def __repr__(self):
value = self.value
#if isinstance(value, list):
# if len(value) == 0:
# value = '<empty>'
# elif len(value) == 1:
# value = '<1 item>'
# else:
# value = '<%d items>' % len(value)
#else:
# if len(value) > 75:
# value = repr(value[:70]+u' ... ')
# else:
# value = repr(value)
value = repr(value)
return '%s(tag=%r, value=%s)' % (self.__class__.__name__, self.tag, value)
class ScalarNode(Node):
id = 'scalar'
def __init__(self, tag, value,
start_mark=None, end_mark=None, style=None):
self.tag = tag
self.value = value
self.start_mark = start_mark
self.end_mark = end_mark
self.style = style
class CollectionNode(Node):
def __init__(self, tag, value,
start_mark=None, end_mark=None, flow_style=None):
self.tag = tag
self.value = value
self.start_mark = start_mark
self.end_mark = end_mark
self.flow_style = flow_style
class SequenceNode(CollectionNode):
id = 'sequence'
class MappingNode(CollectionNode):
id = 'mapping'

View File

@ -0,0 +1,584 @@
# The following YAML grammar is LL(1) and is parsed by a recursive descent
# parser.
#
# stream ::= STREAM-START implicit_document? explicit_document* STREAM-END
# implicit_document ::= block_node DOCUMENT-END*
# explicit_document ::= DIRECTIVE* DOCUMENT-START block_node? DOCUMENT-END*
# block_node_or_indentless_sequence ::=
# ALIAS
# | properties (block_content | indentless_block_sequence)?
# | block_content
# | indentless_block_sequence
# block_node ::= ALIAS
# | properties block_content?
# | block_content
# flow_node ::= ALIAS
# | properties flow_content?
# | flow_content
# properties ::= TAG ANCHOR? | ANCHOR TAG?
# block_content ::= block_collection | flow_collection | SCALAR
# flow_content ::= flow_collection | SCALAR
# block_collection ::= block_sequence | block_mapping
# flow_collection ::= flow_sequence | flow_mapping
# block_sequence ::= BLOCK-SEQUENCE-START (BLOCK-ENTRY block_node?)* BLOCK-END
# indentless_sequence ::= (BLOCK-ENTRY block_node?)+
# block_mapping ::= BLOCK-MAPPING_START
# ((KEY block_node_or_indentless_sequence?)?
# (VALUE block_node_or_indentless_sequence?)?)*
# BLOCK-END
# flow_sequence ::= FLOW-SEQUENCE-START
# (flow_sequence_entry FLOW-ENTRY)*
# flow_sequence_entry?
# FLOW-SEQUENCE-END
# flow_sequence_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)?
# flow_mapping ::= FLOW-MAPPING-START
# (flow_mapping_entry FLOW-ENTRY)*
# flow_mapping_entry?
# FLOW-MAPPING-END
# flow_mapping_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)?
#
# FIRST sets:
#
# stream: { STREAM-START }
# explicit_document: { DIRECTIVE DOCUMENT-START }
# implicit_document: FIRST(block_node)
# block_node: { ALIAS TAG ANCHOR SCALAR BLOCK-SEQUENCE-START BLOCK-MAPPING-START FLOW-SEQUENCE-START FLOW-MAPPING-START }
# flow_node: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START FLOW-MAPPING-START }
# block_content: { BLOCK-SEQUENCE-START BLOCK-MAPPING-START FLOW-SEQUENCE-START FLOW-MAPPING-START SCALAR }
# flow_content: { FLOW-SEQUENCE-START FLOW-MAPPING-START SCALAR }
# block_collection: { BLOCK-SEQUENCE-START BLOCK-MAPPING-START }
# flow_collection: { FLOW-SEQUENCE-START FLOW-MAPPING-START }
# block_sequence: { BLOCK-SEQUENCE-START }
# block_mapping: { BLOCK-MAPPING-START }
# block_node_or_indentless_sequence: { ALIAS ANCHOR TAG SCALAR BLOCK-SEQUENCE-START BLOCK-MAPPING-START FLOW-SEQUENCE-START FLOW-MAPPING-START BLOCK-ENTRY }
# indentless_sequence: { ENTRY }
# flow_collection: { FLOW-SEQUENCE-START FLOW-MAPPING-START }
# flow_sequence: { FLOW-SEQUENCE-START }
# flow_mapping: { FLOW-MAPPING-START }
# flow_sequence_entry: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START FLOW-MAPPING-START KEY }
# flow_mapping_entry: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START FLOW-MAPPING-START KEY }
__all__ = ['Parser', 'ParserError']
from error import MarkedYAMLError
from tokens import *
from events import *
from scanner import *
class ParserError(MarkedYAMLError):
pass
class Parser(object):
# Since writing a recursive-descendant parser is a straightforward task, we
# do not give many comments here.
DEFAULT_TAGS = {
u'!': u'!',
u'!!': u'tag:yaml.org,2002:',
}
def __init__(self):
self.current_event = None
self.yaml_version = None
self.tag_handles = {}
self.states = []
self.marks = []
self.state = self.parse_stream_start
def check_event(self, *choices):
# Check the type of the next event.
if self.current_event is None:
if self.state:
self.current_event = self.state()
if self.current_event is not None:
if not choices:
return True
for choice in choices:
if isinstance(self.current_event, choice):
return True
return False
def peek_event(self):
# Get the next event.
if self.current_event is None:
if self.state:
self.current_event = self.state()
return self.current_event
def get_event(self):
# Get the next event and proceed further.
if self.current_event is None:
if self.state:
self.current_event = self.state()
value = self.current_event
self.current_event = None
return value
# stream ::= STREAM-START implicit_document? explicit_document* STREAM-END
# implicit_document ::= block_node DOCUMENT-END*
# explicit_document ::= DIRECTIVE* DOCUMENT-START block_node? DOCUMENT-END*
def parse_stream_start(self):
# Parse the stream start.
token = self.get_token()
event = StreamStartEvent(token.start_mark, token.end_mark,
encoding=token.encoding)
# Prepare the next state.
self.state = self.parse_implicit_document_start
return event
def parse_implicit_document_start(self):
# Parse an implicit document.
if not self.check_token(DirectiveToken, DocumentStartToken,
StreamEndToken):
self.tag_handles = self.DEFAULT_TAGS
token = self.peek_token()
start_mark = end_mark = token.start_mark
event = DocumentStartEvent(start_mark, end_mark,
explicit=False)
# Prepare the next state.
self.states.append(self.parse_document_end)
self.state = self.parse_block_node
return event
else:
return self.parse_document_start()
def parse_document_start(self):
# Parse any extra document end indicators.
while self.check_token(DocumentEndToken):
self.get_token()
# Parse an explicit document.
if not self.check_token(StreamEndToken):
token = self.peek_token()
start_mark = token.start_mark
version, tags = self.process_directives()
if not self.check_token(DocumentStartToken):
raise ParserError(None, None,
"expected '<document start>', but found %r"
% self.peek_token().id,
self.peek_token().start_mark)
token = self.get_token()
end_mark = token.end_mark
event = DocumentStartEvent(start_mark, end_mark,
explicit=True, version=version, tags=tags)
self.states.append(self.parse_document_end)
self.state = self.parse_document_content
else:
# Parse the end of the stream.
token = self.get_token()
event = StreamEndEvent(token.start_mark, token.end_mark)
assert not self.states
assert not self.marks
self.state = None
return event
def parse_document_end(self):
# Parse the document end.
token = self.peek_token()
start_mark = end_mark = token.start_mark
explicit = False
if self.check_token(DocumentEndToken):
token = self.get_token()
end_mark = token.end_mark
explicit = True
event = DocumentEndEvent(start_mark, end_mark,
explicit=explicit)
# Prepare the next state.
self.state = self.parse_document_start
return event
def parse_document_content(self):
if self.check_token(DirectiveToken,
DocumentStartToken, DocumentEndToken, StreamEndToken):
event = self.process_empty_scalar(self.peek_token().start_mark)
self.state = self.states.pop()
return event
else:
return self.parse_block_node()
def process_directives(self):
self.yaml_version = None
self.tag_handles = {}
while self.check_token(DirectiveToken):
token = self.get_token()
if token.name == u'YAML':
if self.yaml_version is not None:
raise ParserError(None, None,
"found duplicate YAML directive", token.start_mark)
major, minor = token.value
if major != 1:
raise ParserError(None, None,
"found incompatible YAML document (version 1.* is required)",
token.start_mark)
self.yaml_version = token.value
elif token.name == u'TAG':
handle, prefix = token.value
if handle in self.tag_handles:
raise ParserError(None, None,
"duplicate tag handle %r" % handle.encode('utf-8'),
token.start_mark)
self.tag_handles[handle] = prefix
if self.tag_handles:
value = self.yaml_version, self.tag_handles.copy()
else:
value = self.yaml_version, None
for key in self.DEFAULT_TAGS:
if key not in self.tag_handles:
self.tag_handles[key] = self.DEFAULT_TAGS[key]
return value
# block_node_or_indentless_sequence ::= ALIAS
# | properties (block_content | indentless_block_sequence)?
# | block_content
# | indentless_block_sequence
# block_node ::= ALIAS
# | properties block_content?
# | block_content
# flow_node ::= ALIAS
# | properties flow_content?
# | flow_content
# properties ::= TAG ANCHOR? | ANCHOR TAG?
# block_content ::= block_collection | flow_collection | SCALAR
# flow_content ::= flow_collection | SCALAR
# block_collection ::= block_sequence | block_mapping
# flow_collection ::= flow_sequence | flow_mapping
def parse_block_node(self):
return self.parse_node(block=True)
def parse_flow_node(self):
return self.parse_node()
def parse_block_node_or_indentless_sequence(self):
return self.parse_node(block=True, indentless_sequence=True)
def parse_node(self, block=False, indentless_sequence=False):
if self.check_token(AliasToken):
token = self.get_token()
event = AliasEvent(token.value, token.start_mark, token.end_mark)
self.state = self.states.pop()
else:
anchor = None
tag = None
start_mark = end_mark = tag_mark = None
if self.check_token(AnchorToken):
token = self.get_token()
start_mark = token.start_mark
end_mark = token.end_mark
anchor = token.value
if self.check_token(TagToken):
token = self.get_token()
tag_mark = token.start_mark
end_mark = token.end_mark
tag = token.value
elif self.check_token(TagToken):
token = self.get_token()
start_mark = tag_mark = token.start_mark
end_mark = token.end_mark
tag = token.value
if self.check_token(AnchorToken):
token = self.get_token()
end_mark = token.end_mark
anchor = token.value
if tag is not None:
handle, suffix = tag
if handle is not None:
if handle not in self.tag_handles:
raise ParserError("while parsing a node", start_mark,
"found undefined tag handle %r" % handle.encode('utf-8'),
tag_mark)
tag = self.tag_handles[handle]+suffix
else:
tag = suffix
#if tag == u'!':
# raise ParserError("while parsing a node", start_mark,
# "found non-specific tag '!'", tag_mark,
# "Please check 'http://pyyaml.org/wiki/YAMLNonSpecificTag' and share your opinion.")
if start_mark is None:
start_mark = end_mark = self.peek_token().start_mark
event = None
implicit = (tag is None or tag == u'!')
if indentless_sequence and self.check_token(BlockEntryToken):
end_mark = self.peek_token().end_mark
event = SequenceStartEvent(anchor, tag, implicit,
start_mark, end_mark)
self.state = self.parse_indentless_sequence_entry
else:
if self.check_token(ScalarToken):
token = self.get_token()
end_mark = token.end_mark
if (token.plain and tag is None) or tag == u'!':
implicit = (True, False)
elif tag is None:
implicit = (False, True)
else:
implicit = (False, False)
event = ScalarEvent(anchor, tag, implicit, token.value,
start_mark, end_mark, style=token.style)
self.state = self.states.pop()
elif self.check_token(FlowSequenceStartToken):
end_mark = self.peek_token().end_mark
event = SequenceStartEvent(anchor, tag, implicit,
start_mark, end_mark, flow_style=True)
self.state = self.parse_flow_sequence_first_entry
elif self.check_token(FlowMappingStartToken):
end_mark = self.peek_token().end_mark
event = MappingStartEvent(anchor, tag, implicit,
start_mark, end_mark, flow_style=True)
self.state = self.parse_flow_mapping_first_key
elif block and self.check_token(BlockSequenceStartToken):
end_mark = self.peek_token().start_mark
event = SequenceStartEvent(anchor, tag, implicit,
start_mark, end_mark, flow_style=False)
self.state = self.parse_block_sequence_first_entry
elif block and self.check_token(BlockMappingStartToken):
end_mark = self.peek_token().start_mark
event = MappingStartEvent(anchor, tag, implicit,
start_mark, end_mark, flow_style=False)
self.state = self.parse_block_mapping_first_key
elif anchor is not None or tag is not None:
# Empty scalars are allowed even if a tag or an anchor is
# specified.
event = ScalarEvent(anchor, tag, (implicit, False), u'',
start_mark, end_mark)
self.state = self.states.pop()
else:
if block:
node = 'block'
else:
node = 'flow'
token = self.peek_token()
raise ParserError("while parsing a %s node" % node, start_mark,
"expected the node content, but found %r" % token.id,
token.start_mark)
return event
# block_sequence ::= BLOCK-SEQUENCE-START (BLOCK-ENTRY block_node?)* BLOCK-END
def parse_block_sequence_first_entry(self):
token = self.get_token()
self.marks.append(token.start_mark)
return self.parse_block_sequence_entry()
def parse_block_sequence_entry(self):
if self.check_token(BlockEntryToken):
token = self.get_token()
if not self.check_token(BlockEntryToken, BlockEndToken):
self.states.append(self.parse_block_sequence_entry)
return self.parse_block_node()
else:
self.state = self.parse_block_sequence_entry
return self.process_empty_scalar(token.end_mark)
if not self.check_token(BlockEndToken):
token = self.peek_token()
raise ParserError("while parsing a block collection", self.marks[-1],
"expected <block end>, but found %r" % token.id, token.start_mark)
token = self.get_token()
event = SequenceEndEvent(token.start_mark, token.end_mark)
self.state = self.states.pop()
self.marks.pop()
return event
# indentless_sequence ::= (BLOCK-ENTRY block_node?)+
def parse_indentless_sequence_entry(self):
if self.check_token(BlockEntryToken):
token = self.get_token()
if not self.check_token(BlockEntryToken,
KeyToken, ValueToken, BlockEndToken):
self.states.append(self.parse_indentless_sequence_entry)
return self.parse_block_node()
else:
self.state = self.parse_indentless_sequence_entry
return self.process_empty_scalar(token.end_mark)
token = self.peek_token()
event = SequenceEndEvent(token.start_mark, token.start_mark)
self.state = self.states.pop()
return event
# block_mapping ::= BLOCK-MAPPING_START
# ((KEY block_node_or_indentless_sequence?)?
# (VALUE block_node_or_indentless_sequence?)?)*
# BLOCK-END
def parse_block_mapping_first_key(self):
token = self.get_token()
self.marks.append(token.start_mark)
return self.parse_block_mapping_key()
def parse_block_mapping_key(self):
if self.check_token(KeyToken):
token = self.get_token()
if not self.check_token(KeyToken, ValueToken, BlockEndToken):
self.states.append(self.parse_block_mapping_value)
return self.parse_block_node_or_indentless_sequence()
else:
self.state = self.parse_block_mapping_value
return self.process_empty_scalar(token.end_mark)
if not self.check_token(BlockEndToken):
token = self.peek_token()
raise ParserError("while parsing a block mapping", self.marks[-1],
"expected <block end>, but found %r" % token.id, token.start_mark)
token = self.get_token()
event = MappingEndEvent(token.start_mark, token.end_mark)
self.state = self.states.pop()
self.marks.pop()
return event
def parse_block_mapping_value(self):
if self.check_token(ValueToken):
token = self.get_token()
if not self.check_token(KeyToken, ValueToken, BlockEndToken):
self.states.append(self.parse_block_mapping_key)
return self.parse_block_node_or_indentless_sequence()
else:
self.state = self.parse_block_mapping_key
return self.process_empty_scalar(token.end_mark)
else:
self.state = self.parse_block_mapping_key
token = self.peek_token()
return self.process_empty_scalar(token.start_mark)
# flow_sequence ::= FLOW-SEQUENCE-START
# (flow_sequence_entry FLOW-ENTRY)*
# flow_sequence_entry?
# FLOW-SEQUENCE-END
# flow_sequence_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)?
#
# Note that while production rules for both flow_sequence_entry and
# flow_mapping_entry are equal, their interpretations are different.
# For `flow_sequence_entry`, the part `KEY flow_node? (VALUE flow_node?)?`
# generate an inline mapping (set syntax).
def parse_flow_sequence_first_entry(self):
token = self.get_token()
self.marks.append(token.start_mark)
return self.parse_flow_sequence_entry(first=True)
def parse_flow_sequence_entry(self, first=False):
if not self.check_token(FlowSequenceEndToken):
if not first:
if self.check_token(FlowEntryToken):
self.get_token()
else:
token = self.peek_token()
raise ParserError("while parsing a flow sequence", self.marks[-1],
"expected ',' or ']', but got %r" % token.id, token.start_mark)
if self.check_token(KeyToken):
token = self.peek_token()
event = MappingStartEvent(None, None, True,
token.start_mark, token.end_mark,
flow_style=True)
self.state = self.parse_flow_sequence_entry_mapping_key
return event
elif not self.check_token(FlowSequenceEndToken):
self.states.append(self.parse_flow_sequence_entry)
return self.parse_flow_node()
token = self.get_token()
event = SequenceEndEvent(token.start_mark, token.end_mark)
self.state = self.states.pop()
self.marks.pop()
return event
def parse_flow_sequence_entry_mapping_key(self):
token = self.get_token()
if not self.check_token(ValueToken,
FlowEntryToken, FlowSequenceEndToken):
self.states.append(self.parse_flow_sequence_entry_mapping_value)
return self.parse_flow_node()
else:
self.state = self.parse_flow_sequence_entry_mapping_value
return self.process_empty_scalar(token.end_mark)
def parse_flow_sequence_entry_mapping_value(self):
if self.check_token(ValueToken):
token = self.get_token()
if not self.check_token(FlowEntryToken, FlowSequenceEndToken):
self.states.append(self.parse_flow_sequence_entry_mapping_end)
return self.parse_flow_node()
else:
self.state = self.parse_flow_sequence_entry_mapping_end
return self.process_empty_scalar(token.end_mark)
else:
self.state = self.parse_flow_sequence_entry_mapping_end
token = self.peek_token()
return self.process_empty_scalar(token.start_mark)
def parse_flow_sequence_entry_mapping_end(self):
self.state = self.parse_flow_sequence_entry
token = self.peek_token()
return MappingEndEvent(token.start_mark, token.start_mark)
# flow_mapping ::= FLOW-MAPPING-START
# (flow_mapping_entry FLOW-ENTRY)*
# flow_mapping_entry?
# FLOW-MAPPING-END
# flow_mapping_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)?
def parse_flow_mapping_first_key(self):
token = self.get_token()
self.marks.append(token.start_mark)
return self.parse_flow_mapping_key(first=True)
def parse_flow_mapping_key(self, first=False):
if not self.check_token(FlowMappingEndToken):
if not first:
if self.check_token(FlowEntryToken):
self.get_token()
else:
token = self.peek_token()
raise ParserError("while parsing a flow mapping", self.marks[-1],
"expected ',' or '}', but got %r" % token.id, token.start_mark)
if self.check_token(KeyToken):
token = self.get_token()
if not self.check_token(ValueToken,
FlowEntryToken, FlowMappingEndToken):
self.states.append(self.parse_flow_mapping_value)
return self.parse_flow_node()
else:
self.state = self.parse_flow_mapping_value
return self.process_empty_scalar(token.end_mark)
elif not self.check_token(FlowMappingEndToken):
self.states.append(self.parse_flow_mapping_empty_value)
return self.parse_flow_node()
token = self.get_token()
event = MappingEndEvent(token.start_mark, token.end_mark)
self.state = self.states.pop()
self.marks.pop()
return event
def parse_flow_mapping_value(self):
if self.check_token(ValueToken):
token = self.get_token()
if not self.check_token(FlowEntryToken, FlowMappingEndToken):
self.states.append(self.parse_flow_mapping_key)
return self.parse_flow_node()
else:
self.state = self.parse_flow_mapping_key
return self.process_empty_scalar(token.end_mark)
else:
self.state = self.parse_flow_mapping_key
token = self.peek_token()
return self.process_empty_scalar(token.start_mark)
def parse_flow_mapping_empty_value(self):
self.state = self.parse_flow_mapping_key
return self.process_empty_scalar(self.peek_token().start_mark)
def process_empty_scalar(self, mark):
return ScalarEvent(None, None, (True, False), u'', mark, mark)

View File

@ -0,0 +1,225 @@
# This module contains abstractions for the input stream. You don't have to
# looks further, there are no pretty code.
#
# We define two classes here.
#
# Mark(source, line, column)
# It's just a record and its only use is producing nice error messages.
# Parser does not use it for any other purposes.
#
# Reader(source, data)
# Reader determines the encoding of `data` and converts it to unicode.
# Reader provides the following methods and attributes:
# reader.peek(length=1) - return the next `length` characters
# reader.forward(length=1) - move the current position to `length` characters.
# reader.index - the number of the current character.
# reader.line, stream.column - the line and the column of the current character.
__all__ = ['Reader', 'ReaderError']
from error import YAMLError, Mark
import codecs, re
# Unfortunately, codec functions in Python 2.3 does not support the `finish`
# arguments, so we have to write our own wrappers.
try:
codecs.utf_8_decode('', 'strict', False)
from codecs import utf_8_decode, utf_16_le_decode, utf_16_be_decode
except TypeError:
def utf_16_le_decode(data, errors, finish=False):
if not finish and len(data) % 2 == 1:
data = data[:-1]
return codecs.utf_16_le_decode(data, errors)
def utf_16_be_decode(data, errors, finish=False):
if not finish and len(data) % 2 == 1:
data = data[:-1]
return codecs.utf_16_be_decode(data, errors)
def utf_8_decode(data, errors, finish=False):
if not finish:
# We are trying to remove a possible incomplete multibyte character
# from the suffix of the data.
# The first byte of a multi-byte sequence is in the range 0xc0 to 0xfd.
# All further bytes are in the range 0x80 to 0xbf.
# UTF-8 encoded UCS characters may be up to six bytes long.
count = 0
while count < 5 and count < len(data) \
and '\x80' <= data[-count-1] <= '\xBF':
count -= 1
if count < 5 and count < len(data) \
and '\xC0' <= data[-count-1] <= '\xFD':
data = data[:-count-1]
return codecs.utf_8_decode(data, errors)
class ReaderError(YAMLError):
def __init__(self, name, position, character, encoding, reason):
self.name = name
self.character = character
self.position = position
self.encoding = encoding
self.reason = reason
def __str__(self):
if isinstance(self.character, str):
return "'%s' codec can't decode byte #x%02x: %s\n" \
" in \"%s\", position %d" \
% (self.encoding, ord(self.character), self.reason,
self.name, self.position)
else:
return "unacceptable character #x%04x: %s\n" \
" in \"%s\", position %d" \
% (self.character, self.reason,
self.name, self.position)
class Reader(object):
# Reader:
# - determines the data encoding and converts it to unicode,
# - checks if characters are in allowed range,
# - adds '\0' to the end.
# Reader accepts
# - a `str` object,
# - a `unicode` object,
# - a file-like object with its `read` method returning `str`,
# - a file-like object with its `read` method returning `unicode`.
# Yeah, it's ugly and slow.
def __init__(self, stream):
self.name = None
self.stream = None
self.stream_pointer = 0
self.eof = True
self.buffer = u''
self.pointer = 0
self.raw_buffer = None
self.raw_decode = None
self.encoding = None
self.index = 0
self.line = 0
self.column = 0
if isinstance(stream, unicode):
self.name = "<unicode string>"
self.check_printable(stream)
self.buffer = stream+u'\0'
elif isinstance(stream, str):
self.name = "<string>"
self.raw_buffer = stream
self.determine_encoding()
else:
self.stream = stream
self.name = getattr(stream, 'name', "<file>")
self.eof = False
self.raw_buffer = ''
self.determine_encoding()
def peek(self, index=0):
try:
return self.buffer[self.pointer+index]
except IndexError:
self.update(index+1)
return self.buffer[self.pointer+index]
def prefix(self, length=1):
if self.pointer+length >= len(self.buffer):
self.update(length)
return self.buffer[self.pointer:self.pointer+length]
def forward(self, length=1):
if self.pointer+length+1 >= len(self.buffer):
self.update(length+1)
while length:
ch = self.buffer[self.pointer]
self.pointer += 1
self.index += 1
if ch in u'\n\x85\u2028\u2029' \
or (ch == u'\r' and self.buffer[self.pointer] != u'\n'):
self.line += 1
self.column = 0
elif ch != u'\uFEFF':
self.column += 1
length -= 1
def get_mark(self):
if self.stream is None:
return Mark(self.name, self.index, self.line, self.column,
self.buffer, self.pointer)
else:
return Mark(self.name, self.index, self.line, self.column,
None, None)
def determine_encoding(self):
while not self.eof and len(self.raw_buffer) < 2:
self.update_raw()
if not isinstance(self.raw_buffer, unicode):
if self.raw_buffer.startswith(codecs.BOM_UTF16_LE):
self.raw_decode = utf_16_le_decode
self.encoding = 'utf-16-le'
elif self.raw_buffer.startswith(codecs.BOM_UTF16_BE):
self.raw_decode = utf_16_be_decode
self.encoding = 'utf-16-be'
else:
self.raw_decode = utf_8_decode
self.encoding = 'utf-8'
self.update(1)
NON_PRINTABLE = re.compile(u'[^\x09\x0A\x0D\x20-\x7E\x85\xA0-\uD7FF\uE000-\uFFFD]')
def check_printable(self, data):
match = self.NON_PRINTABLE.search(data)
if match:
character = match.group()
position = self.index+(len(self.buffer)-self.pointer)+match.start()
raise ReaderError(self.name, position, ord(character),
'unicode', "special characters are not allowed")
def update(self, length):
if self.raw_buffer is None:
return
self.buffer = self.buffer[self.pointer:]
self.pointer = 0
while len(self.buffer) < length:
if not self.eof:
self.update_raw()
if self.raw_decode is not None:
try:
data, converted = self.raw_decode(self.raw_buffer,
'strict', self.eof)
except UnicodeDecodeError, exc:
character = exc.object[exc.start]
if self.stream is not None:
position = self.stream_pointer-len(self.raw_buffer)+exc.start
else:
position = exc.start
raise ReaderError(self.name, position, character,
exc.encoding, exc.reason)
else:
data = self.raw_buffer
converted = len(data)
self.check_printable(data)
self.buffer += data
self.raw_buffer = self.raw_buffer[converted:]
if self.eof:
self.buffer += u'\0'
self.raw_buffer = None
break
def update_raw(self, size=1024):
data = self.stream.read(size)
if data:
self.raw_buffer += data
self.stream_pointer += len(data)
else:
self.eof = True
#try:
# import psyco
# psyco.bind(Reader)
#except ImportError:
# pass

View File

@ -0,0 +1,489 @@
__all__ = ['BaseRepresenter', 'SafeRepresenter', 'Representer',
'RepresenterError']
from error import *
from nodes import *
import datetime
try:
set
except NameError:
from sets import Set as set
import sys, copy_reg, types
class RepresenterError(YAMLError):
pass
class BaseRepresenter(object):
yaml_representers = {}
yaml_multi_representers = {}
def __init__(self, default_style=None, default_flow_style=None):
self.default_style = default_style
self.default_flow_style = default_flow_style
self.represented_objects = {}
self.object_keeper = []
self.alias_key = None
def represent(self, data):
node = self.represent_data(data)
self.serialize(node)
self.represented_objects = {}
self.object_keeper = []
self.alias_key = None
def get_classobj_bases(self, cls):
bases = [cls]
for base in cls.__bases__:
bases.extend(self.get_classobj_bases(base))
return bases
def represent_data(self, data):
if self.ignore_aliases(data):
self.alias_key = None
else:
self.alias_key = id(data)
if self.alias_key is not None:
if self.alias_key in self.represented_objects:
node = self.represented_objects[self.alias_key]
#if node is None:
# raise RepresenterError("recursive objects are not allowed: %r" % data)
return node
#self.represented_objects[alias_key] = None
self.object_keeper.append(data)
data_types = type(data).__mro__
if type(data) is types.InstanceType:
data_types = self.get_classobj_bases(data.__class__)+list(data_types)
if data_types[0] in self.yaml_representers:
node = self.yaml_representers[data_types[0]](self, data)
else:
for data_type in data_types:
if data_type in self.yaml_multi_representers:
node = self.yaml_multi_representers[data_type](self, data)
break
else:
if None in self.yaml_multi_representers:
node = self.yaml_multi_representers[None](self, data)
elif None in self.yaml_representers:
node = self.yaml_representers[None](self, data)
else:
node = ScalarNode(None, unicode(data))
#if alias_key is not None:
# self.represented_objects[alias_key] = node
return node
def add_representer(cls, data_type, representer):
if not 'yaml_representers' in cls.__dict__:
cls.yaml_representers = cls.yaml_representers.copy()
cls.yaml_representers[data_type] = representer
add_representer = classmethod(add_representer)
def add_multi_representer(cls, data_type, representer):
if not 'yaml_multi_representers' in cls.__dict__:
cls.yaml_multi_representers = cls.yaml_multi_representers.copy()
cls.yaml_multi_representers[data_type] = representer
add_multi_representer = classmethod(add_multi_representer)
def represent_scalar(self, tag, value, style=None):
if style is None:
style = self.default_style
node = ScalarNode(tag, value, style=style)
if self.alias_key is not None:
self.represented_objects[self.alias_key] = node
return node
def represent_sequence(self, tag, sequence, flow_style=None):
value = []
node = SequenceNode(tag, value, flow_style=flow_style)
if self.alias_key is not None:
self.represented_objects[self.alias_key] = node
best_style = True
for item in sequence:
node_item = self.represent_data(item)
if not (isinstance(node_item, ScalarNode) and not node_item.style):
best_style = False
value.append(node_item)
if flow_style is None:
if self.default_flow_style is not None:
node.flow_style = self.default_flow_style
else:
node.flow_style = best_style
return node
def represent_mapping(self, tag, mapping, flow_style=None):
value = []
node = MappingNode(tag, value, flow_style=flow_style)
if self.alias_key is not None:
self.represented_objects[self.alias_key] = node
best_style = True
if hasattr(mapping, 'items'):
mapping = mapping.items()
mapping.sort()
for item_key, item_value in mapping:
node_key = self.represent_data(item_key)
node_value = self.represent_data(item_value)
if not (isinstance(node_key, ScalarNode) and not node_key.style):
best_style = False
if not (isinstance(node_value, ScalarNode) and not node_value.style):
best_style = False
value.append((node_key, node_value))
if flow_style is None:
if self.default_flow_style is not None:
node.flow_style = self.default_flow_style
else:
node.flow_style = best_style
return node
def ignore_aliases(self, data):
return False
class SafeRepresenter(BaseRepresenter):
def ignore_aliases(self, data):
if data in [None, ()]:
return True
if isinstance(data, (str, unicode, bool, int, float)):
return True
def represent_none(self, data):
return self.represent_scalar(u'tag:yaml.org,2002:null',
u'null')
def represent_str(self, data):
tag = None
style = None
try:
data = unicode(data, 'ascii')
tag = u'tag:yaml.org,2002:str'
except UnicodeDecodeError:
try:
data = unicode(data, 'utf-8')
tag = u'tag:yaml.org,2002:str'
except UnicodeDecodeError:
data = data.encode('base64')
tag = u'tag:yaml.org,2002:binary'
style = '|'
return self.represent_scalar(tag, data, style=style)
def represent_unicode(self, data):
return self.represent_scalar(u'tag:yaml.org,2002:str', data)
def represent_bool(self, data):
if data:
value = u'true'
else:
value = u'false'
return self.represent_scalar(u'tag:yaml.org,2002:bool', value)
def represent_int(self, data):
return self.represent_scalar(u'tag:yaml.org,2002:int', unicode(data))
def represent_long(self, data):
return self.represent_scalar(u'tag:yaml.org,2002:int', unicode(data))
inf_value = 1e300
while repr(inf_value) != repr(inf_value*inf_value):
inf_value *= inf_value
def represent_float(self, data):
if data != data or (data == 0.0 and data == 1.0):
value = u'.nan'
elif data == self.inf_value:
value = u'.inf'
elif data == -self.inf_value:
value = u'-.inf'
else:
value = unicode(repr(data)).lower()
# Note that in some cases `repr(data)` represents a float number
# without the decimal parts. For instance:
# >>> repr(1e17)
# '1e17'
# Unfortunately, this is not a valid float representation according
# to the definition of the `!!float` tag. We fix this by adding
# '.0' before the 'e' symbol.
if u'.' not in value and u'e' in value:
value = value.replace(u'e', u'.0e', 1)
return self.represent_scalar(u'tag:yaml.org,2002:float', value)
def represent_list(self, data):
#pairs = (len(data) > 0 and isinstance(data, list))
#if pairs:
# for item in data:
# if not isinstance(item, tuple) or len(item) != 2:
# pairs = False
# break
#if not pairs:
return self.represent_sequence(u'tag:yaml.org,2002:seq', data)
#value = []
#for item_key, item_value in data:
# value.append(self.represent_mapping(u'tag:yaml.org,2002:map',
# [(item_key, item_value)]))
#return SequenceNode(u'tag:yaml.org,2002:pairs', value)
def represent_dict(self, data):
return self.represent_mapping(u'tag:yaml.org,2002:map', data)
def represent_set(self, data):
value = {}
for key in data:
value[key] = None
return self.represent_mapping(u'tag:yaml.org,2002:set', value)
def represent_date(self, data):
value = unicode(data.isoformat())
return self.represent_scalar(u'tag:yaml.org,2002:timestamp', value)
def represent_datetime(self, data):
value = unicode(data.isoformat(' '))
return self.represent_scalar(u'tag:yaml.org,2002:timestamp', value)
def represent_yaml_object(self, tag, data, cls, flow_style=None):
if hasattr(data, '__getstate__'):
state = data.__getstate__()
else:
state = data.__dict__.copy()
return self.represent_mapping(tag, state, flow_style=flow_style)
def represent_undefined(self, data):
raise RepresenterError("cannot represent an object: %s" % data)
SafeRepresenter.add_representer(type(None),
SafeRepresenter.represent_none)
SafeRepresenter.add_representer(str,
SafeRepresenter.represent_str)
SafeRepresenter.add_representer(unicode,
SafeRepresenter.represent_unicode)
SafeRepresenter.add_representer(bool,
SafeRepresenter.represent_bool)
SafeRepresenter.add_representer(int,
SafeRepresenter.represent_int)
SafeRepresenter.add_representer(long,
SafeRepresenter.represent_long)
SafeRepresenter.add_representer(float,
SafeRepresenter.represent_float)
SafeRepresenter.add_representer(list,
SafeRepresenter.represent_list)
SafeRepresenter.add_representer(tuple,
SafeRepresenter.represent_list)
SafeRepresenter.add_representer(dict,
SafeRepresenter.represent_dict)
SafeRepresenter.add_representer(set,
SafeRepresenter.represent_set)
SafeRepresenter.add_representer(datetime.date,
SafeRepresenter.represent_date)
SafeRepresenter.add_representer(datetime.datetime,
SafeRepresenter.represent_datetime)
SafeRepresenter.add_representer(None,
SafeRepresenter.represent_undefined)
class Representer(SafeRepresenter):
def represent_str(self, data):
tag = None
style = None
try:
data = unicode(data, 'ascii')
tag = u'tag:yaml.org,2002:str'
except UnicodeDecodeError:
try:
data = unicode(data, 'utf-8')
tag = u'tag:yaml.org,2002:python/str'
except UnicodeDecodeError:
data = data.encode('base64')
tag = u'tag:yaml.org,2002:binary'
style = '|'
return self.represent_scalar(tag, data, style=style)
def represent_unicode(self, data):
tag = None
try:
data.encode('ascii')
tag = u'tag:yaml.org,2002:python/unicode'
except UnicodeEncodeError:
tag = u'tag:yaml.org,2002:str'
return self.represent_scalar(tag, data)
def represent_long(self, data):
tag = u'tag:yaml.org,2002:int'
if int(data) is not data:
tag = u'tag:yaml.org,2002:python/long'
return self.represent_scalar(tag, unicode(data))
def represent_complex(self, data):
if data.imag == 0.0:
data = u'%r' % data.real
elif data.real == 0.0:
data = u'%rj' % data.imag
elif data.imag > 0:
data = u'%r+%rj' % (data.real, data.imag)
else:
data = u'%r%rj' % (data.real, data.imag)
return self.represent_scalar(u'tag:yaml.org,2002:python/complex', data)
def represent_tuple(self, data):
return self.represent_sequence(u'tag:yaml.org,2002:python/tuple', data)
def represent_name(self, data):
name = u'%s.%s' % (data.__module__, data.__name__)
return self.represent_scalar(u'tag:yaml.org,2002:python/name:'+name, u'')
def represent_module(self, data):
return self.represent_scalar(
u'tag:yaml.org,2002:python/module:'+data.__name__, u'')
def represent_instance(self, data):
# For instances of classic classes, we use __getinitargs__ and
# __getstate__ to serialize the data.
# If data.__getinitargs__ exists, the object must be reconstructed by
# calling cls(**args), where args is a tuple returned by
# __getinitargs__. Otherwise, the cls.__init__ method should never be
# called and the class instance is created by instantiating a trivial
# class and assigning to the instance's __class__ variable.
# If data.__getstate__ exists, it returns the state of the object.
# Otherwise, the state of the object is data.__dict__.
# We produce either a !!python/object or !!python/object/new node.
# If data.__getinitargs__ does not exist and state is a dictionary, we
# produce a !!python/object node . Otherwise we produce a
# !!python/object/new node.
cls = data.__class__
class_name = u'%s.%s' % (cls.__module__, cls.__name__)
args = None
state = None
if hasattr(data, '__getinitargs__'):
args = list(data.__getinitargs__())
if hasattr(data, '__getstate__'):
state = data.__getstate__()
else:
state = data.__dict__
if args is None and isinstance(state, dict):
return self.represent_mapping(
u'tag:yaml.org,2002:python/object:'+class_name, state)
if isinstance(state, dict) and not state:
return self.represent_sequence(
u'tag:yaml.org,2002:python/object/new:'+class_name, args)
value = {}
if args:
value['args'] = args
value['state'] = state
return self.represent_mapping(
u'tag:yaml.org,2002:python/object/new:'+class_name, value)
def represent_object(self, data):
# We use __reduce__ API to save the data. data.__reduce__ returns
# a tuple of length 2-5:
# (function, args, state, listitems, dictitems)
# For reconstructing, we calls function(*args), then set its state,
# listitems, and dictitems if they are not None.
# A special case is when function.__name__ == '__newobj__'. In this
# case we create the object with args[0].__new__(*args).
# Another special case is when __reduce__ returns a string - we don't
# support it.
# We produce a !!python/object, !!python/object/new or
# !!python/object/apply node.
cls = type(data)
if cls in copy_reg.dispatch_table:
reduce = copy_reg.dispatch_table[cls](data)
elif hasattr(data, '__reduce_ex__'):
reduce = data.__reduce_ex__(2)
elif hasattr(data, '__reduce__'):
reduce = data.__reduce__()
else:
raise RepresenterError("cannot represent object: %r" % data)
reduce = (list(reduce)+[None]*5)[:5]
function, args, state, listitems, dictitems = reduce
args = list(args)
if state is None:
state = {}
if listitems is not None:
listitems = list(listitems)
if dictitems is not None:
dictitems = dict(dictitems)
if function.__name__ == '__newobj__':
function = args[0]
args = args[1:]
tag = u'tag:yaml.org,2002:python/object/new:'
newobj = True
else:
tag = u'tag:yaml.org,2002:python/object/apply:'
newobj = False
function_name = u'%s.%s' % (function.__module__, function.__name__)
if not args and not listitems and not dictitems \
and isinstance(state, dict) and newobj:
return self.represent_mapping(
u'tag:yaml.org,2002:python/object:'+function_name, state)
if not listitems and not dictitems \
and isinstance(state, dict) and not state:
return self.represent_sequence(tag+function_name, args)
value = {}
if args:
value['args'] = args
if state or not isinstance(state, dict):
value['state'] = state
if listitems:
value['listitems'] = listitems
if dictitems:
value['dictitems'] = dictitems
return self.represent_mapping(tag+function_name, value)
Representer.add_representer(str,
Representer.represent_str)
Representer.add_representer(unicode,
Representer.represent_unicode)
Representer.add_representer(long,
Representer.represent_long)
Representer.add_representer(complex,
Representer.represent_complex)
Representer.add_representer(tuple,
Representer.represent_tuple)
Representer.add_representer(type,
Representer.represent_name)
Representer.add_representer(types.ClassType,
Representer.represent_name)
Representer.add_representer(types.FunctionType,
Representer.represent_name)
Representer.add_representer(types.BuiltinFunctionType,
Representer.represent_name)
Representer.add_representer(types.ModuleType,
Representer.represent_module)
Representer.add_multi_representer(types.InstanceType,
Representer.represent_instance)
Representer.add_multi_representer(object,
Representer.represent_object)

View File

@ -0,0 +1,224 @@
__all__ = ['BaseResolver', 'Resolver']
from error import *
from nodes import *
import re
class ResolverError(YAMLError):
pass
class BaseResolver(object):
DEFAULT_SCALAR_TAG = u'tag:yaml.org,2002:str'
DEFAULT_SEQUENCE_TAG = u'tag:yaml.org,2002:seq'
DEFAULT_MAPPING_TAG = u'tag:yaml.org,2002:map'
yaml_implicit_resolvers = {}
yaml_path_resolvers = {}
def __init__(self):
self.resolver_exact_paths = []
self.resolver_prefix_paths = []
def add_implicit_resolver(cls, tag, regexp, first):
if not 'yaml_implicit_resolvers' in cls.__dict__:
cls.yaml_implicit_resolvers = cls.yaml_implicit_resolvers.copy()
if first is None:
first = [None]
for ch in first:
cls.yaml_implicit_resolvers.setdefault(ch, []).append((tag, regexp))
add_implicit_resolver = classmethod(add_implicit_resolver)
def add_path_resolver(cls, tag, path, kind=None):
# Note: `add_path_resolver` is experimental. The API could be changed.
# `new_path` is a pattern that is matched against the path from the
# root to the node that is being considered. `node_path` elements are
# tuples `(node_check, index_check)`. `node_check` is a node class:
# `ScalarNode`, `SequenceNode`, `MappingNode` or `None`. `None`
# matches any kind of a node. `index_check` could be `None`, a boolean
# value, a string value, or a number. `None` and `False` match against
# any _value_ of sequence and mapping nodes. `True` matches against
# any _key_ of a mapping node. A string `index_check` matches against
# a mapping value that corresponds to a scalar key which content is
# equal to the `index_check` value. An integer `index_check` matches
# against a sequence value with the index equal to `index_check`.
if not 'yaml_path_resolvers' in cls.__dict__:
cls.yaml_path_resolvers = cls.yaml_path_resolvers.copy()
new_path = []
for element in path:
if isinstance(element, (list, tuple)):
if len(element) == 2:
node_check, index_check = element
elif len(element) == 1:
node_check = element[0]
index_check = True
else:
raise ResolverError("Invalid path element: %s" % element)
else:
node_check = None
index_check = element
if node_check is str:
node_check = ScalarNode
elif node_check is list:
node_check = SequenceNode
elif node_check is dict:
node_check = MappingNode
elif node_check not in [ScalarNode, SequenceNode, MappingNode] \
and not isinstance(node_check, basestring) \
and node_check is not None:
raise ResolverError("Invalid node checker: %s" % node_check)
if not isinstance(index_check, (basestring, int)) \
and index_check is not None:
raise ResolverError("Invalid index checker: %s" % index_check)
new_path.append((node_check, index_check))
if kind is str:
kind = ScalarNode
elif kind is list:
kind = SequenceNode
elif kind is dict:
kind = MappingNode
elif kind not in [ScalarNode, SequenceNode, MappingNode] \
and kind is not None:
raise ResolverError("Invalid node kind: %s" % kind)
cls.yaml_path_resolvers[tuple(new_path), kind] = tag
add_path_resolver = classmethod(add_path_resolver)
def descend_resolver(self, current_node, current_index):
if not self.yaml_path_resolvers:
return
exact_paths = {}
prefix_paths = []
if current_node:
depth = len(self.resolver_prefix_paths)
for path, kind in self.resolver_prefix_paths[-1]:
if self.check_resolver_prefix(depth, path, kind,
current_node, current_index):
if len(path) > depth:
prefix_paths.append((path, kind))
else:
exact_paths[kind] = self.yaml_path_resolvers[path, kind]
else:
for path, kind in self.yaml_path_resolvers:
if not path:
exact_paths[kind] = self.yaml_path_resolvers[path, kind]
else:
prefix_paths.append((path, kind))
self.resolver_exact_paths.append(exact_paths)
self.resolver_prefix_paths.append(prefix_paths)
def ascend_resolver(self):
if not self.yaml_path_resolvers:
return
self.resolver_exact_paths.pop()
self.resolver_prefix_paths.pop()
def check_resolver_prefix(self, depth, path, kind,
current_node, current_index):
node_check, index_check = path[depth-1]
if isinstance(node_check, basestring):
if current_node.tag != node_check:
return
elif node_check is not None:
if not isinstance(current_node, node_check):
return
if index_check is True and current_index is not None:
return
if (index_check is False or index_check is None) \
and current_index is None:
return
if isinstance(index_check, basestring):
if not (isinstance(current_index, ScalarNode)
and index_check == current_index.value):
return
elif isinstance(index_check, int) and not isinstance(index_check, bool):
if index_check != current_index:
return
return True
def resolve(self, kind, value, implicit):
if kind is ScalarNode and implicit[0]:
if value == u'':
resolvers = self.yaml_implicit_resolvers.get(u'', [])
else:
resolvers = self.yaml_implicit_resolvers.get(value[0], [])
resolvers += self.yaml_implicit_resolvers.get(None, [])
for tag, regexp in resolvers:
if regexp.match(value):
return tag
implicit = implicit[1]
if self.yaml_path_resolvers:
exact_paths = self.resolver_exact_paths[-1]
if kind in exact_paths:
return exact_paths[kind]
if None in exact_paths:
return exact_paths[None]
if kind is ScalarNode:
return self.DEFAULT_SCALAR_TAG
elif kind is SequenceNode:
return self.DEFAULT_SEQUENCE_TAG
elif kind is MappingNode:
return self.DEFAULT_MAPPING_TAG
class Resolver(BaseResolver):
pass
Resolver.add_implicit_resolver(
u'tag:yaml.org,2002:bool',
re.compile(ur'''^(?:yes|Yes|YES|no|No|NO
|true|True|TRUE|false|False|FALSE
|on|On|ON|off|Off|OFF)$''', re.X),
list(u'yYnNtTfFoO'))
Resolver.add_implicit_resolver(
u'tag:yaml.org,2002:float',
re.compile(ur'''^(?:[-+]?(?:[0-9][0-9_]*)\.[0-9_]*(?:[eE][-+][0-9]+)?
|\.[0-9_]+(?:[eE][-+][0-9]+)?
|[-+]?[0-9][0-9_]*(?::[0-5]?[0-9])+\.[0-9_]*
|[-+]?\.(?:inf|Inf|INF)
|\.(?:nan|NaN|NAN))$''', re.X),
list(u'-+0123456789.'))
Resolver.add_implicit_resolver(
u'tag:yaml.org,2002:int',
re.compile(ur'''^(?:[-+]?0b[0-1_]+
|[-+]?0[0-7_]+
|[-+]?(?:0|[1-9][0-9_]*)
|[-+]?0x[0-9a-fA-F_]+
|[-+]?[1-9][0-9_]*(?::[0-5]?[0-9])+)$''', re.X),
list(u'-+0123456789'))
Resolver.add_implicit_resolver(
u'tag:yaml.org,2002:merge',
re.compile(ur'^(?:<<)$'),
[u'<'])
Resolver.add_implicit_resolver(
u'tag:yaml.org,2002:null',
re.compile(ur'''^(?: ~
|null|Null|NULL
| )$''', re.X),
[u'~', u'n', u'N', u''])
Resolver.add_implicit_resolver(
u'tag:yaml.org,2002:timestamp',
re.compile(ur'''^(?:[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9]
|[0-9][0-9][0-9][0-9] -[0-9][0-9]? -[0-9][0-9]?
(?:[Tt]|[ \t]+)[0-9][0-9]?
:[0-9][0-9] :[0-9][0-9] (?:\.[0-9]*)?
(?:[ \t]*(?:Z|[-+][0-9][0-9]?(?::[0-9][0-9])?))?)$''', re.X),
list(u'0123456789'))
Resolver.add_implicit_resolver(
u'tag:yaml.org,2002:value',
re.compile(ur'^(?:=)$'),
[u'='])
# The following resolver is only for documentation purposes. It cannot work
# because plain scalars cannot start with '!', '&', or '*'.
Resolver.add_implicit_resolver(
u'tag:yaml.org,2002:yaml',
re.compile(ur'^(?:!|&|\*)$'),
list(u'!&*'))

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,111 @@
__all__ = ['Serializer', 'SerializerError']
from error import YAMLError
from events import *
from nodes import *
class SerializerError(YAMLError):
pass
class Serializer(object):
ANCHOR_TEMPLATE = u'id%03d'
def __init__(self, encoding=None,
explicit_start=None, explicit_end=None, version=None, tags=None):
self.use_encoding = encoding
self.use_explicit_start = explicit_start
self.use_explicit_end = explicit_end
self.use_version = version
self.use_tags = tags
self.serialized_nodes = {}
self.anchors = {}
self.last_anchor_id = 0
self.closed = None
def open(self):
if self.closed is None:
self.emit(StreamStartEvent(encoding=self.use_encoding))
self.closed = False
elif self.closed:
raise SerializerError("serializer is closed")
else:
raise SerializerError("serializer is already opened")
def close(self):
if self.closed is None:
raise SerializerError("serializer is not opened")
elif not self.closed:
self.emit(StreamEndEvent())
self.closed = True
#def __del__(self):
# self.close()
def serialize(self, node):
if self.closed is None:
raise SerializerError("serializer is not opened")
elif self.closed:
raise SerializerError("serializer is closed")
self.emit(DocumentStartEvent(explicit=self.use_explicit_start,
version=self.use_version, tags=self.use_tags))
self.anchor_node(node)
self.serialize_node(node, None, None)
self.emit(DocumentEndEvent(explicit=self.use_explicit_end))
self.serialized_nodes = {}
self.anchors = {}
self.last_anchor_id = 0
def anchor_node(self, node):
if node in self.anchors:
if self.anchors[node] is None:
self.anchors[node] = self.generate_anchor(node)
else:
self.anchors[node] = None
if isinstance(node, SequenceNode):
for item in node.value:
self.anchor_node(item)
elif isinstance(node, MappingNode):
for key, value in node.value:
self.anchor_node(key)
self.anchor_node(value)
def generate_anchor(self, node):
self.last_anchor_id += 1
return self.ANCHOR_TEMPLATE % self.last_anchor_id
def serialize_node(self, node, parent, index):
alias = self.anchors[node]
if node in self.serialized_nodes:
self.emit(AliasEvent(alias))
else:
self.serialized_nodes[node] = True
self.descend_resolver(parent, index)
if isinstance(node, ScalarNode):
detected_tag = self.resolve(ScalarNode, node.value, (True, False))
default_tag = self.resolve(ScalarNode, node.value, (False, True))
implicit = (node.tag == detected_tag), (node.tag == default_tag)
self.emit(ScalarEvent(alias, node.tag, implicit, node.value,
style=node.style))
elif isinstance(node, SequenceNode):
implicit = (node.tag
== self.resolve(SequenceNode, node.value, True))
self.emit(SequenceStartEvent(alias, node.tag, implicit,
flow_style=node.flow_style))
index = 0
for item in node.value:
self.serialize_node(item, node, index)
index += 1
self.emit(SequenceEndEvent())
elif isinstance(node, MappingNode):
implicit = (node.tag
== self.resolve(MappingNode, node.value, True))
self.emit(MappingStartEvent(alias, node.tag, implicit,
flow_style=node.flow_style))
for key, value in node.value:
self.serialize_node(key, node, None)
self.serialize_node(value, node, key)
self.emit(MappingEndEvent())
self.ascend_resolver()

View File

@ -0,0 +1,104 @@
class Token(object):
def __init__(self, start_mark, end_mark):
self.start_mark = start_mark
self.end_mark = end_mark
def __repr__(self):
attributes = [key for key in self.__dict__
if not key.endswith('_mark')]
attributes.sort()
arguments = ', '.join(['%s=%r' % (key, getattr(self, key))
for key in attributes])
return '%s(%s)' % (self.__class__.__name__, arguments)
#class BOMToken(Token):
# id = '<byte order mark>'
class DirectiveToken(Token):
id = '<directive>'
def __init__(self, name, value, start_mark, end_mark):
self.name = name
self.value = value
self.start_mark = start_mark
self.end_mark = end_mark
class DocumentStartToken(Token):
id = '<document start>'
class DocumentEndToken(Token):
id = '<document end>'
class StreamStartToken(Token):
id = '<stream start>'
def __init__(self, start_mark=None, end_mark=None,
encoding=None):
self.start_mark = start_mark
self.end_mark = end_mark
self.encoding = encoding
class StreamEndToken(Token):
id = '<stream end>'
class BlockSequenceStartToken(Token):
id = '<block sequence start>'
class BlockMappingStartToken(Token):
id = '<block mapping start>'
class BlockEndToken(Token):
id = '<block end>'
class FlowSequenceStartToken(Token):
id = '['
class FlowMappingStartToken(Token):
id = '{'
class FlowSequenceEndToken(Token):
id = ']'
class FlowMappingEndToken(Token):
id = '}'
class KeyToken(Token):
id = '?'
class ValueToken(Token):
id = ':'
class BlockEntryToken(Token):
id = '-'
class FlowEntryToken(Token):
id = ','
class AliasToken(Token):
id = '<alias>'
def __init__(self, value, start_mark, end_mark):
self.value = value
self.start_mark = start_mark
self.end_mark = end_mark
class AnchorToken(Token):
id = '<anchor>'
def __init__(self, value, start_mark, end_mark):
self.value = value
self.start_mark = start_mark
self.end_mark = end_mark
class TagToken(Token):
id = '<tag>'
def __init__(self, value, start_mark, end_mark):
self.value = value
self.start_mark = start_mark
self.end_mark = end_mark
class ScalarToken(Token):
id = '<scalar>'
def __init__(self, value, plain, start_mark, end_mark, style=None):
self.value = value
self.plain = plain
self.start_mark = start_mark
self.end_mark = end_mark
self.style = style

View File

@ -0,0 +1,72 @@
"weather, thanks to google"
import os
import codecs
import thread
import urllib
from lxml import etree
from util import hook
lock = thread.allocate_lock()
stalk = {}
def load_stalk(filename, mtimes={}):
if not os.path.exists(filename):
return {}
mtime = os.stat(filename).st_mtime
if mtimes.get(filename, 0) != mtime:
mtimes[filename] = mtime
return dict(x.strip().split(None, 1) for x in
codecs.open(filename, 'r', 'utf-8'))
def save_stalk(filename, houses):
out = codecs.open(filename, 'w', 'utf-8')
out.write('\n'.join('%s %s' % x for x in sorted(houses.iteritems()))) #heh
out.flush()
out.close()
@hook.command
def weather(bot, input):
".weather <location> [dontsave] -- queries the google weather API for weather data"
global stalk
filename = os.path.join(bot.persist_dir, 'weather')
if not stalk:
with lock:
stalk = load_stalk(filename)
nick = input.nick.lower()
loc = input.inp.strip()
dontsave = loc.endswith(" dontsave")
if dontsave:
loc = loc[:-9].strip().lower()
if not loc: # blank line
loc = stalk.get(nick, '')
if not loc:
return weather.__doc__
data = urllib.urlencode({'weather': loc.encode('utf-8')})
url = 'http://www.google.com/ig/api?' + data
w = etree.parse(url).find('weather')
if w.find('problem_cause') is not None:
return "Couldn't fetch weather data for '%s', try using a zip or " \
"postal code." % input.inp
info = dict((e.tag, e.get('data')) for e in w.find('current_conditions'))
info['city'] = w.find('forecast_information/city').get('data')
info['high'] = w.find('forecast_conditions/high').get('data')
info['low'] = w.find('forecast_conditions/low').get('data')
input.reply('%(city)s: %(condition)s, %(temp_f)sF/%(temp_c)sC (H:%(high)sF'\
', L:%(low)sF), %(humidity)s, %(wind_condition)s.' % info)
if not dontsave and loc != stalk.get(nick, ''):
with lock:
stalk[nick] = loc
save_stalk(filename, stalk)

View File

@ -0,0 +1,57 @@
'''Searches wikipedia and returns first sentence of article
Scaevolus 2009'''
import urllib
from lxml import etree
import re
from util import hook
api_prefix = "http://en.wikipedia.org/w/api.php"
search_url = api_prefix + "?action=opensearch&search=%s&format=xml"
paren_re = re.compile('\s*\(.*\)$')
@hook.command(hook='w(\s+.*|$)')
@hook.command
def wiki(query):
'''.w/.wiki <phrase> -- gets first sentence of wikipedia ''' \
'''article on <phrase>'''
if not query.strip():
return wiki.__doc__
q = search_url % (urllib.quote(query.strip(), safe=''))
x = etree.parse(q)
ns = '{http://opensearch.org/searchsuggest2}'
items = x.findall(ns + 'Section/' + ns + 'Item')
if items == []:
if x.find('error') is not None:
return 'error: %(code)s: %(info)s' % x.find('error').attrib
else:
return 'no results found'
def extract(item):
return [item.find(ns + x).text for x in
('Text', 'Description', 'Url')]
title, desc, url = extract(items[0])
if 'may refer to' in desc:
title, desc, url = extract(items[1])
title = paren_re.sub('', title)
if title.lower() not in desc.lower():
desc = title + desc
desc = re.sub('\s+', ' ', desc).strip() #remove excess spaces
if len(desc) > 300:
desc = desc[:300] + '...'
return '%s -- %s' % (desc, url)

View File

@ -0,0 +1,41 @@
import re
from lxml import etree
import locale
from util import hook
def ytdata(id):
url = 'http://gdata.youtube.com/feeds/api/videos/' + id
x = etree.parse(url)
# I can't figure out how to deal with schemas/namespaces properly :(
yt = '{http://gdata.youtube.com/schemas/2007}'
media = '{http://search.yahoo.com/mrss/}'
rating = x.find('{http://schemas.google.com/g/2005}rating')
data = dict(rating.items())
data['title'] = x.find('{http://www.w3.org/2005/Atom}title').text
data['views'] = locale.format('%d', int(x.find(yt + 'statistics').get(
'viewCount')), 1)
length = int(x.find(media + 'group/' + yt + 'duration').get('seconds'))
data['length'] = ''
if length / 3600: # > 1 hour
data['length'] += str(length/3600) + 'h '
if length / 60: # > 1 minute
data['length'] += str(length/60 % 60) + 'm '
data['length'] += "%ds" % (length % 60)
return data
youtube_re = re.compile(r'.*youtube.*v=([-_a-z0-9]+)', flags=re.IGNORECASE)
#@hook.command(hook=r'(.*)', prefix=False)
def youtube(inp):
m = youtube_re.match(inp)
if m:
data = ytdata(m.group(1))
return '\x02%(title)s\x02 - rated \x02%(average)s/%(max)s\x02 ' \
'(%(numRaters)s) - views \x02%(views)s\x02 - length \x02' \
'%(length)s\x02' % data