fix mtg parser
This commit is contained in:
parent
278bc2e13a
commit
309bf87202
|
@ -11,26 +11,35 @@ def mtg(inp):
|
||||||
url = 'http://magiccards.info/query.php?cardname='
|
url = 'http://magiccards.info/query.php?cardname='
|
||||||
url += urllib2.quote(inp, safe='')
|
url += urllib2.quote(inp, safe='')
|
||||||
h = html.parse(url)
|
h = html.parse(url)
|
||||||
name = h.find('/body/table/tr/td/table/tr/td/h1')
|
|
||||||
|
name = h.find('/body/table/tr/td/span/a')
|
||||||
if name is None:
|
if name is None:
|
||||||
return "no cards found"
|
return "no cards found"
|
||||||
card = name.getparent()
|
card = name.getparent().getparent().getparent()
|
||||||
text = card.find('p')
|
|
||||||
|
|
||||||
type = text.text
|
type = card.find('td/p').text.replace('\n', '')
|
||||||
text = text.find('b').text_content()
|
|
||||||
|
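# this is ugly: round-trip through serialized HTML so each <br> becomes a '$' marker, which is then turned into ". " between words or a plain space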
|
||||||
|
text = html.tostring(card.xpath("//p[@class='ctext']/b")[0])
|
||||||
|
text = text.replace('<br>', '$')
|
||||||
|
text = html.fromstring(text).text_content()
|
||||||
|
text = re.sub(r'(\w+\s*)\$+(\s*\w+)', r'\1. \2', text)
|
||||||
|
text = text.replace('$', ' ')
|
||||||
text = re.sub(r'\(.*?\)', '', text) # strip parenthetical explanations
|
text = re.sub(r'\(.*?\)', '', text) # strip parenthetical explanations
|
||||||
text = re.sub(r'\.(\S)', r'. \1', text) # fix spacing
|
text = re.sub(r'\.(\S)', r'. \1', text) # fix spacing
|
||||||
|
|
||||||
printings = card.find('table/tr/td/img').getparent().text_content()
|
|
||||||
|
printings = card.find('td/small').text_content()
|
||||||
|
printings = re.search(r'Editions:(.*)Languages:', printings).group(1)
|
||||||
printings = re.findall(r'\s*(.+?(?: \([^)]+\))*) \((.*?)\)',
|
printings = re.findall(r'\s*(.+?(?: \([^)]+\))*) \((.*?)\)',
|
||||||
' '.join(printings.split()))
|
' '.join(printings.split()))
|
||||||
|
|
||||||
printing_out = ', '.join('%s (%s)' % (set_abbrevs.get(x[0], x[0]),
|
printing_out = ', '.join('%s (%s)' % (set_abbrevs.get(x[0], x[0]),
|
||||||
rarity_abbrevs.get(x[1], x[1]))
|
rarity_abbrevs.get(x[1], x[1]))
|
||||||
for x in printings)
|
for x in printings)
|
||||||
|
|
||||||
name.make_links_absolute()
|
name.make_links_absolute()
|
||||||
link = name.find('a').attrib['href']
|
link = name.attrib['href']
|
||||||
name = name.text_content().strip()
|
name = name.text_content().strip()
|
||||||
type = type.strip()
|
type = type.strip()
|
||||||
text = ' '.join(text.split())
|
text = ' '.join(text.split())
|
||||||
|
@ -50,7 +59,9 @@ set_abbrevs = {
|
||||||
'Arena League': 'ARENA',
|
'Arena League': 'ARENA',
|
||||||
'Asia Pacific Land Program': 'APAC',
|
'Asia Pacific Land Program': 'APAC',
|
||||||
'Battle Royale': 'BR',
|
'Battle Royale': 'BR',
|
||||||
'Beatdown': 'BD',
|
'Battle Royale Box Set': 'BRB',
|
||||||
|
'Beatdown': 'BTD',
|
||||||
|
'Beatdown Box Set': 'BTD',
|
||||||
'Betrayers of Kamigawa': 'BOK',
|
'Betrayers of Kamigawa': 'BOK',
|
||||||
'Celebration Cards': 'UQC',
|
'Celebration Cards': 'UQC',
|
||||||
'Champions of Kamigawa': 'CHK',
|
'Champions of Kamigawa': 'CHK',
|
||||||
|
@ -70,6 +81,7 @@ set_abbrevs = {
|
||||||
'Duel Decks: Elves vs. Goblins': 'EVG',
|
'Duel Decks: Elves vs. Goblins': 'EVG',
|
||||||
'Duel Decks: Garruk vs. Liliana': 'GVL',
|
'Duel Decks: Garruk vs. Liliana': 'GVL',
|
||||||
'Duel Decks: Jace vs. Chandra': 'JVC',
|
'Duel Decks: Jace vs. Chandra': 'JVC',
|
||||||
|
'Eighth Edition': '8ED',
|
||||||
'Eighth Edition Box Set': '8EB',
|
'Eighth Edition Box Set': '8EB',
|
||||||
'European Land Program': 'EURO',
|
'European Land Program': 'EURO',
|
||||||
'Eventide': 'EVE',
|
'Eventide': 'EVE',
|
||||||
|
@ -97,8 +109,10 @@ set_abbrevs = {
|
||||||
'Legend Membership': 'DCILM',
|
'Legend Membership': 'DCILM',
|
||||||
'Legends': 'LG',
|
'Legends': 'LG',
|
||||||
'Legions': 'LE',
|
'Legions': 'LE',
|
||||||
'Limited Edition (Alpha)': 'AL',
|
'Limited Edition (Alpha)': 'LEA',
|
||||||
'Limited Edition (Beta)': 'BE',
|
'Limited Edition (Beta)': 'LEB',
|
||||||
|
'Limited Edition Alpha': 'LEA',
|
||||||
|
'Limited Edition Beta': 'LEB',
|
||||||
'Lorwyn': 'LW',
|
'Lorwyn': 'LW',
|
||||||
'MTGO Masters Edition': 'MED',
|
'MTGO Masters Edition': 'MED',
|
||||||
'MTGO Masters Edition II': 'ME2',
|
'MTGO Masters Edition II': 'ME2',
|
||||||
|
@ -138,6 +152,7 @@ set_abbrevs = {
|
||||||
'Shadowmoor': 'SHM',
|
'Shadowmoor': 'SHM',
|
||||||
'Shards of Alara': 'ALA',
|
'Shards of Alara': 'ALA',
|
||||||
'Starter': 'ST',
|
'Starter': 'ST',
|
||||||
|
'Starter 1999': 'S99',
|
||||||
'Starter 2000 Box Set': 'ST2K',
|
'Starter 2000 Box Set': 'ST2K',
|
||||||
'Stronghold': 'SH',
|
'Stronghold': 'SH',
|
||||||
'Summer of Magic': 'SOM',
|
'Summer of Magic': 'SOM',
|
||||||
|
|
Loading…
Reference in New Issue