h/plugins/metacritic.py

# metacritic.com scraper

import re
from urllib2 import HTTPError

from util import hook, http


@hook.command('mc')
def metacritic(inp):
    '.mc [all|movie|tv|album|x360|ps3|pc|gba|ds|3ds|wii|vita|wiiu|xone|ps4] <title> -- gets rating for'\
    ' <title> from metacritic on the specified medium'

    # NOTE: the string above is this function's docstring and reads as the
    # user-facing usage line (presumably surfaced as help text by the hook
    # framework -- confirm), so it is kept verbatim.
    #
    # Looks up <title> on metacritic.com and returns a one-line summary:
    #     '[PLATFORM] name - score, release: date -- link'
    #
    # inp: raw command argument, optionally starting with one of the
    #      platform/medium keywords listed in the usage string.
    # Returns a short human-readable message when the page cannot be fetched
    # or no usable result can be found.

    # if the results suck, it's metacritic's fault

    args = inp.strip()

    game_platforms = ('x360', 'ps3', 'pc', 'gba', 'ds', '3ds', 'wii', 'vita', 'wiiu', 'xone', 'ps4')
    all_platforms = game_platforms + ('all', 'movie', 'tv', 'album')

    # Split off a leading platform keyword.  When there is no space at all,
    # or the first word is not a known platform, the whole input is the
    # title and we search across every medium.
    plat, sep, title = args.partition(' ')
    if not sep or plat not in all_platforms:
        plat = 'all'
        title = args

    # every game platform shares the single 'game' search category
    cat = 'game' if plat in game_platforms else plat

    title_safe = http.quote_plus(title)

    url = 'http://www.metacritic.com/search/%s/%s/results' % (cat, title_safe)

    try:
        doc = http.get_html(url)
    except HTTPError:
        return 'error fetching results'

    # result format (sample markup; note that the score lookup further down
    # uses the 'metascore_w' class, not the 'metascore' class shown here):
    # -- game result, with score
    # -- subsequent results are the same structure, without first_result class
    # <li class="result first_result">
    #     <div class="result_type">
    #         <strong>Game</strong>
    #         <span class="platform">WII</span>
    #     </div>
    #     <div class="result_wrap">
    #         <div class="basic_stats has_score">
    #             <div class="main_stats">
    #                 <h3 class="product_title basic_stat">...</h3>
    #                 <div class="std_score">
    #                   <div class="score_wrap">
    #                     <span class="label">Metascore: </span>
    #                     <span class="data metascore score_favorable">87</span>
    #                   </div>
    #                 </div>
    #             </div>
    #             <div class="more_stats extended_stats">...</div>
    #         </div>
    #     </div>
    # </li>
    #
    # -- other platforms are the same basic layout
    # -- if it doesn't have a score, there is no div.basic_score
    # -- the <div class="result_type"> changes content for non-games:
    # <div class="result_type"><strong>Movie</strong></div>

    if not doc.find_class('query_results'):
        return 'no results found'

    # if they specified an invalid search term, the input box will be empty.
    # A default is passed so that a page without the search box does not
    # raise KeyError; in that case we simply carry on to the results.
    search_box = doc.get_element_by_id('search_term', None)
    if search_box is not None and search_box.value == '':
        return 'invalid search term'

    # get the proper result element we want to pull data from
    result = None

    if plat not in game_platforms:
        # for [all] results, or non-game platforms, get the first result
        first_results = doc.find_class('result first_result')
        if not first_results:
            return 'no results found'
        result = first_results[0]

        # find the platform, if it exists
        result_type = result.find_class('result_type')
        if result_type:
            # if the result_type div has a platform element, use that one;
            # otherwise fall back to the result_type text (e.g. "Movie")
            platform_div = result_type[0].find_class('platform')
            if platform_div:
                plat = platform_div[0].text_content().strip()
            else:
                plat = result_type[0].text_content().strip()
    else:
        # for games, we want to pull the first result with the correct
        # platform; results lacking a platform element are skipped rather
        # than aborting the whole lookup
        wanted = plat.upper()
        for res in doc.find_class('result'):
            platform_nodes = res.find_class('platform')
            if platform_nodes and platform_nodes[0].text_content().strip() == wanted:
                result = res
                break

    # compare against None explicitly: truth-testing a parsed element is
    # not a reliable "found / not found" check
    if result is None:
        return 'no results found'

    # get the name, release date, and score from the result
    title_nodes = result.find_class('product_title')
    if not title_nodes:
        # a result without a title block gives us nothing to report
        return 'no results found'
    product_title = title_nodes[0]
    name = product_title.text_content()

    # link to the title page; fall back to the search URL when the title
    # has no direct anchor child or the anchor has no href
    anchor = product_title.find('a')
    href = anchor.get('href') if anchor is not None else None
    link = ('http://metacritic.com' + href) if href else url

    try:
        release = result.find_class('release_date')[0].\
            find_class('data')[0].text_content()

        # strip extra spaces out of the release date
        release = re.sub(r'\s{2,}', ' ', release)
    except IndexError:
        # no release date block in this result
        release = None

    try:
        score = result.find_class('metascore_w')[0].text_content()
    except IndexError:
        # unscored title
        score = None

    return '[%s] %s - %s, %s -- %s' % (plat.upper(), name,
                                       score or 'no score',
                                       ('release: %s' % release) if release else 'unreleased',
                                       link)
update comments 2011-02-22 18:03:46 +00:00			`# metacritic.com scraper`
working metacritic plugin 2011-02-22 17:19:00 +00:00
strip extra spaces from the release date 2011-02-22 17:28:35 +00:00			`import re`
working metacritic plugin 2011-02-22 17:19:00 +00:00			`from urllib2 import HTTPError`

reconnect on socket.error, PEP8 2011-05-11 20:40:04 +00:00			`from util import hook, http`

working metacritic plugin 2011-02-22 17:19:00 +00:00
			`@hook.command('mc')`
			`def metacritic(inp):`
metacritic: Resets PSV to VITA, added XONE and PS4, changes score class check from metascore to metascore_w 2013-11-20 22:20:54 +00:00			`'.mc [all\|movie\|tv\|album\|x360\|ps3\|pc\|gba\|ds\|3ds\|wii\|vita\|wiiu\|xone\|ps4] <title> -- gets rating for'\`
			`' <title> from metacritic on the specified medium'`
working metacritic plugin 2011-02-22 17:19:00 +00:00
update comments 2011-02-22 18:03:46 +00:00			`# if the results suck, it's metacritic's fault`

working metacritic plugin 2011-02-22 17:19:00 +00:00			`args = inp.strip()`

metacritic: Resets PSV to VITA, added XONE and PS4, changes score class check from metascore to metascore_w 2013-11-20 22:20:54 +00:00			`game_platforms = ('x360', 'ps3', 'pc', 'gba', 'ds', '3ds', 'wii', 'vita', 'wiiu', 'xone', 'ps4')`
reconnect on socket.error, PEP8 2011-05-11 20:40:04 +00:00			`all_platforms = game_platforms + ('all', 'movie', 'tv', 'album')`
working metacritic plugin 2011-02-22 17:19:00 +00:00
			`try:`
			`plat, title = args.split(' ', 1)`
			`if plat not in all_platforms:`
update comments 2011-02-22 18:03:46 +00:00			`# raise the ValueError so that the except block catches it`
			`# in this case, or in the case of the .split above raising the`
			`# ValueError, we want the same thing to happen`
working metacritic plugin 2011-02-22 17:19:00 +00:00			`raise ValueError`
			`except ValueError:`
			`plat = 'all'`
			`title = args`

			`cat = 'game' if plat in game_platforms else plat`

			`title_safe = http.quote_plus(title)`

			`url = 'http://www.metacritic.com/search/%s/%s/results' % (cat, title_safe)`

			`try:`
			`doc = http.get_html(url)`
clean up metacritic error messages 2011-02-22 17:20:14 +00:00			`except HTTPError:`
			`return 'error fetching results'`
working metacritic plugin 2011-02-22 17:19:00 +00:00
			`''' result format:`
			`-- game result, with score`
			`-- subsequent results are the same structure, without first_result class`
			`<li class="result first_result">`
			`<div class="result_type">`
			`<strong>Game</strong>`
			`<span class="platform">WII</span>`
			`</div>`
			`<div class="result_wrap">`
			`<div class="basic_stats has_score">`
			`<div class="main_stats">`
			`<h3 class="product_title basic_stat">...</h3>`
			`<div class="std_score">`
reconnect on socket.error, PEP8 2011-05-11 20:40:04 +00:00			`<div class="score_wrap">`
			`<span class="label">Metascore: </span>`
			`<span class="data metascore score_favorable">87</span>`
			`</div>`
working metacritic plugin 2011-02-22 17:19:00 +00:00			`</div>`
			`</div>`
			`<div class="more_stats extended_stats">...</div>`
			`</div>`
			`</div>`
			`</li>`

			`-- other platforms are the same basic layout`
			`-- if it doesn't have a score, there is no div.basic_score`
			`-- the <div class="result_type"> changes content for non-games:`
			`<div class="result_type"><strong>Movie</strong></div>`
			`'''`

			`# get the proper result element we want to pull data from`

			`result = None`

			`if not doc.find_class('query_results'):`
			`return 'no results found'`

handle invalid search terms 2011-02-22 17:37:29 +00:00			`# if they specified an invalid search term, the input box will be empty`
			`if doc.get_element_by_id('search_term').value == '':`
			`return 'invalid search term'`

working metacritic plugin 2011-02-22 17:19:00 +00:00			`if plat not in game_platforms:`
			`# for [all] results, or non-game platforms, get the first result`
			`result = doc.find_class('result first_result')[0]`

			`# find the platform, if it exists`
			`result_type = result.find_class('result_type')`
			`if result_type:`

			`# if the result_type div has a platform div, get that one`
			`platform_div = result_type[0].find_class('platform')`
			`if platform_div:`
return link to metacritic title 2011-02-22 17:54:20 +00:00			`plat = platform_div[0].text_content().strip()`
			`else:`
			`# otherwise, use the result_type text_content`
			`plat = result_type[0].text_content().strip()`
working metacritic plugin 2011-02-22 17:19:00 +00:00
			`else:`
			`# for games, we want to pull the first result with the correct`
			`# platform`
			`results = doc.find_class('result')`
			`for res in results:`
return link to metacritic title 2011-02-22 17:54:20 +00:00			`result_plat = res.find_class('platform')[0].text_content().strip()`
working metacritic plugin 2011-02-22 17:19:00 +00:00			`if result_plat == plat.upper():`
			`result = res`
			`break`

			`if not result:`
clean up metacritic error messages 2011-02-22 17:20:14 +00:00			`return 'no results found'`
working metacritic plugin 2011-02-22 17:19:00 +00:00
			`# get the name, release date, and score from the result`
return link to metacritic title 2011-02-22 17:54:20 +00:00			`product_title = result.find_class('product_title')[0]`
			`name = product_title.text_content()`
			`link = 'http://metacritic.com' + product_title.find('a').attrib['href']`
working metacritic plugin 2011-02-22 17:19:00 +00:00
strip extra spaces from the release date 2011-02-22 17:28:35 +00:00			`try:`
			`release = result.find_class('release_date')[0].\`
			`find_class('data')[0].text_content()`

			`# strip extra spaces out of the release date`
			`release = re.sub(r'\s{2,}', ' ', release)`
			`except IndexError:`
			`release = None`
working metacritic plugin 2011-02-22 17:19:00 +00:00
			`try:`
metacritic: Resets PSV to VITA, added XONE and PS4, changes score class check from metascore to metascore_w 2013-11-20 22:20:54 +00:00			`score = result.find_class('metascore_w')[0].text_content()`
working metacritic plugin 2011-02-22 17:19:00 +00:00			`except IndexError:`
			`score = None`

reconnect on socket.error, PEP8 2011-05-11 20:40:04 +00:00			`return '[%s] %s - %s, %s -- %s' % (plat.upper(), name,`
flake8 + autopep8 (whitespace fixes) 2014-01-14 21:12:37 +00:00			`score or 'no score',`
			`'release: %s' % release if release else 'unreleased',`
			`link)`