fix snopes to work with the new page format
This commit is contained in:
parent
c8b4d5e15a
commit
68f2680b54
|
@ -19,8 +19,14 @@ def snopes(inp):
|
||||||
snopes_page = http.get_html(result_urls[0])
|
snopes_page = http.get_html(result_urls[0])
|
||||||
snopes_text = snopes_page.text_content()
|
snopes_text = snopes_page.text_content()
|
||||||
|
|
||||||
claim = re.search(r"Claim: .*", snopes_text).group(0)
|
claim = re.search(r"Claim: .*", snopes_text).group(0).strip()
|
||||||
status = re.search(r"Status: .*", snopes_text).group(0)
|
status = re.search(r"Status: .*", snopes_text)
|
||||||
|
|
||||||
|
if status is not None:
|
||||||
|
status = status.group(0).strip()
|
||||||
|
else: # new-style statuses
|
||||||
|
status = "Status: %s." % re.search(r"FALSE|TRUE|MIXTURE|UNDETERMINED",
|
||||||
|
snopes_text).group(0).title()
|
||||||
|
|
||||||
claim = re.sub(r"[\s\xa0]+", " ", claim) # compress whitespace
|
claim = re.sub(r"[\s\xa0]+", " ", claim) # compress whitespace
|
||||||
status = re.sub(r"[\s\xa0]+", " ", status)
|
status = re.sub(r"[\s\xa0]+", " ", status)
|
||||||
|
|
Loading…
Reference in New Issue