JSON-LD: Tweak (News)Article description extraction
When processing Article/NewsArticle-typed JSON-LD nodes, extract the description from the articleBody field, falling back to the description field when articleBody is missing or empty.
This commit is contained in:
parent
d6469de1da
commit
a2fd63ce22
@@ -1338,7 +1338,7 @@ class InfoExtractor(object):
|
|||||||
info.update({
|
info.update({
|
||||||
'timestamp': parse_iso8601(e.get('datePublished')),
|
'timestamp': parse_iso8601(e.get('datePublished')),
|
||||||
'title': unescapeHTML(e.get('headline')),
|
'title': unescapeHTML(e.get('headline')),
|
||||||
'description': unescapeHTML(e.get('articleBody')),
|
'description': unescapeHTML(e.get('articleBody') or e.get('description')),
|
||||||
})
|
})
|
||||||
elif item_type == 'VideoObject':
|
elif item_type == 'VideoObject':
|
||||||
extract_video_object(e)
|
extract_video_object(e)
|
||||||
|
Loading…
Reference in New Issue
Block a user