"""Scrapy spider that collects speaker names from the FOSDEM schedule."""
from scrapy.spider import Spider
from scrapy.http import Request

from pycon_speakers.items import Speaker


class FosdemSpider(Spider):
    """A spider to crawl Fosdem conference speakers.

    Requests the schedule events listing of one FOSDEM edition and yields a
    ``Speaker`` item for every link text found in the second cell of the
    page's table rows.

    The edition defaults to 2015. It is read from the ``year`` attribute, so
    it can be overridden with a spider argument, e.g.
    ``scrapy crawl fosdem.org -a year=2014``.
    """
    name = 'fosdem.org'

    # Edition to crawl. A value supplied as a spider argument arrives as a
    # string, so it is normalised with int() wherever it is used.
    year = 2015

    # Single source of truth for the listing URL, so the requested page and
    # the year recorded on each item cannot drift apart.
    # NOTE(review): assumes editions other than 2015 use the same URL layout
    # and table structure -- confirm before crawling them.
    schedule_url = 'https://fosdem.org/{year}/schedule/events/'

    def start_requests(self):
        """Yield the request for the selected edition's events listing.

        Raises:
            ValueError: if ``year`` cannot be converted to an integer.
        """
        yield Request(self.schedule_url.format(year=int(self.year)))

    def parse(self, response):
        """Yield one ``Speaker`` item per speaker link in the response.

        Args:
            response: the downloaded events listing page.

        Yields:
            ``Speaker`` items carrying the link text as ``name``, the spider
            name as ``conference`` and the crawled edition as ``year``.
        """
        year = int(self.year)
        for text in response.xpath('//tr/td[2]/a/text()').extract():
            speaker = text.strip()
            if not speaker:
                # Whitespace-only text nodes carry no name; skip them rather
                # than emit an empty item.
                continue
            yield Speaker(
                name=speaker,
                conference=self.name,
                year=year)