import csv
import re
import sys


def search(req, line):
    """Return the first capture group of ``req`` found in ``line``.

    Args:
        req: Regular-expression pattern containing at least one capture group.
        line: Text to scan.

    Returns:
        The text of group 1 of the first match, or the literal string
        ``'no'`` when the pattern does not match anywhere in ``line``.
    """
    match = re.search(req, line)
    if match:
        return match.group(1)
    return 'no'


def _open_csv_for_reading(path):
    """Open ``path`` in the mode the running interpreter's csv module expects."""
    if sys.version_info[0] >= 3:
        # Python 3's csv module reads text and wants newline='' so that it can
        # handle embedded line breaks itself.
        # NOTE(review): UTF-8 is assumed for the input file -- confirm.
        return open(path, 'r', newline='', encoding='utf-8')
    # Python 2's csv module reads byte strings, hence binary mode.
    return open(path, 'rb')


def main(input_path='serp_html.csv'):
    """Print one comma-separated summary line per row of the input CSV.

    Each input row holds the search keyword in column 0 and a fragment of
    Baidu result HTML in column 1.  The printed fields are: keyword, rank,
    ranked site, Baidu URL (a redirect link that still has to be unescaped
    to obtain the real URL) and title.  A field whose pattern is not found
    is printed as ``no``.

    Args:
        input_path: Path of the CSV file to read.
    """
    # The context manager closes the file even if a row fails to parse.
    with _open_csv_for_reading(input_path) as csvfile:
        for row in csv.reader(csvfile):
            if not row:
                # csv.reader yields an empty list for a blank line.
                continue
            word = row[0]
            html = row[1]
            number = search(r'id="(\d+)"', html)
            # NOTE(review): this pattern captures everything before the first
            # "/" in the HTML; confirm that this is really the site name
            # (surrounding markup may be missing from the pattern).
            domain = search(r'(.*?)/.*', html)
            bdurl = search(r'href="(http://www.baidu.com/link\?url=[^"]*?)"', html)
            title = search(r'"title":"([^"]*?)"', html)
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print('%s,%s,%s,%s,%s' % (word, number, domain, bdurl, title))


if __name__ == '__main__':
    main()
以上是一个继承来的程序,运行后能 print 出正确结果,但我希望它能生成 CSV 报表文件;我尝试把 for 循环改写成函数,但失败了。
小菜鸟一枚,不知道怎么搞了,求大神指点
可以这样
# -*- coding: utf-8 -*-
# The encoding line above must be the first or second line of the saved
# script: Python 2 rejects the non-ASCII header strings below without it.
import csv
import re
import sys


def search(req, line):
    """Return the first capture group of ``req`` found in ``line``.

    Args:
        req: Regular-expression pattern containing at least one capture group.
        line: Text to scan.

    Returns:
        The text of group 1 of the first match, or the literal string
        ``'no'`` when the pattern does not match anywhere in ``line``.
    """
    match = re.search(req, line)
    if match:
        return match.group(1)
    return 'no'


def _open_csv(path, mode):
    """Open ``path`` the way the running interpreter's csv module requires.

    Args:
        path: File to open.
        mode: ``'r'`` to read or ``'w'`` to write.
    """
    if sys.version_info[0] >= 3:
        # Python 3's csv module works on text and wants newline='' so that it
        # controls line endings itself.
        # NOTE(review): UTF-8 is assumed for both files -- confirm.
        return open(path, mode, newline='', encoding='utf-8')
    # Python 2's csv module works on byte strings, hence binary mode.
    return open(path, mode + 'b')


def main(input_path='serp_html.csv', output_path='new_file.csv'):
    """Extract Baidu result data from the input CSV into a report CSV.

    Each input row holds the search keyword in column 0 and a fragment of
    Baidu result HTML in column 1.  The report starts with a header row and
    then has one row per input row with: keyword, rank, ranked site, Baidu
    URL (a redirect link that still has to be unescaped to obtain the real
    URL) and title.  A field whose pattern is not found is written as ``no``.

    Args:
        input_path: Path of the CSV file to read.
        output_path: Path of the report CSV to create (overwritten if present).
    """
    # Both files are closed on exit, including when a row raises.
    with _open_csv(input_path, 'r') as source, \
            _open_csv(output_path, 'w') as target:
        writer = csv.writer(target)
        # Header row: keyword, rank, ranked site, Baidu URL, title.
        writer.writerow(['关键词', '排名', '排名网站', '百度url', '标题'])
        for row in csv.reader(source):
            if not row:
                # csv.reader yields an empty list for a blank line.
                continue
            word = row[0]
            html = row[1]
            number = search(r'id="(\d+)"', html)
            domain = search(r'<span class="g">(.*?)/.*</span>', html)
            bdurl = search(r'href="(http://www.baidu.com/link\?url=[^"]*?)"', html)
            title = search(r'"title":"([^"]*?)"', html)
            writer.writerow((word, number, domain, bdurl, title))


if __name__ == '__main__':
    main()
代码未经测试,如有问题请自行稍作修改。