Python 获取公开信息-Toy模板网

这篇具有很好参考价值的文章主要介绍了Python 获取公开信息。希望对大家有所帮助。如果存在错误或未考虑完全的地方，请大家不吝赐教，您也可以点击"举报违法"按钮提交疑问。

大众点评的店铺列表页最多只显示50页，因此要尽可能完整地抓取，就需要按区域和店铺类型把查询条件逐级细分。以成都餐饮为例：先把分类细分到最小一级，区域范围则依次从全市到区县再到街道；如果某个区域下该分类的结果不足50页就可以直接抓取，否则继续细分。

大众点评的页面有时会对数据加密，做法是用它自定义的字体（woff 文件）来显示数字、地址等字符；下载对应的字体文件并按字形顺序转码即可还原。页面没有加密时可以跳过这一步。

首先把数据根据地区和类型分解到小于50页并存在数据库，然后一页页抓取基本信息，最后通过观察的接口获取详细信息如详细地址、经纬度、各项评分、评价数等。

# -*- coding: utf-8 -*-
import json
import requests
import pymysql
import time
from fontTools.ttLib import TTFont


def woff_dict(key):
    """Build the glyph-name -> character table for one of the obfuscation fonts.

    Args:
        key: ``'address'`` or ``'num'``; selects which downloaded woff file
            is read.

    Returns:
        A dict pairing each entry of the font's glyph order with the
        character at the same position: ``'.notdef'`` and ``'x'`` for glyph
        IDs 0 and 1, then the 601 characters of ``woff_str_601``.

    Raises:
        ValueError: if ``key`` is not one of the two supported names. The
            original fell through both branches and only failed later with
            an UnboundLocalError on ``woff``.
    """
    if key == 'address':
        font_path = 'C:\\Users\\Administrator\\Desktop\\address.woff'
    elif key == 'num':
        font_path = 'C:\\Users\\Administrator\\Desktop\\num.woff'
    else:
        raise ValueError("woff_dict key must be 'address' or 'num', got {!r}".format(key))
    woff = TTFont(font_path)  # read the woff file
    # The 601 characters carried by glyph IDs 2..602 of the woff file
    woff_str_601 = '1234567890店中美家馆小车大市公酒行国品发电金心业商司超生装园场食有新限天面工服海华水房饰城乐汽香部利子老艺花专东肉菜学福饭人百餐茶务通味所山区门药银农龙停尚安广鑫一容动南具源兴鲜记时机烤文康信果阳理锅宝达地儿衣特产西批坊州牛佳化五米修爱北养卖建材三会鸡室红站德王光名丽油院堂烧江社合星货型村自科快便日民营和活童明器烟育宾精屋经居庄石顺林尔县手厅销用好客火雅盛体旅之鞋辣作粉包楼校鱼平彩上吧保永万物教吃设医正造丰健点汤网庆技斯洗料配汇木缘加麻联卫川泰色世方寓风幼羊烫来高厂兰阿贝皮全女拉成云维贸道术运都口博河瑞宏京际路祥青镇厨培力惠连马鸿钢训影甲助窗布富牌头四多妆吉苑沙恒隆春干饼氏里二管诚制售嘉长轩杂副清计黄讯太鸭号街交与叉附近层旁对巷栋环省桥湖段乡厦府铺内侧元购前幢滨处向座下臬凤港开关景泉塘放昌线湾政步宁解白田町溪十八古双胜本单同九迎第台玉锦底后七斜期武岭松角纪朝峰六振珠局岗洲横边济井办汉代临弄团外塔杨铁浦字年岛陵原梅进荣友虹央桂沿事津凯莲丁秀柳集紫旗张谷的是不了很还个也这我就在以可到错没去过感次要比觉看得说常真们但最喜哈么别位能较境非为欢然他挺着价那意种想出员两推做排实分间甜度起满给热完格荐喝等其再几只现朋候样直而买于般豆量选奶打每评少算又因情找些份置适什蛋师气你姐棒试总定啊足级整带虾如态且尝主话强当更板知己无酸让入啦式笑赞片酱差像提队走嫩才刚午接重串回晚微周值费性桌拍跟块调糕'
    # Glyph names in glyph-ID order, reached through the font's 'cmap' table
    glyph_names = woff['cmap'].tables[0].ttFont.getGlyphOrder()
    # Prepend the two special entries for glyph IDs 0 and 1
    characters = ['.notdef', 'x'] + list(woff_str_601)
    # A distinct local name avoids shadowing this function inside its own body
    return dict(zip(glyph_names, characters))

def decodestr(firststr):
    """Decode a font-obfuscated markup string into plain text.

    The input is split on ``<`` and the closing-tag remnants ``/d>`` and
    ``/e>`` are removed from every piece. A piece containing ``address`` (or
    ``num``) somewhere after its first character is replaced by the entry of
    the module-level ``addressdict`` (or ``numdict``) keyed by the four
    characters that precede its final character. Every other piece is kept
    unchanged.

    Args:
        firststr: the markup string to decode.

    Returns:
        The decoded pieces concatenated in their original order.

    Raises:
        KeyError: if an extracted code is missing from the lookup table.
    """
    decoded = []
    for fragment in firststr.split("<"):
        fragment = fragment.replace("/d>", "").replace("/e>", "")
        if fragment.find("address") > 0:
            # the glyph code sits just before the trailing character
            decoded.append(addressdict[fragment[-5:-1]])
        elif fragment.find("num") > 0:
            decoded.append(numdict[fragment[-5:-1]])
        else:
            # plain text (possibly empty) passes straight through
            decoded.append(fragment)
    return "".join(decoded)

# Return the number of result pages for the given listing URL
def getpagecount(URLstr,countryname):
    """Fetch a listing page and return how many result pages it reports.

    The request is sent with the module-level ``headers``. Network errors
    and "403 Forbidden" responses are retried indefinitely after a pause.

    Args:
        URLstr: listing URL to request.
        countryname: area name; only used in the progress messages printed
            here.

    Returns:
        0 when the page says no matching shops were found, 1 when the page
        has no pagination bar, otherwise the number taken from the last
        pagination link that carries a ``title`` attribute (the "next page"
        link is ignored).

    Raises:
        ValueError: if a pagination bar is present but no page number can be
            read from it, or the value read is not an integer. The original
            failed with an UnboundLocalError in the first case.
    """
    # Retry in a loop instead of by recursion so a long outage or ban cannot
    # exhaust the recursion limit. Only request errors are caught, so Ctrl-C
    # still stops the script.
    while True:
        try:
            res = requests.get(URLstr, headers=headers, timeout=100).text
        except requests.RequestException:
            time.sleep(120)
            continue
        # If the crawl was blocked, sleep and fetch again
        if "403 Forbidden" in res:
            time.sleep(60)
            print(URLstr+"  "+"403 forbidden   "+countryname)
            continue
        break

    if "没有找到符合条件的商户" in res:
        return 0

    # A missing pagination bar means there is only one page
    page_start = res.find("div class=\"page\"")
    if page_start < 0:
        print(URLstr+" "+"1页   "+countryname)
        return 1

    # Keep only the pagination bar and drop the "next page" link so that the
    # last titled link left is the highest page number.
    pagestr = res[page_start:].partition("</div>")[0]
    pagestr = pagestr.replace("title=\"下一页\">下一页","")
    for page in reversed(pagestr.split("</a>")):
        title_start = page.find("title=\"")
        if title_start < 0:
            continue
        pageCount = page[title_start+7:].partition("\"")[0]
        print(URLstr+" "+pageCount+"页  "+countryname)
        return int(pageCount)
    raise ValueError("pagination bar found but no page number could be read: " + URLstr)

if __name__ == '__main__':
    # Script 1: measure the page count of every stored listing URL and, where a
    # listing is capped at 50 pages, split it by area and store the narrower URLs.

    # Build the glyph-code -> character tables. str(dict) is stripped of braces,
    # spaces and the "uni" prefix of quoted names, split on commas, and every
    # "'key':'value'" piece is turned back into a one-entry dict with eval().
    # NOTE(review): eval() on text derived from a font file; building the dict
    # directly from woff_dict(...).items() would avoid it.
    woffnum = (str)(woff_dict('num')).replace("{","").replace("}","").replace(" ","").replace("'uni","'")
    woffaddress = (str)(woff_dict('address')).replace("{","").replace("}","").replace(" ","").replace("'uni","'")
    numdict = {}
    newdict = woffnum.split(",")
    for d in newdict:
            d = '{' + d + '}'
            d = eval(d)
            numdict.update(d)

    addressdict = {}
    newdict = woffaddress.split(",")
    for d in newdict:
            d = '{' + d + '}'
            d = eval(d)
            addressdict.update(d)

    # NOTE(review): baseURL and requeststr1 are not referenced again in the visible script.
    baseURL =  "https://www.dianping.com/chengdu/ch10"
    requeststr1 = baseURL
    # Request headers; read as a global by getpagecount()
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36",
        "Cookie" : "自己的cookie",
        'Accept':'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7'
    }

    # Open the database connection
    conn = pymysql.connect(host = 'localhost',user = "root",passwd = "自己的密码",db = "大众点评")
    cur=conn.cursor()
    querysql = "SELECT mainParentCategoryId,pageCount,countryid,url,islast FROM dazhong_paging_restaurant"
    cur.execute(querysql)
    if cur.rowcount<1:
        # The paging table is empty; the message says it must be initialised first
        print("需要初始化分页数据库")
    else:
        lists = cur.fetchall()
        # NOTE(review): the loop variable `list` shadows the builtin for the rest of the script.
        for list in lists:
            mainParentCategoryId = list[0]
            pageCount = list[1]
            countryid = list[2]
            URLstr = list[3]
            islast = list[4]
            # Rows never measured (NULL), or stored as 50 pages and not marked
            # final, are measured now and subdivided if needed
            if pageCount==None or (pageCount==50 and islast!=1): 
                            # Fetch the page count for this URL
                            pageCount = getpagecount(URLstr,"")
                            if pageCount==0:
                                continue
                            # Under 50 pages: store the measured count
                            # NOTE(review): SQL is assembled with str.format throughout this script;
                            # parameterized queries would be safer.
                            if pageCount<50:
                                insertSQLStrings="REPLACE INTO `大众点评`.`dazhong_paging_restaurant`(`mainParentCategoryId`, `pageCount`, `countryid`, `url`) VALUES ({},{},{},'{}')".format(mainParentCategoryId,pageCount,countryid,URLstr)
                                cur.execute(insertSQLStrings)
                            # 50 pages or more: subdivide by district/county, and if a
                            # district is still at 50 pages, down to street level at most
                            else:
                                querysql = "SELECT countryid,countryname,parentid FROM chengduareacode WHERE parentid = {}".format(countryid)
                                cur.execute(querysql)
                                # Already at the finest level but still at 50 pages:
                                # can only be recorded in the database (islast=1)
                                if cur.rowcount<1:
                                    insertSQLStrings="REPLACE INTO `大众点评`.`dazhong_paging_restaurant`(`mainParentCategoryId`, `pageCount`, `countryid`, `url`,`islast`) VALUES ({},50,{},'{}',1)".format(mainParentCategoryId,countryid,URLstr)
                                    cur.execute(insertSQLStrings)
                                else:
                                    countryids = cur.fetchall()
                                    # NOTE(review): `countryid` is rebound from the row tuple to its first
                                    # element inside the loop, hiding the outer row's countryid.
                                    for countryid in countryids:
                                        time.sleep(11)
                                        countryname = countryid[1]
                                        countryid = countryid[0]
                                        # These ids get an "r" URL suffix, all others "c"
                                        # (presumably how the site addresses the two kinds of area; confirm)
                                        if countryid in (10,35,36,37,38,39,4956):
                                            URLstrnew = URLstr+"r"+(str)(countryid)
                                        else:
                                            URLstrnew = URLstr+"c"+(str)(countryid)
                                        pageCount = getpagecount(URLstrnew,countryname)
                                        if pageCount==0:
                                            continue
                                        # Remove the parent (unsplit) row; runs once per non-empty child area
                                        insertSQLString1="DELETE from `大众点评`.`dazhong_paging_restaurant` where url='{}'".format(URLstr)
                                        cur.execute(insertSQLString1)
                                        if pageCount<50:
                                            # The parent row was deleted just above; now write the district-level row
                                            insertSQLString2="REPLACE INTO `大众点评`.`dazhong_paging_restaurant`(`mainParentCategoryId`, `pageCount`, `countryid`, `url`) VALUES ({},{},{},'{}')".format(mainParentCategoryId,pageCount,countryid,URLstrnew)
                                            cur.execute(insertSQLString2)
                                            # NOTE(review): appears to have no effect, since URLstrnew is
                                            # reassigned before its next use.
                                            URLstrnew = URLstr
                                        else:
                                            # Subdivide further, down to street level
                                            querysql = "SELECT countryid,countryname,parentid FROM chengduareacode WHERE parentid = {}".format(countryid)
                                            cur.execute(querysql)
                                            # Already at the finest level but still at 50 pages:
                                            # can only be recorded in the database (islast=1)
                                            # NOTE(review): this records URLstr (the parent URL deleted above)
                                            # together with the child countryid, not URLstrnew; confirm intended.
                                            if cur.rowcount<1:
                                                insertSQLStrings="REPLACE INTO `大众点评`.`dazhong_paging_restaurant`(`mainParentCategoryId`, `pageCount`, `countryid`, `url`,`islast`) VALUES ({},50,{},'{}',1)".format(mainParentCategoryId,countryid,URLstr)
                                                cur.execute(insertSQLStrings)
                                            else:
                                                # NOTE(review): rebinding countryids/countryid here does not alter
                                                # the outer loop's iteration, which keeps its own iterator.
                                                countryids = cur.fetchall()
                                                for countryid in countryids:
                                                    time.sleep(11)
                                                    countryid = countryid[0]
                                                    # Street URLs are built from the parent URL plus "r" + id
                                                    URLstrnew = URLstr+"r"+(str)(countryid)
                                                    pageCount = getpagecount(URLstrnew,"")
                                                    if pageCount==0:
                                                        continue
                                                    if pageCount<50:
                                                        # Write the street-level row (a DELETE of the district-level
                                                        # row was commented out here in the original)
                                                        insertSQLString2="REPLACE INTO `大众点评`.`dazhong_paging_restaurant`(`mainParentCategoryId`, `pageCount`, `countryid`, `url`) VALUES ({},{},{},'{}')".format(mainParentCategoryId,pageCount,countryid,URLstrnew)
                                                        cur.execute(insertSQLString2)
                                                        URLstrnew = URLstr
                                                    # Finest level reached and still at 50 pages:
                                                    # record it as final (islast=1)
                                                    elif pageCount==50:
                                                        insertSQLStrings="REPLACE INTO `大众点评`.`dazhong_paging_restaurant`(`mainParentCategoryId`, `pageCount`, `countryid`, `url`,`islast`) VALUES ({},50,{},'{}',1)".format(mainParentCategoryId,countryid,URLstrnew)
                                                        cur.execute(insertSQLStrings)
                                                        URLstrnew = URLstr
                                                        print("最小限度划分已满50页：")
                                                        print(insertSQLStrings)
                                        # Commit after each district has been processed
                                        conn.commit()
                            # Commit after each re-measured row
                            conn.commit()

Python 获取公开信息

这一步完成后，根据这些分好类的链接抓取基本信息

# -*- coding: utf-8 -*-
import json
import requests
from fontTools.ttLib import TTFont
import pymysql
import time


def woff_dict(key):
    """Build the glyph-name -> character table for one of the obfuscation fonts.

    Args:
        key: ``'address'`` or ``'num'``; selects which downloaded woff file
            is read.

    Returns:
        A dict pairing each entry of the font's glyph order with the
        character at the same position: ``'.notdef'`` and ``'x'`` for glyph
        IDs 0 and 1, then the 601 characters of ``woff_str_601``.

    Raises:
        ValueError: if ``key`` is not one of the two supported names. The
            original fell through both branches and only failed later with
            an UnboundLocalError on ``woff``.
    """
    if key == 'address':
        font_path = 'C:\\Users\\Administrator\\Desktop\\address.woff'
    elif key == 'num':
        font_path = 'C:\\Users\\Administrator\\Desktop\\num.woff'
    else:
        raise ValueError("woff_dict key must be 'address' or 'num', got {!r}".format(key))
    woff = TTFont(font_path)  # read the woff file
    # The 601 characters carried by glyph IDs 2..602 of the woff file
    woff_str_601 = '1234567890店中美家馆小车大市公酒行国品发电金心业商司超生装园场食有新限天面工服海华水房饰城乐汽香部利子老艺花专东肉菜学福饭人百餐茶务通味所山区门药银农龙停尚安广鑫一容动南具源兴鲜记时机烤文康信果阳理锅宝达地儿衣特产西批坊州牛佳化五米修爱北养卖建材三会鸡室红站德王光名丽油院堂烧江社合星货型村自科快便日民营和活童明器烟育宾精屋经居庄石顺林尔县手厅销用好客火雅盛体旅之鞋辣作粉包楼校鱼平彩上吧保永万物教吃设医正造丰健点汤网庆技斯洗料配汇木缘加麻联卫川泰色世方寓风幼羊烫来高厂兰阿贝皮全女拉成云维贸道术运都口博河瑞宏京际路祥青镇厨培力惠连马鸿钢训影甲助窗布富牌头四多妆吉苑沙恒隆春干饼氏里二管诚制售嘉长轩杂副清计黄讯太鸭号街交与叉附近层旁对巷栋环省桥湖段乡厦府铺内侧元购前幢滨处向座下臬凤港开关景泉塘放昌线湾政步宁解白田町溪十八古双胜本单同九迎第台玉锦底后七斜期武岭松角纪朝峰六振珠局岗洲横边济井办汉代临弄团外塔杨铁浦字年岛陵原梅进荣友虹央桂沿事津凯莲丁秀柳集紫旗张谷的是不了很还个也这我就在以可到错没去过感次要比觉看得说常真们但最喜哈么别位能较境非为欢然他挺着价那意种想出员两推做排实分间甜度起满给热完格荐喝等其再几只现朋候样直而买于般豆量选奶打每评少算又因情找些份置适什蛋师气你姐棒试总定啊足级整带虾如态且尝主话强当更板知己无酸让入啦式笑赞片酱差像提队走嫩才刚午接重串回晚微周值费性桌拍跟块调糕'
    # Glyph names in glyph-ID order, reached through the font's 'cmap' table
    glyph_names = woff['cmap'].tables[0].ttFont.getGlyphOrder()
    # Prepend the two special entries for glyph IDs 0 and 1
    characters = ['.notdef', 'x'] + list(woff_str_601)
    # A distinct local name avoids shadowing this function inside its own body
    return dict(zip(glyph_names, characters))

def decodestr(firststr):
    """Turn a font-obfuscated markup string back into readable text.

    Pieces are obtained by splitting on ``<``; ``/d>`` and ``/e>`` are
    stripped from each. When ``address`` (or ``num``) occurs in a piece at
    any position other than the very start, the piece is looked up in the
    module-level ``addressdict`` (or ``numdict``) using the four characters
    before its last character. All remaining pieces are copied verbatim.

    Args:
        firststr: markup string to decode.

    Returns:
        The concatenation of all decoded pieces, in input order.

    Raises:
        KeyError: if a glyph code is not present in the lookup table.
    """
    def _decode(piece):
        # Resolve one '<'-delimited piece to its plain-text form
        piece = piece.replace("/d>", "").replace("/e>", "")
        if piece.find("address") > 0:
            return addressdict[piece[-5:-1]]
        if piece.find("num") > 0:
            return numdict[piece[-5:-1]]
        return piece

    output = ""
    for part in firststr.split("<"):
        output = output + _decode(part)
    return output

if __name__ == '__main__':
    # Script 2: walk every stored listing URL page by page and save the basic
    # shop fields found in the listing HTML.

    # Build the glyph-code -> character tables. str(dict) is stripped of braces,
    # spaces and the "uni" prefix of quoted names, split on commas, and every
    # "'key':'value'" piece is turned back into a one-entry dict with eval().
    # NOTE(review): eval() on text derived from a font file; building the dict
    # directly from woff_dict(...).items() would avoid it.
    woffnum = (str)(woff_dict('num')).replace("{","").replace("}","").replace(" ","").replace("'uni","'")
    woffaddress = (str)(woff_dict('address')).replace("{","").replace("}","").replace(" ","").replace("'uni","'")
    numdict = {}
    newdict = woffnum.split(",")
    for d in newdict:
            d = '{' + d + '}'
            d = eval(d)
            numdict.update(d)

    addressdict = {}
    newdict = woffaddress.split(",")
    for d in newdict:
            d = '{' + d + '}'
            d = eval(d)
            addressdict.update(d)

    # Request headers sent with every listing request
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36",
        "Cookie" : "自己的",
        'Accept':'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7'
    }

    conn = pymysql.connect(host = 'localhost',user = "root",passwd = "自己的",db = "大众点评")
    cur=conn.cursor()
    querysql = "SELECT url,hasGet,finish FROM dazhong_paging_restaurant"
    cur.execute(querysql)
    lists = cur.fetchall()
    # NOTE(review): the loop variable `list` shadows the builtin for the rest of the script.
    for list in lists:
        url = list[0]
        hasGet = list[1]
        finish = list[2]
        # hasGet is the last page already stored (NULL means none);
        # after the +1 it is the first page still to fetch
        if hasGet==None:
            hasGet=0             
        hasGet += 1       
        if finish!=1:
            # Listing pages are addressed as <url>o3p<page number>
            # (presumably "o3" is a sort option and "p" the page; confirm)
            url += "o3p"
            for i in range(1,51):
                if hasGet>i:
                    print("已抓取，跳过该页："+(str)(i))
                    continue
                urlnew = url+(str)(i)
                requeststr0 = urlnew
                # One retry after an 80 s pause; a second failure propagates.
                # NOTE(review): the bare except also catches KeyboardInterrupt.
                try:
                    res = requests.get(requeststr0,headers=headers,timeout=100).text
                except:
                    time.sleep(80)
                    res = requests.get(requeststr0,headers=headers,timeout=100).text
                # If the crawl has been blocked, report it and stop the script
                if res.find("403 Forbidden")>0:
                    print("403访问被限制，已退出")
                    exit()
                # Past the last page of this listing: leave the page loop
                if res.find("没有找到符合条件的商户")>0:
                    break
                # Keep the text between the shop-list marker and the footer text,
                # then split it into one chunk per shop entry
                res = res[res.find("shop-all-list"):res.find("商户没有被收录")]
                res = res.split("<li class=\"\" >")
                # NOTE(review): `re` here is a plain loop variable holding one shop's HTML chunk.
                for re in res:
                    # Chunks shorter than 50 characters are skipped
                    if len(re)<50:
                        continue
                    shopid = re[re.find("data-shopid=\"")+13:]
                    shopid = shopid[:shopid.find("\"")]
                    # Single quotes are backslash-escaped because the value is
                    # spliced into the SQL text below
                    shopAllname = re[re.find("<h4>")+4:re.find("</h4>")].replace("'","\\'")
                    # Brand (chain) id, taken from the brands link when present
                    if re.find("https://www.dianping.com/brands/")>0:
                        shopGroupId = re[re.find("https://www.dianping.com/brands/")+32:re.find("\" module=\"list-branch\"")]
                    else:
                        shopGroupId = ""
                    # A "我要评价" marker in the chunk is treated as zero reviews
                    if re.find("我要评价")>0:
                        defaultReviewCount = 0
                    else:
                        defaultReviewCount = re[re.find("<b>")+3:re.find("</b>")]
                    avgPrice = re[re.find("人均"):]
                    # A "-" at offset 13 after "人均" is treated as no price
                    # (offset presumably matches the site's markup; confirm)
                    if avgPrice.find("-")==13:
                        avgPrice=0
                    else:
                        # +4 skips "<b>" and one more character (presumably the currency sign; confirm)
                        avgPrice = avgPrice[avgPrice.find("<b>")+4:avgPrice.find("</b>")]
                    if re.find("istopTrade")>0:
                        status = re[re.find("istopTrade")+12:]
                        status = status[:status.find("</span>")]
                    else:
                        status=""
                    # The tag-addr section holds the category link first and, after
                    # the "sep" marker, the area link
                    countryAndtype = re[re.find("tag-addr"):]
                    mainParentCategoryId = countryAndtype[countryAndtype.find("/g")+2:countryAndtype.find("\" data-click-name")]
                    categoryName = countryAndtype[countryAndtype.find("class=\"tag\">")+12:countryAndtype.find("</span>")]
                    countryAndtype = countryAndtype[countryAndtype.find("\"sep\""):]
                    countryid = countryAndtype[countryAndtype.find("/r")+2:countryAndtype.find("\" data-click-name")]
                    countryname = countryAndtype[countryAndtype.find("class=\"tag\">")+12:countryAndtype.find("</span>")]
                    # An area id containing "|" is treated as malformed; the shop is skipped
                    if countryid.find("|")>0:
                        print("该店铺信息异常被跳过："+shopid)
                        continue
                    # Recommended dishes: the link texts inside the recommend block,
                    # joined with a trailing space after each
                    if re.find("class=\"recommend\"")>0: 
                        recommendstr = re[re.find("class=\"recommend\"")+16:]
                        recommendstr = recommendstr[:recommendstr.find("</div>")]
                        recommendstr = recommendstr.split("\">")
                        recommend = ""
                        for recommendtemp in recommendstr:
                            if recommendtemp.find("</a>")>0:                            
                                recommendtemp = recommendtemp[:recommendtemp.find("</a>")]
                                recommend = recommend+recommendtemp+" "
                    else:
                        recommend = ""                                                        
                    print(shopid+" "+shopAllname+" "+shopGroupId+" "+(str)(defaultReviewCount)+" "+(str)(avgPrice)+" "+mainParentCategoryId+" "+categoryName+" "+countryid+" "+countryname+" "+status+" "+recommend)
                    # NOTE(review): SQL is assembled with str.format from scraped text; only
                    # shopAllname is quote-escaped. Parameterized queries would be safer.
                    insertSQLStrings="REPLACE INTO `大众点评`.`shopdetail_restaurant`(`shopid`, `shopAllname`, `shopGroupId`, `defaultReviewCount`,`avgPrice`,`mainParentCategoryId`,`categoryName`,`countryid`,`countryname`,`status`,`recommend`) VALUES ('{}','{}','{}',{},{},{},'{}',{},'{}','{}','{}')".format(shopid, shopAllname, shopGroupId, defaultReviewCount,avgPrice,mainParentCategoryId,categoryName,countryid,countryname,status,recommend)
                    cur.execute(insertSQLStrings)
                print("第"+(str)(i)+"页已抓取")
                # Record progress so a restart resumes after this page
                updatesql1 = "UPDATE dazhong_paging_restaurant SET hasGet={} WHERE url='{}'".format(i,list[0])
                cur.execute(updatesql1)
                conn.commit()
                time.sleep(15)
            # All pages of this listing handled: mark the row finished
            updatesql2 = "UPDATE dazhong_paging_restaurant SET finish=1 WHERE url='{}'".format(list[0])
            cur.execute(updatesql2)
            conn.commit()

最后通过接口获取更多详细丰富信息

# -*- coding: utf-8 -*-
import json
import requests
from fontTools.ttLib import TTFont
import pymysql
import time


def woff_dict(key):
    """Build the glyph-name -> character table for one of the obfuscation fonts.

    Args:
        key: ``'address'`` or ``'num'``; selects which downloaded woff file
            is read.

    Returns:
        A dict pairing each entry of the font's glyph order with the
        character at the same position: ``'.notdef'`` and ``'x'`` for glyph
        IDs 0 and 1, then the 601 characters of ``woff_str_601``.

    Raises:
        ValueError: if ``key`` is not one of the two supported names. The
            original fell through both branches and only failed later with
            an UnboundLocalError on ``woff``.
    """
    if key == 'address':
        font_path = 'C:\\Users\\Administrator\\Desktop\\address.woff'
    elif key == 'num':
        font_path = 'C:\\Users\\Administrator\\Desktop\\num.woff'
    else:
        raise ValueError("woff_dict key must be 'address' or 'num', got {!r}".format(key))
    woff = TTFont(font_path)  # read the woff file
    # The 601 characters carried by glyph IDs 2..602 of the woff file
    woff_str_601 = '1234567890店中美家馆小车大市公酒行国品发电金心业商司超生装园场食有新限天面工服海华水房饰城乐汽香部利子老艺花专东肉菜学福饭人百餐茶务通味所山区门药银农龙停尚安广鑫一容动南具源兴鲜记时机烤文康信果阳理锅宝达地儿衣特产西批坊州牛佳化五米修爱北养卖建材三会鸡室红站德王光名丽油院堂烧江社合星货型村自科快便日民营和活童明器烟育宾精屋经居庄石顺林尔县手厅销用好客火雅盛体旅之鞋辣作粉包楼校鱼平彩上吧保永万物教吃设医正造丰健点汤网庆技斯洗料配汇木缘加麻联卫川泰色世方寓风幼羊烫来高厂兰阿贝皮全女拉成云维贸道术运都口博河瑞宏京际路祥青镇厨培力惠连马鸿钢训影甲助窗布富牌头四多妆吉苑沙恒隆春干饼氏里二管诚制售嘉长轩杂副清计黄讯太鸭号街交与叉附近层旁对巷栋环省桥湖段乡厦府铺内侧元购前幢滨处向座下臬凤港开关景泉塘放昌线湾政步宁解白田町溪十八古双胜本单同九迎第台玉锦底后七斜期武岭松角纪朝峰六振珠局岗洲横边济井办汉代临弄团外塔杨铁浦字年岛陵原梅进荣友虹央桂沿事津凯莲丁秀柳集紫旗张谷的是不了很还个也这我就在以可到错没去过感次要比觉看得说常真们但最喜哈么别位能较境非为欢然他挺着价那意种想出员两推做排实分间甜度起满给热完格荐喝等其再几只现朋候样直而买于般豆量选奶打每评少算又因情找些份置适什蛋师气你姐棒试总定啊足级整带虾如态且尝主话强当更板知己无酸让入啦式笑赞片酱差像提队走嫩才刚午接重串回晚微周值费性桌拍跟块调糕'
    # Glyph names in glyph-ID order, reached through the font's 'cmap' table
    glyph_names = woff['cmap'].tables[0].ttFont.getGlyphOrder()
    # Prepend the two special entries for glyph IDs 0 and 1
    characters = ['.notdef', 'x'] + list(woff_str_601)
    # A distinct local name avoids shadowing this function inside its own body
    return dict(zip(glyph_names, characters))

def decodestr(firststr):
    """Replace font-obfuscated glyph tags in a markup string with real text.

    The string is cut at every ``<``; each resulting chunk has ``/d>`` and
    ``/e>`` removed. A chunk in which ``address`` (or ``num``) is found past
    index 0 is swapped for the module-level ``addressdict`` (or ``numdict``)
    entry whose key is the chunk's four characters before its final one.
    Other chunks are emitted unchanged.

    Args:
        firststr: markup string to decode.

    Returns:
        The decoded text.

    Raises:
        KeyError: if a glyph code has no entry in the lookup table.
    """
    chunks = firststr.split("<")
    result = []
    index = 0
    while index < len(chunks):
        chunk = chunks[index].replace("/d>", "").replace("/e>", "")
        index += 1
        uses_address_font = chunk.find("address") > 0
        uses_num_font = chunk.find("num") > 0
        if uses_address_font:
            # the address font takes precedence when both markers are present
            result.append(addressdict[chunk[-5:-1]])
        elif uses_num_font:
            result.append(numdict[chunk[-5:-1]])
        elif chunk != "":
            result.append(chunk)
    return "".join(result)

if __name__ == '__main__':
    # Script 3: for every shop that has no five-star score stored yet, query
    # three JSON endpoints and write the detailed fields back to the database.

    # Glyph-code -> character tables, read as globals by decodestr(). Keys are
    # the font's glyph names with a leading "uni" removed. The original built
    # the same tables by str()-ing the dict, splitting the text on commas and
    # eval()-ing each piece; building them directly avoids eval().
    numdict = {
        (name[3:] if name.startswith("uni") else name): char
        for name, char in woff_dict('num').items()
    }
    addressdict = {
        (name[3:] if name.startswith("uni") else name): char
        for name, char in woff_dict('address').items()
    }

    # The original first defined a richer header dict and then overwrote it
    # with this one on every loop iteration before any request was sent, so
    # these are the headers that were actually used.
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.82 Safari/537.36"
    }

    # Placeholders let the driver quote every value, so scraped names and
    # addresses containing quotes or backslashes can no longer break the
    # statement, and None is written as NULL.
    updatesql = (
        "update `大众点评`.`shopdetail_restaurant` SET `fivescore` = %s,`scoreTaste`=%s,"
        "`scoreEnvironment`=%s,`scoreService`=%s,`avgPrice`=%s,`defaultReviewCount`=%s,"
        "`shopName`=%s,`branchName`=%s,`address`=%s,`phoneNo`=%s,`shopGroupId`=%s,"
        "`glat`=%s,`glng`=%s WHERE shopid = %s"
    )

    conn = pymysql.connect(host = 'localhost',user = "root",passwd = "自己的",db = "大众点评")
    try:
        cur = conn.cursor()
        cur.execute("SELECT shopid FROM shopdetail_restaurant where fivescore is NULL")
        for row in cur.fetchall():
            shopid = row[0]
            requeststr1 = "https://www.dianping.com/ajax/json/shopDynamic/reviewAndStar?shopId={}&cityId=1&mainCategoryId=10".format(shopid)
            requeststr2 = "https://www.dianping.com/ajax/json/shopDynamic/basicHideInfo?shopId="+shopid
            requeststr3 = "https://www.dianping.com/ajax/json/shopDynamic/shopAside?shopId="+shopid

            # Scores and counts; a timeout keeps a stalled connection from
            # hanging the script forever
            res = requests.get(requeststr1,headers=headers,timeout=100).json()
            avgPrice = decodestr(res['avgPrice'])
            defaultReviewCount = decodestr(res['defaultReviewCount'])
            # A missing key is handled the same way as the "-" placeholder
            try:
                fivescore = res['fiveScore']
            except KeyError:
                fivescore = '-'
            if fivescore == "-":
                fivescore = 0
            refined = res['shopRefinedScoreValueList']
            # "-" means no score; store 0 instead
            scoreTaste, scoreEnvironment, scoreService = [
                0 if score == "-" else score
                for score in (decodestr(refined[0]), decodestr(refined[1]), decodestr(refined[2]))
            ]

            # Name, address and phone (address and phone are font-obfuscated)
            res = requests.get(requeststr2,headers=headers,timeout=100).json()
            shopInfo = res['msg']['shopInfo']
            shopName = shopInfo['shopName']
            branchName = shopInfo['branchName']
            if branchName is None:
                branchName = ""
            address = decodestr(shopInfo['address'])
            phoneNo = decodestr(shopInfo['phoneNo'])
            shopGroupId = shopInfo['shopGroupId']
            # A group id equal to the shop's own id is stored as empty
            if shopGroupId == shopid:
                shopGroupId = ""

            # Coordinates and category
            res = requests.get(requeststr3,headers=headers,timeout=100).json()
            glat = res['shop']['glat']
            glng = res['shop']['glng']
            categoryName = res['category']['categoryName']

            # print() joins its arguments with single spaces, which matches the
            # original concatenation and also accepts non-string values
            print(avgPrice, defaultReviewCount, fivescore, scoreTaste, scoreEnvironment, scoreService,
                  shopName, branchName, address, phoneNo, shopGroupId, glat, glng, categoryName)
            cur.execute(updatesql, (fivescore, scoreTaste, scoreEnvironment, scoreService,
                                    avgPrice, defaultReviewCount, shopName, branchName,
                                    address, phoneNo, shopGroupId, glat, glng, shopid))
            conn.commit()
            time.sleep(2)
    finally:
        # Release the connection even when a request or query fails
        conn.close()

最后结束如下

Python 获取公开信息文章来源地址https://www.toymoban.com/news/detail-472156.html