В общем, сохраняю JSON-ответ в CSV, получается как на картинке.
В итоге хочу получить такой результат. Подскажите, как это реализовать (желательно с
примером), или посоветуйте какую-нибудь литературу.
Вот сам код.
# -*- coding: utf-8 -*-
import requests
from bs4 import BeautifulSoup
from random import choice
from time import sleep
from random import uniform
import json
import csv
def get_html(url, useragent, proxy, StartFroms):
    """Request one page of coupons and dump the JSON reply to ``response.csv``.

    Args:
        url: Address that receives the POST request.
        useragent: Value sent in the ``User-Agent`` header.
        proxy: ``requests``-style proxies mapping used for both requests.
        StartFroms: Offset placed in the ``StartFrom`` field of the payload.

    Raises:
        requests.RequestException: On connection errors, timeouts or an
            HTTP error status.
    """
    # requests applies no timeout unless one is given, so a dead proxy would
    # otherwise block the whole run forever.
    timeout = 30  # seconds

    pl = {"options": {"DrawingId": 632, "StartFrom": StartFroms, "Count": 20,
                      "SortField": "CouponCode", "SortDir": "ASC"}}
    headers = {
        'User-Agent': useragent,
        'Accept-Language': 'ru,en;q=0.8',
        'Accept-Encoding': 'gzip, deflate, sdch',
        'Connection': 'keep-alive',  # was 'keep - alive', not a valid token
        'Content-Type': 'application/json',
        'Host': 'old.toto-info.co',
        'Origin': 'http://toto-info.co',
        'Referer': 'http://toto-info.co/',
    }

    # The context manager releases the session's pooled connections on exit.
    with requests.Session() as s:
        # Preliminary GET kept from the original code (presumably it picks up
        # session cookies before the POST - confirm).
        s.get("http://toto-info.co", proxies=proxy, timeout=timeout)
        res = s.post(url, headers=headers, proxies=proxy,
                     data=json.dumps(pl), timeout=timeout)
        # Fail loudly on 4xx/5xx instead of saving an error page.
        res.raise_for_status()
        data = res.json()

    # NOTE(review): mode 'w' overwrites the file on every call, so only the
    # most recently fetched page is kept, and the content is JSON, not CSV.
    with open('response.csv', 'w', encoding='utf-8') as file:
        json.dump(data, file, indent=2, ensure_ascii=False)
    print('------------------------------------------')
def main():
    """Fetch the coupon list page by page, retrying a page after any failure.

    Reads candidate user agents from ``useragents.txt`` and proxies from
    ``proxies.txt`` (one entry per line), then calls ``get_html`` for the
    offsets 0, 20, ... below ``finish``, each time with a random user agent
    and proxy and after a random 1-2 second pause.
    """
    url = 'http://old.toto-info.co/DataService.svc/GetMaxPrizeCoupons'

    # Close the files promptly and drop blank lines: a trailing newline used
    # to yield an empty entry, i.e. an empty user agent or the proxy 'http://'.
    with open("useragents.txt") as f:
        useragents = [line.strip() for line in f if line.strip()]
    with open("proxies.txt") as f:
        proxies = [line.strip() for line in f if line.strip()]

    start = 0  # offset of the next page to request
    finish = 1000
    while start < finish:
        sleep(uniform(1, 2))
        # NOTE(review): the user agent is sent wrapped in literal single
        # quotes, as in the original code - confirm this is intended.
        useragent = "'" + choice(useragents) + "'"
        proxy = {'http': 'http://' + choice(proxies)}
        try:
            get_html(url, useragent, proxy, start)
        except Exception as e:
            # Keep `start` unchanged so the same page is retried with a new
            # random proxy. `Exception` (not a bare except) lets Ctrl-C stop
            # the script.
            print('Exception:\t{}'.format(e))
            continue
        # Advance only after success. The original never moved `start` after
        # a clean pass, so the outer loop restarted from the beginning forever.
        start += 20


if __name__ == '__main__':
    main()
Ответы
Ответ 1
Вот рабочий скрипт. Для теста я указал finish = 100 (чтобы быстрее отработало):
import requests
from random import choice
from time import sleep
from random import uniform
import csv
import json
import pandas as pd
def get_html(url, useragent, proxy, StartFroms):
    """Request one page of coupons and return its list of items.

    Args:
        url: Address that receives the POST request.
        useragent: Value sent in the ``User-Agent`` header.
        proxy: ``requests``-style proxies mapping used for both requests.
        StartFroms: Offset placed in the ``StartFrom`` field of the payload.

    Returns:
        The value stored under ``['d']['Items']`` in the JSON reply.

    Raises:
        requests.RequestException: On connection errors, timeouts or an
            HTTP error status.
        KeyError: If the reply has no ``d`` / ``Items`` entry.
    """
    # requests applies no timeout unless one is given, so a dead proxy would
    # otherwise block the whole run forever.
    timeout = 30  # seconds

    pl = {"options": {"DrawingId": 632, "StartFrom": StartFroms, "Count": 20,
                      "SortField": "CouponCode", "SortDir": "ASC"}}
    headers = {
        'User-Agent': useragent,
        'Accept-Language': 'ru,en;q=0.8',
        'Accept-Encoding': 'gzip, deflate, sdch',
        'Connection': 'keep-alive',  # was 'keep - alive', not a valid token
        'Content-Type': 'application/json',
        'Host': 'old.toto-info.co',
        'Origin': 'http://toto-info.co',
        'Referer': 'http://toto-info.co/',
    }

    # The context manager releases the session's pooled connections on exit.
    with requests.Session() as s:
        # Send the preliminary GET through the same proxy as the POST, so
        # both requests of one session leave from the same address.
        s.get("http://toto-info.co", proxies=proxy, timeout=timeout)
        resp = s.post(url, headers=headers, proxies=proxy,
                      data=json.dumps(pl), timeout=timeout)
        # Fail loudly on 4xx/5xx instead of parsing an error page.
        resp.raise_for_status()
        return resp.json()['d']['Items']
def main():
    """Download the coupon pages and export the picks to an Excel sheet.

    Pages are requested at offsets 0, 20, ... below ``finish``; a page that
    raises is retried with a new random user agent and proxy. The collected
    items are then reshaped so that each coupon becomes one row with one
    column per pick, and written to ``d:/temp/result.xlsx``.
    """
    url = 'http://old.toto-info.co/DataService.svc/GetMaxPrizeCoupons'

    # Close the files promptly and drop blank lines: a trailing newline used
    # to yield an empty entry, i.e. an empty user agent or the proxy 'http://'.
    with open(r"D:\download\useragents.txt") as f:
        useragents = [line.strip() for line in f if line.strip()]
    with open(r"D:\download\proxies.txt") as f:
        proxies = [line.strip() for line in f if line.strip()]

    start = 0  # offset of the next page to request
    finish = 100
    data = []
    while start < finish:
        try:
            sleep(uniform(1, 2))
            # NOTE(review): the user agent is sent wrapped in literal single
            # quotes, as in the original code - confirm this is intended.
            useragent = "'{}'".format(choice(useragents))
            proxy = {'http': 'http://{}'.format(choice(proxies))}
            print('processing:\t[{}] ...'.format(start))
            data += get_html(url, useragent, proxy, start)
            # Reached only on success, so a failed page is requested again.
            start += 20
        except Exception as e:
            print('Exception:\t{}'.format(str(e)))

    print('building DataFrame ...')
    df = pd.DataFrame(data)
    out_fn = r'd:/temp/result.xlsx'
    # extractall() captures the text inside the parentheses of every
    # "N-(...)" entry of 'Options'; unstack() spreads the matches of one
    # coupon over columns. `axis` must be passed by keyword: pandas 2.x
    # rejects the positional form rename_axis(None, 1).
    df.set_index('CouponCode')['Options'] \
      .str.extractall(r'\d+-\((.*?)\)')[0] \
      .unstack().reset_index().rename_axis(None, axis=1) \
      .to_excel(out_fn, index=False)


if __name__ == '__main__':
    main()
Ответ 2
Взял код автора, немного упростил и получил json. Сам код:
import json
import requests
# Fetch ten coupons starting at offset 1 and dump the raw JSON reply to
# 'response.csv' (the file holds JSON despite its extension).
pl = {"options": {"DrawingId": 632, "StartFrom": 1, "Count": 10, "SortField": "CouponCode",
                  "SortDir": "ASC"}}
headers = {
    'Accept-Language': 'ru,en;q=0.8',
    'Accept-Encoding': 'gzip, deflate, sdch',
    'Connection': 'keep-alive',  # was 'keep - alive', not a valid token
    'Content-Type': 'application/json',
    'Host': 'old.toto-info.co',
    'Origin': 'http://toto-info.co',
    'Referer': 'http://toto-info.co/'
}

# The context manager releases the session's pooled connections on exit.
# requests applies no timeout unless one is given, hence the explicit value.
with requests.Session() as session:
    session.get("http://toto-info.co", timeout=30)
    rs = session.post(
        url='http://old.toto-info.co/DataService.svc/GetMaxPrizeCoupons',
        headers=headers,
        data=json.dumps(pl),
        timeout=30
    )
    # Fail loudly on 4xx/5xx instead of saving an error page.
    rs.raise_for_status()

with open('response.csv', 'w', encoding='utf-8') as f:
    json.dump(rs.json(), f, indent=2, ensure_ascii=False)
После этого считал JSON построчно и сохранил в Excel:
import xlwt

# Copy the JSON dump into column A of a new sheet, one text line per row.
wb = xlwt.Workbook()
ws = wb.add_sheet('Json data')
with open('response.csv', mode='r', encoding='utf-8') as f:
    # Iterate the file directly instead of loading every line with readlines().
    for i, line in enumerate(f):
        ws.write(i, 0, line.rstrip())
wb.save('excel.xls')
Вот так выглядит excel:
Файл с json:
{
"d": {
"Summary": {
"TotalCount": 93977,
"__type": "SelectSummary:#FonbetEngine.DAL"
},
"__type": "SelectResultOfWinCouponSelectSummaryWVcc8KbY:#FonbetEngine.DAL",
"Items": [
{
"MaxResult": 6,
"CouponCode": "10000",
"TotalPrizeValue": 0,
"__type": "WinCoupon:#FonbetEntity",
"Cnt": 1,
"Options": "1-(X); 2-(1); 3-(2); 4-(2); 5-(1); 6-(X); 7-(2); 8-(1); 9-(1); 10-(2); 11-(1); 12-(2); 13-(X); 14-(X); 15-(2)",
"TotalStakeValue": 50
},
{
"MaxResult": 2,
"CouponCode": "100002",
"TotalPrizeValue": 0,
"__type": "WinCoupon:#FonbetEntity",
"Cnt": 1,
"Options": "1-(1); 2-(X); 3-(1); 4-(1); 5-(1); 6-(2); 7-(1); 8-(1); 9-(2); 10-(2); 11-(1); 12-(X); 13-(1); 14-(1); 15-(1)",
"TotalStakeValue": 50
},
{
"MaxResult": 3,
"CouponCode": "100019",
"TotalPrizeValue": 0,
"__type": "WinCoupon:#FonbetEntity",
"Cnt": 1,
"Options": "1-(2); 2-(2); 3-(X); 4-(1); 5-(1); 6-(X); 7-(2); 8-(2); 9-(X); 10-(1); 11-(1); 12-(X); 13-(2); 14-(2); 15-(X)",
"TotalStakeValue": 50
},
{
"MaxResult": 5,
"CouponCode": "100026",
"TotalPrizeValue": 0,
"__type": "WinCoupon:#FonbetEntity",
"Cnt": 1,
"Options": "1-(2); 2-(1); 3-(1); 4-(X); 5-(1); 6-(2); 7-(X); 8-(1); 9-(X); 10-(2); 11-(2); 12-(1); 13-(X); 14-(2); 15-(2)",
"TotalStakeValue": 50
},
{
"MaxResult": 10,
"CouponCode": "100033",
"TotalPrizeValue": 961.6088,
"__type": "WinCoupon:#FonbetEntity",
"Cnt": 1,
"Options": "1-(2); 2-(2); 3-(2); 4-(2); 5-(X); 6-(2); 7-(1); 8-(2); 9-(1); 10-(X); 11-(2); 12-(1); 13-(2); 14-(2); 15-(2)",
"TotalStakeValue": 50
},
{
"MaxResult": 5,
"CouponCode": "100040",
"TotalPrizeValue": 0,
"__type": "WinCoupon:#FonbetEntity",
"Cnt": 1,
"Options": "1-(2); 2-(X); 3-(1); 4-(2); 5-(1); 6-(1); 7-(1); 8-(2); 9-(X); 10-(X); 11-(1); 12-(2); 13-(X); 14-(1); 15-(X)",
"TotalStakeValue": 50
},
{
"MaxResult": 6,
"CouponCode": "100057",
"TotalPrizeValue": 0,
"__type": "WinCoupon:#FonbetEntity",
"Cnt": 1,
"Options": "1-(2); 2-(2); 3-(1); 4-(X); 5-(1); 6-(1); 7-(X); 8-(2); 9-(X); 10-(X); 11-(1); 12-(1); 13-(1); 14-(X); 15-(X)",
"TotalStakeValue": 50
},
{
"MaxResult": 9,
"CouponCode": "100064",
"TotalPrizeValue": 341.1707,
"__type": "WinCoupon:#FonbetEntity",
"Cnt": 1,
"Options": "1-(2); 2-(1); 3-(X); 4-(1); 5-(X); 6-(1); 7-(X); 8-(2); 9-(X); 10-(1); 11-(2); 12-(1); 13-(1); 14-(2); 15-(1)",
"TotalStakeValue": 50
},
{
"MaxResult": 4,
"CouponCode": "100071",
"TotalPrizeValue": 0,
"__type": "WinCoupon:#FonbetEntity",
"Cnt": 1,
"Options": "1-(2); 2-(2); 3-(1); 4-(1); 5-(2); 6-(X); 7-(1); 8-(1); 9-(2); 10-(1); 11-(X); 12-(1); 13-(2); 14-(2); 15-(1)",
"TotalStakeValue": 50
},
{
"MaxResult": 5,
"CouponCode": "100088",
"TotalPrizeValue": 0,
"__type": "WinCoupon:#FonbetEntity",
"Cnt": 1,
"Options": "1-(1); 2-(2); 3-(2); 4-(1); 5-(1); 6-(X); 7-(2); 8-(2); 9-(X); 10-(1); 11-(1); 12-(2); 13-(1); 14-(X); 15-(X)",
"TotalStakeValue": 50
}
]
}
}
Ответ 3
Трудно найти модуль для обработки табличных данных, который смог бы превзойти Pandas:
import json
import pandas as pd
# If `data = res.json()`, i.e. `data` is the JSON reply already parsed into a dict:
df = pd.DataFrame(data['d']['Items'])
# If `json_str` is a string holding the JSON response instead:
#df = pd.DataFrame(json.loads(json_str)['d']['Items'])
# extractall() captures the text inside the parentheses of every "N-(...)"
# entry of 'Options'; unstack() spreads the matches of one coupon over
# columns. `axis` must be passed by keyword: pandas 2.x rejects the
# positional form rename_axis(None, 1).
df.set_index('CouponCode')['Options'] \
  .str.extractall(r'\d+-\((.*?)\)')[0] \
  .unstack().reset_index().rename_axis(None, axis=1) \
  .to_excel('d:/temp/result.xlsx', index=False)
Результат (D:\temp\result.xlsx):
"Распарсенный" в DataFrame JSON:
In [303]: df
Out[303]:
Cnt CouponCode MaxResult Options TotalPrizeValue \
0 1 10000 6 1-(X); 2-(1); 3-(2); 4-(2)... 0.0000
1 1 100002 2 1-(1); 2-(X); 3-(1); 4-(1)... 0.0000
2 1 100019 3 1-(2); 2-(2); 3-(X); 4-(1)... 0.0000
3 1 100026 5 1-(2); 2-(1); 3-(1); 4-(X)... 0.0000
4 1 100033 10 1-(2); 2-(2); 3-(2); 4-(2)... 961.6088
5 1 100040 5 1-(2); 2-(X); 3-(1); 4-(2)... 0.0000
6 1 100057 6 1-(2); 2-(2); 3-(1); 4-(X)... 0.0000
7 1 100064 9 1-(2); 2-(1); 3-(X); 4-(1)... 341.1707
8 1 100071 4 1-(2); 2-(2); 3-(1); 4-(1)... 0.0000
9 1 100088 5 1-(1); 2-(2); 3-(2); 4-(1)... 0.0000
TotalStakeValue __type
0 50 WinCoupon:#FonbetEntity
1 50 WinCoupon:#FonbetEntity
2 50 WinCoupon:#FonbetEntity
3 50 WinCoupon:#FonbetEntity
4 50 WinCoupon:#FonbetEntity
5 50 WinCoupon:#FonbetEntity
6 50 WinCoupon:#FonbetEntity
7 50 WinCoupon:#FonbetEntity
8 50 WinCoupon:#FonbetEntity
9 50 WinCoupon:#FonbetEntity