Страницы

Поиск по вопросам

вторник, 18 февраля 2020 г.

Сохранение JSON-ответа в CSV. Python

#python #json #python_3x #парсер #csv


В общем, сохраняю JSON-ответ в CSV, получается как на картинке.



В итоге хочу получить такой результат. Подскажите, как это реализовать (желательно с
примером), или посоветуйте какую-нибудь литературу.



Вот сам код.

# -*- coding: utf-8 -*-

import requests
from bs4 import BeautifulSoup
from random import choice
from time import sleep
from random import uniform
import json
import csv

# Columns written to response.csv, in this order.  The names are the item keys
# of the GetMaxPrizeCoupons response; the "__type" serialization tag that the
# service attaches to every item is deliberately left out.
_CSV_COLUMNS = ('CouponCode', 'MaxResult', 'Cnt', 'TotalStakeValue',
                'TotalPrizeValue', 'Options')

# Seconds to wait for the proxy/server before giving up on a request, so that
# a dead proxy raises instead of blocking the whole run.
_REQUEST_TIMEOUT = 30


def _save_items_to_csv(items, path, truncate):
    """Write ``items`` (a list of dicts) as rows of the CSV file at ``path``.

    When ``truncate`` is true the file is started from scratch, otherwise the
    rows are appended.  The header row is emitted only when the file is empty,
    so repeated appends produce a single well-formed table.
    """
    mode = 'w' if truncate else 'a'
    # newline='' lets the csv module control line endings itself.
    with open(path, mode, encoding='utf-8', newline='') as file:
        writer = csv.DictWriter(file, fieldnames=_CSV_COLUMNS, restval='',
                                extrasaction='ignore')
        if file.tell() == 0:
            writer.writeheader()
        writer.writerows(items)


def get_html(url, useragent, proxy, StartFroms):
    """Fetch one page of coupons and store it as CSV rows in ``response.csv``.

    Args:
        url: Address of the GetMaxPrizeCoupons endpoint to POST to.
        useragent: Value sent in the ``User-Agent`` header.
        proxy: ``proxies`` mapping passed to ``requests``.
        StartFroms: Offset of the first coupon of the page (``StartFrom``
            option of the request).  Offset 0 starts a fresh ``response.csv``;
            any other offset appends to the existing file.

    Returns:
        The list of item dicts found under ``d.Items`` in the JSON response.

    Raises:
        requests.RequestException: on connection errors, timeouts or a
            non-2xx HTTP status.
        ValueError, KeyError: when the body is not the expected JSON document.
    """
    headers = {
        'User-Agent': useragent,
        'Accept-Language': 'ru,en;q=0.8',
        'Accept-Encoding': 'gzip, deflate, sdch',
        'Connection': 'keep-alive',
        'Content-Type': 'application/json',
        'Host': 'old.toto-info.co',
        'Origin': 'http://toto-info.co',
        'Referer': 'http://toto-info.co/',
    }
    payload = {"options": {"DrawingId": 632, "StartFrom": StartFroms,
                           "Count": 20, "SortField": "CouponCode",
                           "SortDir": "ASC"}}

    with requests.Session() as s:
        # Visit the main site first within the same session, as the original
        # script did (presumably to pick up cookies -- TODO confirm).
        s.get("http://toto-info.co", proxies=proxy, timeout=_REQUEST_TIMEOUT)
        res = s.post(url, headers=headers, proxies=proxy,
                     data=json.dumps(payload), timeout=_REQUEST_TIMEOUT)

    # Fail loudly on HTTP errors so the caller can retry the page instead of
    # saving an error document.
    res.raise_for_status()
    items = res.json()['d']['Items'] or []

    _save_items_to_csv(items, 'response.csv', truncate=(StartFroms == 0))

    print('------------------------------------------')
    return items

def _read_nonempty_lines(path):
    """Return the stripped, non-blank lines of the text file at ``path``."""
    with open(path) as file:
        return [line.strip() for line in file if line.strip()]


def main():
    """Download the coupon pages one by one, retrying a page until it succeeds.

    Pages are requested at offsets 0, 20, 40, ... below ``finish``.  Each
    attempt waits 1-2 seconds and uses a random user agent and a random proxy
    taken from ``useragents.txt`` and ``proxies.txt`` (one entry per line).
    A failed page is retried, with a new random proxy, until it succeeds.

    Raises:
        ValueError: if either input file contains no usable entries.
    """
    url = 'http://old.toto-info.co/DataService.svc/GetMaxPrizeCoupons'

    useragents = _read_nonempty_lines("useragents.txt")
    proxies = _read_nonempty_lines("proxies.txt")
    if not useragents or not proxies:
        raise ValueError('useragents.txt and proxies.txt must each contain '
                         'at least one entry')

    start = 0  # offset of the next page to request
    finish = 1000
    page_size = 20  # step between page offsets

    while start < finish:
        sleep(uniform(1, 2))
        # The user agent is sent as-is; wrapping it in quote characters would
        # put the quotes themselves into the User-Agent header.
        useragent = choice(useragents)
        proxy = {'http': 'http://' + choice(proxies)}
        try:
            get_html(url, useragent, proxy, start)
        except Exception as exc:
            # Keep `start` unchanged so the same page is retried, and report
            # why the attempt failed instead of swallowing the error.
            print('page {} failed, retrying: {}'.format(start, exc))
            continue
        # Advance only after a successful download; this is what ends the loop.
        start += page_size


if __name__ == '__main__':
    main()

    


Ответы

Ответ 1



Вот рабочий скрипт. Для теста я указал finish = 100 (чтобы быстрее отработало):

import requests
from random import choice
from time import sleep
from random import uniform
import csv
import json
import pandas as pd


def get_html(url, useragent, proxy, StartFroms):
    s = requests.Session()
    s.get("http://toto-info.co")

    pl = {"options": {"DrawingId": 632, "StartFrom":StartFroms, "Count": 20, "SortField": "CouponCode", "SortDir": "ASC"}}
    resp = s.post(url, headers={'User-Agent':useragent,
                'Accept-Language' : 'ru,en;q=0.8',
                'Accept-Encoding' : 'gzip, deflate, sdch',
                'Connection': 'keep - alive',
                'Content-Type': 'application/json',
                'Host': 'old.toto-info.co',
                'Origin': 'http://toto-info.co',
                'Referer': 'http://toto-info.co/'},proxies = proxy, data=json.dumps(pl))
    return resp.json()['d']['Items']


def main():
    url = 'http://old.toto-info.co/DataService.svc/GetMaxPrizeCoupons'

    useragents = open(r"D:\download\useragents.txt").read().split('\n')
    proxies = open(r"D:\download\proxies.txt").read().split('\n')

    start = 0 # переменная для хранения точки входа в for
    finish = 100
    data = []

    while start < finish:
        try:
            sleep(uniform(1, 2))
            useragent = "'{}'".format(choice(useragents))
            proxy = {'http': 'http://{}'.format(choice(proxies))}
            print('processing:\t[{}] ...'.format(start))
            data += get_html(url, useragent, proxy, start)
            start += 20
        except Exception as e:
            print('Exception:\t{}'.format(str(e)))

    print('building DataFrame ...')
    df = pd.DataFrame(data)
    # debug: print first 5 rows of DF...
    #print(df.head())

    out_fn = r'd:/temp/result.xlsx'
    df.set_index('CouponCode')['Options'] \
      .str.extractall(r'\d+-\((.*?)\)')[0] \
      .unstack().reset_index().rename_axis(None, 1) \
      .to_excel(out_fn, index=False)


if __name__ == '__main__':
    main()

Ответ 2



Взял код автора, немного упростил и получил json. Сам код:

import json
import requests

session = requests.Session()
session.get("http://toto-info.co")

pl = {"options": {"DrawingId": 632, "StartFrom": 1, "Count": 10, "SortField": "CouponCode", "SortDir": "ASC"}}
rs = session.post(
    url='http://old.toto-info.co/DataService.svc/GetMaxPrizeCoupons',
    headers={
        'Accept-Language': 'ru,en;q=0.8',
        'Accept-Encoding': 'gzip, deflate, sdch',
        'Connection': 'keep - alive',
        'Content-Type': 'application/json',
        'Host': 'old.toto-info.co',
        'Origin': 'http://toto-info.co',
        'Referer': 'http://toto-info.co/'
    },
    data=json.dumps(pl)
)

with open('response.csv', 'w', encoding='utf-8') as f:
    json.dump(rs.json(), f, indent=2, ensure_ascii=False)

После считал json построчно и сохранил в excel:

with open('response.csv', mode='r', encoding='utf-8') as f:
    import xlwt
    wb = xlwt.Workbook()
    ws = wb.add_sheet('Json data')

    for i, line in enumerate(f.readlines()):
        line = line.rstrip()
        ws.write(i, 0, line)

    wb.save('excel.xls')

Вот так выглядит excel:

Файл с json:

{
  "d": {
    "Summary": {
      "TotalCount": 93977,
      "__type": "SelectSummary:#FonbetEngine.DAL"
    },
    "__type": "SelectResultOfWinCouponSelectSummaryWVcc8KbY:#FonbetEngine.DAL",
    "Items": [
      {
        "MaxResult": 6,
        "CouponCode": "10000",
        "TotalPrizeValue": 0,
        "__type": "WinCoupon:#FonbetEntity",
        "Cnt": 1,
        "Options": "1-(X); 2-(1); 3-(2); 4-(2); 5-(1); 6-(X); 7-(2); 8-(1); 9-(1); 10-(2); 11-(1); 12-(2); 13-(X); 14-(X); 15-(2)",
        "TotalStakeValue": 50
      },
      {
        "MaxResult": 2,
        "CouponCode": "100002",
        "TotalPrizeValue": 0,
        "__type": "WinCoupon:#FonbetEntity",
        "Cnt": 1,
        "Options": "1-(1); 2-(X); 3-(1); 4-(1); 5-(1); 6-(2); 7-(1); 8-(1); 9-(2); 10-(2); 11-(1); 12-(X); 13-(1); 14-(1); 15-(1)",
        "TotalStakeValue": 50
      },
      {
        "MaxResult": 3,
        "CouponCode": "100019",
        "TotalPrizeValue": 0,
        "__type": "WinCoupon:#FonbetEntity",
        "Cnt": 1,
        "Options": "1-(2); 2-(2); 3-(X); 4-(1); 5-(1); 6-(X); 7-(2); 8-(2); 9-(X); 10-(1); 11-(1); 12-(X); 13-(2); 14-(2); 15-(X)",
        "TotalStakeValue": 50
      },
      {
        "MaxResult": 5,
        "CouponCode": "100026",
        "TotalPrizeValue": 0,
        "__type": "WinCoupon:#FonbetEntity",
        "Cnt": 1,
        "Options": "1-(2); 2-(1); 3-(1); 4-(X); 5-(1); 6-(2); 7-(X); 8-(1); 9-(X); 10-(2); 11-(2); 12-(1); 13-(X); 14-(2); 15-(2)",
        "TotalStakeValue": 50
      },
      {
        "MaxResult": 10,
        "CouponCode": "100033",
        "TotalPrizeValue": 961.6088,
        "__type": "WinCoupon:#FonbetEntity",
        "Cnt": 1,
        "Options": "1-(2); 2-(2); 3-(2); 4-(2); 5-(X); 6-(2); 7-(1); 8-(2); 9-(1); 10-(X); 11-(2); 12-(1); 13-(2); 14-(2); 15-(2)",
        "TotalStakeValue": 50
      },
      {
        "MaxResult": 5,
        "CouponCode": "100040",
        "TotalPrizeValue": 0,
        "__type": "WinCoupon:#FonbetEntity",
        "Cnt": 1,
        "Options": "1-(2); 2-(X); 3-(1); 4-(2); 5-(1); 6-(1); 7-(1); 8-(2); 9-(X); 10-(X); 11-(1); 12-(2); 13-(X); 14-(1); 15-(X)",
        "TotalStakeValue": 50
      },
      {
        "MaxResult": 6,
        "CouponCode": "100057",
        "TotalPrizeValue": 0,
        "__type": "WinCoupon:#FonbetEntity",
        "Cnt": 1,
        "Options": "1-(2); 2-(2); 3-(1); 4-(X); 5-(1); 6-(1); 7-(X); 8-(2); 9-(X); 10-(X); 11-(1); 12-(1); 13-(1); 14-(X); 15-(X)",
        "TotalStakeValue": 50
      },
      {
        "MaxResult": 9,
        "CouponCode": "100064",
        "TotalPrizeValue": 341.1707,
        "__type": "WinCoupon:#FonbetEntity",
        "Cnt": 1,
        "Options": "1-(2); 2-(1); 3-(X); 4-(1); 5-(X); 6-(1); 7-(X); 8-(2); 9-(X); 10-(1); 11-(2); 12-(1); 13-(1); 14-(2); 15-(1)",
        "TotalStakeValue": 50
      },
      {
        "MaxResult": 4,
        "CouponCode": "100071",
        "TotalPrizeValue": 0,
        "__type": "WinCoupon:#FonbetEntity",
        "Cnt": 1,
        "Options": "1-(2); 2-(2); 3-(1); 4-(1); 5-(2); 6-(X); 7-(1); 8-(1); 9-(2); 10-(1); 11-(X); 12-(1); 13-(2); 14-(2); 15-(1)",
        "TotalStakeValue": 50
      },
      {
        "MaxResult": 5,
        "CouponCode": "100088",
        "TotalPrizeValue": 0,
        "__type": "WinCoupon:#FonbetEntity",
        "Cnt": 1,
        "Options": "1-(1); 2-(2); 3-(2); 4-(1); 5-(1); 6-(X); 7-(2); 8-(2); 9-(X); 10-(1); 11-(1); 12-(2); 13-(1); 14-(X); 15-(X)",
        "TotalStakeValue": 50
      }
    ]
  }
}

Ответ 3



Трудно найти модуль для обработки табличных данных, который смог бы превзойти Pandas:

import json
import pandas as pd

# если: `data = res.json()`, т.е. `data` - словарь (parsed to dictionary JSON)
df = pd.DataFrame(data['d']['Items'])

# если `json_str` - строка содержащая JSON response ...
#df = pd.DataFrame(json.loads(json_str)['d']['Items'])

df.set_index('CouponCode')['Options'] \
  .str.extractall(r'\d+-\((.*?)\)')[0] \
  .unstack().reset_index().rename_axis(None, 1) \
  .to_excel('d:/temp/result.xlsx', index=False)

Результат (D:\temp\result.xlsx):

"Распарсенный" в DataFrame JSON:

In [303]: df
Out[303]:
   Cnt  CouponCode  MaxResult                        Options  TotalPrizeValue  \
0    1       10000          6  1-(X); 2-(1); 3-(2); 4-(2)...           0.0000
1    1      100002          2  1-(1); 2-(X); 3-(1); 4-(1)...           0.0000
2    1      100019          3  1-(2); 2-(2); 3-(X); 4-(1)...           0.0000
3    1      100026          5  1-(2); 2-(1); 3-(1); 4-(X)...           0.0000
4    1      100033         10  1-(2); 2-(2); 3-(2); 4-(2)...         961.6088
5    1      100040          5  1-(2); 2-(X); 3-(1); 4-(2)...           0.0000
6    1      100057          6  1-(2); 2-(2); 3-(1); 4-(X)...           0.0000
7    1      100064          9  1-(2); 2-(1); 3-(X); 4-(1)...         341.1707
8    1      100071          4  1-(2); 2-(2); 3-(1); 4-(1)...           0.0000
9    1      100088          5  1-(1); 2-(2); 3-(2); 4-(1)...           0.0000

   TotalStakeValue                   __type
0               50  WinCoupon:#FonbetEntity
1               50  WinCoupon:#FonbetEntity
2               50  WinCoupon:#FonbetEntity
3               50  WinCoupon:#FonbetEntity
4               50  WinCoupon:#FonbetEntity
5               50  WinCoupon:#FonbetEntity
6               50  WinCoupon:#FonbetEntity
7               50  WinCoupon:#FonbetEntity
8               50  WinCoupon:#FonbetEntity
9               50  WinCoupon:#FonbetEntity

Комментариев нет:

Отправить комментарий