python下载json包(python安装jsonpath)
如何安装json包python3.5.2
python中的json解释库有好几个,不同版本使用方法不同。
常用有 json-py 与 simplejson 两个包
其中,json-py 包含json.py外,还有一个minjson,两者用法上有差别:
import一样
import json # 都是如此import的。
import minjson
# json-py库用法
json.read( json_obj )# Converting JSON to Python
json.write(python_obj)# Converting Python to JSON
#json的minjson用法
minjson.read( json_obj )#同上
minjson.write(python_obj)
# simplejson 的用法
json.loads(json_obj) # Converting JSON to Python
json.dumps(python_obj)# Converting Python to JSON
python2.5没有内置的json,要手动安装。我们现在使用的是 json-py3.4
python2.6内置json解释库,是 simplejson
simplejson 2.09 下载
json-py 3.4 下载
经过我测试两者的效率,发现
python2.5.4, XP下,1K次读/写结果如下:
------------------------------------------------------------
minjson : 1.0737601508
json : 4.49144874205
simplejson: 0.24600865082
---------------------------------------------------------------
python2.5.4, centOS5.3 (Linux)下:
minjson : 1.8272049427
json : 8.26148796082
simplejson: 3.87293195724
-------------------------------------------------------------
以上令我不解的是XP下速度会比Linux快???
结论:
基于以上,个人感觉使用 minjson.py比较保险,现在我们开发使用的是 json-py速度最慢那个。。。
因为minjson.py只有一个文件,建议直接复制到工程公共库目录下,直接使用,免去安装痛苦。
附上测试脚本 ,希望有人在不同机子上测试结果帖上来。。。
[python] view plaincopy
#coding:utf8
import timeit
import json
import minjson
import simplejson
# Sample payload used by all three benchmarks below: the same document as
# JSON text (js_obj) and as the equivalent Python object (py_obj).
# FIX: the original opened the string with five quotes (''''' ), which left a
# stray '' inside the JSON text and made it unparseable; use a plain '''.
js_obj = ''' ["我是中文",
{
"bar":["测试阿", null, 1.0, 2], "bool":true
}]'''
py_obj = [u"我是中文",
          {
              "bar": [u"测试阿", None, 1.0, 2], "bool": True
          }]
def test_minjson():
    """Decode and encode the sample payload once with minjson.

    Comment out either call to time read/write separately.
    (FIX: the body's indentation was lost when the article was scraped.)
    """
    minjson.read(js_obj)    # JSON text -> Python object
    minjson.write(py_obj)   # Python object -> JSON text
def test_json():
    """Decode and encode the sample payload once with json-py.

    Comment out either call to time read/write separately.
    (FIX: the body's indentation was lost when the article was scraped.
    NOTE(review): read/write are json-py's API, not the stdlib json module.)
    """
    json.read(js_obj)    # JSON text -> Python object
    json.write(py_obj)   # Python object -> JSON text
def test_smpjson():
    """Decode and encode the sample payload once with simplejson.

    (FIX: the body's indentation was lost when the article was scraped.)
    """
    simplejson.loads(js_obj)   # JSON text -> Python object
    simplejson.dumps(py_obj)   # Python object -> JSON text
if __name__=='__main__':
python 下载数据json 2021-02-27
import json
# Explore the structure of the earthquake data file.
# (FIX: indentation was mojibake-damaged -- '? ?' runs -- in the scraped text.)
filename = 'data/1.json'
with open(filename) as f:
    all_eq_data = json.load(f)  # whole file parsed into one JSON object

# Optional: dump a nicely indented copy for human inspection.
# readable_file = 'data/readable_eq_data.json'
# with open(readable_file, 'w') as f:
#     json.dump(all_eq_data, f, indent=4)

all_eq_dicts = all_eq_data['features']  # one dict per earthquake

mags, titles, lons, lats = [], [], [], []
for eq_dict in all_eq_dicts:
    mags.append(eq_dict['properties']['mag'])           # magnitude
    titles.append(eq_dict['properties']['title'])       # headline text
    lons.append(eq_dict['geometry']['coordinates'][0])  # longitude
    lats.append(eq_dict['geometry']['coordinates'][1])  # latitude

print(mags[:10])
# print(len(all_eq_dicts))  # total number of earthquakes
print(titles[:2])
print(lons[:5])
print(lats[:5])
绘制震级散点图:
import plotly.express as px
# Scatter plot of earthquake positions (longitude vs latitude).
# FIX: the original was missing the commas after range_x and range_y,
# which made the call a syntax error.
fig = px.scatter(
    x=lons,
    y=lats,
    labels={'x': '经度', 'y': '纬度'},
    range_x=[-200, 200],  # NOTE(review): longitudes normally span [-180, 180]
    range_y=[-90, 90],
    width=800,
    height=800,
    title='全球地震散点图',
)
fig.write_html('global_earthquakes.html')  # save as a standalone HTML file
fig.show()  # open in the browser / notebook
另一种指定图标数据的方式:
import pandas as pd
# Bundle the parallel lists into a DataFrame with Chinese column names.
# (FIX: indentation mojibake repaired.)
data = pd.DataFrame(
    data=zip(lons, lats, titles, mags),
    columns=['经度', '纬度', '位置', '震级'],
)
data.head()  # preview the first five rows
然后参数配置方式可以从:
    x = lons,
    y = lats,
    labels = {'x':'经度','y':'纬度'},
变更为:
    data,
    x = '经度',
    y = '纬度',
    ……
    size = '震级',
    size_max = 10,    # 默认 20
    color = '震级',    # 标记颜色 蓝红黄
    hover_name = '位置',    # 添加鼠标指向时显示的文本
python3.6 pip安装json失败?
pip下载不了,可以试试这2种方法:
1、去pypi.org上下载,把下载的文件放入python安装路径下的Lib下面;
2、或者去github上下载,/nlohmann/json,解压到Lib下面
如何在scrapy框架下用python爬取json文件
生成Request的时候与一般的网页是相同的,提交Request后scrapy就会下载相应的网页生成Response,这时只用解析response.body按照解析json的方法就可以提取数据了。代码示例如下(以京东为例,其中的parse_phone_price和parse_commnets是通过json提取的,省略部分代码):
# -*- coding: utf-8 -*-
from scrapy.spiders import Spider, CrawlSpider, Rule
from scrapy.linkextractors import LinkExtractor
from jdcom.items import JdPhoneCommentItem, JdPhoneItem
from scrapy import Request
from datetime import datetime
import json
import logging
import re
logger = logging.getLogger(__name__)


class JdPhoneSpider(CrawlSpider):
    """Crawl JD phone list pages, then follow their comments/price JSON endpoints.

    NOTE(review): every URL template in this class was lost when the article
    was scraped (empty strings remain); restore the real list-page, comments
    and price endpoints before running.
    (FIX: all class/method indentation was lost in the scraped text.)
    """

    name = "jdPhoneSpider"
    start_urls = [""]
    rules = (
        Rule(
            # NOTE(review): '&' separators appear stripped from this pattern by
            # the scrape -- verify against the live list-page URLs.
            LinkExtractor(allow=r"list\.html\?cat\=9987,653,655\page\=\d+\trans\=1\JL\=6_0_0"),
            callback="parse_phone_url",
            follow=True,
        ),
    )

    def parse_phone_url(self, response):
        """Pull phone IDs off a list page and request each phone's comments JSON."""
        hrefs = response.xpath(
            "//div[@id='plist']/ul/li/div/div[@class='p-name']/a/@href"
        ).extract()
        phoneIDs = []
        for href in hrefs:
            phoneID = href[14:-5]  # numeric product ID embedded in the href
            phoneIDs.append(phoneID)
            commentsUrl = "" % phoneID  # NOTE(review): URL template lost in scrape
            yield Request(commentsUrl, callback=self.parse_commnets)

    def parse_phone_price(self, response):
        """Parse the price JSON, stash the price in meta, chain to phone info."""
        phoneID = response.meta['phoneID']
        meta = response.meta
        priceStr = response.body.decode("gbk", "ignore")  # JD serves GBK-encoded text
        priceJson = json.loads(priceStr)
        meta['price'] = float(priceJson[0]["p"])
        phoneUrl = "" % phoneID  # NOTE(review): URL template lost in scrape
        yield Request(phoneUrl, callback=self.parse_phone_info, meta=meta)

    def parse_phone_info(self, response):
        """Placeholder -- left unimplemented in the original article."""
        pass

    def parse_commnets(self, response):
        """Yield one item per comment, then the price request and the next pages."""
        commentsStr = response.body.decode("gbk", "ignore")
        commentsJson = json.loads(commentsStr)
        for comment in commentsJson['comments']:
            # FIX: build a fresh item per comment -- the original created one
            # item before the loop, so every yielded item aliased the same object.
            commentsItem = JdPhoneCommentItem()
            commentsItem['commentId'] = comment['id']
            commentsItem['guid'] = comment['guid']
            commentsItem['content'] = comment['content']
            commentsItem['referenceId'] = comment['referenceId']
            # referenceTime looks like "2016-09-19 13:52:49"
            # (FIX: dropped a duplicate strptime call whose result was discarded.)
            commentsItem['referenceTime'] = datetime.strptime(
                comment['referenceTime'], "%Y-%m-%d %H:%M:%S")
            commentsItem['referenceName'] = comment['referenceName']
            commentsItem['userProvince'] = comment['userProvince']
            commentsItem['userRegisterTime'] = comment.get('userRegisterTime')
            commentsItem['nickname'] = comment['nickname']
            commentsItem['userLevelName'] = comment['userLevelName']
            commentsItem['userClientShow'] = comment['userClientShow']
            commentsItem['productColor'] = comment['productColor']
            commentsItem['productSize'] = comment.get("productSize")
            commentsItem['afterDays'] = int(comment['days'])
            images = comment.get("images")
            images_urls = ""
            if images:
                for image in images:
                    # FIX: accumulate with '+=' -- the original '=' kept only
                    # the last image URL.
                    images_urls += image["imgUrl"] + ";"
            commentsItem['imagesUrl'] = images_urls
            yield commentsItem

        summary = commentsJson["productCommentSummary"]
        commentCount = summary["commentCount"]
        phoneID = summary["productId"]
        priceUrl = "" % phoneID  # NOTE(review): URL template lost in scrape
        meta = {
            "phoneID": phoneID,
            "commentCount": commentCount,
            "goodCommentsCount": summary["goodCount"],
            "goodCommentsRate": summary["goodRate"],
            "generalCommentsCount": summary["generalCount"],
            "generalCommentsRate": summary["generalRate"],
            "poorCommentsCount": summary["poorCount"],
            "poorCommentsRate": summary["poorRate"],
        }
        yield Request(priceUrl, callback=self.parse_phone_price, meta=meta)
        # FIX: floor division so range() receives an int on Python 3 as well
        # (matches the Python 2 behavior of '/' on ints).
        pageNum = commentCount // 10 + 1
        for i in range(pageNum):
            commentsUrl = "" % (phoneID, i)  # NOTE(review): URL template lost
            yield Request(commentsUrl, callback=self.parse_commnets)
Python中简单实用的json包
JSON(JavaScript Object Notation) 是一种轻量级的数据交换格式,易于人阅读和编写。
Demjson 是Python的第三方模块库,可用于编码和解码Json数据,包含了JSONLint的格式化及校验功能。