-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathrakuten_basic_analysis.py
More file actions
66 lines (56 loc) · 1.79 KB
/
rakuten_basic_analysis.py
File metadata and controls
66 lines (56 loc) · 1.79 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
#!/usr/bin/python
# coding: utf-8
import io
import json

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
def read_jsonline(filename):
    """Convert a JSON Lines file into CSV text.

    Every non-blank line of *filename* is parsed as one JSON object and
    becomes one CSV row.  The columns are always, in this order:
    review_count, description, title, price, maker, link, genre, review.
    A header row with those names comes first.

    Args:
        filename: Path to a UTF-8 encoded JSON Lines file.

    Returns:
        The whole CSV document as one string.  Every row, including the
        header, is terminated by a newline.

    Raises:
        IOError / OSError: If the file cannot be opened.
        ValueError: If a non-blank line is not valid JSON.
    """
    key = ['review_count', 'description', 'title', 'price', 'maker', 'link', 'genre', 'review']
    rows = [','.join(key)]
    # io.open behaves the same on Python 2 and 3 and pins the encoding
    # instead of relying on the platform default; the context manager closes
    # the file even when a line fails to parse.
    with io.open(filename, encoding='utf-8') as f:
        for line in f:
            if not line.strip():
                # Tolerate blank separator lines instead of failing in json.loads.
                continue
            js = json.loads(line)
            # dict.get yields None for an absent key; _csv_field maps None
            # to the '0' placeholder.
            rows.append(','.join(_csv_field(js.get(k)) for k in key))
    return '\n'.join(rows) + '\n'


def _csv_field(value):
    """Render one JSON value as a single, safely quoted CSV field."""
    if value is None:
        # Absent keys and explicit JSON nulls both use the '0' placeholder.
        return '0'
    # The one-element tuple keeps %-formatting safe for any value type and
    # turns numbers/booleans into text instead of failing on concatenation.
    text = '%s' % (value,)
    if any(ch in text for ch in ',"\r\n'):
        # Quote fields containing delimiters and double embedded quotes so
        # CSV readers see exactly one field.
        text = '"' + text.replace('"', '""') + '"'
    return text
def main(csv_path='item.csv'):
    """Print the price/review correlation matrix for each product genre.

    Reads the item table from *csv_path*, selects the rows of each of the
    four genres below and prints ``np.corrcoef(price, review)`` for each,
    in the order: ion, wave, laser, other.

    Args:
        csv_path: CSV file to analyse.  It must provide the columns
            ``genre``, ``price`` and ``review``.  Defaults to ``item.csv``
            in the current working directory.
    """
    data = pd.read_csv(csv_path)
    ion = data[data['genre'] == 'イオン導入器']      # iontophoresis devices
    wave = data[data['genre'] == '超音波美顔器']     # ultrasonic facial devices
    laser = data[data['genre'] == 'レーザー美顔器']  # laser facial devices
    other = data[data['genre'] == 'その他']          # everything else

    # print(...) with a single argument behaves the same on Python 2 and 3.
    for subset in (ion, wave, laser, other):
        print(np.corrcoef(subset['price'], subset['review']))

    # The commented-out snippets below are earlier exploratory steps, kept
    # for reference.

    # Load the data (convert the scraped JSON Lines file to item.csv)
    # csv = read_jsonline('rakutenscrapy/rakutenscrapy/spiders/item.jl')
    # f = open('item.csv', 'w')
    # f.write(csv.encode('utf-8'))
    # f.close()

    # Histogram
    # plt.hist(data["price"])
    # plt.xlabel('price')
    # plt.ylabel('frq')
    # plt.show()

    # Pivot table
    # print(pd.pivot_table(data, index="genre", aggfunc=np.mean))

    # Box plot
    # box = [ion['price'], wave['price'], laser['price'], other['price']]
    # plt.boxplot(box)
    # plt.xlabel("genre")
    # plt.ylabel("price")
    # plt.ylim(0, 50000)
    # ax = plt.gca()
    # plt.setp(ax, xticklabels=['ion', 'wave', 'laser', 'other'])
    # plt.show()
# Run the analysis only when executed as a script, not when imported.
if "__main__" == __name__:
    main()